comparison cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:718306e29690
1 /* Part of DML (Digital Music Laboratory)
2 Copyright 2014-2015 Samer Abdallah, University of London
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 :- module(xmlarchive,
20 [ load_xmlns/3
21 , load_xmlns/2
22 , archive_doc/3
23 , map_archive_entries/4
24 , with_archive_stream/3
25 , with_archive_file/3
26 , with_archive/2
27 , with_current_entry_stream/3
28 , archive_stats/1
29 , op(200,fx,@)
30 ]).
31
32 /* <module> Provides the ability to read XML documents in an archive
33 */
34
35 :- use_module(library(sgml)).
36 :- use_module(library(termutils)).
37 :- use_module(library(lambda)).
38
39 :- meta_predicate map_archive_streams(4,?,?,+),
40 map_archive_entries(5,?,?,+),
41 with_archive_stream(+,?,1),
42 with_archive_file(+,?,1),
43 with_current_entry_stream(+,-,0),
44 with_archive(+,1).
45
46 :- thread_local xmlns/2.
47 :- thread_local errors/2.
48
49 %% load_xmlns(+Source,-Doc:list,+Opts:list) is det.
50 %
51 % Loads an XML document using the xmlns dialect to handle element names
52 % that involve namespaces. On exit, Doc is a list of the top level elements
53 % in the document. Valid options are:
54 % * ns(-Namespaces:list(pair(atom,url)))
55 % On exit, Namespaces will contain a list of abbreviation-URL pairs for
56 % all the namespaces declared in the document.
57 % * errors(-Errors:list(pair(oneof([error,warning]),text))
58 % On exit, Errors will contain a list of all the errors and warnings
59 % encountered while parsing the document.
60 load_xmlns(Source,Doc,Opts) :-
61 option(ns(Namespaces),Opts,_),
62 option(errors(Errors),Opts,_),
63 retractall(xmlns(_,_)),
64 retractall(errors(_,_)),
65 load_xml(Source,Doc,
66 [ dialect(xmlns)
67 , space(remove)
68 , call(xmlns,on_xmlns)
69 , call(error,on_error)
70 , call(urlns,on_urlns)
71 , max_errors(-1)
72 ]),
73 findall(S-M,errors(S,M),Errors),
74 findall(N-URL,xmlns(N,URL),Namespaces).
75
76 %% load_xmlns(+Source,-Doc:list) is det.
77 load_xmlns(Source,Doc) :-
78 retractall(xmlns(_,_)),
79 retractall(errors(_,_)),
80 load_xml(Source,Doc,
81 [ dialect(xmlns)
82 , space(remove)
83 , call(xmlns,on_xmlns)
84 , call(urlns,on_urlns)
85 , call(error,on_error)
86 , max_errors(-1)
87 ]).
88
89 on_xmlns(Prefix,URL,_) :-
90 debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]),
91 ( rdf_current_prefix(Prefix,URL1)
92 -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]),
93 asserta(xmlns(Prefix,URL)),
94 ( URL=URL1 -> assert(xmlns(Prefix,URL))
95 ; debug(xmlarchive,'*** Namespace CLASH',[]),
96 assert(xmlns(Prefix,URL))
97 )
98 ; asserta(xmlns(Prefix,URL))
99 ).
100
101 on_urlns(xmlns,xmlns,_) :- !.
102 on_urlns(URL,Prefix,_) :-
103 ( xmlns(Prefix,URL)
104 -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix])
105 ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix])
106 ).
107
108 on_error(Severity,Message,_Parser) :-
109 debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]),
110 assertz(errors(Severity,Message)).
111
112
113
114
115 %% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet.
116 %
117 % Is true when archive File contains a file encoding XML document Doc.
118 % Valid options are:
119 % * name(-Name:atom)
120 % On exit, Name will contain the name of the file in the archive that
121 % was parsed to produce Doc.
122 % * ns(-NS:list(pair(atom,url)))
123 % On exit, NS will contain a list of namespaces used in the document.
124 archive_doc(File,Doc,Opts) :-
125 select_option(name(Name),Opts,Opts1,_),
126 with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)).
127
128 archive_stats(File) :-
129 nl,
130 with_status_line( with_archive( File,
131 map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))),
132 format('Compiling statistics...\n',[]),
133 aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings),
134 once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0),
135 setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs),
136 length(Msgs,NumMessageTypes),
137 nl,
138 format(' Number of loaded files: ~d\n',[T]),
139 format('Number of files with problems: ~d\n',[N]),
140 format(' Total number of problems: ~d\n',[M]),
141 format(' Number of errors: ~d\n',[NumErrors]),
142 format(' Number of warnings: ~d\n',[NumWarnings]),
143 format(' Number of distinct messages: ~d\n',[NumMessageTypes]).
144
145 errors_error(L,Sev,Msg) :-
146 member(_-Errs,L),
147 member(Sev-Msg,Errs).
148
149 load_and_count(Name,Stream,I1-E1,I2-E2) :-
150 succ(I1,I2),
151 status('Loading: ~d - ~s',[I2,Name]),
152 load_xmlns(Stream,_,[errors(Errors)]),
153 ( Errors=[] -> E2=E1
154 ; length(Errors,DM),
155 E1=e(N1,M1,L1), E2=e(N2,M2,L2),
156 N2 is N1 + 1, M2 is M1 + DM,
157 L2=[Name-Errors|L1]
158 ).
159
160 % ---------- General archive handling stuff ---------
161
162
163 %% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet.
164 %
165 % Unifies Name with the name of an entry in archive File, and calls Goal
166 % as call(Goal,Stream), where Stream is available for reading the entry.
167 % Runs through all available entries on backtracking.
168 with_archive_stream(File,Name,Goal) :-
169 with_archive(File, with_stream_in_archive(Name,Goal)).
170
171
172 with_archive_file(File,Name,Goal) :-
173 with_archive(File, with_file_in_archive(Name,Goal)).
174
175 with_file_in_archive(Name,Goal,Archive) :-
176 archive_entry_name(Archive,Name),
177 archive_header_property(Archive,filetype(file)),
178 call(Goal,Archive).
179
180 with_stream_in_archive(Name,Goal,Archive) :-
181 archive_entry_name(Archive,Name),
182 archive_header_property(Archive,filetype(file)),
183 setup_call_cleanup(
184 archive_open_entry(Archive,Stream),
185 call(Goal,Stream),
186 close(Stream)).
187
188 archive_entry_name(Archive,Name) :- var(Name), !,
189 catch(( repeat,
190 (archive_next_header(Archive,Name) -> true; throw(nomore))
191 ), nomore,fail).
192
193 archive_entry_name(Archive,Name) :-
194 archive_next_header(Archive,Name).
195
196 map_archive_entries(Goal,S1,S3,Archive) :-
197 ( archive_next_header(Archive,Name)
198 -> archive_header_property(Archive,filetype(Type)),
199 call(Goal,Archive,Type,Name,S1,S2),
200 map_archive_entries(Goal,S2,S3,Archive)
201 ; S1=S3
202 ).
203
204 map_archive_streams(Goal,S1,S2,Archive) :-
205 map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive).
206
207 call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !,
208 with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)).
209 call_with_archive_stream(_,_,_,S1,S1).
210
211 with_current_entry_stream(Archive,Stream,Goal) :-
212 setup_call_cleanup(
213 archive_open_entry(Archive,Stream), Goal,
214 close(Stream)).
215
216
217 with_archive(File,Goal) :-
218 setup_call_cleanup(
219 archive_open(File,A,[]), call(Goal,A),
220 archive_close(A)).