Daniel@0
|
1 /* Part of DML (Digital Music Laboratory)
|
Daniel@0
|
2 Copyright 2014-2015 Samer Abdallah, University of London
|
Daniel@0
|
3
|
Daniel@0
|
4 This program is free software; you can redistribute it and/or
|
Daniel@0
|
5 modify it under the terms of the GNU General Public License
|
Daniel@0
|
6 as published by the Free Software Foundation; either version 2
|
Daniel@0
|
7 of the License, or (at your option) any later version.
|
Daniel@0
|
8
|
Daniel@0
|
9 This program is distributed in the hope that it will be useful,
|
Daniel@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
12 GNU General Public License for more details.
|
Daniel@0
|
13
|
Daniel@0
|
14 You should have received a copy of the GNU General Public
|
Daniel@0
|
15 License along with this library; if not, write to the Free Software
|
Daniel@0
|
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
17 */
|
Daniel@0
|
18
|
Daniel@0
|
19 :- module(xmlarchive,
|
Daniel@0
|
20 [ load_xmlns/3
|
Daniel@0
|
21 , load_xmlns/2
|
Daniel@0
|
22 , archive_doc/3
|
Daniel@0
|
23 , map_archive_entries/4
|
Daniel@0
|
24 , with_archive_stream/3
|
Daniel@0
|
25 , with_archive_file/3
|
Daniel@0
|
26 , with_archive/2
|
Daniel@0
|
27 , with_current_entry_stream/3
|
Daniel@0
|
28 , archive_stats/1
|
Daniel@0
|
29 , op(200,fx,@)
|
Daniel@0
|
30 ]).
|
Daniel@0
|
31
|
Daniel@0
|
32 /* <module> Provides the ability to read XML documents in an archive
|
Daniel@0
|
33 */
|
Daniel@0
|
34
|
Daniel@0
|
35 :- use_module(library(sgml)).
|
Daniel@0
|
36 :- use_module(library(termutils)).
|
Daniel@0
|
37 :- use_module(library(lambda)).
|
Daniel@0
|
38
|
Daniel@0
|
39 :- meta_predicate map_archive_streams(4,?,?,+),
|
Daniel@0
|
40 map_archive_entries(5,?,?,+),
|
Daniel@0
|
41 with_archive_stream(+,?,1),
|
Daniel@0
|
42 with_archive_file(+,?,1),
|
Daniel@0
|
43 with_current_entry_stream(+,-,0),
|
Daniel@0
|
44 with_archive(+,1).
|
Daniel@0
|
45
|
Daniel@0
|
46 :- thread_local xmlns/2.
|
Daniel@0
|
47 :- thread_local errors/2.
|
Daniel@0
|
48
|
Daniel@0
|
49 %% load_xmlns(+Source,-Doc:list,+Opts:list) is det.
|
Daniel@0
|
50 %
|
Daniel@0
|
51 % Loads an XML document using the xmlns dialect to handle element names
|
Daniel@0
|
52 % that involve namespaces. On exit, Doc is a list of the top level elements
|
Daniel@0
|
53 % in the document. Valid options are:
|
Daniel@0
|
54 % * ns(-Namespaces:list(pair(atom,url)))
|
Daniel@0
|
55 % On exit, Namespaces will contain a list of abbreviation-URL pairs for
|
Daniel@0
|
56 % all the namespaces declared in the document.
|
Daniel@0
|
57 % * errors(-Errors:list(pair(oneof([error,warning]),text))
|
Daniel@0
|
58 % On exit, Errors will contain a list of all the errors and warnings
|
Daniel@0
|
59 % encountered while parsing the document.
|
Daniel@0
|
60 load_xmlns(Source,Doc,Opts) :-
|
Daniel@0
|
61 option(ns(Namespaces),Opts,_),
|
Daniel@0
|
62 option(errors(Errors),Opts,_),
|
Daniel@0
|
63 retractall(xmlns(_,_)),
|
Daniel@0
|
64 retractall(errors(_,_)),
|
Daniel@0
|
65 load_xml(Source,Doc,
|
Daniel@0
|
66 [ dialect(xmlns)
|
Daniel@0
|
67 , space(remove)
|
Daniel@0
|
68 , call(xmlns,on_xmlns)
|
Daniel@0
|
69 , call(error,on_error)
|
Daniel@0
|
70 , call(urlns,on_urlns)
|
Daniel@0
|
71 , max_errors(-1)
|
Daniel@0
|
72 ]),
|
Daniel@0
|
73 findall(S-M,errors(S,M),Errors),
|
Daniel@0
|
74 findall(N-URL,xmlns(N,URL),Namespaces).
|
Daniel@0
|
75
|
Daniel@0
|
76 %% load_xmlns(+Source,-Doc:list) is det.
|
Daniel@0
|
77 load_xmlns(Source,Doc) :-
|
Daniel@0
|
78 retractall(xmlns(_,_)),
|
Daniel@0
|
79 retractall(errors(_,_)),
|
Daniel@0
|
80 load_xml(Source,Doc,
|
Daniel@0
|
81 [ dialect(xmlns)
|
Daniel@0
|
82 , space(remove)
|
Daniel@0
|
83 , call(xmlns,on_xmlns)
|
Daniel@0
|
84 , call(urlns,on_urlns)
|
Daniel@0
|
85 , call(error,on_error)
|
Daniel@0
|
86 , max_errors(-1)
|
Daniel@0
|
87 ]).
|
Daniel@0
|
88
|
Daniel@0
|
89 on_xmlns(Prefix,URL,_) :-
|
Daniel@0
|
90 debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]),
|
Daniel@0
|
91 ( rdf_current_prefix(Prefix,URL1)
|
Daniel@0
|
92 -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]),
|
Daniel@0
|
93 asserta(xmlns(Prefix,URL)),
|
Daniel@0
|
94 ( URL=URL1 -> assert(xmlns(Prefix,URL))
|
Daniel@0
|
95 ; debug(xmlarchive,'*** Namespace CLASH',[]),
|
Daniel@0
|
96 assert(xmlns(Prefix,URL))
|
Daniel@0
|
97 )
|
Daniel@0
|
98 ; asserta(xmlns(Prefix,URL))
|
Daniel@0
|
99 ).
|
Daniel@0
|
100
|
Daniel@0
|
101 on_urlns(xmlns,xmlns,_) :- !.
|
Daniel@0
|
102 on_urlns(URL,Prefix,_) :-
|
Daniel@0
|
103 ( xmlns(Prefix,URL)
|
Daniel@0
|
104 -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix])
|
Daniel@0
|
105 ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix])
|
Daniel@0
|
106 ).
|
Daniel@0
|
107
|
Daniel@0
|
108 on_error(Severity,Message,_Parser) :-
|
Daniel@0
|
109 debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]),
|
Daniel@0
|
110 assertz(errors(Severity,Message)).
|
Daniel@0
|
111
|
Daniel@0
|
112
|
Daniel@0
|
113
|
Daniel@0
|
114
|
Daniel@0
|
115 %% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet.
|
Daniel@0
|
116 %
|
Daniel@0
|
117 % Is true when archive File contains a file encoding XML document Doc.
|
Daniel@0
|
118 % Valid options are:
|
Daniel@0
|
119 % * name(-Name:atom)
|
Daniel@0
|
120 % On exit, Name will contain the name of the file in the archive that
|
Daniel@0
|
121 % was parsed to produce Doc.
|
Daniel@0
|
122 % * ns(-NS:list(pair(atom,url)))
|
Daniel@0
|
123 % On exit, NS will contain a list of namespaces used in the document.
|
Daniel@0
|
124 archive_doc(File,Doc,Opts) :-
|
Daniel@0
|
125 select_option(name(Name),Opts,Opts1,_),
|
Daniel@0
|
126 with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)).
|
Daniel@0
|
127
|
Daniel@0
|
128 archive_stats(File) :-
|
Daniel@0
|
129 nl,
|
Daniel@0
|
130 with_status_line( with_archive( File,
|
Daniel@0
|
131 map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))),
|
Daniel@0
|
132 format('Compiling statistics...\n',[]),
|
Daniel@0
|
133 aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings),
|
Daniel@0
|
134 once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0),
|
Daniel@0
|
135 setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs),
|
Daniel@0
|
136 length(Msgs,NumMessageTypes),
|
Daniel@0
|
137 nl,
|
Daniel@0
|
138 format(' Number of loaded files: ~d\n',[T]),
|
Daniel@0
|
139 format('Number of files with problems: ~d\n',[N]),
|
Daniel@0
|
140 format(' Total number of problems: ~d\n',[M]),
|
Daniel@0
|
141 format(' Number of errors: ~d\n',[NumErrors]),
|
Daniel@0
|
142 format(' Number of warnings: ~d\n',[NumWarnings]),
|
Daniel@0
|
143 format(' Number of distinct messages: ~d\n',[NumMessageTypes]).
|
Daniel@0
|
144
|
Daniel@0
|
145 errors_error(L,Sev,Msg) :-
|
Daniel@0
|
146 member(_-Errs,L),
|
Daniel@0
|
147 member(Sev-Msg,Errs).
|
Daniel@0
|
148
|
Daniel@0
|
149 load_and_count(Name,Stream,I1-E1,I2-E2) :-
|
Daniel@0
|
150 succ(I1,I2),
|
Daniel@0
|
151 status('Loading: ~d - ~s',[I2,Name]),
|
Daniel@0
|
152 load_xmlns(Stream,_,[errors(Errors)]),
|
Daniel@0
|
153 ( Errors=[] -> E2=E1
|
Daniel@0
|
154 ; length(Errors,DM),
|
Daniel@0
|
155 E1=e(N1,M1,L1), E2=e(N2,M2,L2),
|
Daniel@0
|
156 N2 is N1 + 1, M2 is M1 + DM,
|
Daniel@0
|
157 L2=[Name-Errors|L1]
|
Daniel@0
|
158 ).
|
Daniel@0
|
159
|
Daniel@0
|
160 % ---------- General archive handling stuff ---------
|
Daniel@0
|
161
|
Daniel@0
|
162
|
Daniel@0
|
163 %% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet.
|
Daniel@0
|
164 %
|
Daniel@0
|
165 % Unifies Name with the name of an entry in archive File, and calls Goal
|
Daniel@0
|
166 % as call(Goal,Stream), where Stream is available for reading the entry.
|
Daniel@0
|
167 % Runs through all available entries on backtracking.
|
Daniel@0
|
168 with_archive_stream(File,Name,Goal) :-
|
Daniel@0
|
169 with_archive(File, with_stream_in_archive(Name,Goal)).
|
Daniel@0
|
170
|
Daniel@0
|
171
|
Daniel@0
|
172 with_archive_file(File,Name,Goal) :-
|
Daniel@0
|
173 with_archive(File, with_file_in_archive(Name,Goal)).
|
Daniel@0
|
174
|
Daniel@0
|
175 with_file_in_archive(Name,Goal,Archive) :-
|
Daniel@0
|
176 archive_entry_name(Archive,Name),
|
Daniel@0
|
177 archive_header_property(Archive,filetype(file)),
|
Daniel@0
|
178 call(Goal,Archive).
|
Daniel@0
|
179
|
Daniel@0
|
180 with_stream_in_archive(Name,Goal,Archive) :-
|
Daniel@0
|
181 archive_entry_name(Archive,Name),
|
Daniel@0
|
182 archive_header_property(Archive,filetype(file)),
|
Daniel@0
|
183 setup_call_cleanup(
|
Daniel@0
|
184 archive_open_entry(Archive,Stream),
|
Daniel@0
|
185 call(Goal,Stream),
|
Daniel@0
|
186 close(Stream)).
|
Daniel@0
|
187
|
Daniel@0
|
188 archive_entry_name(Archive,Name) :- var(Name), !,
|
Daniel@0
|
189 catch(( repeat,
|
Daniel@0
|
190 (archive_next_header(Archive,Name) -> true; throw(nomore))
|
Daniel@0
|
191 ), nomore,fail).
|
Daniel@0
|
192
|
Daniel@0
|
193 archive_entry_name(Archive,Name) :-
|
Daniel@0
|
194 archive_next_header(Archive,Name).
|
Daniel@0
|
195
|
Daniel@0
|
196 map_archive_entries(Goal,S1,S3,Archive) :-
|
Daniel@0
|
197 ( archive_next_header(Archive,Name)
|
Daniel@0
|
198 -> archive_header_property(Archive,filetype(Type)),
|
Daniel@0
|
199 call(Goal,Archive,Type,Name,S1,S2),
|
Daniel@0
|
200 map_archive_entries(Goal,S2,S3,Archive)
|
Daniel@0
|
201 ; S1=S3
|
Daniel@0
|
202 ).
|
Daniel@0
|
203
|
Daniel@0
|
204 map_archive_streams(Goal,S1,S2,Archive) :-
|
Daniel@0
|
205 map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive).
|
Daniel@0
|
206
|
Daniel@0
|
207 call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !,
|
Daniel@0
|
208 with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)).
|
Daniel@0
|
209 call_with_archive_stream(_,_,_,S1,S1).
|
Daniel@0
|
210
|
Daniel@0
|
211 with_current_entry_stream(Archive,Stream,Goal) :-
|
Daniel@0
|
212 setup_call_cleanup(
|
Daniel@0
|
213 archive_open_entry(Archive,Stream), Goal,
|
Daniel@0
|
214 close(Stream)).
|
Daniel@0
|
215
|
Daniel@0
|
216
|
Daniel@0
|
217 with_archive(File,Goal) :-
|
Daniel@0
|
218 setup_call_cleanup(
|
Daniel@0
|
219 archive_open(File,A,[]), call(Goal,A),
|
Daniel@0
|
220 archive_close(A)).
|