Mercurial > hg > dml-open-cliopatria
comparison cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:718306e29690 |
---|---|
1 /* Part of DML (Digital Music Laboratory) | |
2 Copyright 2014-2015 Samer Abdallah, University of London | |
3 | |
4 This program is free software; you can redistribute it and/or | |
5 modify it under the terms of the GNU General Public License | |
6 as published by the Free Software Foundation; either version 2 | |
7 of the License, or (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public | |
15 License along with this library; if not, write to the Free Software | |
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 */ | |
18 | |
19 :- module(xmlarchive, | |
20 [ load_xmlns/3 | |
21 , load_xmlns/2 | |
22 , archive_doc/3 | |
23 , map_archive_entries/4 | |
24 , with_archive_stream/3 | |
25 , with_archive_file/3 | |
26 , with_archive/2 | |
27 , with_current_entry_stream/3 | |
28 , archive_stats/1 | |
29 , op(200,fx,@) | |
30 ]). | |
31 | |
32 /* <module> Provides the ability to read XML documents in an archive | |
33 */ | |
34 | |
35 :- use_module(library(sgml)). | |
36 :- use_module(library(termutils)). | |
37 :- use_module(library(lambda)). | |
38 | |
39 :- meta_predicate map_archive_streams(4,?,?,+), | |
40 map_archive_entries(5,?,?,+), | |
41 with_archive_stream(+,?,1), | |
42 with_archive_file(+,?,1), | |
43 with_current_entry_stream(+,-,0), | |
44 with_archive(+,1). | |
45 | |
46 :- thread_local xmlns/2. | |
47 :- thread_local errors/2. | |
48 | |
49 %% load_xmlns(+Source,-Doc:list,+Opts:list) is det. | |
50 % | |
51 % Loads an XML document using the xmlns dialect to handle element names | |
52 % that involve namespaces. On exit, Doc is a list of the top level elements | |
53 % in the document. Valid options are: | |
54 % * ns(-Namespaces:list(pair(atom,url))) | |
55 % On exit, Namespaces will contain a list of abbreviation-URL pairs for | |
56 % all the namespaces declared in the document. | |
57 % * errors(-Errors:list(pair(oneof([error,warning]),text)) | |
58 % On exit, Errors will contain a list of all the errors and warnings | |
59 % encountered while parsing the document. | |
60 load_xmlns(Source,Doc,Opts) :- | |
61 option(ns(Namespaces),Opts,_), | |
62 option(errors(Errors),Opts,_), | |
63 retractall(xmlns(_,_)), | |
64 retractall(errors(_,_)), | |
65 load_xml(Source,Doc, | |
66 [ dialect(xmlns) | |
67 , space(remove) | |
68 , call(xmlns,on_xmlns) | |
69 , call(error,on_error) | |
70 , call(urlns,on_urlns) | |
71 , max_errors(-1) | |
72 ]), | |
73 findall(S-M,errors(S,M),Errors), | |
74 findall(N-URL,xmlns(N,URL),Namespaces). | |
75 | |
76 %% load_xmlns(+Source,-Doc:list) is det. | |
77 load_xmlns(Source,Doc) :- | |
78 retractall(xmlns(_,_)), | |
79 retractall(errors(_,_)), | |
80 load_xml(Source,Doc, | |
81 [ dialect(xmlns) | |
82 , space(remove) | |
83 , call(xmlns,on_xmlns) | |
84 , call(urlns,on_urlns) | |
85 , call(error,on_error) | |
86 , max_errors(-1) | |
87 ]). | |
88 | |
89 on_xmlns(Prefix,URL,_) :- | |
90 debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]), | |
91 ( rdf_current_prefix(Prefix,URL1) | |
92 -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]), | |
93 asserta(xmlns(Prefix,URL)), | |
94 ( URL=URL1 -> assert(xmlns(Prefix,URL)) | |
95 ; debug(xmlarchive,'*** Namespace CLASH',[]), | |
96 assert(xmlns(Prefix,URL)) | |
97 ) | |
98 ; asserta(xmlns(Prefix,URL)) | |
99 ). | |
100 | |
101 on_urlns(xmlns,xmlns,_) :- !. | |
102 on_urlns(URL,Prefix,_) :- | |
103 ( xmlns(Prefix,URL) | |
104 -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix]) | |
105 ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix]) | |
106 ). | |
107 | |
108 on_error(Severity,Message,_Parser) :- | |
109 debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]), | |
110 assertz(errors(Severity,Message)). | |
111 | |
112 | |
113 | |
114 | |
115 %% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet. | |
116 % | |
117 % Is true when archive File contains a file encoding XML document Doc. | |
118 % Valid options are: | |
119 % * name(-Name:atom) | |
120 % On exit, Name will contain the name of the file in the archive that | |
121 % was parsed to produce Doc. | |
122 % * ns(-NS:list(pair(atom,url))) | |
123 % On exit, NS will contain a list of namespaces used in the document. | |
124 archive_doc(File,Doc,Opts) :- | |
125 select_option(name(Name),Opts,Opts1,_), | |
126 with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)). | |
127 | |
128 archive_stats(File) :- | |
129 nl, | |
130 with_status_line( with_archive( File, | |
131 map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))), | |
132 format('Compiling statistics...\n',[]), | |
133 aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings), | |
134 once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0), | |
135 setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs), | |
136 length(Msgs,NumMessageTypes), | |
137 nl, | |
138 format(' Number of loaded files: ~d\n',[T]), | |
139 format('Number of files with problems: ~d\n',[N]), | |
140 format(' Total number of problems: ~d\n',[M]), | |
141 format(' Number of errors: ~d\n',[NumErrors]), | |
142 format(' Number of warnings: ~d\n',[NumWarnings]), | |
143 format(' Number of distinct messages: ~d\n',[NumMessageTypes]). | |
144 | |
145 errors_error(L,Sev,Msg) :- | |
146 member(_-Errs,L), | |
147 member(Sev-Msg,Errs). | |
148 | |
149 load_and_count(Name,Stream,I1-E1,I2-E2) :- | |
150 succ(I1,I2), | |
151 status('Loading: ~d - ~s',[I2,Name]), | |
152 load_xmlns(Stream,_,[errors(Errors)]), | |
153 ( Errors=[] -> E2=E1 | |
154 ; length(Errors,DM), | |
155 E1=e(N1,M1,L1), E2=e(N2,M2,L2), | |
156 N2 is N1 + 1, M2 is M1 + DM, | |
157 L2=[Name-Errors|L1] | |
158 ). | |
159 | |
160 % ---------- General archive handling stuff --------- | |
161 | |
162 | |
163 %% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet. | |
164 % | |
165 % Unifies Name with the name of an entry in archive File, and calls Goal | |
166 % as call(Goal,Stream), where Stream is available for reading the entry. | |
167 % Runs through all available entries on backtracking. | |
168 with_archive_stream(File,Name,Goal) :- | |
169 with_archive(File, with_stream_in_archive(Name,Goal)). | |
170 | |
171 | |
172 with_archive_file(File,Name,Goal) :- | |
173 with_archive(File, with_file_in_archive(Name,Goal)). | |
174 | |
175 with_file_in_archive(Name,Goal,Archive) :- | |
176 archive_entry_name(Archive,Name), | |
177 archive_header_property(Archive,filetype(file)), | |
178 call(Goal,Archive). | |
179 | |
180 with_stream_in_archive(Name,Goal,Archive) :- | |
181 archive_entry_name(Archive,Name), | |
182 archive_header_property(Archive,filetype(file)), | |
183 setup_call_cleanup( | |
184 archive_open_entry(Archive,Stream), | |
185 call(Goal,Stream), | |
186 close(Stream)). | |
187 | |
188 archive_entry_name(Archive,Name) :- var(Name), !, | |
189 catch(( repeat, | |
190 (archive_next_header(Archive,Name) -> true; throw(nomore)) | |
191 ), nomore,fail). | |
192 | |
193 archive_entry_name(Archive,Name) :- | |
194 archive_next_header(Archive,Name). | |
195 | |
196 map_archive_entries(Goal,S1,S3,Archive) :- | |
197 ( archive_next_header(Archive,Name) | |
198 -> archive_header_property(Archive,filetype(Type)), | |
199 call(Goal,Archive,Type,Name,S1,S2), | |
200 map_archive_entries(Goal,S2,S3,Archive) | |
201 ; S1=S3 | |
202 ). | |
203 | |
204 map_archive_streams(Goal,S1,S2,Archive) :- | |
205 map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive). | |
206 | |
207 call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !, | |
208 with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)). | |
209 call_with_archive_stream(_,_,_,S1,S1). | |
210 | |
211 with_current_entry_stream(Archive,Stream,Goal) :- | |
212 setup_call_cleanup( | |
213 archive_open_entry(Archive,Stream), Goal, | |
214 close(Stream)). | |
215 | |
216 | |
217 with_archive(File,Goal) :- | |
218 setup_call_cleanup( | |
219 archive_open(File,A,[]), call(Goal,A), | |
220 archive_close(A)). |