Daniel@0: /* Part of DML (Digital Music Laboratory) Daniel@0: Copyright 2014-2015 Samer Abdallah, University of London Daniel@0: Daniel@0: This program is free software; you can redistribute it and/or Daniel@0: modify it under the terms of the GNU General Public License Daniel@0: as published by the Free Software Foundation; either version 2 Daniel@0: of the License, or (at your option) any later version. Daniel@0: Daniel@0: This program is distributed in the hope that it will be useful, Daniel@0: but WITHOUT ANY WARRANTY; without even the implied warranty of Daniel@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Daniel@0: GNU General Public License for more details. Daniel@0: Daniel@0: You should have received a copy of the GNU General Public Daniel@0: License along with this library; if not, write to the Free Software Daniel@0: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Daniel@0: */ Daniel@0: Daniel@0: :- module(xmlarchive, Daniel@0: [ load_xmlns/3 Daniel@0: , load_xmlns/2 Daniel@0: , archive_doc/3 Daniel@0: , map_archive_entries/4 Daniel@0: , with_archive_stream/3 Daniel@0: , with_archive_file/3 Daniel@0: , with_archive/2 Daniel@0: , with_current_entry_stream/3 Daniel@0: , archive_stats/1 Daniel@0: , op(200,fx,@) Daniel@0: ]). Daniel@0: Daniel@0: /* Provides the ability to read XML documents in an archive Daniel@0: */ Daniel@0: Daniel@0: :- use_module(library(sgml)). Daniel@0: :- use_module(library(termutils)). Daniel@0: :- use_module(library(lambda)). Daniel@0: Daniel@0: :- meta_predicate map_archive_streams(4,?,?,+), Daniel@0: map_archive_entries(5,?,?,+), Daniel@0: with_archive_stream(+,?,1), Daniel@0: with_archive_file(+,?,1), Daniel@0: with_current_entry_stream(+,-,0), Daniel@0: with_archive(+,1). Daniel@0: Daniel@0: :- thread_local xmlns/2. Daniel@0: :- thread_local errors/2. Daniel@0: Daniel@0: %% load_xmlns(+Source,-Doc:list,+Opts:list) is det. Daniel@0: % Daniel@0: % Loads an XML document using the xmlns dialect to handle element names Daniel@0: % that involve namespaces. On exit, Doc is a list of the top level elements Daniel@0: % in the document. Valid options are: Daniel@0: % * ns(-Namespaces:list(pair(atom,url))) Daniel@0: % On exit, Namespaces will contain a list of abbreviation-URL pairs for Daniel@0: % all the namespaces declared in the document. Daniel@0: % * errors(-Errors:list(pair(oneof([error,warning]),text)) Daniel@0: % On exit, Errors will contain a list of all the errors and warnings Daniel@0: % encountered while parsing the document. Daniel@0: load_xmlns(Source,Doc,Opts) :- Daniel@0: option(ns(Namespaces),Opts,_), Daniel@0: option(errors(Errors),Opts,_), Daniel@0: retractall(xmlns(_,_)), Daniel@0: retractall(errors(_,_)), Daniel@0: load_xml(Source,Doc, Daniel@0: [ dialect(xmlns) Daniel@0: , space(remove) Daniel@0: , call(xmlns,on_xmlns) Daniel@0: , call(error,on_error) Daniel@0: , call(urlns,on_urlns) Daniel@0: , max_errors(-1) Daniel@0: ]), Daniel@0: findall(S-M,errors(S,M),Errors), Daniel@0: findall(N-URL,xmlns(N,URL),Namespaces). Daniel@0: Daniel@0: %% load_xmlns(+Source,-Doc:list) is det. Daniel@0: load_xmlns(Source,Doc) :- Daniel@0: retractall(xmlns(_,_)), Daniel@0: retractall(errors(_,_)), Daniel@0: load_xml(Source,Doc, Daniel@0: [ dialect(xmlns) Daniel@0: , space(remove) Daniel@0: , call(xmlns,on_xmlns) Daniel@0: , call(urlns,on_urlns) Daniel@0: , call(error,on_error) Daniel@0: , max_errors(-1) Daniel@0: ]). Daniel@0: Daniel@0: on_xmlns(Prefix,URL,_) :- Daniel@0: debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]), Daniel@0: ( rdf_current_prefix(Prefix,URL1) Daniel@0: -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]), Daniel@0: asserta(xmlns(Prefix,URL)), Daniel@0: ( URL=URL1 -> assert(xmlns(Prefix,URL)) Daniel@0: ; debug(xmlarchive,'*** Namespace CLASH',[]), Daniel@0: assert(xmlns(Prefix,URL)) Daniel@0: ) Daniel@0: ; asserta(xmlns(Prefix,URL)) Daniel@0: ). Daniel@0: Daniel@0: on_urlns(xmlns,xmlns,_) :- !. Daniel@0: on_urlns(URL,Prefix,_) :- Daniel@0: ( xmlns(Prefix,URL) Daniel@0: -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix]) Daniel@0: ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix]) Daniel@0: ). Daniel@0: Daniel@0: on_error(Severity,Message,_Parser) :- Daniel@0: debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]), Daniel@0: assertz(errors(Severity,Message)). Daniel@0: Daniel@0: Daniel@0: Daniel@0: Daniel@0: %% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet. Daniel@0: % Daniel@0: % Is true when archive File contains a file encoding XML document Doc. Daniel@0: % Valid options are: Daniel@0: % * name(-Name:atom) Daniel@0: % On exit, Name will contain the name of the file in the archive that Daniel@0: % was parsed to produce Doc. Daniel@0: % * ns(-NS:list(pair(atom,url))) Daniel@0: % On exit, NS will contain a list of namespaces used in the document. Daniel@0: archive_doc(File,Doc,Opts) :- Daniel@0: select_option(name(Name),Opts,Opts1,_), Daniel@0: with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)). Daniel@0: Daniel@0: archive_stats(File) :- Daniel@0: nl, Daniel@0: with_status_line( with_archive( File, Daniel@0: map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))), Daniel@0: format('Compiling statistics...\n',[]), Daniel@0: aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings), Daniel@0: once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0), Daniel@0: setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs), Daniel@0: length(Msgs,NumMessageTypes), Daniel@0: nl, Daniel@0: format(' Number of loaded files: ~d\n',[T]), Daniel@0: format('Number of files with problems: ~d\n',[N]), Daniel@0: format(' Total number of problems: ~d\n',[M]), Daniel@0: format(' Number of errors: ~d\n',[NumErrors]), Daniel@0: format(' Number of warnings: ~d\n',[NumWarnings]), Daniel@0: format(' Number of distinct messages: ~d\n',[NumMessageTypes]). Daniel@0: Daniel@0: errors_error(L,Sev,Msg) :- Daniel@0: member(_-Errs,L), Daniel@0: member(Sev-Msg,Errs). Daniel@0: Daniel@0: load_and_count(Name,Stream,I1-E1,I2-E2) :- Daniel@0: succ(I1,I2), Daniel@0: status('Loading: ~d - ~s',[I2,Name]), Daniel@0: load_xmlns(Stream,_,[errors(Errors)]), Daniel@0: ( Errors=[] -> E2=E1 Daniel@0: ; length(Errors,DM), Daniel@0: E1=e(N1,M1,L1), E2=e(N2,M2,L2), Daniel@0: N2 is N1 + 1, M2 is M1 + DM, Daniel@0: L2=[Name-Errors|L1] Daniel@0: ). Daniel@0: Daniel@0: % ---------- General archive handling stuff --------- Daniel@0: Daniel@0: Daniel@0: %% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet. Daniel@0: % Daniel@0: % Unifies Name with the name of an entry in archive File, and calls Goal Daniel@0: % as call(Goal,Stream), where Stream is available for reading the entry. Daniel@0: % Runs through all available entries on backtracking. Daniel@0: with_archive_stream(File,Name,Goal) :- Daniel@0: with_archive(File, with_stream_in_archive(Name,Goal)). Daniel@0: Daniel@0: Daniel@0: with_archive_file(File,Name,Goal) :- Daniel@0: with_archive(File, with_file_in_archive(Name,Goal)). Daniel@0: Daniel@0: with_file_in_archive(Name,Goal,Archive) :- Daniel@0: archive_entry_name(Archive,Name), Daniel@0: archive_header_property(Archive,filetype(file)), Daniel@0: call(Goal,Archive). Daniel@0: Daniel@0: with_stream_in_archive(Name,Goal,Archive) :- Daniel@0: archive_entry_name(Archive,Name), Daniel@0: archive_header_property(Archive,filetype(file)), Daniel@0: setup_call_cleanup( Daniel@0: archive_open_entry(Archive,Stream), Daniel@0: call(Goal,Stream), Daniel@0: close(Stream)). Daniel@0: Daniel@0: archive_entry_name(Archive,Name) :- var(Name), !, Daniel@0: catch(( repeat, Daniel@0: (archive_next_header(Archive,Name) -> true; throw(nomore)) Daniel@0: ), nomore,fail). Daniel@0: Daniel@0: archive_entry_name(Archive,Name) :- Daniel@0: archive_next_header(Archive,Name). Daniel@0: Daniel@0: map_archive_entries(Goal,S1,S3,Archive) :- Daniel@0: ( archive_next_header(Archive,Name) Daniel@0: -> archive_header_property(Archive,filetype(Type)), Daniel@0: call(Goal,Archive,Type,Name,S1,S2), Daniel@0: map_archive_entries(Goal,S2,S3,Archive) Daniel@0: ; S1=S3 Daniel@0: ). Daniel@0: Daniel@0: map_archive_streams(Goal,S1,S2,Archive) :- Daniel@0: map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive). Daniel@0: Daniel@0: call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !, Daniel@0: with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)). Daniel@0: call_with_archive_stream(_,_,_,S1,S1). Daniel@0: Daniel@0: with_current_entry_stream(Archive,Stream,Goal) :- Daniel@0: setup_call_cleanup( Daniel@0: archive_open_entry(Archive,Stream), Goal, Daniel@0: close(Stream)). Daniel@0: Daniel@0: Daniel@0: with_archive(File,Goal) :- Daniel@0: setup_call_cleanup( Daniel@0: archive_open(File,A,[]), call(Goal,A), Daniel@0: archive_close(A)).