Mercurial > hg > dml-open-cliopatria
diff cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpack/dml/lib/xmlarchive.pl Tue Feb 09 21:05:06 2016 +0100 @@ -0,0 +1,220 @@ +/* Part of DML (Digital Music Laboratory) + Copyright 2014-2015 Samer Abdallah, University of London + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +:- module(xmlarchive, + [ load_xmlns/3 + , load_xmlns/2 + , archive_doc/3 + , map_archive_entries/4 + , with_archive_stream/3 + , with_archive_file/3 + , with_archive/2 + , with_current_entry_stream/3 + , archive_stats/1 + , op(200,fx,@) + ]). + +/* <module> Provides the ability to read XML documents in an archive +*/ + +:- use_module(library(sgml)). +:- use_module(library(termutils)). +:- use_module(library(lambda)). + +:- meta_predicate map_archive_streams(4,?,?,+), + map_archive_entries(5,?,?,+), + with_archive_stream(+,?,1), + with_archive_file(+,?,1), + with_current_entry_stream(+,-,0), + with_archive(+,1). + +:- thread_local xmlns/2. +:- thread_local errors/2. + +%% load_xmlns(+Source,-Doc:list,+Opts:list) is det. +% +% Loads an XML document using the xmlns dialect to handle element names +% that involve namespaces. On exit, Doc is a list of the top level elements +% in the document. Valid options are: +% * ns(-Namespaces:list(pair(atom,url))) +% On exit, Namespaces will contain a list of abbreviation-URL pairs for +% all the namespaces declared in the document. +% * errors(-Errors:list(pair(oneof([error,warning]),text)) +% On exit, Errors will contain a list of all the errors and warnings +% encountered while parsing the document. +load_xmlns(Source,Doc,Opts) :- + option(ns(Namespaces),Opts,_), + option(errors(Errors),Opts,_), + retractall(xmlns(_,_)), + retractall(errors(_,_)), + load_xml(Source,Doc, + [ dialect(xmlns) + , space(remove) + , call(xmlns,on_xmlns) + , call(error,on_error) + , call(urlns,on_urlns) + , max_errors(-1) + ]), + findall(S-M,errors(S,M),Errors), + findall(N-URL,xmlns(N,URL),Namespaces). + +%% load_xmlns(+Source,-Doc:list) is det. +load_xmlns(Source,Doc) :- + retractall(xmlns(_,_)), + retractall(errors(_,_)), + load_xml(Source,Doc, + [ dialect(xmlns) + , space(remove) + , call(xmlns,on_xmlns) + , call(urlns,on_urlns) + , call(error,on_error) + , max_errors(-1) + ]). + +on_xmlns(Prefix,URL,_) :- + debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]), + ( rdf_current_prefix(Prefix,URL1) + -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]), + asserta(xmlns(Prefix,URL)), + ( URL=URL1 -> assert(xmlns(Prefix,URL)) + ; debug(xmlarchive,'*** Namespace CLASH',[]), + assert(xmlns(Prefix,URL)) + ) + ; asserta(xmlns(Prefix,URL)) + ). + +on_urlns(xmlns,xmlns,_) :- !. +on_urlns(URL,Prefix,_) :- + ( xmlns(Prefix,URL) + -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix]) + ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix]) + ). + +on_error(Severity,Message,_Parser) :- + debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]), + assertz(errors(Severity,Message)). + + + + +%% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet. +% +% Is true when archive File contains a file encoding XML document Doc. +% Valid options are: +% * name(-Name:atom) +% On exit, Name will contain the name of the file in the archive that +% was parsed to produce Doc. +% * ns(-NS:list(pair(atom,url))) +% On exit, NS will contain a list of namespaces used in the document. +archive_doc(File,Doc,Opts) :- + select_option(name(Name),Opts,Opts1,_), + with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)). + +archive_stats(File) :- + nl, + with_status_line( with_archive( File, + map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))), + format('Compiling statistics...\n',[]), + aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings), + once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0), + setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs), + length(Msgs,NumMessageTypes), + nl, + format(' Number of loaded files: ~d\n',[T]), + format('Number of files with problems: ~d\n',[N]), + format(' Total number of problems: ~d\n',[M]), + format(' Number of errors: ~d\n',[NumErrors]), + format(' Number of warnings: ~d\n',[NumWarnings]), + format(' Number of distinct messages: ~d\n',[NumMessageTypes]). + +errors_error(L,Sev,Msg) :- + member(_-Errs,L), + member(Sev-Msg,Errs). + +load_and_count(Name,Stream,I1-E1,I2-E2) :- + succ(I1,I2), + status('Loading: ~d - ~s',[I2,Name]), + load_xmlns(Stream,_,[errors(Errors)]), + ( Errors=[] -> E2=E1 + ; length(Errors,DM), + E1=e(N1,M1,L1), E2=e(N2,M2,L2), + N2 is N1 + 1, M2 is M1 + DM, + L2=[Name-Errors|L1] + ). + +% ---------- General archive handling stuff --------- + + +%% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet. +% +% Unifies Name with the name of an entry in archive File, and calls Goal +% as call(Goal,Stream), where Stream is available for reading the entry. +% Runs through all available entries on backtracking. +with_archive_stream(File,Name,Goal) :- + with_archive(File, with_stream_in_archive(Name,Goal)). + + +with_archive_file(File,Name,Goal) :- + with_archive(File, with_file_in_archive(Name,Goal)). + +with_file_in_archive(Name,Goal,Archive) :- + archive_entry_name(Archive,Name), + archive_header_property(Archive,filetype(file)), + call(Goal,Archive). + +with_stream_in_archive(Name,Goal,Archive) :- + archive_entry_name(Archive,Name), + archive_header_property(Archive,filetype(file)), + setup_call_cleanup( + archive_open_entry(Archive,Stream), + call(Goal,Stream), + close(Stream)). + +archive_entry_name(Archive,Name) :- var(Name), !, + catch(( repeat, + (archive_next_header(Archive,Name) -> true; throw(nomore)) + ), nomore,fail). + +archive_entry_name(Archive,Name) :- + archive_next_header(Archive,Name). + +map_archive_entries(Goal,S1,S3,Archive) :- + ( archive_next_header(Archive,Name) + -> archive_header_property(Archive,filetype(Type)), + call(Goal,Archive,Type,Name,S1,S2), + map_archive_entries(Goal,S2,S3,Archive) + ; S1=S3 + ). + +map_archive_streams(Goal,S1,S2,Archive) :- + map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive). + +call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !, + with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)). +call_with_archive_stream(_,_,_,S1,S1). + +with_current_entry_stream(Archive,Stream,Goal) :- + setup_call_cleanup( + archive_open_entry(Archive,Stream), Goal, + close(Stream)). + + +with_archive(File,Goal) :- + setup_call_cleanup( + archive_open(File,A,[]), call(Goal,A), + archive_close(A)).