Mercurial > hg > dml-open-cliopatria
view cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line source
/* Part of DML (Digital Music Laboratory) Copyright 2014-2015 Samer Abdallah, University of London This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ :- module(xmlarchive, [ load_xmlns/3 , load_xmlns/2 , archive_doc/3 , map_archive_entries/4 , with_archive_stream/3 , with_archive_file/3 , with_archive/2 , with_current_entry_stream/3 , archive_stats/1 , op(200,fx,@) ]). /* <module> Provides the ability to read XML documents in an archive */ :- use_module(library(sgml)). :- use_module(library(termutils)). :- use_module(library(lambda)). :- meta_predicate map_archive_streams(4,?,?,+), map_archive_entries(5,?,?,+), with_archive_stream(+,?,1), with_archive_file(+,?,1), with_current_entry_stream(+,-,0), with_archive(+,1). :- thread_local xmlns/2. :- thread_local errors/2. %% load_xmlns(+Source,-Doc:list,+Opts:list) is det. % % Loads an XML document using the xmlns dialect to handle element names % that involve namespaces. On exit, Doc is a list of the top level elements % in the document. Valid options are: % * ns(-Namespaces:list(pair(atom,url))) % On exit, Namespaces will contain a list of abbreviation-URL pairs for % all the namespaces declared in the document. % * errors(-Errors:list(pair(oneof([error,warning]),text)) % On exit, Errors will contain a list of all the errors and warnings % encountered while parsing the document. load_xmlns(Source,Doc,Opts) :- option(ns(Namespaces),Opts,_), option(errors(Errors),Opts,_), retractall(xmlns(_,_)), retractall(errors(_,_)), load_xml(Source,Doc, [ dialect(xmlns) , space(remove) , call(xmlns,on_xmlns) , call(error,on_error) , call(urlns,on_urlns) , max_errors(-1) ]), findall(S-M,errors(S,M),Errors), findall(N-URL,xmlns(N,URL),Namespaces). %% load_xmlns(+Source,-Doc:list) is det. load_xmlns(Source,Doc) :- retractall(xmlns(_,_)), retractall(errors(_,_)), load_xml(Source,Doc, [ dialect(xmlns) , space(remove) , call(xmlns,on_xmlns) , call(urlns,on_urlns) , call(error,on_error) , max_errors(-1) ]). on_xmlns(Prefix,URL,_) :- debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]), ( rdf_current_prefix(Prefix,URL1) -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]), asserta(xmlns(Prefix,URL)), ( URL=URL1 -> assert(xmlns(Prefix,URL)) ; debug(xmlarchive,'*** Namespace CLASH',[]), assert(xmlns(Prefix,URL)) ) ; asserta(xmlns(Prefix,URL)) ). on_urlns(xmlns,xmlns,_) :- !. on_urlns(URL,Prefix,_) :- ( xmlns(Prefix,URL) -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix]) ; debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix]) ). on_error(Severity,Message,_Parser) :- debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]), assertz(errors(Severity,Message)). %% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet. % % Is true when archive File contains a file encoding XML document Doc. % Valid options are: % * name(-Name:atom) % On exit, Name will contain the name of the file in the archive that % was parsed to produce Doc. % * ns(-NS:list(pair(atom,url))) % On exit, NS will contain a list of namespaces used in the document. archive_doc(File,Doc,Opts) :- select_option(name(Name),Opts,Opts1,_), with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)). archive_stats(File) :- nl, with_status_line( with_archive( File, map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))), format('Compiling statistics...\n',[]), aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings), once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0), setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs), length(Msgs,NumMessageTypes), nl, format(' Number of loaded files: ~d\n',[T]), format('Number of files with problems: ~d\n',[N]), format(' Total number of problems: ~d\n',[M]), format(' Number of errors: ~d\n',[NumErrors]), format(' Number of warnings: ~d\n',[NumWarnings]), format(' Number of distinct messages: ~d\n',[NumMessageTypes]). errors_error(L,Sev,Msg) :- member(_-Errs,L), member(Sev-Msg,Errs). load_and_count(Name,Stream,I1-E1,I2-E2) :- succ(I1,I2), status('Loading: ~d - ~s',[I2,Name]), load_xmlns(Stream,_,[errors(Errors)]), ( Errors=[] -> E2=E1 ; length(Errors,DM), E1=e(N1,M1,L1), E2=e(N2,M2,L2), N2 is N1 + 1, M2 is M1 + DM, L2=[Name-Errors|L1] ). % ---------- General archive handling stuff --------- %% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet. % % Unifies Name with the name of an entry in archive File, and calls Goal % as call(Goal,Stream), where Stream is available for reading the entry. % Runs through all available entries on backtracking. with_archive_stream(File,Name,Goal) :- with_archive(File, with_stream_in_archive(Name,Goal)). with_archive_file(File,Name,Goal) :- with_archive(File, with_file_in_archive(Name,Goal)). with_file_in_archive(Name,Goal,Archive) :- archive_entry_name(Archive,Name), archive_header_property(Archive,filetype(file)), call(Goal,Archive). with_stream_in_archive(Name,Goal,Archive) :- archive_entry_name(Archive,Name), archive_header_property(Archive,filetype(file)), setup_call_cleanup( archive_open_entry(Archive,Stream), call(Goal,Stream), close(Stream)). archive_entry_name(Archive,Name) :- var(Name), !, catch(( repeat, (archive_next_header(Archive,Name) -> true; throw(nomore)) ), nomore,fail). archive_entry_name(Archive,Name) :- archive_next_header(Archive,Name). map_archive_entries(Goal,S1,S3,Archive) :- ( archive_next_header(Archive,Name) -> archive_header_property(Archive,filetype(Type)), call(Goal,Archive,Type,Name,S1,S2), map_archive_entries(Goal,S2,S3,Archive) ; S1=S3 ). map_archive_streams(Goal,S1,S2,Archive) :- map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive). call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !, with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)). call_with_archive_stream(_,_,_,S1,S1). with_current_entry_stream(Archive,Stream,Goal) :- setup_call_cleanup( archive_open_entry(Archive,Stream), Goal, close(Stream)). with_archive(File,Goal) :- setup_call_cleanup( archive_open(File,A,[]), call(Goal,A), archive_close(A)).