diff cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/lib/xmlarchive.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,220 @@
+/* Part of DML (Digital Music Laboratory)
+	Copyright 2014-2015 Samer Abdallah, University of London
+	 
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this library; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+:- module(xmlarchive, 
+      [  load_xmlns/3
+      ,  load_xmlns/2
+      ,  archive_doc/3
+      ,  map_archive_entries/4
+      ,  with_archive_stream/3
+      ,  with_archive_file/3
+      ,  with_archive/2
+      ,  with_current_entry_stream/3
+      ,  archive_stats/1
+      ,  op(200,fx,@)
+      ]).
+
+/* <module> Provides the ability to read XML documents in an archive
+*/
+
+:- use_module(library(sgml)).
+:- use_module(library(termutils)).
+:- use_module(library(lambda)).
+
+:- meta_predicate map_archive_streams(4,?,?,+),
+                  map_archive_entries(5,?,?,+),
+                  with_archive_stream(+,?,1),
+                  with_archive_file(+,?,1),
+                  with_current_entry_stream(+,-,0),
+                  with_archive(+,1).
+
+:- thread_local xmlns/2.
+:- thread_local errors/2.
+
+%% load_xmlns(+Source,-Doc:list,+Opts:list) is det.
+%
+%  Loads an XML document using the xmlns dialect to handle element names
+%  that involve namespaces. On exit, Doc is a list of the top level elements
+%  in the document. Valid options are:
+%     *  ns(-Namespaces:list(pair(atom,url)))
+%        On exit, Namespaces will contain a list of abbreviation-URL pairs for
+%        all the namespaces declared in the document.
+%     *  errors(-Errors:list(pair(oneof([error,warning]),text))
+%        On exit, Errors will contain a list of all the errors and warnings
+%        encountered while parsing the document.
+load_xmlns(Source,Doc,Opts) :-
+   option(ns(Namespaces),Opts,_),
+   option(errors(Errors),Opts,_),
+   retractall(xmlns(_,_)),
+   retractall(errors(_,_)),
+   load_xml(Source,Doc,
+      [  dialect(xmlns)
+      ,  space(remove)
+      ,  call(xmlns,on_xmlns)
+      ,  call(error,on_error)
+      ,  call(urlns,on_urlns)
+      ,  max_errors(-1)
+      ]), 
+   findall(S-M,errors(S,M),Errors),
+   findall(N-URL,xmlns(N,URL),Namespaces).
+
+%% load_xmlns(+Source,-Doc:list) is det.
+load_xmlns(Source,Doc) :-
+   retractall(xmlns(_,_)),
+   retractall(errors(_,_)),
+   load_xml(Source,Doc,
+      [  dialect(xmlns)
+      ,  space(remove)
+      ,  call(xmlns,on_xmlns)
+      ,  call(urlns,on_urlns)
+      ,  call(error,on_error)
+      ,  max_errors(-1)
+      ]). 
+
+on_xmlns(Prefix,URL,_) :- 
+   debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]),
+   (  rdf_current_prefix(Prefix,URL1)
+   -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]),
+      asserta(xmlns(Prefix,URL)),
+      (  URL=URL1 -> assert(xmlns(Prefix,URL))
+      ;  debug(xmlarchive,'*** Namespace CLASH',[]),
+         assert(xmlns(Prefix,URL))
+      )
+   ;  asserta(xmlns(Prefix,URL))
+   ).
+
+on_urlns(xmlns,xmlns,_) :- !.
+on_urlns(URL,Prefix,_) :-
+   (  xmlns(Prefix,URL)
+   -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix])
+   ;  debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix])
+   ).
+
+on_error(Severity,Message,_Parser) :-
+   debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]),
+   assertz(errors(Severity,Message)).
+
+
+
+
+%% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet.
+%
+%  Is true when archive File contains a file encoding XML document Doc.
+%  Valid options are:
+%     *  name(-Name:atom)
+%        On exit, Name will contain the name of the file in the archive that
+%        was parsed to produce Doc.
+%     *  ns(-NS:list(pair(atom,url)))
+%        On exit, NS will contain a list of namespaces used in the document.
+archive_doc(File,Doc,Opts) :-
+   select_option(name(Name),Opts,Opts1,_),
+   with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)).
+
+archive_stats(File) :-
+   nl,
+   with_status_line( with_archive( File,
+      map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))),
+   format('Compiling statistics...\n',[]),
+   aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings),
+   once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0),
+   setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs),
+   length(Msgs,NumMessageTypes),
+   nl,
+   format('       Number of loaded files: ~d\n',[T]),
+   format('Number of files with problems: ~d\n',[N]),
+   format('     Total number of problems: ~d\n',[M]),
+   format('             Number of errors: ~d\n',[NumErrors]),
+   format('           Number of warnings: ~d\n',[NumWarnings]),
+   format('  Number of distinct messages: ~d\n',[NumMessageTypes]).
+
+errors_error(L,Sev,Msg) :-
+   member(_-Errs,L),
+   member(Sev-Msg,Errs).
+
+load_and_count(Name,Stream,I1-E1,I2-E2) :-
+   succ(I1,I2),
+   status('Loading: ~d - ~s',[I2,Name]),
+   load_xmlns(Stream,_,[errors(Errors)]),
+   (  Errors=[] -> E2=E1
+   ;  length(Errors,DM),
+      E1=e(N1,M1,L1), E2=e(N2,M2,L2),
+      N2 is N1 + 1,   M2 is M1 + DM,
+      L2=[Name-Errors|L1]
+   ).
+
+% ---------- General archive handling stuff ---------
+
+
+%% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet.
+%
+%  Unifies Name with the name of an entry in archive File, and calls Goal
+%  as call(Goal,Stream), where Stream is available for reading the entry.
+%  Runs through all available entries on backtracking.
+with_archive_stream(File,Name,Goal) :-
+   with_archive(File, with_stream_in_archive(Name,Goal)).
+
+
+with_archive_file(File,Name,Goal) :-
+   with_archive(File, with_file_in_archive(Name,Goal)).
+
+with_file_in_archive(Name,Goal,Archive) :-
+   archive_entry_name(Archive,Name),
+   archive_header_property(Archive,filetype(file)),
+   call(Goal,Archive).
+
+with_stream_in_archive(Name,Goal,Archive) :-
+   archive_entry_name(Archive,Name),
+   archive_header_property(Archive,filetype(file)),
+   setup_call_cleanup(
+      archive_open_entry(Archive,Stream),
+      call(Goal,Stream),
+      close(Stream)).
+
+archive_entry_name(Archive,Name) :- var(Name), !,
+   catch((  repeat, 
+            (archive_next_header(Archive,Name) -> true; throw(nomore))
+         ), nomore,fail).
+
+archive_entry_name(Archive,Name) :- 
+   archive_next_header(Archive,Name).
+
+map_archive_entries(Goal,S1,S3,Archive) :-
+   (  archive_next_header(Archive,Name)
+   -> archive_header_property(Archive,filetype(Type)),
+      call(Goal,Archive,Type,Name,S1,S2),
+      map_archive_entries(Goal,S2,S3,Archive)
+   ;  S1=S3
+   ).
+
+map_archive_streams(Goal,S1,S2,Archive) :-
+   map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive).
+
+call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !,
+   with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)).
+call_with_archive_stream(_,_,_,S1,S1).
+
+with_current_entry_stream(Archive,Stream,Goal) :-
+   setup_call_cleanup(
+      archive_open_entry(Archive,Stream), Goal,
+      close(Stream)).
+
+
+with_archive(File,Goal) :-
+   setup_call_cleanup(
+      archive_open(File,A,[]), call(Goal,A),
+      archive_close(A)).