view cpack/dml/lib/xmlarchive.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line source
/* Part of DML (Digital Music Laboratory)
	Copyright 2014-2015 Samer Abdallah, University of London
	 
	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

:- module(xmlarchive, 
      [  load_xmlns/3
      ,  load_xmlns/2
      ,  archive_doc/3
      ,  map_archive_entries/4
      ,  with_archive_stream/3
      ,  with_archive_file/3
      ,  with_archive/2
      ,  with_current_entry_stream/3
      ,  archive_stats/1
      ,  op(200,fx,@)
      ]).

/* <module> Provides the ability to read XML documents in an archive
*/

:- use_module(library(sgml)).
:- use_module(library(termutils)).
:- use_module(library(lambda)).

:- meta_predicate map_archive_streams(4,?,?,+),
                  map_archive_entries(5,?,?,+),
                  with_archive_stream(+,?,1),
                  with_archive_file(+,?,1),
                  with_current_entry_stream(+,-,0),
                  with_archive(+,1).

:- thread_local xmlns/2.
:- thread_local errors/2.

%% load_xmlns(+Source,-Doc:list,+Opts:list) is det.
%
%  Loads an XML document using the xmlns dialect to handle element names
%  that involve namespaces. On exit, Doc is a list of the top level elements
%  in the document. Valid options are:
%     *  ns(-Namespaces:list(pair(atom,url)))
%        On exit, Namespaces will contain a list of abbreviation-URL pairs for
%        all the namespaces declared in the document.
%     *  errors(-Errors:list(pair(oneof([error,warning]),text))
%        On exit, Errors will contain a list of all the errors and warnings
%        encountered while parsing the document.
load_xmlns(Source,Doc,Opts) :-
   option(ns(Namespaces),Opts,_),
   option(errors(Errors),Opts,_),
   retractall(xmlns(_,_)),
   retractall(errors(_,_)),
   load_xml(Source,Doc,
      [  dialect(xmlns)
      ,  space(remove)
      ,  call(xmlns,on_xmlns)
      ,  call(error,on_error)
      ,  call(urlns,on_urlns)
      ,  max_errors(-1)
      ]), 
   findall(S-M,errors(S,M),Errors),
   findall(N-URL,xmlns(N,URL),Namespaces).

%% load_xmlns(+Source,-Doc:list) is det.
load_xmlns(Source,Doc) :-
   retractall(xmlns(_,_)),
   retractall(errors(_,_)),
   load_xml(Source,Doc,
      [  dialect(xmlns)
      ,  space(remove)
      ,  call(xmlns,on_xmlns)
      ,  call(urlns,on_urlns)
      ,  call(error,on_error)
      ,  max_errors(-1)
      ]). 

on_xmlns(Prefix,URL,_) :- 
   debug(xmlarchive,'XML Namespace: ~w -> ~w',[Prefix,URL]),
   (  rdf_current_prefix(Prefix,URL1)
   -> debug(xmlarchive,'~w already known as ~w',[Prefix,URL1]),
      asserta(xmlns(Prefix,URL)),
      (  URL=URL1 -> assert(xmlns(Prefix,URL))
      ;  debug(xmlarchive,'*** Namespace CLASH',[]),
         assert(xmlns(Prefix,URL))
      )
   ;  asserta(xmlns(Prefix,URL))
   ).

on_urlns(xmlns,xmlns,_) :- !.
on_urlns(URL,Prefix,_) :-
   (  xmlns(Prefix,URL)
   -> debug(xmlarchive,'URL ~w mapped to ~w',[URL,Prefix])
   ;  debug(xmlarchive,'*** on_urlns failed on ~w, ~w',[URL,Prefix])
   ).

on_error(Severity,Message,_Parser) :-
   debug(xmlarchive,'SGML ~w: ~s',[Severity,Message]),
   assertz(errors(Severity,Message)).




%% archive_doc(+File:filename, -Doc:list(xml_element), +Opts:options) is nondet.
%
%  Is true when archive File contains a file encoding XML document Doc.
%  Valid options are:
%     *  name(-Name:atom)
%        On exit, Name will contain the name of the file in the archive that
%        was parsed to produce Doc.
%     *  ns(-NS:list(pair(atom,url)))
%        On exit, NS will contain a list of namespaces used in the document.
archive_doc(File,Doc,Opts) :-
   select_option(name(Name),Opts,Opts1,_),
   with_archive_stream(File,Name,Doc+\S^load_xmlns(S,Doc,Opts1)).

archive_stats(File) :-
   nl,
   with_status_line( with_archive( File,
      map_archive_streams(load_and_count,0-e(0,0,[]),T-e(N,M,L)))),
   format('Compiling statistics...\n',[]),
   aggregate(count, Errs^Msg^errors_error(L,warning,Msg), NumWarnings),
   once(aggregate(count, Errs^Msg^errors_error(L,error,Msg), NumErrors); NumErrors=0),
   setof(Msg, Sev^errors_error(L,Sev,Msg), Msgs),
   length(Msgs,NumMessageTypes),
   nl,
   format('       Number of loaded files: ~d\n',[T]),
   format('Number of files with problems: ~d\n',[N]),
   format('     Total number of problems: ~d\n',[M]),
   format('             Number of errors: ~d\n',[NumErrors]),
   format('           Number of warnings: ~d\n',[NumWarnings]),
   format('  Number of distinct messages: ~d\n',[NumMessageTypes]).

errors_error(L,Sev,Msg) :-
   member(_-Errs,L),
   member(Sev-Msg,Errs).

load_and_count(Name,Stream,I1-E1,I2-E2) :-
   succ(I1,I2),
   status('Loading: ~d - ~s',[I2,Name]),
   load_xmlns(Stream,_,[errors(Errors)]),
   (  Errors=[] -> E2=E1
   ;  length(Errors,DM),
      E1=e(N1,M1,L1), E2=e(N2,M2,L2),
      N2 is N1 + 1,   M2 is M1 + DM,
      L2=[Name-Errors|L1]
   ).

% ---------- General archive handling stuff ---------


%% with_archive_stream(+File:text,?Name:atom,+Goal:pred(+A:archive) is nondet.
%
%  Unifies Name with the name of an entry in archive File, and calls Goal
%  as call(Goal,Stream), where Stream is available for reading the entry.
%  Runs through all available entries on backtracking.
with_archive_stream(File,Name,Goal) :-
   with_archive(File, with_stream_in_archive(Name,Goal)).


with_archive_file(File,Name,Goal) :-
   with_archive(File, with_file_in_archive(Name,Goal)).

with_file_in_archive(Name,Goal,Archive) :-
   archive_entry_name(Archive,Name),
   archive_header_property(Archive,filetype(file)),
   call(Goal,Archive).

with_stream_in_archive(Name,Goal,Archive) :-
   archive_entry_name(Archive,Name),
   archive_header_property(Archive,filetype(file)),
   setup_call_cleanup(
      archive_open_entry(Archive,Stream),
      call(Goal,Stream),
      close(Stream)).

archive_entry_name(Archive,Name) :- var(Name), !,
   catch((  repeat, 
            (archive_next_header(Archive,Name) -> true; throw(nomore))
         ), nomore,fail).

archive_entry_name(Archive,Name) :- 
   archive_next_header(Archive,Name).

map_archive_entries(Goal,S1,S3,Archive) :-
   (  archive_next_header(Archive,Name)
   -> archive_header_property(Archive,filetype(Type)),
      call(Goal,Archive,Type,Name,S1,S2),
      map_archive_entries(Goal,S2,S3,Archive)
   ;  S1=S3
   ).

map_archive_streams(Goal,S1,S2,Archive) :-
   map_archive_entries( call_with_archive_stream(Goal), S1,S2,Archive).

call_with_archive_stream(Goal,Archive,file,Name,S1,S2) :- !,
   with_current_entry_stream(Archive,Stream, call(Goal,Name,Stream,S1,S2)).
call_with_archive_stream(_,_,_,S1,S1).

with_current_entry_stream(Archive,Stream,Goal) :-
   setup_call_cleanup(
      archive_open_entry(Archive,Stream), Goal,
      close(Stream)).


with_archive(File,Goal) :-
   setup_call_cleanup(
      archive_open(File,A,[]), call(Goal,A),
      archive_close(A)).