Mercurial > hg > dbtune-rdf-services
view jamendo/sparql-archived/SeRQL/lib/semweb/rdf_library.pl @ 27:d95e683fbd35 tip
Enable CORS on urispace redirects as well
author | Chris Cannam |
---|---|
date | Tue, 20 Feb 2018 14:52:02 +0000 |
parents | df9685986338 |
children |
line wrap: on
line source
/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: wielemak@science.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 2007, University of Amsterdam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA As a special exception, if you link this library with other files, compiled with a Free Software compiler, to produce an executable, this library does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. */ :- module(rdf_library, [ rdf_attach_library/1, % +Dir rdf_load_library/1, % +Ontology rdf_load_library/2, % +Ontology, +Options rdf_list_library/0, rdf_list_library/1, % +Ontology rdf_list_library/2, % +Ontology, +Options rdf_library_index/2 % ?Id, ?Facet ]). :- use_module(library('semweb/rdf_db')). :- use_module(library('semweb/rdf_turtle')). :- use_module(library(rdf)). :- use_module(library(lists)). :- use_module(library(option)). :- use_module(library(debug)). :- use_module(library(error)). :- use_module(library(pairs)). :- use_module(library(date)). :- use_module(library(url)). :- use_module(library(http/http_open)). :- use_module(library(thread)). /** <module> RDF Library Manager This module manages an ontology library. Such a library consists of a directory with manifest files named =manifest,rdf= or =manifest.ttl= (Turtle). The manifest files define ontologies appearing in the library as well as namespace mnemonics and dependencies. The typical usage scenario is == ?- rdf_attach_library('/some/directory'). ?- rdf_load_library(my_ontology). == @tbd Add caching info @tbd Allow HTTP-hosted repositories @author Jan Wielemaker */ :- rdf_register_ns(lib, 'http://www.swi-prolog.org/rdf/library/'). :- dynamic manifest/2, % Path, Time library_db/3. % Name, URL, Facets % Force compile-time namespace expansion :- rdf_meta edge(+, r,r,o). /******************************* * LOADING * *******************************/ %% rdf_load_library(+Id) is det. %% rdf_load_library(+Id, +Options) is det. % % Load ontologies from the library. A library must first be % attached using rdf_attach_library/1. Defined Options are: % % * import(Bool) % If =true= (default), also load ontologies that are % explicitely imported. % % * base_uri(URI) % BaseURI used for loading RDF. Local definitions in % ontologies overrule this option. % % * claimed_source(URL) % URL from which we claim to have loaded the data. % % * not_found(+Level) % The system does a pre-check for the existence of % all references RDF databases. If Level is =error= % it reports missing databases as an error and fails. % If =warning= it prints them, but continues. If % =silent=, no checks are preformed. Default is =error=. % % * concurrent(Threads) % Perform the load concurrently using N threads. If not % specified, the number is determined by % guess_concurrency/2. % % * load(+Bool) % If =false=, to all the preparation, but do not execute % the actual loading. See also rdf_list_library/2. rdf_load_library(Id) :- rdf_load_library(Id, []). rdf_load_library(Id, Options) :- load_commands(Id, Options, Pairs), pairs_values(Pairs, Commands), list_to_set(Commands, Cmds2), delete_virtual(Cmds2, Cmds3), find_conflicts(Cmds3), check_existence(Cmds3, Cmds, Options), ( option(concurrent(Threads), Options) -> true ; guess_concurrency(Cmds, Threads) ), length(Cmds, NSources), print_message(informational, rdf(loading(NSources, Threads))), ( option(load(true), Options, true) -> concurrent(Threads, Cmds, [ local(2000), % we only need small stacks global(4000), trail(4000) ]) ; true ). delete_virtual([], []). delete_virtual([virtual(_)|T0], T) :- !, delete_virtual(T0, T). delete_virtual([H|T0], [H|T]) :- delete_virtual(T0, T). %% find_conflicts(+LoadCommands) is semidet. % % Find possibly conflicting options for loading the same source find_conflicts(Commands) :- sort(Commands, Cmds), conflicts(Cmds, Conflicts), report_conflics(Conflicts), Conflicts == []. conflicts([], []). conflicts([C1, C2|T0], [C1-C2|T]) :- conflict(C1, C2), !, conflicts([C2|T0], T). conflicts([_|T0], T) :- conflicts(T0, T). conflict(rdf_load(Src, Options1), rdf_load(Src, Options2)) :- sort(Options1, S1), sort(Options2, S2), S1 \== S2. report_conflics([]). report_conflics([C1-C2|T]) :- print_message(warning, rdf(load_conflict(C1,C2))), report_conflics(T). %% check_existence(+CommandsIn, -Commands, +Options) is det. % % Report existence errors. Fail if at least one source does not % exist. and the not_found level is not =silent=. % % @error existence_error(urls, ListOfUrls) check_existence(CommandsIn, Commands, Options) :- option(not_found(Level), Options, error), must_be(oneof([error,warning,silent]), Level), ( Level == silent -> true ; missing_urls(CommandsIn, Commands, Missing), ( Missing == [] -> true ; Level == warning -> report_missing(Missing, Level) ; existence_error(urls, Missing) ) ). missing_urls([], [], []). missing_urls([H|T0], Cmds, Missing) :- H = rdf_load(URL, _), ( exists_url(URL) -> Cmds = [H|T], missing_urls(T0, T, Missing) ; Missing = [URL|T], missing_urls(T0, Cmds, T) ). report_missing([], _). report_missing([H|T], Level) :- print_message(Level, error(existence_error(url, H), _)), report_missing(T, Level). %% guess_concurrency(+Commands, -Threads) is det. % % How much concurrency to use? Set to the number of CPUs if all % input comes from files or 5 if network based loading is % demanded. guess_concurrency(Commands, Threads) :- count_non_file_url(Commands, Count), ( current_prolog_flag(cpu_count, CPUs) -> true ; CPUs = 1 ), Threads is max(CPUs, min(5, Count)). count_non_file_url([], 0). count_non_file_url([rdf_load(URL, _)|T], Count) :- sub_atom(URL, 0, _, _, 'file://'), !, count_non_file_url(T, Count). count_non_file_url([_|T], Count) :- count_non_file_url(T, C0), Count is C0 + 1. %% load_commands(+Id, +Options, -Pairs:list(Level-Command)) is det. % % Commands are the RDF commands to execute for rdf_load_library/2. % Splitting in command collection and execution allows for % concurrent execution as well as forward checking of possible % problems. % % @tbd Fix poor style; avoid assert/retract. :- thread_local command/2. load_commands(Id, Options, Commands) :- retractall(command(_,_)), rdf_update_library_index, dry_load(Id, 1, Options), findall(Level-Cmd, retract(command(Level, Cmd)), Commands). dry_load(Id, Level, Options) :- ( library(Id, File, Facets) -> merge_base_uri(Facets, Options, Options1), merge_source(Facets, Options1, Options2), merge_blanks(Facets, Options2, Options3), ( \+ memberchk(virtual, Facets) -> load_options(Options3, File, RdfOptions), assert(command(Level, rdf_load(File, RdfOptions))) ; assert(command(Level, virtual(File))) ), ( option(import(true), Options, true) -> Level1 is Level + 1, forall(member(imports(_, Import), Facets), import(Import, Level1, Options3)) ; true ) ; existence_error(ontology, Id) ). merge_base_uri(Facets, Options0, Options) :- ( option(base_uri(Base), Facets) -> delete(Options0, base_uri(_), Options1), Options = [base_uri(Base)|Options1] ; Options = Options0 ). merge_source(Facets, Options0, Options) :- ( option(claimed_source(Base), Facets) -> delete(Options0, claimed_source(_), Options1), Options = [claimed_source(Base)|Options1] ; Options = Options0 ). merge_blanks(Facets, Options0, Options) :- ( option(blank_nodes(Share), Facets) -> delete(Options0, blank_nodes(_), Options1), Options = [blank_nodes(Share)|Options1] ; Options = Options0 ). load_options(Options, File, RDFOptions) :- findall(O, load_option(Options, File, O), RDFOptions). load_option(Options, File, db(Source)) :- option(claimed_source(Source0), Options), ( sub_atom(Source0, _, _, 0, /) -> file_base_name(File, Base), atom_concat(Source0, Base, Source) ; atom_concat(Source, #, Source0) -> true ). load_option(Options, File, base_uri(BaseURI)) :- option(base_uri(Base0), Options), sub_atom(/, _, _, 0, Base0), atom_concat(Base0, File, BaseURI). load_option(Options, _File, blank_nodes(Share)) :- option(blank_nodes(Share), Options). %% import(+URL, +Level, +Options) is det. import(Path, Level, Options) :- ( ( library(Id, Path, _) -> true ; manifest_for_path(Path, Manifest), catch(exists_url(Manifest), _, fail) -> process_manifest(Manifest), library(Id, Path, _) ) -> dry_load(Id, Level, Options) ; load_options(Options, Path, RdfOptions), assert(command(Level, rdf_load(Path, RdfOptions))) ). manifest_for_path(URL, Manifest) :- file_directory_name(URL, Parent), manifest_file(Base), rdf_extension(Ext), concat_atom([Parent, /, Base, '.', Ext], Manifest). %% rdf_list_library(+Id) is det. %% rdf_list_library(+Id, +Options) is det. % % Print library dependency tree to the terminal. Options include % options for rdf_load_library/2 and % % * show_source(+Boolean) % If =true= (default), show location we are loading % % * show_graph(+Boolean) % If =true= (default =false=), show name of graph % % * show_virtual(+Boolean) % If =false= (default =true=), do not show virtual % repositories. % % * indent(Atom) % Atom repeated for indentation levels rdf_list_library(Id) :- rdf_list_library(Id, []). rdf_list_library(Id, Options) :- load_commands(Id, Options, Commands), maplist(print_load(Options), Commands). print_load(Options, _Level-virtual(_)) :- option(show_virtual(false), Options), !. print_load(Options, Level-Command) :- option(indent(Indent), Options, '. '), forall(between(2, Level, _), format(Indent)), print_command(Command, Options), format('~N'). print_command(virtual(URL), _Options) :- format('<~w>', [URL]). print_command(rdf_load(URL), Options) :- print_command(rdf_load(URL, []), Options). print_command(rdf_load(URL, RDFOptions), Options) :- ( option(show_source(true), Options, true) -> format('~w', [URL]), ( option(blank_nodes(noshare), RDFOptions) -> format(' <not shared>') ; true ), ( exists_url(URL) -> true ; format(' [NOT FOUND]') ) ; true ), ( option(show_graph(true), Options, false), option(db(Base), RDFOptions) -> format('~N\tSource: ~w', [Base]) ; true ). exists_url(URL) :- rdf_db:rdf_input(URL, Source, _BaseURI), exists_source(Source). exists_source(file(Path)) :- !, access_file(Path, read). exists_source(url(http, URL)) :- !, catch(http_open(URL, Stream, [ method(head) ]), _, fail), close(Stream). %% rdf_list_library % % Prints known RDF library identifiers to current output. rdf_list_library :- rdf_update_library_index, ( rdf_library_index(Id, title(Title)), format('~w ~t~20|~w', [Id, Title]), ( rdf_library_index(Id, version(Version)) -> format(' (version ~w)', [Version]) ; true ), nl, fail ; true ). %% rdf_library_index(?Id, ?Facet) is nondet. % % Query the content of the library. Defined facets are: % % * source(URL) % Location from which to load the ontology % % * title(Atom) % Title used for the ontology % % * comment(Atom) % Additional comments for the ontology % % * version(Atom) % Version information on the ontology % % * imports(Type, URL) % URLs needed by this ontology. May succeed multiple % times. Type is one of =ontology=, =schema= or =instances=. % % * base_uri(BaseURI) % Base URI to use when loading documents. If BaseURI % ends in =|/|=, the actual filename is attached. % % * claimed_source(Source) % URL from which we claim to have loaded the RDF. If % Source ends in =|/|=, the actual filename is % attached. % % * blank_nodes(Share) % Defines how equivalent blank nodes are handled, where % Share is one of =share= or =noshare=. Default is to % share. % % * provides_ns(URL) % Ontology provides definitions in the namespace URL. % The formal definition of this is troublesome, but in % practice it means the ontology has triples whose % subjects are in the given namespace. % % * uses_ns(URL) % The ontology depends on the given namespace. Normally % means it contains triples that have predicates or % objects in the given namespace. % % * manifest(Path) % Manifest file this ontology is defined in % % * virtual % Entry is virtual (cannot be loaded) rdf_library_index(Id, Facet) :- library(Id, Path, Facets), ( Facet = source(Path) ; member(Facet, Facets) ). /******************************* * MANIFEST PROCESSING * *******************************/ %% rdf_attach_library(+Source) % % Attach manifest from Source. Source is one of % % * URL % Load single manifest from this URL % * File % Load single manifest from this file % * Directory % Scan all subdirectories and load all =|Manifest.ttl|= or % =|Manifest.rdf|= found. % % Encountered namespaces are registered using rdf_register_ns/2. % Encountered ontologies are added to the index. If a manifest was % already loaded it will be reloaded if the modification time has % changed. rdf_attach_library(URL) :- atom(URL), is_absolute_url(URL), !, process_manifest(URL). rdf_attach_library(File) :- absolute_file_name(File, Path, [ extensions([rdf,ttl]), access(read), file_errors(fail) ]), !, process_manifest(Path). rdf_attach_library(Dir) :- absolute_file_name(Dir, Path, [ file_type(directory), access(read) ]), attach_dir(Path, []). %% rdf_update_library_index % % Reload all Manifest files. rdf_update_library_index :- forall(manifest(Location, _Time), process_manifest(Location)). attach_dir(Path, Visited) :- memberchk(Path, Visited), !. attach_dir(Path, Visited) :- atom_concat(Path, '/*', Pattern), expand_file_name(Pattern, Members), ( member(Manifest, Members), is_manifest_file(Manifest) -> process_manifest(Manifest) ; print_message(silent, rdf(no_manifest(Path))) ), ( member(Dir, Members), exists_directory(Dir), file_base_name(Dir, Base), \+ hidden_base(Base), attach_dir(Dir, [Path|Visited]), fail ; true ). hidden_base('CVS'). hidden_base('cvs'). % Windows %% process_manifest(+Location) is det. % % Process a manifest file, registering encountered namespaces and % creating clauses for library/3. No op if manifest was loaded and % not changed. Removes old data if the manifest was changed. % % @param Location is either a path name or a URL. process_manifest(Source) :- ( file_name_to_url(Manifest0, Source) -> absolute_file_name(Manifest0, Manifest) ; Manifest = Source ), source_time(Manifest, MT), ( manifest(Manifest, Time), ( MT =< Time -> ! ; retractall(manifest(Manifest, Time)), library_db(Id, URL, Facets), memberchk(manifest(Manifest), Facets), retractall(library_db(Id, URL, Facets)), fail ) ; read_triples(Manifest, Triples), process_triples(Manifest, Triples), print_message(informational, rdf(manifest(loaded, Manifest))), assert(manifest(Manifest, MT)) ). process_triples(Manifest, Triples) :- findall(ns(Mnemonic, NameSpace), extract_namespace(Triples, Mnemonic, NameSpace), NameSpaces), findall(Ontology, extract_ontology(Triples, Ontology), Ontologies), maplist(define_namespace, NameSpaces), maplist(assert_ontology(Manifest), Ontologies). %% extract_namespace(+Triples, -Mnemonic, -NameSpace) % % True if Mnemonic is an abbreviation of NameSpace. extract_namespace(Triples, Mnemonic, Namespace) :- edge(Triples, Decl, lib:mnemonic, literal(Mnemonic)), edge(Triples, Decl, lib:namespace, Namespace). %% extract_ontology(+Triples, -Ontology) is nondet. % % Extract definition of an ontology extract_ontology(Triples, library(Name, URL, Options)) :- edge(Triples, URL, rdf:type, Type), ( ontology_type(Type) -> file_base_name(URL, BaseName), file_name_extension(Name, _, BaseName), findall(Facet, facet(Triples, URL, Facet), Options) ). ontology_type(X) :- ( rdf_equal(X, lib:'Ontology') ; rdf_equal(X, lib:'Schema') ; rdf_equal(X, lib:'Instances') ). %% facet(+Triples, +File, -Facet) is nondet. % % Enumerate facets about File from Triples. Facets are described % with rdf_library_index/2. facet(Triples, File, title(Title)) :- edge(Triples, File, dc:title, literal(Title)). facet(Triples, File, version(Version)) :- edge(Triples, File, owl:versionInfo, literal(Version)). facet(Triples, File, comment(Comment)) :- edge(Triples, File, rdfs:comment, literal(Comment)). facet(Triples, File, base_uri(BaseURI)) :- edge(Triples, File, lib:baseURI, BaseURI). facet(Triples, File, claimed_source(Source)) :- edge(Triples, File, lib:source, Source). facet(Triples, File, blank_nodes(Mode)) :- edge(Triples, File, lib:blankNodes, literal(Mode)), must_be(oneof([share,noshare]), Mode). facet(Triples, File, imports(ontology, Path)) :- edge(Triples, File, owl:imports, Path). facet(Triples, File, imports(schema, Path)) :- edge(Triples, File, lib:schema, Path). facet(Triples, File, imports(instances, Path)) :- edge(Triples, File, lib:instances, Path). facet(Triples, File, provides_ns(NS)) :- edge(Triples, File, lib:providesNamespace, NSDecl), edge(Triples, NSDecl, lib:namespace, NS). facet(Triples, File, uses_ns(NS)) :- edge(Triples, File, lib:usesNamespace, NSDecl), edge(Triples, NSDecl, lib:namespace, NS). facet(Triples, File, virtual) :- edge(Triples, File, rdf:type, lib:'Virtual'). %% edge(+Triples, ?S, ?P, ?O) is nondet. % % Like rdf/3 over a list of Triples. edge(Triples, S, P, O) :- member(rdf(S,P,O), Triples). %% source_time(+Source, -Modified) is semidet. % % Modified is the last modification time of Source. % % @error existence_error(Type, Source). source_time(URL, Modified) :- sub_atom(URL, 0, _, _, 'http://'), !, http_open(URL, Stream, [ header(last_modified, Date), method(head) ]), close(Stream), Date \== '', parse_time(Date, Modified). source_time(URL, Modified) :- file_name_to_url(File, URL), !, time_file(File, Modified). source_time(File, Modified) :- time_file(File, Modified). %% read_triples(+File, -Triples) is det. % % Read RDF/XML or Turtle file into a list of triples. read_triples(File, Triples) :- file_name_extension(_, rdf, File), !, load_rdf(File, Triples). read_triples(File, Triples) :- file_name_extension(_, ttl, File), !, rdf_load_turtle(File, Triples, []). %% is_manifest_file(+Path) % % True if Path is the name of a manifest file. is_manifest_file(Path) :- file_base_name(Path, File), downcase_atom(File, Lwr), file_name_extension(Base, Ext, Lwr), manifest_file(Base), rdf_extension(Ext), !. manifest_file('Manifest'). manifest_file('manifest'). rdf_extension(ttl). rdf_extension(rdf). %% assert_ontology(+Manifest, +Term:library(Name, File, Facets)) is det. % % Add ontology to our library. % % @tbd Proper behaviour of re-definition? assert_ontology(Manifest, Term) :- Term = library(Name, URL, Facets), ( library(Name, _URL2, Facets2) -> memberchk(manifest(Manifest2), Facets2), print_message(warning, rdf(redefined(Manifest, Name, Manifest2))) ; true ), assert(library_db(Name, URL, [ manifest(Manifest) | Facets ])). %% library(?Id, ?URL, ?Facets) % % Access DB for library information. library(Id, URL, Facets) :- nonvar(URL), canonical_url(URL, CanonicalURL), library_db(Id, CanonicalURL, Facets). library(Id, URL, Facets) :- library_db(Id, URL, Facets). %% canonical_url(+URL, -CanonicalURL) is det. % % Translate a URL into a canonical form. Currently deals with % file:// urls to take care of filesystem properies such as being % case insensitive and symbolic names. % % @tbd Generic URL handling should also strip ../, etc. canonical_url(FileURL, URL) :- file_name_to_url(File, FileURL), !, absolute_file_name(File, Abs), file_name_to_url(Abs, URL). canonical_url(URL, URL). %% define_namespace(NS:ns(Mnemonic, Namespace)) is det. % % Add namespace declaration for Mnemonic. define_namespace(ns(Mnemonic, Namespace)) :- debug(rdf_library, 'Adding NS ~w = ~q', [Mnemonic, Namespace]), rdf_register_ns(Mnemonic, Namespace, [ ]). /******************************* * MESSAGES * *******************************/ :- multifile prolog:message/3. prolog:message(rdf(no_manifest(Path))) --> [ 'Directory ~w has no Manifest.{ttl,rdf} file'-[Path] ]. prolog:message(rdf(redefined(Manifest, Name, Manifest2))) --> [ '~w: Ontology ~w already defined in ~w'- [Manifest, Name, Manifest2] ]. prolog:message(rdf(manifest(loaded, Manifest))) --> [ 'Loaded RDF manifest ~w'-[Manifest] ]. prolog:message(rdf(load_conflict(C1, C2))) --> [ 'Conflicting loads: ~p <-> ~p'-[C1, C2] ]. prolog:message(rdf(loading(Files, Threads))) --> [ 'Loading ~D files using ~D threads ...'-[Files, Threads] ].