Chris@0: /* $Id$ Chris@0: Chris@0: Part of SWI-Prolog Chris@0: Chris@0: Author: Jan Wielemaker Chris@0: E-mail: jan@swi.psy.uva.nl Chris@0: WWW: http://www.swi-prolog.org Chris@0: Copyright (C): 1985-2004, University of Amsterdam Chris@0: Chris@0: This program is free software; you can redistribute it and/or Chris@0: modify it under the terms of the GNU General Public License Chris@0: as published by the Free Software Foundation; either version 2 Chris@0: of the License, or (at your option) any later version. Chris@0: Chris@0: This program is distributed in the hope that it will be useful, Chris@0: but WITHOUT ANY WARRANTY; without even the implied warranty of Chris@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Chris@0: GNU General Public License for more details. Chris@0: Chris@0: You should have received a copy of the GNU Lesser General Public Chris@0: License along with this library; if not, write to the Free Software Chris@0: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Chris@0: Chris@0: As a special exception, if you link this library with other files, Chris@0: compiled with a Free Software compiler, to produce an executable, this Chris@0: library does not by itself cause the resulting executable to be covered Chris@0: by the GNU General Public License. This exception does not however Chris@0: invalidate any other reasons why the executable file might be covered by Chris@0: the GNU General Public License. Chris@0: */ Chris@0: Chris@0: :- module(serql_rdf_turtle, Chris@0: [ rdf_load_turtle_file/3 % +File, -Triples, +Options Chris@0: ]). Chris@0: :- use_module(library(assoc)). Chris@0: :- use_module(library(option)). Chris@0: :- use_module(library('semweb/rdf_db')). Chris@0: :- use_module(library(debug)). Chris@0: :- use_module(library(url)). Chris@0: Chris@0: /* NOTE: must be changed to load library('semweb/rdf_turtle'))!!! Chris@0: */ Chris@0: Chris@0: Chris@0: /** Turtle - Terse RDF Triple Language Chris@0: Chris@0: This module implements the Turtle language for representing the RDF Chris@0: triple model as defined by Dave Beckett from the Institute for Learning Chris@0: and Research Technology University of Bristol in the document: Chris@0: Chris@0: * http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/ Chris@0: Chris@0: The current parser handles all positive and negative examples provided Chris@0: by the above document at october 17, 2004. Chris@0: Chris@0: @tbd * Much better error handling Chris@0: * Write turtle data Chris@0: */ Chris@0: Chris@0: %% rdf_load_turtle_file(+Input, -Triples, +Options) Chris@0: % Chris@0: % Read a stream or file into a set of triples of the format Chris@0: % Chris@0: % rdf(Subject, Predicate, Object) Chris@0: % Chris@0: % The representation is consistent with the SWI-Prolog RDF/XML Chris@0: % and ntriples parsers. Provided options are: Chris@0: % Chris@0: % * base_uri(+BaseURI) Chris@0: % Initial base URI. Defaults to file:// for loading Chris@0: % files. Chris@0: % Chris@0: % * anon_prefix(+Prefix) Chris@0: % Blank nodes are generated as 1, 2, etc. Chris@0: % If Prefix is not an atom blank nodes are generated as Chris@0: % node(1), node(2), ... Chris@0: Chris@0: rdf_load_turtle_file(stream(In), Triples, Options) :- !, Chris@0: option(anon_prefix(Prefix), Options, '__bnode'), Chris@0: load_turtle(In, [], Prefix, Triples). Chris@0: rdf_load_turtle_file(RelFile, Triples, Options) :- Chris@0: absolute_file_name(RelFile, File), Chris@0: atom_concat('file://', File, FileURI), Chris@0: atom_concat('__', FileURI, DefAnonPrefix), Chris@0: option(anon_prefix(Prefix), Options, DefAnonPrefix), Chris@0: option(base_uri(BaseURI), Options, FileURI), Chris@0: open(File, read, In, [encoding(utf8)]), Chris@0: call_cleanup(load_turtle(In, BaseURI, Prefix, Triples), Chris@0: close(In)). Chris@0: Chris@0: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Chris@0: The parser is a two-stage processor. The first reads the raw file input Chris@0: and generates a list of tokens, stripping comments and white space. It Chris@0: is defined to read a single statement upto its terminating '.'. The Chris@0: second stage is a traditional DCG parser generating the triples for the Chris@0: statement. Chris@0: Chris@0: State: Chris@0: arg(1) BaseURI Chris@0: arg(2) Prefix --> URI map Chris@0: arg(3) NodeID --> URI map Chris@0: arg(4) AnonPrefix Chris@0: arg(5) AnonCount Chris@0: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ Chris@0: Chris@0: Chris@0: load_turtle(In, BaseURI, Prefix, Triples) :- Chris@0: empty_assoc(Map), Chris@0: empty_assoc(NodeMap), Chris@0: State = state(BaseURI, Map, NodeMap, Prefix, 1), Chris@0: phrase(turtle_file(State, In), Triples). Chris@0: Chris@0: Chris@0: turtle_file(State, In) --> Chris@0: { ( turtle_tokens(In, Tokens) Chris@0: -> debug(turtle, 'Tokens: ~w~n', [Tokens]) Chris@0: ; syntax_error(In, illegal_token) Chris@0: ) Chris@0: }, Chris@0: ( { Tokens == end_of_file } Chris@0: -> [] Chris@0: ; { phrase(triples(State, Triples), Tokens) } Chris@0: -> Triples, Chris@0: turtle_file(State, In) Chris@0: ; { syntax_error(In, cannot_parse) Chris@0: } Chris@0: ). Chris@0: Chris@0: triples(State, []) --> Chris@0: [ '@', name(prefix), name(Prefix), : ], !, Chris@0: uri(State, URI), Chris@0: { arg(2, State, Map0), Chris@0: put_assoc(Prefix, Map0, URI, Map), Chris@0: setarg(2, State, Map) Chris@0: }. Chris@0: triples(State, []) --> Chris@0: [ '@', name(prefix), ':' ], !, Chris@0: uri(State, URI), Chris@0: { setarg(1, State, URI) Chris@0: }. Chris@0: triples(State, Triples) --> Chris@0: subject(State, Subject, Triples, T), Chris@0: ( predicate_object_list(State, Subject, T, []) Chris@0: -> [] Chris@0: ; { Triples \== T } % [ p o ; ... ] . Chris@0: -> { T = [] } Chris@0: ). Chris@0: Chris@0: subject(State, Subject, T, T) --> Chris@0: resource(State, Subject), !. Chris@0: subject(State, Subject, T0, T) --> Chris@0: blank(State, Subject, T0, T). Chris@0: Chris@0: predicate_object_list(State, Subject, Triples, Tail) --> Chris@0: verb(State, Predicate), Chris@0: object_list(State, Subject, Predicate, Triples, Tail0), Chris@0: ( [';'] Chris@0: -> opt_predicate_object_list(State, Subject, Tail0, Tail) Chris@0: ; {Tail0 = Tail} Chris@0: ). Chris@0: Chris@0: opt_predicate_object_list(State, Subject, Triples, Tail) --> Chris@0: predicate_object_list(State, Subject, Triples, Tail), !. Chris@0: opt_predicate_object_list(_, _, Tail, Tail) --> Chris@0: []. Chris@0: Chris@0: object_list(State, Subject, Predicate, Chris@0: [rdf(Subject, Predicate, Object)|T0], T) --> Chris@0: object(State, Object, T0, T1), Chris@0: ( [','] Chris@0: -> object_list(State, Subject, Predicate, T1, T) Chris@0: ; {T1 = T} Chris@0: ). Chris@0: Chris@0: verb(_, P) --> Chris@0: [name(a)], !, Chris@0: { rdf_equal(rdf:type, P) Chris@0: }. Chris@0: verb(State, P) --> Chris@0: resource(State, P). Chris@0: Chris@0: object(State, Object, T, T) --> Chris@0: [ literal(Value) ], !, Chris@0: { mk_object(Value, State, Object) Chris@0: }. Chris@0: object(_, literal(type(Type, N)), T, T) --> Chris@0: [ numeric(Tp, Codes) ], !, Chris@0: { numeric_url(Tp, Type), Chris@0: atom_codes(N, Codes) Chris@0: }. Chris@0: object(State, Object, T, T) --> Chris@0: resource(State, Object), !. Chris@0: object(State, Object, T0, T) --> Chris@0: blank(State, Object, T0, T), !. Chris@0: object(_, _, _, _) --> Chris@0: rest(Tokens), Chris@0: { format(user_error, 'Expected object, found: ~p~n', [Tokens]), Chris@0: fail Chris@0: }. Chris@0: Chris@0: rest(Rest, Rest, []). Chris@0: Chris@0: term_expansion(numeric_url(I, Local), Chris@0: numeric_url(I, URI)) :- Chris@0: rdf_global_id(Local, URI). Chris@0: Chris@0: numeric_url(integer, xsd:integer). Chris@0: numeric_url(decimal, xsd:decimal). Chris@0: numeric_url(double, xsd:double). Chris@0: Chris@0: resource(State, URI) --> Chris@0: uri(State, URI), !. Chris@0: resource(State, URI) --> Chris@0: [ :(Name) ], !, Chris@0: { arg(1, State, Base), Chris@0: atom_concat(Base, Name, URI) Chris@0: }. Chris@0: resource(State, URI) --> Chris@0: [ name(Prefix), : ], !, Chris@0: { arg(2, State, Map), Chris@0: get_assoc(Prefix, Map, URI) Chris@0: }. Chris@0: resource(State, URI) --> Chris@0: [ Prefix:Name ], !, Chris@0: { arg(2, State, Map), Chris@0: ( get_assoc(Prefix, Map, Base) Chris@0: -> atom_concat(Base, Name, URI) Chris@0: ; throw(error(existence_error(prefix, Prefix), _)) Chris@0: ) Chris@0: }. Chris@0: resource(State, BaseURI) --> Chris@0: [ : ], !, Chris@0: { arg(1, State, BaseURI) Chris@0: }. Chris@0: Chris@0: Chris@0: uri(State, URI) --> Chris@0: [ relative_uri(Rel) Chris@0: ], Chris@0: { arg(1, State, Base), Chris@0: ( Rel == '' % must be in global_url? Chris@0: -> URI = Base Chris@0: ; global_url(Rel, Base, URI) Chris@0: ) Chris@0: }. Chris@0: Chris@0: blank(State, Resource, T, T) --> Chris@0: [ nodeId(NodeId) ], !, Chris@0: { arg(3, State, IdMap), Chris@0: ( get_assoc(NodeId, IdMap, Resource) Chris@0: -> true Chris@0: ; anonid(State, NodeId, Resource), Chris@0: put_assoc(NodeId, IdMap, Resource, NewIdMap), Chris@0: setarg(3, State, NewIdMap) Chris@0: ) Chris@0: }. Chris@0: blank(State, Resource, T, T) --> Chris@0: [ '[', ']' ], !, Chris@0: { anonid(State, Resource) Chris@0: }. Chris@0: blank(State, Resource, T0, T) --> Chris@0: [ '[' ], !, Chris@0: { anonid(State, Resource) Chris@0: }, Chris@0: predicate_object_list(State, Resource, T0, T), Chris@0: [ ']' ]. Chris@0: blank(State, Resource, T0, T) --> Chris@0: [ '(' ], Chris@0: item_list(State, Resource, T0, T). Chris@0: Chris@0: item_list(_State, Resource, T, T) --> Chris@0: [ ')' ], !, Chris@0: { rdf_equal(rdf:nil, Resource) Chris@0: }. Chris@0: item_list(State, Resource, T0, T) --> Chris@0: { anonid(State, Resource) }, Chris@0: object(State, Object, T0, T1), Chris@0: { rdf_equal(rdf:first, First), Chris@0: rdf_equal(rdf:rest, Rest), Chris@0: T1 = [ rdf(Resource, First, Object), Chris@0: rdf(Resource, Rest, Tail) Chris@0: | T2 Chris@0: ] Chris@0: }, Chris@0: item_list(State, Tail, T2, T). Chris@0: Chris@0: Chris@0: anonid(State, Node) :- Chris@0: arg(4, State, AnonPrefix), Chris@0: arg(5, State, Count), Chris@0: ( atom(AnonPrefix) Chris@0: -> atom_concat(AnonPrefix, Count, Node) Chris@0: ; Node = node(Count) Chris@0: ), Chris@0: C2 is Count + 1, Chris@0: setarg(5, State, C2). Chris@0: Chris@0: anonid(State, _NodeId, Node) :- Chris@0: arg(4, State, AnonPrefix), Chris@0: atom(AnonPrefix), !, Chris@0: anonid(State, Node). Chris@0: anonid(_State, NodeId, node(NodeId)). Chris@0: Chris@0: mk_object(type(Prefix:Name, Value), State, literal(type(Type, Value))) :- !, Chris@0: arg(2, State, Map), Chris@0: get_assoc(Prefix, Map, Base), Chris@0: atom_concat(Base, Name, Type). Chris@0: mk_object(type(relative_uri(Rel), Value), State, literal(type(Type, Value))) :- !, Chris@0: arg(1, State, Base), Chris@0: ( Rel == '' % must be in global_url? Chris@0: -> Type = Base Chris@0: ; global_url(Rel, Base, Type) Chris@0: ). Chris@0: mk_object(type(:(Name), Value), State, literal(type(Type, Value))) :- !, Chris@0: arg(1, State, Base), Chris@0: atom_concat(Base, Name, Type). Chris@0: mk_object(Value, _State, literal(Value)). Chris@0: Chris@0: Chris@0: /******************************* Chris@0: * TOKENISER * Chris@0: *******************************/ Chris@0: Chris@0: %% turtle_tokens(+In, -List) Chris@0: % Chris@0: % Read a statement from a turtle file, returning the contents as a Chris@0: % list of tokens. Chris@0: Chris@0: turtle_tokens(In, List) :- Chris@0: get_code(In, C0), Chris@0: turtle_token(C0, In, C1, Tok1), Chris@0: ( Tok1 == end_of_file Chris@0: -> List = end_of_file Chris@0: ; List = [Tok1|Tokens], Chris@0: turtle_tokens(C1, In, Tokens) Chris@0: ). Chris@0: Chris@0: turtle_tokens(C0, In, List) :- Chris@0: ( turtle_token(C0, In, C1, H) Chris@0: -> debug(turtle(token), 'Token: ~q', [H]) Chris@0: ; syntax_error(In, illegal_token) Chris@0: ), Chris@0: ( H == '.' Chris@0: -> List = [] Chris@0: ; H == end_of_file Chris@0: -> syntax_error(In, unexpected_end_of_input) Chris@0: ; List = [H|T], Chris@0: turtle_tokens(C1, In, T) Chris@0: ). Chris@0: Chris@0: turtle_token(-1, _, -1, end_of_file) :- !. Chris@0: turtle_token(0'., In, C, '.') :- !, Chris@0: get_code(In, C). Chris@0: turtle_token(0'#, In, C, Token) :- !, Chris@0: get_code(In, C1), Chris@0: skip_line(C1, In, C2), Chris@0: turtle_token(C2, In, C, Token). Chris@0: turtle_token(WS, In, C, Token) :- Chris@0: turtle_ws(WS), !, Chris@0: get_code(In, C1), Chris@0: turtle_token(C1, In, C, Token). Chris@0: turtle_token(C0, In, C, Number) :- Chris@0: between(0'0, 0'9, C0), !, Chris@0: turtle_number(C0, In, C, Number). Chris@0: turtle_token(0'-, In, C, Number) :- !, Chris@0: turtle_number(0'-, In, C, Number). Chris@0: turtle_token(0'+, In, C, Number) :- !, Chris@0: turtle_number(0'+, In, C, Number). Chris@0: turtle_token(0'", In, C, Literal) :- !, Chris@0: get_code(In, C1), Chris@0: turtle_string(C1, In, C2, Codes), Chris@0: atom_codes(Atom, Codes), Chris@0: ( C2 == 0'@ Chris@0: -> get_code(In, C3), Chris@0: language(C3, In, C, LangCodes), Chris@0: atom_codes(LangId, LangCodes), Chris@0: Literal = literal(lang(LangId, Atom)) Chris@0: ; C2 == 0'^, Chris@0: peek_code(In, 0'^) Chris@0: -> get_code(In, 0'^), Chris@0: get_code(In, C3), Chris@0: resource_token(C3, In, C, Type), Chris@0: Literal = literal(type(Type, Atom)) Chris@0: ; C = C2, Chris@0: Literal = literal(Atom) Chris@0: ). Chris@0: turtle_token(0'_, In, C, nodeId(NodeID)) :- Chris@0: peek_code(In, 0':), !, Chris@0: get_code(In, _), Chris@0: get_code(In, C1), Chris@0: name(C1, In, C, NodeID). Chris@0: turtle_token(0'<, In, C, URI) :- !, Chris@0: resource_token(0'<, In, C, URI). Chris@0: turtle_token(0':, In, C, URI) :- !, Chris@0: resource_token(0':, In, C, URI). Chris@0: turtle_token(C0, In, C, Token) :- Chris@0: name(C0, In, C1, Name), !, Chris@0: ( C1 == 0':, Chris@0: \+ sub_atom(Name, 0, _, _, '_'), Chris@0: peek_code(In, C2), Chris@0: name_start_char(C2) Chris@0: -> get_code(In, C2), Chris@0: name(C2, In, C, Name2), Chris@0: Token = (Name:Name2) Chris@0: ; Token = name(Name), Chris@0: C = C1 Chris@0: ). Chris@0: turtle_token(Punct, In, C, P) :- Chris@0: punctuation(Punct, P), !, Chris@0: get_code(In, C). Chris@0: Chris@0: %% turtle_number(+Char0, +In, -CharNext, -Value) Chris@0: % Chris@0: % Value is Type:CodeList Chris@0: Chris@0: turtle_number(0'-, In, CN, numeric(T, [0'-|Codes])) :- !, Chris@0: get_code(In, C0), Chris@0: turtle_number_nn(C0, In, CN, numeric(T, Codes)). Chris@0: turtle_number(0'+, In, CN, numeric(T, [0'+|Codes])) :- !, Chris@0: get_code(In, C0), Chris@0: turtle_number_nn(C0, In, CN, numeric(T, Codes)). Chris@0: turtle_number(C0, In, CN, Value) :- Chris@0: turtle_number_nn(C0, In, CN, Value). Chris@0: Chris@0: turtle_number_nn(C, In, CN, numeric(Type, Codes)) :- Chris@0: turtle_integer_codes(C, In, CN0, Codes, T0), % [0-9]+ Chris@0: ( CN0 == 0'. Chris@0: -> T0 = [CN0|T1], Chris@0: get_code(In, C1), Chris@0: turtle_integer_codes(C1, In, CN1, T1, T2), % [0-9]+.[0-9]+ Chris@0: ( exponent(CN1, In, CN, T2) Chris@0: -> Type = double Chris@0: ; CN = CN1, Chris@0: T2 = [], Chris@0: Type = decimal Chris@0: ) Chris@0: ; exponent(CN0, In, CN, T0) Chris@0: -> Type = double Chris@0: ; T0 = [], Chris@0: CN = CN0, Chris@0: Type = integer Chris@0: ). Chris@0: Chris@0: turtle_integer_codes(C0, In, CN, [C0|T0], T) :- Chris@0: between(0'0, 0'9, C0), !, Chris@0: get_code(In, C1), Chris@0: turtle_integer_codes(C1, In, CN, T0, T). Chris@0: turtle_integer_codes(CN, _, CN, T, T). Chris@0: Chris@0: exponent(C0, In, CN, [C0|T0]) :- Chris@0: e(C0), !, Chris@0: get_code(In, C1), Chris@0: optional_sign(C1, In, CN0, T0, T1), Chris@0: turtle_integer_codes(CN0, In, CN, T1, []). Chris@0: Chris@0: optional_sign(C0, In, CN, [C0|T], T) :- Chris@0: sign(C0), !, Chris@0: get_code(In, CN). Chris@0: optional_sign(CN, _, CN, T, T). Chris@0: Chris@0: e(0'e). Chris@0: e(0'E). Chris@0: Chris@0: sign(0'-). Chris@0: sign(0'+). Chris@0: Chris@0: % string Chris@0: turtle_string(-1, In, _, []) :- !, Chris@0: syntax_error(In, unexpected_end_of_input). Chris@0: turtle_string(0'", In, C, []) :- !, Chris@0: get_code(In, C). Chris@0: turtle_string(0'\\, In, C, [H|T]) :- !, Chris@0: get_code(In, C1), Chris@0: string_escape(C1, In, C2, H), Chris@0: turtle_string(C2, In, C, T). Chris@0: turtle_string(C0, In, C, [C0|T]) :- Chris@0: get_code(In, C1), Chris@0: turtle_string(C1, In, C, T). Chris@0: Chris@0: Chris@0: string_escape(0'n, In, C, 0'\n) :- !, Chris@0: get_code(In, C). Chris@0: string_escape(0'", In, C, 0'") :- !, Chris@0: get_code(In, C). Chris@0: string_escape(0'\\, In, C, 0'\\) :- !, Chris@0: get_code(In, C). Chris@0: string_escape(0't, In, C, 0'\t) :- !, Chris@0: get_code(In, C). Chris@0: string_escape(0'r, In, C, 0'\r) :- !, Chris@0: get_code(In, C). Chris@0: string_escape(0'u, In, C, Code) :- !, Chris@0: get_hhhh(In, Code), Chris@0: get_code(In, C). Chris@0: string_escape(0'U, In, C, Code) :- !, Chris@0: get_hhhh(In, Code0), Chris@0: get_hhhh(In, Code1), Chris@0: Code is Code0 << 16 + Code1, Chris@0: get_code(In, C). Chris@0: Chris@0: get_hhhh(In, Code) :- Chris@0: get_code(In, C1), code_type(C1, xdigit(D1)), Chris@0: get_code(In, C2), code_type(C2, xdigit(D2)), Chris@0: get_code(In, C3), code_type(C3, xdigit(D3)), Chris@0: get_code(In, C4), code_type(C4, xdigit(D4)), Chris@0: Code is D1<<12+D2<<8+D3<<4+D4. Chris@0: Chris@0: % language: [a-z]+ ('-' [a-z0-9]+ )* Chris@0: language(C0, In, C, [C0|Codes]) :- Chris@0: code_type(C0, lower), Chris@0: get_code(In, C1), Chris@0: lwr_word(C1, In, C2, Codes, Tail), Chris@0: sub_langs(C2, In, C, Tail, []). Chris@0: Chris@0: lwr_word(C0, In, C, [C0|T0], T) :- Chris@0: code_type(C0, lower), !, Chris@0: get_code(In, C1), Chris@0: lwr_word(C1, In, C, T0, T). Chris@0: lwr_word(C, _, C, T, T). Chris@0: Chris@0: sub_langs(0'-, In, C, [0'-, C1|Codes], T) :- !, Chris@0: get_code(In, C1), Chris@0: lwrdig(C1), !, Chris@0: get_code(In, C2), Chris@0: lwrdigs(C2, In, C3, Codes, Tail), Chris@0: sub_langs(C3, In, C, Tail, T). Chris@0: sub_langs(C, _, C, T, T). Chris@0: Chris@0: lwrdig(C) :- Chris@0: code_type(C, lower), !. Chris@0: lwrdig(C) :- Chris@0: code_type(C, digit). Chris@0: Chris@0: lwrdigs(C0, In, C, [C0|T0], T) :- Chris@0: lwrdig(C0), !, Chris@0: get_code(In, C1), Chris@0: lwr_word(C1, In, C, T0, T). Chris@0: lwrdigs(C, _, C, T, T). Chris@0: Chris@0: % resource_token Chris@0: resource_token(0'<, In, C, relative_uri(URI)) :- !, Chris@0: get_code(In, C1), Chris@0: uri_chars(C1, In, C, Codes), Chris@0: atom_codes(URI, Codes). Chris@0: resource_token(0':, In, C, Token) :- !, Chris@0: get_code(In, C0), Chris@0: ( name(C0, In, C, Name) Chris@0: -> Token = :(Name) Chris@0: ; Token = :, Chris@0: C = C0 Chris@0: ). Chris@0: resource_token(C0, In, C, Prefix:Name) :- Chris@0: name(C0, In, C1, Prefix), Chris@0: \+ sub_atom(Prefix, 0, _, _, '_'), !, Chris@0: C1 == 0':, Chris@0: get_code(In, C2), Chris@0: name(C2, In, C, Name). Chris@0: Chris@0: Chris@0: uri_chars(0'>, In, C, []) :- !, Chris@0: get_code(In, C). Chris@0: uri_chars(0'\\, In, C, [H|T]) :- !, Chris@0: get_code(In, C1), Chris@0: string_escape(C1, In, C2, H), Chris@0: uri_chars(C2, In, C, T). Chris@0: uri_chars(C0, In, C, [C0|T]) :- Chris@0: get_code(In, C1), Chris@0: uri_chars(C1, In, C, T). Chris@0: Chris@0: % name Chris@0: name(C0, In, C, Atom) :- Chris@0: name_start_char(C0), Chris@0: get_code(In, C1), Chris@0: name_chars(C1, In, C, T), Chris@0: atom_codes(Atom, [C0|T]). Chris@0: Chris@0: name_chars(C0, In, C, [C0|T]) :- Chris@0: name_char(C0), !, Chris@0: get_code(In, C1), Chris@0: name_chars(C1, In, C, T). Chris@0: name_chars(C, _, C, []). Chris@0: Chris@0: name_start_char(C) :- code_type(C, csymf). Chris@0: name_start_char(C) :- between(0xC0, 0xD6, C). Chris@0: name_start_char(C) :- between(0xD8, 0xF6, C). Chris@0: name_start_char(C) :- between(0xF8, 0x2FF, C). Chris@0: name_start_char(C) :- between(0x370, 0x37D, C). Chris@0: name_start_char(C) :- between(0x37F, 0x1FFF, C). Chris@0: name_start_char(C) :- between(0x200C, 0x200D, C). Chris@0: name_start_char(C) :- between(0x2070, 0x218F, C). Chris@0: name_start_char(C) :- between(0x2C00, 0x2FEF, C). Chris@0: name_start_char(C) :- between(0x3001, 0xD7FF, C). Chris@0: name_start_char(C) :- between(0xF900, 0xFDCF, C). Chris@0: name_start_char(C) :- between(0xFDF0, 0xFFFD, C). Chris@0: name_start_char(C) :- between(0x10000, 0xEFFFF, C). Chris@0: Chris@0: name_char(C) :- name_start_char(C). Chris@0: name_char(0'-). Chris@0: name_char(D) :- code_type(D, digit). Chris@0: name_char(0xB7). Chris@0: name_char(C) :- between(0x0300, 0x036F, C). Chris@0: name_char(C) :- between(0x203F, 0x2040, C). Chris@0: Chris@0: punctuation(0'(, '('). Chris@0: punctuation(0'), ')'). Chris@0: punctuation(0'[, '['). Chris@0: punctuation(0'], ']'). Chris@0: punctuation(0',, ','). Chris@0: punctuation(0'@, '@'). Chris@0: punctuation(0':, ':'). Chris@0: punctuation(0';, ';'). Chris@0: Chris@0: % comment Chris@0: skip_line(0xA, In, C) :- !, Chris@0: get_code(In, C). Chris@0: skip_line(0xD, In, C) :- !, Chris@0: get_code(In, C). Chris@0: skip_line(_, In, C) :- !, Chris@0: get_code(In, C1), Chris@0: skip_line(C1, In, C). Chris@0: Chris@0: % ws Chris@0: turtle_ws(0x9). Chris@0: turtle_ws(0xA). Chris@0: turtle_ws(0xD). Chris@0: turtle_ws(0x20). Chris@0: Chris@0: syntax_error(Stream, Which) :- Chris@0: stream_property(Stream, file_name(File)), Chris@0: line_count(Stream, LineNo), Chris@0: line_position(Stream, LinePos), Chris@0: character_count(Stream, CharIndex), Chris@0: throw(error(syntax_error(Which), Chris@0: file(File, LineNo, LinePos, CharIndex))). Chris@0: Chris@0: Chris@0: /******************************* Chris@0: * HOOK * Chris@0: *******************************/ Chris@0: Chris@0: :- multifile Chris@0: rdf_io:load_triples/3, Chris@0: rdf_io:get_triples/4. Chris@0: Chris@0: rdf_io:load_triples(turtle, Input, Options) :- !, Chris@0: debug(turtle, 'Loading turtle data from ~w', [Input]), Chris@0: rdf_load_turtle_file(Input, Triples, Options), Chris@0: option(base_uri(DB), Options, []), Chris@0: length(Triples, N), Chris@0: debug(turtle, 'Loaded ~D triples into ~w', [N, DB]), Chris@0: assert_triples(Triples, DB). Chris@0: Chris@0: assert_triples([], _). Chris@0: assert_triples([rdf(S,P,O)|T], DB) :- Chris@0: rdf_assert(S,P,O,DB), Chris@0: assert_triples(T, DB). Chris@0: Chris@0: Chris@0: rdf_io:get_triples(turtle, Input, Triples, Options) :- !, Chris@0: debug(turtle, 'Loading turtle data from ~w', [Input]), Chris@0: rdf_load_turtle_file(Input, Triples, Options).