annotate jamendo/sparql-archived/SeRQL/lib/semweb/rdf_history.pl @ 27:d95e683fbd35 tip

Enable CORS on urispace redirects as well
author Chris Cannam
date Tue, 20 Feb 2018 14:52:02 +0000
parents df9685986338
children
rev   line source
Chris@0 1 /* $Id$
Chris@0 2
Chris@0 3 Part of SWI-Prolog
Chris@0 4
Chris@0 5 Author: Jan Wielemaker
Chris@0 6 E-mail: wielemak@science.uva.nl
Chris@0 7 WWW: http://www.swi-prolog.org
Chris@0 8 Copyright (C): 1985-2007, University of Amsterdam
Chris@0 9
Chris@0 10 This program is free software; you can redistribute it and/or
Chris@0 11 modify it under the terms of the GNU General Public License
Chris@0 12 as published by the Free Software Foundation; either version 2
Chris@0 13 of the License, or (at your option) any later version.
Chris@0 14
Chris@0 15 This program is distributed in the hope that it will be useful,
Chris@0 16 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 18 GNU General Public License for more details.
Chris@0 19
Chris@0 20 You should have received a copy of the GNU General Public
Chris@0 21 License along with this library; if not, write to the Free Software
Chris@0 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Chris@0 23
Chris@0 24 As a special exception, if you link this library with other files,
Chris@0 25 compiled with a Free Software compiler, to produce an executable, this
Chris@0 26 library does not by itself cause the resulting executable to be covered
Chris@0 27 by the GNU General Public License. This exception does not however
Chris@0 28 invalidate any other reasons why the executable file might be covered by
Chris@0 29 the GNU General Public License.
Chris@0 30 */
Chris@0 31
Chris@0 32 :- module(rdf_history,
Chris@0 33 [ rdfh_transaction/1, % :Goal
Chris@0 34 rdfh_assert/3, % +S,+P,+O
Chris@0 35 rdfh_retractall/3, % +S,+P,+O
Chris@0 36 rdfh_update/3, % +S[->NS],+P[->NP],+O[->[NO]
Chris@0 37 rdfh_db_transaction/3, % ?DB, +Condition, ?Transaction
Chris@0 38 rdfh_triple_transaction/2, % +Triple, -Transaction
Chris@0 39 rdfh_transaction_member/2 % ?Action, +Transaction
Chris@0 40 ]).
Chris@0 41 :- use_module(library('http/http_session')).
Chris@0 42 :- use_module(library(lists)).
Chris@0 43 :- use_module(library(record)).
Chris@0 44 :- use_module(library(error)).
Chris@0 45 :- use_module(library(debug)).
Chris@0 46 :- use_module(library('semweb/rdf_persistency')).
Chris@0 47 :- use_module(library('semweb/rdf_db')).
Chris@0 48
Chris@0 49
Chris@0 50 /** <module> RDF Persistent store change history
Chris@0 51
Chris@0 52 This module deals with accessing the journal files of the RDF
Chris@0 53 persistency layer to get insight in the provenance and history of the
Chris@0 54 RDF database. It is designed for Wiki-like collaborative editing of an
Chris@0 55 RDF graph. We make the following assumptions:
Chris@0 56
Chris@0 57 * Users are identified using a URI, typically an OpenID (http://openid.net/)
Chris@0 58 * Triples created by a user are added to a named graph identified by the
Chris@0 59 URI of the user.
Chris@0 60 * Changes are grouped using rdf_transaction(Goal, log(Message, User))
Chris@0 61 * The number that is associated with the named graph of a triple (normally
Chris@0 62 expressing the line number in the source) is used to store the time-stamp.
Chris@0 63 Although this information is redundant (the time stamp is the same as
Chris@0 64 for the transaction), it allows for binary search through the history
Chris@0 65 file for the enclosing transaction.
Chris@0 66
Chris@0 67 @tbd Cleanup thoughts on delete and update.
Chris@0 68
Chris@0 69 @author Jan Wielemaker
Chris@0 70 */
Chris@0 71
Chris@0 72 /*******************************
Chris@0 73 * DECLARATIONS *
Chris@0 74 *******************************/
Chris@0 75
Chris@0 76 :- module_transparent
Chris@0 77 rdfh_transaction/1.
Chris@0 78
Chris@0 79 :- rdf_meta
Chris@0 80 rdfh_assert(r,r,o),
Chris@0 81 rdfh_retractall(r,r,o),
Chris@0 82 rdfh_update(t,t,t).
Chris@0 83
Chris@0 84 :- multifile
Chris@0 85 rdfh_hook/1.
Chris@0 86
Chris@0 87 :- record
Chris@0 88 rdf_transaction(id:integer,
Chris@0 89 nesting:integer,
Chris@0 90 time:number,
Chris@0 91 message,
Chris@0 92 actions:list,
Chris@0 93 other_graphs:list).
Chris@0 94
Chris@0 95
Chris@0 96 /*******************************
Chris@0 97 * MODIFICATIONS *
Chris@0 98 *******************************/
Chris@0 99
Chris@0 100 %% rdfh_transaction(:Goal) is semidet.
Chris@0 101 %
Chris@0 102 % Run Goal using rdf_transaction/2, using information from the HTTP
Chris@0 103 % layer to provide OpenID and session-id.
Chris@0 104
Chris@0 105 rdfh_transaction(Goal) :-
Chris@0 106 rdfh_user(User),
Chris@0 107 transaction_context(Context),
Chris@0 108 rdf_transaction(Goal, log(rdfh([user(User)|Context]), User)).
Chris@0 109
Chris@0 110
Chris@0 111 %% rdfh_assert(+S, +P, +O) is det.
Chris@0 112 %
Chris@0 113 % Assert a triple, adding current user and time to the triple
Chris@0 114 % context.
Chris@0 115
Chris@0 116 rdfh_assert(S,P,O) :-
Chris@0 117 ( rdf_active_transaction(log(rdfh(_), User))
Chris@0 118 -> rdfh_time(Time),
Chris@0 119 rdf_assert(S,P,O,User:Time)
Chris@0 120 ; throw(error(permission_error(assert, triple, rdf(S,P,O)),
Chris@0 121 context(_, 'No rdfh_transaction/1')))
Chris@0 122 ).
Chris@0 123
Chris@0 124
Chris@0 125 %% rdfh_retractall(+S, +P, +O) is det.
Chris@0 126 %
Chris@0 127 % Retract triples that match {S,P,O}. Note that all matching
Chris@0 128 % triples are added to the journal, so we can undo the action as
Chris@0 129 % well as report on retracted triples, even if multiple are
Chris@0 130 % retracted at the same time.
Chris@0 131 %
Chris@0 132 % One of the problems we are faced with is that a retract action
Chris@0 133 % goes into the journal of the user whose triple is retracted,
Chris@0 134 % which may or may not be the one who performed the action.
Chris@0 135
Chris@0 136 rdfh_retractall(S,P,O) :-
Chris@0 137 ( rdf_active_transaction(log(rdfh(_), _User))
Chris@0 138 -> rdf_retractall(S,P,O)
Chris@0 139 ; throw(error(permission_error(retract, triple, rdf(S,P,O)),
Chris@0 140 context(_, 'No rdfh_transaction/1')))
Chris@0 141 ).
Chris@0 142
Chris@0 143
Chris@0 144 %% rdfh_update(+S, +P, +O) is det.
Chris@0 145 %
Chris@0 146 % More tricky stuff, replacing a triple by another. Typically this
Chris@0 147 % will be changing the predicate or object. Provenance info should
Chris@0 148 % move the new triple to the user making the change, surely if the
Chris@0 149 % object is changed. If the predicate is changed to a related
Chris@0 150 % predicate, this actually becomes less obvious.
Chris@0 151 %
Chris@0 152 % Current simple-minded approach is to turn an update into a
Chris@0 153 % retract and assert. The S,P,O specifications are either a ground
Chris@0 154 % value or of the form _Old_ =|->|= _New_. Here is an example:
Chris@0 155 %
Chris@0 156 % ==
Chris@0 157 % rdfh_update(Work, Style, wn:oldstyle -> wn:newstyle)
Chris@0 158 % ==
Chris@0 159
Chris@0 160 rdfh_update(S,P,O) :-
Chris@0 161 ( rdf_active_transaction(log(rdfh(_), User))
Chris@0 162 -> update(S,P,O, rdf(RS, RP, RO), rdf(AS, AP, AO)),
Chris@0 163 must_be(ground, RS),
Chris@0 164 must_be(ground, RP),
Chris@0 165 must_be(ground, RO),
Chris@0 166 rdfh_time(Time),
Chris@0 167 rdf_retractall(RS, RP, RO),
Chris@0 168 rdf_assert(AS, AP, AO, User:Time)
Chris@0 169 ; throw(error(permission_error(retract, triple, rdf(S,P,O)),
Chris@0 170 context(_, 'No rdfh_transaction/1')))
Chris@0 171 ).
Chris@0 172
Chris@0 173 update(Ss, Ps, Os, rdf(S0, P0, O0), rdf(S,P,O)) :-
Chris@0 174 update(Ss, S0, S),
Chris@0 175 update(Ps, P0, P),
Chris@0 176 update(Os, O0, O).
Chris@0 177
Chris@0 178 update(From->To, From, To) :- !.
Chris@0 179 update(Value, Value, Value).
Chris@0 180
Chris@0 181
Chris@0 182 %% transaction_context(-Term) is det.
Chris@0 183 %
Chris@0 184 % Context to pass with an RDF transaction. Note that we pass the
Chris@0 185 % user. We don't need this for simple additions, but we do need it
Chris@0 186 % to track deletions.
Chris@0 187
Chris@0 188 transaction_context(Context) :-
Chris@0 189 ( rdfh_session(Session)
Chris@0 190 -> Context = [session(Session)]
Chris@0 191 ; Context = []
Chris@0 192 ).
Chris@0 193
Chris@0 194 %% rdfh_session(-Session) is semidet.
Chris@0 195 %
Chris@0 196 % Session is a (ground) identifier for the current session.
Chris@0 197
Chris@0 198 rdfh_session(Session) :-
Chris@0 199 rdfh_hook(session(Session)), !.
Chris@0 200 rdfh_session(Session) :-
Chris@0 201 catch(http_session_id(Session), _, fail).
Chris@0 202
Chris@0 203
Chris@0 204 %% rdfh_user(-URI) is det.
Chris@0 205 %
Chris@0 206 % Get user-id of current session.
Chris@0 207 %
Chris@0 208 % @tbd Make hookable, so we can use the SeRQL user/openid hooks
Chris@0 209
Chris@0 210 rdfh_user(User) :-
Chris@0 211 rdfh_hook(user(User)), !.
Chris@0 212 rdfh_user(OpenId) :-
Chris@0 213 http_session_data(openid(OpenId)).
Chris@0 214
Chris@0 215 %% rdfh_time(-Time:integer) is det.
Chris@0 216 %
Chris@0 217 % Get time stamp as integer. Second resolution is enough, and
Chris@0 218 % avoids rounding problems associated with floats.
Chris@0 219
Chris@0 220 rdfh_time(Seconds) :-
Chris@0 221 get_time(Now),
Chris@0 222 Seconds is round(Now).
Chris@0 223
Chris@0 224
Chris@0 225 /*******************************
Chris@0 226 * EXAMINE HISTORY *
Chris@0 227 *******************************/
Chris@0 228
Chris@0 229 %% rdfh_triple_transaction(+Triple:rdf(S,P,O), -Transaction) is nondet.
Chris@0 230 %
Chris@0 231 % True if the (partial) Triple is modified in Transaction.
Chris@0 232
Chris@0 233 rdfh_triple_transaction(rdf(S,P,O), Transaction) :-
Chris@0 234 rdf(S,P,O,DB:Time),
Chris@0 235 After is Time - 1,
Chris@0 236 rdfh_db_transaction(DB, after(After), Transaction),
Chris@0 237 rdfh_transaction_member(assert(S,P,O,Time), Transaction).
Chris@0 238
Chris@0 239 %% rdfh_db_transaction(?DB, +Condition, ?Transaction) is nondet.
Chris@0 240 %
Chris@0 241 % True if Transaction satisfying Condition was executed on DB.
Chris@0 242 % Condition is one of:
Chris@0 243 %
Chris@0 244 % * true
Chris@0 245 % Always true, returns all transactions.
Chris@0 246 % * id(Id)
Chris@0 247 % Specifies the identifier of the transaction. Only makes sense
Chris@0 248 % if DB is specified as transaction identifiers are local to each
Chris@0 249 % DB.
Chris@0 250 % * after(Time)
Chris@0 251 % True if transaction is executed at or after Time.
Chris@0 252 %
Chris@0 253 % @tbd More conditions (e.g. before(Time)).
Chris@0 254
Chris@0 255 rdfh_db_transaction(DB, true, Transaction) :- !,
Chris@0 256 rdf_journal_file(DB, Journal),
Chris@0 257 journal_transaction(Journal, Transaction).
Chris@0 258 rdfh_db_transaction(DB, id(Id), Transaction) :- !,
Chris@0 259 must_be(atom, DB),
Chris@0 260 rdf_journal_file(DB, Journal),
Chris@0 261 open_journal(Journal, Fd),
Chris@0 262 call_cleanup((seek_journal(Fd, id(Id)),
Chris@0 263 read_transaction(Fd, Transaction)),
Chris@0 264 close(Fd)).
Chris@0 265 rdfh_db_transaction(DB, Condition, Transaction) :- !,
Chris@0 266 valid_condition(Condition),
Chris@0 267 rdf_journal_file(DB, Journal),
Chris@0 268 open_journal(Journal, Fd),
Chris@0 269 seek_journal(Fd, Condition),
Chris@0 270 stream_transaction(Fd, Transaction).
Chris@0 271
Chris@0 272 valid_condition(Var) :-
Chris@0 273 var(Var), !,
Chris@0 274 instantiation_error(Var).
Chris@0 275 valid_condition(after(Time)) :- !,
Chris@0 276 must_be(number, Time).
Chris@0 277 valid_condition(Cond) :-
Chris@0 278 type_error(condition, Cond).
Chris@0 279
Chris@0 280 %% open_journal(+File, -Stream) is det.
Chris@0 281 %
Chris@0 282 % Open a journal file. Journal files are always UTF-8 encoded.
Chris@0 283
Chris@0 284 open_journal(JournalFile, Fd) :-
Chris@0 285 open(JournalFile, read, Fd, [encoding(utf8)]).
Chris@0 286
Chris@0 287 %% journal_transaction(+JournalFile, ?Transaction) is nondet.
Chris@0 288 %
Chris@0 289 % True if Transaction is a transaction in JournalFile,
Chris@0 290
Chris@0 291 journal_transaction(JournalFile, Transaction) :-
Chris@0 292 open_journal(JournalFile, Fd),
Chris@0 293 stream_transaction(Fd, Transaction).
Chris@0 294
Chris@0 295 stream_transaction(JFD, Transaction) :-
Chris@0 296 call_cleanup(read_transaction(JFD, Transaction), close(JFD)).
Chris@0 297
Chris@0 298 read_transaction(In, Transaction) :-
Chris@0 299 repeat,
Chris@0 300 read(In, T0),
Chris@0 301 ( T0 == end_of_file
Chris@0 302 -> !, fail
Chris@0 303 ; transaction(T0, In, T), % transaction/3 is not steadfast
Chris@0 304 T = Transaction
Chris@0 305 ).
Chris@0 306
Chris@0 307 transaction(begin(Id, Nest, Time, Msg), In,
Chris@0 308 rdf_transaction(Id, Nest, Time, Msg, Actions, Others)) :- !,
Chris@0 309 read(In, T2),
Chris@0 310 read_transaction_actions(T2, Id, In, Actions, Others).
Chris@0 311 transaction(start(_), _, _) :- !, fail. % Open journal
Chris@0 312 transaction(end(_), _, _) :- !, fail. % Close journal
Chris@0 313 transaction(Action, _, Action). % Action outside transaction?
Chris@0 314
Chris@0 315 read_transaction_actions(end(Id, _, Others), Id, _, [], Others) :- !.
Chris@0 316 read_transaction_actions(end_of_file, _, _, [], []) :- !. % TBD: Incomplete transaction (error)
Chris@0 317 read_transaction_actions(Action, Id, In, Actions, Others) :-
Chris@0 318 ignore_in_transaction(Action), !,
Chris@0 319 read(In, T2),
Chris@0 320 read_transaction_actions(T2, Id, In, Actions, Others).
Chris@0 321 read_transaction_actions(Action, Id, In, [Action|Actions], Others) :-
Chris@0 322 read(In, T2),
Chris@0 323 read_transaction_actions(T2, Id, In, Actions, Others).
Chris@0 324
Chris@0 325 ignore_in_transaction(start(_)).
Chris@0 326 ignore_in_transaction(end(_)).
Chris@0 327 ignore_in_transaction(begin(_,_,_,_)).
Chris@0 328 ignore_in_transaction(end(_,_,_)).
Chris@0 329
Chris@0 330
Chris@0 331 %% seek_journal(+Fd:stream, +Spec) is semidet.
Chris@0 332 %
Chris@0 333 % See an open journal descriptor to the start of a transaction
Chris@0 334 % specified by Spec. Spec is one of:
Chris@0 335 %
Chris@0 336 % * after(Time)
Chris@0 337 % First transaction at or after Time. Fails if there are no
Chris@0 338 % transactions after time.
Chris@0 339 % * id(Id)
Chris@0 340 % Start of transaction labeled with given Id. Fails if there
Chris@0 341 % is no transaction labeled Id.
Chris@0 342 %
Chris@0 343 % The implementation relies on the incrementing identifier numbers
Chris@0 344 % and time-stamps.
Chris@0 345
Chris@0 346 seek_journal(Fd, Spec) :-
Chris@0 347 stream_property(Fd, file_name(File)),
Chris@0 348 size_file(File, Size),
Chris@0 349 Here is Size//2,
Chris@0 350 Last = last(-),
Chris@0 351 ( is_after_spec(Spec)
Chris@0 352 -> ( bsearch_journal(Fd, 0, Here, Size, Spec, Last)
Chris@0 353 -> true
Chris@0 354 ; arg(1, Last, StartOfTerm),
Chris@0 355 StartOfTerm \== (-),
Chris@0 356 seek(Fd, StartOfTerm, bof, _)
Chris@0 357 )
Chris@0 358 ; bsearch_journal(Fd, 0, Here, Size, Spec, Last)
Chris@0 359 ).
Chris@0 360
Chris@0 361 is_after_spec(after(_Time)).
Chris@0 362
Chris@0 363 %% bsearch_journal(+Fd, +Start, +Here, +End, +Spec, !Last) is semidet.
Chris@0 364 %
Chris@0 365 % Perform a binary search in the journal opened as Fd.
Chris@0 366
Chris@0 367 bsearch_journal(Fd, Start, Here, End, Spec, Last) :-
Chris@0 368 start_of_transaction(Fd, Here, StartOfTerm, Begin), !,
Chris@0 369 compare_transaction(Spec, Begin, Diff),
Chris@0 370 ( Diff == (=)
Chris@0 371 -> seek(Fd, StartOfTerm, bof, _)
Chris@0 372 ; Diff == (<)
Chris@0 373 -> NewHere is Start+(Here-Start)//2,
Chris@0 374 NewHere < Here,
Chris@0 375 nb_setarg(1, Last, StartOfTerm),
Chris@0 376 bsearch_journal(Fd, Start, NewHere, Here, Spec, Last)
Chris@0 377 ; NewHere is StartOfTerm+(End-StartOfTerm)//2,
Chris@0 378 NewHere > StartOfTerm,
Chris@0 379 bsearch_journal(Fd, StartOfTerm, NewHere, End, Spec, Last)
Chris@0 380 ).
Chris@0 381 bsearch_journal(Fd, Start, Here, _End, Spec, Last) :-
Chris@0 382 NewHere is Start+(Here-Start)//2,
Chris@0 383 NewHere < Here,
Chris@0 384 bsearch_journal(Fd, Start, NewHere, Here, Spec, Last).
Chris@0 385
Chris@0 386 compare_transaction(id(Id), begin(Id2,_,_,_), Diff) :- !,
Chris@0 387 compare(Diff, Id, Id2).
Chris@0 388 compare_transaction(after(Time), begin(_,_,T,_), Diff) :- !,
Chris@0 389 compare(Diff, Time, T).
Chris@0 390
Chris@0 391 %% start_of_transaction(+Fd, +From, -Start, -Term) is semidet.
Chris@0 392 %
Chris@0 393 % Term is the start term of the first transaction after byte
Chris@0 394 % position From. Fails if no transaction can be found after From.
Chris@0 395
Chris@0 396 start_of_transaction(Fd, From, Start, Term) :-
Chris@0 397 seek(Fd, From, bof, _),
Chris@0 398 skip(Fd, 10),
Chris@0 399 repeat,
Chris@0 400 seek(Fd, 0, current, Start),
Chris@0 401 read(Fd, Term),
Chris@0 402 ( transaction_start(Term)
Chris@0 403 -> !
Chris@0 404 ; Term == end_of_file
Chris@0 405 -> !, fail
Chris@0 406 ; fail
Chris@0 407 ).
Chris@0 408
Chris@0 409 transaction_start(begin(_Id,_Nest,_Time,_Message)).
Chris@0 410
Chris@0 411 %% rdfh_transaction_member(Action, Transaction) is nondet.
Chris@0 412 %
Chris@0 413 % True if Action is an action in Transaction.
Chris@0 414
Chris@0 415 rdfh_transaction_member(Action, Transaction) :-
Chris@0 416 rdf_transaction_actions(Transaction, Actions),
Chris@0 417 member(Action, Actions).