annotate cpack/dml/lib/dataset.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
rev   line source
Daniel@0 1 /* Part of DML (Digital Music Laboratory)
Daniel@0 2 Copyright 2014-2015 Samer Abdallah, University of London
Daniel@0 3
Daniel@0 4 This program is free software; you can redistribute it and/or
Daniel@0 5 modify it under the terms of the GNU General Public License
Daniel@0 6 as published by the Free Software Foundation; either version 2
Daniel@0 7 of the License, or (at your option) any later version.
Daniel@0 8
Daniel@0 9 This program is distributed in the hope that it will be useful,
Daniel@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Daniel@0 12 GNU General Public License for more details.
Daniel@0 13
Daniel@0 14 You should have received a copy of the GNU General Public
Daniel@0 15 License along with this library; if not, write to the Free Software
Daniel@0 16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Daniel@0 17 */
Daniel@0 18
Daniel@0 19 :- module(dataset,
Daniel@0 20 [ dataset/5
Daniel@0 21 , dataset_size/2
Daniel@0 22 , dataset_query/2
Daniel@0 23 , dataset_query_dv/3
Daniel@0 24 , dataset_item/2
Daniel@0 25 , dataset_items/2
Daniel@0 26 , dataset_query_id/3
Daniel@0 27 , random_subset/4
Daniel@0 28 ]).
Daniel@0 29
Daniel@0 30 /** <module> Definition and memoising of datasets */
Daniel@0 31
Daniel@0 32 :- use_module(library(memo)).
Daniel@0 33 :- use_module(library(semweb/rdf_db)).
Daniel@0 34 :- use_module(library(sandbox)).
Daniel@0 35 :- use_module(library(typedef)).
Daniel@0 36
Daniel@0 37 :- type natural == nonneg.
Daniel@0 38 :- persistent_memo dataset(+class:ground,+dbv:ground,-id:ground,-size:natural, -items:list).
Daniel@0 39 :- persistent_memo term_hash_id(+object:any, +hash:atom, -id:atom).
Daniel@0 40 :- persistent_memo random_subset( +domain:nonneg, +size:nonneg, +index:nonneg, -set:list(nonneg)).
Daniel@0 41
Daniel@0 42 random_subset(N,K,_,Indices) :- randset(K,N,Indices).
Daniel@0 43
Daniel@0 44
Daniel@0 45 :- initialization memo_attach(memo(datasets),[]).
Daniel@0 46
Daniel@0 47 :- meta_predicate dataset(1,+,-,-,-), dataset_query_id(1,+,-).
Daniel@0 48
Daniel@0 49 sandbox:safe_meta(dataset(G,_,_,_,_),[G]).
Daniel@0 50 sandbox:safe_meta(dataset_query_id(G,_,_),[G]).
Daniel@0 51
Daniel@0 52 %% dataset( +Generator:pred(A), +DBVersion, -ID:atom, -Size:natural, -Items:list(A)) is det.
Daniel@0 53 %
Daniel@0 54 % Note: Generator must be ground.
Daniel@0 55 dataset(Generator,_,ID,Size,Items) :-
Daniel@0 56 setof(X, call(Generator,X), Items),
Daniel@0 57 length(Items,Size),
Daniel@0 58 variant_sha1(Items,Hash),
Daniel@0 59 term_hash_id(Items,Hash,ID).
Daniel@0 60
Daniel@0 61 term_hash_id(X,Hash,ID) :-
Daniel@0 62 ( browse(term_hash_id(X,Hash,ID)) -> true % makes it safe to call in compute mode
Daniel@0 63 ; (ID=Hash; between(1,100,I), variant_sha1(Hash-I,ID)),
Daniel@0 64 \+browse(term_hash_id(_,_,ID))
Daniel@0 65 ; throw(unable_to_unique_id(X))
Daniel@0 66 ).
Daniel@0 67
Daniel@0 68
Daniel@0 69
Daniel@0 70 %% dataset_query_id(+Query:class,+DBVersion:ground,-ID:ground) is det.
Daniel@0 71 % Get ID for given query and database version. If this query has been
Daniel@0 72 % requested before, the previously generated ID will be unified with ID.
Daniel@0 73 % Otherwise, a new ID will be created and the list of items stored in
Daniel@0 74 % the persistent Prolog database. An empty dataset results in an
Daniel@0 75 % exception.
Daniel@0 76 dataset_query_id(Q,V,ID) :-
Daniel@0 77 dif(Status,fail),
Daniel@0 78 memo(dataset(Q,V,ID,_,_),_-Status).
Daniel@0 79
Daniel@0 80 %% dataset_size(-ID:ground,-Size:natural) is nondet.
Daniel@0 81 %% dataset_size(+ID:ground,-Size:natural) is semidet.
Daniel@0 82 % True when Size is the number of items in existing dataset with id ID.
Daniel@0 83 dataset_size(ID,Size) :- distinct(current_dataset(ID,Size,_)).
Daniel@0 84
Daniel@0 85 %% dataset_items(-ID,-Items:list) is nondet.
Daniel@0 86 %% dataset_items(+ID,-Items:list) is semidet.
Daniel@0 87 % True when Items is the list of items in existing dataset with id ID.
Daniel@0 88 dataset_items(ID,Items) :- distinct(current_dataset(ID,_,Items)).
Daniel@0 89
Daniel@0 90 %% dataset_item(-ID,-Item) is nondet.
Daniel@0 91 %% dataset_item(+ID,-Item) is nondet.
Daniel@0 92 % True when dataset ID contains Item.
Daniel@0 93 dataset_item(ID,Item) :- dataset_items(ID,Items), member(Item,Items).
Daniel@0 94
Daniel@0 95 %% dataset_query(-ID,-Query) is nondet.
Daniel@0 96 %% dataset_query(+ID,-Query) is nondet.
Daniel@0 97 % True when dataset ID contains Item.
Daniel@0 98 dataset_query(ID,Q) :- browse(dataset(Q,_,ID,_,_)).
Daniel@0 99
Daniel@0 100 %% dataset_query_dv(-ID,-Query,-DV) is nondet.
Daniel@0 101 %% dataset_query_dv(+ID,-Query,-DV) is nondet.
Daniel@0 102 % True when dataset ID contains Item.
Daniel@0 103 dataset_query_dv(ID,Q,DV) :- browse(dataset(Q,DV,ID,_,_)).
Daniel@0 104
Daniel@0 105 current_dataset(ID,Size,Items) :- browse(dataset(_,_,ID,Size,Items)).