Daniel@0
|
1 /* Part of DML (Digital Music Laboratory)
|
Daniel@0
|
2 Copyright 2014-2015 Samer Abdallah, University of London
|
Daniel@0
|
3
|
Daniel@0
|
4 This program is free software; you can redistribute it and/or
|
Daniel@0
|
5 modify it under the terms of the GNU General Public License
|
Daniel@0
|
6 as published by the Free Software Foundation; either version 2
|
Daniel@0
|
7 of the License, or (at your option) any later version.
|
Daniel@0
|
8
|
Daniel@0
|
9 This program is distributed in the hope that it will be useful,
|
Daniel@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
12 GNU General Public License for more details.
|
Daniel@0
|
13
|
Daniel@0
|
14 You should have received a copy of the GNU General Public
|
Daniel@0
|
15 License along with this library; if not, write to the Free Software
|
Daniel@0
|
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
17 */
|
Daniel@0
|
18
|
Daniel@0
|
19 :- module(dataset,
|
Daniel@0
|
20 [ dataset/5
|
Daniel@0
|
21 , dataset_size/2
|
Daniel@0
|
22 , dataset_query/2
|
Daniel@0
|
23 , dataset_query_dv/3
|
Daniel@0
|
24 , dataset_item/2
|
Daniel@0
|
25 , dataset_items/2
|
Daniel@0
|
26 , dataset_query_id/3
|
Daniel@0
|
27 , random_subset/4
|
Daniel@0
|
28 ]).
|
Daniel@0
|
29
|
Daniel@0
|
30 /** <module> Definition and memoising of datasets */
|
Daniel@0
|
31
|
Daniel@0
|
32 :- use_module(library(memo)).
|
Daniel@0
|
33 :- use_module(library(semweb/rdf_db)).
|
Daniel@0
|
34 :- use_module(library(sandbox)).
|
Daniel@0
|
35 :- use_module(library(typedef)).
|
Daniel@0
|
36
|
Daniel@0
|
37 :- type natural == nonneg.
|
Daniel@0
|
38 :- persistent_memo dataset(+class:ground,+dbv:ground,-id:ground,-size:natural, -items:list).
|
Daniel@0
|
39 :- persistent_memo term_hash_id(+object:any, +hash:atom, -id:atom).
|
Daniel@0
|
40 :- persistent_memo random_subset( +domain:nonneg, +size:nonneg, +index:nonneg, -set:list(nonneg)).
|
Daniel@0
|
41
|
Daniel@0
|
42 random_subset(N,K,_,Indices) :- randset(K,N,Indices).
|
Daniel@0
|
43
|
Daniel@0
|
44
|
Daniel@0
|
45 :- initialization memo_attach(memo(datasets),[]).
|
Daniel@0
|
46
|
Daniel@0
|
47 :- meta_predicate dataset(1,+,-,-,-), dataset_query_id(1,+,-).
|
Daniel@0
|
48
|
Daniel@0
|
49 sandbox:safe_meta(dataset(G,_,_,_,_),[G]).
|
Daniel@0
|
50 sandbox:safe_meta(dataset_query_id(G,_,_),[G]).
|
Daniel@0
|
51
|
Daniel@0
|
52 %% dataset( +Generator:pred(A), +DBVersion, -ID:atom, -Size:natural, -Items:list(A)) is det.
|
Daniel@0
|
53 %
|
Daniel@0
|
54 % Note: Generator must be ground.
|
Daniel@0
|
55 dataset(Generator,_,ID,Size,Items) :-
|
Daniel@0
|
56 setof(X, call(Generator,X), Items),
|
Daniel@0
|
57 length(Items,Size),
|
Daniel@0
|
58 variant_sha1(Items,Hash),
|
Daniel@0
|
59 term_hash_id(Items,Hash,ID).
|
Daniel@0
|
60
|
Daniel@0
|
61 term_hash_id(X,Hash,ID) :-
|
Daniel@0
|
62 ( browse(term_hash_id(X,Hash,ID)) -> true % makes it safe to call in compute mode
|
Daniel@0
|
63 ; (ID=Hash; between(1,100,I), variant_sha1(Hash-I,ID)),
|
Daniel@0
|
64 \+browse(term_hash_id(_,_,ID))
|
Daniel@0
|
65 ; throw(unable_to_unique_id(X))
|
Daniel@0
|
66 ).
|
Daniel@0
|
67
|
Daniel@0
|
68
|
Daniel@0
|
69
|
Daniel@0
|
70 %% dataset_query_id(+Query:class,+DBVersion:ground,-ID:ground) is det.
|
Daniel@0
|
71 % Get ID for given query and database version. If this query has been
|
Daniel@0
|
72 % requested before, the previously generated ID will be unified with ID.
|
Daniel@0
|
73 % Otherwise, a new ID will be created and the list of items stored in
|
Daniel@0
|
74 % the persistent Prolog database. An empty dataset results in an
|
Daniel@0
|
75 % exception.
|
Daniel@0
|
76 dataset_query_id(Q,V,ID) :-
|
Daniel@0
|
77 dif(Status,fail),
|
Daniel@0
|
78 memo(dataset(Q,V,ID,_,_),_-Status).
|
Daniel@0
|
79
|
Daniel@0
|
80 %% dataset_size(-ID:ground,-Size:natural) is nondet.
|
Daniel@0
|
81 %% dataset_size(+ID:ground,-Size:natural) is semidet.
|
Daniel@0
|
82 % True when Size is the number of items in existing dataset with id ID.
|
Daniel@0
|
83 dataset_size(ID,Size) :- distinct(current_dataset(ID,Size,_)).
|
Daniel@0
|
84
|
Daniel@0
|
85 %% dataset_items(-ID,-Items:list) is nondet.
|
Daniel@0
|
86 %% dataset_items(+ID,-Items:list) is semidet.
|
Daniel@0
|
87 % True when Items is the list of items in existing dataset with id ID.
|
Daniel@0
|
88 dataset_items(ID,Items) :- distinct(current_dataset(ID,_,Items)).
|
Daniel@0
|
89
|
Daniel@0
|
90 %% dataset_item(-ID,-Item) is nondet.
|
Daniel@0
|
91 %% dataset_item(+ID,-Item) is nondet.
|
Daniel@0
|
92 % True when dataset ID contains Item.
|
Daniel@0
|
93 dataset_item(ID,Item) :- dataset_items(ID,Items), member(Item,Items).
|
Daniel@0
|
94
|
Daniel@0
|
95 %% dataset_query(-ID,-Query) is nondet.
|
Daniel@0
|
96 %% dataset_query(+ID,-Query) is nondet.
|
Daniel@0
|
97 % True when dataset ID contains Item.
|
Daniel@0
|
98 dataset_query(ID,Q) :- browse(dataset(Q,_,ID,_,_)).
|
Daniel@0
|
99
|
Daniel@0
|
100 %% dataset_query_dv(-ID,-Query,-DV) is nondet.
|
Daniel@0
|
101 %% dataset_query_dv(+ID,-Query,-DV) is nondet.
|
Daniel@0
|
102 % True when dataset ID contains Item.
|
Daniel@0
|
103 dataset_query_dv(ID,Q,DV) :- browse(dataset(Q,DV,ID,_,_)).
|
Daniel@0
|
104
|
Daniel@0
|
105 current_dataset(ID,Size,Items) :- browse(dataset(_,_,ID,Size,Items)).
|