/* Part of DML (Digital Music Laboratory)
   Copyright 2014-2015 Samer Abdallah, University of London

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

:- module(dataset,
      [  dataset/5
      ,  dataset_size/2
      ,  dataset_query/2
      ,  dataset_query_dv/3
      ,  dataset_item/2
      ,  dataset_items/2
      ,  dataset_query_id/3
      ,  random_subset/4
      ]).

/** <module> Definition and memoising of datasets */

:- use_module(library(memo)).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(sandbox)).
:- use_module(library(typedef)).
% randset/3 and distinct/1 are used in this module; import them explicitly
% so that the module does not depend on autoloading being enabled.
:- use_module(library(random), [randset/3]).
:- use_module(library(solution_sequences), [distinct/1]).

:- type natural == nonneg.

% Memoised predicates. Each argument is written as Mode Name:Type.
:- persistent_memo dataset(+class:ground,+dbv:ground,-id:ground,-size:natural, -items:list).
:- persistent_memo term_hash_id(+object:any, +hash:atom, -id:atom).
:- persistent_memo random_subset( +domain:nonneg, +size:nonneg, +index:nonneg, -set:list(nonneg)).

%% random_subset(+N:nonneg, +K:nonneg, +Index:nonneg, -Indices:list(nonneg)) is det.
%
%  Indices is a set of K distinct random integers in the range 1..N, as
%  produced by randset/3 (a sorted list).
%  Index is not used in the computation itself.
%  NOTE(review): presumably Index only serves as part of the memo key, so that
%  several different random subsets with the same N and K can be stored and
%  retrieved reproducibly - confirm against callers.
random_subset(N,K,_,Indices) :- randset(K,N,Indices).


:- initialization memo_attach(memo(datasets),[]).
:- meta_predicate dataset(1,+,-,-,-), dataset_query_id(1,+,-).

% Tell the sandbox that the first argument of these predicates is a goal
% that is called and must therefore be checked for safety.
sandbox:safe_meta(dataset(G,_,_,_,_),[G]).
sandbox:safe_meta(dataset_query_id(G,_,_),[G]).

%% dataset( +Generator:pred(A), +DBVersion, -ID:atom, -Size:natural, -Items:list(A)) is det.
%
%  Items is the sorted, duplicate-free list of all X for which
%  call(Generator,X) succeeds, Size is its length and ID is an identifier
%  derived from the SHA1 hash of Items (see term_hash_id/3).
%  Because setof/3 is used, the predicate fails if Generator has no solutions.
%  DBVersion is not used in the body.
%  NOTE(review): presumably DBVersion only distinguishes memo entries computed
%  against different database states - confirm.
%
%  Note: Generator must be ground.
dataset(Generator,_,ID,Size,Items) :-
   setof(X, call(Generator,X), Items),
   length(Items,Size),
   variant_sha1(Items,Hash),
   term_hash_id(Items,Hash,ID).

%% term_hash_id(+X, +Hash:atom, -ID:atom) is det.
%
%  ID is an identifier for X. If an entry for X and Hash can already be found
%  with browse/1, its ID is reused. Otherwise the first candidate for which no
%  term_hash_id entry exists yet is chosen: Hash itself, then the SHA1 of
%  Hash-I for I = 1..100.
%  The choice is committed, so backtracking into this predicate neither
%  yields alternative IDs nor runs into the exception below after an ID has
%  been found.
%
%  @throws unable_to_unique_id(X) if every candidate ID is already taken.
term_hash_id(X,Hash,ID) :-
   (  browse(term_hash_id(X,Hash,ID)) -> true % makes it safe to call in compute mode
   ;  (ID=Hash; between(1,100,I), variant_sha1(Hash-I,ID)),
      \+browse(term_hash_id(_,_,ID))
   -> true
   ;  throw(unable_to_unique_id(X))
   ).



%% dataset_query_id(+Query:class,+DBVersion:ground,-ID:ground) is det.
%  Get ID for given query and database version. If this query has been
%  requested before, the previously generated ID will be unified with ID.
%  Otherwise, a new ID will be created and the list of items stored in
%  the persistent Prolog database. An empty dataset results in an
%  exception.
%  NOTE(review): dataset/5 fails on an empty generator and the dif/2
%  constraint below rejects a memo status of =fail=, so an empty dataset may
%  surface as failure rather than as an exception - confirm against
%  library(memo).
dataset_query_id(Q,V,ID) :-
   dif(Status,fail),
   memo(dataset(Q,V,ID,_,_),_-Status).

%% dataset_size(-ID:ground,-Size:natural) is nondet.
%% dataset_size(+ID:ground,-Size:natural) is semidet.
%  True when Size is the number of items in existing dataset with id ID.
dataset_size(ID,Size) :- distinct(current_dataset(ID,Size,_)).

%% dataset_items(-ID,-Items:list) is nondet.
%% dataset_items(+ID,-Items:list) is semidet.
%  True when Items is the list of items in existing dataset with id ID.
dataset_items(DatasetID, Members) :-
   % Duplicates arise when several queries or database versions map to the
   % same dataset; distinct/1 reports each dataset only once.
   distinct(current_dataset(DatasetID, _Size, Members)).

%% dataset_item(-ID,-Item) is nondet.
%% dataset_item(+ID,-Item) is nondet.
%  True when Item is a member of the item list of existing dataset ID.
dataset_item(DatasetID, Element) :-
   dataset_items(DatasetID, Elements),
   member(Element, Elements).

%% dataset_query(-ID,-Query) is nondet.
%% dataset_query(+ID,-Query) is nondet.
%  True when Query is a generator recorded as having produced dataset ID,
%  for any database version.
dataset_query(DatasetID, Query) :-
   browse(dataset(Query, _, DatasetID, _, _)).

%% dataset_query_dv(-ID,-Query,-DV) is nondet.
%% dataset_query_dv(+ID,-Query,-DV) is nondet.
%  True when Query is a generator recorded as having produced dataset ID
%  under database version DV.
dataset_query_dv(DatasetID, Query, DBVersion) :-
   browse(dataset(Query, DBVersion, DatasetID, _, _)).

% current_dataset(-ID, -Size, -Items) is nondet.
% Enumerates recorded dataset/5 entries while hiding the query and database
% version. Keeping this as a separate predicate matters: distinct/1 compares
% whole goals, so only ID, Size and Items take part in duplicate detection.
current_dataset(DatasetID, Count, Members) :-
   browse(dataset(_, _, DatasetID, Count, Members)).