diff cpack/dml/lib/dataset.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/lib/dataset.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,105 @@
+/* Part of DML (Digital Music Laboratory)
+	Copyright 2014-2015 Samer Abdallah, University of London
+	 
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this library; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+:- module(dataset, 
+      [  dataset/5 
+      ,  dataset_size/2
+      ,  dataset_query/2
+      ,  dataset_query_dv/3
+      ,  dataset_item/2
+      ,  dataset_items/2
+      ,  dataset_query_id/3
+      ,  random_subset/4
+      ]).
+
+/** <module> Definition and memoising of datasets */
+
+:- use_module(library(memo)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(sandbox)).
+:- use_module(library(typedef)).
+
+% Type alias: `natural` is declared as another name for `nonneg`.
+% NOTE(review): the type/1 directive presumably comes from library(typedef) -- confirm.
+:- type natural == nonneg.
+% Signatures of the memoised predicates of this module. Each argument is
+% written as Mode Name:Type.
+% NOTE(review): '+' presumably marks inputs (the memo key) and '-' the stored
+% results; persistent_memo presumably comes from library(memo) -- confirm.
+:- persistent_memo dataset(+class:ground,+dbv:ground,-id:ground,-size:natural, -items:list).
+:- persistent_memo term_hash_id(+object:any, +hash:atom, -id:atom).
+:- persistent_memo random_subset( +domain:nonneg, +size:nonneg, +index:nonneg, -set:list(nonneg)).
+
+%% random_subset(+Domain:nonneg, +Size:nonneg, +Index:nonneg, -Set:list(nonneg)) is det.
+%
+%  Set is the result of randset(Size, Domain, Set); per library(random) that
+%  is a sorted list of Size distinct random integers in the range 1..Domain.
+%  Index takes no part in the computation.
+%  NOTE(review): Index presumably only distinguishes memoised subsets that
+%  share the same Domain and Size -- confirm against library(memo).
+random_subset(Domain, Size, _Index, Set) :-
+   randset(Size, Domain, Set).
+
+
+% Run memo_attach/2 on memo(datasets) with an empty option list after this
+% file has been loaded.
+% NOTE(review): presumably this attaches the persistent store that backs the
+% persistent_memo predicates of this module -- confirm against library(memo).
+:- initialization memo_attach(memo(datasets),[]).
+
+% The first argument of dataset/5 and of dataset_query_id/3 is a closure
+% that is called with one additional argument (meta-argument specifier 1).
+:- meta_predicate dataset(1,+,-,-,-), dataset_query_id(1,+,-).
+
+% Hooks for library(sandbox): each of these meta-calls is declared safe
+% provided that the goal G passed as its first argument is safe.
+sandbox:safe_meta(dataset(G,_,_,_,_),[G]).
+sandbox:safe_meta(dataset_query_id(G,_,_),[G]).
+
+%% dataset(+Generator:pred(A), +DBVersion, -ID:atom, -Size:natural, -Items:list(A)) is semidet.
+%
+%  Items is the sorted, duplicate-free list of all X for which
+%  call(Generator,X) succeeds, Size is the length of Items, and ID is the
+%  identifier that term_hash_id/3 assigns to Items and their variant_sha1/2
+%  hash. Fails if Generator has no solutions, because setof/3 fails on an
+%  empty solution set.
+%
+%  DBVersion is not used in the computation.
+%  NOTE(review): DBVersion presumably only forms part of the memo key -- confirm.
+%
+%  Note: Generator must be ground.
+dataset(Generator, _DBVersion, ID, Size, Items) :-
+   setof(Item, call(Generator, Item), Items),
+   length(Items, Size),
+   variant_sha1(Items, Digest),
+   term_hash_id(Items, Digest, ID).
+
+%% term_hash_id(+Object, +Hash:atom, -ID:atom) is det.
+%
+%  ID is an identifier for Object. If browse/1 already finds a
+%  term_hash_id/3 entry for Object and Hash, its ID is reused. Otherwise the
+%  candidates are Hash itself, followed by variant_sha1(Hash-I) for I in
+%  1..100, and ID is the first candidate for which browse/1 finds no
+%  term_hash_id/3 entry.
+%
+%  The choice of candidate is committed: once an ID has been selected, no
+%  alternative ID is produced on backtracking and the exception below cannot
+%  be raised by a failure later in the caller.
+%
+%  @throws unable_to_unique_id(Object) if all 101 candidates are already in use.
+term_hash_id(X,Hash,ID) :-
+   (  browse(term_hash_id(X,Hash,ID)) -> true % makes it safe to call in compute mode
+   ;  (  ID=Hash
+      ;  between(1,100,I),
+         variant_sha1(Hash-I,ID)
+      ),
+      \+browse(term_hash_id(_,_,ID))
+   -> true % commit to the first unused candidate
+   ;  throw(unable_to_unique_id(X))
+   ).
+   
+
+
+%% dataset_query_id(+Query:class,+DBVersion:ground,-ID:ground) is det.
+%
+%  ID is the dataset identifier for Query under database version DBVersion.
+%  The identifier is obtained by running dataset/5 through memo/2, with the
+%  status component of memo's second argument constrained by dif/2 so that
+%  it can never be the atom `fail`.
+%
+%  NOTE(review): the following depends on library(memo) and should be
+%  confirmed there: if this query has been requested before, the previously
+%  generated ID is returned; otherwise a new ID is created and the list of
+%  items is stored in the persistent Prolog database; an empty dataset
+%  results in an exception.
+dataset_query_id(Query, DBVersion, ID) :-
+   dif(Outcome, fail),
+   memo(dataset(Query, DBVersion, ID, _Size, _Items), _Info-Outcome).
+
+%% dataset_size(-ID:ground,-Size:natural) is nondet.
+%% dataset_size(+ID:ground,-Size:natural) is semidet.
+%
+%  True when Size is the number of items recorded for the existing dataset
+%  with identifier ID. Solutions of current_dataset/3 that agree on ID, Size
+%  and the item list are reported only once.
+dataset_size(ID, Size) :-
+   distinct(t(ID,Size,Items), current_dataset(ID,Size,Items)).
+
+%% dataset_items(-ID,-Items:list) is nondet.
+%% dataset_items(+ID,-Items:list) is semidet.
+%
+%  True when Items is the list of items recorded for the existing dataset
+%  with identifier ID. Solutions of current_dataset/3 that agree on ID, the
+%  size and Items are reported only once.
+dataset_items(ID, Items) :-
+   distinct(t(ID,Size,Items), current_dataset(ID,Size,Items)).
+
+%% dataset_item(-ID,-Item) is nondet.
+%% dataset_item(+ID,-Item) is nondet.
+%
+%  True when Item is a member of the item list of the existing dataset with
+%  identifier ID. Items are enumerated in list order on backtracking.
+dataset_item(ID, Item) :-
+   distinct(current_dataset(ID,_,Items)),
+   member(Item, Items).
+
+%% dataset_query(-ID,-Query) is nondet.
+%% dataset_query(+ID,-Query) is nondet.
+%
+%  True when browse/1 finds a dataset/5 entry in which Query is the
+%  generator and ID is the dataset identifier, whatever the database
+%  version, size and items of that entry.
+dataset_query(ID, Query) :-
+   browse(dataset(Query, _DBVersion, ID, _Size, _Items)).
+
+%% dataset_query_dv(-ID,-Query,-DV) is nondet.
+%% dataset_query_dv(+ID,-Query,-DV) is nondet.
+%
+%  True when browse/1 finds a dataset/5 entry in which Query is the
+%  generator, DV is the database version and ID is the dataset identifier,
+%  whatever the size and items of that entry.
+dataset_query_dv(ID, Query, DV) :-
+   browse(dataset(Query, DV, ID, _Size, _Items)).
+
+%  current_dataset(?ID, ?Size, ?Items) is nondet.
+%
+%  Module-private helper: true when browse/1 finds a dataset/5 entry with
+%  identifier ID, size Size and item list Items, for any generator and
+%  database version. The same triple is returned once per matching entry.
+current_dataset(ID, Size, Items) :-
+   browse(dataset(_Query, _DBVersion, ID, Size, Items)).