Mercurial > hg > dml-open-cliopatria
diff cpack/dml/lib/dataset.pl @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
% --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpack/dml/lib/dataset.pl Tue Feb 09 21:05:06 2016 +0100 @@ -0,0 +1,105 @@
/* Part of DML (Digital Music Laboratory)
   Copyright 2014-2015 Samer Abdallah, University of London

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

:- module(dataset,
      [  dataset/5
      ,  dataset_size/2
      ,  dataset_query/2
      ,  dataset_query_dv/3
      ,  dataset_item/2
      ,  dataset_items/2
      ,  dataset_query_id/3
      ,  random_subset/4
      ]).

/** <module> Definition and memoising of datasets */

:- use_module(library(memo)).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(sandbox)).
:- use_module(library(typedef)).
% Imported explicitly rather than relying on autoloading, so that the module
% still loads correctly when the autoload flag is disabled.
:- use_module(library(random), [randset/3]).
:- use_module(library(solution_sequences), [distinct/1]).

:- type natural == nonneg.
:- persistent_memo dataset(+class:ground,+dbv:ground,-id:ground,-size:natural, -items:list).
:- persistent_memo term_hash_id(+object:any, +hash:atom, -id:atom).
:- persistent_memo random_subset( +domain:nonneg, +size:nonneg, +index:nonneg, -set:list(nonneg)).

%% random_subset(+N:nonneg, +K:nonneg, +Index:nonneg, -Indices:list(nonneg)) is det.
%
%  Indices is a sorted list of K distinct random integers drawn from 1..N,
%  as produced by randset/3. Index is not used in the computation itself.
%  NOTE(review): Index presumably acts as part of the memo key, so that
%  several independent subsets of the same size can be stored - confirm
%  against library(memo).
random_subset(N,K,_,Indices) :- randset(K,N,Indices).


:- initialization memo_attach(memo(datasets),[]).

:- meta_predicate dataset(1,+,-,-,-), dataset_query_id(1,+,-).

% The sandbox hooks are extended from this file, so declare them multifile here.
:- multifile sandbox:safe_meta/2.

% Tell the sandbox that these predicates call their first argument as a goal.
sandbox:safe_meta(dataset(G,_,_,_,_),[G]).
sandbox:safe_meta(dataset_query_id(G,_,_),[G]).

%% dataset( +Generator:pred(A), +DBVersion, -ID:atom, -Size:natural, -Items:list(A)) is det.
%
%  Items is the sorted, duplicate-free list of all X for which
%  call(Generator,X) succeeds, Size is the length of Items, and ID is a
%  unique identifier derived from the variant SHA1 hash of Items (see
%  term_hash_id/3). Because setof/3 is used, the goal fails when Generator
%  has no solutions.
%
%  Note: Generator must be ground.
dataset(Generator,_,ID,Size,Items) :-
   setof(X, call(Generator,X), Items),
   length(Items,Size),
   variant_sha1(Items,Hash),
   term_hash_id(Items,Hash,ID).

%% term_hash_id(+X, +Hash:atom, -ID:atom) is det.
%
%  Assigns an identifier to term X, whose hash is Hash. If an entry for
%  X and Hash has already been recorded, its ID is reused. Otherwise ID is
%  the first of Hash, variant_sha1(Hash-1), ..., variant_sha1(Hash-100) that
%  is not yet in use as the ID of any recorded entry.
%
%  The search for a free candidate is committed with (->)/2, so the
%  predicate leaves no choice point: backtracking into it neither yields
%  alternative IDs nor raises a spurious exception.
%
%  @throws unable_to_unique_id(X) if all 101 candidate IDs are already taken.
term_hash_id(X,Hash,ID) :-
   (  browse(term_hash_id(X,Hash,ID)) -> true % makes it safe to call in compute mode
   ;  (  ID=Hash
      ;  between(1,100,I), variant_sha1(Hash-I,ID)
      ),
      \+browse(term_hash_id(_,_,ID))
   -> true
   ;  throw(unable_to_unique_id(X))
   ).



%% dataset_query_id(+Query:class,+DBVersion:ground,-ID:ground) is det.
%  Get ID for given query and database version. If this query has been
%  requested before, the previously generated ID will be unified with ID.
%  Otherwise, a new ID will be created and the list of items stored in
%  the persistent Prolog database. An empty dataset results in an
%  exception.
dataset_query_id(Q,V,ID) :-
   dif(Status,fail),
   memo(dataset(Q,V,ID,_,_),_-Status).

%% dataset_size(-ID:ground,-Size:natural) is nondet.
%% dataset_size(+ID:ground,-Size:natural) is semidet.
%  True when Size is the number of items in existing dataset with id ID.
dataset_size(ID,Size) :- distinct(current_dataset(ID,Size,_)).

%% dataset_items(-ID,-Items:list) is nondet.
%% dataset_items(+ID,-Items:list) is semidet.
%  True when Items is the list of items in existing dataset with id ID.
dataset_items(ID,Items) :- distinct(current_dataset(ID,_,Items)).

%% dataset_item(-ID,-Item) is nondet.
%% dataset_item(+ID,-Item) is nondet.
%  True when dataset ID contains Item.
dataset_item(ID,Item) :- dataset_items(ID,Items), member(Item,Items).

%% dataset_query(-ID,-Query) is nondet.
%% dataset_query(+ID,-Query) is nondet.
%  True when Query is a query recorded as having produced dataset ID
%  (under any database version).
dataset_query(ID,Q) :- browse(dataset(Q,_,ID,_,_)).

%% dataset_query_dv(-ID,-Query,-DV) is nondet.
%% dataset_query_dv(+ID,-Query,-DV) is nondet.
%  True when Query, evaluated against database version DV, is recorded as
%  having produced dataset ID.
dataset_query_dv(ID,Q,DV) :- browse(dataset(Q,DV,ID,_,_)).

%% current_dataset(?ID, ?Size, ?Items) is nondet.
%  True when browse/1 yields a dataset/5 entry with identifier ID, item
%  count Size and item list Items. The query and database version of the
%  entry are ignored, so the same triple may be produced more than once.
current_dataset(DatasetID,Count,Members) :-
   browse(dataset(_Query,_DBVersion,DatasetID,Count,Members)).