Mercurial > hg > dml-open-cliopatria

diff cpack/dml/api/dmlvis.pl @ 0:718306e29690 tip
commiting public release
author: Daniel Wolff
date: Tue, 09 Feb 2016 21:05:06 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/api/dmlvis.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,717 @@
+/* Part of DML (Digital Music Laboratory)
+	Copyright 2014-2015 Samer Abdallah, University of London
+	 
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this library; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+:- module(dmlvis, 
+      [  cc/3
+      ,  recording_property/3 
+      ]).
+
+/** <module> DML Visualisation web service
+ */
+
+
+:- use_module(library(http/html_write)).
+:- use_module(library(http/html_head)).
+:- use_module(library(http/thread_httpd)).
+:- use_module(library(http/http_dispatch)).
+:- use_module(library(http/http_parameters)).
+:- use_module(library(http/http_json)).
+:- use_module(library(http/json)).
+:- use_module(library(thread_pool)).
+:- use_module(library(semweb/rdf_label)).
+:- use_module(library(sandbox)).
+:- use_module(library(dcg_core)).
+:- use_module(library(dcg_pair)).
+:- use_module(library(dcg_codes)).
+:- use_module(library(dcg_macros)).
+:- use_module(library(fileutils)).
+:- use_module(library(listutils)).
+:- use_module(library(optutils)).
+:- use_module(library(swipe)).
+:- use_module(library(memo)).
+:- use_module(library(rdfutils)).
+:- use_module(library(httpfiles)).
+:- use_module(library(dataset)).
+:- use_module(library(computations), [rows_cols/3]).
+:- use_module(library(csvutils), [uri_to_csv/2]).
+:- use_module(library(insist)).
+:- use_module(library(lambda)).
+:- use_module(library(dcg/basics), [string_without//2, integer//1]).
+:- use_module(library(solution_sequences)).
+
+:- use_module(components(audio)).
+
+:- meta_predicate cc(2,2,1).
+:- meta_predicate method_result_cc(+,+,2,1).
+
+:- set_prolog_flag(double_quotes,codes).
+:- set_prolog_flag(back_quotes,string).
+
+http:location(v1,root(api/v4),[]).
+
+:- http_handler(v1(getCollectionId), getCollectionId, []).
+:- http_handler(v1(getCollectionPerspective), getCollectionPerspective, []).
+:- http_handler(v1(getRecordingPerspective), getRecordingPerspective, [spawn(vis_recording)]).
+:- http_handler(v1(listCollections), listCollections, []).
+:- http_handler(v1(listPlaces), listPlaces, []).
+:- http_handler(v1(listPerspectives), listPerspectives, []).
+:- http_handler(v1(csv_time_window), csv_time_window, []).
+
+% ------------------------------------------------------------------------------------
+
+%% vis(+Method,+Request) is det.
+%
+%  General VIS API HTTP handler. Returns either JSON or JSONP depending on the
+%  format parameter. If JSONP is selected (the default), the callback parameter
+%  determines the Javascript callback function. The JSON result structure is of
+%  type
+%  ==
+%  { 
+%     query:  string ~'The original query, excluding protocol and host',
+%     result: (error_struct|A) ~'Result structure A or error information'
+%  }
+%  ==
+%  where 
+%  ==
+%  error_struct == { code: integer, decs:string }
+%  ==
+%  See individual methods for more information about their return types.
+vis(Method,Request) :-
+   get_time(T0),
+   method_params(Method,Params),
+   member(request_uri(Query),Request),
+   vis_parameters(Request, [format(Format), callback(Callback) | Params],ROpts),
+   (Format=jsonp -> Fmt=jsonp(Callback,ROpts); Fmt=json(ROpts)),
+   Responder=send_response(Fmt,Query,T0),
+   debug(dmlvis(method),'~q',[method_result_cc(Method,Params,Responder,writeln)]),
+   method_result_cc(Method,Params,dmlvis:Responder,error_cont(Query,Responder)).
+
+send_response(Fmt,Query,T0,Result,Status) :-
+   get_time(T1), DT=T1-T0,
+   debug(dmlvis(response),'Sending response after ~3f s to ~q',[DT,Query]),
+   once(reply_as(Fmt,Status,_{ query:Query, result: Result})).
+
+error_cont(Q,Cont,Ex) :-
+   debug(dmlvis(error),'Sending error ~W in response to ~q',[Ex,[quoted(true),max_depth(8)],Q]),
+   error_result(Ex,Result),
+   call(Cont,Result,error).
+
+vis_parameters(Request,Params,ReplyOpts) :-
+   Params1=[random(_, [optional(true)]) | Params],
+   http_parameters(Request, Params1, [attribute_declarations(param), form_data(AllParams)]),
+   findall(N, (member(N=_,AllParams), \+used_param(N,Params1)), Unused),
+   (select(indent,Unused,Unused1) -> ReplyOpts=[]; ReplyOpts=[width(0)], Unused1=Unused),
+   (select(dv,Unused1,Unused2) -> true; Unused2=Unused1),
+   insist(Unused2=[], unrecognised_parameters(Unused2)).
+
+used_param(N,ValidParams) :-
+   member(Q,ValidParams),
+   functor(Q,N,_).
+
+% result_response(Fmt,Q,Result,Status,Response) :- 
+%    with_output_to(string(Response),
+%       reply_as(Fmt,Status,_{ query:Q, result: Result})).
+
+reply_as(json(Opts),Status,Dict) :- !,
+   write_headers([Status,type(json)]),
+   json_write_dict(current_output, Dict, Opts).
+reply_as(jsonp(Cb,Opts),Status,Dict) :- !, 
+   write_headers([Status,type(jsonp)]),
+   write(Cb), write('('),
+   json_write_dict(current_output, Dict, Opts),
+   write(');').
+
+
+error_result(Ex, _{ errors:[Err] }) :- error_result1(Ex,Err).
+
+error_result1(dml_error(Code,Descriptor),_{ code: Code, desc:Descriptor }) :- !.
+
+error_result1(Ex, _{ code: 500, desc: Msg }) :-
+   message_to_string(Ex,Msg).
+
+% ------------------------------------------------------------------------------------
+% Handlers with documentation
+
+%% getCollectionId(+Request) is det.
+%
+%  Define a collection satisfying given search terms. Result is a collection
+%  ID, which may be the same as a previously defined collection if it consists
+%  of the same items. 
+%
+%  Parameters are as follows. In all cases, =|sclist(Type)|= denotes a semicolon
+%  separated list of values of given type OR the string '*', which is the default.
+%  The value '*' means that no filtering is done on the releveant property. The
+%  list is interpreted as an AND-list or an OR-list depending on which property 
+%  is being tested. String matching is case insensitive. 
+%  The only required parameter is =|dv|=. 
+%  ==
+%   dv         : integer              ~ database version
+%   library    : sclist(lib_name)     ~ OR-list of libraries to search
+%   collection : sclist(pattern)      ~ OR-list of BL collection name patterns
+%   composer   : sclist(string)       ~ OR-list of composer prefixes 
+%   performer  : sclist(string)       ~ OR-list of performer prefixes 
+%   title      : sclist(string)       ~ AND-list of title substrings 
+%   genre      : sclist(string)       ~ OR-list of genre substrings 
+%   place      : sclist(string)       ~ OR-list of place substrings 
+%   language   : sclist(string)       ~ OR-list of langauage name prefixes 
+%   year       : sclist(year) | range ~ OR-list of genre substrings 
+%   sample     : nonneg | 'all'       ~ sample a random subset of this size
+%   sv         : integer              ~ sample version for sample
+%
+%  lib_name --> 'bl' | 'charm' | 'ilm' | 'mazurka' | 'beets'.
+%  range    --> year, '-', year.
+%  ==
+%  NB. A =|pattern|= is a string which may contain '*' to match any sequence of 
+%  characters. The pattern must match the WHOLE value being tested, eg to search
+%  for the substring 'Botswana', you must include '*' at both ends of the pattern,
+%  eg =|'*botswana*'|=.
+%  
+%  A successful result is of type
+%  ==
+%  result(getCollectionId) == { 
+%     cid: string ~'collection id', 
+%     size: natural ~'number of items in collection' 
+%  }.
+%  ==
+getCollectionId(R) :- vis(getCollectionId,R).
+
+%% getCollectionPerspective(+Request) is det. 
+%
+%  Get a given perspective on a previously defined collection.
+%  Result will depend on the particular perspective chosen. 
+%  All perspectives require the parameter =|cid(string)|=. Most
+%  perspectives accept the parameters =|recompute(oneof([none,failed,force]))|=
+%  and =|vamp_on_demand(boolean)|=.
+%  Defined perspectives are
+%
+%  ==
+%  summary : [] -> { cid:string, size: natural, goal: string }.
+%
+%  list : 
+%     [ limit(natural)/5000, offset(natural)/0, sort_by(oneof([label, date]))/label ] 
+%  -> { cid:string, items: list( {uri:uri, label:string, audio:list(link(audio)), date:string }) }.
+%
+%  midi_pitch_histogram : 
+%     [ weighting(oneof([none,dur,vel]))/none ] 
+%  -> { values:list(integer), counts:list(natural) }.
+%
+%  pitch_histogram : 
+%     [ weighting(oneof([none,dur,vel]))/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
+%  -> { edges:list(nonneg), counts:list(natural) }.
+%
+%  tempo_histogram : 
+%     [ period(nonneg)/1, min(integer)/20, max(integer)/100, num_bins(natural)/50, lang(oneof([ml,r]))/ml ]
+%  -> { edges:list(nonneg), counts:list(natural) }.
+%
+%  mean_tempo_curve : 
+%     [ num_samples(natural)/20, lang(oneof([ml,r]))/ml ]
+%  -> { means:list(nonneg), std_devs:list(nonneg) }.
+%
+%  tonic_relative_pitch_class_histogram : [] 
+%  -> { values:list(string), counts:list(nonneg), ok_count:natural, failed:{prolog:list, python:list}}.
+%
+%  pitch_lookup :
+%     [ midi_pitch(between(0,127)), weighting(oneof([none,dur,vel]))/none,
+%       limit(natural)/5000, offset(natural)/0 ]
+%  -> { }.
+%
+%  tonic_histogram : [] -> { }.
+%
+%  pitch_class_histogram : [] -> { }.
+%
+%  tuning_stats : [] -> { }.
+%
+%  tuning_stats_by_year : [] -> { }.
+%
+%  places_hist : [] -> { }.
+%
+%  key_relative_chord_seq : 
+%     [  spm_minlen(natural)/2, spm_maxseqs(natural)/500, spm_algorithm(Alg)/'CM-SPADE',
+%        spm_ignore_n(natural)/1, spm_maxtime(number)/60, spm_minsupport(number)/50 ] 
+%  -> { }.
+%
+%  similarity :
+%     [  sim_downsample(number)/1,sim_clusters(number)/40,sim_reclimit(number)/2000,
+%        sim_type(string)/'euclidean',sim_features(string)/'chromagram',
+%        sim_compressor(string)/'zlib'] 
+%  -> { }.
+%  ==
+getCollectionPerspective(R) :- vis(getCollectionPerspective,R).
+
+%% getCollectionPairPerspective(+Request) is det.
+%
+%  Get a given comparative perspective on a pair of previously defined collections.
+%  Result will depend on the particular perspective chosen. Defined perspectives are:
+getCollectionPairPerspective(R) :- vis(getCollectionPairPerspective,R).
+
+%% getMultiCollectionPerspective(+Request) is det.
+%  Get a given comparative perspective on a set of previously defined collections.
+getMultiCollectionPerspective(R) :- vis(getMultiCollectionPerspective,R).
+
+%% getRecordingPerspective(+Request) is det.
+%  Get a given perspective for a given recording URI. Perspectives, their parameters and
+%  their output types are as follows (all perspectives require a uri parameter)
+%  ==
+%  properties  : [] -> { 
+%     library:string, 
+%     title:string, 
+%     composer:list(string), 
+%     performer:list(string),
+%     genre:list(string),
+%     place:string,
+%     language:string,
+%     recording_date:string
+%  }.
+%
+%  transcription      : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name])) }.
+%  transcription_fine : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name)) }.
+%  chords             : [] -> { csv: uri(csv([time,chord_name])) }.
+%  chords_notes       : [] -> { csv: uri(csv([time,dur,integer)) }.
+%  key                : [] -> { csv: uri(csv([time,integer,string])) }.
+%  key_tonic          : [] -> { csv: uri(csv) }.
+%  tempo              : [] -> { csv: uri(csv([time,nonneg,tempo_string])).
+%  beats              : [] -> { csv: uri(csv([time,tempo_string])) }.
+%  beatroot           : [] -> { csv: uri(csv([time])) }.
+%
+%  tempo_nonuniform : [] -> { times:list(float), values:list(float) }. 
+%  tempo_uniform    : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }. 
+%  tempo_normalised : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }. 
+%  tempo_histogram  : 
+%     [ period(nonneg)/1, num_bins(natural)/50, min(nonneg)/20, max(nonneg)/100, lang(oneof([ml,r]))/ml  ]
+%  -> { edges: list(float), counts: list(natural) }.
+%
+%  midi_pitch_histogram : 
+%     [ weighting(note_weight)/none ]
+%  -> { values: list(integer), counts: list(natural) }.
+%
+%  pitch_histogram : 
+%     [weighting(note_weight)/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
+%  -> { edges: list(float), counts: list(natural) }.
+%
+%  chord_histogram : [] -> { values:list(string), counts:list(natural) }.
+%
+%  spectrogram : [offset(nonneg)/0, length(nonneg)/60 ] -> { csv: uri }.
+%  ==
+%
+%  Supplementary types:
+%  ==
+%  note_weight --> 'none'; 'dur'; 'vel'; 'dur*vel'.
+%
+%  time == float.
+%  dur  == nonneg.
+%  freq == nonneg.
+%  vel  == nonneg.
+%  ==
+getRecordingPerspective(R) :- vis(getRecordingPerspective,R).
+
+%% listCollections(+Request) is det.
+%  Lists previously defined collections and the Prolog queries that define them.
+%  ==
+%  listCollections : [] -> { 
+%     collections: list({  cid       : string, 
+%                          query     : string, 
+%                          dv        : integer, 
+%                          size      : natural, 
+%                          timestamp : string 
+%                       })
+%  }.
+listCollections(R) :- vis(listCollections,R).
+
+%% listPlaces(+Request) is det.
+%  Lists known place names, currently the distinct values of the dcterms:spatial predicate.
+%  ==
+%  listPlaces : [] -> { 
+%     places: list({  name      : string 
+%                  })
+%  }.
+listPlaces(R) :- vis(listPlaces,R).
+
+%% listPerspectives(+Request) is det.
+%  Lists available perspective for a given method.
+listPerspectives(R) :- vis(listPerspectives,R).
+
+%% csv_time_window(+Request) is det.
+%
+%  Returns the contents of a CSV file between between given time limits,
+%  assuming the the first column is a time value.
+%  Also returns the time of the last row. Parameters and return structure are
+%  ==
+%  csv_time_window : 
+%     [uri(uri(csv)), start(float), end(float) ]
+%  -> { duration:float, columns: list(list) }.
+%  ==
+%  URI parameter must be the URI of a CSV file.
+%  Data is returned a list of lists, where each inner list is one column
+%  of the CSV.
+csv_time_window(R) :- vis(csv_time_window,R).
+
+
+% ------------------------------------------------------------------------------------
+% Implementation of methods
+
+:- multifile param/2.
+
+param( format,   [oneof([json,jsonp]), default(jsonp), description('Reply format')]).
+param( callback, [atom,default(jsonp_cb), description('Callback for jsonp reply')]).
+param( dv,       [nonneg, optional(false), description('Database version ID')]).
+
+param( cid,       [atom, optional(false), description('Collection ID')]).
+param( pid,       [atom, optional(false), description('Perspective ID')]).
+param( cids,      [atom, optional(false), description('Semicolon separated list of Collection IDs')]).
+param( uri,       [atom, optional(false), description('Item URI')]).
+param( library,   [atom, default(*), description('Semicolon separated list of libraries to search')]).
+param( genre,     [atom, default(*), description('Semicolon separated list of genre names, or "*"')]).
+param( year,      [atom, default(*), description('Semicolon separeted list of release years or a range (Y1-Y2) or "*"')]).
+param( composer,  [atom, default(*), description('Semicolon separated list of composers or "*"')]).
+param( performer, [atom, default(*), description('Semicolon separated list of performers or "*"')]).
+param( place,     [atom, default(*), description('Semicolon separated list of place names or "*"')]).
+param( language,  [atom, default(*), description('Semicolon separated list of language name prefixes or "*"')]).
+param( collection,[atom, default(*), description('Semicolon separated list of BL collection names or "*"')]).
+param( title,     [atom, default(*), description('Semicolon separated AND-list of title substrings or "*"')]).
+param( method,    [atom, optional(false), description('API method name')]).
+param( limit,     [integer, default(5000), description('Maximum number of things to return')]).
+param( offset,    [integer, default(0), description('Offset within list')]).
+param( sort_by,   [oneof([label,date]), default(label), description('Sort recordings list by this property')]).
+param( start,     [number, optional(false), description('Start of window in seconds')]).
+param( end,       [number, optional(false), description('End of window in seconds')]).
+param( sample,    [number, default(all), description('Sample a random subset of this size')]).
+param( sv,        [number, default(1), description('Version of random subset')]).
+param( midi_pitch,[nonneg, default(60), description('MIDI note number 0..127')]).
+
+method_params(listCollections,  []).
+method_params(listPlaces,  []).
+method_params(listPerspectives, [ method(_) ]).
+method_params(getCollectionId,  [ dv(_), library(_), genre(_), place(_), language(_), year(_), composer(_), 
+                                 performer(_), collection(_), title(_), sample(_), sv(_) ]).
+method_params(csv_time_window,  [ uri(_), start(_), end(_) ]).
+method_params(Method, [ pid(_) | Params ]) :-
+   once(perspective(Method,_)), 
+   setof(P, perspective_param_name(Method,P), Ps),
+   maplist(param_name_term,Ps,Params).
+
+param_name_term(Name,Term) :- functor(Term,Name,1).
+perspective_param_name(Method,Name) :-
+   perspective(Method,_,Specs,_),
+   member(S,Specs),
+   optspec_name(S,Name).
+
+   
+:- multifile perspective/4.
+
+%% cc(+Pred:pred(-A,-cc_status), +Cont:pred(+A,+cc_status), +ErrorCont:pred(+exception)) is det.
+%  Call a predicate with success and error continuations.
+%  Pred roduces a result of type A and a status. If it succeeds, the result and status are passed
+%  to the continuation Cont. Otherwise, an exception is passed to ErrorCont.
+cc(Goal,Cont,ErrorCont) :- catch(cc_wrapper(Goal,Cont), Ex, call(ErrorCont,Ex)).
+cc_wrapper(Goal,Cont) :- insist(call(Goal,Result,Status)), call(Cont,Result,Status).
+
+place_name(P) :- rdf(_,dcterms:spatial,literal(P)).
+
+%% method_result_cc(+Method,+Params:options,+C:success_cont,+E:error_cont) is det.
+%
+% Perspectives are handle using continuations to allow spawning
+% ==
+% success_cont == pred(+dict,+vis_status).
+% error_cont == pred(+exception).
+% vis_status ---> stable; unstable.
+% ==
+% Method must be a method id registered in perspective/2 or handled my method_resut/4.
+method_result_cc(Method,Params,Cont,ErrorCont) :-
+   perspective(Method,_), !,
+   option(pid(PId),Params),
+   catch((  insist( perspective(Method,PId,Specs,Pred), unknown_perspective(PId)),
+            insist( maplist(options_optspec(Params),Specs)),
+            call(Pred,Cont,ErrorCont)
+         ), Ex, call(ErrorCont,Ex)).
+
+% all non-perspective methods are handled in this thread
+method_result_cc(Method,Params,Cont,ErrorCont) :-
+   cc( method_result(Method,Params), Cont, ErrorCont).
+
+%% method_result(+Method, +Opts, -Result, -Status:vis_status) is det.
+%
+%  Handles methods =|listCollections, listPerspectives, getCollectionId, csv_time_window|=.
+method_result(listCollections, _, _{ collections:List }, unstable) :-
+   findall( _{ cid:Id, query:QA, dv:DV, size:SZ, timestamp:TS },
+            (  browse(dataset:dataset(dmlvis:Q, DV, Id, SZ, _),comp(_,Timestamp,_)-ok),
+               format_time(string(TS),'%FT%T%:z',Timestamp),
+               term_to_atom(Q,QA)
+            ),
+            List).
+
+method_result(listPlaces, _, _{ places:List }, unstable) :-
+   findall( _{ name:Name, count:N }, aggregate(count,place_name(Name),N), List).
+
+
+method_result(listPerspectives, Opts, _{ perspectives:List }, stable) :-
+   option(method(Method), Opts),
+   findall( P, perspective(Method,P), List).
+
+method_result(getCollectionId, Opts, _{ cid:Id, size:Size, full_size:FullSize }, stable) :-
+   build_dataset_query(Query,DBV,Size-FullSize,Opts,Remaining),
+   insist(Remaining=[], unrecognised_search_criteria(Remaining)),
+   insist(dataset_query_id( Query, DBV, Id),empty_dataset(Query)),
+   dataset_size(Id, Size).
+
+method_result(csv_time_window, Opts, _{ duration:Dur, columns:Columns }, stable) :-
+   maplist(options_optspec(Opts), [ \uri(URI), \start(Start), \end(End) ]),
+   uri_to_csv(URI,Rows),
+   insist(Rows\=[], empty_csv(URI)),
+
+   append(_,[EndRow],Rows), 
+   functor(EndRow,_,NumCols),
+   arg(1,EndRow,Dur),
+
+   numlist(1,NumCols,Is),
+   drop_while(row_before(Start),Rows,Rows1),
+   take_while(row_before(End),Rows1,Rows2),
+   rows_cols(Is,Rows2,Columns).
+
+row_before(T0,Row) :- arg(1,Row,T), T<T0.
+
+options_optspec(Opts,+O) :- option(O,Opts).
+options_optspec(Opts,O-Def) :- option(O,Opts,Def).
+% options_optspec(M,Opts,O>Goal) :- options_optspec(M,Opts,O), call(M:Goal).
+
+optspec_name(+O,Name) :- functor(O,Name,1).
+optspec_name(O-_,Name) :- functor(O,Name,1).
+% optspec_name(O>_,Name) :- optspec_name(O,Name).
+
+perspective(Method,Perspective) :- perspective(Method,Perspective,_,_).
+
+perspective(getRecordingPerspective,  properties, [+uri(URI)], cc(recording_info(URI))).
+perspective(getCollectionPerspective, summary,    [+cid(C)], cc(collection_summary(C))).
+perspective(getCollectionPerspective, list,       [+cid(C),limit(Lim)-5000,offset(Off)-0,sort_by(SortBy)-label], cc(collection_list(C,Lim,Off,SortBy))).
+
+collection_summary(Id,Result,stable) :-
+   insist(dataset_size(Id,Size), unknown_collection(Id)),
+   findall( _{goal:GoalA, dv:DV}, ( dataset_query_dv(Id,Goal,DV), 
+                                    term_to_atom(Goal,GoalA)     ), Queries),
+   Result = _{cid:Id, size:Size, queries:Queries }.
+
+
+collection_list(Id, Lim, Offset, SortBy, _{cid:Id, size:Size, items:Items}, stable) :-
+   insist(dataset_size(Id,Size), unknown_collection(Id)),
+   findall(SortVal-Item, dataset_itemx(SortBy,Id,Item,SortVal), KeyedItems), sort(KeyedItems,Sorted),
+   findall(Item,limit(Lim,offset(Offset,member(_-Item,Sorted))),Items).
+
+dataset_itemx(SortProp, Id, Itemx, SortVal) :-
+   dataset_item(Id,URI),
+   filter(Lib,URI,in_library),
+   findall(Prop:Val, (  member(Prop,[label,audio,date]),
+                        recording_property(URI,Lib,Prop,Val) ), Props),
+   (member(SortProp:Val,Props) -> SortVal=just(Val); SortVal=nothing),
+   dict_create(Itemx,_,[uri:URI | Props]).
+
+
+recording_info(URI, Result, stable) :-
+   insist(filter(Lib,URI,in_library),unknown_recording(URI)),
+   findall(Prop:Val, recording_property(URI,Lib,Prop,Val), Props),
+   dict_create(Result,_,[ library:Lib | Props ]).
+
+recording_property(URI,Prop,Val) :-
+   filter(Lib,URI,in_library),
+   recording_property(URI,Lib,Prop,Val).
+
+recording_property(URI,_,label,Label) :- 
+   rdf_display_label(URI,Label).
+recording_property(URI,_,audio,Links) :- 
+   % !!! HACK: force scraping for BL items here..
+   % DISABLED - seems to upset BL server...
+   % (rdf(URI,dml:blpage,_) -> once(bl_p2r:scrape_audio_link(URI,_)); true),
+   setof(L,F^audio_link(URI,L,F),Links).
+recording_property(URI,Lib,Prop,Val) :-
+   lib_property_read(Lib, Prop, Reader),
+   pred_values(URI,Lib:Prop,Reader,Val).
+
+
+pred_values(URI,Lib:Prop,-Converter,Val) :- 
+   lib_property(Lib,Prop,Pred),
+   once(rdf(URI,Pred,Obj)), 
+   call(Converter,Obj,Val).
+pred_values(URI,LibProp,+Converter,Vals) :-
+   findall(Val, pred_values(URI,LibProp,-Converter,Val), Vals).
+
+lib_property_read(_, collection, -literal_text).
+lib_property_read(_, composer,   +literal_text).
+lib_property_read(_, performer,  +literal_text).
+lib_property_read(_, title,      -literal_text).
+lib_property_read(_, date,       -literal_text).
+lib_property_read(_, place,      -literal_text).
+lib_property_read(_, language,   +literal_text).
+lib_property_read(ilm, genre,    +genre_label).
+
+genre_label(Genre,Label) :-
+   rdf(Genre,rdfs:label,Lit),
+   literal_text(Lit,Label).
+
+% ------------------- DEFINING NEW COLLECTIONS -----------------------
+
+build_dataset_query(Query,DBV,Size-FullSize) -->
+   select_option(dv(DBV)),
+   select_option(sample(Subset)),
+   select_option(sv(SV)),
+   full_query(FQ),
+   {  Subset=all -> Query=FQ, Size=FullSize
+   ;  Query=sample(Subset,SV,FQ),
+      aggregate_all(count,call(FQ,_),FullSize)
+   }.
+
+full_query(qc(Filters)) -->
+   run_left(seqmap(process_qopt, 
+                   [  qo( library,    atoms(LS), libraries(LS))
+                   ,  qo( collection, atoms(CC), any(collection,CC))
+                   ,  qo( title,      atoms(TS), all(title,TS))
+                   ,  qo( composer,   atoms(CS), any(composer,CS))
+                   ,  qo( performer,  atoms(PS), any(performer,PS))
+                   ,  qo( genre,      atoms(GS), any(genre,GS))
+                   ,  qo( year,       years(YS), year(YS))
+                   ,  qo( place,      atoms(PS), any(place,PS))
+                   ,  qo( language,   atoms(LNS), any(language,LNS))
+                   ]), Filters, []). 
+
+process_qopt(qo(OptName,Parser,Filter)) -->
+   { Opt=..[OptName,OptVal] }, 
+   \> option_default_select(Opt,'*'),
+   (  {OptVal='*'} -> []
+   ;  {parse_atom(Parser,OptVal)}, 
+      \< [Filter]
+   ).
+            
+sample(Size,_,Query,X) :-
+   setof(X, call(Query,X), All),
+   length(All,Total),
+   (  Total=<Size -> Subset=All
+   ;  randset(Size, Total, All, [], Subset)
+   ),
+   member(X,Subset).  
+
+randset(0, _, _) --> !.
+randset(K, N, [A|As]) -->
+   ({random(N)>=K} -> {K1=K}; {K1 is K-1}, cons(A)),
+   {N1 is N-1}, randset(K1,N1,As).
+
+
+qc(Filters,X) :- maplist(filter(Lib,X),Filters), filter(Lib,X,in_library).
+
+%% lib_property(-Library, -Property, -Predicate) is nondet.
+%
+%  This predicate manages the mapping from API search fields to RDF
+%  predicates. There are still some questions about:
+%
+%  $ composer : marcrel cmp, arr, lyr
+%  $ performer : marcrel prf, sng, cnd, drt
+:- rdf_meta lib_property(?,+,r).
+lib_property(bl,    collection, dcterms:isPartOf).
+lib_property(bl,    language, dcterms:language).
+lib_property(bl,    language, dc:language).
+lib_property(bl,    place, dcterms:spatial).
+lib_property(ilm,   genre, mo:genre).
+
+% lib_property(beets, date, beets:original_year).
+lib_property(ilm,   date, ilm:release_date).
+lib_property(charm, date, charm:recording_date).
+lib_property(mazurka, date, mazurka:recording_date).
+lib_property(bl,    date, dcterms:created).
+
+lib_property(charm, composer, charm:composer).
+lib_property(mazurka, composer, mazurka:composer).
+lib_property(bl,    composer, marcrel:cmp).
+lib_property(bl,    composer, marcrel:arr).
+lib_property(beets, composer, beets:composer).
+
+lib_property(charm, performer, charm:performer).
+lib_property(mazurka, performer, mazurka:performer).
+lib_property(bl,    performer, marcrel:prf).
+lib_property(bl,    performer, marcrel:sng).
+lib_property(beets, performer, beets:artist).
+lib_property(ilm,   performer, ilm:arist).
+
+lib_property(charm, title, charm:title).
+lib_property(mazurka, title, mazurka:title).
+lib_property(bl,    title, dc:title).
+lib_property(beets, title, beets:title).
+lib_property(ilm,   title, dc:title).
+
+lib_property_search(_,collection,X,   substring(X)).
+lib_property_search(_,place,     X,   substring(X)).
+lib_property_search(_,language,  X,   prefix(X)).
+lib_property_search(_,composer,  X,   prefix(X)).
+lib_property_search(_,performer, X,   prefix(X)).
+lib_property_search(_,title,     X,   substring(X)).
+lib_property_search(beets, date,  L-U, between(L,U1)) :- succ(U,U1). 
+lib_property_search(_Lib,   date,  L-U, between(LA,U1A)) :-
+   % Lib\=beets,
+   succ(U,U1), 
+   atom_number(LA,L), 
+   atom_number(U1A,U1). 
+
+%% filter(+Lib, -Resource, +SearchSpec) is nondet.
+filter(Lib,   _, libraries(Ls))  :- member(Lib,Ls).
+filter(Lib,   X, any(Prop,Vals)) :- member(Val,Vals), filter(Lib,X,Prop,Val).
+filter(Lib,   X, all(Prop,Vals)) :- maplist(filter(Lib,X,Prop),Vals).
+filter(Lib,   X, year(any(Ys)))  :- member(Y,Ys), filter(Lib,X,date,Y-Y).
+filter(Lib,   X, year(L-U))      :- filter(Lib,X,date,L-U).
+filter(Lib,   X, Prop-Val)       :- filter(Lib,X,Prop,Val).
+
+filter(charm, X, in_library)     :- rdf(X,charm:file_name,_,charm_p2r).
+filter(mazurka, X, in_library)   :- rdf(X,mazurka:pid,_,mazurka_p2r).
+filter(bl,    X, in_library)     :- rdf(X,rdf:type,mo:'Signal',bl_p2r).
+filter(ilm,   X, in_library)     :- rdf(X,mo:track_number,_,ilm_p2r).
+filter(beets, X, in_library)     :- rdf(X,rdf:type,mo:'AudioFile',beets_p2r).
+
+%% filter(+Lib, -Resource, +Property, +Value) is nondet.
+
+% filter(beets, X,genre,G) :- rdf_has(X,beets:genre,literal(substring(G),_)).
+filter(ilm,   X,genre,G) :- 
+   rdf(GR,rdfs:label,literal(substring(G),_),ilm_p2r),
+   rdf(GR,rdf:type,mo:'Genre',ilm_p2r),
+   rdf(X,mo:genre,GR).
+
+filter(Lib, X, Prop, Val) :- 
+   lib_property(Lib,Prop,Pred),
+   lib_property_search(Lib,Prop,Val,Search),
+   rdf(X,Pred,literal(Search,_)).
+
+% --------- parsers -----------
+
+% cids(Ids) --> seqmap_with_sep(",",alphanum,Ids).
+cids(Ids) --> semicolon_sep(atom_codes,Ids).
+
+% atoms('*') --> "*", !.
+atoms(AS) --> semicolon_sep(atom_codes,AS1), {maplist(downcase_atom,AS1,AS2),sort(AS2,AS)}.
+whole(A) --> string_without("",Codes), {atom_codes(A1,Codes), downcase_atom(A1,A)}.
+
+% years('*') --> "*", !.
+years(L-U) --> integer(L), "-", integer(U). 
+years(any(Ys)) --> semicolon_sep(number_codes,Ys1), {sort(Ys1,Ys)}.
+
+% alphanum(X) --> string_without(",",S), {atom_string(X,S)}.
+
+item(Conv,Item) --> string_without(";",Codes), {call(Conv,Item,Codes)}.
+semicolon_sep(Conv,Items) -->
+   seqmap_with_sep(";",item(Conv),Items).
+
+
+parse_atom(Phrase,Atom) :- 
+   atom_codes(Atom,Codes), 
+   insist( phrase(Phrase,Codes), parse_failure(Phrase)).
+
+thread_pool:create_pool(vis_recording) :-
+   current_prolog_flag(cpu_count,N),
+   thread_pool_create(vis_recording, N, [backlog(50)]).
author	Daniel Wolff
date	Tue, 09 Feb 2016 21:05:06 +0100
parents
children