Mercurial > hg > dml-open-cliopatria
diff cpack/dml/api/dmlvis.pl @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpack/dml/api/dmlvis.pl Tue Feb 09 21:05:06 2016 +0100 @@ -0,0 +1,717 @@ +/* Part of DML (Digital Music Laboratory) + Copyright 2014-2015 Samer Abdallah, University of London + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +:- module(dmlvis, + [ cc/3 + , recording_property/3 + ]). + +/** <module> DML Visualisation web service + */ + + +:- use_module(library(http/html_write)). +:- use_module(library(http/html_head)). +:- use_module(library(http/thread_httpd)). +:- use_module(library(http/http_dispatch)). +:- use_module(library(http/http_parameters)). +:- use_module(library(http/http_json)). +:- use_module(library(http/json)). +:- use_module(library(thread_pool)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(sandbox)). +:- use_module(library(dcg_core)). +:- use_module(library(dcg_pair)). +:- use_module(library(dcg_codes)). +:- use_module(library(dcg_macros)). +:- use_module(library(fileutils)). +:- use_module(library(listutils)). +:- use_module(library(optutils)). +:- use_module(library(swipe)). +:- use_module(library(memo)). +:- use_module(library(rdfutils)). +:- use_module(library(httpfiles)). +:- use_module(library(dataset)). +:- use_module(library(computations), [rows_cols/3]). +:- use_module(library(csvutils), [uri_to_csv/2]). 
:- use_module(library(insist)).
:- use_module(library(lambda)).
:- use_module(library(dcg/basics), [string_without//2, integer//1]).
:- use_module(library(solution_sequences)).

:- use_module(components(audio)).

% cc/3 and method_result_cc/4 receive closures; the integers state how many
% extra arguments are appended when each closure is called.
:- meta_predicate cc(2,2,1).
:- meta_predicate method_result_cc(+,+,2,1).

% From here on, double-quoted text is read as a list of character codes and
% back-quoted text as a string.
:- set_prolog_flag(double_quotes,codes).
:- set_prolog_flag(back_quotes,string).

% The path alias v1 is declared as api/v4 relative to the root alias.
http:location(v1,root(api/v4),[]).

% One handler per API method. Only getRecordingPerspective is given a spawn
% option; it names the vis_recording pool.
:- http_handler(v1(getCollectionId), getCollectionId, []).
:- http_handler(v1(getCollectionPerspective), getCollectionPerspective, []).
:- http_handler(v1(getRecordingPerspective), getRecordingPerspective, [spawn(vis_recording)]).
:- http_handler(v1(listCollections), listCollections, []).
:- http_handler(v1(listPlaces), listPlaces, []).
:- http_handler(v1(listPerspectives), listPerspectives, []).
:- http_handler(v1(csv_time_window), csv_time_window, []).

% ------------------------------------------------------------------------------------

%% vis(+Method,+Request) is det.
%
%  General VIS API HTTP handler. Returns either JSON or JSONP depending on the
%  format parameter. If JSONP is selected (the default), the callback parameter
%  determines the Javascript callback function. The JSON result structure is of
%  type
%  ==
%  {
%     query: string ~'The original query, excluding protocol and host',
%     result: (error_struct|A) ~'Result structure A or error information'
%  }
%  ==
%  where
%  ==
%  error_struct == { errors: list({ code: integer, desc: string }) }
%  ==
%  See individual methods for more information about their return types.
% Entry point shared by all API handlers. The start time T0 is taken first so
% that the elapsed time reported by send_response/5 covers parameter parsing.
% format(_) and callback(_) are added to the method's own parameters, and the
% reply format term (json/1 or jsonp/2) is fixed before the method runs so that
% the same Responder serves both the success and the error path.
vis(Method,Request) :-
   get_time(T0),
   method_params(Method,Params),
   member(request_uri(Query),Request),
   vis_parameters(Request, [format(Format), callback(Callback) | Params],ROpts),
   (Format=jsonp -> Fmt=jsonp(Callback,ROpts); Fmt=json(ROpts)),
   Responder=send_response(Fmt,Query,T0),
   debug(dmlvis(method),'~q',[method_result_cc(Method,Params,Responder,writeln)]),
   method_result_cc(Method,Params,dmlvis:Responder,error_cont(Query,Responder)).

%% send_response(+Fmt,+Query,+T0,+Result,+Status) is det.
%
%  Success continuation built by vis/2. Logs the time elapsed since T0 on the
%  dmlvis(response) debug topic and writes a dict with keys =query= and
%  =result= using reply_as/3. DT is left as the unevaluated term T1-T0 and is
%  only consumed by the ~3f directive of the debug message.
send_response(Fmt,Query,T0,Result,Status) :-
   get_time(T1), DT=T1-T0,
   debug(dmlvis(response),'Sending response after ~3f s to ~q',[DT,Query]),
   once(reply_as(Fmt,Status,_{ query:Query, result: Result})).

%% error_cont(+Q,+Cont,+Ex) is det.
%
%  Error continuation built by vis/2. Converts the exception Ex to a result
%  dict with error_result/2 and passes it to Cont with status =error=.
error_cont(Q,Cont,Ex) :-
   debug(dmlvis(error),'Sending error ~W in response to ~q',[Ex,[quoted(true),max_depth(8)],Q]),
   error_result(Ex,Result),
   call(Cont,Result,error).

%% vis_parameters(+Request,+Params,-ReplyOpts) is det.
%
%  Reads Params from the request using the declarations in param/2, together
%  with an optional =random= parameter. Names present in the form data but not
%  in that list are then examined: =indent= selects ReplyOpts=[] (otherwise
%  ReplyOpts=[width(0)]), =dv= is tolerated, and anything left over is passed
%  to insist/2 as unrecognised_parameters(Names).
%  NOTE(review): insist/2 comes from library(insist), which is not shown here;
%  presumably it raises its second argument when the goal fails - confirm.
vis_parameters(Request,Params,ReplyOpts) :-
   Params1=[random(_, [optional(true)]) | Params],
   http_parameters(Request, Params1, [attribute_declarations(param), form_data(AllParams)]),
   findall(N, (member(N=_,AllParams), \+used_param(N,Params1)), Unused),
   (select(indent,Unused,Unused1) -> ReplyOpts=[]; ReplyOpts=[width(0)], Unused1=Unused),
   (select(dv,Unused1,Unused2) -> true; Unused2=Unused1),
   insist(Unused2=[], unrecognised_parameters(Unused2)).

%% used_param(+N,+ValidParams) is nondet.
%
%  True if some term in ValidParams has name N, whatever its arity.
used_param(N,ValidParams) :-
   member(Q,ValidParams),
   functor(Q,N,_).

% result_response(Fmt,Q,Result,Status,Response) :-
%    with_output_to(string(Response),
%       reply_as(Fmt,Status,_{ query:Q, result: Result})).

%% reply_as(+Fmt,+Status,+Dict) is det.
%
%  Writes headers and then Dict as JSON on current_output. For jsonp(Cb,Opts)
%  the JSON text is wrapped as =|Cb(...);|=.
%  NOTE(review): Cb is the value of the =callback= request parameter and is
%  written as-is; consider restricting it to a JavaScript identifier before
%  echoing it into the response.
%  NOTE(review): write_headers/1 is not defined in this file; presumably it is
%  provided by one of the imported libraries - confirm.
reply_as(json(Opts),Status,Dict) :- !,
   write_headers([Status,type(json)]),
   json_write_dict(current_output, Dict, Opts).
reply_as(jsonp(Cb,Opts),Status,Dict) :- !,
   write_headers([Status,type(jsonp)]),
   write(Cb), write('('),
   json_write_dict(current_output, Dict, Opts),
   write(');').


%% error_result(+Ex,-Result:dict) is det.
%
%  Wraps the description of exception Ex in a dict whose =errors= key holds a
%  one-element list.
error_result(Ex, _{ errors:[Err] }) :- error_result1(Ex,Err).

% A dml_error(Code,Descriptor) exception supplies its own code and description.
error_result1(dml_error(Code,Descriptor),_{ code: Code, desc:Descriptor }) :- !.

% Fallback clause of error_result1/2: any other exception is reported with
% code 500 and the text produced by message_to_string/2.
error_result1(Ex, _{ code: 500, desc: Msg }) :-
   message_to_string(Ex,Msg).

% ------------------------------------------------------------------------------------
% Handlers with documentation

%% getCollectionId(+Request) is det.
%
%  Define a collection satisfying given search terms. Result is a collection
%  ID, which may be the same as a previously defined collection if it consists
%  of the same items.
%
%  Parameters are as follows. In all cases, =|sclist(Type)|= denotes a semicolon
%  separated list of values of given type OR the string '*', which is the default.
%  The value '*' means that no filtering is done on the relevant property. The
%  list is interpreted as an AND-list or an OR-list depending on which property
%  is being tested. String matching is case insensitive.
%  The only required parameter is =|dv|=.
%  ==
%  dv         : integer             ~ database version
%  library    : sclist(lib_name)    ~ OR-list of libraries to search
%  collection : sclist(pattern)     ~ OR-list of BL collection name patterns
%  composer   : sclist(string)      ~ OR-list of composer prefixes
%  performer  : sclist(string)      ~ OR-list of performer prefixes
%  title      : sclist(string)      ~ AND-list of title substrings
%  genre      : sclist(string)      ~ OR-list of genre substrings
%  place      : sclist(string)      ~ OR-list of place substrings
%  language   : sclist(string)      ~ OR-list of language name prefixes
%  year       : sclist(year) | range ~ OR-list of years, or a range of years
%  sample     : nonneg | 'all'      ~ sample a random subset of this size
%  sv         : integer             ~ sample version for sample
%
%  lib_name --> 'bl' | 'charm' | 'ilm' | 'mazurka' | 'beets'.
%  range --> year, '-', year.
%  ==
%  NB. A =|pattern|= is a string which may contain '*' to match any sequence of
%  characters. The pattern must match the WHOLE value being tested, eg to search
%  for the substring 'Botswana', you must include '*' at both ends of the pattern,
%  eg =|'*botswana*'|=.
%
%  A successful result is of type
%  ==
%  result(getCollectionId) == {
%     cid: string ~'collection id',
%     size: natural ~'number of items in collection',
%     full_size: natural ~'number of items matching the query before sampling'
%  }.
%  ==
getCollectionId(R) :- vis(getCollectionId,R).

%% getCollectionPerspective(+Request) is det.
%
%  Get a given perspective on a previously defined collection.
%  Result will depend on the particular perspective chosen.
%  All perspectives require the parameter =|cid(string)|=. Most
%  perspectives accept the parameters =|recompute(oneof([none,failed,force]))|=
%  and =|vamp_on_demand(boolean)|=.
%  Only =summary= and =list= are implemented in this file; perspective/4 is
%  multifile, so the remaining perspectives are expected to be defined elsewhere.
%  Defined perspectives are
%
%  ==
%  summary : [] -> { cid:string, size: natural, queries: list({ goal: string, dv: integer }) }.
%
%  list :
%     [ limit(natural)/5000, offset(natural)/0, sort_by(oneof([label, date]))/label ]
%     -> { cid:string, size: natural, items: list( {uri:uri, label:string, audio:list(link(audio)), date:string }) }.
%
%  midi_pitch_histogram :
%     [ weighting(oneof([none,dur,vel]))/none ]
%     -> { values:list(integer), counts:list(natural) }.
%
%  pitch_histogram :
%     [ weighting(oneof([none,dur,vel]))/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
%     -> { edges:list(nonneg), counts:list(natural) }.
%
%  tempo_histogram :
%     [ period(nonneg)/1, min(integer)/20, max(integer)/100, num_bins(natural)/50, lang(oneof([ml,r]))/ml ]
%     -> { edges:list(nonneg), counts:list(natural) }.
%
%  mean_tempo_curve :
%     [ num_samples(natural)/20, lang(oneof([ml,r]))/ml ]
%     -> { means:list(nonneg), std_devs:list(nonneg) }.
%
%  tonic_relative_pitch_class_histogram : []
%     -> { values:list(string), counts:list(nonneg), ok_count:natural, failed:{prolog:list, python:list}}.
%
%  pitch_lookup :
%     [ midi_pitch(between(0,127)), weighting(oneof([none,dur,vel]))/none,
%       limit(natural)/5000, offset(natural)/0 ]
%     -> { }.
%
%  tonic_histogram : [] -> { }.
%
%  pitch_class_histogram : [] -> { }.
%
%  tuning_stats : [] -> { }.
%
%  tuning_stats_by_year : [] -> { }.
%
%  places_hist : [] -> { }.
%
%  key_relative_chord_seq :
%     [ spm_minlen(natural)/2, spm_maxseqs(natural)/500, spm_algorithm(Alg)/'CM-SPADE',
%       spm_ignore_n(natural)/1, spm_maxtime(number)/60, spm_minsupport(number)/50 ]
%     -> { }.
%
%  similarity :
%     [ sim_downsample(number)/1,sim_clusters(number)/40,sim_reclimit(number)/2000,
%       sim_type(string)/'euclidean',sim_features(string)/'chromagram',
%       sim_compressor(string)/'zlib']
%     -> { }.
%  ==
getCollectionPerspective(R) :- vis(getCollectionPerspective,R).

%% getCollectionPairPerspective(+Request) is det.
%
%  Get a given comparative perspective on a pair of previously defined collections.
%  Result will depend on the particular perspective chosen. No perspectives for
%  this method are defined in this file.
%  NOTE(review): no http_handler directive for this method is visible in this
%  file - confirm whether it is registered elsewhere.
getCollectionPairPerspective(R) :- vis(getCollectionPairPerspective,R).

%% getMultiCollectionPerspective(+Request) is det.
%  Get a given comparative perspective on a set of previously defined collections.
%  NOTE(review): no http_handler directive for this method is visible in this
%  file - confirm whether it is registered elsewhere.
getMultiCollectionPerspective(R) :- vis(getMultiCollectionPerspective,R).

%% getRecordingPerspective(+Request) is det.
%  Get a given perspective for a given recording URI. Perspectives, their parameters and
%  their output types are as follows (all perspectives require a uri parameter)
%  ==
%  properties : [] -> {
%     library:string,
%     title:string,
%     composer:list(string),
%     performer:list(string),
%     genre:list(string),
%     place:string,
%     language:string,
%     recording_date:string
%  }.
%
%  transcription : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name])) }.
%  transcription_fine : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name])) }.
%  chords : [] -> { csv: uri(csv([time,chord_name])) }.
%  chords_notes : [] -> { csv: uri(csv([time,dur,integer])) }.
%  key : [] -> { csv: uri(csv([time,integer,string])) }.
%  key_tonic : [] -> { csv: uri(csv) }.
%  tempo : [] -> { csv: uri(csv([time,nonneg,tempo_string])) }.
%  beats : [] -> { csv: uri(csv([time,tempo_string])) }.
%  beatroot : [] -> { csv: uri(csv([time])) }.
%
%  tempo_nonuniform : [] -> { times:list(float), values:list(float) }.
%  tempo_uniform : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }.
%  tempo_normalised : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }.
%  tempo_histogram :
%     [ period(nonneg)/1, num_bins(natural)/50, min(nonneg)/20, max(nonneg)/100, lang(oneof([ml,r]))/ml ]
%     -> { edges: list(float), counts: list(natural) }.
%
%  midi_pitch_histogram :
%     [ weighting(note_weight)/none ]
%     -> { values: list(integer), counts: list(natural) }.
%
%  pitch_histogram :
%     [weighting(note_weight)/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
%     -> { edges: list(float), counts: list(natural) }.
%
%  chord_histogram : [] -> { values:list(string), counts:list(natural) }.
%
%  spectrogram : [offset(nonneg)/0, length(nonneg)/60 ] -> { csv: uri }.
%  ==
%
%  Supplementary types:
%  ==
%  note_weight --> 'none'; 'dur'; 'vel'; 'dur*vel'.
%
%  time == float.
%  dur == nonneg.
%  freq == nonneg.
%  vel == nonneg.
%  ==
getRecordingPerspective(R) :- vis(getRecordingPerspective,R).

%% listCollections(+Request) is det.
%  Lists previously defined collections and the Prolog queries that define them.
%  ==
%  listCollections : [] -> {
%     collections: list({ cid : string,
%                         query : string,
%                         dv : integer,
%                         size : natural,
%                         timestamp : string
%                       })
%  }.
%  ==
listCollections(R) :- vis(listCollections,R).

%% listPlaces(+Request) is det.
%  Lists known place names, currently the distinct values of the dcterms:spatial predicate,
%  each with the number of times it occurs.
%  ==
%  listPlaces : [] -> {
%     places: list({ name : string,
%                    count : natural
%                  })
%  }.
%  ==
listPlaces(R) :- vis(listPlaces,R).

%% listPerspectives(+Request) is det.
%  Lists available perspectives for a given method. The method is named by the
%  required =method= parameter.
listPerspectives(R) :- vis(listPerspectives,R).

%% csv_time_window(+Request) is det.
%
%  Returns the contents of a CSV file between given time limits,
%  assuming that the first column is a time value.
%  Also returns the time of the last row. Parameters and return structure are
%  ==
%  csv_time_window :
%     [uri(uri(csv)), start(float), end(float) ]
%     -> { duration:float, columns: list(list) }.
%  ==
%  URI parameter must be the URI of a CSV file.
%  Data is returned as a list of lists, where each inner list is one column
%  of the CSV.
csv_time_window(R) :- vis(csv_time_window,R).


% ------------------------------------------------------------------------------------
% Implementation of methods

% param(Name, Declaration): declarations for request parameters, referenced by
% vis_parameters/3 through attribute_declarations(param). Multifile, so other
% files can declare the parameters their perspectives need.
:- multifile param/2.

param( format,    [oneof([json,jsonp]), default(jsonp), description('Reply format')]).
param( callback,  [atom,default(jsonp_cb), description('Callback for jsonp reply')]).
param( dv,        [nonneg, optional(false), description('Database version ID')]).

param( cid,       [atom, optional(false), description('Collection ID')]).
param( pid,       [atom, optional(false), description('Perspective ID')]).
param( cids,      [atom, optional(false), description('Semicolon separated list of Collection IDs')]).
param( uri,       [atom, optional(false), description('Item URI')]).
param( library,   [atom, default(*), description('Semicolon separated list of libraries to search')]).
param( genre,     [atom, default(*), description('Semicolon separated list of genre names, or "*"')]).
param( year,      [atom, default(*), description('Semicolon separeted list of release years or a range (Y1-Y2) or "*"')]).
param( composer,  [atom, default(*), description('Semicolon separated list of composers or "*"')]).
param( performer, [atom, default(*), description('Semicolon separated list of performers or "*"')]).
param( place,     [atom, default(*), description('Semicolon separated list of place names or "*"')]).
param( language,  [atom, default(*), description('Semicolon separated list of language name prefixes or "*"')]).
param( collection,[atom, default(*), description('Semicolon separated list of BL collection names or "*"')]).
param( title,     [atom, default(*), description('Semicolon separated AND-list of title substrings or "*"')]).
param( method,    [atom, optional(false), description('API method name')]).
param( limit,     [integer, default(5000), description('Maximum number of things to return')]).
param( offset,    [integer, default(0), description('Offset within list')]).
param( sort_by,   [oneof([label,date]), default(label), description('Sort recordings list by this property')]).
param( start,     [number, optional(false), description('Start of window in seconds')]).
param( end,       [number, optional(false), description('End of window in seconds')]).
param( sample,    [number, default(all), description('Sample a random subset of this size')]).
param( sv,        [number, default(1), description('Version of random subset')]).
param( midi_pitch,[nonneg, default(60), description('MIDI note number 0..127')]).

%% method_params(+Method,-Params:list) is semidet.
%
%  Params is the list of parameter terms (with unbound arguments) accepted by
%  Method. For a method that has at least one perspective, the list is pid(_)
%  followed by one term for every distinct parameter name used by any of that
%  method's perspectives.
method_params(listCollections, []).
method_params(listPlaces, []).
method_params(listPerspectives, [ method(_) ]).
method_params(getCollectionId, [ dv(_), library(_), genre(_), place(_), language(_), year(_), composer(_),
                                 performer(_), collection(_), title(_), sample(_), sv(_) ]).
method_params(csv_time_window, [ uri(_), start(_), end(_) ]).
method_params(Method, [ pid(_) | Params ]) :-
   once(perspective(Method,_)),
   setof(P, perspective_param_name(Method,P), Ps),
   maplist(param_name_term,Ps,Params).

% Term is a fresh unary term with name Name, e.g. cid(_).
param_name_term(Name,Term) :- functor(Term,Name,1).

% Enumerates the parameter names mentioned in the option specs of every
% perspective registered for Method.
perspective_param_name(Method,Name) :-
   perspective(Method,_,Specs,_),
   member(S,Specs),
   optspec_name(S,Name).


% perspective(Method, PerspectiveId, OptSpecs, Pred): multifile so that other
% files can register perspectives.
:- multifile perspective/4.

%% cc(+Pred:pred(-A,-cc_status), +Cont:pred(+A,+cc_status), +ErrorCont:pred(+exception)) is det.
%  Call a predicate with success and error continuations.
%  Pred produces a result of type A and a status. If it succeeds, the result and status are passed
%  to the continuation Cont. Otherwise, an exception is passed to ErrorCont.
%  Exceptions raised while running Cont are caught by the same catch/3 and so
%  are also passed to ErrorCont.
cc(Goal,Cont,ErrorCont) :- catch(cc_wrapper(Goal,Cont), Ex, call(ErrorCont,Ex)).
cc_wrapper(Goal,Cont) :- insist(call(Goal,Result,Status)), call(Cont,Result,Status).

% P is the text of a plain literal used as the object of dcterms:spatial.
place_name(P) :- rdf(_,dcterms:spatial,literal(P)).

%% method_result_cc(+Method,+Params:options,+C:success_cont,+E:error_cont) is det.
%
%  Perspectives are handled using continuations to allow spawning
%  ==
%  success_cont == pred(+dict,+vis_status).
%  error_cont == pred(+exception).
%  vis_status ---> stable; unstable.
%  ==
%  Method must be a method id registered in perspective/2 or handled by method_result/4.
%
%  For a perspective method, the perspective named by the pid option is looked
%  up, its option specs are matched against Params (binding the variables
%  shared with its predicate), and the predicate is called with the two
%  continuations.
method_result_cc(Method,Params,Cont,ErrorCont) :-
   perspective(Method,_), !,
   option(pid(PId),Params),
   catch(( insist( perspective(Method,PId,Specs,Pred), unknown_perspective(PId)),
           insist( maplist(options_optspec(Params),Specs)),
           call(Pred,Cont,ErrorCont)
         ), Ex, call(ErrorCont,Ex)).

% all non-perspective methods are handled in this thread
method_result_cc(Method,Params,Cont,ErrorCont) :-
   cc( method_result(Method,Params), Cont, ErrorCont).

%% method_result(+Method, +Opts, -Result, -Status:vis_status) is det.
%
%  Handles methods =|listCollections, listPlaces, listPerspectives, getCollectionId, csv_time_window|=.
method_result(listCollections, _, _{ collections:List }, unstable) :-
   % The query term is rendered as an atom; the timestamp is formatted with
   % the pattern '%FT%T%:z'.
   findall( _{ cid:Id, query:QA, dv:DV, size:SZ, timestamp:TS },
            (  browse(dataset:dataset(dmlvis:Q, DV, Id, SZ, _),comp(_,Timestamp,_)-ok),
               format_time(string(TS),'%FT%T%:z',Timestamp),
               term_to_atom(Q,QA)
            ),
            List).

method_result(listPlaces, _, _{ places:List }, unstable) :-
   % Name is free in the aggregate/3 goal, so one count is produced per
   % distinct place name.
   findall( _{ name:Name, count:N }, aggregate(count,place_name(Name),N), List).


method_result(listPerspectives, Opts, _{ perspectives:List }, stable) :-
   option(method(Method), Opts),
   findall( P, perspective(Method,P), List).

% getCollectionId: builds the dataset query from the search options, insists
% that every option was consumed, and registers the query to obtain its id.
% Size is the size of the resulting collection; FullSize is the number of
% solutions of the unsampled query (the two are unified when sample=all).
method_result(getCollectionId, Opts, _{ cid:Id, size:Size, full_size:FullSize }, stable) :-
   build_dataset_query(Query,DBV,Size-FullSize,Opts,Remaining),
   insist(Remaining=[], unrecognised_search_criteria(Remaining)),
   insist(dataset_query_id( Query, DBV, Id),empty_dataset(Query)),
   dataset_size(Id, Size).

% csv_time_window: Dur is the first field of the last row of the CSV. Columns
% holds, column by column, the rows that remain after dropping the leading
% rows whose first field is below Start and then keeping the rows whose first
% field is below End.
method_result(csv_time_window, Opts, _{ duration:Dur, columns:Columns }, stable) :-
   % Required options use the +Opt form, the only "required" form that
   % options_optspec/2 has a clause for. The previous \Opt form matched no
   % clause, so this method could never succeed.
   maplist(options_optspec(Opts), [ +uri(URI), +start(Start), +end(End) ]),
   uri_to_csv(URI,Rows),
   insist(Rows\=[], empty_csv(URI)),

   append(_,[EndRow],Rows),
   functor(EndRow,_,NumCols),
   arg(1,EndRow,Dur),

   numlist(1,NumCols,Is),
   drop_while(row_before(Start),Rows,Rows1),
   take_while(row_before(End),Rows1,Rows2),
   rows_cols(Is,Rows2,Columns).

% True if the first field of Row is numerically less than T0.
row_before(T0,Row) :- arg(1,Row,T), T<T0.

%% options_optspec(+Opts:options, +Spec) is semidet.
%
%  Matches one option spec against the option list Opts:
%    * +O     : O must be present in Opts.
%    * O-Def  : O is taken from Opts, or its argument is unified with Def.
options_optspec(Opts,+O) :- option(O,Opts).
options_optspec(Opts,O-Def) :- option(O,Opts,Def).
% options_optspec(M,Opts,O>Goal) :- options_optspec(M,Opts,O), call(M:Goal).

%% optspec_name(+Spec, -Name) is semidet.
%
%  Name is the option name of a +O or O-Def spec whose option term is unary.
optspec_name(+O,Name) :- functor(O,Name,1).
optspec_name(O-_,Name) :- functor(O,Name,1).
% optspec_name(O>_,Name) :- optspec_name(O,Name).

%% perspective(?Method, ?Perspective) is nondet.
%
%  True if Perspective is registered for Method in perspective/4.
perspective(Method,Perspective) :- perspective(Method,Perspective,_,_).

% perspective(Method, Id, OptSpecs, Pred): the variables in OptSpecs are shared
% with Pred, so matching the specs against the request options instantiates
% the goal. Pred is called with a success and an error continuation.
perspective(getRecordingPerspective, properties, [+uri(URI)], cc(recording_info(URI))).
perspective(getCollectionPerspective, summary, [+cid(C)], cc(collection_summary(C))).
perspective(getCollectionPerspective, list, [+cid(C),limit(Lim)-5000,offset(Off)-0,sort_by(SortBy)-label], cc(collection_list(C,Lim,Off,SortBy))).

%% collection_summary(+Id, -Result:dict, -Status) is det.
%
%  Result holds the collection id, its size and every (goal, database version)
%  pair recorded for it, with each goal rendered as an atom.
collection_summary(Id,Result,stable) :-
   insist(dataset_size(Id,Size), unknown_collection(Id)),
   findall( _{goal:GoalA, dv:DV}, ( dataset_query_dv(Id,Goal,DV),
                                    term_to_atom(Goal,GoalA) ), Queries),
   Result = _{cid:Id, size:Size, queries:Queries }.


%% collection_list(+Id, +Lim, +Offset, +SortBy, -Result:dict, -Status) is det.
%
%  Lists the items of collection Id. Each item is paired with a sort key, the
%  pairs are sorted with sort/2 (standard order, duplicates removed), and at
%  most Lim items are returned after skipping the first Offset.
collection_list(Id, Lim, Offset, SortBy, _{cid:Id, size:Size, items:Items}, stable) :-
   insist(dataset_size(Id,Size), unknown_collection(Id)),
   findall(SortVal-Item, dataset_itemx(SortBy,Id,Item,SortVal), KeyedItems), sort(KeyedItems,Sorted),
   findall(Item,limit(Lim,offset(Offset,member(_-Item,Sorted))),Items).

%% dataset_itemx(+SortProp, +Id, -Itemx:dict, -SortVal) is nondet.
%
%  Itemx is a dict for one item of dataset Id holding its uri and whichever of
%  the properties label, audio and date could be found. SortVal is just(V) if
%  the item has a value V for SortProp, and the atom =nothing= otherwise.
dataset_itemx(SortProp, Id, Itemx, SortVal) :-
   dataset_item(Id,URI),
   filter(Lib,URI,in_library),
   findall(Prop:Val, ( member(Prop,[label,audio,date]),
                       recording_property(URI,Lib,Prop,Val) ), Props),
   (member(SortProp:Val,Props) -> SortVal=just(Val); SortVal=nothing),
   dict_create(Itemx,_,[uri:URI | Props]).


%% recording_info(+URI, -Result:dict, -Status) is det.
%
%  Result holds the library that URI belongs to and every property that
%  recording_property/4 yields for it.
recording_info(URI, Result, stable) :-
   insist(filter(Lib,URI,in_library),unknown_recording(URI)),
   findall(Prop:Val, recording_property(URI,Lib,Prop,Val), Props),
   dict_create(Result,_,[ library:Lib | Props ]).

%% recording_property(+URI, ?Prop, -Val) is nondet.
%
%  Exported form: finds the library of URI and delegates to
%  recording_property/4.
recording_property(URI,Prop,Val) :-
   filter(Lib,URI,in_library),
   recording_property(URI,Lib,Prop,Val).

%% recording_property(+URI, +Lib, ?Prop, -Val) is nondet.
%
%  =label= and =audio= are computed for any library; all other properties go
%  through the lib_property_read/3 and lib_property/3 tables.
recording_property(URI,_,label,Label) :-
   rdf_display_label(URI,Label).
recording_property(URI,_,audio,Links) :-
   % !!! HACK: force scraping for BL items here..
   % DISABLED - seems to upset BL server...
   % (rdf(URI,dml:blpage,_) -> once(bl_p2r:scrape_audio_link(URI,_)); true),
   % setof/3 fails when there are no links, so the audio property is then
   % simply absent.
   setof(L,F^audio_link(URI,L,F),Links).
recording_property(URI,Lib,Prop,Val) :-
   lib_property_read(Lib, Prop, Reader),
   pred_values(URI,Lib:Prop,Reader,Val).


%% pred_values(+URI, +LibProp, +Reader, -Val) is nondet.
%
%  Reader is -Converter or +Converter. With -Converter, Val is the converted
%  object of the first triple found for an RDF predicate mapped to the
%  property; on backtracking the next mapped predicate is tried. With
%  +Converter, Val is the list of all values obtained that way (at most one
%  per mapped predicate), possibly empty.
pred_values(URI,Lib:Prop,-Converter,Val) :-
   lib_property(Lib,Prop,Pred),
   once(rdf(URI,Pred,Obj)),
   call(Converter,Obj,Val).
pred_values(URI,LibProp,+Converter,Vals) :-
   findall(Val, pred_values(URI,LibProp,-Converter,Val), Vals).

% lib_property_read(Lib, Prop, Reader): how each property is read by
% pred_values/4 (- for a single value, + for a list of values).
lib_property_read(_, collection, -literal_text).
lib_property_read(_, composer, +literal_text).
lib_property_read(_, performer, +literal_text).
lib_property_read(_, title, -literal_text).
lib_property_read(_, date, -literal_text).
lib_property_read(_, place, -literal_text).
lib_property_read(_, language, +literal_text).
lib_property_read(ilm, genre, +genre_label).

% Label is the text of an rdfs:label literal of the resource Genre.
genre_label(Genre,Label) :-
   rdf(Genre,rdfs:label,Lit),
   literal_text(Lit,Label).

% ------------------- DEFINING NEW COLLECTIONS -----------------------

%% build_dataset_query(-Query, -DBV, -Sizes:pair, +Opts0, -Opts) is det.
%
%  DCG over the option list. Removes the dv, sample and sv options, lets
%  full_query//1 consume the search options, and builds the query goal.
%  Sizes is Size-FullSize: when sample is =all= the two are unified and left
%  for the caller to bind; otherwise the query is wrapped in sample/3 and
%  FullSize is the number of solutions of the unsampled query.
build_dataset_query(Query,DBV,Size-FullSize) -->
   select_option(dv(DBV)),
   select_option(sample(Subset)),
   select_option(sv(SV)),
   full_query(FQ),
   {  Subset=all -> Query=FQ, Size=FullSize
   ;  Query=sample(Subset,SV,FQ),
      aggregate_all(count,call(FQ,_),FullSize)
   }.

%% full_query(-Query)// is det.
%
%  Builds qc(Filters) from the search options. Each qo(Name,Parser,Filter)
%  entry shares one variable between its parser and its filter term, so
%  parsing the option value instantiates the filter.
%  Each entry needs its own variable: performer and place previously shared
%  one, so a request giving both could only be parsed when the two value
%  lists happened to be identical.
full_query(qc(Filters)) -->
   run_left(seqmap(process_qopt,
      [ qo( library,    atoms(LS),  libraries(LS))
      , qo( collection, atoms(CC),  any(collection,CC))
      , qo( title,      atoms(TS),  all(title,TS))
      , qo( composer,   atoms(CS),  any(composer,CS))
      , qo( performer,  atoms(PS),  any(performer,PS))
      , qo( genre,      atoms(GS),  any(genre,GS))
      , qo( year,       years(YS),  year(YS))
      , qo( place,      atoms(PLS), any(place,PLS))
      , qo( language,   atoms(LNS), any(language,LNS))
      ]), Filters, []).

% Handles one qo/3 entry: the option is removed from the option list
% (defaulting to '*'); unless its value is '*', the value is parsed and the
% filter is added to the filter list.
% NOTE(review): run_left//3, seqmap//2, option_default_select/4 and the \< and
% \> operators are not defined in this file; presumably they come from the
% imported dcg_* and optutils libraries and thread a pair of states - confirm.
process_qopt(qo(OptName,Parser,Filter)) -->
   { Opt=..[OptName,OptVal] },
   \> option_default_select(Opt,'*'),
   (  {OptVal='*'} -> []
   ;  {parse_atom(Parser,OptVal)},
      \< [Filter]
   ).

%% sample(+Size, +Version, :Query, -X) is nondet.
%
%  Enumerates a random subset of at most Size of the distinct solutions of
%  Query. The second argument (the sample version) is not used in the
%  computation; it only makes otherwise identical query terms distinct.
sample(Size,_,Query,X) :-
   setof(X, call(Query,X), All),
   length(All,Total),
   (  Total=<Size -> Subset=All
   ;  randset(Size, Total, All, [], Subset)
   ),
   member(X,Subset).

% randset(K,N,Items)// selects K of the first N elements of Items. Each
% element is kept unless random(N)>=K, after which the remainder is processed
% with N reduced by one and K reduced by one if the element was kept.
% NOTE(review): cons//1 is not defined in this file; presumably it prepends the
% element to the DCG state - confirm.
randset(0, _, _) --> !.
randset(K, N, [A|As]) -->
   ({random(N)>=K} -> {K1=K}; {K1 is K-1}, cons(A)),
   {N1 is N-1}, randset(K1,N1,As).


% qc(Filters,X): X satisfies every filter for one and the same library Lib and
% belongs to that library.
qc(Filters,X) :- maplist(filter(Lib,X),Filters), filter(Lib,X,in_library).

%% lib_property(-Library, -Property, -Predicate) is nondet.
%
%  This predicate manages the mapping from API search fields to RDF
%  predicates. There are still some questions about:
%
%  $ composer : marcrel cmp, arr, lyr
%  $ performer : marcrel prf, sng, cnd, drt
:- rdf_meta lib_property(?,+,r).
lib_property(bl, collection, dcterms:isPartOf).
lib_property(bl, language, dcterms:language).
lib_property(bl, language, dc:language).
lib_property(bl, place, dcterms:spatial).
lib_property(ilm, genre, mo:genre).

% lib_property(beets, date, beets:original_year).
lib_property(ilm, date, ilm:release_date).
lib_property(charm, date, charm:recording_date).
lib_property(mazurka, date, mazurka:recording_date).
lib_property(bl, date, dcterms:created).

lib_property(charm, composer, charm:composer).
lib_property(mazurka, composer, mazurka:composer).
lib_property(bl, composer, marcrel:cmp).
lib_property(bl, composer, marcrel:arr).
lib_property(beets, composer, beets:composer).

lib_property(charm, performer, charm:performer).
lib_property(mazurka, performer, mazurka:performer).
lib_property(bl, performer, marcrel:prf).
lib_property(bl, performer, marcrel:sng).
lib_property(beets, performer, beets:artist).
% NOTE(review): 'arist' looks like a misspelling of 'artist'; check it against
% the predicate actually used in the ilm data before changing it.
lib_property(ilm, performer, ilm:arist).

lib_property(charm, title, charm:title).
lib_property(mazurka, title, mazurka:title).
lib_property(bl, title, dc:title).
lib_property(beets, title, beets:title).
lib_property(ilm, title, dc:title).

%% lib_property_search(+Lib, +Property, +Value, -Search) is nondet.
%
%  Search is the literal search term used to test Property against Value:
%  substring matching for collection, place and title, prefix matching for
%  language, composer and performer, and a between/2 range for date.
%  For date, Value is Lower-Upper and the upper bound used is Upper+1.
lib_property_search(_,collection,X, substring(X)).
lib_property_search(_,place,     X, substring(X)).
lib_property_search(_,language,  X, prefix(X)).
lib_property_search(_,composer,  X, prefix(X)).
lib_property_search(_,performer, X, prefix(X)).
lib_property_search(_,title,     X, substring(X)).
% beets dates are searched with numeric bounds ...
lib_property_search(beets, date, L-U, between(L,U1)) :- succ(U,U1).
% ... all others with the bounds converted to atoms.
% NOTE(review): with the Lib\=beets guard commented out, this clause also
% applies to beets on backtracking.
lib_property_search(_Lib, date, L-U, between(LA,U1A)) :-
   % Lib\=beets,
   succ(U,U1),
   atom_number(LA,L),
   atom_number(U1A,U1).

%% filter(+Lib, -Resource, +SearchSpec) is nondet.
%
%  SearchSpec is one of:
%    * libraries(Ls)   : Lib is a member of Ls (Resource is not tested).
%    * any(Prop,Vals)  : Resource matches Prop for some value in Vals.
%    * all(Prop,Vals)  : Resource matches Prop for every value in Vals.
%    * year(any(Ys))   : the date of Resource matches some year in Ys.
%    * year(L-U)       : the date of Resource matches the range L-U.
%    * Prop-Val        : Resource matches Prop for Val.
%    * in_library      : Resource belongs to library Lib.
filter(Lib, _, libraries(Ls)) :- member(Lib,Ls).
filter(Lib, X, any(Prop,Vals)) :- member(Val,Vals), filter(Lib,X,Prop,Val).
filter(Lib, X, all(Prop,Vals)) :- maplist(filter(Lib,X,Prop),Vals).
filter(Lib, X, year(any(Ys))) :- member(Y,Ys), filter(Lib,X,date,Y-Y).
filter(Lib, X, year(L-U)) :- filter(Lib,X,date,L-U).
filter(Lib, X, Prop-Val) :- filter(Lib,X,Prop,Val).

% in_library: each library is recognised by a characteristic triple in the
% named graph given as the fourth argument of rdf/4.
filter(charm, X, in_library) :- rdf(X,charm:file_name,_,charm_p2r).
filter(mazurka, X, in_library) :- rdf(X,mazurka:pid,_,mazurka_p2r).
filter(bl, X, in_library) :- rdf(X,rdf:type,mo:'Signal',bl_p2r).
filter(ilm, X, in_library) :- rdf(X,mo:track_number,_,ilm_p2r).
filter(beets, X, in_library) :- rdf(X,rdf:type,mo:'AudioFile',beets_p2r).

%% filter(+Lib, -Resource, +Property, +Value) is nondet.
%
%  Genre search is only implemented for ilm: it goes through genre resources
%  whose rdfs:label contains Value. Every other search uses the
%  lib_property/3 and lib_property_search/4 tables; the latter has no genre
%  entry, so the generic clause does not match genre.

% filter(beets, X,genre,G) :- rdf_has(X,beets:genre,literal(substring(G),_)).
filter(ilm, X,genre,G) :-
   rdf(GR,rdfs:label,literal(substring(G),_),ilm_p2r),
   rdf(GR,rdf:type,mo:'Genre',ilm_p2r),
   rdf(X,mo:genre,GR).

filter(Lib, X, Prop, Val) :-
   lib_property(Lib,Prop,Pred),
   lib_property_search(Lib,Prop,Val,Search),
   rdf(X,Pred,literal(Search,_)).

% --------- parsers -----------
% These DCGs run over code lists; the double-quoted separators below are code
% lists because of the double_quotes flag set at the top of the file.

% cids(Ids) --> seqmap_with_sep(",",alphanum,Ids).
% Semicolon separated list of ids, each converted to an atom.
cids(Ids) --> semicolon_sep(atom_codes,Ids).

% atoms('*') --> "*", !.
% Semicolon separated list of atoms, lower-cased, sorted and de-duplicated.
atoms(AS) --> semicolon_sep(atom_codes,AS1), {maplist(downcase_atom,AS1,AS2),sort(AS2,AS)}.
% A run of codes (no delimiter set) as a single lower-cased atom.
whole(A) --> string_without("",Codes), {atom_codes(A1,Codes), downcase_atom(A1,A)}.

% years('*') --> "*", !.
% Either a range Lower-Upper of two integers, or any(Ys) for a semicolon
% separated list of numbers, sorted and de-duplicated.
years(L-U) --> integer(L), "-", integer(U).
years(any(Ys)) --> semicolon_sep(number_codes,Ys1), {sort(Ys1,Ys)}.

% alphanum(X) --> string_without(",",S), {atom_string(X,S)}.

% One list element: the codes up to the next semicolon, converted by calling
% Conv with the item and the codes.
item(Conv,Item) --> string_without(";",Codes), {call(Conv,Item,Codes)}.
semicolon_sep(Conv,Items) -->
   seqmap_with_sep(";",item(Conv),Items).


%% parse_atom(+Phrase, +Atom) is det.
%
%  Parses the whole text of Atom with the DCG body Phrase; a failed parse is
%  passed to insist/2 as parse_failure(Phrase).
parse_atom(Phrase,Atom) :-
   atom_codes(Atom,Codes),
   insist( phrase(Phrase,Codes), parse_failure(Phrase)).

% Creates the vis_recording thread pool, named in the spawn option of the
% getRecordingPerspective handler, with one worker per CPU reported by the
% cpu_count flag and a backlog of 50.
thread_pool:create_pool(vis_recording) :-
   current_prolog_flag(cpu_count,N),
   thread_pool_create(vis_recording, N, [backlog(50)]).