Mercurial > hg > dml-open-cliopatria

/* Part of DML (Digital Music Laboratory)
	Copyright 2014-2015 Samer Abdallah, University of London

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

:- module(dmlvis,
      [  cc/3
      ,  recording_property/3
      ]).

/** <module> DML Visualisation web service
 */


:- use_module(library(http/html_write)).
:- use_module(library(http/html_head)).
:- use_module(library(http/thread_httpd)).
:- use_module(library(http/http_dispatch)).
:- use_module(library(http/http_parameters)).
:- use_module(library(http/http_json)).
:- use_module(library(http/json)).
:- use_module(library(thread_pool)).
:- use_module(library(semweb/rdf_label)).
:- use_module(library(sandbox)).
:- use_module(library(dcg_core)).
:- use_module(library(dcg_pair)).
:- use_module(library(dcg_codes)).
:- use_module(library(dcg_macros)).
:- use_module(library(fileutils)).
:- use_module(library(listutils)).
:- use_module(library(optutils)).
:- use_module(library(swipe)).
:- use_module(library(memo)).
:- use_module(library(rdfutils)).
:- use_module(library(httpfiles)).
:- use_module(library(dataset)).
:- use_module(library(computations), [rows_cols/3]).
:- use_module(library(csvutils), [uri_to_csv/2]).
:- use_module(library(insist)).
:- use_module(library(lambda)).
:- use_module(library(dcg/basics), [string_without//2, integer//1]).
:- use_module(library(solution_sequences)).

:- use_module(components(audio)).

:- meta_predicate cc(2,2,1).
:- meta_predicate method_result_cc(+,+,2,1).

:- set_prolog_flag(double_quotes,codes).
:- set_prolog_flag(back_quotes,string).

http:location(v1,root(api/v4),[]).

:- http_handler(v1(getCollectionId), getCollectionId, []).
:- http_handler(v1(getCollectionPerspective), getCollectionPerspective, []).
:- http_handler(v1(getRecordingPerspective), getRecordingPerspective, [spawn(vis_recording)]).
:- http_handler(v1(listCollections), listCollections, []).
:- http_handler(v1(listPlaces), listPlaces, []).
:- http_handler(v1(listPerspectives), listPerspectives, []).
:- http_handler(v1(csv_time_window), csv_time_window, []).

% ------------------------------------------------------------------------------------

%% vis(+Method,+Request) is det.
%
%  General VIS API HTTP handler. Returns either JSON or JSONP depending on the
%  format parameter. If JSONP is selected (the default), the callback parameter
%  determines the Javascript callback function. The JSON result structure is of
%  type
%  ==
%  {
%     query:  string ~'The original query, excluding protocol and host',
%     result: (error_struct|A) ~'Result structure A or error information'
%  }
%  ==
%  where
%  ==
%  error_struct == { code: integer, desc:string }
%  ==
%  See individual methods for more information about their return types.
vis(Method,Request) :-
   % timestamp taken first so the reported latency covers parameter decoding
   get_time(Started),
   method_params(Method,MethodParams),
   member(request_uri(Query),Request),
   vis_parameters(Request, [format(Format), callback(Callback) | MethodParams], ReplyOpts),
   % choose the reply wrapper; anything other than jsonp is sent as plain JSON
   (  Format=jsonp
   -> ReplyFmt=jsonp(Callback,ReplyOpts)
   ;  ReplyFmt=json(ReplyOpts)
   ),
   OnSuccess=send_response(ReplyFmt,Query,Started),
   debug(dmlvis(method),'~q',[method_result_cc(Method,MethodParams,OnSuccess,writeln)]),
   % the same responder is reused by the error continuation to report failures
   method_result_cc(Method,MethodParams,dmlvis:OnSuccess,error_cont(Query,OnSuccess)).

%% send_response(+Fmt,+Query,+T0,+Result,+Status) is det.
%
%  Success continuation used by vis/2. Wraps Result together with the
%  original Query in a dict and writes it in the format Fmt (see reply_as/3).
%  T0 is the time stamp taken when the request started; the elapsed time is
%  only reported on the dmlvis(response) debug topic.
send_response(Fmt,Query,T0,Result,Status) :-
   get_time(T1),
   % evaluate the difference: ~3f expects a number, not the term T1-T0
   DT is T1-T0,
   debug(dmlvis(response),'Sending response after ~3f s to ~q',[DT,Query]),
   once(reply_as(Fmt,Status,_{ query:Query, result: Result})).

% Error continuation: log the exception, turn it into an error dict with
% error_result/2 and hand that to Cont with status error.
error_cont(Query,Cont,Exception) :-
   PrintOpts=[quoted(true),max_depth(8)],
   debug(dmlvis(error),'Sending error ~W in response to ~q',[Exception,PrintOpts,Query]),
   error_result(Exception,ErrorDict),
   call(Cont,ErrorDict,error).

% Decode the request parameters declared in Params (plus an optional
% random parameter) and reject any parameter that was supplied but not
% declared. Two undeclared names are tolerated: indent, which switches the
% JSON reply options from [width(0)] to [], and dv.
vis_parameters(Request,Params,ReplyOpts) :-
   Declared=[random(_, [optional(true)]) | Params],
   http_parameters(Request, Declared, [attribute_declarations(param), form_data(Supplied)]),
   findall(Name, (member(Name=_,Supplied), \+used_param(Name,Declared)), Extra0),
   (  select(indent,Extra0,Extra1)
   -> ReplyOpts=[]
   ;  ReplyOpts=[width(0)],
      Extra1=Extra0
   ),
   (  select(dv,Extra1,Extra2)
   -> true
   ;  Extra2=Extra1
   ),
   insist(Extra2=[], unrecognised_parameters(Extra2)).

% True when Name is the functor name of some term in ValidParams.
used_param(Name,ValidParams) :-
   member(Spec,ValidParams),
   functor(Spec,Name,_).

% result_response(Fmt,Q,Result,Status,Response) :-
%    with_output_to(string(Response),
%       reply_as(Fmt,Status,_{ query:Q, result: Result})).

% Write the HTTP headers and then Dict as JSON, either bare (json(Opts)) or
% wrapped in a call to the callback Cb (jsonp(Cb,Opts)).
% NOTE(review): Cb comes from the callback request parameter and is written
% verbatim into the reply; consider restricting it to identifier characters.
reply_as(json(Opts),Status,Dict) :- !,
   write_headers([Status,type(json)]),
   json_write_dict(current_output, Dict, Opts).
reply_as(jsonp(Cb,Opts),Status,Dict) :- !,
   write_headers([Status,type(jsonp)]),
   format('~w(',[Cb]),
   json_write_dict(current_output, Dict, Opts),
   write(');').


% Wrap the description of a single exception in the errors list of a result dict.
error_result(Exception, _{ errors:[Described] }) :-
   error_result1(Exception,Described).

% Describe an exception as a code/desc dict: dml_error(Code,Desc) terms carry
% their own code and description, anything else is reported as code 500 with
% the text produced by message_to_string/2.
error_result1(Ex, _{ code: Code, desc: Desc }) :-
   (  Ex=dml_error(Code,Desc)
   -> true
   ;  Code=500,
      message_to_string(Ex,Desc)
   ).

% ------------------------------------------------------------------------------------
% Handlers with documentation

%% getCollectionId(+Request) is det.
%
%  Define a collection satisfying given search terms. Result is a collection
%  ID, which may be the same as a previously defined collection if it consists
%  of the same items.
%
%  Parameters are as follows. In all cases, =|sclist(Type)|= denotes a semicolon
%  separated list of values of given type OR the string '*', which is the default.
%  The value '*' means that no filtering is done on the relevant property. The
%  list is interpreted as an AND-list or an OR-list depending on which property
%  is being tested. String matching is case insensitive.
%  The only required parameter is =|dv|=.
%  ==
%   dv         : integer              ~ database version
%   library    : sclist(lib_name)     ~ OR-list of libraries to search
%   collection : sclist(pattern)      ~ OR-list of BL collection name patterns
%   composer   : sclist(string)       ~ OR-list of composer prefixes
%   performer  : sclist(string)       ~ OR-list of performer prefixes
%   title      : sclist(string)       ~ AND-list of title substrings
%   genre      : sclist(string)       ~ OR-list of genre substrings
%   place      : sclist(string)       ~ OR-list of place substrings
%   language   : sclist(string)       ~ OR-list of language name prefixes
%   year       : sclist(year) | range ~ OR-list of years, or a range of years
%   sample     : nonneg | 'all'       ~ sample a random subset of this size
%   sv         : integer              ~ sample version for sample
%
%  lib_name --> 'bl' | 'charm' | 'ilm' | 'mazurka' | 'beets'.
%  range    --> year, '-', year.
%  ==
%  NB. A =|pattern|= is a string which may contain '*' to match any sequence of
%  characters. The pattern must match the WHOLE value being tested, eg to search
%  for the substring 'Botswana', you must include '*' at both ends of the pattern,
%  eg =|'*botswana*'|=.
%
%  A successful result is of type
%  ==
%  result(getCollectionId) == {
%     cid: string ~'collection id',
%     size: natural ~'number of items in collection',
%     full_size: natural ~'number of items matching the query before sampling'
%  }.
%  ==
getCollectionId(Request) :-
   vis(getCollectionId,Request).

%% getCollectionPerspective(+Request) is det.
%
%  Get a given perspective on a previously defined collection.
%  Result will depend on the particular perspective chosen.
%  All perspectives require the parameter =|cid(string)|=. Most
%  perspectives accept the parameters =|recompute(oneof([none,failed,force]))|=
%  and =|vamp_on_demand(boolean)|=.
%  Defined perspectives are
%
%  ==
%  summary : [] -> { cid:string, size: natural, queries: list({ goal:string, dv:integer }) }.
%
%  list :
%     [ limit(natural)/5000, offset(natural)/0, sort_by(oneof([label, date]))/label ]
%  -> { cid:string, size:natural, items: list( {uri:uri, label:string, audio:list(link(audio)), date:string }) }.
%
%  midi_pitch_histogram :
%     [ weighting(oneof([none,dur,vel]))/none ]
%  -> { values:list(integer), counts:list(natural) }.
%
%  pitch_histogram :
%     [ weighting(oneof([none,dur,vel]))/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
%  -> { edges:list(nonneg), counts:list(natural) }.
%
%  tempo_histogram :
%     [ period(nonneg)/1, min(integer)/20, max(integer)/100, num_bins(natural)/50, lang(oneof([ml,r]))/ml ]
%  -> { edges:list(nonneg), counts:list(natural) }.
%
%  mean_tempo_curve :
%     [ num_samples(natural)/20, lang(oneof([ml,r]))/ml ]
%  -> { means:list(nonneg), std_devs:list(nonneg) }.
%
%  tonic_relative_pitch_class_histogram : []
%  -> { values:list(string), counts:list(nonneg), ok_count:natural, failed:{prolog:list, python:list}}.
%
%  pitch_lookup :
%     [ midi_pitch(between(0,127)), weighting(oneof([none,dur,vel]))/none,
%       limit(natural)/5000, offset(natural)/0 ]
%  -> { }.
%
%  tonic_histogram : [] -> { }.
%
%  pitch_class_histogram : [] -> { }.
%
%  tuning_stats : [] -> { }.
%
%  tuning_stats_by_year : [] -> { }.
%
%  places_hist : [] -> { }.
%
%  key_relative_chord_seq :
%     [  spm_minlen(natural)/2, spm_maxseqs(natural)/500, spm_algorithm(Alg)/'CM-SPADE',
%        spm_ignore_n(natural)/1, spm_maxtime(number)/60, spm_minsupport(number)/50 ]
%  -> { }.
%
%  similarity :
%     [  sim_downsample(number)/1,sim_clusters(number)/40,sim_reclimit(number)/2000,
%        sim_type(string)/'euclidean',sim_features(string)/'chromagram',
%        sim_compressor(string)/'zlib']
%  -> { }.
%  ==
getCollectionPerspective(Request) :-
   vis(getCollectionPerspective,Request).

%% getCollectionPairPerspective(+Request) is det.
%
%  Get a given comparative perspective on a pair of previously defined collections.
%  Result will depend on the particular perspective chosen. Defined perspectives are:
getCollectionPairPerspective(Request) :-
   vis(getCollectionPairPerspective,Request).

%% getMultiCollectionPerspective(+Request) is det.
%  Get a given comparative perspective on a set of previously defined collections.
getMultiCollectionPerspective(Request) :-
   vis(getMultiCollectionPerspective,Request).

%% getRecordingPerspective(+Request) is det.
%  Get a given perspective for a given recording URI. Perspectives, their parameters and
%  their output types are as follows (all perspectives require a uri parameter)
%  ==
%  properties  : [] -> {
%     library:string,
%     title:string,
%     composer:list(string),
%     performer:list(string),
%     genre:list(string),
%     place:string,
%     language:string,
%     recording_date:string
%  }.
%
%  transcription      : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name])) }.
%  transcription_fine : [] -> { csv: uri(csv([time,dur,freq,vel,pitch_name])) }.
%  chords             : [] -> { csv: uri(csv([time,chord_name])) }.
%  chords_notes       : [] -> { csv: uri(csv([time,dur,integer])) }.
%  key                : [] -> { csv: uri(csv([time,integer,string])) }.
%  key_tonic          : [] -> { csv: uri(csv) }.
%  tempo              : [] -> { csv: uri(csv([time,nonneg,tempo_string])) }.
%  beats              : [] -> { csv: uri(csv([time,tempo_string])) }.
%  beatroot           : [] -> { csv: uri(csv([time])) }.
%
%  tempo_nonuniform : [] -> { times:list(float), values:list(float) }.
%  tempo_uniform    : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }.
%  tempo_normalised : [ lang(oneof([ml,r]))/ml ] -> { times:list(float), values:list(float) }.
%  tempo_histogram  :
%     [ period(nonneg)/1, num_bins(natural)/50, min(nonneg)/20, max(nonneg)/100, lang(oneof([ml,r]))/ml  ]
%  -> { edges: list(float), counts: list(natural) }.
%
%  midi_pitch_histogram :
%     [ weighting(note_weight)/none ]
%  -> { values: list(integer), counts: list(natural) }.
%
%  pitch_histogram :
%     [weighting(note_weight)/none, quant(natural)/5, min(integer)/20, max(integer)/100, lang(oneof([ml,r]))/ml ]
%  -> { edges: list(float), counts: list(natural) }.
%
%  chord_histogram : [] -> { values:list(string), counts:list(natural) }.
%
%  spectrogram : [offset(nonneg)/0, length(nonneg)/60 ] -> { csv: uri }.
%  ==
%
%  Supplementary types:
%  ==
%  note_weight --> 'none'; 'dur'; 'vel'; 'dur*vel'.
%
%  time == float.
%  dur  == nonneg.
%  freq == nonneg.
%  vel  == nonneg.
%  ==
getRecordingPerspective(Request) :-
   vis(getRecordingPerspective,Request).

%% listCollections(+Request) is det.
%  Lists previously defined collections and the Prolog queries that define them.
%  ==
%  listCollections : [] -> {
%     collections: list({  cid       : string,
%                          query     : string,
%                          dv        : integer,
%                          size      : natural,
%                          timestamp : string
%                       })
%  }.
%  ==
listCollections(Request) :-
   vis(listCollections,Request).

%% listPlaces(+Request) is det.
%  Lists known place names, currently the distinct values of the dcterms:spatial predicate.
%  ==
%  listPlaces : [] -> {
%     places: list({  name      : string,
%                     count     : natural
%                  })
%  }.
%  ==
listPlaces(Request) :-
   vis(listPlaces,Request).

%% listPerspectives(+Request) is det.
%  Lists available perspectives for a given method.
listPerspectives(Request) :-
   vis(listPerspectives,Request).

%% csv_time_window(+Request) is det.
%
%  Returns the contents of a CSV file between given time limits,
%  assuming the first column is a time value.
%  Also returns the time of the last row. Parameters and return structure are
%  ==
%  csv_time_window :
%     [uri(uri(csv)), start(float), end(float) ]
%  -> { duration:float, columns: list(list) }.
%  ==
%  URI parameter must be the URI of a CSV file.
%  Data is returned as a list of lists, where each inner list is one column
%  of the CSV.
csv_time_window(Request) :-
   vis(csv_time_window,Request).


% ------------------------------------------------------------------------------------
% Implementation of methods

:- multifile param/2.

% param(Name, Declaration): HTTP parameter declarations. vis_parameters/3
% passes attribute_declarations(param) to http_parameters/3, so a parameter
% term such as cid(_) is decoded according to the declaration with that name.

% Parameters accepted by every method.
param( format,   [oneof([json,jsonp]), default(jsonp), description('Reply format')]).
param( callback, [atom,default(jsonp_cb), description('Callback for jsonp reply')]).
param( dv,       [nonneg, optional(false), description('Database version ID')]).

% Method and perspective specific parameters.
% NOTE(review): sample is typed number but defaults to the atom all, while the
% getCollectionId documentation describes it as nonneg | 'all' - confirm that
% an explicit sample=all in a request is accepted.
param( cid,       [atom, optional(false), description('Collection ID')]).
param( pid,       [atom, optional(false), description('Perspective ID')]).
param( cids,      [atom, optional(false), description('Semicolon separated list of Collection IDs')]).
param( uri,       [atom, optional(false), description('Item URI')]).
param( library,   [atom, default(*), description('Semicolon separated list of libraries to search')]).
param( genre,     [atom, default(*), description('Semicolon separated list of genre names, or "*"')]).
param( year,      [atom, default(*), description('Semicolon separeted list of release years or a range (Y1-Y2) or "*"')]).
param( composer,  [atom, default(*), description('Semicolon separated list of composers or "*"')]).
param( performer, [atom, default(*), description('Semicolon separated list of performers or "*"')]).
param( place,     [atom, default(*), description('Semicolon separated list of place names or "*"')]).
param( language,  [atom, default(*), description('Semicolon separated list of language name prefixes or "*"')]).
param( collection,[atom, default(*), description('Semicolon separated list of BL collection names or "*"')]).
param( title,     [atom, default(*), description('Semicolon separated AND-list of title substrings or "*"')]).
param( method,    [atom, optional(false), description('API method name')]).
param( limit,     [integer, default(5000), description('Maximum number of things to return')]).
param( offset,    [integer, default(0), description('Offset within list')]).
param( sort_by,   [oneof([label,date]), default(label), description('Sort recordings list by this property')]).
param( start,     [number, optional(false), description('Start of window in seconds')]).
param( end,       [number, optional(false), description('End of window in seconds')]).
param( sample,    [number, default(all), description('Sample a random subset of this size')]).
param( sv,        [number, default(1), description('Version of random subset')]).
param( midi_pitch,[nonneg, default(60), description('MIDI note number 0..127')]).

% method_params(+Method, -Params): parameter terms to decode for Method.
% The first clauses list the fixed parameters of the non-perspective methods.
method_params(listCollections,  []).
method_params(listPlaces,  []).
method_params(listPerspectives, [ method(_) ]).
method_params(getCollectionId,  [ dv(_), library(_), genre(_), place(_), language(_), year(_), composer(_),
                                 performer(_), collection(_), title(_), sample(_), sv(_) ]).
method_params(csv_time_window,  [ uri(_), start(_), end(_) ]).
% Perspective methods take pid plus the union of the option names used by
% all perspectives registered for the method.
method_params(Method, [ pid(_) | Params ]) :-
   once(perspective(Method,_)),
   setof(Name, perspective_param_name(Method,Name), Names),
   maplist(param_name_term,Names,Params).

% Relate a parameter name to a one-argument term with that name, eg cid to cid(_).
param_name_term(Name,Term) :-
   Term=..[Name,_].
% Enumerate the names of the options used by any perspective of Method.
perspective_param_name(Method,Name) :-
   perspective(Method,_,OptSpecs,_),
   member(OptSpec,OptSpecs),
   optspec_name(OptSpec,Name).


:- multifile perspective/4.

%% cc(+Pred:pred(-A,-cc_status), +Cont:pred(+A,+cc_status), +ErrorCont:pred(+exception)) is det.
%  Run a predicate in continuation passing style.
%  Pred produces a result of type A and a status, which are handed on to Cont.
%  Any exception raised on the way, including the one insist/1 raises when
%  Pred does not succeed, is passed to ErrorCont instead.
cc(Goal,Cont,ErrorCont) :-
   catch(cc_wrapper(Goal,Cont), Exception, call(ErrorCont,Exception)).

cc_wrapper(Goal,Cont) :-
   insist(call(Goal,Result,Status)),
   call(Cont,Result,Status).

% Place is the literal object of some dcterms:spatial triple.
place_name(Place) :-
   rdf(_,dcterms:spatial,literal(Place)).

%% method_result_cc(+Method,+Params:options,+C:success_cont,+E:error_cont) is det.
%
% Runs Method and delivers its outcome through continuations. Perspectives
% are handled using continuations to allow spawning.
% ==
% success_cont == pred(+dict,+vis_status).
% error_cont == pred(+exception).
% vis_status ---> stable; unstable.
% ==
% Method must be a method id registered in perspective/2 or handled by method_result/4.
method_result_cc(Method,Params,Cont,ErrorCont) :-
   perspective(Method,_), !,
   option(pid(PId),Params),
   % look up the perspective, bind its options from Params and run its goal;
   % anything thrown on the way goes to the error continuation
   catch(
      (  insist( perspective(Method,PId,OptSpecs,PerspectiveGoal), unknown_perspective(PId)),
         insist( maplist(options_optspec(Params),OptSpecs)),
         call(PerspectiveGoal,Cont,ErrorCont)
      ),
      Exception,
      call(ErrorCont,Exception)).

% all non-perspective methods are handled in this thread
method_result_cc(Method,Params,Cont,ErrorCont) :-
   cc( method_result(Method,Params), Cont, ErrorCont).

%% method_result(+Method, +Opts, -Result, -Status:vis_status) is det.
%
%  Handles the non-perspective methods =|listCollections, listPlaces,
%  listPerspectives, getCollectionId, csv_time_window|=. Opts is the list of
%  decoded parameter terms for the method, Result is the dict to send back and
%  Status says whether the result is stable or unstable.

% listCollections: one dict per dataset found by browse/2.
method_result(listCollections, _, _{ collections:List }, unstable) :-
   findall( _{ cid:Id, query:QA, dv:DV, size:SZ, timestamp:TS },
            (  browse(dataset:dataset(dmlvis:Q, DV, Id, SZ, _),comp(_,Timestamp,_)-ok),
               format_time(string(TS),'%FT%T%:z',Timestamp),
               term_to_atom(Q,QA)
            ),
            List).

% listPlaces: every distinct place name with the number of times it occurs.
method_result(listPlaces, _, _{ places:List }, unstable) :-
   findall( _{ name:Name, count:N }, aggregate(count,place_name(Name),N), List).


% listPerspectives: ids of the perspectives registered for the given method.
method_result(listPerspectives, Opts, _{ perspectives:List }, stable) :-
   option(method(Method), Opts),
   findall( P, perspective(Method,P), List).

% getCollectionId: build the dataset query from the search options and
% register it; every option must have been consumed by the query builder.
method_result(getCollectionId, Opts, _{ cid:Id, size:Size, full_size:FullSize }, stable) :-
   build_dataset_query(Query,DBV,Size-FullSize,Opts,Remaining),
   insist(Remaining=[], unrecognised_search_criteria(Remaining)),
   insist(dataset_query_id( Query, DBV, Id),empty_dataset(Query)),
   dataset_size(Id, Size).

% csv_time_window: rows whose first column lies in [Start,End), by column.
method_result(csv_time_window, Opts, _{ duration:Dur, columns:Columns }, stable) :-
   % options_optspec/2 only understands +Opt and Opt-Default specifications;
   % the required options must therefore be written with a leading +.
   maplist(options_optspec(Opts), [ +uri(URI), +start(Start), +end(End) ]),
   uri_to_csv(URI,Rows),
   insist(Rows\=[], empty_csv(URI)),

   % the first column of the last row is reported as the duration
   append(_,[EndRow],Rows),
   functor(EndRow,_,NumCols),
   arg(1,EndRow,Dur),

   numlist(1,NumCols,Is),
   drop_while(row_before(Start),Rows,Rows1),
   take_while(row_before(End),Rows1,Rows2),
   rows_cols(Is,Rows2,Columns).

% True when the first argument of Row is strictly less than Limit.
row_before(Limit,Row) :-
   arg(1,Row,Time),
   Time<Limit.

% Bind one option specification from Opts: +Opt is a required option,
% Opt-Default falls back to Default when the option is absent.
options_optspec(Opts,+Required) :-
   option(Required,Opts).
options_optspec(Opts,Optional-Default) :-
   option(Optional,Opts,Default).
% options_optspec(M,Opts,O>Goal) :- options_optspec(M,Opts,O), call(M:Goal).

% Name of the option described by a +Opt or Opt-Default specification.
optspec_name(+Required,Name) :-
   functor(Required,Name,1).
optspec_name(Optional-_,Name) :-
   functor(Optional,Name,1).
% optspec_name(O>_,Name) :- optspec_name(O,Name).

% PId is the id of a perspective registered for Method in perspective/4.
perspective(Method,PId) :-
   perspective(Method,PId,_,_).

% perspective(Method, PerspectiveId, OptSpecs, Goal): perspectives defined in
% this module (the predicate is multifile, so other files may add more).
% OptSpecs are bound from the request parameters by options_optspec/2
% (+Opt is required, Opt-Default is optional) and Goal is then called with
% the success and error continuations appended (see method_result_cc/4).
perspective(getRecordingPerspective,  properties, [+uri(URI)], cc(recording_info(URI))).
perspective(getCollectionPerspective, summary,    [+cid(C)], cc(collection_summary(C))).
perspective(getCollectionPerspective, list,       [+cid(C),limit(Lim)-5000,offset(Off)-0,sort_by(SortBy)-label], cc(collection_list(C,Lim,Off,SortBy))).

% Summary perspective: size of collection Id and the queries (with their
% database versions) that define it. Throws unknown_collection(Id) via
% insist/2 when the collection has no known size.
collection_summary(Id,_{cid:Id, size:Size, queries:Queries },stable) :-
   insist(dataset_size(Id,Size), unknown_collection(Id)),
   findall( _{goal:GoalAtom, dv:DV},
            (  dataset_query_dv(Id,Goal,DV),
               term_to_atom(Goal,GoalAtom)
            ),
            Queries).


% List perspective: items of collection Id ordered by the SortBy property,
% skipping the first Offset items and returning at most Lim of them.
collection_list(Id, Lim, Offset, SortBy, _{cid:Id, size:Size, items:Items}, stable) :-
   insist(dataset_size(Id,Size), unknown_collection(Id)),
   findall(Key-Item, dataset_itemx(SortBy,Id,Item,Key), Unsorted),
   sort(Unsorted,Sorted),
   findall(Item, limit(Lim,offset(Offset,member(_-Item,Sorted))), Items).

% Enumerate the items of dataset Id as dicts holding the uri plus whichever
% of label, audio and date are available. SortVal is just(Value) of the
% SortProp property, or nothing when the item lacks it.
dataset_itemx(SortProp, Id, ItemDict, SortVal) :-
   dataset_item(Id,URI),
   filter(Lib,URI,in_library),
   findall(Name:Value,
           (  member(Name,[label,audio,date]),
              recording_property(URI,Lib,Name,Value)
           ),
           Pairs),
   (  member(SortProp:Found,Pairs)
   -> SortVal=just(Found)
   ;  SortVal=nothing
   ),
   dict_create(ItemDict,_,[uri:URI | Pairs]).


% Properties perspective: a dict with the library of recording URI and every
% property recording_property/4 yields for it. Throws unknown_recording(URI)
% via insist/2 when URI belongs to no library.
recording_info(URI, Info, stable) :-
   insist(filter(Lib,URI,in_library),unknown_recording(URI)),
   findall(Name:Value, recording_property(URI,Lib,Name,Value), Pairs),
   dict_create(Info,_,[ library:Lib | Pairs ]).

% Property lookup for a recording whose library is not yet known: find the
% library first, then defer to recording_property/4.
recording_property(URI,Name,Value) :-
   filter(Lib,URI,in_library),
   recording_property(URI,Lib,Name,Value).

% recording_property(URI,Lib,Name,Value): label and audio are available for
% any library; the remaining properties are read from RDF as described by
% lib_property_read/3.
recording_property(URI,_,label,Label) :-
   rdf_display_label(URI,Label).
recording_property(URI,_,audio,Links) :-
   % !!! HACK: force scraping for BL items here..
   % DISABLED - seems to upset BL server...
   % (rdf(URI,dml:blpage,_) -> once(bl_p2r:scrape_audio_link(URI,_)); true),
   setof(Link,Fmt^audio_link(URI,Link,Fmt),Links).
recording_property(URI,Lib,Name,Value) :-
   lib_property_read(Lib, Name, Reader),
   pred_values(URI,Lib:Name,Reader,Value).


% Read a property of URI through the RDF predicates given by lib_property/3.
% A -Converter reader converts the first object found for a predicate;
% a +Converter reader collects all such converted values in a list.
pred_values(URI,Lib:Name,-Converter,Value) :-
   lib_property(Lib,Name,Predicate),
   once(rdf(URI,Predicate,Object)),
   call(Converter,Object,Value).
pred_values(URI,LibName,+Converter,Values) :-
   findall(Value, pred_values(URI,LibName,-Converter,Value), Values).

% lib_property_read(Lib, Property, Reader): how a property is read for the
% properties perspective. Reader is -Converter for a single value or
% +Converter for a list of all values (see pred_values/4); Converter is
% called with the RDF object and returns the value.
lib_property_read(_, collection, -literal_text).
lib_property_read(_, composer,   +literal_text).
lib_property_read(_, performer,  +literal_text).
lib_property_read(_, title,      -literal_text).
lib_property_read(_, date,       -literal_text).
lib_property_read(_, place,      -literal_text).
lib_property_read(_, language,   +literal_text).
lib_property_read(ilm, genre,    +genre_label).

% Label is the text of an rdfs:label literal attached to the Genre resource.
genre_label(Genre,Label) :-
   rdf(Genre,rdfs:label,Literal),
   literal_text(Literal,Label).

% ------------------- DEFINING NEW COLLECTIONS -----------------------

% DCG over the option list: removes dv, sample and sv plus the search options
% consumed by full_query//1 and builds the dataset query. Without sampling
% (sample=all) the query is the full query and Size is unified with
% FullSize; otherwise the query is wrapped in sample/3 and FullSize is the
% number of solutions of the full query.
build_dataset_query(Query,DBV,Size-FullSize) -->
   select_option(dv(DBV)),
   select_option(sample(SampleSize)),
   select_option(sv(SampleVersion)),
   full_query(Unsampled),
   {  SampleSize=all
   -> Query=Unsampled,
      Size=FullSize
   ;  Query=sample(SampleSize,SampleVersion,Unsampled),
      aggregate_all(count,call(Unsampled,_),FullSize)
   }.

% Build the unsampled query qc(Filters) from the search options. Each
% qo(Option,Parser,Filter) entry is handled by process_qopt//1: the option
% value is parsed with Parser and, unless it is '*', Filter is added to
% Filters.
% Every entry must use its own variable to connect Parser and Filter: a
% variable shared between two entries (as performer and place used to do)
% makes the second parse fail whenever both options are given with
% different values.
full_query(qc(Filters)) -->
   run_left(seqmap(process_qopt,
                   [  qo( library,    atoms(LS), libraries(LS))
                   ,  qo( collection, atoms(CC), any(collection,CC))
                   ,  qo( title,      atoms(TS), all(title,TS))
                   ,  qo( composer,   atoms(CS), any(composer,CS))
                   ,  qo( performer,  atoms(PS), any(performer,PS))
                   ,  qo( genre,      atoms(GS), any(genre,GS))
                   ,  qo( year,       years(YS), year(YS))
                   ,  qo( place,      atoms(PLS), any(place,PLS))
                   ,  qo( language,   atoms(LNS), any(language,LNS))
                   ]), Filters, []).

% Handle one search option described by qo(OptName,Parser,Filter).
% The option value is taken with option_default_select/2 using '*' as the
% default. A value of '*' contributes nothing; any other value is parsed with
% Parser (see parse_atom/2) and Filter is emitted.
% NOTE(review): \> and \< presumably direct a DCG goal at one or the other
% half of the paired state set up by run_left in full_query//1 (option list
% versus filter list) - confirm against library(dcg_pair).
process_qopt(qo(OptName,Parser,Filter)) -->
   { Opt=..[OptName,OptVal] },
   \> option_default_select(Opt,'*'),
   (  {OptVal='*'} -> []
   ;  {parse_atom(Parser,OptVal)},
      \< [Filter]
   ).

% sample(+Size,+Version,+Query,-X): X is a member of a random subset of at
% most Size distinct solutions of Query. The version argument is not used
% in the computation. Fails if Query has no solutions (setof/3).
sample(Size,_,Query,Item) :-
   setof(Item, call(Query,Item), Everything),
   length(Everything,Total),
   (  Total>Size
   -> randset(Size, Total, Everything, [], Chosen)
   ;  Chosen=Everything
   ),
   member(Item,Chosen).

% randset(+K,+N,+Items)//: select K of the N elements of Items at random,
% visiting the elements in order. An element is skipped when random(N)>=K
% and selected otherwise; N decreases by one at every element and K by one
% at every selection, and selection stops as soon as K reaches 0.
% NOTE(review): selected elements are added to the DCG state by cons//1 from
% library(dcg_core), presumably by prepending - confirm there.
randset(0, _, _) --> !.
randset(K, N, [A|As]) -->
   ({random(N)>=K} -> {K1=K}; {K1 is K-1}, cons(A)),
   {N1 is N-1}, randset(K1,N1,As).


% Item satisfies every filter in Filters for a single library Lib and is a
% recording of that library.
qc(Filters,Item) :-
   maplist(filter(Lib,Item),Filters),
   filter(Lib,Item,in_library).

%% lib_property(-Library, -Property, -Predicate) is nondet.
%
%  This predicate manages the mapping from API search fields to RDF
%  predicates. There are still some questions about:
%
%  $ composer : marcrel cmp, arr, lyr
%  $ performer : marcrel prf, sng, cnd, drt
:- rdf_meta lib_property(?,+,r).
% Clause order matters: pred_values/4 and filter/4 enumerate these facts in
% order, so properties with several predicates are tried top to bottom.

% collection, language, place and genre
lib_property(bl,    collection, dcterms:isPartOf).
lib_property(bl,    language, dcterms:language).
lib_property(bl,    language, dc:language).
lib_property(bl,    place, dcterms:spatial).
lib_property(ilm,   genre, mo:genre).

% dates
% lib_property(beets, date, beets:original_year).
lib_property(ilm,   date, ilm:release_date).
lib_property(charm, date, charm:recording_date).
lib_property(mazurka, date, mazurka:recording_date).
lib_property(bl,    date, dcterms:created).

% composers
lib_property(charm, composer, charm:composer).
lib_property(mazurka, composer, mazurka:composer).
lib_property(bl,    composer, marcrel:cmp).
lib_property(bl,    composer, marcrel:arr).
lib_property(beets, composer, beets:composer).

% performers
% NOTE(review): ilm:arist below looks like a misspelling of ilm:artist
% (compare beets:artist) - verify against the predicate actually used in
% the ilm RDF data before changing it.
lib_property(charm, performer, charm:performer).
lib_property(mazurka, performer, mazurka:performer).
lib_property(bl,    performer, marcrel:prf).
lib_property(bl,    performer, marcrel:sng).
lib_property(beets, performer, beets:artist).
lib_property(ilm,   performer, ilm:arist).

% titles
lib_property(charm, title, charm:title).
lib_property(mazurka, title, mazurka:title).
lib_property(bl,    title, dc:title).
lib_property(beets, title, beets:title).
lib_property(ilm,   title, dc:title).

% lib_property_search(Lib, Property, Value, Search): the literal search term
% used by filter/4 to match Value against a property. Text properties use
% substring or prefix matching; a date range From-To becomes a between/2
% term whose upper bound is To+1.
lib_property_search(_,collection,Text,   substring(Text)).
lib_property_search(_,place,     Text,   substring(Text)).
lib_property_search(_,language,  Text,   prefix(Text)).
lib_property_search(_,composer,  Text,   prefix(Text)).
lib_property_search(_,performer, Text,   prefix(Text)).
lib_property_search(_,title,     Text,   substring(Text)).
% numeric bounds for beets
lib_property_search(beets, date,  From-To, between(From,Next)) :-
   succ(To,Next).
% bounds as atoms (this clause also applies to beets)
lib_property_search(_Lib,   date,  From-To, between(FromAtom,NextAtom)) :-
   % Lib\=beets,
   succ(To,Next),
   atom_number(FromAtom,From),
   atom_number(NextAtom,Next).

%% filter(+Lib, -Resource, +SearchSpec) is nondet.
%  Resource in library Lib satisfies SearchSpec. The spec libraries(Ls) only
%  constrains Lib; any/all test a property against a list of values; year
%  tests the date property; in_library tests membership of a library.
filter(Lib, _, libraries(Libs)) :-
   member(Lib,Libs).
filter(Lib, Item, any(Prop,Vals)) :-
   member(Val,Vals),
   filter(Lib,Item,Prop,Val).
filter(Lib, Item, all(Prop,Vals)) :-
   maplist(filter(Lib,Item,Prop),Vals).
filter(Lib, Item, year(any(Years))) :-
   member(Year,Years),
   filter(Lib,Item,date,Year-Year).
filter(Lib, Item, year(From-To)) :-
   filter(Lib,Item,date,From-To).
filter(Lib, Item, Prop-Val) :-
   filter(Lib,Item,Prop,Val).

% library membership: one characteristic triple in the library's graph
filter(charm, Item, in_library) :-
   rdf(Item,charm:file_name,_,charm_p2r).
filter(mazurka, Item, in_library) :-
   rdf(Item,mazurka:pid,_,mazurka_p2r).
filter(bl, Item, in_library) :-
   rdf(Item,rdf:type,mo:'Signal',bl_p2r).
filter(ilm, Item, in_library) :-
   rdf(Item,mo:track_number,_,ilm_p2r).
filter(beets, Item, in_library) :-
   rdf(Item,rdf:type,mo:'AudioFile',beets_p2r).

%% filter(+Lib, -Resource, +Property, +Value) is nondet.
%  Resource in library Lib has a Property matching Value.

% filter(beets, X,genre,G) :- rdf_has(X,beets:genre,literal(substring(G),_)).
% ilm genres are resources: match the genre label, then the genre link
filter(ilm, Item, genre, Text) :-
   rdf(Genre,rdfs:label,literal(substring(Text),_),ilm_p2r),
   rdf(Genre,rdf:type,mo:'Genre',ilm_p2r),
   rdf(Item,mo:genre,Genre).

% generic case: literal search on the predicates given by lib_property/3
filter(Lib, Item, Prop, Val) :-
   lib_property(Lib,Prop,Predicate),
   lib_property_search(Lib,Prop,Val,Search),
   rdf(Item,Predicate,literal(Search,_)).

% --------- parsers -----------

% cids(Ids) --> seqmap_with_sep(",",alphanum,Ids).
% Semicolon separated list of collection ids, each read as an atom.
cids(CollectionIds) -->
   semicolon_sep(atom_codes,CollectionIds).

% atoms('*') --> "*", !.
% Semicolon separated list of atoms, lower-cased, sorted and de-duplicated.
atoms(Atoms) -->
   semicolon_sep(atom_codes,Raw),
   {  maplist(downcase_atom,Raw,Lowered),
      sort(Lowered,Atoms)
   }.
% Read codes with an empty terminator set and return them as one lower-cased atom.
whole(Atom) -->
   string_without("",Codes),
   {  atom_codes(Mixed,Codes),
      downcase_atom(Mixed,Atom)
   }.

% years('*') --> "*", !.
% Either a range From-To of two integers, or any(Years) for a semicolon
% separated list of numbers (sorted, duplicates removed).
years(From-To) -->
   integer(From), "-", integer(To).
years(any(Years)) -->
   semicolon_sep(number_codes,Parsed),
   { sort(Parsed,Years) }.

% alphanum(X) --> string_without(",",S), {atom_string(X,S)}.

% One list element: the codes up to the next semicolon, converted by
% calling Conv with the item and the codes.
item(Conv,Item) -->
   string_without(";",Codes),
   { call(Conv,Item,Codes) }.
% Semicolon separated sequence of items, each converted with Conv (see item//2).
semicolon_sep(Conv,Items) --> seqmap_with_sep(";",item(Conv),Items).


% Parse the whole text of Atom with the DCG body Phrase; insist/2 raises
% parse_failure(Phrase) if the phrase does not match.
parse_atom(Phrase,Atom) :-
   atom_codes(Atom,Text),
   insist( phrase(Phrase,Text), parse_failure(Phrase)).

% Lazy creation hook for the vis_recording thread pool, which is used by the
% getRecordingPerspective handler (spawn(vis_recording)). The pool gets one
% worker per CPU and a backlog of 50 requests.
:- multifile thread_pool:create_pool/1.

thread_pool:create_pool(vis_recording) :-
   % fall back to a single worker if the cpu_count flag is not available
   (  current_prolog_flag(cpu_count,N)
   -> true
   ;  N=1
   ),
   thread_pool_create(vis_recording, N, [backlog(50)]).
author	Daniel Wolff
date	Tue, 09 Feb 2016 21:05:06 +0100
parents
children