view cpack/dml/lib/kerndata.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line source
/* Part of DML (Digital Music Laboratory)
	Copyright 2014-2015 Samer Abdallah, University of London
	 
	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/

:- module(kerndata,
      [  uri_spine_pitches/3
      ,  uri_spine_events/4
      ,  uri_opus/2
      ,  events_nth_phrase/3
      ,  dataset_size/2
      ,  dataset_items/2
      ,  dataset_sequences/2
      ,  dataset_random_subsets/4
      ,  item_sequence/2
      ,  item_opus/2
      ,  item_uri/2
      ,  metpos/4
      ,  with_kern_module/3
      ,  fileset/1
      ]).

/** <module> Managings sets of Kern scores

   This module provides tools for defining and working with sets of musical
   fragments extracted from Humdrum/Kern scores. Sets of files are defined by 
   searches in the RDF database. Then, a small language of operations can be
   used to extract given spines and/or break melodic sequences into phrases.

   A named fileset is a term (the name) associated with a file set specification.
   A fileset specification is a nondeterministic binary predicate which yields
   both a Humdrum file URI and a spine number. An alternative way to refer to a
   file set is anonymously, as a term \Spec. A fileset, then is either a named
   fileset or an anonymous fileset:
   ==
   anonymous_filset ---> \fileset_spec.
        fileset_spec == pred(-uri, -spine_spec).
       named_fileset == term.
             fileset == named_fileset | anonymous_fileset.
               spine == natural. % {1,2,...}

      spine_spec ---> extract(spine)
                    ; trace(spine)
                    ; merge
                    ; all.
   ==
   Named filesets are defined using the multifile predicate fileset/2.

   Some predicates are available for convenient construction of fileset_specs:
   ==
   with_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
   trace_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
   merge_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
   all_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.

   kern_uri( +FS:find_spec, -URI:uri) is nondet.

   find_spec ---> (find_spec ; find_spec) % set union
                ; (find_spec , find_spec) % set intersection
                ; under(path)             % recursive directory search
                ; in(path)                % single directory search
                ; uri=object.             % match on RDF property

     path == atom. % a path relative to root of Humdrum file library.
   object == uri | literal | literal_search_spec. 

   literal == { lang(atom,atom), type(uri,atom) } | atom.
   literal_search_spec ---> literal(literal_query,atom).
   literal_query ---> plain(atom) 
                    ; exact(atom)
                    ; substring(atom)
                    ; word(atom)
                    ; prefix(atom)
                    ; ge(atom)
                    ; le(atom)
                    ; between(atom,atom)
                    ; like(atom).
   ==
   Thus, as a fileset_spec, the term =|with_spine(S,Finder)|= yields the Sth spine
   of each URI produced by Finder. =|all_spines(Finder)|= yields all spines of each file
   produced by Finder. See rdf/3 for more information about literal query terms.

   Datasets are defined in terms of filesets:
   ==
   fileset :< dataset. % any fileset is also a dataset: each member yields one sequence
   phrases(fileset) :< dataset. % denotes the set of phrases segmented from fileset.
   extract(DS:dataset, Size:nonneg, Offset:nonneg)       :: dataset.
   subset(DS:dataset, Size:nonneg, Indices:list(nonneg)) :: dataset.
   subset_ref(DS:dataset, Size:nonneg, SubsetRef:atom)   :: dataset.
   null :: dataset.
   ==
   
   Each item in a dataset is represented by a term of the following type:
   ==
   dataset_item ---> whole(uri,spine) % Humdrum URI and spine number
                   ; phrase(uri,spine,natural). % URI, spine and phrase number
                   ; merged(uri). % all spines, highest note of chords
   ==
   Items can be obtained using dataset_items/2.
   Each item corresponds to a list of note numbers, which can be obtained using
   item_sequence/2, or dataset_sequences/2.

*/

:- meta_predicate with_kern_module(+,-,0).

:- use_module(library(memo)).
:- use_module(library(dcg_core)).
:- use_module(library(snobol)).
:- use_module(library(listutils)).
:- use_module(library(fileutils)).
:- use_module(library(termutils)).
:- use_module(library(insist)).
:- use_module(library(humdrum)).
:- use_module(library(humdrum/kern)).
:- use_module(library(humdrum/dynam)).
:- use_module(library(humdrum/humdrum_world)).
:- use_module(library(humdrum/kernutils)).
:- use_module(library(lambda)).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(humdrum_p2r), [hum_uri_path/2]).
:- use_module(library(dataset), [random_subset/4]).

:- set_prolog_flag(double_quotes, codes).
:- initialization memo_attach(memo(kerndata),[]).

:- dynamic item_sequence/2.
:- multifile fileset/2.

%% fileset(-FS:named_fileset) is nondet.
%  True when FS identifies an explicitly declared Kern file-set.
fileset(FS) :- fileset(FS,Spec), FS \= \Spec.

%% fileset(+FS:named_fileset, -Spec:fileset_spec) is semidet.
%% fileset(-FS:named_fileset, -Spec:fileset_spec) is nondet.
%
%  Database of named filesets and their fileset specifiers.
fileset(lorraine,    with_spine(1,kern_uri(in('lorraine')))).
fileset(nova_scotia, with_spine(1,kern_uri(in('classical/songs/unaccompanied/nova-scotia/kern')))).
fileset(ireland,     with_spine(1,kern_uri(in('ireland')))).
fileset(chorales,    with_spine(1,kern_uri(in('classical/bach/chorales')))).
fileset(chorales371, with_spine(4,kern_uri(in('classical/bach/371chorales')))).
fileset(bach_cello,  with_spec(merge,kern_uri(in('classical/bach/cello')))).
fileset(bach_violin, with_spec(merge,kern_uri(in('classical/bach/violin')))).
fileset(beethoven_quartets, all_spines(kern_uri(in('classical/beethoven/quartet')))).
fileset(essen,       with_spine(1,kern_uri(under('essen/europe')))).
fileset(essen(china),with_spine(1,kern_uri(under('essen/china')))).
fileset(essen(Rgn),  with_spine(1,kern_uri(under(Dir)))) :-
   member(Rgn,[germany,rossiya,magyar,elsass,lothring,nederlan,schweiz,oesterrh,jugoslav]),
   atom_concat('essen/europe/',Rgn,Dir).
fileset(essen(germany,Sub),  with_spine(1,kern_uri(under(Dir)))) :-
   member(Sub,[dva,allerkbd,kinder,altdeu1,altdeu2,fink,zuccal,ballad,boehme,erk]),
   atom_concat('essen/europe/germany/',Sub,Dir).


%% uri_opus(+URI:uri,-Opus:atom) is semidet.
%% uri_opus(-URI:uri,-Opus:atom) is nondet.
%  Relation between Humdrum file URIs and their Humdrum SCT property.
uri_opus(URI,Opus) :- rdf(URI,hum:'refcode/SCT',literal(Opus)).

% :- volatile_memo file_opus( +file:atom, -opus:atom).
% file_opus(File,Opus) :-
%    with_kern_module(File, utf8, Mod, (Mod:ref('SCT',_,Opus) -> true; Opus=unknown(_))).

%% item_opus(+Item:dataset_item, -Opus:atom) is semidet.
%  Relation between a dataset item and the opus of its associated Humdrum file.
item_opus(Item,Opus) :- item_uri(Item,URI), uri_opus(URI,Opus).

%% item_uri(+Item:dataset_item, -URI:uri) is semidet.
%  Relation between a dataset item and the URI of its associated Humdrum file.
item_uri(phrase(URI,_,_),URI).
item_uri(whole(URI,_),URI).
item_uri(merged(URI),URI).


%% dataset_size(+DS:dataset, -N:nonneg) is det.
%  Get the size of a given dataset. This predicate is persistently memoised,
%  since computing the size of a large dataset may involve reading many Humdrum
%  files.

:- persistent_memo dataset_size( +ground, -nonneg).

dataset_size(Dataset,NumItems) :-
   eval_dataset(Dataset,Items),
   length(Items,NumItems).

%% dataset_items(+DS:dataset, -S:list(dataset_item)) is det.
dataset_items(DS,Items) :- eval_dataset(DS,Items).

%% dataset_sequences(+DS:dataset, -S:list(list(integer))) is det.
dataset_sequences(DS,SS) :-
   dataset_items(DS,Items),
   maplist(item_sequence,Items,SS).

%% eval_dataset(+DS:dataset, -Items:list(dataset_item)) is det.
eval_dataset(DS,_) :- must_be(ground,DS), fail.
eval_dataset(extract(DS,N,I), D2) :- !,
   eval_dataset(DS, D1),
   drop(I, D1, D3),
   take(N, D3, D2).

eval_dataset(subset_ref(DS,N,Ref), D2) :- !,
   dataset_size(DS,Size),
   random_subset(Size,N,Ref,IX),
   eval_dataset(subset(DS,N,IX), D2).

eval_dataset(subset(DS,N,IX), D2) :- !,
   eval_dataset(DS, D1),
   length(IX,N),
   maplist(\I^X^nth1(I,D1,X), IX, D2).

eval_dataset(null,[]).
eval_dataset(DS,Items) :-
   fileset_items(DS,Items).

:- thread_local item_/1.
:- volatile_memo fileset_items( +ds:ground, -items:list(ground)).
%% fileset_items(+FS, -Items:list(dataset_item)) is det.
%  FS can be a fileset or a term =|phrases(Fileset)|=, denoting the set of
%  sequence resulting from segmenting each item in Fileset into phrases.
fileset_items(DS, Items) :-
   setup_call_cleanup(
      load(DS),
      findall(Item,item_(Item),Items),
      retractall(item_(_))),
   insist(Items\=[]).

load(phrases(FS)) :- fileset(FS,Spec), forall_kern(Spec,assert_phrases).
load(FS)          :- fileset(FS,Spec), forall_kern(Spec,assert_whole).

%% item_sequence(+Item:dataset_item, -Seq:list(integer)) is det.
%  True when Seq is the list of note numbers for a given item.

%% spine_nth_phrase(+Mod:module,+S:spine,-N:natural,-P:list(integer)) is nondet.
%
%  True when P is the list of note numbers in the Nth phrase on spine S in the
%  Humdrum file represented in module Mod. Phrases can be defined in two ways:
%     1. Using explicit Kern phrase marks
%     2. Using notes annotated with a pause mark to determine phrase endings.
spine_nth_phrase(Mod,Spine,N,Phrase) :-
   Mod:spine(Spine),
   (  once(spine_phrase(Mod,Spine,_,_))
   -> % spine has explicit phrases markings
      kern_get_events(event_or_delim(spine_notenum,Mod,Spine), Events),
      pairs_values(Events,Events1),
      bagof(NNs, L^phrase(contains_group(L,NNs),Events1), Phrases),
      nth1(N,Phrases,Phrase)
   ;  % no phrase markings - use pauses
      kern_get_events(event_or_pause(spine_notenum,Mod,Spine), Events),
      events_nth_phrase(Events, N, Phrase)
   ).

assert_whole(URI,Mod,Spec) :-
   (Spec=extract(Spine); Spec=all),
   forall( kern_get_events(spine_notenum(Mod,Spine), Events),
           ( assert_id(item_sequence(whole(URI,Spine), Events)),
             assert(item_(whole(URI,Spine))))).

assert_whole(URI,Mod,merge) :-
   forall( kern_get_events(top_notenum(Mod), Events),
           ( assert_id(item_sequence(merged(URI), Events)),
             assert(item_(merged(URI))))).

assert_phrases(URI,Mod,Spec) :-
   (Spec=extract(Spine); Spec=all),
   forall(  spine_nth_phrase(Mod,Spine,N,Phrase),
            ( assert_id(item_sequence(phrase(URI,Spine,N), Phrase)),
              assert(item_(phrase(URI,Spine,N))))).

assert_id(Fact) :- call(Fact) -> true; assert(Fact).


contains_group(D,XX) -->  arb, group(D,XX,[]), rem.

group(D,X1,X2) -->
   delimiter(D,open),
   iterate(content(D),X1,X2),
   delimiter(D,close).

content(_,[X|XX],XX) --> [event(X)].
content(D,XX,XX) --> {dif(D,D1)}, delimiter(D1,_).
content(D,X1,X2) --> group(D,X1,X2).
delimiter(D,T) --> [T/D].


%% dataset_random_subsets(+DS:dataset,+K:nonneg,+L:nonneg,-DSX:list(dataset)) is det.
%
%  Returns L random subsets of K sequences each from dataset DS.
%  K is the size of each random subset. L is the number of subsets
%  returned. Each subset is a term of the form =|subset(DS,K,Indices)|=.
dataset_random_subsets(DS,K,L,DSX) :-
   dataset_size(DS,N),
   numlist(1,L,SubsetIndices),
   maplist(\I^subset(DS,K,S)^random_subset(N,K,I,S),SubsetIndices,DSX).


%% uri_spine_pitches(+URI:uri,+S:nonneg,-P:list(pitch)) is det.
%% uri_spine_pitches(+URI:uri,-S:nonneg,-P:list(pitch)) is nondet.
%
%  True when Pis a list of the pitches of notes in a spine S
%  of the Humdrum file pointed to by URI.
uri_spine_pitches(URI,Spine,Pitches) :-
   with_kern_module(URI, Mod,
      kern_get_events(spine_pitch(Mod,Spine), Pitches)).

%% uri_spine_events(+URI:uri,+G:getter,+S:spine,-E:list(event(A))) is det.
%% uri_spine_events(+URI:uri,+G:getter,-S:spine,-E:list(event(A))) is nondet.
%
% Get list of events from a given spine in a Humdrum file. The kind of events extracted
% depends on the getter predicate G, which must be of type:
% ==
% getter(A) == pred(+M:module,-S:spine,-T:time,-E:event(A)).
% event(A) ---> event(A) ; pause.
% ==

uri_spine_events(URI,Getter,Spine,Events) :-
   with_kern_module(URI, Mod,
      kern_get_events(event_or_pause(Getter,Mod,Spine), Events)).

:- meta_predicate forall_kern(2,3).
forall_kern(Findspec,Pred) :-
   with_status_line(
      forall( call(Findspec,URI,SpineSpec), (
         status('Loading: ~s',[URI]),
         with_kern_module(URI, Mod, call(Pred,URI,Mod,SpineSpec))
      ))),
   format('Finished loading.\n',[]).

%% with_kern_module(+URI:uri, -M:module, +Goal:pred) is nondet.
%
%  Calls Goal with M set to the name of a temporary moduling containing
%  the information in the Kern file specified by URI.
with_kern_module(URI,Mod,Goal) :-
   hum_uri_path(URI,Path),
   with_kern_module(Path,utf8,Mod,Goal).

:- meta_predicate with_spine(+,1,-,-).
with_spine(Spine,Finder,URI,extract(Spine)) :- call(Finder,URI).

:- meta_predicate all_spines(+,-,-).
all_spines(Finder,URI,all) :- call(Finder,URI).

:- meta_predicate with_spec(+,+,-,-).
with_spec(SpineSpec,Finder,URI,SpineSpec) :- call(Finder,URI).

%% kern_uri(+FS:find_spec, -URI:uri) is nondet.
%
%  True when URI refers to a Humdrum file that satisfies the given
%  specification. The specification type =|find_spec|= is defined in the
%  module header comment.
kern_uri(in(Dir),URI) :- rdf(URI,hum:directory,literal(Dir)).
kern_uri(under(Dir),URI) :- rdf(URI,hum:directory,literal(prefix(Dir),_)).
kern_uri(A;B, URI) :- kern_uri(A,URI); kern_uri(B,URI).
kern_uri((A,B), URI) :- kern_uri(A,URI), kern_uri(B,URI).
kern_uri(Prop=Val,URI) :- rdf(URI,Prop,Val).

%% spine_pitch(+Mod:module,-Spine:nonneg,-Time,-Pitch) is nondet.
spine_pitch(Mod,Spine,Time,P) :-
   Mod:note(pitch(P),_,Time,Spine).

spine_pitch_dur(Mod,Spine,Time,P-D) :-
   Mod:note(pitch(P),D,Time,Spine).

spine_notenum(Mod,Spine,Time,NN) :-
   spine_pitch(Mod,Spine,Time,Pitch),
   pitch_notenum(Pitch,NN).


%% get the highest pitch over all spines at each time slice.
top_notenum(Mod,Time,TopNN) :-
   aggregate(max(NN),time_notenum(Mod,Time,NN),TopNN).

time_notenum(Mod,Time,NN) :- 
   Mod:note(pitch(P),_,Time,_),
   pitch_notenum(P,NN).

spine_pause(Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
spine_pause(Mod,Spine,Time) :-
   Mod:articulation(pause,Spine,Rec),
   Mod:time(T,Rec),
   Mod:duration(D,Rec),
   Time is T+D.

spine_phrase(Mod,Spine,Time,Time-(OpenClose/Label)) :-
   Mod:data(tok(Toks),Spine,Rec),
   member(par(OpenClose,phrase-Label),Toks),
   Mod:time(T,Rec),
   (  OpenClose=open -> Time=T
   ;  OpenClose=close -> Mod:duration(D,Rec), Time is T+D
   ).


twos(L) :- L=[2|L].

metre(4/4,1:M) :- twos(M).
metre(3/4,(3 rdiv 4):[3,2|M]) :- twos(M).
metre(6/8,(3 rdiv 4):[2,3|M]) :- twos(M).
metre(9/8,(9 rdiv 8):[3,3|M]) :- twos(M).
metre(12/8,(12 rdiv 8):[2,2,3|M]) :- twos(M).

%% metpos( +Bar:rational, +Metre:list(nat), +T0:rational, -R:nat) is det.
metpos(Bar,Metre,TT,R) :- metpos_(TT,[1/Bar|Metre],0,R).

metpos_(0,_,R,R) :- !.
metpos_(T0,[M0|MX],R0,R) :-
   mod1(T0*M0,T1), succ(R0,R1),
   metpos_(T1,MX,R1,R).

mod1(X,Y) :- Z is floor(X), Y is X - Z.

event_or_pause(Pred,Mod,Spine,Time,event(X)) :- call(Pred,Mod,Spine,Time,X).
event_or_pause(_,Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).

event_or_delim(Pred,Mod,Spine,Time,2-event(X)) :-
   call(Pred,Mod,Spine,Time,X).
event_or_delim(_,Mod,Spine,Time,Prio-Delim) :-
   spine_phrase(Mod,Spine,Time,_-Delim),
   delim_prio(Delim,Prio).

delim_prio(open/_,1).
delim_prio(close/_,1).

%% events_nth_phrase(+Events:list(event(A)), +N:natural, -P:list(A)) is det.
%  Extract Nth phrase from a list of events or pauses.
events_nth_phrase(Events,N,Phrase) :-
   phrase((seqmap_with_sep([pause],seqmap(event),Phrases), opt([pause])),Events,[]),
   nth1(N,Phrases,Phrase).

event(X) --> [event(X)].