diff cpack/dml/lib/kerndata.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/lib/kerndata.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,442 @@
+/* Part of DML (Digital Music Laboratory)
+	Copyright 2014-2015 Samer Abdallah, University of London
+	 
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this library; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+:- module(kerndata,
+      [  uri_spine_pitches/3
+      ,  uri_spine_events/4
+      ,  uri_opus/2
+      ,  events_nth_phrase/3
+      ,  dataset_size/2
+      ,  dataset_items/2
+      ,  dataset_sequences/2
+      ,  dataset_random_subsets/4
+      ,  item_sequence/2
+      ,  item_opus/2
+      ,  item_uri/2
+      ,  metpos/4
+      ,  with_kern_module/3
+      ,  fileset/1
+      ]).
+
+/** <module> Managings sets of Kern scores
+
+   This module provides tools for defining and working with sets of musical
+   fragments extracted from Humdrum/Kern scores. Sets of files are defined by 
+   searches in the RDF database. Then, a small language of operations can be
+   used to extract given spines and/or break melodic sequences into phrases.
+
+   A named fileset is a term (the name) associated with a file set specification.
+   A fileset specification is a nondeterministic binary predicate which yields
+   both a Humdrum file URI and a spine number. An alternative way to refer to a
+   file set is anonymously, as a term \Spec. A fileset, then is either a named
+   fileset or an anonymous fileset:
+   ==
+   anonymous_filset ---> \fileset_spec.
+        fileset_spec == pred(-uri, -spine_spec).
+       named_fileset == term.
+             fileset == named_fileset | anonymous_fileset.
+               spine == natural. % {1,2,...}
+
+      spine_spec ---> extract(spine)
+                    ; trace(spine)
+                    ; merge
+                    ; all.
+   ==
+   Named filesets are defined using the multifile predicate fileset/2.
+
+   Some predicates are available for convenient construction of fileset_specs:
+   ==
+   with_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
+   trace_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
+   merge_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
+   all_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
+
+   kern_uri( +FS:find_spec, -URI:uri) is nondet.
+
+   find_spec ---> (find_spec ; find_spec) % set union
+                ; (find_spec , find_spec) % set intersection
+                ; under(path)             % recursive directory search
+                ; in(path)                % single directory search
+                ; uri=object.             % match on RDF property
+
+     path == atom. % a path relative to root of Humdrum file library.
+   object == uri | literal | literal_search_spec. 
+
+   literal == { lang(atom,atom), type(uri,atom) } | atom.
+   literal_search_spec ---> literal(literal_query,atom).
+   literal_query ---> plain(atom) 
+                    ; exact(atom)
+                    ; substring(atom)
+                    ; word(atom)
+                    ; prefix(atom)
+                    ; ge(atom)
+                    ; le(atom)
+                    ; between(atom,atom)
+                    ; like(atom).
+   ==
+   Thus, as a fileset_spec, the term =|with_spine(S,Finder)|= yields the Sth spine
+   of each URI produced by Finder. =|all_spines(Finder)|= yields all spines of each file
+   produced by Finder. See rdf/3 for more information about literal query terms.
+
+   Datasets are defined in terms of filesets:
+   ==
+   fileset :< dataset. % any fileset is also a dataset: each member yields one sequence
+   phrases(fileset) :< dataset. % denotes the set of phrases segmented from fileset.
+   extract(DS:dataset, Size:nonneg, Offset:nonneg)       :: dataset.
+   subset(DS:dataset, Size:nonneg, Indices:list(nonneg)) :: dataset.
+   subset_ref(DS:dataset, Size:nonneg, SubsetRef:atom)   :: dataset.
+   null :: dataset.
+   ==
+   
+   Each item in a dataset is represented by a term of the following type:
+   ==
+   dataset_item ---> whole(uri,spine) % Humdrum URI and spine number
+                   ; phrase(uri,spine,natural). % URI, spine and phrase number
+                   ; merged(uri). % all spines, highest note of chords
+   ==
+   Items can be obtained using dataset_items/2.
+   Each item corresponds to a list of note numbers, which can be obtained using
+   item_sequence/2, or dataset_sequences/2.
+
+*/
+
+:- meta_predicate with_kern_module(+,-,0).
+
+:- use_module(library(memo)).
+:- use_module(library(dcg_core)).
+:- use_module(library(snobol)).
+:- use_module(library(listutils)).
+:- use_module(library(fileutils)).
+:- use_module(library(termutils)).
+:- use_module(library(insist)).
+:- use_module(library(humdrum)).
+:- use_module(library(humdrum/kern)).
+:- use_module(library(humdrum/dynam)).
+:- use_module(library(humdrum/humdrum_world)).
+:- use_module(library(humdrum/kernutils)).
+:- use_module(library(lambda)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(humdrum_p2r), [hum_uri_path/2]).
+:- use_module(library(dataset), [random_subset/4]).
+
+:- set_prolog_flag(double_quotes, codes).
+:- initialization memo_attach(memo(kerndata),[]).
+
+:- dynamic item_sequence/2.
+:- multifile fileset/2.
+
+%% fileset(-FS:named_fileset) is nondet.
+%  True when FS identifies an explicitly declared Kern file-set.
+fileset(FS) :- fileset(FS,Spec), FS \= \Spec.
+
+%% fileset(+FS:named_fileset, -Spec:fileset_spec) is semidet.
+%% fileset(-FS:named_fileset, -Spec:fileset_spec) is nondet.
+%
+%  Database of named filesets and their fileset specifiers.
+fileset(lorraine,    with_spine(1,kern_uri(in('lorraine')))).
+fileset(nova_scotia, with_spine(1,kern_uri(in('classical/songs/unaccompanied/nova-scotia/kern')))).
+fileset(ireland,     with_spine(1,kern_uri(in('ireland')))).
+fileset(chorales,    with_spine(1,kern_uri(in('classical/bach/chorales')))).
+fileset(chorales371, with_spine(4,kern_uri(in('classical/bach/371chorales')))).
+fileset(bach_cello,  with_spec(merge,kern_uri(in('classical/bach/cello')))).
+fileset(bach_violin, with_spec(merge,kern_uri(in('classical/bach/violin')))).
+fileset(beethoven_quartets, all_spines(kern_uri(in('classical/beethoven/quartet')))).
+fileset(essen,       with_spine(1,kern_uri(under('essen/europe')))).
+fileset(essen(china),with_spine(1,kern_uri(under('essen/china')))).
+fileset(essen(Rgn),  with_spine(1,kern_uri(under(Dir)))) :-
+   member(Rgn,[germany,rossiya,magyar,elsass,lothring,nederlan,schweiz,oesterrh,jugoslav]),
+   atom_concat('essen/europe/',Rgn,Dir).
+fileset(essen(germany,Sub),  with_spine(1,kern_uri(under(Dir)))) :-
+   member(Sub,[dva,allerkbd,kinder,altdeu1,altdeu2,fink,zuccal,ballad,boehme,erk]),
+   atom_concat('essen/europe/germany/',Sub,Dir).
+
+
+%% uri_opus(+URI:uri,-Opus:atom) is semidet.
+%% uri_opus(-URI:uri,-Opus:atom) is nondet.
+%  Relation between Humdrum file URIs and their Humdrum SCT property.
+uri_opus(URI,Opus) :- rdf(URI,hum:'refcode/SCT',literal(Opus)).
+
+% :- volatile_memo file_opus( +file:atom, -opus:atom).
+% file_opus(File,Opus) :-
+%    with_kern_module(File, utf8, Mod, (Mod:ref('SCT',_,Opus) -> true; Opus=unknown(_))).
+
+%% item_opus(+Item:dataset_item, -Opus:atom) is semidet.
+%  Relation between a dataset item and the opus of its associated Humdrum file.
+item_opus(Item,Opus) :- item_uri(Item,URI), uri_opus(URI,Opus).
+
+%% item_uri(+Item:dataset_item, -URI:uri) is semidet.
+%  Relation between a dataset item and the URI of its associated Humdrum file.
+item_uri(phrase(URI,_,_),URI).
+item_uri(whole(URI,_),URI).
+item_uri(merged(URI),URI).
+
+
+%% dataset_size(+DS:dataset, -N:nonneg) is det.
+%  Get the size of a given dataset. This predicate is persistently memoised,
+%  since computing the size of a large dataset may involve reading many Humdrum
+%  files.
+
+:- persistent_memo dataset_size( +ground, -nonneg).
+
+dataset_size(Dataset,NumItems) :-
+   eval_dataset(Dataset,Items),
+   length(Items,NumItems).
+
+%% dataset_items(+DS:dataset, -S:list(dataset_item)) is det.
+dataset_items(DS,Items) :- eval_dataset(DS,Items).
+
+%% dataset_sequences(+DS:dataset, -S:list(list(integer))) is det.
+dataset_sequences(DS,SS) :-
+   dataset_items(DS,Items),
+   maplist(item_sequence,Items,SS).
+
+%% eval_dataset(+DS:dataset, -Items:list(dataset_item)) is det.
+eval_dataset(DS,_) :- must_be(ground,DS), fail.
+eval_dataset(extract(DS,N,I), D2) :- !,
+   eval_dataset(DS, D1),
+   drop(I, D1, D3),
+   take(N, D3, D2).
+
+eval_dataset(subset_ref(DS,N,Ref), D2) :- !,
+   dataset_size(DS,Size),
+   random_subset(Size,N,Ref,IX),
+   eval_dataset(subset(DS,N,IX), D2).
+
+eval_dataset(subset(DS,N,IX), D2) :- !,
+   eval_dataset(DS, D1),
+   length(IX,N),
+   maplist(\I^X^nth1(I,D1,X), IX, D2).
+
+eval_dataset(null,[]).
+eval_dataset(DS,Items) :-
+   fileset_items(DS,Items).
+
+:- thread_local item_/1.
+:- volatile_memo fileset_items( +ds:ground, -items:list(ground)).
+%% fileset_items(+FS, -Items:list(dataset_item)) is det.
+%  FS can be a fileset or a term =|phrases(Fileset)|=, denoting the set of
+%  sequence resulting from segmenting each item in Fileset into phrases.
+fileset_items(DS, Items) :-
+   setup_call_cleanup(
+      load(DS),
+      findall(Item,item_(Item),Items),
+      retractall(item_(_))),
+   insist(Items\=[]).
+
+load(phrases(FS)) :- fileset(FS,Spec), forall_kern(Spec,assert_phrases).
+load(FS)          :- fileset(FS,Spec), forall_kern(Spec,assert_whole).
+
+%% item_sequence(+Item:dataset_item, -Seq:list(integer)) is det.
+%  True when Seq is the list of note numbers for a given item.
+
+%% spine_nth_phrase(+Mod:module,+S:spine,-N:natural,-P:list(integer)) is nondet.
+%
+%  True when P is the list of note numbers in the Nth phrase on spine S in the
+%  Humdrum file represented in module Mod. Phrases can be defined in two ways:
+%     1. Using explicit Kern phrase marks
+%     2. Using notes annotated with a pause mark to determine phrase endings.
+spine_nth_phrase(Mod,Spine,N,Phrase) :-
+   Mod:spine(Spine),
+   (  once(spine_phrase(Mod,Spine,_,_))
+   -> % spine has explicit phrases markings
+      kern_get_events(event_or_delim(spine_notenum,Mod,Spine), Events),
+      pairs_values(Events,Events1),
+      bagof(NNs, L^phrase(contains_group(L,NNs),Events1), Phrases),
+      nth1(N,Phrases,Phrase)
+   ;  % no phrase markings - use pauses
+      kern_get_events(event_or_pause(spine_notenum,Mod,Spine), Events),
+      events_nth_phrase(Events, N, Phrase)
+   ).
+
+assert_whole(URI,Mod,Spec) :-
+   (Spec=extract(Spine); Spec=all),
+   forall( kern_get_events(spine_notenum(Mod,Spine), Events),
+           ( assert_id(item_sequence(whole(URI,Spine), Events)),
+             assert(item_(whole(URI,Spine))))).
+
+assert_whole(URI,Mod,merge) :-
+   forall( kern_get_events(top_notenum(Mod), Events),
+           ( assert_id(item_sequence(merged(URI), Events)),
+             assert(item_(merged(URI))))).
+
+assert_phrases(URI,Mod,Spec) :-
+   (Spec=extract(Spine); Spec=all),
+   forall(  spine_nth_phrase(Mod,Spine,N,Phrase),
+            ( assert_id(item_sequence(phrase(URI,Spine,N), Phrase)),
+              assert(item_(phrase(URI,Spine,N))))).
+
+assert_id(Fact) :- call(Fact) -> true; assert(Fact).
+
+
+contains_group(D,XX) -->  arb, group(D,XX,[]), rem.
+
+group(D,X1,X2) -->
+   delimiter(D,open),
+   iterate(content(D),X1,X2),
+   delimiter(D,close).
+
+content(_,[X|XX],XX) --> [event(X)].
+content(D,XX,XX) --> {dif(D,D1)}, delimiter(D1,_).
+content(D,X1,X2) --> group(D,X1,X2).
+delimiter(D,T) --> [T/D].
+
+
+%% dataset_random_subsets(+DS:dataset,+K:nonneg,+L:nonneg,-DSX:list(dataset)) is det.
+%
+%  Returns L random subsets of K sequences each from dataset DS.
+%  K is the size of each random subset. L is the number of subsets
+%  returned. Each subset is a term of the form =|subset(DS,K,Indices)|=.
+dataset_random_subsets(DS,K,L,DSX) :-
+   dataset_size(DS,N),
+   numlist(1,L,SubsetIndices),
+   maplist(\I^subset(DS,K,S)^random_subset(N,K,I,S),SubsetIndices,DSX).
+
+
+%% uri_spine_pitches(+URI:uri,+S:nonneg,-P:list(pitch)) is det.
+%% uri_spine_pitches(+URI:uri,-S:nonneg,-P:list(pitch)) is nondet.
+%
+%  True when Pis a list of the pitches of notes in a spine S
+%  of the Humdrum file pointed to by URI.
+uri_spine_pitches(URI,Spine,Pitches) :-
+   with_kern_module(URI, Mod,
+      kern_get_events(spine_pitch(Mod,Spine), Pitches)).
+
+%% uri_spine_events(+URI:uri,+G:getter,+S:spine,-E:list(event(A))) is det.
+%% uri_spine_events(+URI:uri,+G:getter,-S:spine,-E:list(event(A))) is nondet.
+%
+% Get list of events from a given spine in a Humdrum file. The kind of events extracted
+% depends on the getter predicate G, which must be of type:
+% ==
+% getter(A) == pred(+M:module,-S:spine,-T:time,-E:event(A)).
+% event(A) ---> event(A) ; pause.
+% ==
+
+uri_spine_events(URI,Getter,Spine,Events) :-
+   with_kern_module(URI, Mod,
+      kern_get_events(event_or_pause(Getter,Mod,Spine), Events)).
+
+:- meta_predicate forall_kern(2,3).
+forall_kern(Findspec,Pred) :-
+   with_status_line(
+      forall( call(Findspec,URI,SpineSpec), (
+         status('Loading: ~s',[URI]),
+         with_kern_module(URI, Mod, call(Pred,URI,Mod,SpineSpec))
+      ))),
+   format('Finished loading.\n',[]).
+
+%% with_kern_module(+URI:uri, -M:module, +Goal:pred) is nondet.
+%
+%  Calls Goal with M set to the name of a temporary moduling containing
+%  the information in the Kern file specified by URI.
+with_kern_module(URI,Mod,Goal) :-
+   hum_uri_path(URI,Path),
+   with_kern_module(Path,utf8,Mod,Goal).
+
+:- meta_predicate with_spine(+,1,-,-).
+with_spine(Spine,Finder,URI,extract(Spine)) :- call(Finder,URI).
+
+:- meta_predicate all_spines(+,-,-).
+all_spines(Finder,URI,all) :- call(Finder,URI).
+
+:- meta_predicate with_spec(+,+,-,-).
+with_spec(SpineSpec,Finder,URI,SpineSpec) :- call(Finder,URI).
+
+%% kern_uri(+FS:find_spec, -URI:uri) is nondet.
+%
+%  True when URI refers to a Humdrum file that satisfies the given
+%  specification. The specification type =|find_spec|= is defined in the
+%  module header comment.
+kern_uri(in(Dir),URI) :- rdf(URI,hum:directory,literal(Dir)).
+kern_uri(under(Dir),URI) :- rdf(URI,hum:directory,literal(prefix(Dir),_)).
+kern_uri(A;B, URI) :- kern_uri(A,URI); kern_uri(B,URI).
+kern_uri((A,B), URI) :- kern_uri(A,URI), kern_uri(B,URI).
+kern_uri(Prop=Val,URI) :- rdf(URI,Prop,Val).
+
+%% spine_pitch(+Mod:module,-Spine:nonneg,-Time,-Pitch) is nondet.
+spine_pitch(Mod,Spine,Time,P) :-
+   Mod:note(pitch(P),_,Time,Spine).
+
+spine_pitch_dur(Mod,Spine,Time,P-D) :-
+   Mod:note(pitch(P),D,Time,Spine).
+
+spine_notenum(Mod,Spine,Time,NN) :-
+   spine_pitch(Mod,Spine,Time,Pitch),
+   pitch_notenum(Pitch,NN).
+
+
+%% get the highest pitch over all spines at each time slice.
+top_notenum(Mod,Time,TopNN) :-
+   aggregate(max(NN),time_notenum(Mod,Time,NN),TopNN).
+
+time_notenum(Mod,Time,NN) :- 
+   Mod:note(pitch(P),_,Time,_),
+   pitch_notenum(P,NN).
+
+spine_pause(Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
+spine_pause(Mod,Spine,Time) :-
+   Mod:articulation(pause,Spine,Rec),
+   Mod:time(T,Rec),
+   Mod:duration(D,Rec),
+   Time is T+D.
+
+spine_phrase(Mod,Spine,Time,Time-(OpenClose/Label)) :-
+   Mod:data(tok(Toks),Spine,Rec),
+   member(par(OpenClose,phrase-Label),Toks),
+   Mod:time(T,Rec),
+   (  OpenClose=open -> Time=T
+   ;  OpenClose=close -> Mod:duration(D,Rec), Time is T+D
+   ).
+
+
+twos(L) :- L=[2|L].
+
+metre(4/4,1:M) :- twos(M).
+metre(3/4,(3 rdiv 4):[3,2|M]) :- twos(M).
+metre(6/8,(3 rdiv 4):[2,3|M]) :- twos(M).
+metre(9/8,(9 rdiv 8):[3,3|M]) :- twos(M).
+metre(12/8,(12 rdiv 8):[2,2,3|M]) :- twos(M).
+
+%% metpos( +Bar:rational, +Metre:list(nat), +T0:rational, -R:nat) is det.
+metpos(Bar,Metre,TT,R) :- metpos_(TT,[1/Bar|Metre],0,R).
+
+metpos_(0,_,R,R) :- !.
+metpos_(T0,[M0|MX],R0,R) :-
+   mod1(T0*M0,T1), succ(R0,R1),
+   metpos_(T1,MX,R1,R).
+
+mod1(X,Y) :- Z is floor(X), Y is X - Z.
+
+event_or_pause(Pred,Mod,Spine,Time,event(X)) :- call(Pred,Mod,Spine,Time,X).
+event_or_pause(_,Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
+
+event_or_delim(Pred,Mod,Spine,Time,2-event(X)) :-
+   call(Pred,Mod,Spine,Time,X).
+event_or_delim(_,Mod,Spine,Time,Prio-Delim) :-
+   spine_phrase(Mod,Spine,Time,_-Delim),
+   delim_prio(Delim,Prio).
+
+delim_prio(open/_,1).
+delim_prio(close/_,1).
+
+%% events_nth_phrase(+Events:list(event(A)), +N:natural, -P:list(A)) is det.
+%  Extract Nth phrase from a list of events or pauses.
+events_nth_phrase(Events,N,Phrase) :-
+   phrase((seqmap_with_sep([pause],seqmap(event),Phrases), opt([pause])),Events,[]),
+   nth1(N,Phrases,Phrase).
+
+event(X) --> [event(X)].