diff cpack/dml/lib/kerndata.pl @ 0:718306e29690 tip
committing public release
| author   | Daniel Wolff                    |
|----------|---------------------------------|
| date     | Tue, 09 Feb 2016 21:05:06 +0100 |
| parents  |                                 |
| children |                                 |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpack/dml/lib/kerndata.pl	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,442 @@
/* Part of DML (Digital Music Laboratory)
	Copyright 2014-2015 Samer Abdallah, University of London

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

:- module(kerndata,
	[ uri_spine_pitches/3
	, uri_spine_events/4
	, uri_opus/2
	, events_nth_phrase/3
	, dataset_size/2
	, dataset_items/2
	, dataset_sequences/2
	, dataset_random_subsets/4
	, item_sequence/2
	, item_opus/2
	, item_uri/2
	, metpos/4
	, with_kern_module/3
	, fileset/1
	]).

/** <module> Managing sets of Kern scores

  This module provides tools for defining and working with sets of musical
  fragments extracted from Humdrum/Kern scores. Sets of files are defined by
  searches in the RDF database. Then, a small language of operations can be
  used to extract given spines and/or break melodic sequences into phrases.

  A named fileset is a term (the name) associated with a file set specification.
  A fileset specification is a nondeterministic binary predicate which yields
  both a Humdrum file URI and a spine number. An alternative way to refer to a
  file set is anonymously, as a term \Spec. A fileset, then, is either a named
  fileset or an anonymous fileset:
  ==
  anonymous_fileset ---> \fileset_spec.
  fileset_spec      == pred(-uri, -spine_spec).
  named_fileset     == term.
  fileset           == named_fileset | anonymous_fileset.
  spine             == natural.   % {1,2,...}

  spine_spec ---> extract(spine)
               ;  trace(spine)
               ;  merge
               ;  all.
  ==
  Named filesets are defined using the multifile predicate fileset/2.

  Some predicates are available for convenient construction of fileset_specs:
  ==
  with_spine(   +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
  trace_spine(  +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
  merge_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
  all_spines(   +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.

  kern_uri( +FS:find_spec, -URI:uri) is nondet.

  find_spec ---> (find_spec ; find_spec)   % set union
              ;  (find_spec , find_spec)   % set intersection
              ;  under(path)               % recursive directory search
              ;  in(path)                  % single directory search
              ;  uri=object.               % match on RDF property

  path   == atom.  % a path relative to root of Humdrum file library.
  object == uri | literal | literal_search_spec.

  literal             == { lang(atom,atom), type(uri,atom) } | atom.
  literal_search_spec ---> literal(literal_query,atom).
  literal_query ---> plain(atom)
                  ;  exact(atom)
                  ;  substring(atom)
                  ;  word(atom)
                  ;  prefix(atom)
                  ;  ge(atom)
                  ;  le(atom)
                  ;  between(atom,atom)
                  ;  like(atom).
  ==
  Thus, as a fileset_spec, the term =|with_spine(S,Finder)|= yields the Sth spine
  of each URI produced by Finder, and =|all_spines(Finder)|= yields all spines of
  each file produced by Finder.
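
  For example, a new named fileset could be declared through the multifile
  fileset/2 predicate using these constructors. This is only an illustrative
  sketch: the name =|demo_tunes|= and the directory =|folk/demo|= are
  hypothetical placeholders, not entries in the fileset database defined below.
  ==
  :- multifile kerndata:fileset/2.
  kerndata:fileset(demo_tunes, with_spine(1, kern_uri(under('folk/demo')))).
  ==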
  See rdf/3 for more information about literal query terms.

  Datasets are defined in terms of filesets:
  ==
  fileset          :< dataset.  % any fileset is also a dataset: each member yields one sequence
  phrases(fileset) :< dataset.  % denotes the set of phrases segmented from fileset.
  extract(DS:dataset, Size:nonneg, Offset:nonneg)        :: dataset.
  subset(DS:dataset, Size:nonneg, Indices:list(nonneg))  :: dataset.
  subset_ref(DS:dataset, Size:nonneg, SubsetRef:atom)    :: dataset.
  null :: dataset.
  ==

  Each item in a dataset is represented by a term of the following type:
  ==
  dataset_item ---> whole(uri,spine)           % Humdrum URI and spine number
                 ;  phrase(uri,spine,natural)  % URI, spine and phrase number
                 ;  merged(uri).               % all spines, highest note of chords
  ==
  Items can be obtained using dataset_items/2.
  Each item corresponds to a list of note numbers, which can be obtained using
  item_sequence/2 or dataset_sequences/2.

*/

:- meta_predicate with_kern_module(+,-,0).

:- use_module(library(memo)).
:- use_module(library(dcg_core)).
:- use_module(library(snobol)).
:- use_module(library(listutils)).
:- use_module(library(fileutils)).
:- use_module(library(termutils)).
:- use_module(library(insist)).
:- use_module(library(humdrum)).
:- use_module(library(humdrum/kern)).
:- use_module(library(humdrum/dynam)).
:- use_module(library(humdrum/humdrum_world)).
:- use_module(library(humdrum/kernutils)).
:- use_module(library(lambda)).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(humdrum_p2r), [hum_uri_path/2]).
:- use_module(library(dataset), [random_subset/4]).

:- set_prolog_flag(double_quotes, codes).
:- initialization memo_attach(memo(kerndata),[]).

:- dynamic item_sequence/2.
:- multifile fileset/2.

%% fileset(-FS:named_fileset) is nondet.
%  True when FS identifies an explicitly declared Kern file-set.
fileset(FS) :- fileset(FS,Spec), FS \= \Spec.

%% fileset(+FS:named_fileset, -Spec:fileset_spec) is semidet.
%% fileset(-FS:named_fileset, -Spec:fileset_spec) is nondet.
%
%  Database of named filesets and their fileset specifiers.
fileset(lorraine, with_spine(1,kern_uri(in('lorraine')))).
fileset(nova_scotia, with_spine(1,kern_uri(in('classical/songs/unaccompanied/nova-scotia/kern')))).
fileset(ireland, with_spine(1,kern_uri(in('ireland')))).
fileset(chorales, with_spine(1,kern_uri(in('classical/bach/chorales')))).
fileset(chorales371, with_spine(4,kern_uri(in('classical/bach/371chorales')))).
fileset(bach_cello, with_spec(merge,kern_uri(in('classical/bach/cello')))).
fileset(bach_violin, with_spec(merge,kern_uri(in('classical/bach/violin')))).
fileset(beethoven_quartets, all_spines(kern_uri(in('classical/beethoven/quartet')))).
fileset(essen, with_spine(1,kern_uri(under('essen/europe')))).
fileset(essen(china), with_spine(1,kern_uri(under('essen/china')))).
fileset(essen(Rgn), with_spine(1,kern_uri(under(Dir)))) :-
	member(Rgn,[germany,rossiya,magyar,elsass,lothring,nederlan,schweiz,oesterrh,jugoslav]),
	atom_concat('essen/europe/',Rgn,Dir).
fileset(essen(germany,Sub), with_spine(1,kern_uri(under(Dir)))) :-
	member(Sub,[dva,allerkbd,kinder,altdeu1,altdeu2,fink,zuccal,ballad,boehme,erk]),
	atom_concat('essen/europe/germany/',Sub,Dir).


%% uri_opus(+URI:uri, -Opus:atom) is semidet.
%% uri_opus(-URI:uri, -Opus:atom) is nondet.
%  Relation between Humdrum file URIs and their Humdrum SCT property.
uri_opus(URI,Opus) :- rdf(URI,hum:'refcode/SCT',literal(Opus)).

% :- volatile_memo file_opus( +file:atom, -opus:atom).
% file_opus(File,Opus) :-
%    with_kern_module(File, utf8, Mod, (Mod:ref('SCT',_,Opus) -> true; Opus=unknown(_))).

%% item_opus(+Item:dataset_item, -Opus:atom) is semidet.
%  Relation between a dataset item and the opus of its associated Humdrum file.
item_opus(Item,Opus) :- item_uri(Item,URI), uri_opus(URI,Opus).

%% item_uri(+Item:dataset_item, -URI:uri) is semidet.
%  Relation between a dataset item and the URI of its associated Humdrum file.
item_uri(phrase(URI,_,_),URI).
item_uri(whole(URI,_),URI).
item_uri(merged(URI),URI).


%% dataset_size(+DS:dataset, -N:nonneg) is det.
%  Get the size of a given dataset. This predicate is persistently memoised,
%  since computing the size of a large dataset may involve reading many Humdrum
%  files.

:- persistent_memo dataset_size( +ground, -nonneg).

dataset_size(Dataset,NumItems) :-
	eval_dataset(Dataset,Items),
	length(Items,NumItems).

%% dataset_items(+DS:dataset, -S:list(dataset_item)) is det.
dataset_items(DS,Items) :- eval_dataset(DS,Items).

%% dataset_sequences(+DS:dataset, -S:list(list(integer))) is det.
dataset_sequences(DS,SS) :-
	dataset_items(DS,Items),
	maplist(item_sequence,Items,SS).

%% eval_dataset(+DS:dataset, -Items:list(dataset_item)) is det.
eval_dataset(DS,_) :- must_be(ground,DS), fail.
eval_dataset(extract(DS,N,I), D2) :- !,
	eval_dataset(DS, D1),
	drop(I, D1, D3),
	take(N, D3, D2).

eval_dataset(subset_ref(DS,N,Ref), D2) :- !,
	dataset_size(DS,Size),
	random_subset(Size,N,Ref,IX),
	eval_dataset(subset(DS,N,IX), D2).

eval_dataset(subset(DS,N,IX), D2) :- !,
	eval_dataset(DS, D1),
	length(IX,N),
	maplist(\I^X^nth1(I,D1,X), IX, D2).

eval_dataset(null,[]).
eval_dataset(DS,Items) :-
	fileset_items(DS,Items).

:- thread_local item_/1.
:- volatile_memo fileset_items( +ds:ground, -items:list(ground)).
%% fileset_items(+FS, -Items:list(dataset_item)) is det.
%  FS can be a fileset or a term =|phrases(Fileset)|=, denoting the set of
%  sequences resulting from segmenting each item in Fileset into phrases.
fileset_items(DS, Items) :-
	setup_call_cleanup(
		load(DS),
		findall(Item,item_(Item),Items),
		retractall(item_(_))),
	insist(Items\=[]).

load(phrases(FS)) :- fileset(FS,Spec), forall_kern(Spec,assert_phrases).
load(FS) :- fileset(FS,Spec), forall_kern(Spec,assert_whole).

%% item_sequence(+Item:dataset_item, -Seq:list(integer)) is det.
%  True when Seq is the list of note numbers for a given item.

%% spine_nth_phrase(+Mod:module, +S:spine, -N:natural, -P:list(integer)) is nondet.
%
%  True when P is the list of note numbers in the Nth phrase on spine S in the
%  Humdrum file represented in module Mod. Phrases can be defined in two ways:
%    1. Using explicit Kern phrase marks
%    2. Using notes annotated with a pause mark to determine phrase endings.
spine_nth_phrase(Mod,Spine,N,Phrase) :-
	Mod:spine(Spine),
	(	once(spine_phrase(Mod,Spine,_,_))
	->	% spine has explicit phrase markings
		kern_get_events(event_or_delim(spine_notenum,Mod,Spine), Events),
		pairs_values(Events,Events1),
		bagof(NNs, L^phrase(contains_group(L,NNs),Events1), Phrases),
		nth1(N,Phrases,Phrase)
	;	% no phrase markings - use pauses
		kern_get_events(event_or_pause(spine_notenum,Mod,Spine), Events),
		events_nth_phrase(Events, N, Phrase)
	).

assert_whole(URI,Mod,Spec) :-
	(Spec=extract(Spine); Spec=all),
	forall( kern_get_events(spine_notenum(Mod,Spine), Events),
	        ( assert_id(item_sequence(whole(URI,Spine), Events)),
	          assert(item_(whole(URI,Spine))))).

assert_whole(URI,Mod,merge) :-
	forall( kern_get_events(top_notenum(Mod), Events),
	        ( assert_id(item_sequence(merged(URI), Events)),
	          assert(item_(merged(URI))))).

assert_phrases(URI,Mod,Spec) :-
	(Spec=extract(Spine); Spec=all),
	forall( spine_nth_phrase(Mod,Spine,N,Phrase),
	        ( assert_id(item_sequence(phrase(URI,Spine,N), Phrase)),
	          assert(item_(phrase(URI,Spine,N))))).

assert_id(Fact) :- call(Fact) -> true; assert(Fact).


contains_group(D,XX) --> arb, group(D,XX,[]), rem.

group(D,X1,X2) -->
	delimiter(D,open),
	iterate(content(D),X1,X2),
	delimiter(D,close).

content(_,[X|XX],XX) --> [event(X)].
content(D,XX,XX) --> {dif(D,D1)}, delimiter(D1,_).
content(D,X1,X2) --> group(D,X1,X2).
delimiter(D,T) --> [T/D].


%% dataset_random_subsets(+DS:dataset, +K:nonneg, +L:nonneg, -DSX:list(dataset)) is det.
%
%  Returns L random subsets of K sequences each from dataset DS.
%  K is the size of each random subset. L is the number of subsets
%  returned. Each subset is a term of the form =|subset(DS,K,Indices)|=.
dataset_random_subsets(DS,K,L,DSX) :-
	dataset_size(DS,N),
	numlist(1,L,SubsetIndices),
	maplist(\I^subset(DS,K,S)^random_subset(N,K,I,S),SubsetIndices,DSX).


%% uri_spine_pitches(+URI:uri, +S:nonneg, -P:list(pitch)) is det.
%% uri_spine_pitches(+URI:uri, -S:nonneg, -P:list(pitch)) is nondet.
%
%  True when P is a list of the pitches of the notes in spine S
%  of the Humdrum file pointed to by URI.
uri_spine_pitches(URI,Spine,Pitches) :-
	with_kern_module(URI, Mod,
		kern_get_events(spine_pitch(Mod,Spine), Pitches)).

%% uri_spine_events(+URI:uri, +G:getter, +S:spine, -E:list(event(A))) is det.
%% uri_spine_events(+URI:uri, +G:getter, -S:spine, -E:list(event(A))) is nondet.
%
%  Get a list of events from a given spine in a Humdrum file. The kind of events
%  extracted depends on the getter predicate G, which must be of type:
%  ==
%  getter(A) == pred(+M:module, -S:spine, -T:time, -E:event(A)).
%  event(A) ---> event(A) ; pause.
%  ==

uri_spine_events(URI,Getter,Spine,Events) :-
	with_kern_module(URI, Mod,
		kern_get_events(event_or_pause(Getter,Mod,Spine), Events)).

:- meta_predicate forall_kern(2,3).
forall_kern(Findspec,Pred) :-
	with_status_line(
		forall( call(Findspec,URI,SpineSpec), (
			status('Loading: ~s',[URI]),
			with_kern_module(URI, Mod, call(Pred,URI,Mod,SpineSpec))
		))),
	format('Finished loading.\n',[]).

%% with_kern_module(+URI:uri, -M:module, +Goal:pred) is nondet.
%
%  Calls Goal with M set to the name of a temporary module containing
%  the information in the Kern file specified by URI.
with_kern_module(URI,Mod,Goal) :-
	hum_uri_path(URI,Path),
	with_kern_module(Path,utf8,Mod,Goal).

:- meta_predicate with_spine(+,1,-,-).
with_spine(Spine,Finder,URI,extract(Spine)) :- call(Finder,URI).

:- meta_predicate all_spines(+,-,-).
all_spines(Finder,URI,all) :- call(Finder,URI).

:- meta_predicate with_spec(+,+,-,-).
with_spec(SpineSpec,Finder,URI,SpineSpec) :- call(Finder,URI).

%% kern_uri(+FS:find_spec, -URI:uri) is nondet.
%
%  True when URI refers to a Humdrum file that satisfies the given
%  specification. The specification type =|find_spec|= is defined in the
%  module header comment.
kern_uri(in(Dir),URI) :- rdf(URI,hum:directory,literal(Dir)).
kern_uri(under(Dir),URI) :- rdf(URI,hum:directory,literal(prefix(Dir),_)).
kern_uri(A;B, URI) :- kern_uri(A,URI); kern_uri(B,URI).
kern_uri((A,B), URI) :- kern_uri(A,URI), kern_uri(B,URI).
kern_uri(Prop=Val,URI) :- rdf(URI,Prop,Val).
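
% Illustrative sketch: find_specs can be combined with ;/2 and ,/2 before being
% passed to kern_uri/2, just as the fileset database above combines in/1 and
% under/1. The directories below are the ones already used in that database;
% example_bach_or_essen_uri/1 is a hypothetical helper, not an exported
% predicate of this module. The Prop=Val form matches on an arbitrary RDF
% property of the file, e.g. the hum:'refcode/SCT' property used by uri_opus/2.
example_bach_or_essen_uri(URI) :-
	kern_uri(( in('classical/bach/chorales')
	         ; under('essen/europe') ), URI).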

%% spine_pitch(+Mod:module, -Spine:nonneg, -Time, -Pitch) is nondet.
spine_pitch(Mod,Spine,Time,P) :-
	Mod:note(pitch(P),_,Time,Spine).

spine_pitch_dur(Mod,Spine,Time,P-D) :-
	Mod:note(pitch(P),D,Time,Spine).

spine_notenum(Mod,Spine,Time,NN) :-
	spine_pitch(Mod,Spine,Time,Pitch),
	pitch_notenum(Pitch,NN).


% Get the highest pitch over all spines at each time slice.
top_notenum(Mod,Time,TopNN) :-
	aggregate(max(NN),time_notenum(Mod,Time,NN),TopNN).

time_notenum(Mod,Time,NN) :-
	Mod:note(pitch(P),_,Time,_),
	pitch_notenum(P,NN).

spine_pause(Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
spine_pause(Mod,Spine,Time) :-
	Mod:articulation(pause,Spine,Rec),
	Mod:time(T,Rec),
	Mod:duration(D,Rec),
	Time is T+D.

spine_phrase(Mod,Spine,Time,Time-(OpenClose/Label)) :-
	Mod:data(tok(Toks),Spine,Rec),
	member(par(OpenClose,phrase-Label),Toks),
	Mod:time(T,Rec),
	(	OpenClose=open  -> Time=T
	;	OpenClose=close -> Mod:duration(D,Rec), Time is T+D
	).


twos(L) :- L=[2|L].

metre(4/4,  1:M)                   :- twos(M).
metre(3/4,  (3 rdiv 4):[3,2|M])    :- twos(M).
metre(6/8,  (3 rdiv 4):[2,3|M])    :- twos(M).
metre(9/8,  (9 rdiv 8):[3,3|M])    :- twos(M).
metre(12/8, (12 rdiv 8):[2,2,3|M]) :- twos(M).

%% metpos(+Bar:rational, +Metre:list(nat), +T0:rational, -R:nat) is det.
metpos(Bar,Metre,T0,R) :- metpos_(T0,[1/Bar|Metre],0,R).

metpos_(0,_,R,R) :- !.
metpos_(T0,[M0|MX],R0,R) :-
	mod1(T0*M0,T1), succ(R0,R1),
	metpos_(T1,MX,R1,R).

mod1(X,Y) :- Z is floor(X), Y is X - Z.

event_or_pause(Pred,Mod,Spine,Time,event(X)) :- call(Pred,Mod,Spine,Time,X).
event_or_pause(_,Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).

event_or_delim(Pred,Mod,Spine,Time,2-event(X)) :-
	call(Pred,Mod,Spine,Time,X).
event_or_delim(_,Mod,Spine,Time,Prio-Delim) :-
	spine_phrase(Mod,Spine,Time,_-Delim),
	delim_prio(Delim,Prio).

delim_prio(open/_,1).
delim_prio(close/_,1).

%% events_nth_phrase(+Events:list(event(A)), +N:natural, -P:list(A)) is det.
%  Extract the Nth phrase from a list of events or pauses.
events_nth_phrase(Events,N,Phrase) :-
	phrase((seqmap_with_sep([pause],seqmap(event),Phrases), opt([pause])),Events,[]),
	nth1(N,Phrases,Phrase).

event(X) --> [event(X)].
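
% Usage sketch (illustrative): once the RDF store has been populated with the
% Humdrum file metadata queried by kern_uri/2, the named filesets above can be
% turned into note-number sequences using only the exported predicates and the
% filesets defined in this file.
%
%   ?- dataset_items(phrases(nova_scotia), Items),
%      maplist(item_sequence, Items, Seqs).
%
%   ?- dataset_random_subsets(chorales, 10, 5, Subsets),
%      maplist(dataset_sequences, Subsets, SeqSets).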