comparison cpack/dml/lib/kerndata.pl @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:718306e29690
1 /* Part of DML (Digital Music Laboratory)
2 Copyright 2014-2015 Samer Abdallah, University of London
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 :- module(kerndata,
20 [ uri_spine_pitches/3
21 , uri_spine_events/4
22 , uri_opus/2
23 , events_nth_phrase/3
24 , dataset_size/2
25 , dataset_items/2
26 , dataset_sequences/2
27 , dataset_random_subsets/4
28 , item_sequence/2
29 , item_opus/2
30 , item_uri/2
31 , metpos/4
32 , with_kern_module/3
33 , fileset/1
34 ]).
35
36 /** <module> Managings sets of Kern scores
37
38 This module provides tools for defining and working with sets of musical
39 fragments extracted from Humdrum/Kern scores. Sets of files are defined by
40 searches in the RDF database. Then, a small language of operations can be
41 used to extract given spines and/or break melodic sequences into phrases.
42
43 A named fileset is a term (the name) associated with a file set specification.
44 A fileset specification is a nondeterministic binary predicate which yields
45 both a Humdrum file URI and a spine number. An alternative way to refer to a
46 file set is anonymously, as a term \Spec. A fileset, then is either a named
47 fileset or an anonymous fileset:
48 ==
49 anonymous_filset ---> \fileset_spec.
50 fileset_spec == pred(-uri, -spine_spec).
51 named_fileset == term.
52 fileset == named_fileset | anonymous_fileset.
53 spine == natural. % {1,2,...}
54
55 spine_spec ---> extract(spine)
56 ; trace(spine)
57 ; merge
58 ; all.
59 ==
60 Named filesets are defined using the multifile predicate fileset/2.
61
62 Some predicates are available for convenient construction of fileset_specs:
63 ==
64 with_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
65 trace_spine( +S:spine, +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
66 merge_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
67 all_spines( +Finder:pred(-uri), -URI:uri, -S:spine_spec) is nondet.
68
69 kern_uri( +FS:find_spec, -URI:uri) is nondet.
70
71 find_spec ---> (find_spec ; find_spec) % set union
72 ; (find_spec , find_spec) % set intersection
73 ; under(path) % recursive directory search
74 ; in(path) % single directory search
75 ; uri=object. % match on RDF property
76
77 path == atom. % a path relative to root of Humdrum file library.
78 object == uri | literal | literal_search_spec.
79
80 literal == { lang(atom,atom), type(uri,atom) } | atom.
81 literal_search_spec ---> literal(literal_query,atom).
82 literal_query ---> plain(atom)
83 ; exact(atom)
84 ; substring(atom)
85 ; word(atom)
86 ; prefix(atom)
87 ; ge(atom)
88 ; le(atom)
89 ; between(atom,atom)
90 ; like(atom).
91 ==
92 Thus, as a fileset_spec, the term =|with_spine(S,Finder)|= yields the Sth spine
93 of each URI produced by Finder. =|all_spines(Finder)|= yields all spines of each file
94 produced by Finder. See rdf/3 for more information about literal query terms.
95
96 Datasets are defined in terms of filesets:
97 ==
98 fileset :< dataset. % any fileset is also a dataset: each member yields one sequence
99 phrases(fileset) :< dataset. % denotes the set of phrases segmented from fileset.
100 extract(DS:dataset, Size:nonneg, Offset:nonneg) :: dataset.
101 subset(DS:dataset, Size:nonneg, Indices:list(nonneg)) :: dataset.
102 subset_ref(DS:dataset, Size:nonneg, SubsetRef:atom) :: dataset.
103 null :: dataset.
104 ==
105
106 Each item in a dataset is represented by a term of the following type:
107 ==
108 dataset_item ---> whole(uri,spine) % Humdrum URI and spine number
109 ; phrase(uri,spine,natural). % URI, spine and phrase number
110 ; merged(uri). % all spines, highest note of chords
111 ==
112 Items can be obtained using dataset_items/2.
113 Each item corresponds to a list of note numbers, which can be obtained using
114 item_sequence/2, or dataset_sequences/2.
115
116 */
117
118 :- meta_predicate with_kern_module(+,-,0).
119
120 :- use_module(library(memo)).
121 :- use_module(library(dcg_core)).
122 :- use_module(library(snobol)).
123 :- use_module(library(listutils)).
124 :- use_module(library(fileutils)).
125 :- use_module(library(termutils)).
126 :- use_module(library(insist)).
127 :- use_module(library(humdrum)).
128 :- use_module(library(humdrum/kern)).
129 :- use_module(library(humdrum/dynam)).
130 :- use_module(library(humdrum/humdrum_world)).
131 :- use_module(library(humdrum/kernutils)).
132 :- use_module(library(lambda)).
133 :- use_module(library(semweb/rdf_db)).
134 :- use_module(library(humdrum_p2r), [hum_uri_path/2]).
135 :- use_module(library(dataset), [random_subset/4]).
136
137 :- set_prolog_flag(double_quotes, codes).
138 :- initialization memo_attach(memo(kerndata),[]).
139
140 :- dynamic item_sequence/2.
141 :- multifile fileset/2.
142
143 %% fileset(-FS:named_fileset) is nondet.
144 % True when FS identifies an explicitly declared Kern file-set.
145 fileset(FS) :- fileset(FS,Spec), FS \= \Spec.
146
147 %% fileset(+FS:named_fileset, -Spec:fileset_spec) is semidet.
148 %% fileset(-FS:named_fileset, -Spec:fileset_spec) is nondet.
149 %
150 % Database of named filesets and their fileset specifiers.
151 fileset(lorraine, with_spine(1,kern_uri(in('lorraine')))).
152 fileset(nova_scotia, with_spine(1,kern_uri(in('classical/songs/unaccompanied/nova-scotia/kern')))).
153 fileset(ireland, with_spine(1,kern_uri(in('ireland')))).
154 fileset(chorales, with_spine(1,kern_uri(in('classical/bach/chorales')))).
155 fileset(chorales371, with_spine(4,kern_uri(in('classical/bach/371chorales')))).
156 fileset(bach_cello, with_spec(merge,kern_uri(in('classical/bach/cello')))).
157 fileset(bach_violin, with_spec(merge,kern_uri(in('classical/bach/violin')))).
158 fileset(beethoven_quartets, all_spines(kern_uri(in('classical/beethoven/quartet')))).
159 fileset(essen, with_spine(1,kern_uri(under('essen/europe')))).
160 fileset(essen(china),with_spine(1,kern_uri(under('essen/china')))).
161 fileset(essen(Rgn), with_spine(1,kern_uri(under(Dir)))) :-
162 member(Rgn,[germany,rossiya,magyar,elsass,lothring,nederlan,schweiz,oesterrh,jugoslav]),
163 atom_concat('essen/europe/',Rgn,Dir).
164 fileset(essen(germany,Sub), with_spine(1,kern_uri(under(Dir)))) :-
165 member(Sub,[dva,allerkbd,kinder,altdeu1,altdeu2,fink,zuccal,ballad,boehme,erk]),
166 atom_concat('essen/europe/germany/',Sub,Dir).
167
168
169 %% uri_opus(+URI:uri,-Opus:atom) is semidet.
170 %% uri_opus(-URI:uri,-Opus:atom) is nondet.
171 % Relation between Humdrum file URIs and their Humdrum SCT property.
172 uri_opus(URI,Opus) :- rdf(URI,hum:'refcode/SCT',literal(Opus)).
173
174 % :- volatile_memo file_opus( +file:atom, -opus:atom).
175 % file_opus(File,Opus) :-
176 % with_kern_module(File, utf8, Mod, (Mod:ref('SCT',_,Opus) -> true; Opus=unknown(_))).
177
178 %% item_opus(+Item:dataset_item, -Opus:atom) is semidet.
179 % Relation between a dataset item and the opus of its associated Humdrum file.
180 item_opus(Item,Opus) :- item_uri(Item,URI), uri_opus(URI,Opus).
181
182 %% item_uri(+Item:dataset_item, -URI:uri) is semidet.
183 % Relation between a dataset item and the URI of its associated Humdrum file.
184 item_uri(phrase(URI,_,_),URI).
185 item_uri(whole(URI,_),URI).
186 item_uri(merged(URI),URI).
187
188
189 %% dataset_size(+DS:dataset, -N:nonneg) is det.
190 % Get the size of a given dataset. This predicate is persistently memoised,
191 % since computing the size of a large dataset may involve reading many Humdrum
192 % files.
193
194 :- persistent_memo dataset_size( +ground, -nonneg).
195
196 dataset_size(Dataset,NumItems) :-
197 eval_dataset(Dataset,Items),
198 length(Items,NumItems).
199
200 %% dataset_items(+DS:dataset, -S:list(dataset_item)) is det.
201 dataset_items(DS,Items) :- eval_dataset(DS,Items).
202
203 %% dataset_sequences(+DS:dataset, -S:list(list(integer))) is det.
204 dataset_sequences(DS,SS) :-
205 dataset_items(DS,Items),
206 maplist(item_sequence,Items,SS).
207
208 %% eval_dataset(+DS:dataset, -Items:list(dataset_item)) is det.
209 eval_dataset(DS,_) :- must_be(ground,DS), fail.
210 eval_dataset(extract(DS,N,I), D2) :- !,
211 eval_dataset(DS, D1),
212 drop(I, D1, D3),
213 take(N, D3, D2).
214
215 eval_dataset(subset_ref(DS,N,Ref), D2) :- !,
216 dataset_size(DS,Size),
217 random_subset(Size,N,Ref,IX),
218 eval_dataset(subset(DS,N,IX), D2).
219
220 eval_dataset(subset(DS,N,IX), D2) :- !,
221 eval_dataset(DS, D1),
222 length(IX,N),
223 maplist(\I^X^nth1(I,D1,X), IX, D2).
224
225 eval_dataset(null,[]).
226 eval_dataset(DS,Items) :-
227 fileset_items(DS,Items).
228
229 :- thread_local item_/1.
230 :- volatile_memo fileset_items( +ds:ground, -items:list(ground)).
231 %% fileset_items(+FS, -Items:list(dataset_item)) is det.
232 % FS can be a fileset or a term =|phrases(Fileset)|=, denoting the set of
233 % sequence resulting from segmenting each item in Fileset into phrases.
234 fileset_items(DS, Items) :-
235 setup_call_cleanup(
236 load(DS),
237 findall(Item,item_(Item),Items),
238 retractall(item_(_))),
239 insist(Items\=[]).
240
241 load(phrases(FS)) :- fileset(FS,Spec), forall_kern(Spec,assert_phrases).
242 load(FS) :- fileset(FS,Spec), forall_kern(Spec,assert_whole).
243
244 %% item_sequence(+Item:dataset_item, -Seq:list(integer)) is det.
245 % True when Seq is the list of note numbers for a given item.
246
247 %% spine_nth_phrase(+Mod:module,+S:spine,-N:natural,-P:list(integer)) is nondet.
248 %
249 % True when P is the list of note numbers in the Nth phrase on spine S in the
250 % Humdrum file represented in module Mod. Phrases can be defined in two ways:
251 % 1. Using explicit Kern phrase marks
252 % 2. Using notes annotated with a pause mark to determine phrase endings.
253 spine_nth_phrase(Mod,Spine,N,Phrase) :-
254 Mod:spine(Spine),
255 ( once(spine_phrase(Mod,Spine,_,_))
256 -> % spine has explicit phrases markings
257 kern_get_events(event_or_delim(spine_notenum,Mod,Spine), Events),
258 pairs_values(Events,Events1),
259 bagof(NNs, L^phrase(contains_group(L,NNs),Events1), Phrases),
260 nth1(N,Phrases,Phrase)
261 ; % no phrase markings - use pauses
262 kern_get_events(event_or_pause(spine_notenum,Mod,Spine), Events),
263 events_nth_phrase(Events, N, Phrase)
264 ).
265
266 assert_whole(URI,Mod,Spec) :-
267 (Spec=extract(Spine); Spec=all),
268 forall( kern_get_events(spine_notenum(Mod,Spine), Events),
269 ( assert_id(item_sequence(whole(URI,Spine), Events)),
270 assert(item_(whole(URI,Spine))))).
271
272 assert_whole(URI,Mod,merge) :-
273 forall( kern_get_events(top_notenum(Mod), Events),
274 ( assert_id(item_sequence(merged(URI), Events)),
275 assert(item_(merged(URI))))).
276
277 assert_phrases(URI,Mod,Spec) :-
278 (Spec=extract(Spine); Spec=all),
279 forall( spine_nth_phrase(Mod,Spine,N,Phrase),
280 ( assert_id(item_sequence(phrase(URI,Spine,N), Phrase)),
281 assert(item_(phrase(URI,Spine,N))))).
282
283 assert_id(Fact) :- call(Fact) -> true; assert(Fact).
284
285
286 contains_group(D,XX) --> arb, group(D,XX,[]), rem.
287
288 group(D,X1,X2) -->
289 delimiter(D,open),
290 iterate(content(D),X1,X2),
291 delimiter(D,close).
292
293 content(_,[X|XX],XX) --> [event(X)].
294 content(D,XX,XX) --> {dif(D,D1)}, delimiter(D1,_).
295 content(D,X1,X2) --> group(D,X1,X2).
296 delimiter(D,T) --> [T/D].
297
298
299 %% dataset_random_subsets(+DS:dataset,+K:nonneg,+L:nonneg,-DSX:list(dataset)) is det.
300 %
301 % Returns L random subsets of K sequences each from dataset DS.
302 % K is the size of each random subset. L is the number of subsets
303 % returned. Each subset is a term of the form =|subset(DS,K,Indices)|=.
304 dataset_random_subsets(DS,K,L,DSX) :-
305 dataset_size(DS,N),
306 numlist(1,L,SubsetIndices),
307 maplist(\I^subset(DS,K,S)^random_subset(N,K,I,S),SubsetIndices,DSX).
308
309
310 %% uri_spine_pitches(+URI:uri,+S:nonneg,-P:list(pitch)) is det.
311 %% uri_spine_pitches(+URI:uri,-S:nonneg,-P:list(pitch)) is nondet.
312 %
313 % True when Pis a list of the pitches of notes in a spine S
314 % of the Humdrum file pointed to by URI.
315 uri_spine_pitches(URI,Spine,Pitches) :-
316 with_kern_module(URI, Mod,
317 kern_get_events(spine_pitch(Mod,Spine), Pitches)).
318
319 %% uri_spine_events(+URI:uri,+G:getter,+S:spine,-E:list(event(A))) is det.
320 %% uri_spine_events(+URI:uri,+G:getter,-S:spine,-E:list(event(A))) is nondet.
321 %
322 % Get list of events from a given spine in a Humdrum file. The kind of events extracted
323 % depends on the getter predicate G, which must be of type:
324 % ==
325 % getter(A) == pred(+M:module,-S:spine,-T:time,-E:event(A)).
326 % event(A) ---> event(A) ; pause.
327 % ==
328
329 uri_spine_events(URI,Getter,Spine,Events) :-
330 with_kern_module(URI, Mod,
331 kern_get_events(event_or_pause(Getter,Mod,Spine), Events)).
332
333 :- meta_predicate forall_kern(2,3).
334 forall_kern(Findspec,Pred) :-
335 with_status_line(
336 forall( call(Findspec,URI,SpineSpec), (
337 status('Loading: ~s',[URI]),
338 with_kern_module(URI, Mod, call(Pred,URI,Mod,SpineSpec))
339 ))),
340 format('Finished loading.\n',[]).
341
342 %% with_kern_module(+URI:uri, -M:module, +Goal:pred) is nondet.
343 %
344 % Calls Goal with M set to the name of a temporary moduling containing
345 % the information in the Kern file specified by URI.
346 with_kern_module(URI,Mod,Goal) :-
347 hum_uri_path(URI,Path),
348 with_kern_module(Path,utf8,Mod,Goal).
349
350 :- meta_predicate with_spine(+,1,-,-).
351 with_spine(Spine,Finder,URI,extract(Spine)) :- call(Finder,URI).
352
353 :- meta_predicate all_spines(+,-,-).
354 all_spines(Finder,URI,all) :- call(Finder,URI).
355
356 :- meta_predicate with_spec(+,+,-,-).
357 with_spec(SpineSpec,Finder,URI,SpineSpec) :- call(Finder,URI).
358
359 %% kern_uri(+FS:find_spec, -URI:uri) is nondet.
360 %
361 % True when URI refers to a Humdrum file that satisfies the given
362 % specification. The specification type =|find_spec|= is defined in the
363 % module header comment.
364 kern_uri(in(Dir),URI) :- rdf(URI,hum:directory,literal(Dir)).
365 kern_uri(under(Dir),URI) :- rdf(URI,hum:directory,literal(prefix(Dir),_)).
366 kern_uri(A;B, URI) :- kern_uri(A,URI); kern_uri(B,URI).
367 kern_uri((A,B), URI) :- kern_uri(A,URI), kern_uri(B,URI).
368 kern_uri(Prop=Val,URI) :- rdf(URI,Prop,Val).
369
370 %% spine_pitch(+Mod:module,-Spine:nonneg,-Time,-Pitch) is nondet.
371 spine_pitch(Mod,Spine,Time,P) :-
372 Mod:note(pitch(P),_,Time,Spine).
373
374 spine_pitch_dur(Mod,Spine,Time,P-D) :-
375 Mod:note(pitch(P),D,Time,Spine).
376
377 spine_notenum(Mod,Spine,Time,NN) :-
378 spine_pitch(Mod,Spine,Time,Pitch),
379 pitch_notenum(Pitch,NN).
380
381
382 %% get the highest pitch over all spines at each time slice.
383 top_notenum(Mod,Time,TopNN) :-
384 aggregate(max(NN),time_notenum(Mod,Time,NN),TopNN).
385
386 time_notenum(Mod,Time,NN) :-
387 Mod:note(pitch(P),_,Time,_),
388 pitch_notenum(P,NN).
389
390 spine_pause(Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
391 spine_pause(Mod,Spine,Time) :-
392 Mod:articulation(pause,Spine,Rec),
393 Mod:time(T,Rec),
394 Mod:duration(D,Rec),
395 Time is T+D.
396
397 spine_phrase(Mod,Spine,Time,Time-(OpenClose/Label)) :-
398 Mod:data(tok(Toks),Spine,Rec),
399 member(par(OpenClose,phrase-Label),Toks),
400 Mod:time(T,Rec),
401 ( OpenClose=open -> Time=T
402 ; OpenClose=close -> Mod:duration(D,Rec), Time is T+D
403 ).
404
405
406 twos(L) :- L=[2|L].
407
408 metre(4/4,1:M) :- twos(M).
409 metre(3/4,(3 rdiv 4):[3,2|M]) :- twos(M).
410 metre(6/8,(3 rdiv 4):[2,3|M]) :- twos(M).
411 metre(9/8,(9 rdiv 8):[3,3|M]) :- twos(M).
412 metre(12/8,(12 rdiv 8):[2,2,3|M]) :- twos(M).
413
414 %% metpos( +Bar:rational, +Metre:list(nat), +T0:rational, -R:nat) is det.
415 metpos(Bar,Metre,TT,R) :- metpos_(TT,[1/Bar|Metre],0,R).
416
417 metpos_(0,_,R,R) :- !.
418 metpos_(T0,[M0|MX],R0,R) :-
419 mod1(T0*M0,T1), succ(R0,R1),
420 metpos_(T1,MX,R1,R).
421
422 mod1(X,Y) :- Z is floor(X), Y is X - Z.
423
424 event_or_pause(Pred,Mod,Spine,Time,event(X)) :- call(Pred,Mod,Spine,Time,X).
425 event_or_pause(_,Mod,Spine,Time,pause) :- spine_pause(Mod,Spine,Time).
426
427 event_or_delim(Pred,Mod,Spine,Time,2-event(X)) :-
428 call(Pred,Mod,Spine,Time,X).
429 event_or_delim(_,Mod,Spine,Time,Prio-Delim) :-
430 spine_phrase(Mod,Spine,Time,_-Delim),
431 delim_prio(Delim,Prio).
432
433 delim_prio(open/_,1).
434 delim_prio(close/_,1).
435
436 %% events_nth_phrase(+Events:list(event(A)), +N:natural, -P:list(A)) is det.
437 % Extract Nth phrase from a list of events or pauses.
438 events_nth_phrase(Events,N,Phrase) :-
439 phrase((seqmap_with_sep([pause],seqmap(event),Phrases), opt([pause])),Events,[]),
440 nth1(N,Phrases,Phrase).
441
442 event(X) --> [event(X)].