Mercurial > hg > dml-open-cliopatria
comparison dml-cla/python/chord_seq_key_relative.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:718306e29690 |
---|---|
1 #!/usr/bin/python | |
2 # Part of DML (Digital Music Laboratory) | |
3 # Copyright 2014-2015 Daniel Wolff, City University | |
4 | |
5 # This program is free software; you can redistribute it and/or | |
6 # modify it under the terms of the GNU General Public License | |
7 # as published by the Free Software Foundation; either version 2 | |
8 # of the License, or (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public | |
16 # License along with this library; if not, write to the Free Software | |
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | |
19 # -*- coding: utf-8 -*- | |
20 __author__="wolffd" | |
21 | |
22 # json testfile | |
23 # | |
24 #{ "module":"chord_seq_key_relative", | |
25 # "function":"aggregate", | |
26 # "arguments": [[ | |
27 # {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"}, | |
28 # "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"}, | |
29 # "trackuri": "Eins"}, | |
30 # {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"}, | |
31 # "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"}} | |
32 # ]] | |
33 #} | |
34 | |
35 # these for file reading etc | |
36 import re | |
37 import os | |
38 import csv | |
39 import numpy | |
40 | |
41 # spmf functions | |
42 import chord_seq_spmf_helper as spmf | |
43 | |
44 from aggregate import * | |
45 from csvutils import * | |
46 | |
# ---
# chord roots
# ---
# natural note names, listed in ascending pitch order starting at C
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# lookup table: natural note name -> semitone offset above C
roots_dic = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}

# key modes and their numeric codes (major = 0, minor = 1)
mode_lbls = ['major', 'minor']
mode_dic = {label: code for code, label in enumerate(mode_lbls)}

# ---
# chord types
# ---
# the numeric code of a chord type is its position in this list
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = {label: code for code, label in enumerate(type_labels)}

# labels for the bass note, indexed by its semitone interval above the
# chord root; an empty string means that interval has no label
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# ---
# harmonic functions
# ---
# roman-numeral labels indexed by the semitone distance between the chord
# root and the key, one list per mode
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

# lookup tables: semitone distance -> roman-numeral label
fun_dic_maj = dict(enumerate(root_funs_maj))
fun_dic_min = dict(enumerate(root_funs_min))
# Regular expressions for the label formats handled in this module.
# NOTE: inside [...] a comma or '|' is matched literally, so neither is used
# as a separator below; alternatives are written as (a|b) groups instead.

# chord label: root (A-G with any number of '#' / 'b' accidentals, or 'N'
# for "no chord"), optional lower-case/digit chord type, optional '/bass'
p = re.compile(r'(?P<root>[A-GN](#|b)*)(?P<type>[a-z0-9]*)(/(?P<base>[A-G](#|b)*))*')

# key label such as "C major" or "F# / Gb minor": first tonic spelling,
# optional alternative spellings, then the mode word
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')

# feature file name: <clipid>_<type starting with "vamp">.<ext>
pclip = re.compile(r'(?P<clipid>[A-Z0-9]+(\-|_)[A-Z0-9]+((\-|_)[A-Z0-9]+)*((\-|_)[A-Z0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')
80 | |
81 | |
82 | |
def chords_from_csv(filename):
    """Read a chord CSV file and return its rows as (time, chord_string).

    The file is expected to hold rows of the form: time, chord_string.
    Reading is delegated to csv_map_rows; the literal 2 is presumably the
    expected number of columns -- TODO confirm against csvutils.
    """
    def to_chord_event(row):
        # first column is the time stamp, second the chord label
        return (float(row[0]), row[1])

    return csv_map_rows(filename, 2, to_chord_event)
87 | |
def keys_from_csv(filename):
    """Read a key CSV file and return its rows as (time, key_code, key_string).

    The file is expected to hold rows of the form: time, key_code, key_string.
    Reading is delegated to csv_map_rows; the literal 3 is presumably the
    expected number of columns -- TODO confirm against csvutils.
    """
    def to_key_event(row):
        # only the time stamp is converted; code and label stay strings
        return (float(row[0]), row[1], row[2])

    return csv_map_rows(filename, 3, to_key_event)
92 | |
# parser lookup tables, keyed by the input format tag; only 'csv' is
# available so far (n3 is not implemented)
key_parser_table = dict(csv=keys_from_csv)
chord_parser_table = dict(csv=chords_from_csv)
96 | |
# aggregate() below extracts key-relative chord sequences from chord / key data.
# input: list of items, each holding one instance of these features:
#    ['chords']    e.g. chordino_simple.n3_1a812
#    ['keys']      e.g. qm_vamp_key_standard.n3_50ac9
#    ['trackuri']  track identifier (optional)
# @note: in future we could add support for qm_key_tonic input
#
# opts: dictionary, e.g. with opts["spm_algorithm"] = SPADE, TKS or ClaSP
#       and opts["spm_options"] = "70%"
# output:
#    'sequences': seq, 'support': sup

# module-wide counter of visited tracks; its current value also serves as
# the fallback identifier for tracks that come without a 'trackuri'
trackctr = 0
109 | |
def aggregate(inputs, opts=None):
    """Mine frequent key-relative chord sequences from a collection of tracks.

    For every input item the key estimates are reduced to the most frequent
    key and each chord is expressed relative to that key. The per-track chord
    lists are grouped by the mode of the track's key (index 0 = major,
    1 = minor), each group is written to an SPMF input file and mined with the
    selected algorithm, and the sequences found for both groups are merged and
    ranked by descending support.

    inputs -- iterable of dict-like items with the tagged values 'keys' and
              'chords' (decoded through key_parser_table / chord_parser_table)
              and an optional 'trackuri'
    opts   -- optional dictionary of settings:
              "spm_algorithm"  "TKS", "ClaSP" or "SPADE"; any other value
                               (default "CM-SPADE") runs CM-SPADE
              "spm_options"    option string handed to the algorithm; defaults
                               to "" for TKS and to "<spm_minsupport>%" otherwise
              "spm_maxseqs"    default 500; half of it limits the number of
                               returned sequences (and is passed to TKS)
              "spm_minlen"     minimum sequence length, default 2
              "spm_maxtime"    default 60; half of it is passed to the miner
                               as `timeout` (presumably seconds -- TODO confirm)
              "spm_ignore_n"   non-zero (default 1) drops 'N' (no chord) entries
              "spm_minsupport" default 50, used as a percentage

    Returns {'result': {'sequences': [...], 'support': [...]}, 'stats': st}
    where st is whatever for_each() returned.

    Raises Exception if fewer than two tracks were visited during this call.
    """
    global trackctr

    if opts is None:
        opts = {}

    print_status('In chord_seq_key_relative')

    # SPADE, TKS or ClaSP algorithm?
    algo = opts.get("spm_algorithm", "CM-SPADE")

    # number of sequences (halved; presumably because the two modes are
    # mined separately -- TODO confirm)
    maxseqs = int(opts.get("spm_maxseqs", 500) / 2)

    # min. length of sequences
    minlen = int(opts.get("spm_minlen", 2))

    # time limit handed to the miner for each mode
    maxtime = int(opts.get("spm_maxtime", 1 * 60) / 2)

    # non-zero: leave out 'N' chords
    ignoreN = int(opts.get("spm_ignore_n", 1))

    # min. support in percent
    minsup = int(opts.get("spm_minsupport", 50))

    # the tracks are kept separately per mode:
    # out_chords[mode][trackuri] = [(time, key, mode, fun, typ, bfun), ...]
    out_chords = [dict(), dict()]

    # remember the counter so that the "enough tracks" check below only
    # counts the tracks of this call, not those of earlier calls
    tracks_before = trackctr

    def accum(item):
        global trackctr
        # increase virtual identifier
        trackctr += 1

        # reduce the key estimates of this track to its most frequent key
        keys = decode_tagged(key_parser_table, item['keys'])
        key, mode = most_frequent_key(keys)

        relchords = []
        for (time, chord) in decode_tagged(chord_parser_table, item['chords']):
            # get chord function
            (root, fun, typ, bfun) = chord2function(chord, key, mode)

            # ignore chords that are 'N':
            # a. the open pattern matching allows for arbitrary chords
            #    to appear in between those in a sequence
            # b. the N chord potentially maps to any contents, so the
            #    inclusion of N chords has limited (or no) use
            # (logical test: any non-zero ignoreN enables the filter)
            if ignoreN and root == -1:
                continue

            # add to chords of this clip
            relchords.append((time, key, mode, fun, typ, bfun))

        # save results into dict for this track
        trackuri = item.get('trackuri', trackctr)
        out_chords[mode][trackuri] = relchords

    # collate relative chord information per file
    st = for_each(inputs, accum)

    if trackctr - tracks_before < 2:
        raise Exception("Need more than 1 track")

    seq = [[], []]
    sup = [[], []]

    for mode in [0, 1]:
        # write to spmf file
        spmffile = spmf.relchords2spmf(out_chords[mode])
        seqfile = None
        try:
            # run sequential pattern matching
            if algo == "TKS":
                algoopts = opts.get("spm_options", "")
                seqfile = spmf.spmf(spmffile.name, 'TKS', [str(maxseqs), algoopts])
            else:
                if algo in ("ClaSP", "SPADE"):
                    algoname = algo
                else:
                    print_status('Running CM-SPADE algo')
                    algoname = 'CM-SPADE'
                algoopts = opts.get("spm_options", str(minsup) + "%")
                seqfile = spmf.spmf(spmffile.name, algoname,
                                    [algoopts, str(minlen)], timeout=maxtime)

            # parse spmf output
            seq[mode], sup[mode] = spmf.spmf2table(seqfile)
        finally:
            # clean up the temporary files even if mining or parsing failed
            os.remove(spmffile.name)
            if seqfile is not None:
                os.remove(seqfile)

    # fold back sequences and support
    # note that this results in the sequences being truncated together below
    seq = [item for sublist in seq for item in sublist]
    sup = [item for sublist in sup for item in sublist]

    # filter according to min. sequence length and number of sequences
    seq_out = []
    sup_out = []
    seq_count = 0

    # sort in descending support and pick up sequences of sufficient length
    for i in numpy.argsort(sup)[::-1]:
        if len(seq[i]) >= minlen:
            seq_out.append(seq[i])
            sup_out.append(sup[i])
            seq_count += 1

        if seq_count >= maxseqs:
            break

    return {'result': {'sequences': seq_out, 'support': sup_out},
            'stats': st}
234 | |
235 | |
236 # most simple note2num | |
def note2num(notein = 'Cb'):
    """Convert a note name such as 'C', 'F#' or 'Bb' into a pitch class 0..11.

    The first character must be a natural note name found in roots_dic;
    every following character must be an accidental, where each '#' raises
    and each 'b' lowers the pitch by one semitone (so 'C##' gives 2).

    Raises ValueError if the note name cannot be parsed.
    """
    if not notein or notein[0] not in roots_dic:
        raise ValueError("Error parsing chord " + str(notein))

    num = roots_dic[notein[0]]
    for accidental in notein[1:]:
        if accidental == 'b':
            num -= 1
        elif accidental == '#':
            num += 1
        else:
            raise ValueError("Error parsing chord " + notein)

    # wrap around the octave, e.g. 'Cb' -> 11 and 'B#' -> 0
    return num % 12
249 | |
250 | |
251 # convert key to number | |
def key2num(keyin = 'C major'):
    """Convert a key label such as 'C major' into a (key, mode) pair.

    key is the pitch class of the tonic as returned by note2num, mode is the
    numeric code from mode_dic; -1 is returned as mode if the label carries
    no mode at all.

    Raises ValueError if the label does not match the key pattern p2 or if
    its mode is not listed in mode_dic.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        raise ValueError("Error parsing key " + keyin)

    # get pitch class of the tonic, adapted for sharps / flats
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode
    # ---
    mode = sepstring.group('mode')
    if not mode:
        return (key, -1)

    if mode not in mode_dic:
        raise ValueError("Error parsing key " + keyin + ": unknown mode " + mode)

    return (key, mode_dic[mode])
276 | |
277 | |
278 | |
279 # convert chord to relative function | |
def chord2function(cin = 'B',key=3, mode=0):
    """Express a chord label relative to a key.

    cin  -- chord label, matched against the chord pattern p
            (root, optional type, optional '/bass')
    key  -- pitch class (0..11) of the key the chord is related to
    mode -- accepted for interface compatibility; not used here

    Returns (root, fun, typ, bfun):
      root -- pitch class of the chord root
      fun  -- semitone distance of the root above the key, 0..11
      typ  -- chord type code from type_dic
      bfun -- semitone distance of the bass note above the chord root,
              0 if no bass note is given
    or (-1, -1, -1, -1) for the 'N' (no chord) label.

    Raises ValueError if the label cannot be parsed or its chord type is
    not listed in type_dic.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if sepstring is None:
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    if sepstring.group('root') == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(sepstring.group('root'))
    chord_type = sepstring.group('type')
    if chord_type not in type_dic:
        raise ValueError("Error parsing chord " + cin + ": unknown type " + chord_type)
    typ = type_dic[chord_type]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a bass note?
    # if yes return it relative to the chord root
    # ---
    base = sepstring.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        # the chord root itself is the bass note if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root, fun, typ, bfun)
315 | |
316 # reads in any csv and returns a list of structure | |
317 # time(float), data1, data2 ....data2 | |
def read_vamp_csv(filein = ''):
    """Read a comma-separated file into a list of rows.

    Each returned row is a list whose first element is the first column
    converted to float (the time stamp); the remaining columns are kept as
    strings. Completely empty lines are skipped.

    Raises ValueError if a first column cannot be converted to float.
    """
    # the csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3
    if str is bytes:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # blank line: nothing to convert
                continue
            output.append([float(row[0])] + row[1:])
    return output
325 | |
326 | |
327 | |
328 # histogram of the last entry in a list | |
329 # returns the most frequently used key | |
def histogram(keysin = None):
    """Count how often each value occurs in the last column of the rows.

    keysin -- iterable of indexable rows; only row[-1] is looked at

    Returns (histo, most_frequent) where histo maps each last-column value
    to its number of occurrences and most_frequent is the value with the
    highest count.

    Raises ValueError if there are no rows to count.
    """
    # build histogram
    histo = dict()
    for row in keysin or ():
        histo[row[-1]] = histo.get(row[-1], 0) + 1

    if not histo:
        raise ValueError("Cannot determine the most frequent entry of an empty list")

    # return most frequent key
    return (histo, max(histo, key=histo.get))
338 | |
def most_frequent_key(keys):
    """Reduce a list of key estimates to the single most frequent key.

    keys -- iterable of (time, key_code, key_string) triples

    Estimates whose key_string is '(unknown)' are left out. The most
    frequent remaining key_string is converted with key2num.

    Returns the (key, mode) pair produced by key2num.

    Raises ValueError if no known key estimate is left.
    """
    # delete 'unknown' keys
    known = [(time, knum, key) for (time, knum, key) in keys
             if key != '(unknown)']
    if not known:
        raise ValueError("No known key estimates available")

    # aggregate to one key
    (histo, skey) = histogram(known)

    # get key number
    return key2num(skey)
349 | |
350 | |
351 | |
def fun2txt(fun,typ, bfun,mode):
    """Render a relative chord as text, e.g. '(M)V7' or '(m)IIIm/5'.

    fun  -- semitone distance of the chord root above the key; a negative
            value stands for "no chord" and gives 'N'
    typ  -- chord type code, index into type_labels
    bfun -- semitone distance of the bass note above the chord root, index
            into base_labels; intervals without a label are left out
    mode -- 0 for major, 1 for minor

    Returns the mode marker '(M)' or '(m)', followed by the roman-numeral
    function, the chord type and, if labelled, '/' plus the bass interval.

    Raises ValueError if mode is neither 0 nor 1.
    """
    if fun < 0:
        return 'N'

    # the function can only be interpreted when the mode is known
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        raise ValueError("Unknown mode " + str(mode))

    type_lbl = type_labels[typ] if typ > 0 else ''

    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
    return md + pfun + type_lbl + blb
371 | |
def fun2num(fun,typ, bfun,mode):
    """Pack a relative chord into a single number.

    Each of mode, fun, typ and bfun is increased by one and the four values
    are combined as (mode+1)*1000000 + (fun+1)*10000 + (typ+1)*100 + (bfun+1).
    A fun of -1 gives 0.
    """
    if fun == -1:
        return 0

    # build the number field by field, shifting by two decimal digits each time
    code = mode + 1
    for field in (fun, typ, bfun):
        code = code * 100 + (field + 1)
    return code
378 | |
379 | |
if __name__ == "__main__":
    # Script entry point. The print calls are written in the parenthesised
    # form, which behaves the same on Python 2 and Python 3.
    # NOTE(review): folder2histogram and c2j are neither defined nor
    # explicitly imported in this file; unless one of the star imports
    # provides them this block fails with a NameError -- confirm.
    #chords2functions()
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    # sys.argv[1]
    result = folder2histogram()
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)