comparison dml-cla/python/chord_seq_key_relative.py @ 0:718306e29690 tip

committing public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:718306e29690
1 #!/usr/bin/python
2 # Part of DML (Digital Music Laboratory)
3 # Copyright 2014-2015 Daniel Wolff, City University
4
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public
16 # License along with this library; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 # -*- coding: utf-8 -*-
20 __author__="wolffd"
21
22 # json testfile
23 #
24 #{ "module":"chord_seq_key_relative",
25 # "function":"aggregate",
26 # "arguments": [[
27 # {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"},
28 # "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"},
29 # "trackuri": "Eins"},
30 # {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"},
31 # "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"}}
32 # ]]
33 #}
34
35 # these for file reading etc
36 import re
37 import os
38 import csv
39 import numpy
40
41 # spmf functions
42 import chord_seq_spmf_helper as spmf
43
44 from aggregate import *
45 from csvutils import *
46
# ---
# roots
# ---
# Natural note names; sharps and flats are handled separately by note2num().
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# create a dictionary for efficiency:
# natural note name -> pitch class (semitones above C)
roots_dic = dict(zip(chord_roots, [0, 2, 4, 5, 7, 9, 11]))

# mode label -> mode index (0 = major, 1 = minor)
mode_lbls = ['major', 'minor']
mode_dic = dict(zip(mode_lbls, range(0, 2)))

# ---
# types
# ---
# chord type suffix -> index into type_labels
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = dict(zip(type_labels, range(0, len(type_labels))))

# text label of a bass note, indexed by its distance in semitones above the
# chord root; empty entries have no label
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions: scale-degree names indexed by semitones above the key root
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

fun_dic_maj = dict(enumerate(root_funs_maj))
fun_dic_min = dict(enumerate(root_funs_min))

# Regex that separates chord root and type, and gets the optional bass note
# after a slash.  Roots and bass notes may carry any number of sharps (#) or
# flats (b); the root 'N' stands for "no chord".
# The character classes deliberately contain no commas: inside [...] a comma
# is a literal character, not a separator.
p = re.compile(r'(?P<root>[A-GN](#|b)*)(?P<type>[a-z0-9]*)(/(?P<base>[A-G](#|b)*))*')

# Key strings such as "C major" or "C# / Db minor".
# The mode is a real alternation; the former [major|minor]+ was a character
# class that accepted any mix of those letters (and '|').
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')

# Feature file names: <clipid>_<vamp...>.<ext>.  The dot in front of the
# extension is escaped so that it only matches a literal '.'.
pclip = re.compile(r'(?P<clipid>[A-Z0-9]+(\-|_)[A-Z0-9]+((\-|_)[A-Z0-9]+)*((\-|_)[A-Z0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')
80
81
82
def chords_from_csv(filename):
    """Read chord estimates from a CSV file.

    We assume CSV rows of the form ``time, chord_string``.  The reading is
    delegated to csv_map_rows(); every row is converted to the tuple
    ``(float(time), chord_string)``.
    """
    def to_chord_event(row):
        # column 0: time stamp, column 1: chord label
        return (float(row[0]), row[1])

    # NOTE(review): the 2 is presumably the expected number of columns --
    # confirm against csvutils.csv_map_rows
    return csv_map_rows(filename, 2, to_chord_event)
87
def keys_from_csv(filename):
    """Read key estimates from a CSV file.

    We assume CSV rows of the form ``time, key_code, key_string``.  The
    reading is delegated to csv_map_rows(); every row is converted to the
    tuple ``(float(time), key_code, key_string)``.
    """
    def to_key_event(row):
        # column 0: time stamp, column 1: key code, column 2: key label
        return (float(row[0]), row[1], row[2])

    # NOTE(review): the 3 is presumably the expected number of columns --
    # confirm against csvutils.csv_map_rows
    return csv_map_rows(filename, 3, to_key_event)
92
# parsers for n3 / csv
# Each table maps the "tag" of a tagged input value to the function that
# reads it; only the 'csv' tag is registered here.
key_parser_table = dict(csv=keys_from_csv)
chord_parser_table = dict(csv=chords_from_csv)
96
# Number of tracks seen by the current aggregate() call.  It is also used as
# a numeric stand-in identifier for inputs without a 'trackuri'.
trackctr = 0


def aggregate(inputs, opts=None):
    """Extract frequent key-relative chord sequences from chord / key data.

    inputs: collection of dictionaries, one per track, with the entries
        'keys'     -- tagged key feature, e.g. {"tag": "csv", "value": path}
                      (qm_vamp_key_standard output)
        'chords'   -- tagged chord feature (chordino_simple output)
        'trackuri' -- optional track identifier; a running number is used
                      when it is missing
        @note: in future we could add support for qm_key_tonic input

    opts: optional dictionary with
        "spm_algorithm"  -- "TKS", "ClaSP", "SPADE"; anything else runs
                            CM-SPADE (default)
        "spm_options"    -- first algorithm option, e.g. "70%"; defaults to
                            the minimum support below ("" for TKS)
        "spm_maxseqs"    -- number of sequences (default 500, halved)
        "spm_minlen"     -- min. length of returned sequences (default 2)
        "spm_maxtime"    -- mining timeout handed to spmf (default 60, halved)
        "spm_ignore_n"   -- non-zero: drop 'N' (no chord) events (default 1)
        "spm_minsupport" -- min. support in percent (default 50)

    Returns {'result': {'sequences': seq, 'support': sup}, 'stats': st},
    where st is whatever for_each() returned and the sequences are sorted by
    descending support.

    Raises Exception if fewer than two tracks were processed.
    """
    global trackctr

    print_status('In chord_seq_key_relative')

    if opts is None:
        opts = {}

    # Count the tracks of *this* call only.  Without the reset, the
    # "Need more than 1 track" check below would pass for every call after
    # the first one in a long-running process.
    trackctr = 0

    # SPADE, TKS or ClaSP algorithm?
    algo = opts.get("spm_algorithm", "CM-SPADE")

    # Number of sequences.  The option is converted first and halved
    # afterwards so that string values (e.g. from JSON) work as well.
    # NOTE(review): the halved value also caps the merged output of both
    # modes further down -- confirm that this is intended.
    maxseqs = int(opts.get("spm_maxseqs", 500)) // 2

    # min. length of sequences
    minlen = int(opts.get("spm_minlen", 2))

    # timeout passed to each spmf run (one run per mode)
    maxtime = int(opts.get("spm_maxtime", 1 * 60)) // 2

    # whether 'N' (no chord) events are dropped
    ignoreN = int(opts.get("spm_ignore_n", 1))

    # min. support in percent
    minsup = int(opts.get("spm_minsupport", 50))

    # we save the mode of each piece to treat major (0) and minor (1)
    # pieces separately:
    # out_chords[mode][trackuri] = [ (time,key,mode,fun,typ,bfun) ]
    out_chords = [dict(), dict()]

    def accum(item):
        global trackctr
        # increase virtual identifier
        trackctr += 1

        keys = decode_tagged(key_parser_table, item['keys'])

        # get most frequent key
        key, mode = most_frequent_key(keys)

        relchords = []
        for (time, chord) in decode_tagged(chord_parser_table, item['chords']):
            # get chord function
            (root, fun, typ, bfun) = chord2function(chord, key, mode)

            # ignore chords that are 'N':
            # a. the open pattern matching allows for arbitrary chords
            #    to appear inbetween those in a sequence
            # b. the N chord potentially maps to any contents, so the
            #    inclusion of N chord has limited (or no) use
            # (logical "and": a bitwise "&" misfires for values such as 2)
            if ignoreN and root == -1:
                continue

            # add to chords of this clip
            relchords.append((time, key, mode, fun, typ, bfun))

        # save results into dict for this track
        trackuri = item.get('trackuri', trackctr)
        out_chords[mode][trackuri] = relchords

    # collate relative chord information per file
    st = for_each(inputs, accum)

    if trackctr < 2:
        raise Exception("Need more than 1 track")

    seq = [[], []]
    sup = [[], []]

    for mode in [0, 1]:
        # NOTE(review): a mode without any track still gets mined on an
        # empty input -- confirm that spmf copes with that.
        # write to spmf file
        spmffile = spmf.relchords2spmf(out_chords[mode])
        seqfile = None
        try:
            # run sequential pattern matching
            if algo == "TKS":
                algoopts = opts.get("spm_options", "")
                seqfile = spmf.spmf(spmffile.name, 'TKS', [str(maxseqs), algoopts])
            else:
                if algo in ("ClaSP", "SPADE"):
                    algoname = algo
                else:
                    print_status('Running CM-SPADE algo')
                    algoname = 'CM-SPADE'
                algoopts = opts.get("spm_options", str(minsup) + "%")
                seqfile = spmf.spmf(spmffile.name, algoname,
                                    [algoopts, str(minlen)], timeout=maxtime)

            # parse spmf output
            seq[mode], sup[mode] = spmf.spmf2table(seqfile)
        finally:
            # clean up, also when mining or parsing failed
            os.remove(spmffile.name)
            if seqfile is not None:
                os.remove(seqfile)

    # fold back sequences and support
    # note that this results in the sequences being truncated together below
    seq = [item for sublist in seq for item in sublist]
    sup = [item for sublist in sup for item in sublist]

    # filter according to min. sequence length and number of sequences
    seq_out = []
    sup_out = []

    # sort in descending support and pick up sequences of sufficient length
    for i in numpy.argsort(sup)[::-1]:
        if len(seq_out) >= maxseqs:
            break
        if len(seq[i]) >= minlen:
            seq_out.append(seq[i])
            sup_out.append(sup[i])

    return {'result': {'sequences': seq_out, 'support': sup_out},
            'stats': st}
234
235
# most simple note2num
def note2num(notein='Cb'):
    """Convert a note name such as 'C', 'F#' or 'Bb' into a pitch class.

    The first character is looked up in roots_dic; every following '#'
    raises and every following 'b' lowers the result by one semitone, so
    double accidentals ('C##', 'Dbb') are handled as well.

    Returns an integer in 0..11.
    Raises KeyError if the first character is not a known root and
    ValueError if anything other than '#' or 'b' follows it.
    """
    base = roots_dic[notein[0]]

    shift = 0
    for accidental in notein[1:]:
        if accidental == '#':
            shift += 1
        elif accidental == 'b':
            shift -= 1
        else:
            # a bare "raise" has no active exception to re-raise here, so
            # report the problem with a proper exception instead
            raise ValueError("Error parsing chord " + notein)

    return (base + shift) % 12
249
250
# convert key to number
def key2num(keyin='C major'):
    """Convert a key string such as 'C major' into (key, mode) numbers.

    The string is parsed with the module regex p2.  key is the pitch class
    of the tonic as returned by note2num(); mode is looked up in mode_dic
    (0 = major, 1 = minor) and is -1 when the parsed mode is empty.

    Raises ValueError if the string cannot be parsed or names a mode that
    is not in mode_dic.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        raise ValueError("Error parsing key " + keyin)

    # get pitch class of the tonic, adapted for sharps and flats
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode. care for (unknown) string
    # ---
    mode = sepstring.group('mode')
    if not mode:
        return (key, -1)

    if mode not in mode_dic:
        # e.g. a garbled mode label; fail with the offending key string
        # instead of an opaque KeyError
        raise ValueError("Error parsing key " + keyin)

    return (key, mode_dic[mode])
276
277
278
# convert chord to relative function
def chord2function(cin='B', key=3, mode=0):
    """Convert a chord label into its function relative to a key.

    cin:  chord label, e.g. 'C', 'Bbm7', 'G7/B' or 'N' (no chord)
    key:  pitch class (0..11) of the key's tonic
    mode: accepted for symmetry with the other helpers; not used here

    Returns (root, fun, typ, bfun):
        root -- pitch class of the chord root
        fun  -- chord root in semitones above the key tonic
        typ  -- index of the chord type in type_dic
        bfun -- bass note in semitones above the chord root (0 if the
                label names no bass note)
    or (-1, -1, -1, -1) for the 'N' chord.

    Raises ValueError if the label cannot be parsed and KeyError if the
    chord type is not listed in type_dic.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if sepstring is None:
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    if sepstring.group('root') == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(sepstring.group('root'))
    chordtype = sepstring.group('type')
    try:
        typ = type_dic[chordtype]
    except KeyError:
        raise KeyError("Unknown chord type '%s' in chord %s" % (chordtype, cin))

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root
    # ---
    base = sepstring.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        # this standard gives 1 as a base key if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---
    return (root, fun, typ, bfun)
315
# reads in any csv and returns a list of structure
# time(float), data1, data2 ....data2
def read_vamp_csv(filein=''):
    """Read a comma-separated file into a list of rows.

    Every returned row is a list whose first element is the first column
    converted to float, followed by the remaining columns as strings.
    Blank lines are skipped.

    Raises ValueError if a first column is not a number.
    """
    import sys

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(filein, 'r', newline='')
    else:
        csvfile = open(filein, 'rb')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # blank line: nothing to convert
                continue
            output.append([float(row[0])] + row[1:])
    return output
325
326
327
# histogram of the last entry in a list
# returns the most frequently used key
def histogram(keysin=()):
    """Count how often each last-column value occurs in keysin.

    keysin: iterable of rows (sequences); only row[-1] is looked at.

    Returns (histo, top) where histo maps each value to its number of
    occurrences and top is a value with the highest count.

    Raises ValueError if keysin contains no rows.
    """
    # build histogram
    histo = dict()
    for row in keysin:
        label = row[-1]
        histo[label] = histo.get(label, 0) + 1

    if not histo:
        # max() would fail with a far less helpful message
        raise ValueError("histogram() needs at least one row")

    # return most frequent key
    return (histo, max(histo, key=histo.get))
338
def most_frequent_key(keys):
    """Return (key, mode) of the most frequent key estimate of a track.

    keys: rows of (time, key_code, key_string).  Rows whose key_string is
    '(unknown)' are ignored; the most frequent remaining key_string is
    converted with key2num().

    Raises ValueError if no row with a known key is left.
    """
    # delete 'unknown' keys
    known = [(time, knum, key) for (time, knum, key) in keys
             if key != '(unknown)']

    if not known:
        # fail here with a clear message, not deep inside max()
        raise ValueError("No known key estimate available")

    # aggregate to one key
    (histo, skey) = histogram(known)

    # get key number
    return key2num(skey)
349
350
351
def fun2txt(fun, typ, bfun, mode):
    """Translate a relative chord function into a text label.

    fun:  chord root in semitones above the key tonic; negative means
          "no chord"
    typ:  index into type_labels
    bfun: bass note in semitones above the chord root; negative or
          unlabelled values add no bass suffix
    mode: 0 for major, 1 for minor

    Returns 'N' for a negative fun, otherwise a label made of the mode
    marker '(M)' / '(m)', the scale degree, the chord type and an optional
    '/<bass>' suffix.

    Raises ValueError for a non-negative fun with a mode other than 0 or 1.
    """
    if fun < 0:
        return 'N'

    # now we can interpret this function
    # when given the mode of major or minor.
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        # without this branch pfun / md would be unbound further down
        raise ValueError("Unknown mode %r: expected 0 (major) or 1 (minor)" % (mode,))

    typetxt = type_labels[typ] if typ > 0 else ''

    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
    return md + pfun + typetxt + blb
371
def fun2num(fun, typ, bfun, mode):
    """Pack a relative chord function into a single integer code.

    Returns 0 when fun is -1.  Otherwise each of mode, fun, typ and bfun is
    incremented by one and weighted with 1000000, 10000, 100 and 1
    respectively, and the weighted values are added up.
    """
    if fun == -1:
        return 0

    fields = (mode + 1, fun + 1, typ + 1, bfun + 1)
    weights = (1000000, 10000, 100, 1)
    return sum(value * weight for value, weight in zip(fields, weights))
378
379
if __name__ == "__main__":
    # Command-line entry point.
    # NOTE(review): neither folder2histogram nor c2j is defined or imported
    # in the visible part of this file; unless one of the star imports
    # provides them, running this module as a script fails with NameError.
    #chords2functions()
    # print() with a single string behaves the same on Python 2 and 3
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    # sys.argv[1]
    result = folder2histogram()
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)