Mercurial > hg > dml-open-backendtools
comparison collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # Copyright 2014-2015 Daniel Wolff, City University | |
3 | |
4 # This program is free software; you can redistribute it and/or | |
5 # modify it under the terms of the GNU General Public License | |
6 # as published by the Free Software Foundation; either version 2 | |
7 # of the License, or (at your option) any later version. | |
8 # | |
9 # This program is distributed in the hope that it will be useful, | |
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 # GNU General Public License for more details. | |
13 # | |
14 # You should have received a copy of the GNU General Public | |
15 # License along with this library; if not, write to the Free Software | |
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | |
18 #!/usr/bin/python | |
19 # -*- coding: utf-8 -*- | |
20 __author__="Daniel Wolff" | |
21 | |
22 import re | |
23 | |
24 # these for file reading etc | |
25 import fnmatch | |
26 import os | |
27 import csv | |
28 import spmf | |
29 | |
30 import sys | |
31 sys.path.insert(0, '../tools/') | |
32 import csv2json as c2j | |
33 | |
# ---
# roots
# ---
# natural note names, ascending from C
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# note name -> number of semitones above C
roots_dic = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}

# key modes and their numeric codes (major = 0, minor = 1)
mode_lbls = ['major', 'minor']
mode_dic = {label: code for code, label in enumerate(mode_lbls)}

# ---
# types
# ---
# chord type suffixes; the list position is the numeric type code
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = {label: code for code, label in enumerate(type_labels)}

# bass note labels, indexed by semitones above the chord root.
# The '' placeholders repeat, so this list cannot be inverted into a
# label -> index dictionary.
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]

# ---
# functions
# ---
# scale degree names, indexed by semitones above the key root,
# once for major and once for minor keys
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion (alternative labelling, not active):
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

# semitone offset -> scale degree name
fun_dic_maj = dict(enumerate(root_funs_maj))
fun_dic_min = dict(enumerate(root_funs_min))
# regex that separates chord root, chord type and the optional bass note,
# e.g. "Bbm7/D" -> root "Bb", type "m7", base "D". "N" is the no-chord code.
# Root and bass accept sharps (#) as well as flats (b).
# NOTE: the commas inside the character classes are literal, so ',' is
# accepted as well; this is left untouched to keep matching unchanged.
p = re.compile(r'(?P<root>[A-G,N](#|b)*)(?P<type>[a-z,0-9]*)(/(?P<base>[A-G](#|b)*))*')

# key labels such as "C major" or "F# / Gb minor": the first spelling is
# captured as the key. The mode must be the word "major" or "minor"
# (an alternation, not a character class of those letters).
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')

# feature file names: <clipid>_<vamp transform name>.<ext>
# The dot before the extension is matched literally.
pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')

# vamp transform names as they appear in the feature file names
ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key',
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}
70 | |
def note2num(notein = 'Cb'):
    """Convert a note name to its pitch class.

    notein: a natural note letter (a key of roots_dic) followed by any
            number of accidentals, '#' raising and 'b' lowering the
            pitch by one semitone each, e.g. 'C', 'F#', 'Bb', 'C##'.

    Returns the pitch class as an integer in 0..11 (C = 0).
    Raises ValueError if the note letter or an accidental is not recognised.
    """
    try:
        pitch = roots_dic[notein[0]]
    except (KeyError, IndexError):
        raise ValueError("Error parsing chord " + notein)

    # apply every accidental, not only the first one
    for accidental in notein[1:]:
        if accidental == '#':
            pitch += 1
        elif accidental == 'b':
            pitch -= 1
        else:
            raise ValueError("Error parsing chord " + notein)

    # wrap around the octave, e.g. 'Cb' -> 11 and 'B#' -> 0
    return pitch % 12
84 | |
85 | |
def key2num(keyin = 'C major'):
    """Convert a key label to numeric key and mode.

    keyin: key label matched by the module pattern p2, e.g. 'C major'
           or 'F# / Gb minor' (the first spelling is used).

    Returns a tuple (key, mode): key is the pitch class from note2num,
    mode is the code from mode_dic, or -1 if the label carries no mode.
    Raises ValueError if the label cannot be parsed.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        raise ValueError("Error parsing key " + keyin)

    # pitch class of the key root, accidentals included
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode. care for (unknown) string
    # ---
    mode = sepstring.group('mode')
    if not mode:
        return (key, -1)
    if mode not in mode_dic:
        raise ValueError("Error parsing key " + keyin)

    return (key, mode_dic[mode])
110 | |
111 | |
112 | |
def chord2function(cin = 'B',key=3, mode=0):
    """Convert a chord label to its function relative to a key.

    cin:  chord label matched by the module pattern p, e.g. 'B', 'F#m7',
          'C/E', or 'N' when no chord was detected.
    key:  pitch class of the key root (as returned by note2num).
    mode: accepted for interface symmetry; not used in the computation.

    Returns a tuple (root, fun, typ, bfun):
      root - pitch class of the chord root
      fun  - semitones of the chord root above the key root (0..11)
      typ  - chord type code from type_dic
      bfun - semitones of the bass note above the chord root,
             0 if no bass note is given
    For the 'N' code the tuple is (-1, -1, -1, -1).
    Raises ValueError if the label cannot be parsed and KeyError if the
    chord type is not listed in type_dic.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if not sepstring:
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    if sepstring.group('root') == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(sepstring.group('root'))
    typ = type_dic[sepstring.group('type')]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root,
    # otherwise the chord root itself (0) is the base
    # ---
    base = sepstring.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root, fun, typ, bfun)
149 | |
def read_vamp_csv(filein = ''):
    """Read a comma separated feature file.

    filein: path of the csv file.

    Returns a list with one entry per non-empty row, of the form
    [time(float), data1, data2, ...]; all data columns stay strings.
    Raises ValueError if the first column of a row is not a number.
    """
    # the csv module expects a binary file on Python 2, but a text file
    # opened with newline='' on Python 3
    if sys.version_info[0] >= 3:
        csvfile = open(filein, 'r', newline='')
    else:
        csvfile = open(filein, 'rb')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            # blank lines come back as empty rows; they carry no data
            if not row:
                continue
            output.append([float(row[0])] + row[1:])
    return output
159 | |
def find_features(clipin = '', type='key'):
    """Legacy: find the feature file of one type for a given clip.

    clipin: clip id the file name has to start with.
    type:   'key' or 'chord'; selects the feature directory and the
            transform name (from ftype) the file name has to contain.

    Returns the path of the first matching csv file in the feature
    directory, or None if no file matches.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    # forward slashes are used throughout, so the directory that is
    # listed and the path that is returned are the same
    featuredirs = {'key': './qm_vamp_key_standard.n3_50ac9',
                   'chord': './chordino_simple.n3_1a812'}

    # search for featurefile
    featuredir = featuredirs[type]
    pattern = clipin + '*' + ftype[type] + '*.csv'
    for fname in os.listdir(featuredir):
        if fnmatch.fnmatch(fname, pattern):
            return featuredir + '/' + fname

    return None
173 | |
def get_features(clipin = '', type='key', featurefiles = 0):
    """Read the features of the specified type for a given clip.

    clipin:       clip id, used to search for the file when featurefiles
                  is not given.
    type:         'key' or 'chord'.
    featurefiles: optional dictionary mapping the feature type to the
                  path of its csv file.

    Returns the rows of the feature file as produced by read_vamp_csv.
    Raises IOError if no feature file can be found for the clip.
    """
    if featurefiles:
        path = featurefiles[type]
    else:
        # find_features already returns the path itself, not a dictionary
        path = find_features(clipin, type)
        if not path:
            raise IOError("No " + type + " feature file found for " + clipin)
    return read_vamp_csv(path)
179 | |
def histogram(keysin = ()):
    """Count how often each value occurs as the last entry of a row.

    keysin: iterable of non-empty sequences; only the last element of
            each row is counted.

    Returns a tuple (histo, most_frequent): histo maps each value to
    its count, most_frequent is the value with the highest count, or
    None if keysin is empty.
    """
    # build histogram
    histo = dict()
    for row in keysin:
        label = row[-1]
        histo[label] = histo.get(label, 0) + 1

    # nothing counted -> there is no most frequent value
    if not histo:
        return (histo, None)

    # return most frequent key
    return (histo, max(histo, key=histo.get))
190 | |
191 | |
def chords2functions(clipin = '1CD0006591_BD11-14',featurefiles = '', constkey = 1):
    """Main function, converts all chords of one song to chord functions.

    clipin:       clip id of the song.
    featurefiles: optional dictionary with the paths of the 'key' and
                  'chord' feature files; searched for if empty.
    constkey:     if true, the most frequent detected key is used for
                  the whole song. Other values are not supported.

    Returns a list of tuples (time, key, mode, fun, typ, bfun), one per
    chord row, with fun, typ and bfun as returned by chord2function.
    Raises NotImplementedError if constkey is false, and ValueError if
    the song has no known key.
    """
    # only the constant-key analysis exists; without it there is no key
    # to relate the chords to
    if not constkey:
        raise NotImplementedError("only constkey analysis is supported")

    # get keys and chords
    keys = get_features(clipin, 'key', featurefiles)
    chords = get_features(clipin, 'chord', featurefiles)

    # delete 'unknown' keys (the key label is the last column)
    known = [row for row in keys if row[-1] != '(unknown)']
    if not known:
        raise ValueError("No key detected for " + clipin)

    # aggregate to one key and get its number
    skey = histogram(known)[1]
    (key, mode) = key2num(skey)

    relchords = []
    for (time, chord) in chords:
        # get chord function
        (root, fun, typ, bfun) = chord2function(chord, key, mode)
        relchords.append((time, key, mode, fun, typ, bfun))
    return relchords
222 | |
def tracks_in_dir(dirin = ''):
    """Collect the chord and key feature files below a directory.

    dirin: directory that is searched recursively for csv files.

    Returns a dictionary that maps each clip id to a dictionary with
    the entries 'chord' and/or 'key', holding the paths (with forward
    slashes) of the respective feature files. Clips for which only
    other feature types were found map to an empty dictionary.
    Raises ValueError if the name of a csv file cannot be parsed.
    """
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            # NOTE(review): a csv name that does not parse aborts the
            # whole scan, as in the original -- confirm this is intended
            parsed = pclip.match(fname)
            if not parsed:
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            features = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            fullpath = os.path.join(dirpath, fname).replace('\\', '/')
            if parsed.group('type') == ftype['chord']:
                features['chord'] = fullpath
            elif parsed.group('type') == ftype['key']:
                features['key'] = fullpath
    return data
263 | |
264 | |
def fun2txt(fun,typ, bfun,mode):
    """Translate a numeric chord function into text.

    fun:  semitones of the chord root above the key root, negative for
          "no chord".
    typ:  chord type code (index into type_labels).
    bfun: semitones of the bass note above the chord root.
    mode: 0 for a major key, 1 for a minor key.

    Returns a label such as '(M)V7/1': mode marker, scale degree, chord
    type and, where base_labels has a name for it, the bass interval.
    Returns 'N' if there is no chord or the mode is neither 0 nor 1.
    """
    if fun < 0:
        return 'N'

    # the function can only be named when the mode is known
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        return 'N'

    typ_lbl = type_labels[typ] if typ > 0 else ''

    # bass intervals without a label in base_labels are left out
    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
    return md + pfun + typ_lbl + blb
284 | |
def fun2num(fun,typ, bfun,mode):
    """Pack a chord function into a single number.

    Each field is stored incremented by one in its own pair of decimal
    digits, from high to low: mode, fun, typ, bfun.
    Returns 0 if fun is -1.
    """
    if fun == -1:
        return 0

    mode_part = (mode + 1) * 1000000
    fun_part = (fun + 1) * 10000
    typ_part = (typ + 1) * 100
    bfun_part = bfun + 1
    return mode_part + fun_part + typ_part + bfun_part
291 | |
def folder2functions(path):
    """Convert the chords of all tracks below a folder to chord functions.

    path: folder that is searched for chord and key feature files.

    Prints every clip id that is found. Returns a dictionary that maps
    the clip id to the result of chords2functions, for those clips that
    have exactly two feature files.
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        # check for integrity: do we have keys and chords?
        if len(featurefiles) == 2:
            output[clip] = chords2functions(clip, featurefiles)
    return output
305 | |
def folder2histogram(path= './'):
    """Build a histogram of the chord functions of all tracks in a folder.

    path: folder that is searched for chord and key feature files.

    Prints the histogram. Returns a dictionary with the lists "index"
    (chord function codes from fun2num) and "count" (how often each
    code occurs), in matching order.
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # one single-element row per chord, as histogram counts the last
    # entry of each row
    chords = [[fun2num(fun, typ, bfun, mode)]
              for contents in tracks.values()
              for (time, key, mode, fun, typ, bfun) in contents]

    # counts; a folder without usable tracks gives an empty histogram
    v = histogram(chords)[0] if chords else dict()
    print(v)
    return {"count": list(v.values()), "index": list(v.keys())}
321 | |
if __name__ == "__main__":
    # NOTE(review): the usage text names two path arguments, but the
    # command line is not read; the histogram is always built for the
    # default folder of folder2histogram -- confirm the intended behaviour
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    result = folder2histogram()

    # hand the histogram over to the csv2json tool
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)