Mercurial > hg > dml-open-backendtools
comparison collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip
commit
| author | Daniel Wolff |
|---|---|
| date | Sat, 20 Feb 2016 18:14:24 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e34cf1b6fe09 |
|---|---|
| 1 # Part of DML (Digital Music Laboratory) | |
| 2 # Copyright 2014-2015 Daniel Wolff, City University | |
| 3 | |
| 4 # This program is free software; you can redistribute it and/or | |
| 5 # modify it under the terms of the GNU General Public License | |
| 6 # as published by the Free Software Foundation; either version 2 | |
| 7 # of the License, or (at your option) any later version. | |
| 8 # | |
| 9 # This program is distributed in the hope that it will be useful, | |
| 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 # GNU General Public License for more details. | |
| 13 # | |
| 14 # You should have received a copy of the GNU General Public | |
| 15 # License along with this library; if not, write to the Free Software | |
| 16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 17 | |
| 18 #!/usr/bin/python | |
| 19 # -*- coding: utf-8 -*- | |
| 20 __author__="Daniel Wolff" | |
| 21 | |
| 22 import re | |
| 23 | |
| 24 # these for file reading etc | |
| 25 import fnmatch | |
| 26 import os | |
| 27 import csv | |
| 28 import spmf | |
| 29 | |
| 30 import sys | |
| 31 sys.path.insert(0, '../tools/') | |
| 32 import csv2json as c2j | |
| 33 | |
# ---
# roots
# ---
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# natural root letter -> semitone offset above C
roots_dic = dict(zip(chord_roots, [0, 2, 4, 5, 7, 9, 11]))

# mode label -> mode number (major = 0, minor = 1)
mode_lbls = ['major', 'minor']
mode_dic = dict(zip(mode_lbls, range(0, 2)))
# ---
# types
# ---
# chord type suffix -> index into type_labels
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = dict(zip(type_labels, range(0, len(type_labels))))

# label of the bass note, indexed by its semitone distance above the chord
# root; an empty string means there is no label for that distance
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions
# scale-degree names indexed by semitone distance from the key root
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

fun_dic_maj = dict(zip(range(0, len(root_funs_maj)), root_funs_maj))
fun_dic_min = dict(zip(range(0, len(root_funs_min)), root_funs_min))

# regex that separates roots and types, and gets chord base.
# The root is a letter A-G (or N for "no chord") followed by any number of
# sharps (#) or flats (b); an optional "/<note>" suffix names the bass note.
# NOTE(review): the commas inside the character classes are literal commas,
# not separators - kept as they were, confirm whether they are intended.
p = re.compile(r'(?P<root>[A-G,N](#|b)*)(?P<type>[a-z,0-9]*)(/(?P<base>[A-G](#|b)*))*')

# key strings such as "C major" or "F# / Gb minor".
# The mode is an alternation of the two known labels; the previous character
# class "[major|minor]+" matched any run of those letters (e.g. "jam").
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')

# feature file names: <clipid>_<vamp transform name>.<ext>
# The dot before the extension is escaped so that it only matches a real dot.
pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')

# feature kind -> vamp transform name used inside the feature file names
ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key',
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}
| 70 | |
# most simple note2num
def note2num(notein = 'Cb'):
    """Convert a note name such as 'C', 'F#' or 'Bb' to a pitch class 0-11.

    The first character is looked up in ``roots_dic``; every following
    character must be an accidental, where '#' raises and 'b' lowers the
    pitch by one semitone.  All accidentals are applied, so 'C##' yields 2.

    Raises:
        KeyError: if the first character is not a known root letter.
        ValueError: if a trailing character is neither '#' nor 'b'.
    """
    pitch = roots_dic[notein[0]]
    for accidental in notein[1:]:
        if accidental == '#':
            pitch += 1
        elif accidental == 'b':
            pitch -= 1
        else:
            # a bare "raise" has no active exception to re-raise here,
            # so report the problem with an explicit exception instead
            raise ValueError("Error parsing chord " + notein)
    # wrap around the octave, e.g. Cb -> 11 and B# -> 0
    return pitch % 12
| 84 | |
| 85 | |
# convert key to number
def key2num(keyin = 'C major'):
    """Parse a key string such as 'C major' into ``(key, mode)``.

    ``key`` is the pitch class of the key root as returned by ``note2num``.
    ``mode`` is the number stored in ``mode_dic`` for the parsed mode label,
    or -1 when the label is not one of the known modes.

    Raises:
        ValueError: if ``keyin`` does not match the key pattern ``p2``.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    parsed = p2.match(keyin)
    if not parsed:
        # a bare "raise" has no active exception to re-raise here
        raise ValueError("Error parsing key " + keyin)

    # get relative position of the key root and adapt for accidentals
    key = note2num(parsed.group('key'))

    # ---
    # parse mode. unknown labels map to -1 instead of raising KeyError
    # ---
    mode = mode_dic.get(parsed.group('mode'), -1)

    return (key, mode)
| 110 | |
| 111 | |
| 112 | |
# convert chord to relative function
def chord2function(cin = 'B',key=3, mode=0):
    """Convert a chord label into numbers relative to a key.

    Args:
        cin: chord label, e.g. 'B', 'F#m7' or 'C/E'; 'N' means no chord.
        key: pitch class (0-11) of the key root.
        mode: accepted for symmetry with the other helpers; it is not used
            in the computation.

    Returns:
        ``(root, fun, typ, bfun)`` where ``root`` is the pitch class of the
        chord root, ``fun`` its semitone distance above ``key``, ``typ`` the
        index of the chord type in ``type_dic`` and ``bfun`` the semitone
        distance of the bass note above the chord root (0 when no bass note
        is given).  ``(-1, -1, -1, -1)`` is returned for the 'N' label.

    Raises:
        ValueError: if ``cin`` does not match the chord pattern ``p``.
        KeyError: if the chord type or root letter is not known.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    parsed = p.match(cin)
    if parsed is None:
        # without this check the failure shows up as an AttributeError below
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    if parsed.group('root') == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(parsed.group('root'))
    typ = type_dic[parsed.group('type')]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root
    # ---
    base = parsed.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        # this standard gives 1 as a base key if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root, fun, typ, bfun)
| 149 | |
# reads in any csv and returns a list of structure
# time(float), data1, data2 ....data2
def read_vamp_csv(filein = ''):
    """Read a comma-separated feature file.

    Each non-empty row becomes a list whose first element is the first
    column converted to ``float``; the remaining columns are kept as the
    strings delivered by the csv reader.  Blank lines are skipped.

    Raises:
        ValueError: if the first column of a row is not a number.
    """
    # the csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] >= 3:
        csvfile = open(filein, 'r', newline='')
    else:
        csvfile = open(filein, 'rb')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # the reader yields an empty list for a blank line
                continue
            output.append([float(row[0])] + row[1:])
    return output
| 159 | |
# legacy:: finds featurefile for given piece
def find_features(clipin = '', type='key'):
    """Return the path of the first csv feature file found for a clip.

    Args:
        clipin: clip id the file name has to start with.
        type: feature kind, 'key' or 'chord'.

    Returns:
        ``<feature directory>/<file name>`` of the first directory entry
        matching ``clipin*<transform name>*.csv``, or ``None`` when no entry
        matches.  The path uses forward slashes, the same form that is used
        to list the directory.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    featuredirs = {'key': './qm_vamp_key_standard.n3_50ac9',
                   'chord': './chordino_simple.n3_1a812'}

    # search for featurefile
    featuredir = featuredirs[type]
    pattern = clipin + '*' + ftype[type] + '*.csv'
    for fname in os.listdir(featuredir):
        if fnmatch.fnmatch(fname, pattern):
            return featuredir + '/' + fname
    return None
| 173 | |
# reads features for given clip and of specified type
def get_features(clipin = '', type='key', featurefiles = 0):
    """Read the features of one kind for a clip.

    Args:
        clipin: clip id, used to search for the file when ``featurefiles``
            is not given.
        type: feature kind, 'key' or 'chord'.
        featurefiles: mapping from feature kind to file path.  When it is
            empty or omitted the file is looked up with ``find_features``.

    Returns:
        The rows delivered by ``read_vamp_csv`` for the selected file.

    Raises:
        IOError: if no mapping is given and no feature file is found.
    """
    if featurefiles:
        path = featurefiles[type]
    else:
        # find_features returns a single path (or None), not a mapping,
        # so it must not be indexed by the feature kind
        path = find_features(clipin, type)
        if path is None:
            raise IOError("No " + type + " feature file found for " + clipin)
    return read_vamp_csv(path)
| 179 | |
# histogram of the last entry in a list
# returns the most frequently used key
def histogram(keysin = None):
    """Count how often each value occurs as the last element of a row.

    Args:
        keysin: iterable of non-empty sequences; ``None`` is treated as an
            empty input.

    Returns:
        ``(histo, most_frequent)`` where ``histo`` maps each last element to
        its number of occurrences and ``most_frequent`` is the value with
        the highest count.  For an empty input ``({}, None)`` is returned.
    """
    # build histogram
    histo = dict()
    for row in (keysin or ()):
        histo[row[-1]] = histo.get(row[-1], 0) + 1

    # max() raises on an empty sequence, so report "no winner" explicitly
    if not histo:
        return (histo, None)

    # return most frequent key; iterating the dict works on Python 2 and 3
    return (histo, max(histo, key=histo.get))
| 190 | |
| 191 | |
# main function, processes all chords for one song
def chords2functions(clipin = '1CD0006591_BD11-14',featurefiles = '', constkey = 1):
    """Convert all chords of one clip into key-relative chord functions.

    The key of the clip is taken to be the most frequent key label in the
    key features, ignoring '(unknown)' entries.

    Args:
        clipin: clip id.
        featurefiles: mapping with the 'key' and 'chord' feature file paths;
            passed on to ``get_features``.
        constkey: when true, one constant key is used for the whole clip.
            This is the only implemented mode; with a false value no chords
            are converted and an empty list is returned.

    Returns:
        A list of ``(time, key, mode, fun, typ, bfun)`` tuples, one per
        chord row.

    Raises:
        ValueError: if the clip has no key entry other than '(unknown)'.
    """
    # get keys
    keys = get_features(clipin, 'key', featurefiles)

    # chords
    chords = get_features(clipin, 'chord', featurefiles)

    relchords = []
    if not constkey:
        return relchords

    # delete 'unknown' keys
    known = [(time, knum, label) for (time, knum, label) in keys
             if not label == '(unknown)']
    if not known:
        raise ValueError("No known key found for " + clipin)

    # aggregate to one key
    (histo, skey) = histogram(known)

    # get key number
    (key, mode) = key2num(skey)

    for (time, chord) in chords:
        # get chord function
        (root, fun, typ, bfun) = chord2function(chord, key, mode)

        # the textual form of the function is not needed for the result,
        # so it is no longer computed here
        relchords.append((time, key, mode, fun, typ, bfun))
    return relchords
| 222 | |
def tracks_in_dir(dirin = ''):
    """Collect the chord and key feature files below a directory.

    The directory tree is walked recursively.  Every ``.csv`` file name is
    parsed with ``pclip`` to obtain its clip id and transform name.

    Returns:
        A dictionary that maps each clip id to a dictionary holding the
        path of its chord file under 'chord' and of its key file under
        'key', as far as they were found.  Clips whose csv files belong to
        other transforms get an empty dictionary.  Paths use forward
        slashes.

    Raises:
        ValueError: if a ``.csv`` file name cannot be parsed.
    """
    # ---
    # we now only search for tracks which have chord data
    # ---

    # data is a dictionary that
    # for each filename contains the feature
    # files for chords and keys
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            parsed = pclip.match(fname)
            if not parsed:
                # a bare "raise" has no active exception to re-raise here
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            entry = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            fullpath = os.path.join(dirpath, fname).replace('\\', '/')
            if parsed.group('type') == ftype['chord']:
                entry['chord'] = fullpath
            elif parsed.group('type') == ftype['key']:
                entry['key'] = fullpath
    return data
    # return list of tracknames
    # return list of feature dirs
| 263 | |
| 264 | |
def fun2txt(fun,typ, bfun,mode):
    """Render a numeric chord function as text, e.g. '(M)V7/1'.

    Args:
        fun: semitone distance of the chord root above the key root;
            a negative value stands for "no chord".
        typ: index into ``type_labels``.
        bfun: semitone distance of the bass note above the chord root.
        mode: 0 for major, 1 for minor.

    Returns:
        'N' for a negative ``fun``.  Otherwise the mode marker ('(M)' or
        '(m)'), the scale-degree name, the chord type label and, when
        ``base_labels`` has a non-empty entry for ``bfun``, '/' followed by
        that entry.

    Raises:
        ValueError: if ``fun`` is not negative and ``mode`` is neither 0
            nor 1.
    """
    if fun < 0:
        return 'N'

    # now we can interpret this function
    # when given the mode of major or minor.
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        # previously this fell through and failed on the unbound names
        raise ValueError("Unknown mode " + str(mode))

    label = type_labels[typ] if typ > 0 else ''

    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
    return md + pfun + label + blb
| 284 | |
def fun2num(fun,typ, bfun,mode):
    """Pack a chord function into a single integer code.

    Each of mode, fun, typ and bfun is incremented by one and then placed
    in its own decimal field: mode at 1000000, fun at 10000, typ at 100 and
    bfun at 1.  A ``fun`` of -1 yields the code 0.
    """
    if fun == -1:
        return 0
    fields = (mode + 1, fun + 1, typ + 1, bfun + 1)
    weights = (1000000, 10000, 100, 1)
    return sum(value * weight for value, weight in zip(fields, weights))
| 291 | |
def folder2functions(path):
    """Compute the chord functions of every complete clip below ``path``.

    The clips are collected with ``tracks_in_dir``.  The id of every clip
    is printed; clips lacking either the key or the chord feature file are
    skipped.

    Returns:
        A dictionary that maps each processed clip id to the list returned
        by ``chords2functions``.
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        # check for integrity: do we have keys and chords?
        if 'key' in featurefiles and 'chord' in featurefiles:
            output[clip] = chords2functions(clip, featurefiles)
    return output
| 305 | |
def folder2histogram(path= './'):
    """Build a histogram of chord-function codes for a folder.

    All chords delivered by ``folder2functions`` are converted to integer
    codes with ``fun2num`` and counted.  The count dictionary is printed.

    Returns:
        ``{"count": [...], "index": [...]}`` where ``index`` lists the
        distinct codes and ``count`` the number of occurrences of the code
        at the same position.  Both lists are empty when no chords were
        found.
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # one single-element row per chord, as expected by histogram()
    chords = []
    for contents in tracks.values():
        for (time, key, mode, fun, typ, bfun) in contents:
            chords.append([fun2num(fun, typ, bfun, mode)])

    # counts; nothing to count when no chords were found
    if chords:
        (v, w) = histogram(chords)
    else:
        v = dict()
    print(v)

    # real lists rather than dict views, so the result can be serialised
    return {"count": list(v.values()), "index": list(v.keys())}
| 321 | |
if __name__ == "__main__":
    # script entry point: builds the histogram for the default folder of
    # folder2histogram() and hands it to csv2json's data2json()
    #chords2functions()
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    # sys.argv[1]
    result = folder2histogram()
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)
