dml-open-backendtools: collection_analysis/chord_sequence

annotate collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip

commit

author	Daniel Wolff
date	Sat, 20 Feb 2016 18:14:24 +0100
parents
children

rev	line source
Daniel@0	1 # Part of DML (Digital Music Laboratory)
Daniel@0	2 # Copyright 2014-2015 Daniel Wolff, City University
Daniel@0	3
Daniel@0	4 # This program is free software; you can redistribute it and/or
Daniel@0	5 # modify it under the terms of the GNU General Public License
Daniel@0	6 # as published by the Free Software Foundation; either version 2
Daniel@0	7 # of the License, or (at your option) any later version.
Daniel@0	8 #
Daniel@0	9 # This program is distributed in the hope that it will be useful,
Daniel@0	10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0	11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Daniel@0	12 # GNU General Public License for more details.
Daniel@0	13 #
Daniel@0	14 # You should have received a copy of the GNU General Public
Daniel@0	15 # License along with this library; if not, write to the Free Software
Daniel@0	16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Daniel@0	17
Daniel@0	18 #!/usr/bin/python
Daniel@0	19 # -- coding: utf-8 --
Daniel@0	20 __author__="Daniel Wolff"
Daniel@0	21
Daniel@0	22 import re
Daniel@0	23
Daniel@0	24 # these for file reading etc
Daniel@0	25 import fnmatch
Daniel@0	26 import os
Daniel@0	27 import csv
Daniel@0	28 import spmf
Daniel@0	29
Daniel@0	30 import sys
Daniel@0	31 sys.path.insert(0, '../tools/')
Daniel@0	32 import csv2json as c2j
Daniel@0	33
# ---
# roots
# ---
# natural note names, in scale order starting at C
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# note letter -> semitone offset above C
roots_dic = {
    letter: semitones
    for letter, semitones in zip(chord_roots, (0, 2, 4, 5, 7, 9, 11))
}

# mode label -> numeric mode index (its position in mode_lbls)
mode_lbls = ['major', 'minor']
mode_dic = {label: index for index, label in enumerate(mode_lbls)}

# ---
# types
# ---
# chord type suffix -> numeric type index (its position in type_labels)
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = {label: index for index, label in enumerate(type_labels)}

# text label for a bass note, indexed by its semitone distance from the
# chord root; an empty string means there is no label for that distance
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions
# roman-numeral labels indexed by semitone distance from the key root,
# one list per mode
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

# semitone distance -> roman-numeral label
fun_dic_maj = dict(enumerate(root_funs_maj))
fun_dic_min = dict(enumerate(root_funs_min))
# Regular expressions used to take feature labels and file names apart.
# Only the named groups are read elsewhere in this file.

# Chord label: root letter (or 'N') with an optional '#'/'b', an optional
# lower-case/digit type suffix, and an optional '/<bass note>'.
# Examples: 'C', 'F#m7', 'Bbmaj7/D', 'N'.
p = re.compile(r'(?P<root>[A-GN](#|b)?)(?P<type>[a-z0-9]*)(/(?P<base>[A-G](#|b)?))?')

# Key label: key note with optional accidental, any number of
# ' / <alternative spelling>' parts, whitespace, then the mode word.
# Examples: 'C major', 'F# / Gb minor'.
p2 = re.compile(r'(?P<key>[A-G](#|b)?)(\s/\s[A-G](#|b)?)*\s(?P<mode>major|minor)')

# Feature file name: '<clipid>_<type>.<ext>' where the clip id is two or
# more upper-case/digit runs joined by '-' or '_' and the type starts
# with 'vamp'.
pclip = re.compile(r'(?P<clipid>[A-Z0-9]+(-|_)[A-Z0-9]+((-|_)[A-Z0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')

# feature kind -> the <type> part of the corresponding feature file name
ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key',
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}
Daniel@0	70
def note2num(notein='Cb'):
    """Convert a note name to its pitch class, counted in semitones above C.

    The first character selects the natural note via ``roots_dic``; an
    optional second character ``'#'`` or ``'b'`` raises or lowers it by one
    semitone. Characters after the second one are ignored.

    Returns an int in 0..11 (e.g. 'C' -> 0, 'Cb' -> 11, 'F#' -> 6).

    Raises ValueError if the second character is neither '#' nor 'b', and
    KeyError if the first character is not a key of ``roots_dic``.
    """
    base = roots_dic[notein[0]]

    # plain natural note
    if len(notein) == 1:
        return base % 12

    accidental = notein[1]
    if accidental == 'b':
        # modulo wraps Cb around to 11
        return (base - 1) % 12
    if accidental == '#':
        return (base + 1) % 12

    # a bare ``raise`` here had no active exception to re-raise
    raise ValueError("Error parsing chord " + notein)
Daniel@0	84
Daniel@0	85
def key2num(keyin='C major'):
    """Convert a key label such as 'C major' to numbers.

    The label is matched against the module-level pattern ``p2``; the
    ``key`` group is converted with ``note2num`` and the ``mode`` group is
    looked up in ``mode_dic``.

    Returns a tuple ``(key, mode)`` where ``key`` is the pitch class of the
    key note and ``mode`` is its ``mode_dic`` value, or -1 if the matched
    mode text is not a known mode label.

    Raises ValueError if ``keyin`` does not match ``p2``.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        # a bare ``raise`` here had no active exception to re-raise
        raise ValueError("Error parsing key " + keyin)

    # pitch class of the key note, accidentals included
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode; anything that is not a known label maps to -1
    # ---
    mode = mode_dic.get(sepstring.group('mode'), -1)

    return (key, mode)
Daniel@0	110
Daniel@0	111
Daniel@0	112
def chord2function(cin='B', key=3, mode=0):
    """Express a chord label relative to a key.

    ``cin`` is matched against the module-level pattern ``p`` to split it
    into root, type and optional bass note.

    Parameters:
        cin  -- chord label, or a label whose root is 'N' for "no chord"
        key  -- pitch class of the key the chord is related to
        mode -- accepted for interface compatibility; not used here

    Returns a tuple ``(root, fun, typ, bfun)``:
        root -- pitch class of the chord root
        fun  -- root relative to ``key``, in semitones modulo 12
        typ  -- ``type_dic`` index of the chord type suffix
        bfun -- bass note relative to the chord root, in semitones
                modulo 12; 0 when the label names no bass note
    For an 'N' label all four values are -1.

    Raises ValueError if ``cin`` does not match ``p``, and KeyError if the
    chord type suffix is not a key of ``type_dic``.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if sepstring is None:
        # without this check the failure surfaced as an AttributeError
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    rootname = sepstring.group('root')
    if rootname == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(rootname)
    typ = type_dic[sepstring.group('type')]

    # get relative position
    fun = (root - key) % 12

    # ---
    # do we have a base key? if yes return it relative to chord root,
    # otherwise use 0, i.e. the chord root itself
    # ---
    basename = sepstring.group('base')
    if basename:
        bfun = (note2num(basename) - root) % 12
    else:
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root, fun, typ, bfun)
Daniel@0	149
def read_vamp_csv(filein=''):
    """Read a comma-separated feature file into a list of rows.

    Each returned row is a list whose first element is the first column
    converted to ``float`` and whose remaining elements are the other
    columns as strings: ``[time, data1, data2, ...]``.

    Completely empty lines are skipped. Raises ValueError if the first
    column of a row cannot be converted to ``float``.
    """
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            # csv.reader yields [] for a blank line; row[0] would fail
            if not row:
                continue
            output.append([float(row[0])] + row[1:])
    return output
Daniel@0	159
def find_features(clipin='', type='key'):
    """Legacy lookup of the feature file for one clip.

    Searches a fixed, per-feature directory (relative to the current
    working directory) for a ``.csv`` file belonging to ``clipin``.

    Parameters:
        clipin -- clip identifier; used as (the start of) an fnmatch pattern
        type   -- feature kind, a key of ``ftype`` ('key' or 'chord')

    Returns the path of the first matching file in sorted order, using
    forward slashes, or None if the directory has no matching file.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    featuredirs = {'key': './qm_vamp_key_standard.n3_50ac9',
                   'chord': './chordino_simple.n3_1a812'}
    featuredir = featuredirs[type]

    # Accept the plain concatenation as well as the '<clipid>_<type>.csv'
    # layout that the file-name pattern pclip describes.
    patterns = [clipin + sep + ftype[type] + '.csv' for sep in ('', '_')]

    # search for featurefile; sorted() makes the choice deterministic
    for name in sorted(os.listdir(featuredir)):
        if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
            return featuredir + '/' + name
    return None
Daniel@0	173
def get_features(clipin='', type='key', featurefiles=0):
    """Read the features of one kind for one clip.

    Parameters:
        clipin       -- clip identifier, only used when no ``featurefiles``
                        mapping is given
        type         -- feature kind ('key' or 'chord')
        featurefiles -- mapping from feature kind to file path; if it is
                        empty or false the file is located with
                        ``find_features`` instead

    Returns the rows produced by ``read_vamp_csv`` for the selected file.

    Raises IOError if no mapping is given and ``find_features`` finds
    nothing.
    """
    if featurefiles:
        path = featurefiles[type]
    else:
        # find_features returns a single path (not a mapping), so it must
        # not be indexed by feature kind
        path = find_features(clipin, type)
        if path is None:
            raise IOError("No " + type + " feature file found for clip " + clipin)
    return read_vamp_csv(path)
Daniel@0	179
def histogram(keysin=None):
    """Count how often each value occurs as the last entry of a row.

    Parameters:
        keysin -- iterable of indexable rows; only ``row[-1]`` is used.
                  None is treated as an empty input.

    Returns a tuple ``(histo, top)`` where ``histo`` maps each last-entry
    value to its number of occurrences and ``top`` is a value with the
    highest count. For an empty input the result is ``({}, None)``.
    """
    if keysin is None:
        keysin = ()

    # build histogram
    histo = dict()
    for row in keysin:
        label = row[-1]
        histo[label] = histo.get(label, 0) + 1

    # max() cannot be applied to an empty histogram
    if not histo:
        return (histo, None)

    # return most frequent key
    return (histo, max(histo, key=histo.get))
Daniel@0	190
Daniel@0	191
def chords2functions(clipin='1CD0006591_BD11-14', featurefiles='', constkey=1):
    """Main function: relate all chords of one clip to its overall key.

    Key and chord features are read with ``get_features``. Key rows
    labelled '(unknown)' are dropped, the most frequent remaining key label
    is taken as the key of the whole clip, and every chord is converted
    with ``chord2function`` relative to that key.

    Parameters:
        clipin       -- clip identifier
        featurefiles -- mapping from feature kind to file path, passed on
                        to ``get_features``
        constkey     -- if true, use one constant key for the whole clip

    Returns a list of tuples ``(time, key, mode, fun, typ, bfun)``, one per
    chord row.

    Raises ValueError if no key row other than '(unknown)' is available.
    """
    # get keys and chords
    keys = get_features(clipin, 'key', featurefiles)
    chords = get_features(clipin, 'chord', featurefiles)

    relchords = []
    if not constkey:
        # NOTE(review): the indentation of the original was lost; this
        # assumes all processing was nested under ``if constkey`` so that
        # nothing is produced otherwise. A time-varying key is not
        # implemented -- confirm against the original file.
        return relchords

    # delete 'unknown' keys
    keys = [(ktime, knum, label) for (ktime, knum, label) in keys
            if label != '(unknown)']
    if not keys:
        raise ValueError("No usable key estimate for clip " + clipin)

    # aggregate to one key: the most frequent label wins
    (_, skey) = histogram(keys)

    # get key number
    (key, mode) = key2num(skey)

    for (time, chord) in chords:
        # get chord function
        (root, fun, typ, bfun) = chord2function(chord, key, mode)
        relchords.append((time, key, mode, fun, typ, bfun))
    return relchords
Daniel@0	222
def tracks_in_dir(dirin=''):
    """Collect the chord and key feature files found below a directory.

    The directory tree is walked with ``os.walk``; every ``.csv`` file name
    is matched against the module-level pattern ``pclip`` to obtain the
    clip id and the feature type.

    Returns a dictionary that maps each clip id to a dictionary with the
    optional entries 'chord' and 'key', each holding the path of the
    corresponding feature file (with forward slashes). Clips whose files
    are of neither type get an empty dictionary.

    Raises ValueError for a ``.csv`` file whose name does not match
    ``pclip``.
    """
    # data is a dictionary that for each clip id contains the feature
    # files for chords and keys
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            parsed = pclip.match(fname)
            if not parsed:
                # NOTE(review): assumes the original ``else`` branch
                # belonged to ``if parsed``, as its message suggests; it
                # ended in a bare ``raise`` with nothing to re-raise.
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            entry = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            feature = parsed.group('type')
            fullpath = os.path.join(dirpath, fname).replace('\\', '/')
            if feature == ftype['chord']:
                entry['chord'] = fullpath
            elif feature == ftype['key']:
                entry['key'] = fullpath
    return data
Daniel@0	263
Daniel@0	264
def fun2txt(fun, typ, bfun, mode):
    """Render a numeric chord function as text.

    Parameters:
        fun  -- chord root relative to the key, in semitones; negative
                means "no chord"
        typ  -- index into ``type_labels``
        bfun -- bass note relative to the chord root, in semitones
        mode -- 0 selects the major labels, 1 the minor labels

    Returns the mode marker ('(M)' or '(m)') followed by the roman-numeral
    label, the chord type suffix and, when ``base_labels`` has a non-empty
    entry for ``bfun``, '/' plus that entry. Returns 'N' when ``fun`` is
    negative or ``mode`` is neither 0 nor 1.
    """
    # no chord detected
    if fun < 0:
        return 'N'

    # pick the label table for the mode
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        # an unknown mode used to leave pfun and md unassigned
        return 'N'

    typetxt = type_labels[typ] if typ > 0 else ''

    # only add a bass part if there is a label for this distance
    basetxt = base_labels[bfun] if bfun >= 0 else ''
    blb = '/' + basetxt if basetxt else ''

    return md + pfun + typetxt + blb
Daniel@0	284
def fun2num(fun, typ, bfun, mode):
    """Pack a chord function into a single integer code.

    Each of ``mode``, ``fun``, ``typ`` and ``bfun`` is incremented by one
    and weighted by 1000000, 10000, 100 and 1 respectively; the code is the
    sum of the weighted values. A ``fun`` of -1 yields the code 0.
    """
    # -1 marks "no chord"
    if fun == -1:
        return 0

    fields = (mode + 1, fun + 1, typ + 1, bfun + 1)
    weights = (1000000, 10000, 100, 1)
    return sum(value * weight for value, weight in zip(fields, weights))
Daniel@0	291
def folder2functions(path):
    """Compute the chord functions for every complete clip below ``path``.

    Clips are discovered with ``tracks_in_dir``. Each clip id is printed;
    clips that have exactly two feature files (chords and keys) are
    processed with ``chords2functions``, all others are skipped.

    Returns a dictionary that maps clip id to the list returned by
    ``chords2functions``.
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        # check for integrity: do we have keys and chords?
        if len(featurefiles) == 2:
            output[clip] = chords2functions(clip, featurefiles)
    return output
Daniel@0	305
def folder2histogram(path='./'):
    """Build a histogram of chord function codes for a folder.

    The chord functions of all clips below ``path`` are obtained with
    ``folder2functions``, encoded with ``fun2num`` and counted with
    ``histogram``. The counts are printed.

    Returns a dictionary with two parallel lists: "index" holds the
    distinct codes and "count" the number of occurrences of each.
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # collect the numeric code of every chord, one single-element row each
    chords = []
    for contents in tracks.values():
        for (time, key, mode, fun, typ, bfun) in contents:
            chords.append([fun2num(fun, typ, bfun, mode)])

    # counts; nothing to count if no chord was found
    counts = histogram(chords)[0] if chords else dict()
    print(counts)

    # real lists, built from one pass so that both stay aligned
    codes = list(counts)
    return {"count": [counts[code] for code in codes], "index": codes}
Daniel@0	321
if __name__ == "__main__":
    # Script entry point: build the chord function histogram for a folder
    # and hand it to csv2json. The folder is the first command line
    # argument; without one the current directory is used.
    print("Creates a key-independent chord histogram. Usage: chord2function [path_vamp_features]")
    path = sys.argv[1] if len(sys.argv) > 1 else './'
    result = folder2histogram(path)
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)

Mercurial > hg > dml-open-backendtools

annotate collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip