Mercurial > hg > dml-open-backendtools
view collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Convert chord labels into key-relative chord functions.

The module reads per-clip CSV feature files (one with key estimates, one
with chord labels), picks the most frequent key of a clip, expresses every
chord relative to that key, and can aggregate the result over a folder into
a histogram of numeric chord-function codes.
"""
__author__="Daniel Wolff"

import re

# these for file reading etc
import fnmatch
import os
import csv
import spmf

import sys
sys.path.insert(0, '../tools/')
import csv2json as c2j

# ---
# roots
# ---
chord_roots = ["C","D","E","F","G","A","B"]

# create a dictionary for efficiency: natural note name -> pitch class
roots_dic = dict(zip(chord_roots, [0,2,4,5,7,9,11]))

mode_lbls = ['major','minor']
mode_dic = dict(zip(mode_lbls, range(0,2)))

# ---
# types
# ---
type_labels = ["", "6", "7", "m","m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = dict(zip(type_labels, range(0,len(type_labels))))

# label of the bass note, indexed by its distance in semitones from the root
base_labels = ["1","","2","b3","3","4","","5","","6","b7","7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions
root_funs_maj = ['I','#I','II','#II','III','IV','#IV','V','#V','VI','#VI','VII']
root_funs_min = ['I','#I','II','III','#III','IV','#IV','V','VI','#VI','VII','#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

fun_dic_maj = dict(zip(range(0,len(root_funs_maj)),root_funs_maj))
fun_dic_min = dict(zip(range(0,len(root_funs_min)),root_funs_min))

# regex that separates roots and types, and gets chord base;
# the root and the base may carry sharps (#) or flats (b)
p = re.compile(r'(?P<root>[A-G,N](#|b)*)(?P<type>[a-z,0-9]*)(/(?P<base>[A-G](#|b)*))*')
# key strings such as 'C major' or 'F# / Gb minor'; the mode is an
# alternation, not a character class, so only the two words are accepted
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')
pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*).(?P<ext>(csv|xml|txt|n3)+)')

# feature-type tag embedded in the feature file names
ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key',
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}


# most simple note2num
def note2num(notein='Cb'):
    """Return the pitch class (0-11) of a note name.

    Only the first accidental is evaluated: 'C' -> 0, 'C#' -> 1, 'Cb' -> 11.

    Raises:
        KeyError: if the first character is not one of C, D, E, F, G, A, B.
        ValueError: if the second character is neither '#' nor 'b'.
    """
    base = roots_dic[notein[0]]
    if len(notein) == 1:
        return base % 12

    accidental = notein[1]
    if accidental == 'b':
        return (base - 1) % 12
    if accidental == '#':
        return (base + 1) % 12
    raise ValueError("Error parsing chord " + notein)


# convert key to number
def key2num(keyin='C major'):
    """Parse a key string and return ``(key, mode)``.

    ``key`` is the pitch class of the tonic and ``mode`` is the index into
    ``mode_lbls`` (0 for major, 1 for minor).

    Raises:
        ValueError: if ``keyin`` does not match the key pattern ``p2``.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        raise ValueError("Error parsing key " + str(keyin))

    # get position of the tonic and adapt for flats
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode. care for (unknown) string
    # ---
    mode = sepstring.group('mode')
    mode = mode_dic[mode] if mode else -1

    return (key, mode)


# convert chord to relative function
def chord2function(cin='B', key=3, mode=0):
    """Express a chord label relative to a key.

    Args:
        cin: chord label, e.g. 'B', 'F#m7' or 'C/E'; 'N' means "no chord".
        key: pitch class of the key's tonic.
        mode: accepted for signature compatibility; not used here.

    Returns:
        ``(root, fun, typ, bfun)`` where ``root`` is the chord root's pitch
        class, ``fun`` its distance in semitones above the tonic, ``typ``
        the index into ``type_labels`` and ``bfun`` the distance of the
        bass note above the chord root (0 if no bass note is given).
        ``(-1, -1, -1, -1)`` is returned for the 'N' label.

    Raises:
        ValueError: if ``cin`` does not match the chord pattern ``p``.
        KeyError: if the chord type is not listed in ``type_labels``.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if not sepstring:
        raise ValueError("Error parsing chord " + str(cin))

    # test for N code -> no chord detected
    if sepstring.group('root') == 'N':
        return (-1,-1,-1,-1)

    # get root and type otherwise
    root = note2num(sepstring.group('root'))
    typ = type_dic[sepstring.group('type')]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root
    # ---
    base = sepstring.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        # this standard gives 1 as a base key if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---
    return (root,fun,typ,bfun)


# reads in any csv and returns a list of structure
# time(float), data1, data2 ....data2
def read_vamp_csv(filein=''):
    """Read a CSV file into a list of ``[float(time), field, ...]`` rows.

    The first column is converted to float, all other columns are kept as
    strings. Blank lines are skipped.
    """
    # the csv module wants binary mode on Python 2 and text mode with
    # newline='' on Python 3
    if sys.version_info[0] < 3:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                continue
            output.append([float(row[0])] + row[1:])
    return output


# legacy:: finds featurefile for given piece
def find_features(clipin='', type='key'):
    """Search the hard-coded feature directory for a clip's feature file.

    Args:
        clipin: clip identifier the file name has to start with.
        type: 'key' or 'chord'.

    Returns:
        The path of the first matching CSV file, or ``None`` if none matches.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    featuredirs = {'key': r'.\qm_vamp_key_standard.n3_50ac9',
                   'chord': r'.\chordino_simple.n3_1a812'}

    # search for featurefile; the same normalised directory is used for
    # listing and for building the returned path
    featuredir = featuredirs[type].replace('\\', '/')
    pattern = clipin + '*' + ftype[type] + '*.csv'
    for fname in os.listdir(featuredir):
        if fnmatch.fnmatch(fname, pattern):
            return featuredir + '/' + fname
    return None


# reads features for given clip and of specified type
def get_features(clipin='', type='key', featurefiles=0):
    """Load the feature rows of one type for one clip.

    Args:
        clipin: clip identifier, only used for the legacy directory search.
        type: 'key' or 'chord'.
        featurefiles: mapping from feature type to CSV path; if falsy the
            file is looked up with ``find_features``.

    Raises:
        IOError: if no mapping is given and the legacy search finds no file.
    """
    if featurefiles:
        path = featurefiles[type]
    else:
        path = find_features(clipin, type)
        if path is None:
            raise IOError("No " + type + " feature file found for " + clipin)
    return read_vamp_csv(path)


# histogram of the last entry in a list
# returns the most frequently used key
def histogram(keysin=None):
    """Count how often each last-column value occurs.

    Args:
        keysin: iterable of rows; the last element of each row is counted.

    Returns:
        ``(histo, most_frequent)`` where ``histo`` maps value to count and
        ``most_frequent`` is the value with the highest count, or ``None``
        when ``keysin`` is empty.
    """
    # build histogram
    histo = dict()
    for row in (keysin or []):
        histo[row[-1]] = histo.get(row[-1], 0) + 1

    if not histo:
        return (histo, None)

    # return most frequent key
    return (histo, max(histo, key=histo.get))


# main function, processes all chords for one song
def chords2functions(clipin='1CD0006591_BD11-14', featurefiles='', constkey=1):
    """Convert all chords of one clip to key-relative functions.

    Args:
        clipin: clip identifier.
        featurefiles: mapping with 'key' and 'chord' CSV paths; if falsy the
            legacy directory search is used.
        constkey: if true, the most frequent known key of the clip is used
            for all chords.

    Returns:
        List of ``(time, key, mode, fun, typ, bfun)`` tuples, one per chord.

    Raises:
        ValueError: if the clip has no known key.
    """
    # NOTE(review): the original indentation was lost; the chord loop is
    # assumed to sit inside the constkey branch, so nothing is produced
    # when constkey is false - confirm against the repository.

    # get keys
    keys = get_features(clipin,'key',featurefiles)

    relchords = []
    # chords
    chords = get_features(clipin,'chord',featurefiles)
    if constkey:
        # delete 'unknown' keys
        keys = [(time,knum,key) for (time,knum,key) in keys if not key == '(unknown)']

        # aggregate to one key
        (_, skey) = histogram(keys)
        if skey is None:
            raise ValueError("No known key found for clip " + clipin)

        # get key number
        (key,mode) = key2num(skey)

        for (time,chord) in chords:
            # get chord function
            (_, fun, typ, bfun) = chord2function(chord, key, mode)
            relchords.append((time,key,mode,fun,typ,bfun))
    return relchords


def tracks_in_dir(dirin=''):
    """Collect the key and chord feature files below a directory.

    Returns:
        Dictionary mapping each clip id to a dictionary that holds the
        path of the 'chord' and/or 'key' CSV file found for it.

    Raises:
        ValueError: if a ``.csv`` file name does not match ``pclip``.
    """
    # NOTE(review): the original indentation was lost; the "Could not
    # parse" error is assumed to belong to the pattern match, not to the
    # '.csv' suffix test - confirm against the repository.
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, _dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            parsed = pclip.match(fname)
            if not parsed:
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            features = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            found = parsed.group('type')
            path = os.path.join(dirpath, fname).replace('\\', '/')
            if found == ftype['chord']:
                features['chord'] = path
            elif found == ftype['key']:
                features['key'] = path
    return data


def fun2txt(fun, typ, bfun, mode):
    """Render a chord function as text, e.g. '(M)V7/3'.

    Returns 'N' when ``fun`` is negative or ``mode`` is neither 0 (major)
    nor 1 (minor).
    """
    # now we can interpret this function
    # when given the mode of major or minor.
    if fun < 0:
        return 'N'
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        return 'N'

    tlb = type_labels[typ] if typ > 0 else ''

    # the bass label is only appended when one is defined for the interval
    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
    return md + pfun + tlb + blb


def fun2num(fun, typ, bfun, mode):
    """Pack a chord function into one integer code.

    The code is ``(mode+1)*1000000 + (fun+1)*10000 + (typ+1)*100 + (bfun+1)``;
    0 is returned when ``fun`` is -1.
    """
    if fun == -1:
        return 0
    return (mode+1)* 1000000 + (fun+1) * 10000 + (typ+1) * 100 + (bfun+1)


def folder2functions(path):
    """Run ``chords2functions`` on every clip below ``path``.

    Only clips for which both feature files were found are processed.
    Each clip id is printed as it is visited.

    Returns:
        Dictionary mapping clip id to its list of chord-function tuples.
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    # check for integrity: do we have keys and chords?
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        if len(featurefiles) == 2:
            output[clip] = chords2functions(clip,featurefiles)
    return output


def folder2histogram(path='./'):
    """Build a histogram of chord-function codes for a folder.

    Returns:
        ``{"count": [...], "index": [...]}`` with the codes produced by
        ``fun2num`` under "index" and their frequencies under "count".
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # collect the numeric codes, one single-element row per chord
    chords = []
    for contents in tracks.values():
        for (time,key,mode,fun,typ,bfun) in contents:
            chords.append([fun2num(fun,typ,bfun,mode)])

    # counts
    (v, _) = histogram(chords)
    print(v)
    # plain lists on both Python 2 and Python 3
    return {"count": list(v.values()), "index": list(v.keys())}


if __name__ == "__main__":
    #chords2functions()
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    # sys.argv[1]
    result = folder2histogram()
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)