Mercurial > hg > dml-open-backendtools
diff collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line diff
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Convert chord estimates into key-relative chord functions.

Chord and key estimates are read from Vamp feature CSV files (see ``ftype``
for the file-name tags that are recognised).  Each chord is expressed
relative to the most frequent key of its track, and the chord functions of
a whole folder can be aggregated into a histogram.

The code runs unchanged on Python 2 and Python 3.
"""
__author__ = "Daniel Wolff"

import re

# these for file reading etc
import fnmatch
import os
import csv

# spmf is a project-local module that nothing in this file uses; it is still
# imported when available, but its absence must not stop the conversion
# functions below from being importable.
try:
    import spmf
except ImportError:
    spmf = None

import sys
sys.path.insert(0, '../tools/')

# csv2json is only needed by the script entry point at the bottom of the file.
try:
    import csv2json as c2j
except ImportError:
    c2j = None

# ---
# roots
# ---
chord_roots = ["C", "D", "E", "F", "G", "A", "B"]

# create a dictionary for efficiency
roots_dic = dict(zip(chord_roots, [0, 2, 4, 5, 7, 9, 11]))

mode_lbls = ['major', 'minor']
mode_dic = dict(zip(mode_lbls, range(0, 2)))
# ---
# types
# ---
type_labels = ["", "6", "7", "m", "m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = dict(zip(type_labels, range(0, len(type_labels))))

# label of the bass note, indexed by its distance in semitones above the
# chord root; empty entries have no label
base_labels = ["1", "", "2", "b3", "3", "4", "", "5", "", "6", "b7", "7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions
root_funs_maj = ['I', '#I', 'II', '#II', 'III', 'IV', '#IV', 'V', '#V', 'VI', '#VI', 'VII']
root_funs_min = ['I', '#I', 'II', 'III', '#III', 'IV', '#IV', 'V', 'VI', '#VI', 'VII', '#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

fun_dic_maj = dict(zip(range(0, len(root_funs_maj)), root_funs_maj))
fun_dic_min = dict(zip(range(0, len(root_funs_min)), root_funs_min))

# regex that separates roots and types, and gets chord base.
# The root is a note name (or N for "no chord") followed by any number of
# sharps (#) or flats (b).
p = re.compile(r'(?P<root>[A-GN](#|b)*)(?P<type>[a-z0-9]*)(/(?P<base>[A-G](#|b)*))*')
# key label such as "C major" or "C# / Db major"; the mode has to be exactly
# "major" or "minor"
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>(?:major|minor))')
# feature file name: <clipid>_<vamp...>.<ext>
# NOTE(review): the character classes below also accept a literal comma,
# which looks unintended; left as-is so no previously accepted name is lost.
pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')

ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key',
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}


# most simple note2num
def note2num(notein='Cb'):
    """Return the pitch class (0-11, C = 0) of a note name.

    notein -- a letter A-G followed by any number of '#' / 'b' accidentals.

    Raises ValueError for an empty string, an unknown note letter or an
    unknown accidental character.
    """
    if not notein or notein[0] not in roots_dic:
        raise ValueError("Error parsing chord " + str(notein))

    num = roots_dic[notein[0]]
    # the chord/key patterns admit several accidentals, so apply all of them
    for accidental in notein[1:]:
        if accidental == '#':
            num += 1
        elif accidental == 'b':
            num -= 1
        else:
            raise ValueError("Error parsing chord " + notein)
    return num % 12


# convert key to number
def key2num(keyin='C major'):
    """Parse a key label into a (key, mode) tuple.

    key is the pitch class of the tonic; mode is looked up in ``mode_dic``
    (0 for major, 1 for minor), with -1 for a mode missing from that table.

    Raises ValueError if the label does not match the key pattern ``p2``.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin) if keyin else None
    if not sepstring:
        raise ValueError("Error parsing key " + str(keyin))

    # get the pitch class of the tonic, adapted for sharps and flats
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode
    # ---
    mode = mode_dic.get(sepstring.group('mode'), -1)

    return (key, mode)


# convert chord to relative function
def chord2function(cin='B', key=3, mode=0):
    """Express a chord label relative to a key.

    cin  -- chord label such as 'B', 'Am7' or 'C/E'; 'N' means no chord.
    key  -- pitch class of the key (0-11).
    mode -- accepted for symmetry with the other functions; not used here.

    Returns (root, fun, typ, bfun):
      root -- pitch class of the chord root,
      fun  -- root relative to the key, in semitones (0-11),
      typ  -- index of the chord type in ``type_labels``,
      bfun -- bass note relative to the chord root in semitones (0 when the
              label gives no bass note).
    For 'N' the result is (-1, -1, -1, -1).

    Raises ValueError if the label cannot be parsed and KeyError if the
    chord type is not listed in ``type_labels``.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if not sepstring:
        raise ValueError("Error parsing chord " + cin)

    # test for N code -> no chord detected
    if sepstring.group('root') == 'N':
        return (-1, -1, -1, -1)

    # get root and type otherwise
    root = note2num(sepstring.group('root'))
    typ = type_dic[sepstring.group('type')]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root
    # ---
    base = sepstring.group('base')
    if base:
        bfun = (note2num(base) - root) % 12
    else:
        # this standard gives 1 as a base key if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root, fun, typ, bfun)


# reads in any csv and returns a list of structure
# time(float), data1, data2 ....data2
def read_vamp_csv(filein=''):
    """Read a feature CSV file.

    Returns one list per non-empty row: the first column converted to
    float, followed by the remaining columns as strings.
    """
    # the csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] < 3:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    with csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
            if not row:
                # blank line, nothing to convert
                continue
            output.append([float(row[0])] + row[1:])
    return output


# legacy:: finds featurefile for given piece
def find_features(clipin='', type='key'):
    """Locate the feature file of the given type for a clip.

    Searches the hard-coded feature directory for ``type`` ('key' or
    'chord') for a CSV file whose name starts with ``clipin`` and contains
    the matching ``ftype`` tag.

    Returns the path of the first match in sorted order, or None if no file
    matches.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    featuredirs = {'key': './qm_vamp_key_standard.n3_50ac9',
                   'chord': './chordino_simple.n3_1a812'}

    # search for featurefile
    featuredir = featuredirs[type]
    pattern = clipin + '*' + ftype[type] + '*.csv'
    for fname in sorted(os.listdir(featuredir)):
        if fnmatch.fnmatch(fname, pattern):
            return featuredir + '/' + fname
    return None


# reads features for given clip and of specified type
def get_features(clipin='', type='key', featurefiles=0):
    """Read the features of one type for a clip.

    featurefiles -- optional dict mapping 'key' / 'chord' to a CSV path; if
                    it is empty the file is searched with find_features().

    Returns the rows produced by read_vamp_csv().
    Raises IOError if no feature file can be found.
    """
    if featurefiles:
        path = featurefiles[type]
    else:
        # find_features returns the path itself, not a dict
        path = find_features(clipin, type)

    if not path:
        raise IOError("No " + type + " feature file found for clip " + clipin)
    return read_vamp_csv(path)


# histogram of the last entry in a list
# returns the most frequently used key
def histogram(keysin=None):
    """Count how often each value occurs in the last column of the rows.

    Returns (histo, top): histo maps each value to its count and top is the
    most frequent value, or None when there are no rows.
    """
    # build histogram
    histo = dict()
    for row in (keysin or []):
        histo[row[-1]] = histo.get(row[-1], 0) + 1

    if not histo:
        return (histo, None)

    # return most frequent key
    return (histo, max(histo, key=histo.get))


# main function, processes all chords for one song
def chords2functions(clipin='1CD0006591_BD11-14', featurefiles='', constkey=1):
    """Convert all chords of one track into key-relative functions.

    The track key is the most frequent entry of the key features, ignoring
    '(unknown)' estimates.

    Returns a list of (time, key, mode, fun, typ, bfun) tuples, one per
    chord.  Only the constant-key analysis is implemented: with a false
    ``constkey`` the result is an empty list.

    Raises ValueError if the key features hold no known key.
    """
    # get keys
    keys = get_features(clipin, 'key', featurefiles)

    # chords
    chords = get_features(clipin, 'chord', featurefiles)

    relchords = []
    if not constkey:
        return relchords

    # delete 'unknown' keys
    keys = [row for row in keys if not row[-1] == '(unknown)']

    # aggregate to one key
    (histo, skey) = histogram(keys)
    if skey is None:
        raise ValueError("No key detected for clip " + clipin)

    # get key number
    (key, mode) = key2num(skey)

    for (time, chord) in chords:
        # get chord function
        (root, fun, typ, bfun) = chord2function(chord, key, mode)
        relchords.append((time, key, mode, fun, typ, bfun))
    return relchords


def tracks_in_dir(dirin=''):
    """Collect the chord and key feature files below a directory.

    Returns a dict that maps each clip id to a dict with the entries
    'chord' and/or 'key' holding the path of the corresponding CSV file
    (with forward slashes).

    Raises ValueError for a CSV file whose name does not match ``pclip``.
    """
    # ---
    # we now only search for tracks which have chord data
    # ---

    # data is a dictionary that
    # for each filename contains the feature
    # files for chords and keys
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            parsed = pclip.match(fname)
            if not parsed:
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            entry = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            fullpath = os.path.join(dirpath, fname).replace('\\', '/')
            if parsed.group('type') == ftype['chord']:
                entry['chord'] = fullpath
            elif parsed.group('type') == ftype['key']:
                entry['key'] = fullpath
    return data


def fun2txt(fun, typ, bfun, mode):
    """Render a chord function as text, e.g. '(M)V7/3'.

    The text consists of the mode marker ('(M)' or '(m)'), the function
    label, the chord type and, if the bass note has a label, '/' plus that
    label.  Returns 'N' when there is no chord (fun < 0) or the mode is
    neither major (0) nor minor (1).
    """
    if fun < 0:
        return 'N'

    # now we can interpret this function
    # when given the mode of major or minor.
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        return 'N'

    type_txt = type_labels[typ] if typ > 0 else ''

    if bfun >= 0 and base_labels[bfun]:
        blb = '/' + base_labels[bfun]
    else:
        blb = ''
    return md + pfun + type_txt + blb


def fun2num(fun, typ, bfun, mode):
    """Pack a chord function into a single integer.

    Each component is incremented by one and placed in its own pair of
    decimal digits: mode, fun, typ, bfun.  Returns 0 when fun is -1.
    """
    if fun == -1:
        return 0
    return (mode + 1) * 1000000 + (fun + 1) * 10000 + (typ + 1) * 100 + (bfun + 1)


def folder2functions(path):
    """Compute the chord functions of every complete track below path.

    Tracks that do not have both a chord and a key file are skipped.
    Returns a dict mapping clip ids to the output of chords2functions().
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    # check for integrity: do we have keys and chords?
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        if len(featurefiles) == 2:
            output[clip] = chords2functions(clip, featurefiles)
    return output


def folder2histogram(path='./'):
    """Build a histogram of the chord functions found below path.

    Returns {"count": [...], "index": [...]} where "index" lists the
    fun2num() codes and "count" how often each of them occurs.
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # concatenate numeric form
    chords = []
    for track, contents in tracks.items():
        for (time, key, mode, fun, typ, bfun) in contents:
            chords.append([fun2num(fun, typ, bfun, mode)])

    # counts
    (v, w) = histogram(chords)
    print(v)
    # plain lists, so that the result can be serialised on Python 3 as well
    return {"count": list(v.values()), "index": list(v.keys())}


if __name__ == "__main__":
    #chords2functions()
    print("Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys")
    if c2j is None:
        raise ImportError("csv2json could not be imported from ../tools/")
    # sys.argv[1]
    result = folder2histogram()
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)