view collection_analysis/chord_sequence_mining/chord2function.py @ 0:e34cf1b6fe09 tip

commit
author Daniel Wolff
date Sat, 20 Feb 2016 18:14:24 +0100
parents
children
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University
 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

#!/usr/bin/python
# -*- coding: utf-8 -*-
__author__="Daniel Wolff"

import re

# these for file reading etc
import fnmatch
import os
import csv
import spmf

import sys
sys.path.insert(0, '../tools/')
import csv2json as c2j

# ---
# roots
# ---
# natural note names; roots_dic maps each of them to its pitch class
# (semitones above C)
chord_roots = ["C","D","E","F","G","A","B"] 

# create a dictionary for efficiency
roots_dic = dict(zip(chord_roots, [0,2,4,5,7,9,11]))

# mode label -> mode number (major = 0, minor = 1)
mode_lbls = ['major','minor']
mode_dic = dict(zip(mode_lbls, range(0,2)))
# ---
# types
# ---
# chord type suffix -> index into type_labels
type_labels = ["", "6", "7", "m","m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
type_dic = dict(zip(type_labels, range(0,len(type_labels))))

# interval label for a bass note that lies n semitones above the chord root;
# empty entries have no label
base_labels = ["1","","2","b3","3","4","","5","","6","b7","7"]
#base_dic = dict(zip(base_labels, range(0,len(base_labels))))

# functions
# scale degree labels indexed by the number of semitones above the key root
root_funs_maj = ['I','#I','II','#II','III','IV','#IV','V','#V','VI','#VI','VII']
root_funs_min = ['I','#I','II','III','#III','IV','#IV','V','VI','#VI','VII','#VII']
# dan's suggestion
#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']

fun_dic_maj = dict(zip(range(0,len(root_funs_maj)),root_funs_maj))
fun_dic_min = dict(zip(range(0,len(root_funs_min)),root_funs_min))

# regex that separates roots and types, and gets chord base
# roots and bass notes may carry sharps (#) or flats (b); 'N' means "no chord"
p = re.compile(r'(?P<root>[A-G,N](#|b)*)(?P<type>[a-z,0-9]*)(/(?P<base>[A-G](#|b)*))*')

# regex for key labels such as 'C major' or 'F# / Gb minor'.
# The mode has to be an alternation: a character class like [major|minor]+
# would accept any run of those letters (e.g. 'mirror').
p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')

# regex that splits a feature file name into clip id, vamp output type and
# extension; the dot before the extension is escaped so it only matches '.'
pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*)\.(?P<ext>(csv|xml|txt|n3)+)')

# vamp output identifiers as they appear in the feature file names
ftype = {'key': 'vamp_qm-vamp-plugins_qm-keydetector_key', 
         'chord': 'vamp_nnls-chroma_chordino_simplechord'}

# most simple note2num
def note2num(notein = 'Cb'):
    """Convert a note name such as 'C', 'F#' or 'Bb' to its pitch class.

    notein: a natural note letter (a key of roots_dic), optionally followed
            by accidentals. Each '#' raises and each 'b' lowers the note by
            one semitone.

    Returns the pitch class as an integer in 0..11 (C = 0).
    Raises ValueError if the root letter or an accidental is not recognised.
    """
    if not notein or notein[0] not in roots_dic:
        raise ValueError("Error parsing chord " + str(notein))

    num = roots_dic[notein[0]]
    for accidental in notein[1:]:
        if accidental == '#':
            num += 1
        elif accidental == 'b':
            num -= 1
        else:
            raise ValueError("Error parsing chord " + notein)

    # wrap around so that e.g. 'Cb' becomes 11 and 'B#' becomes 0
    return num % 12


# convert key to number
def key2num(keyin = 'C major'):
    """Parse a key label into (key, mode) numbers.

    keyin: key label such as 'C major' or 'F# / Gb minor'. When an
           alternative spelling is given after ' / ', the first spelling
           is used.

    Returns a tuple (key, mode): key is the pitch class of the key root as
    computed by note2num, mode is the value from mode_dic or -1 if the mode
    label is not listed there.
    Raises ValueError if keyin does not match the key pattern p2.
    """
    # ---
    # parse key string: separate root from rest
    # ---
    sepstring = p2.match(keyin)
    if not sepstring:
        raise ValueError("Error parsing key " + str(keyin))

    # get pitch class of the key root, adapted for sharps / flats
    key = note2num(sepstring.group('key'))

    # ---
    # parse mode. care for (unknown) string: anything that is not a
    # known mode label maps to -1 instead of raising a KeyError
    # ---
    mode = mode_dic.get(sepstring.group('mode'), -1)

    return (key, mode)

    

# convert chord to relative function
def chord2function(cin = 'B',key=3, mode=0):
    """Convert a chord label into numbers relative to a given key.

    cin:  chord label such as 'B', 'Am7' or 'C/E'; 'N' means no chord.
    key:  pitch class (0..11) of the key root.
    mode: accepted for symmetry with the other functions; not used here.

    Returns a tuple (root, fun, typ, bfun):
      root - pitch class of the chord root
      fun  - semitones of the chord root above the key root
      typ  - index of the chord type in type_labels
      bfun - semitones of the bass note above the chord root, 0 if no
             bass note is given
    For the 'N' label all four values are -1.
    Raises ValueError if the label cannot be parsed or its type is unknown.
    """
    # ---
    # parse chord string: separate root from rest
    # ---
    sepstring = p.match(cin)
    if not sepstring:
        raise ValueError("Error parsing chord " + str(cin))

    # test for N code -> no chord detected
    root_lbl = sepstring.group('root')
    if root_lbl == 'N':
        return (-1,-1,-1,-1)

    # get root and type otherwise
    root = note2num(root_lbl)
    type_lbl = sepstring.group('type')
    if type_lbl not in type_dic:
        raise ValueError("Unknown chord type '" + type_lbl + "' in chord " + cin)
    typ = type_dic[type_lbl]

    # get relative position
    fun = (root - key) % 12

    # --- do we have a base key?
    # if yes return it relative to chord root
    # ---
    base_lbl = sepstring.group('base')
    if base_lbl:
        bfun = (note2num(base_lbl) - root) % 12
    else:
        # this standard gives 1 as a base key if not specified otherwise
        bfun = 0

    # ---
    # todo: integrate bfun in final type list
    # ---

    return (root,fun,typ,bfun)

# reads in any csv and returns a list of structure
# time(float), data1, data2 ....data2
def read_vamp_csv(filein = ''):
    """Read a csv feature file into a list of rows.

    filein: path of the csv file (comma separated, '"' as quote character).

    Returns a list with one entry per non-empty csv row; each entry is a
    list whose first element is the first column converted to float,
    followed by the remaining columns as strings.
    Raises ValueError if the first column of a row is not a number.
    """
    # the csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3
    if sys.version_info[0] < 3:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    with csvfile:
        contents = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in contents:
            # blank lines come back as empty rows and carry no data
            if not row:
                continue
            output.append([float(row[0])] + row[1:])
    return output

# legacy:: finds featurefile for given piece
def find_features(clipin = '', type='key'): 
    """Find the csv feature file of one clip in the legacy feature folders.

    clipin: clip id the file name has to start with.
    type:   'key' or 'chord'; selects the feature folder and the vamp
            output name (ftype) that has to appear in the file name.

    Returns the path of the first matching file, built from the same
    forward-slash directory that was searched, or None if nothing matches.
    """
    # ---
    # These Parametres are for the high-level parse functions
    # ---
    featuredirs =  {'key':'./qm_vamp_key_standard.n3_50ac9',
               'chord': './chordino_simple.n3_1a812'}

    # search for featurefile
    featuredir = featuredirs[type].replace('\\', '/')
    pattern = clipin + '*' + ftype[type] + '*.csv'
    for fname in os.listdir(featuredir):
        if fnmatch.fnmatch(fname, pattern):
            return featuredir + '/' + fname

    # no feature file for this clip
    return None

# reads features for given clip and of specified type
def get_features(clipin = '', type='key', featurefiles = 0):
    """Read the features of one type for one clip.

    clipin:       clip id, only used when the file has to be searched for.
    type:         'key' or 'chord'.
    featurefiles: optional dictionary mapping feature type to file path;
                  if empty, the file is looked up with find_features.

    Returns the rows read by read_vamp_csv.
    Raises IOError if no feature file can be found for the clip.
    """
    if featurefiles:
        filein = featurefiles[type]
    else:
        # find_features returns the path itself, not a dictionary
        filein = find_features(clipin, type)
        if filein is None:
            raise IOError("No " + type + " feature file found for clip " + clipin)
    return read_vamp_csv(filein)
        
# histogram of the last entry in a list
# returns the most frequently used key
def histogram(keysin = []):
    """Count how often each value occurs in the last column of keysin.

    keysin: sequence of rows (lists or tuples); only the last element of
            each row is counted. It has to be hashable.

    Returns a tuple (histo, best): histo maps each value to its count and
    best is a value with the highest count. For empty input the result is
    ({}, None).
    """
    # build histogram
    histo = dict()
    for row in keysin:
        histo[row[-1]] = histo.get(row[-1], 0) + 1

    # nothing counted: there is no most frequent value
    if not histo:
        return (histo, None)

    # return most frequent key
    return (histo, max(histo, key=histo.get))

    
# main function, processes all chords for one song
def chords2functions(clipin = '1CD0006591_BD11-14',featurefiles = '', constkey = 1):
    """Convert all chords of one clip into key-relative chord functions.

    clipin:       clip id.
    featurefiles: optional dictionary with the 'key' and 'chord' feature
                  file paths, passed on to get_features.
    constkey:     if true, the most frequent detected key is used for the
                  whole clip. Other settings are not implemented and give
                  an empty result.

    Returns a list of tuples (time, key, mode, fun, typ, bfun), one per
    chord row. The list is empty if constkey is false or if no key other
    than '(unknown)' was detected.
    """
    # get keys
    keys = get_features(clipin,'key',featurefiles)

    # chords
    chords = get_features(clipin,'chord',featurefiles)

    relchords = []
    if not constkey:
        return relchords

    # delete 'unknown' keys; the key label is the last column of each row
    keys = [row for row in keys if not row[-1] == '(unknown)']
    if not keys:
        # without a usable key there is nothing to relate the chords to
        return relchords

    # aggregate to one key
    (histo, skey) = histogram(keys)

    # get key number
    (key,mode) = key2num(skey)

    for (time,chord) in chords:
        # get chord function
        (root,fun,typ,bfun) = chord2function(chord, key, mode)
        relchords.append((time,key,mode,fun,typ,bfun))
    return relchords
                
def tracks_in_dir(dirin = ''):
    """Collect the chord and key feature files of all clips below dirin.

    dirin: root directory that is walked recursively.

    Returns a dictionary that maps each clip id to a dictionary with the
    entries 'chord' and / or 'key', holding the forward-slash path of the
    respective csv feature file. Clips whose csv files are of neither type
    get an empty dictionary.
    Raises ValueError for a csv file whose name does not match pclip.
    """
    # data is a dictionary that
    # for each filename contains the feature
    # files for chords and keys
    data = dict()

    # traverse the file structure and get all track names
    for (dirpath, dirnames, filenames) in os.walk(dirin):
        for fname in filenames:
            if not fname.endswith(".csv"):
                continue

            # parse filename to get clip_id
            parsed = pclip.match(fname)
            if not parsed:
                raise ValueError("Could not parse " + fname)

            # initialise dict if necessary
            clipfiles = data.setdefault(parsed.group('clipid'), dict())

            # add data to dictionary
            featuretype = parsed.group('type')
            filepath = os.path.join(dirpath, fname).replace('\\', '/')
            if featuretype == ftype['chord']:
                clipfiles['chord'] = filepath
            elif featuretype == ftype['key']:
                clipfiles['key'] = filepath
    return data
    # return list of tracknames
    # return list of feature dirs
    
    
def fun2txt(fun,typ, bfun,mode):
    """Render a chord function as text, e.g. '(M)V7/1'.

    fun:  semitones of the chord root above the key root; negative for
          "no chord".
    typ:  index into type_labels.
    bfun: semitones of the bass note above the chord root.
    mode: 0 for major, 1 for minor.

    Returns 'N' for a negative fun. Otherwise returns the mode marker
    ('(M)' or '(m)'), the scale degree, the chord type and, where
    base_labels has a label for bfun, '/' plus that label.
    Raises ValueError if mode is neither 0 nor 1.
    """
    if fun < 0:
        return 'N'

    # now we can interpret this function
    # when given the mode of major or minor.
    if mode == 1:
        pfun = fun_dic_min[fun]
        md = '(m)'
    elif mode == 0:
        pfun = fun_dic_maj[fun]
        md = '(M)'
    else:
        raise ValueError("Unknown mode " + str(mode))

    type_lbl = type_labels[typ] if typ > 0 else ''

    # only some bass intervals have a label
    if bfun >= 0 and base_labels[bfun]:
        blb = '/' + base_labels[bfun]
    else:
        blb = ''
    return md + pfun + type_lbl + blb

def fun2num(fun,typ, bfun,mode):
    """Pack a chord function into one integer code.

    Each of mode, fun, typ and bfun is incremented by one and the results
    are combined as mode * 1000000 + fun * 10000 + typ * 100 + bfun.
    A fun of -1 (no chord) gives 0.
    """
    if fun == -1:
        return 0

    # most significant field first
    fields = (mode + 1, fun + 1, typ + 1, bfun + 1)
    weights = (1000000, 10000, 100, 1)
    return sum(field * weight for field, weight in zip(fields, weights))

def folder2functions(path):
    """Compute the chord functions of all clips found below path.

    path: root directory, searched with tracks_in_dir.

    Prints every clip id that is found. Returns a dictionary that maps
    each clip id having both a key and a chord feature file to the result
    of chords2functions; clips lacking either file are left out.
    """
    tracks = tracks_in_dir(path)

    # get chords for all files
    output = dict()
    for clip, featurefiles in tracks.items():
        print(clip)
        # check for integrity: do we have keys and chords?
        if 'key' in featurefiles and 'chord' in featurefiles:
            output[clip] = chords2functions(clip,featurefiles)
    return output
        
def folder2histogram(path= './'):
    """Build a histogram of chord function codes for all clips below path.

    path: root directory, passed on to folder2functions.

    Prints the histogram dictionary. Returns a dictionary with the lists
    "count" and "index": "index" holds the chord function codes produced
    by fun2num and "count" the number of occurrences of each code, in
    matching order. Both lists are empty if no chords were found.
    """
    # get chord functions for the folder
    tracks = folder2functions(path)

    # collect the numeric codes, one single-element row per chord
    chords = []
    for contents in tracks.values():
        for (time,key,mode,fun,typ,bfun) in contents:
            chords.append([fun2num(fun,typ,bfun,mode)])

    # counts; without any chords there is nothing to aggregate
    if chords:
        (v,w) = histogram(chords)
    else:
        v = dict()
    print(v)
    return {"count":list(v.values()), "index":list(v.keys())}
        
if __name__ == "__main__":
    # Script entry point: builds the chord function histogram for a feature
    # folder and hands it to csv2json. The folder is taken from the first
    # command line argument and defaults to the current directory.
    print("Creates a key-independent chord histogram. Usage: chord2function [path_vamp_features]")
    path = sys.argv[1] if len(sys.argv) > 1 else './'
    result = folder2histogram(path)
    print("Please input a description for the chord function histogram")
    c2j.data2json(result)