diff dml-cla/python/chord_seq_key_relative.py @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dml-cla/python/chord_seq_key_relative.py	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,386 @@
+#!/usr/bin/python
+# Part of DML (Digital Music Laboratory)
+# Copyright 2014-2015 Daniel Wolff, City University
+ 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+# -*- coding: utf-8 -*-
+__author__="wolffd"
+
+# json testfile
+#
+#{ "module":"chord_seq_key_relative",
+#      "function":"aggregate",
+#      "arguments": [[
+#      {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"},
+#          "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"},
+#            "trackuri": "Eins"},
+#      {"keys": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\qm_vamp_key_standard.n3_50ac9\\1CD0000653_BD01_vamp_qm-vamp-plugins_qm-keydetector_key.csv"},
+#          "chords": { "tag": "csv", "value":"D:\\mirg\\Chord_Analysis20141216\\Beethoven\\chordino_simple.n3_1a812\\1CD0000653_BD01_vamp_nnls-chroma_chordino_simplechord.csv"}}
+#      ]]
+#}
+
+# these for file reading etc
+import re
+import os
+import csv
+import numpy
+
+# spmf functions
+import chord_seq_spmf_helper as spmf
+
+from aggregate import *
+from csvutils import *
+
+# ---
+# roots
+# ---
+# natural note names; accidentals are applied on top of these in note2num
+chord_roots = ["C","D","E","F","G","A","B"]
+
+# note name -> pitch class (semitones above C)
+roots_dic = dict(zip(chord_roots, [0,2,4,5,7,9,11]))
+
+# mode label -> mode index (0 = major, 1 = minor)
+mode_lbls = ['major','minor']
+mode_dic = dict(zip(mode_lbls, range(0,2)))
+# ---
+# types
+# ---
+# chord type suffix -> index into type_labels
+type_labels = ["", "6", "7", "m","m6", "m7", "maj7", "m7b5", "dim", "dim7", "aug"]
+type_dic = dict(zip(type_labels, range(0,len(type_labels))))
+
+# label of the bass note, indexed by its distance in semitones above the
+# chord root; an empty entry means "no label" (see fun2txt)
+base_labels = ["1","","2","b3","3","4","","5","","6","b7","7"]
+#base_dic = dict(zip(base_labels, range(0,len(base_labels))))
+
+# functions: scale degree labels, indexed by the distance in semitones
+# between chord root and key tonic
+root_funs_maj = ['I','#I','II','#II','III','IV','#IV','V','#V','VI','#VI','VII']
+root_funs_min = ['I','#I','II','III','#III','IV','#IV','V','VI','#VI','VII','#VII']
+# dan's suggestion
+#root_funs_maj = ['I','#I','II','#II','(M)III','IV','#IV','V','#V','VI','#VI','(M)VII']
+#root_funs_min = ['I','#I','II','(m)III','#III','IV','#IV','V','VI','#VI','(m)VII','#VII']
+
+fun_dic_maj = dict(zip(range(0,len(root_funs_maj)),root_funs_maj))
+fun_dic_min = dict(zip(range(0,len(root_funs_min)),root_funs_min))
+
+# regex that separates root and type of a chord label, and gets the chord base
+# (the note after '/'). Root and base accept any run of '#' / 'b' accidentals.
+# NOTE(review): the ',' inside the character classes is matched literally, so
+# a comma is accepted as root / type character - probably unintended.
+p = re.compile(r'(?P<root>[A-G,N](#|b)*)(?P<type>[a-z,0-9]*)(/(?P<base>[A-G](#|b)*))*')
+
+# regex for key labels such as 'C major' or 'F# / Gb minor'.
+# The mode is an alternation of the two known labels; the former character
+# class [major|minor]+ also accepted arbitrary mixes of these letters.
+p2 = re.compile(r'(?P<key>[A-G](#|b)*)(\s/\s[A-G](#|b)*)*\s(?P<mode>major|minor)')
+
+# regex that splits a vamp output file name into clip id, feature type and
+# file extension.
+# NOTE(review): the '.' before the extension is unescaped and matches any
+# character; left unchanged as the pattern is not used within this file.
+pclip = re.compile(r'(?P<clipid>[A-Z,0-9]+(\-|_)[A-Z,0-9]+((\-|_)[A-Z,0-9]+)*((\-|_)[A-Z,0-9]+)*)_(?P<type>vamp.*).(?P<ext>(csv|xml|txt|n3)+)')
+
+
+
+def chords_from_csv(filename):
+    """Read a chord CSV and return its rows as (time, chord_string).
+
+    The first column is converted to float, the second is passed through
+    unchanged. Reading is delegated to csv_map_rows; the 2 handed to it
+    is presumably the expected number of columns (confirm in csvutils).
+    """
+    def parse_row(fields):
+        onset = float(fields[0])
+        return (onset, fields[1])
+
+    return csv_map_rows(filename, 2, parse_row)
+
+def keys_from_csv(filename):
+    """Read a key CSV and return its rows as (time, key_code, key_string).
+
+    The first column is converted to float, the other two are passed
+    through unchanged. Reading is delegated to csv_map_rows; the 3 handed
+    to it is presumably the expected number of columns (confirm in
+    csvutils).
+    """
+    def parse_row(fields):
+        onset = float(fields[0])
+        return (onset, fields[1], fields[2])
+
+    return csv_map_rows(filename, 3, parse_row)
+
+# parser lookup tables handed to decode_tagged together with a tagged input;
+# presumably selected by the input's "tag" entry. Only csv is registered.
+chord_parser_table = dict(csv=chords_from_csv)
+key_parser_table = dict(csv=keys_from_csv)
+
+# extracts relative chord sequences from inputs of chord / key data and
+# mines frequent sequential patterns from them with SPMF
+#
+# inputs: list of dictionaries, one per track, with the entries
+#    ['chords']    tagged chord data (e.g. chordino_simple.n3_1a812)
+#    ['keys']      tagged key data   (e.g. qm_vamp_key_standard.n3_50ac9)
+#    ['trackuri']  optional track identifier; a running number is used
+#                  when it is missing
+# @note: in future we could add support for qm_key_tonic input
+#
+# opts: dictionary, all entries optional
+#    opts["spm_algorithm"]  = "CM-SPADE" (default), "SPADE", "ClaSP" or "TKS"
+#    opts["spm_options"]    = first algorithm option, e.g. "70%"; defaults to
+#                             the minimum support ("" for TKS)
+#    opts["spm_maxseqs"]    = number of sequences (default 500, halved)
+#    opts["spm_minlen"]     = min. length of returned sequences (default 2)
+#    opts["spm_maxtime"]    = time budget for the mining (default 60, halved)
+#    opts["spm_ignore_n"]   = non-zero: drop 'N' (no chord) entries (default 1)
+#    opts["spm_minsupport"] = min. support in percent (default 50)
+#
+# output:
+#    { 'result': { 'sequences': seq, 'support': sup }, 'stats': st }
+#    with st being whatever for_each returns
+#
+# raises an Exception if fewer than 2 tracks were passed in
+
+# running number used as track identifier when an input has no 'trackuri';
+# it keeps counting across calls of aggregate
+trackctr = 0
+
+def aggregate(inputs,opts=None):
+    print_status('In chord_seq_key_relative')
+
+    if opts is None:
+        opts = {}
+
+    # SPADE, TKS or ClaSP algorithm?
+    algo = opts.get("spm_algorithm","CM-SPADE")
+
+    # number of sequences: half of the requested number is handed to TKS
+    # for each mode and also caps the merged output below.
+    # float() lets string values pass, like in the int() conversions below
+    maxseqs = int(float(opts.get("spm_maxseqs",500)) / 2)
+
+    # min. length of sequences
+    minlen = int(opts.get("spm_minlen",2))
+
+    # timeout handed to each SPMF run: half of the requested time,
+    # presumably because the two modes are mined one after the other
+    maxtime = int(float(opts.get("spm_maxtime",1*60)) / 2)
+
+    # non-zero: leave out 'N' (no chord detected) entries
+    ignoreN = int(opts.get("spm_ignore_n",1))
+
+    # min. support in percent, used as default for "spm_options"
+    minsup = int(opts.get("spm_minsupport",50))
+
+    # we now save the mode of each piece
+    # to treat them separately: index 0 = major, 1 = minor
+    out_chords = [dict(), dict()]
+
+    # tracks seen in THIS call. A list, so that the nested function can
+    # update it without 'nonlocal'. The global trackctr is not suited for
+    # the check below as it is never reset between calls.
+    ntracks = [0]
+
+    # generate dict[trackuri] = [ (time,key,mode,fun,typ,bfun) ]
+    def accum(item):
+        global trackctr
+        # increase virtual identifier
+        trackctr += 1
+        ntracks[0] += 1
+
+        # get the key annotations of this track
+        keys = decode_tagged(key_parser_table,item['keys'])
+
+        # get most frequent key
+        key,mode = most_frequent_key(keys)
+
+        relchords = []
+        for (time,chord) in decode_tagged(chord_parser_table,item['chords']):
+            # get chord function
+            (root,fun,typ,bfun) = chord2function(chord, key, mode)
+
+            # ignore chords that are 'N' (root == -1):
+            # a. the open pattern matching allows for arbitrary chords
+            #   to appear inbetween those in a sequence
+            # b. the N chord potentially maps to any contents, so the
+            #   inclusion of N chord has limited (or no) use
+            if ignoreN and root == -1:
+                continue
+
+            # add to chords of this clip
+            relchords.append((time,key,mode,fun,typ,bfun))
+
+        # save results into dict for this track
+        trackuri = item.get('trackuri',trackctr)
+        out_chords[mode][trackuri] = relchords
+
+    # collate relative chord information per file
+    st=for_each(inputs,accum)
+
+    if ntracks[0] < 2:
+        raise Exception("Need more than 1 track")
+
+    seq = [[],[]]
+    sup = [[],[]]
+
+    for mode in [0,1]:
+        # nothing to mine if no track is in this mode
+        if not out_chords[mode]:
+            continue
+
+        # write to spmf file
+        spmffile = spmf.relchords2spmf(out_chords[mode])
+        seqfile = None
+
+        try:
+            # run sequential pattern matching
+            if algo == "TKS":
+                algoopts = opts.get("spm_options","")
+                seqfile = spmf.spmf(spmffile.name,'TKS',[str(maxseqs), algoopts])
+            else:
+                if algo in ("ClaSP", "SPADE"):
+                    algoname = algo
+                else:
+                    # anything else falls back to CM-SPADE
+                    print_status('Running CM-SPADE algo')
+                    algoname = 'CM-SPADE'
+                algoopts = opts.get("spm_options",str(minsup) + "%")
+                seqfile = spmf.spmf(spmffile.name,algoname,[algoopts, str(minlen)], timeout = maxtime)
+
+            # parse spmf output
+            seq[mode],sup[mode] = spmf.spmf2table(seqfile)
+        finally:
+            # clean up the temporary files, also when the mining failed
+            if os.path.exists(spmffile.name):
+                os.remove(spmffile.name)
+            if seqfile is not None and os.path.exists(seqfile):
+                os.remove(seqfile)
+
+    # fold back sequences and support
+    # note that this results in the sequences being truncated together below
+    seq = [item for sublist in seq for item in sublist]
+    sup = [item for sublist in sup for item in sublist]
+
+    # filter according to min. sequencelength and number of sequences
+    seq_out = []
+    sup_out = []
+
+    # sort in descending support and pick up sequences of sufficient length
+    for i in numpy.argsort(sup)[::-1]:
+        if len(seq[i]) >= minlen:
+            seq_out.append(seq[i])
+            sup_out.append(sup[i])
+
+        if len(seq_out) >= maxseqs:
+            break
+
+    return { 'result': { 'sequences': seq_out, 'support': sup_out},
+             'stats' : st }
+
+
+def note2num(notein = 'Cb'):
+    """Convert a note name to its pitch class (0-11, C = 0).
+
+    notein is a note letter A-G followed by any number of accidentals,
+    each '#' raising and each 'b' lowering the pitch by one semitone,
+    e.g. 'Cb' -> 11, 'F#' -> 6, 'C##' -> 2.
+
+    Raises ValueError if the note letter or an accidental is unknown, or
+    if notein is empty.
+    """
+    try:
+        pitch = roots_dic[notein[0]]
+    except (KeyError, IndexError):
+        raise ValueError("Error parsing chord " + notein)
+
+    # apply every accidental, not only the first one
+    for accidental in notein[1:]:
+        if accidental == '#':
+            pitch += 1
+        elif accidental == 'b':
+            pitch -= 1
+        else:
+            raise ValueError("Error parsing chord " + notein)
+
+    return pitch % 12
+
+
+def key2num(keyin = 'C major'):
+    """Convert a key label such as 'C major' to numbers.
+
+    Returns (key, mode): key is the pitch class of the tonic as given by
+    note2num, mode is the index from mode_dic (0 = major, 1 = minor).
+
+    Raises ValueError if keyin does not match the key pattern p2.
+    """
+    # ---
+    # parse key string: separate root from rest
+    # ---
+    sepstring = p2.match(keyin)
+    if not sepstring:
+        # carry the message in the exception; a bare 'raise' has no
+        # active exception to re-raise here
+        raise ValueError("Error parsing key " + keyin)
+
+    # get pitch class of the tonic, accidentals included
+    key = note2num(sepstring.group('key'))
+
+    # ---
+    # parse mode
+    # ---
+    mode = sepstring.group('mode')
+
+    if mode:
+        mode = mode_dic[mode]
+    else:
+        # defensive only: p2 does not match without a mode
+        mode = -1
+
+    return (key, mode)
+
+    
+
+def chord2function(cin = 'B',key=3, mode=0):
+    """Convert a chord label to its function relative to a key.
+
+    cin  : chord label, e.g. 'B', 'F#m7', 'C/E' or 'N' (no chord)
+    key  : pitch class of the key's tonic (0-11)
+    mode : not used here, kept for the callers
+
+    Returns (root, fun, typ, bfun):
+      root : pitch class of the chord root
+      fun  : distance of the root above the tonic, in semitones (0-11)
+      typ  : index of the chord type in type_labels
+      bfun : distance of the bass note above the chord root, in
+             semitones; 0 if no bass note is given
+    For the 'N' chord (-1,-1,-1,-1) is returned.
+
+    Raises ValueError if the label cannot be parsed or the chord type
+    is unknown.
+    """
+    # ---
+    # parse chord string: separate root from rest
+    # ---
+    sepstring = p.match(cin)
+    if not sepstring:
+        raise ValueError("Error parsing chord " + cin)
+
+    # test for N code -> no chord detected
+    if sepstring.group('root') == 'N':
+        return (-1,-1,-1,-1)
+
+    # get root and type otherwise
+    root = note2num(sepstring.group('root'))
+    typelabel = sepstring.group('type')
+    if typelabel not in type_dic:
+        raise ValueError("Unknown chord type '" + typelabel + "' in chord " + cin)
+    typ = type_dic[typelabel]
+
+    # get relative position
+    fun = (root - key) % 12
+
+    # ---
+    # do we have a base key?
+    # if yes return it relative to chord root
+    # ---
+    base = sepstring.group('base')
+    if base:
+        bfun = (note2num(base) - root) % 12
+    else:
+        # this standard gives 1 as a base key if not specified otherwise
+        bfun = 0
+
+    # ---
+    # todo: integrate bfun in final type list
+    # ---
+
+    return (root,fun,typ,bfun)
+
+def read_vamp_csv(filein = ''):
+    """Read any comma separated file into a list of rows.
+
+    Each returned row is a list [time(float), data1, data2, ...]: the
+    first column is converted to float, all other columns stay strings.
+    Empty lines are skipped.
+
+    Raises ValueError if a first column is not a number, and whatever
+    open() raises if the file cannot be read.
+    """
+    import sys
+
+    # the csv module wants a binary file in Python 2,
+    # and a text file opened with newline='' in Python 3
+    if sys.version_info[0] < 3:
+        csvfile = open(filein, 'rb')
+    else:
+        csvfile = open(filein, 'r', newline='')
+
+    output = []
+    with csvfile:
+        for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
+            # csv.reader yields [] for an empty line
+            if not row:
+                continue
+            output.append([float(row[0])] + row[1:])
+    return output
+
+
+
+def histogram(keysin = ()):
+    """Count how often each last entry occurs in a list of rows.
+
+    keysin : sequence of rows (lists / tuples); only row[-1] is used
+
+    Returns (histo, top): histo maps each last entry to its count, top
+    is the entry with the highest count.
+
+    Raises ValueError if keysin is empty.
+    """
+    # build histogram
+    histo = dict()
+    for row in keysin:
+        label = row[-1]
+        histo[label] = histo.get(label, 0) + 1
+
+    if not histo:
+        raise ValueError("Cannot build a histogram from an empty list")
+
+    # return most frequent key
+    return (histo, max(histo, key=histo.get))
+
+def most_frequent_key(keys):
+    """Return the most frequent key of a track as (key, mode).
+
+    keys : rows of (time, key_code, key_string)
+
+    Rows with the key_string '(unknown)' are left out, the most frequent
+    remaining key_string is converted with key2num.
+
+    Raises ValueError if no row with a known key is left.
+    """
+    # delete 'unknown' keys
+    known = [(time,knum,key) for (time,knum,key) in keys if key != '(unknown)']
+
+    if not known:
+        raise ValueError("No known key found in key data")
+
+    # aggregate to one key
+    (histo, skey) = histogram(known)
+
+    # get key number
+    return key2num(skey)
+    
+     
+    
+def fun2txt(fun,typ, bfun,mode):
+    """Translate a relative chord function into text.
+
+    fun  : distance of the chord root above the tonic in semitones;
+           a negative value stands for 'no chord'
+    typ  : index of the chord type in type_labels
+    bfun : index of the bass note in base_labels
+    mode : 0 = major, 1 = minor
+
+    Returns 'N' for a negative fun, otherwise the mode marker
+    ('(M)' / '(m)'), the scale degree, the chord type and, if
+    base_labels has a label for bfun, '/' plus that label.
+
+    Raises ValueError if mode is neither 0 nor 1.
+    """
+    if fun < 0:
+        return 'N'
+
+    # now we can interpret this function
+    # when given the mode of major or minor.
+    if mode == 1:
+        pfun = fun_dic_min[fun]
+        md = '(m)'
+    elif mode == 0:
+        pfun = fun_dic_maj[fun]
+        md = '(M)'
+    else:
+        raise ValueError("Unknown mode " + str(mode))
+
+    typelabel = type_labels[typ] if typ > 0 else ''
+
+    # empty entries in base_labels have no text representation
+    blb = '/' + base_labels[bfun] if (bfun >= 0 and base_labels[bfun]) else ''
+    return md + pfun + typelabel + blb
+
+def fun2num(fun,typ, bfun,mode):
+    """Pack a relative chord function into a single number.
+
+    Returns 0 if fun is -1. Otherwise each of mode, fun, typ and bfun is
+    increased by one and weighted with 1000000, 10000, 100 and 1.
+    """
+    if fun == -1:
+        return 0
+
+    weighted = (
+        (mode + 1, 1000000),
+        (fun + 1, 10000),
+        (typ + 1, 100),
+        (bfun + 1, 1),
+    )
+    return sum(value * weight for (value, weight) in weighted)
+
+        
+if __name__ == "__main__":
+    # NOTE(review): folder2histogram and c2j are neither defined nor
+    # imported by name in this file; unless the star imports provide
+    # them, running this module as a script ends in a NameError.
+    usage = "Creates a key-independent chord histogram. Usage: chord2function path_vamp_chords path_vamp_keys"
+    print(usage)
+    result = folder2histogram()
+    prompt = "Please input a description for the chord function histogram"
+    print(prompt)
+    c2j.data2json(result)