# -*- coding: utf-8 -*-
# dml-cla/python/similarity.py
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
__author__ = 'wolffd'

# This script derives all pairwise similarity measures for the feature
# vectors (chroma, MFCC, chords) provided. As a first experiment, only the
# mean feature vectors per piece are compared using euclidean distance.

# parameters to be forwarded to API

# similarity type:
# euclidean, compression
# simtype = 'euclidean'

# parallelisation
num_cores = 10

#min_clusters = 40   ## unused
#max_clusters = 256  ## unused

#set_clusters = 40
#max_clips = 50
encoding = 'binary'
#compressor = 'zxd'
mds_init_tries = 4
mds_max_iter = 100
mfccbins = 12

# resample chroma / timbre values at this stride for the compression
# distance; we want just one vector per second. The standard sample rate
# and window size are 44100 / 1024 for chroma / timbre. Downsampling is
# controlled by the sim_downsample option (0 switches it off).
resample_factor = 44100 // 1024
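# sanity check of the stride above (assuming the hop equals the window
# size): 44100 samples/s with 1024-sample frames gives 44100/1024, i.e.
# about 43 feature frames per second, so keeping every 43rd frame leaves
# roughly one frame per second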

from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3

# numpy, scipy
import numpy as np
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from scipy.signal import resample

# scikit-learn
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn import manifold

# chord processing
from chord_seq_key_relative import chords_from_csv, keys_from_csv, chord2function, fun2txt, fun2num, most_frequent_key, chord_roots, type_labels

# subprocess, command line and threading
import os, tempfile
import subprocess, threading

# for system / compression calls
import zlib

def chroma_from_csv(filename):
    # we assume CSV: time, chroma 1, ..., chroma 12
    # return (time, [chroma 1-12])
    return csv_map_rows(filename, 13, lambda row: (float(row[0]), np.array(row[1:13], dtype=float)))

def mfcc_from_csv(filename):
    # we assume CSV: time, mfcc 1, ..., mfcc 20
    # return (time, [mfcc 1-20])
    return csv_map_rows(filename, 21, lambda row: (float(row[0]), np.array(row[1:21], dtype=float)))


chroma_parser_table = {'csv': chroma_from_csv}
mfcc_parser_table = {'csv': mfcc_from_csv}

# in chord_seq_relative
key_parser_table = {'csv': keys_from_csv}
chord_parser_table = {'csv': chords_from_csv}

## generate global dict of chord_keys
chord_keys = []
for chordnum in range(1, 12+1):
    for typenum in range(1, 11+1):
        chord_keys.append("%02d%02d" % (chordnum, typenum))
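# for reference, the list built above has 12 * 11 = 132 entries:
# chord_keys[:3] == ['0101', '0102', '0103'], ..., chord_keys[-1] == '1211'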

def per_file(inputs, opts={}):
    chromas = []
    chromas_idx = []
    mfccs = []
    mfccs_idx = []
    chords = []
    chords_idx = []
    uris = []

    # get options from API
    # print_status(str(opts))
    simtype = opts['sim_type']
    set_clusters = opts['sim_clusters']   # def 40
    downsample = opts['sim_downsample']   # def 1
    limit = opts['sim_reclimit']          # def 50
    compressor = opts['sim_compressor']   # def 'zlib'

    # parse feature list (def: chroma) and match feature names exactly
    features = [feat.strip() for feat in opts['sim_features'].split(',')]
    use_chromagram = 'chromagram' in features
    use_mfcc = 'mfcc' in features
    use_chords = 'chords' in features
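    # a hypothetical opts dict, using the defaults noted above:
    # {'sim_type': 'euclidean', 'sim_clusters': 40, 'sim_downsample': 1,
    #  'sim_reclimit': 50, 'sim_compressor': 'zlib',
    #  'sim_features': 'chromagram,mfcc'}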

    # check number of inputs
    if len(inputs) > limit:
        #return { 'error': ''}
        print_status('Similarity: Too many inputs, truncating collection')
        inputs = inputs[0:limit]

    # accumulation for euclidean: just take the mean values over each whole clip
    # todo: add std and other statistics?
    def accum_euclidean(item):
        # accumulate chroma vectors for this piece
        if use_chromagram:
            chroma = [res[1] for res in decode_tagged(chroma_parser_table, item['chromagram'])]
            # print_status('Chroma Raw Data' + str(chroma))
            # get mean chroma vector
            chroma_mean = np.mean(np.array(chroma), axis=0)
            #print_status('Chroma Means' + str(chroma_mean))

            # add vector to chromas table
            chromas.append(chroma_mean)

        if use_mfcc:
            mfcc = [res[1] for res in decode_tagged(mfcc_parser_table, item['mfcc'])]
            mfcc_mean = np.mean(np.array(mfcc), axis=0)
            mfccs.append(mfcc_mean)

        if use_chords:
            # get the key annotations for this clip
            keys = decode_tagged(key_parser_table, item['keys'])
            # get most frequent key
            key, mode = most_frequent_key(keys)
            relchords = []
            for (time, chord) in decode_tagged(chord_parser_table, item['chords']):

                # get chord function relative to the key
                (root, fun, typ, bfun) = chord2function(chord, key, mode)

                # translate into text
                #txt = fun2txt(fun, typ, bfun, mode)
                #print_status('Chord: ' + chord + ', function: ' + txt)
                num = fun2num(fun, typ, bfun, mode)
                if num > 0:
                    # add to chords of this clip
                    #relchords.append((time,key,mode,fun,typ,bfun))

                    # keep the function and type digits of the chord,
                    # ignore mode and base note
                    # format of num: [1x mode, 2x function, 2x type, 2x base note]
                    relchords.append(str(num)[1:5])

            # append histogram of all chords for this recording
            hist = chord_histogram(relchords)
            chords.append(hist)

        # add uri if everything went well
        uris.append(item['list'])

    # accumulation for compression:
    # save all (optionally downsampled) feature vectors and remember the
    # per-clip end indices; quantisation into a codebook happens later
    def accum_compression(item):

        # get chromas
        if use_chromagram:
            # accumulate chroma vectors for this piece
            chroma = [res[1] for res in decode_tagged(chroma_parser_table, item['chromagram'])]
            # print_status('Chroma Raw Data' + str(chroma))

            # downsample if necessary
            if downsample == 1:
                #chroma = resample(chroma, len(chroma)//resample_factor, axis=0, window=None)
                #chroma = [chroma[i] for i in np.random.randint(0,len(chroma),len(chroma)//resample_factor)]
                chroma = [chroma[i*resample_factor] for i in range(0, len(chroma)//resample_factor)]

            chromas.extend(chroma)
            chromas_idx.append(len(chromas))

        if use_mfcc:
            mfcc = [res[1] for res in decode_tagged(mfcc_parser_table, item['mfcc'])]

            if downsample == 1:
                # mfcc = [mfcc[i] for i in np.random.randint(0,len(mfcc),len(mfcc)//resample_factor)]
                mfcc = [mfcc[i*resample_factor] for i in range(0, len(mfcc)//resample_factor)]
            mfccs.extend(mfcc)
            mfccs_idx.append(len(mfccs))

        if use_chords:
            # get the key annotations for this clip
            keys = decode_tagged(key_parser_table, item['keys'])
            # get most frequent key
            key, mode = most_frequent_key(keys)
            relchords = []
            for (time, chord) in decode_tagged(chord_parser_table, item['chords']):

                # get chord function relative to the key
                (root, fun, typ, bfun) = chord2function(chord, key, mode)

                # translate into text
                #txt = fun2txt(fun, typ, bfun, mode)
                #print_status('Chord: ' + chord + ', function: ' + txt)
                num = fun2num(fun, typ, bfun, mode)
                if num > 0:
                    # add to chords of this clip
                    #relchords.append((time,key,mode,fun,typ,bfun))

                    # keep the function and type digits of the chord,
                    # ignore mode and base note
                    # format of num: [1x mode, 2x function, 2x type, 2x base note]
                    relchords.append(int(str(num)[1:5]))

            # append the raw chord sequence for this recording
            #hist = chord_histogram(relchords)
            chords.extend(relchords)
            chords_idx.append(len(chords))

        # add uri if everything went well
        uris.append(item['list'])


    # ---
    # this is the euclidean distance
    # ---
    if (simtype == 'euclidean'):
        # accumulate over all inputs
        st = for_each(inputs, accum_euclidean)

        # concatenate feature input for all features
        arr = np.empty((len(uris), 0), float)
        # concatenate data to nparray for euclidean distance
        if use_chromagram:
            arr = np.append(arr, np.array(chromas), axis=1)

        if use_mfcc:
            arr = np.append(arr, np.array(mfccs), axis=1)

        if use_chords:
            # get chord dictionaries
            #print(str(np.array(chords).shape))
            arr = np.append(arr, np.array(chords), axis=1)

        #dist = distance.pdist(chromas, 'euclidean')
        dist = pairwise_distances(arr, metric='euclidean', n_jobs=num_cores)
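        # pairwise_distances returns a dense, symmetric
        # (len(uris), len(uris)) matrix with zeros on the diagonal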

        # return to non-condensed matrix for simplicity.
        # this can be reversed using the very same function for data
        # efficiency
        #dist = distance.squareform(dist)

    # ---
    # this is the normalised compression distance
    # ---
    elif (simtype == 'compression'):
        # accumulate over all inputs
        print_status('Similarity Module: Accumulating')
        st = for_each(inputs, accum_compression)

        dist = np.zeros((len(uris), len(uris)))
        count = 0
        if use_chromagram:
            print_status('Similarity Module: Chroma Quantisation')
            chromas_coded = vector_quantisation(np.array(chromas), set_clusters, num_cores)
            print_status('Similarity Module: Chroma Compression Results')
            dist += similarity_by_mask(chromas_coded, chromas_idx, compressor, encoding)
            count += 1

        if use_mfcc:
            print_status('Similarity Module: MFCC Quantisation')
            mfccs_coded = vector_quantisation(np.array(mfccs), set_clusters, num_cores)
            print_status('Similarity Module: MFCC Compression Results')
            dist += similarity_by_mask(mfccs_coded, mfccs_idx, compressor, encoding)
            count += 1

        if use_chords:
            print_status('Similarity Module: Chord Compression Results')
            dist += similarity_by_mask(np.array(chords), chords_idx, compressor, encoding)
            count += 1

        # average over the per-feature distance matrices
        dist = dist / count

        # get rid of zeros in between
        #for idx1 in range(0,len(chromas_idx)):
        #    dist[idx1][idx1] = 1

    print_status('dist' + str(dist))

    # do MDS scaling with the precomputed distance matrix
    mds = manifold.MDS(n_components=2, max_iter=mds_max_iter, n_init=mds_init_tries, dissimilarity='precomputed')

    coordinates = mds.fit_transform(dist)
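    # coordinates has shape (len(uris), 2): one 2-D point per recording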

    return {'result': {'list': uris, 'mds': coordinates.tolist()}, 'stats': st}
    # return { 'result': { 'list': uris, 'distance': dist.tolist(), 'mds': coordinates.tolist()},
    #          'stats' : st }


def vector_quantisation(data, set_clusters, num_cores):
    # ---
    # build codebook!
    # ---
    # --- 1 quantise the feature data
    # --- 1a use scikit-learn k-means
    #        http://scikit-learn.org/stable/modules/clustering.html
    clusterer = KMeans(n_clusters=set_clusters, n_jobs=num_cores)

    # --- 2 assign each frame to its nearest codebook entry
    data_coded = clusterer.fit_predict(data)
    #print_status('Data Coded' + str(data_coded))
    # print_status('Coding Histogram' + str(np.histogram(data_coded)))
    return data_coded


def similarity_by_mask(data_coded, data_idx, compressor, encoding):

    # data_idx[i] holds the index just past the last frame of clip i,
    # so clip i spans data_coded[start_idx[i]:data_idx[i]]
    start_idx = [0] + data_idx[:-1]
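    # worked example with hypothetical frame counts: for three clips of
    # 100, 150 and 200 frames, data_idx == [100, 250, 450] and
    # start_idx == [0, 100, 250], so clip 0 is data_coded[0:100], etc.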
    dist = np.zeros((len(data_idx), len(data_idx)))
    for idx1 in range(0, len(data_idx)):
        for idx2 in range(0, len(data_idx)):
            if (idx2 < idx1):
                # select the encoded frames for the two clips
                # (slice ends are exclusive, so no off-by-one correction)
                data1_mask = np.zeros(len(data_coded), dtype=bool)
                data1_mask[start_idx[idx1]:data_idx[idx1]] = True

                data2_mask = np.zeros(len(data_coded), dtype=bool)
                data2_mask[start_idx[idx2]:data_idx[idx2]] = True

                a_coded = encode(data_coded[data1_mask], format=encoding)
                b_coded = encode(data_coded[data2_mask], format=encoding)
                # get compression lengths
                if compressor == 'zlib':
                    (a, b, ab) = compressed_length(a_coded, b_coded, compressor)
                else:
                    # use the complement of both clips as the shared reference
                    ref_mask = ~data1_mask & ~data2_mask
                    ref_coded = encode(data_coded[ref_mask], format=encoding)
                    (a, b, ab) = delta_compressed_length(a_coded, b_coded, ref_coded, compressor)

                # normalised compression distance:
                # NCD(x, y) = (C(xy) - min(C(x), C(y))) / max(C(x), C(y))
                dist[idx1][idx2] = (ab - min(a, b)) / float(max(a, b))

                # the normalised compression distance is symmetric,
                # as required by the mds routine above
                dist[idx2][idx1] = dist[idx1][idx2]

    return dist

def encode(data, format='string'):

    # encode the quantised frames so they can be concatenated and compressed
    if format == 'binary':
        data_coded = data.tostring()
    elif format == 'string':
        data_coded = str(data)
    else:
        raise ValueError('unknown encoding format: %s' % format)
    return data_coded
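# for instance (hypothetical input), encode(np.array([1, 2, 3]), format='binary')
# yields the raw bytes of the array buffer, while format='string' falls back
# to the textual repr of the data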

def compressed_length(a_coded, b_coded, type='zlib'):

    # Compression
    if type == 'zlib':
        # zlib is quite helpful https://docs.python.org/2/library/zlib.html#module-zlib
        a = len(zlib.compress(a_coded, 9))
        b = len(zlib.compress(b_coded, 9))
        ab = len(zlib.compress(a_coded + b_coded, 9))

    return (a, b, ab)

def delta_compressed_length(a_coded, b_coded, ref_coded, type='zxd'):
    # Compression via external delta coders:
    # zbs   - uses bsdiff
    # zxd   - uses xdelta3
    # zvcd  - uses open-vcdiff
    # zvcz  - uses vczip
    # zdiff - converts binary to text and uses diff to produce an ed script

    if type in ('zxd', 'zbs', 'zvcz', 'zdiff', 'zvcd'):

        freference = tempfile.NamedTemporaryFile(delete=False)
        freference.write(ref_coded)
        freference.close()
        #print_status('Ref File: ' + freference.name)

        # to be optimised with bufs later
        # get length of a with respect to the reference
        command = '/home/dml/src/hg/dml-cliopatria/cpack/dml/scripts/compression/%s encode %s | /home/dml/src/hg/dml-cliopatria/cpack/dml/scripts/compression/length' % (type, freference.name)
        # print_status(command)
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=a_coded)
        a = int(output)

        # get length of b with respect to the reference
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=b_coded)
        b = int(output)

        # get length of a+b with respect to the reference
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=a_coded + b_coded)
        ab = int(output)

        #print_status('Compressed Size ' + str(ab))
        os.remove(freference.name)
    return (a, b, ab)

# normalised histogram of chord-function strings over the global
# chord_keys dictionary
def chord_histogram(chordstr=()):
    global chord_keys
    # build histogram
    histo = dict.fromkeys(chord_keys, 0)
    for chord in chordstr:
        histo[chord] = histo.get(chord, 0) + 1
    #print_status(str(histo.keys()))

    # read the counts out in the fixed chord_keys order so that
    # histograms of different clips are comparable
    counts = np.array([histo[key] for key in chord_keys], float)
    if max(counts) > 0:
        counts = counts / max(counts)
    return counts

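
# ----------------------------------------------------------------------
# A minimal smoke test, runnable without the DML API. The values below are
# synthetic stand-ins, not real chroma or chord data; it only exercises the
# NCD and histogram helpers defined above.
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # two toy "clips" of quantised symbols laid out back to back, plus
    # their end indices as accum_compression would record them
    toy_coded = np.array([0, 1, 2, 0, 1, 2] * 20 + [3, 4, 5, 3, 4, 5] * 20)
    toy_idx = [120, 240]
    d = similarity_by_mask(toy_coded, toy_idx, 'zlib', 'binary')
    print('NCD matrix for two toy clips:')
    print(d)

    # histogram of a few chord-function strings drawn from chord_keys
    h = chord_histogram([chord_keys[0], chord_keys[0], chord_keys[1]])
    print('histogram peak: %f at %s' % (max(h), chord_keys[int(np.argmax(h))]))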