# -*- coding: utf-8 -*-
# dml-cla/python/similarity.py
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
__author__ = 'wolffd'

# This script derives all pairwise similarity measures for the feature
# vectors (chroma, MFCC, chords) provided. As a first experiment, only the
# mean feature vectors per piece are compared using euclidean distance.

# parameters to be forwarded to API

# similarity type:
# euclidean, compression
# simtype = 'euclidean'

# parallelisation
num_cores = 10

#min_clusters = 40   ## unused
#max_clusters = 256  ## unused

#set_clusters = 40
#max_clips = 50
encoding = 'binary'
#compressor = 'zxd'
mds_init_tries = 4
mds_max_iter = 100
mfccbins = 12

# resample chroma / timbre values at this stride for the compression
# distance; we want just one vector per second. The standard sample rate
# and window size are 44100 / 1024 for chroma / timbre. Downsampling is
# controlled by the sim_downsample option (0 switches it off).
resample_factor = 44100 // 1024
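# sanity check of the stride above (assuming the hop equals the window
# size): 44100 samples/s with 1024-sample frames gives 44100/1024, i.e.
# about 43 feature frames per second, so keeping every 43rd frame leaves
# roughly one frame per second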

from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3

# numpy, scipy
import numpy as np
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from scipy.signal import resample

# scikit-learn
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn import manifold

# chord processing
from chord_seq_key_relative import chords_from_csv, keys_from_csv, chord2function, fun2txt, fun2num, most_frequent_key, chord_roots, type_labels

# subprocess, command line and threading
import os, tempfile
import subprocess, threading

# for system / compression calls
import zlib

def chroma_from_csv(filename):
    # we assume CSV: time, chroma 1, ..., chroma 12
    # return (time, [chroma 1-12])
    return csv_map_rows(filename, 13, lambda row: (float(row[0]), np.array(row[1:13], dtype=float)))

def mfcc_from_csv(filename):
    # we assume CSV: time, mfcc 1, ..., mfcc 20
    # return (time, [mfcc 1-20])
    return csv_map_rows(filename, 21, lambda row: (float(row[0]), np.array(row[1:21], dtype=float)))


chroma_parser_table = {'csv': chroma_from_csv}
mfcc_parser_table = {'csv': mfcc_from_csv}

# in chord_seq_relative
key_parser_table = {'csv': keys_from_csv}
chord_parser_table = {'csv': chords_from_csv}

## generate global dict of chord_keys
chord_keys = []
for chordnum in range(1, 12+1):
    for typenum in range(1, 11+1):
        chord_keys.append("%02d%02d" % (chordnum, typenum))
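# for reference, the list built above has 12 * 11 = 132 entries:
# chord_keys[:3] == ['0101', '0102', '0103'], ..., chord_keys[-1] == '1211'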

def per_file(inputs, opts={}):
    chromas = []
    chromas_idx = []
    mfccs = []
    mfccs_idx = []
    chords = []
    chords_idx = []
    uris = []

    # get options from API
    # print_status(str(opts))
    simtype = opts['sim_type']
    set_clusters = opts['sim_clusters']   # def 40
    downsample = opts['sim_downsample']   # def 1
    limit = opts['sim_reclimit']          # def 50
    compressor = opts['sim_compressor']   # def 'zlib'

    # parse feature list (def: chroma) and match feature names exactly
    features = [feat.strip() for feat in opts['sim_features'].split(',')]
    use_chromagram = 'chromagram' in features
    use_mfcc = 'mfcc' in features
    use_chords = 'chords' in features
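    # a hypothetical opts dict, using the defaults noted above:
    # {'sim_type': 'euclidean', 'sim_clusters': 40, 'sim_downsample': 1,
    #  'sim_reclimit': 50, 'sim_compressor': 'zlib',
    #  'sim_features': 'chromagram,mfcc'}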

    # check number of inputs
    if len(inputs) > limit:
        #return { 'error': ''}
        print_status('Similarity: Too many inputs, truncating collection')
        inputs = inputs[0:limit]

    # accumulation for euclidean: just take the mean values over each whole clip
    # todo: add std and other statistics?
    def accum_euclidean(item):
        # accumulate chroma vectors for this piece
        if use_chromagram:
            chroma = [res[1] for res in decode_tagged(chroma_parser_table, item['chromagram'])]
            # print_status('Chroma Raw Data' + str(chroma))
            # get mean chroma vector
            chroma_mean = np.mean(np.array(chroma), axis=0)
            #print_status('Chroma Means' + str(chroma_mean))

            # add vector to chromas table
            chromas.append(chroma_mean)

        if use_mfcc:
            mfcc = [res[1] for res in decode_tagged(mfcc_parser_table, item['mfcc'])]
            mfcc_mean = np.mean(np.array(mfcc), axis=0)
            mfccs.append(mfcc_mean)

        if use_chords:
            # get the key annotations for this clip
            keys = decode_tagged(key_parser_table, item['keys'])
            # get most frequent key
            key, mode = most_frequent_key(keys)
            relchords = []
            for (time, chord) in decode_tagged(chord_parser_table, item['chords']):

                # get chord function relative to the key
                (root, fun, typ, bfun) = chord2function(chord, key, mode)

                # translate into text
                #txt = fun2txt(fun, typ, bfun, mode)
                #print_status('Chord: ' + chord + ', function: ' + txt)
                num = fun2num(fun, typ, bfun, mode)
                if num > 0:
                    # add to chords of this clip
                    #relchords.append((time,key,mode,fun,typ,bfun))

                    # keep the function and type digits of the chord,
                    # ignore mode and base note
                    # format of num: [1x mode, 2x function, 2x type, 2x base note]
                    relchords.append(str(num)[1:5])

            # append histogram of all chords for this recording
            hist = chord_histogram(relchords)
            chords.append(hist)

        # add uri if everything went well
        uris.append(item['list'])

    # accumulation for compression:
    # save all (optionally downsampled) feature vectors and remember the
    # per-clip end indices; quantisation into a codebook happens later
    def accum_compression(item):

        # get chromas
        if use_chromagram:
            # accumulate chroma vectors for this piece
            chroma = [res[1] for res in decode_tagged(chroma_parser_table, item['chromagram'])]
            # print_status('Chroma Raw Data' + str(chroma))

            # downsample if necessary
            if downsample == 1:
                #chroma = resample(chroma, len(chroma)//resample_factor, axis=0, window=None)
                #chroma = [chroma[i] for i in np.random.randint(0,len(chroma),len(chroma)//resample_factor)]
                chroma = [chroma[i*resample_factor] for i in range(0, len(chroma)//resample_factor)]

            chromas.extend(chroma)
            chromas_idx.append(len(chromas))

        if use_mfcc:
            mfcc = [res[1] for res in decode_tagged(mfcc_parser_table, item['mfcc'])]

            if downsample == 1:
                # mfcc = [mfcc[i] for i in np.random.randint(0,len(mfcc),len(mfcc)//resample_factor)]
                mfcc = [mfcc[i*resample_factor] for i in range(0, len(mfcc)//resample_factor)]
            mfccs.extend(mfcc)
            mfccs_idx.append(len(mfccs))

        if use_chords:
            # get the key annotations for this clip
            keys = decode_tagged(key_parser_table, item['keys'])
            # get most frequent key
            key, mode = most_frequent_key(keys)
            relchords = []
            for (time, chord) in decode_tagged(chord_parser_table, item['chords']):

                # get chord function relative to the key
                (root, fun, typ, bfun) = chord2function(chord, key, mode)

                # translate into text
                #txt = fun2txt(fun, typ, bfun, mode)
                #print_status('Chord: ' + chord + ', function: ' + txt)
                num = fun2num(fun, typ, bfun, mode)
                if num > 0:
                    # add to chords of this clip
                    #relchords.append((time,key,mode,fun,typ,bfun))

                    # keep the function and type digits of the chord,
                    # ignore mode and base note
                    # format of num: [1x mode, 2x function, 2x type, 2x base note]
                    relchords.append(int(str(num)[1:5]))

            # append the raw chord sequence for this recording
            #hist = chord_histogram(relchords)
            chords.extend(relchords)
            chords_idx.append(len(chords))

        # add uri if everything went well
        uris.append(item['list'])


    # ---
    # this is the euclidean distance
    # ---
    if (simtype == 'euclidean'):
        # accumulate over all inputs
        st = for_each(inputs, accum_euclidean)

        # concatenate feature input for all features
        arr = np.empty((len(uris), 0), float)
        # concatenate data to nparray for euclidean distance
        if use_chromagram:
            arr = np.append(arr, np.array(chromas), axis=1)

        if use_mfcc:
            arr = np.append(arr, np.array(mfccs), axis=1)

        if use_chords:
            # get chord dictionaries
            #print(str(np.array(chords).shape))
            arr = np.append(arr, np.array(chords), axis=1)

        #dist = distance.pdist(chromas, 'euclidean')
        dist = pairwise_distances(arr, metric='euclidean', n_jobs=num_cores)
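        # pairwise_distances returns a dense, symmetric
        # (len(uris), len(uris)) matrix with zeros on the diagonal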

        # return to non-condensed matrix for simplicity.
        # this can be reversed using the very same function for data
        # efficiency
        #dist = distance.squareform(dist)

    # ---
    # this is the normalised compression distance
    # ---
    elif (simtype == 'compression'):
        # accumulate over all inputs
        print_status('Similarity Module: Accumulating')
        st = for_each(inputs, accum_compression)

        dist = np.zeros((len(uris), len(uris)))
        count = 0
        if use_chromagram:
            print_status('Similarity Module: Chroma Quantisation')
            chromas_coded = vector_quantisation(np.array(chromas), set_clusters, num_cores)
            print_status('Similarity Module: Chroma Compression Results')
            dist += similarity_by_mask(chromas_coded, chromas_idx, compressor, encoding)
            count += 1

        if use_mfcc:
            print_status('Similarity Module: MFCC Quantisation')
            mfccs_coded = vector_quantisation(np.array(mfccs), set_clusters, num_cores)
            print_status('Similarity Module: MFCC Compression Results')
            dist += similarity_by_mask(mfccs_coded, mfccs_idx, compressor, encoding)
            count += 1

        if use_chords:
            print_status('Similarity Module: Chord Compression Results')
            dist += similarity_by_mask(np.array(chords), chords_idx, compressor, encoding)
            count += 1

        # average over the per-feature distance matrices
        dist = dist / count

        # get rid of zeros in between
        #for idx1 in range(0,len(chromas_idx)):
        #    dist[idx1][idx1] = 1

    print_status('dist' + str(dist))

    # do MDS scaling with the precomputed distance matrix
    mds = manifold.MDS(n_components=2, max_iter=mds_max_iter, n_init=mds_init_tries, dissimilarity='precomputed')

    coordinates = mds.fit_transform(dist)
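    # coordinates has shape (len(uris), 2): one 2-D point per recording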

    return {'result': {'list': uris, 'mds': coordinates.tolist()}, 'stats': st}
    # return { 'result': { 'list': uris, 'distance': dist.tolist(), 'mds': coordinates.tolist()},
    #          'stats' : st }


def vector_quantisation(data, set_clusters, num_cores):
    # ---
    # build codebook!
    # ---
    # --- 1 quantise the feature data
    # --- 1a use scikit-learn k-means
    #        http://scikit-learn.org/stable/modules/clustering.html
    clusterer = KMeans(n_clusters=set_clusters, n_jobs=num_cores)

    # --- 2 assign each frame to its nearest codebook entry
    data_coded = clusterer.fit_predict(data)
    #print_status('Data Coded' + str(data_coded))
    # print_status('Coding Histogram' + str(np.histogram(data_coded)))
    return data_coded


def similarity_by_mask(data_coded, data_idx, compressor, encoding):

    # data_idx[i] holds the index just past the last frame of clip i,
    # so clip i spans data_coded[start_idx[i]:data_idx[i]]
    start_idx = [0] + data_idx[:-1]
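    # worked example with hypothetical frame counts: for three clips of
    # 100, 150 and 200 frames, data_idx == [100, 250, 450] and
    # start_idx == [0, 100, 250], so clip 0 is data_coded[0:100], etc.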
    dist = np.zeros((len(data_idx), len(data_idx)))
    for idx1 in range(0, len(data_idx)):
        for idx2 in range(0, len(data_idx)):
            if (idx2 < idx1):
                # select the encoded frames for the two clips
                # (slice ends are exclusive, so no off-by-one correction)
                data1_mask = np.zeros(len(data_coded), dtype=bool)
                data1_mask[start_idx[idx1]:data_idx[idx1]] = True

                data2_mask = np.zeros(len(data_coded), dtype=bool)
                data2_mask[start_idx[idx2]:data_idx[idx2]] = True

                a_coded = encode(data_coded[data1_mask], format=encoding)
                b_coded = encode(data_coded[data2_mask], format=encoding)
                # get compression lengths
                if compressor == 'zlib':
                    (a, b, ab) = compressed_length(a_coded, b_coded, compressor)
                else:
                    # use the complement of both clips as the shared reference
                    ref_mask = ~data1_mask & ~data2_mask
                    ref_coded = encode(data_coded[ref_mask], format=encoding)
                    (a, b, ab) = delta_compressed_length(a_coded, b_coded, ref_coded, compressor)

                # normalised compression distance:
                # NCD(x, y) = (C(xy) - min(C(x), C(y))) / max(C(x), C(y))
                dist[idx1][idx2] = (ab - min(a, b)) / float(max(a, b))

                # the normalised compression distance is symmetric,
                # as required by the mds routine above
                dist[idx2][idx1] = dist[idx1][idx2]

    return dist

def encode(data, format='string'):

    # encode the quantised frames so they can be concatenated and compressed
    if format == 'binary':
        data_coded = data.tostring()
    elif format == 'string':
        data_coded = str(data)
    else:
        raise ValueError('unknown encoding format: %s' % format)
    return data_coded
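# for instance (hypothetical input), encode(np.array([1, 2, 3]), format='binary')
# yields the raw bytes of the array buffer, while format='string' falls back
# to the textual repr of the data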

def compressed_length(a_coded, b_coded, type='zlib'):

    # Compression
    if type == 'zlib':
        # zlib is quite helpful https://docs.python.org/2/library/zlib.html#module-zlib
        a = len(zlib.compress(a_coded, 9))
        b = len(zlib.compress(b_coded, 9))
        ab = len(zlib.compress(a_coded + b_coded, 9))

    return (a, b, ab)

def delta_compressed_length(a_coded, b_coded, ref_coded, type='zxd'):
    # Compression via external delta coders:
    # zbs   - uses bsdiff
    # zxd   - uses xdelta3
    # zvcd  - uses open-vcdiff
    # zvcz  - uses vczip
    # zdiff - converts binary to text and uses diff to produce an ed script

    if type in ('zxd', 'zbs', 'zvcz', 'zdiff', 'zvcd'):

        freference = tempfile.NamedTemporaryFile(delete=False)
        freference.write(ref_coded)
        freference.close()
        #print_status('Ref File: ' + freference.name)

        # to be optimised with bufs later
        # get length of a with respect to the reference
        command = '/home/dml/src/hg/dml-cliopatria/cpack/dml/scripts/compression/%s encode %s | /home/dml/src/hg/dml-cliopatria/cpack/dml/scripts/compression/length' % (type, freference.name)
        # print_status(command)
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=a_coded)
        a = int(output)

        # get length of b with respect to the reference
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=b_coded)
        b = int(output)

        # get length of a+b with respect to the reference
        p1 = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p1.communicate(input=a_coded + b_coded)
        ab = int(output)

        #print_status('Compressed Size ' + str(ab))
        os.remove(freference.name)
    return (a, b, ab)

# normalised histogram of chord-function strings over the global
# chord_keys dictionary
def chord_histogram(chordstr=()):
    global chord_keys
    # build histogram
    histo = dict.fromkeys(chord_keys, 0)
    for chord in chordstr:
        histo[chord] = histo.get(chord, 0) + 1
    #print_status(str(histo.keys()))

    # read the counts out in the fixed chord_keys order so that
    # histograms of different clips are comparable
    counts = np.array([histo[key] for key in chord_keys], float)
    if max(counts) > 0:
        counts = counts / max(counts)
    return counts

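
# ----------------------------------------------------------------------
# A minimal smoke test, runnable without the DML API. The values below are
# synthetic stand-ins, not real chroma or chord data; it only exercises the
# NCD and histogram helpers defined above.
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # two toy "clips" of quantised symbols laid out back to back, plus
    # their end indices as accum_compression would record them
    toy_coded = np.array([0, 1, 2, 0, 1, 2] * 20 + [3, 4, 5, 3, 4, 5] * 20)
    toy_idx = [120, 240]
    d = similarity_by_mask(toy_coded, toy_idx, 'zlib', 'binary')
    print('NCD matrix for two toy clips:')
    print(d)

    # histogram of a few chord-function strings drawn from chord_keys
    h = chord_histogram([chord_keys[0], chord_keys[0], chord_keys[1]])
    print('histogram peak: %f at %s' % (max(h), chord_keys[int(np.argmax(h))]))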