"""
Useful functions that are quite common for music segmentation.

Modified, with more functions added.
Mi Tian, April 2015.
"""

__author__ = "Oriol Nieto"
__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
__license__ = "GPL"
__version__ = "1.0"
__email__ = "oriol@nyu.edu"

import copy
import os
import sys

import numpy as np
import scipy
from scipy.spatial import distance
from scipy.spatial.distance import squareform, pdist
from scipy.ndimage import filters, zoom
from scipy.ndimage.filters import maximum_filter, minimum_filter, percentile_filter, uniform_filter
# median_filter is a user-defined function in this script, so alias scipy's version
from scipy.ndimage.filters import median_filter as med_filter
from scipy import signal
from scipy.signal import correlate2d, convolve2d, filtfilt, resample, butter
from sklearn.metrics.pairwise import pairwise_distances
# import pylab as plt  # needed by pick_peaks(plot=True) and plotCurve

from GmmMetrics import GmmDistance

def lognormalize_chroma(C):
    """Log-normalizes chroma such that each vector is between -80 to 0."""
    C += np.abs(C.min()) + 0.1
    C = C / C.max(axis=0)
    C = 80 * np.log10(C)  # Normalize from -80 to 0
    return C


def normalize_matrix(X):
    """Normalizes a matrix such that its maximum value is 1 and minimum is 0."""
    X += np.abs(X.min())
    X /= X.max()
    return X


def ensure_dir(directory):
    """Makes sure that the given directory exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)


def median_filter(X, M=8):
    """Median filter along the first axis of the feature matrix X."""
    for i in xrange(X.shape[1]):
        X[:, i] = filters.median_filter(X[:, i], size=M)
    return X


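# A small usage sketch (hypothetical feature matrix; each of the 13 dimensions
# is median-smoothed along time, in place):
# >>> X = np.random.rand(500, 13)
# >>> X_smooth = median_filter(X, M=9)
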
def compute_gaussian_krnl(M):
    """Creates a Gaussian checkerboard kernel following Foote's paper."""
    g = signal.gaussian(M, M / 3., sym=True)
    G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
    G[M / 2:, :M / 2] = -G[M / 2:, :M / 2]
    G[:M / 2, M / 2:] = -G[:M / 2, M / 2:]
    return G


def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X."""
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)
    D /= D.max()
    return 1 - D


def compute_nc(X, G):
    """Computes the novelty curve from the self-similarity matrix X and
    the gaussian kernel G."""
    N = X.shape[0]
    M = G.shape[0]
    nc = np.zeros(N)

    for i in xrange(M / 2, N - M / 2 + 1):
        nc[i] = np.sum(X[i - M / 2:i + M / 2, i - M / 2:i + M / 2] * G)

    # Normalize to [0, 1]: shift so the minimum sits at zero, then scale
    nc -= nc.min()
    nc /= nc.max()
    return nc


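# A minimal sketch of the Foote novelty pipeline built from the helpers above
# (hypothetical feature matrix; the kernel size M=32 is a tuning choice):
# >>> F = np.random.rand(200, 12)        # e.g. 200 frames of chroma
# >>> S = compute_ssm(F)                 # frame-by-frame self-similarity
# >>> G = compute_gaussian_krnl(32)      # Gaussian checkerboard kernel
# >>> nc = compute_nc(S, G)              # novelty curve, normalised to [0, 1]
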
def resample_mx(X, incolpos, outcolpos):
    """
    Method from Librosa
    Y = resample_mx(X, incolpos, outcolpos)
    X is taken as a set of columns, each starting at 'time'
    colpos, and continuing until the start of the next column.
    Y is a similar matrix, with time boundaries defined by
    outcolpos. Each column of Y is a duration-weighted average of
    the overlapping columns of X.
    2010-04-14 Dan Ellis dpwe@ee.columbia.edu based on samplemx/beatavg
    -> python: TBM, 2011-11-05, TESTED
    """
    noutcols = len(outcolpos)
    Y = np.zeros((X.shape[0], noutcols))
    # assign 'end times' to final columns
    if outcolpos.max() > incolpos.max():
        incolpos = np.concatenate([incolpos, [outcolpos.max()]])
        X = np.concatenate([X, X[:, -1].reshape(X.shape[0], 1)], axis=1)
    outcolpos = np.concatenate([outcolpos, [outcolpos[-1]]])
    # durations (default weights) of input columns
    incoldurs = np.concatenate([np.diff(incolpos), [1]])

    for c in range(noutcols):
        firstincol = np.where(incolpos <= outcolpos[c])[0][-1]
        firstincolnext = np.where(incolpos < outcolpos[c + 1])[0][-1]
        lastincol = max(firstincol, firstincolnext)
        # default weights
        wts = copy.deepcopy(incoldurs[firstincol:lastincol + 1])
        # now fix up by partial overlap at ends
        if len(wts) > 1:
            wts[0] = wts[0] - (outcolpos[c] - incolpos[firstincol])
            wts[-1] = wts[-1] - (incolpos[lastincol + 1] - outcolpos[c + 1])
        wts = wts * 1. / sum(wts)
        Y[:, c] = np.dot(X[:, firstincol:lastincol + 1], wts)
    # done
    return Y


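# A hedged usage sketch (hypothetical column times; columns of X start at incolpos):
# >>> X = np.random.rand(12, 10)                  # 10 feature columns
# >>> incolpos = np.arange(10, dtype=float)       # column start times
# >>> outcolpos = np.array([0., 2.5, 5., 7.5])    # new time boundaries
# >>> Y = resample_mx(X, incolpos, outcolpos)     # Y.shape == (12, 4)
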
def chroma_to_tonnetz(C):
    """Transforms chromagram to Tonnetz (Harte, Sandler, 2006)."""
    N = C.shape[0]
    T = np.zeros((N, 6))

    r1 = 1      # Fifths
    r2 = 1      # Minor
    r3 = 0.5    # Major

    # Generate Transformation matrix
    phi = np.zeros((6, 12))
    for i in range(6):
        for j in range(12):
            if i % 2 == 0:
                fun = np.sin
            else:
                fun = np.cos

            if i < 2:
                phi[i, j] = r1 * fun(j * 7 * np.pi / 6.)
            elif i >= 2 and i < 4:
                phi[i, j] = r2 * fun(j * 3 * np.pi / 2.)
            else:
                phi[i, j] = r3 * fun(j * 2 * np.pi / 3.)

    # Do the transform to tonnetz
    for i in range(N):
        for d in range(6):
            denom = float(C[i, :].sum())
            if denom == 0:
                T[i, d] = 0
            else:
                T[i, d] = 1 / denom * (phi[d, :] * C[i, :]).sum()

    return T


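# Quick sketch (hypothetical chromagram, frames x 12 pitch classes):
# >>> C = np.random.rand(100, 12)
# >>> T = chroma_to_tonnetz(C)    # T.shape == (100, 6)
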
def most_frequent(x):
    """Returns the most frequent value in x (x must contain non-negative integers, as required by np.bincount)."""
    return np.argmax(np.bincount(x))


def pick_peaks(nc, L=16, plot=False):
    """Obtain peaks from a novelty curve using an adaptive threshold."""
    offset = nc.mean() / 3
    th = filters.median_filter(nc, size=L) + offset
    peaks = []
    for i in xrange(1, nc.shape[0] - 1):
        # is it a peak?
        if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]:
            # is it above the threshold?
            if nc[i] > th[i]:
                peaks.append(i)
    if plot:
        # NB: requires pylab (see the commented-out import at the top)
        plt.plot(nc)
        plt.plot(th)
        for peak in peaks:
            plt.axvline(peak, color="m")
        plt.show()
    return peaks


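# Continuing the novelty-curve sketch above (L, the median-filter window,
# is a tuning choice):
# >>> boundaries = pick_peaks(nc, L=16)    # frame indices of detected boundaries
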
def recurrence_matrix(data, k=None, width=1, metric='sqeuclidean', sym=False):
    '''
    Note: Copied from librosa

    Compute the binary recurrence matrix from a time-series.

    ``rec[i,j] == True`` <=> (``data[:,i]``, ``data[:,j]``) are
    k-nearest-neighbors and ``|i-j| >= width``

    :usage:
        >>> mfcc = librosa.feature.mfcc(y=y, sr=sr)
        >>> R = librosa.segment.recurrence_matrix(mfcc)

        >>> # Or fix the number of nearest neighbors to 5
        >>> R = librosa.segment.recurrence_matrix(mfcc, k=5)

        >>> # Suppress neighbors within +- 7 samples
        >>> R = librosa.segment.recurrence_matrix(mfcc, width=7)

        >>> # Use cosine similarity instead of Euclidean distance
        >>> R = librosa.segment.recurrence_matrix(mfcc, metric='cosine')

        >>> # Require mutual nearest neighbors
        >>> R = librosa.segment.recurrence_matrix(mfcc, sym=True)

    :parameters:
      - data : np.ndarray
          feature matrix (d-by-t)

      - k : int > 0 or None
          the number of nearest-neighbors for each sample

          Default: ``k = 2 * ceil(sqrt(t - 2 * width + 1))``,
          or ``k = 2`` if ``t <= 2 * width + 1``

      - width : int > 0
          only link neighbors ``(data[:, i], data[:, j])``
          if ``|i-j| >= width``

      - metric : str
          Distance metric to use for nearest-neighbor calculation.

          See ``scipy.spatial.distance.cdist()`` for details.

      - sym : bool
          set ``sym=True`` to only link mutual nearest-neighbors

    :returns:
      - rec : np.ndarray, shape=(t,t), dtype=bool
          Binary recurrence matrix
    '''

    t = data.shape[1]

    if k is None:
        if t > 2 * width + 1:
            k = 2 * np.ceil(np.sqrt(t - 2 * width + 1))
        else:
            k = 2

    k = int(k)

    def _band_infinite():
        '''Suppress the +- width diagonal band of a distance matrix'''

        band = np.empty((t, t))
        band.fill(np.inf)
        band[np.triu_indices_from(band, width)] = 0
        band[np.tril_indices_from(band, -width)] = 0

        return band

    # Build the distance matrix
    D = scipy.spatial.distance.cdist(data.T, data.T, metric=metric)

    # Max out the diagonal band
    D = D + _band_infinite()

    # build the recurrence plot
    rec = np.zeros((t, t), dtype=bool)

    # get the k nearest neighbors for each point
    for i in range(t):
        for j in np.argsort(D[i])[:k]:
            rec[i, j] = True

    # symmetrize
    if sym:
        rec = rec * rec.T

    return rec


def lp(signal, fc=0.34, axis=-1):
    '''Low pass filter function.
    signal: Raw signal to be smoothed.
    fc: Cutoff frequency of the Butterworth filter, normalised from 0 to 1, where 1 is the Nyquist frequency.
    axis: The axis of x to which the filter is applied. Default is -1.'''
    bCoeffs, aCoeffs = butter(2, fc)
    lp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis=axis)
    return lp_smoothed_signal


def hp(signal, fc=0.34, axis=-1):
    '''High pass filter function.
    signal: Raw signal to be filtered.
    fc: Cutoff frequency of the Butterworth filter, normalised from 0 to 1, where 1 is the Nyquist frequency.
    axis: The axis of x to which the filter is applied. Default is -1.'''
    bCoeffs, aCoeffs = butter(2, fc, 'highpass')
    hp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis=axis)
    return hp_smoothed_signal


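# A small sketch of the smoothing filters (assumed 1-d input; fc is relative
# to the Nyquist frequency):
# >>> x = np.random.rand(500)
# >>> x_smooth = lp(x, fc=0.1)    # keep slow trends
# >>> x_detail = hp(x, fc=0.1)    # keep fast fluctuations
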
def getMean(feature, winlen, stepsize):
    '''Windowed mean of a feature array (frames x dims), with hop size stepsize.'''
    means = []
    steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
    for i in xrange(steps):
        means.append(np.mean(feature[i * stepsize:(i * stepsize + winlen), :], axis=0))
    return np.array(means)


def getStd(feature, winlen, stepsize):
    '''Windowed standard deviation of a feature array (frames x dims), with hop size stepsize.'''
    std = []
    steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
    for i in xrange(steps):
        std.append(np.std(feature[i * stepsize:(i * stepsize + winlen), :], axis=0))
    return np.array(std)


def getDelta(feature):
    '''First-order difference of a feature array, zero-padded to keep the frame count.'''
    delta_feature = np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0)))
    return delta_feature


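# Usage sketch of the windowed statistics (hypothetical shapes; window and hop
# are in frames):
# >>> F = np.random.rand(1000, 20)
# >>> F_mean = getMean(F, winlen=100, stepsize=50)    # (19, 20)
# >>> F_std = getStd(F, winlen=100, stepsize=50)      # (19, 20)
# >>> F_delta = getDelta(F)                           # (1000, 20)
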
def getSSM(feature_array, metric='cosine', norm='exp', reduce=False):
    '''Compute an SSM from the input feature array.
    args: norm: ['simple', 'exp']
    '''
    dm = pairwise_distances(feature_array, metric=metric)
    dm = np.nan_to_num(dm)
    if norm == 'simple':
        ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm))
    elif norm == 'exp':  # Use with the cosine metric only
        ssm = 1 - np.exp(dm - 1)
    if reduce:
        ssm = reduceSSM(ssm)
    return ssm


def enhanceSSM(ssm, fc=0.34, med_size=(5, 5), max_size=(5, 5), min_size=(5, 5), filter_type='min', axis=-1):
    '''A series of filtering operations for SSM enhancement.
    fc: cutoff frequency for LP filtering.
    med_size: Median filter window size.
        int or tuple. If using an integer for a 2d input, axis must be specified.
    filter_type: selects a maximum or a minimum filter, according to the distance
        metric with which the SSM was computed.
        str, one of ['min', 'max', None].
    max_size: Maximum filter window size.
        int or tuple. If using an integer for a 2d input, axis must be specified.
        Use this when homogeneity in the SSM is expressed by LARGE values.
    min_size: Minimum filter window size.
        int or tuple. If using an integer for a 2d input, axis must be specified.
        Use this when homogeneity in the SSM is expressed by SMALL values.
        (e.g. when the cosine metric and exp normalisation are used for distance computation.)'''

    ssm_lp = lp(ssm, fc=fc)

    # Use scipy.ndimage.filters.median_filter (aliased above as med_filter)
    ssm_med = med_filter(ssm_lp, size=med_size)

    if filter_type == 'min':
        enhanced_ssm = minimum_filter(ssm_med, size=min_size)
    elif filter_type == 'max':
        enhanced_ssm = maximum_filter(ssm_med, size=max_size)
    else:
        enhanced_ssm = ssm_med
    return enhanced_ssm


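# Sketch of the SSM pipeline (hypothetical features; per the docstring above,
# the 'min' filter suits SSMs computed with the cosine metric and exp normalisation):
# >>> F = np.random.rand(300, 20)
# >>> S = getSSM(F, metric='cosine', norm='exp')
# >>> S_enh = enhanceSSM(S, fc=0.3, filter_type='min')
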
def reduceSSM(ssm, maxfilter_size=2, remove_size=50):
    '''Adaptive thresholding using the Otsu method.
    Required package: skimage (0.10+) -- NOT installed on ignis, do NOT call this function!'''

    from skimage.morphology import disk
    # from skimage.filters import threshold_otsu, rank  # skimage 0.12
    from skimage.filter.rank import otsu  # skimage 0.10
    from skimage.filter import threshold_otsu

    reduced_ssm = np.copy(ssm)  # NB: plain copy(ssm) would call the copy module itself
    reduced_ssm[reduced_ssm < 0.75] = 0
    # reduced_ssm = maximum_filter(reduced_ssm, size=maxfilter_size)
    # reduced_ssm = morphology.remove_small_objects(reduced_ssm.astype(bool), min_size=remove_size)
    local_otsu = otsu(reduced_ssm, disk(5))
    local_otsu = (local_otsu.astype(float) - np.min(local_otsu)) / (np.max(local_otsu) - np.min(local_otsu))
    reduced_ssm = reduced_ssm - 0.6 * local_otsu
    return reduced_ssm


def upSample(feature_array, step):
    '''Resample downsized tempogram features; step should be in accordance with the input features.'''
    # print feature_array.shape
    sampleRate = 44100
    stepSize = 1024.0
    # step = np.ceil(sampleRate/stepSize/5.0)
    feature_array = zoom(feature_array, (step, 1))
    # print 'resampled', feature_array.shape
    return feature_array

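# Sketch (hypothetical downsampled tempogram, restored by a factor of 5 along time):
# >>> T = np.random.rand(40, 24)
# >>> T_up = upSample(T, 5)    # zoomed to (200, 24)
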

def normaliseFeature(feature_array):
    '''Normalise each feature dimension, scaling along the last axis. A small constant ensures numerical stability.'''
    feature_array[np.isnan(feature_array)] = 0.0
    feature_array[np.isinf(feature_array)] = 0.0
    fmin = np.min(feature_array, axis=-1)[:, np.newaxis]
    fmax = np.max(feature_array, axis=-1)[:, np.newaxis]
    feature_array = (feature_array - fmin) / (fmax - fmin + 0.005)
    feature_array[np.isnan(feature_array)] = 0.0
    feature_array[np.isinf(feature_array)] = 0.0

    return feature_array

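# Quick sketch (each row is rescaled along the last axis to roughly [0, 1]):
# >>> F = np.random.rand(100, 8)
# >>> F_norm = normaliseFeature(F)
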
def normaliseArray(X):
    '''Normalise a 1d array to [0, 1].'''
    if np.max(X) == np.min(X):
        return None
    return (X - np.min(X)) / (np.max(X) - np.min(X))


def pairwiseSKL(gmm_list):
    '''Compute the pairwise symmetrised KL divergence of a list of GMMs.'''
    n_GMMs = len(gmm_list)
    distance_matrix = np.zeros((n_GMMs, n_GMMs))
    for i in xrange(n_GMMs):
        for j in xrange(i, n_GMMs):
            distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j])
            distance_matrix[j][i] = distance_matrix[i][j]

    np.fill_diagonal(distance_matrix, 0.0)
    distance_matrix[np.isinf(distance_matrix)] = np.finfo(np.float64).max

    return distance_matrix

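# Hedged sketch: assumes each element of gmm_list comes from GmmMetrics.GmmDistance
# (imported above) and exposes skl_distance_full(); the constructor argument here
# is hypothetical.
# >>> gmms = [GmmDistance(chunk) for chunk in feature_chunks]
# >>> D = pairwiseSKL(gmms)    # symmetric (n, n) distance matrix
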
def getRolloff(data, tpower, filterbank, thresh=0.9):
    '''Rolloff-like feature: per frame, accumulate band energies in data until
    thresh of the frame's total power tpower[i] is exceeded, then return the
    corresponding filterbank frequency.'''
    nFrames = data.shape[0]
    nFilters = len(filterbank)
    rolloff = np.zeros(nFrames)
    for i in xrange(nFrames):
        rolloffE = thresh * tpower[i]
        temp = 0.0
        for band in xrange(nFilters):
            temp += data[i][band]
            if temp > rolloffE:
                break
        rolloff[i] = filterbank[nFilters - band - 1]

    return rolloff


def verifyPeaks(peak_candidates, dev_list):
    '''Verify peaks from the 1st round of detection by applying adaptive thresholding to the deviation list.'''

    final_peaks = copy.deepcopy(peak_candidates)
    dev_list = np.array([np.mean(x) for x in dev_list])  # average the deviations of the different features
    med_dev = med_filter(dev_list, size=5)  # scipy's median filter; the local median_filter expects a 2d input
    # print dev_list, np.min(dev_list), np.median(dev_list), np.mean(dev_list), np.std(dev_list)
    dev = dev_list - np.percentile(dev_list, 50)
    # print dev
    for i, x in enumerate(dev):
        if x < 0:
            final_peaks.remove(peak_candidates[i])
    return final_peaks


def envelopeFollower(xc, AT, RT, prevG, scaler=1):
    '''Follows the amplitude envelope of the input signal xc.
    AT / RT: attack and release coefficients; prevG: initial (previous) output value.'''

    g = np.zeros_like(xc)
    length = len(xc)

    for i in xrange(length):
        xSquared = xc[i] ** 2
        # if the input is less than the previous output use the attack coefficient, otherwise the release
        if xSquared < prevG:
            coeff = AT
        else:
            coeff = RT
        g[i] = (xSquared - prevG) * coeff + prevG
        g[i] *= scaler
        prevG = g[i]

    return g


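# Usage sketch (hypothetical attack/release coefficients in (0, 1)):
# >>> x = np.sin(np.linspace(0, 20 * np.pi, 2000)) * np.linspace(1, 0, 2000)
# >>> env = envelopeFollower(x, AT=0.05, RT=0.005, prevG=0.0)
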
def getEnvPeaks(sig, sig_env, size=1):
    '''Finds peaks in the signal envelope.
    args: sig (1d array): original input signal
        sig_env (list): indices of the signal envelope.
        size: range used to locate local maxima in the envelope as peaks.
    '''
    envelope = sig[sig_env]
    peaks = []
    if len(envelope) > 1 and envelope[0] > envelope[1]:
        peaks.append(sig_env[0])
    for i in xrange(size, len(envelope) - size - 1):
        if envelope[i] > np.max(envelope[i - size:i]) and envelope[i] > np.max(envelope[i + 1:i + size + 1]):
            peaks.append(sig_env[i])
    return peaks


def deltaFeature(feature_array, step=1, axis=-1):
    '''Return the delta (lag-step difference) of a feature array, zero-padded at the start.
    NB: the indexing assumes the difference is taken along the last axis.'''
    delta = np.zeros_like(feature_array)
    delta[:, step:] = feature_array[:, step:] - feature_array[:, :-step]
    return delta


def plotCurve(yp, yr, yf, x, labels):
    '''Plot a performance curve.
    x axis: distance threshold for feature selection; y axis: f measure.
    NB: requires pylab (see the commented-out import at the top).'''

    f = plt.figure()
    ax = f.add_axes([0.1, 0.1, 0.7, 0.7])
    l1, l2, l3 = ax.plot(x, yp, 'rs-', x, yr, 'go-', x, yf, 'k^-')
    f.legend((l1, l2, l3), ('Precision', 'Recall', 'F-measure'), 'upper left')
    for i, label in enumerate(labels):
        ax.annotate(label, (x[i], yf[i]))
    # save before show, so the canvas is not cleared by the interactive window
    plt.savefig('performance.pdf', format='pdf')
    plt.show()

    return None