annotate utils/SegUtil.py @ 12:c23658e8ae38

fp feature notebook
author mitian
date Mon, 25 May 2015 17:27:48 +0100
parents 56a2ca9359d0
children cc8ceb270e79
rev   line source
mi@0 1 """
mi@0 2 Useful functions that are quite common for music segmentation
mi@0 3 """
mi@0 4 '''
mi@0 5 Modified and more funcs added.
mi@0 6 Mi Tian, April 2015.
mi@0 7 '''
mi@0 8
mi@0 9 __author__ = "Oriol Nieto"
mi@0 10 __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
mi@0 11 __license__ = "GPL"
mi@0 12 __version__ = "1.0"
mi@0 13 __email__ = "oriol@nyu.edu"
mi@0 14
mi@0 15 import copy
mi@0 16 import numpy as np
mitian@1 17 import os, sys
mi@0 18 import scipy
mi@0 19 from scipy.spatial import distance
mi@0 20 from scipy.ndimage import filters, zoom
mi@0 21 from scipy import signal
mi@0 22 import pylab as plt
mi@0 23 from scipy.spatial.distance import squareform, pdist
mitian@1 24 from scipy.ndimage.filters import *
mitian@4 25 from sklearn.metrics.pairwise import pairwise_distances
mi@0 26
mi@0 27
def lognormalize_chroma(C):
    """Log-normalize a chromagram, column by column.

    The values are shifted by ``abs(C.min()) + 0.1`` so that every entry is
    strictly positive, each column is divided by its own maximum and the
    result is mapped through ``80 * log10``.  The largest entry of every
    column therefore maps to 0 and every other entry is negative.

    The computation is done on a floating-point copy: the caller's array is
    left untouched and integer input is accepted.

    :parameters:
      - C : array-like
          chroma values; normalisation is applied along axis 0

    :returns:
      - new float np.ndarray with the same shape as ``C``
    """
    shifted = np.asarray(C, dtype=float) + (np.abs(np.min(C)) + 0.1)
    scaled = shifted / shifted.max(axis=0)
    return 80 * np.log10(scaled)
mi@0 34
mi@0 35
def normalize_matrix(X):
    """Rescale a matrix in place so its minimum is 0 and its maximum is 1.

    The minimum is subtracted (rather than ``abs(min)`` being added), so the
    result starts at 0 even when every entry of ``X`` is positive.  A constant
    matrix becomes all zeros instead of being divided by zero.

    :parameters:
      - X : np.ndarray of floats
          modified in place

    :returns:
      - the same array object ``X``, rescaled
    """
    X -= X.min()
    peak = X.max()
    if peak != 0:
        X /= peak
    return X
mi@0 41
mi@0 42
def ensure_dir(directory):
    """Make sure that the given directory exists, creating it if needed.

    The directory is created first and the "already there" case is handled
    afterwards, so a concurrent process creating the same path between a
    check and the creation can no longer make this call fail.

    :parameters:
      - directory : str
          path of the directory (intermediate directories are created too)

    :raises:
      - OSError if the path does not exist and could not be created
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Only ignore the failure when the path is actually present
        # (same outcome as the former ``os.path.exists`` pre-check).
        if not os.path.exists(directory):
            raise
mi@0 47
mi@0 48
def median_filter(X, M=8):
    """Median filter along the first axis of the feature matrix X.

    Every column of ``X`` is filtered independently with a window of ``M``
    samples; the filtered values are written back into ``X``.

    :parameters:
      - X : np.ndarray, 2-D
          feature matrix, modified in place
      - M : int > 0
          length of the median window along axis 0

    :returns:
      - the same array object ``X``, filtered
    """
    # A (M, 1) window never mixes neighbouring columns, so a single call
    # is equivalent to filtering column by column.
    X[...] = filters.median_filter(X, size=(M, 1))
    return X
mi@0 54
mi@0 55
def compute_gaussian_krnl(M):
    """Create a Gaussian checkerboard kernel following Foote's paper.

    A Gaussian window of length ``M`` (standard deviation ``M / 3``) is
    expanded to 2-D through an outer product; the lower-left and upper-right
    quadrants are then negated.

    :parameters:
      - M : int > 0
          side length of the kernel

    :returns:
      - G : np.ndarray, shape=(M, M)
    """
    # Integer division: slice bounds must be ints on Python 2 and Python 3.
    half = M // 2
    g = signal.windows.gaussian(M, M / 3., sym=True)
    G = np.outer(g, g)
    G[half:, :half] *= -1
    G[:half, half:] *= -1
    return G
mi@0 63
mi@0 64
def compute_ssm(X, metric="seuclidean"):
    """Compute the self-similarity matrix of X.

    Pairwise distances between the rows of ``X`` are scaled by the largest
    distance and turned into similarities with ``1 - distance``.  When all
    distances are zero the scaling is skipped, so the result is a matrix of
    ones rather than NaNs.

    :parameters:
      - X : np.ndarray, shape=(n, d)
          one observation per row
      - metric : str
          any metric accepted by ``scipy.spatial.distance.pdist``

    :returns:
      - np.ndarray, shape=(n, n), similarity matrix
    """
    D = distance.squareform(distance.pdist(X, metric=metric))
    largest = D.max()
    if largest > 0:
        D /= largest
    return 1 - D
mi@0 71
mi@0 72
def compute_nc(X, G):
    """Compute the novelty curve from the self-similarity matrix X and
    the gaussian kernel G.

    The kernel is slid along the main diagonal of ``X``; at each position the
    element-wise product of the kernel and the underlying window is summed.
    Positions where the kernel does not fit keep the value 0.  The curve is
    finally rescaled to the range [0, 1] (left at all zeros when it is flat).

    :parameters:
      - X : np.ndarray, shape=(N, N)
          self-similarity matrix
      - G : np.ndarray, shape=(M, M)
          kernel; the window taken from ``X`` has side ``2 * (M // 2)``, so
          ``M`` is expected to be even

    :returns:
      - nc : np.ndarray, shape=(N,)
    """
    N = X.shape[0]
    M = G.shape[0]
    # Integer division keeps slice bounds integral on Python 3 as well.
    half = M // 2
    nc = np.zeros(N)

    for i in range(half, N - half + 1):
        nc[i] = np.sum(X[i - half:i + half, i - half:i + half] * G)

    # Normalize: shift the minimum to 0 (subtracting, so that negative
    # novelty values are handled), then scale the maximum to 1.
    nc -= nc.min()
    peak = nc.max()
    if peak > 0:
        nc /= peak
    return nc
mi@0 87
mi@0 88
def resample_mx(X, incolpos, outcolpos):
    """
    Method from Librosa
    Y = resample_mx(X, incolpos, outcolpos)
    X is taken as a set of columns, each starting at 'time'
    colpos, and continuing until the start of the next column.
    Y is a similar matrix, with time boundaries defined by
    outcolpos.  Each column of Y is a duration-weighted average of
    the overlapping columns of X.
    2010-04-14 Dan Ellis dpwe@ee.columbia.edu  based on samplemx/beatavg
    -> python: TBM, 2011-11-05, TESTED

    :parameters:
      - X : np.ndarray, 2-D
          input matrix, one column per entry of ``incolpos``
      - incolpos : np.ndarray
          start position of every column of ``X``
      - outcolpos : np.ndarray
          start position of every output column

    :returns:
      - Y : np.ndarray, shape=(X.shape[0], len(outcolpos))
    """
    noutcols = len(outcolpos)
    Y = np.zeros((X.shape[0], noutcols))
    # assign 'end times' to final columns
    if outcolpos.max() > incolpos.max():
        # The output extends past the last input column: add an input
        # position at the output end and repeat the last column of X there.
        incolpos = np.concatenate([incolpos,[outcolpos.max()]])
        X = np.concatenate([X, X[:,-1].reshape(X.shape[0],1)], axis=1)
    # Repeat the last output position so that outcolpos[c+1] exists for the
    # final output column in the loop below.
    outcolpos = np.concatenate([outcolpos, [outcolpos[-1]]])
    # durations (default weights) of input columns)
    # (the last input column has no successor and gets a duration of 1)
    incoldurs = np.concatenate([np.diff(incolpos), [1]])

    for c in range(noutcols):
        # Last input column starting at or before this output column ...
        firstincol = np.where(incolpos <= outcolpos[c])[0][-1]
        # ... and last input column starting before the next output column.
        firstincolnext = np.where(incolpos < outcolpos[c+1])[0][-1]
        lastincol = max(firstincol,firstincolnext)
        # default weights
        wts = copy.deepcopy(incoldurs[firstincol:lastincol+1])
        # now fix up by partial overlap at ends
        # NOTE(review): wts inherits the dtype of incoldurs; with integer
        # positions these corrections would be truncated -- confirm callers
        # pass float positions.
        if len(wts) > 1:
            wts[0] = wts[0] - (outcolpos[c] - incolpos[firstincol])
            wts[-1] = wts[-1] - (incolpos[lastincol+1] - outcolpos[c+1])
        # Scale the weights to sum to 1 so the dot product is an average.
        wts = wts * 1. /sum(wts)
        Y[:,c] = np.dot(X[:,firstincol:lastincol+1], wts)
    # done
    return Y
mi@0 125
mi@0 126
def chroma_to_tonnetz(C):
    """Transform a chromagram to Tonnetz (Harte, Sandler, 2006).

    Every row of ``C`` is projected onto six sine/cosine basis vectors and
    divided by the row sum.  Rows whose sum is zero map to all zeros.

    :parameters:
      - C : np.ndarray, shape=(N, 12)
          one chroma vector per row

    :returns:
      - T : np.ndarray, shape=(N, 6)
    """
    C = np.asarray(C, dtype=float)
    pitch = np.arange(12)

    # (radius, angle of each pitch class) for the three pairs of dimensions.
    circles = (
        (1, pitch * 7 * np.pi / 6.),    # Fifths
        (1, pitch * 3 * np.pi / 2.),    # Minor
        (0.5, pitch * 2 * np.pi / 3.),  # Major
    )

    # Transformation matrix (6 x 12): a sine row followed by a cosine row
    # for every circle.
    phi = np.array([radius * trig(angles)
                    for radius, angles in circles
                    for trig in (np.sin, np.cos)])

    # Project all frames at once; the row sum is computed once per frame.
    totals = C.sum(axis=1)
    T = np.zeros((C.shape[0], 6))
    nonzero = totals != 0
    T[nonzero] = C[nonzero].dot(phi.T) / totals[nonzero, np.newaxis]
    return T
mi@0 162
mi@0 163
def most_frequent(x):
    """Return the most frequent value in x.

    Values are counted with ``np.unique``, so negative numbers and
    non-integer values are supported as well.  When several values share the
    highest count, the smallest of them is returned.

    :parameters:
      - x : array-like, non-empty

    :returns:
      - the most frequent element of ``x``

    :raises:
      - ValueError if ``x`` is empty
    """
    values, counts = np.unique(x, return_counts=True)
    return values[np.argmax(counts)]
mi@0 167
mi@0 168
def pick_peaks(nc, L=16, plot=False):
    """Obtain peaks from a novelty curve using an adaptive threshold.

    The threshold is the median-filtered curve (window ``L``) raised by one
    third of the curve's mean.  A sample is reported as a peak when it is
    strictly greater than both of its neighbours and than the threshold at
    its position; the first and last samples are never reported.

    :parameters:
      - nc : np.ndarray, 1-D
          novelty curve
      - L : int > 0
          size of the median filter used for the threshold
      - plot : bool
          when True, plot the curve, the threshold and the peaks

    :returns:
      - peaks : list of int
          indices of the detected peaks, in increasing order
    """
    offset = nc.mean() / 3
    th = filters.median_filter(nc, size=L) + offset

    # Compare every interior sample with its neighbours and the threshold.
    inner = nc[1:-1]
    is_peak = (nc[:-2] < inner) & (inner > nc[2:]) & (inner > th[1:-1])
    peaks = (np.flatnonzero(is_peak) + 1).tolist()

    if plot:
        plt.plot(nc)
        plt.plot(th)
        for peak in peaks:
            plt.axvline(peak, color="m")
        plt.show()
    return peaks
mi@0 187
mi@0 188
def recurrence_matrix(data, k=None, width=1, metric='sqeuclidean', sym=False):
    '''
    Note: Copied from librosa

    Compute the binary recurrence matrix from a time-series.

    ``rec[i,j] == True`` <=> (``data[:,i]``, ``data[:,j]``) are
    k-nearest-neighbors and ``|i-j| >= width``

    Frames closer than ``width`` to each other are never linked, even when
    fewer than ``k`` admissible neighbours exist for a frame.

    :usage:
        >>> R = recurrence_matrix(mfcc)

        >>> # Or fix the number of nearest neighbors to 5
        >>> R = recurrence_matrix(mfcc, k=5)

        >>> # Suppress neighbors within +- 7 samples
        >>> R = recurrence_matrix(mfcc, width=7)

        >>> # Use cosine similarity instead of Euclidean distance
        >>> R = recurrence_matrix(mfcc, metric='cosine')

        >>> # Require mutual nearest neighbors
        >>> R = recurrence_matrix(mfcc, sym=True)

    :parameters:
      - data : np.ndarray
          feature matrix (d-by-t)

      - k : int > 0 or None
          the number of nearest-neighbors for each sample

          Default: ``k = 2 * ceil(sqrt(t - 2 * width + 1))``,
          or ``k = 2`` if ``t <= 2 * width + 1``

      - width : int > 0
          only link neighbors ``(data[:, i], data[:, j])``
          if ``|i-j| >= width``

      - metric : str
          Distance metric to use for nearest-neighbor calculation.

          See ``scipy.spatial.distance.cdist()`` for details.

      - sym : bool
          set ``sym=True`` to only link mutual nearest-neighbors

    :returns:
      - rec : np.ndarray, shape=(t,t), dtype=bool
          Binary recurrence matrix
    '''

    t = data.shape[1]

    if k is None:
        if t > 2 * width + 1:
            k = 2 * np.ceil(np.sqrt(t - 2 * width + 1))
        else:
            k = 2

    k = int(k)

    # Build the distance matrix between frames (columns of data)
    D = distance.cdist(data.T, data.T, metric=metric)

    # Max out the diagonal band: frames closer than `width` are excluded
    frames = np.arange(t)
    D[np.abs(frames[:, np.newaxis] - frames[np.newaxis, :]) < width] = np.inf

    # get the k nearest neighbors for each point
    rows = frames[:, np.newaxis]
    nearest = np.argsort(D, axis=1)[:, :k]

    # build the recurrence plot; candidates with a non-finite distance lie
    # in the suppressed band (or are undefined) and must not be linked
    rec = np.zeros((t, t), dtype=bool)
    rec[rows, nearest] = np.isfinite(D[rows, nearest])

    # symmetrize
    if sym:
        rec = rec & rec.T

    return rec
mi@0 280
mi@0 281
def getMean(feature, winlen, stepsize):
    '''Compute the mean of a feature matrix over sliding windows.

    Windows of ``winlen`` rows are taken every ``stepsize`` rows; only
    windows that fit entirely inside ``feature`` are used.

    :parameters:
      - feature : np.ndarray, 2-D
          one frame per row
      - winlen : int > 0
          number of rows per window
      - stepsize : int > 0
          number of rows between the starts of consecutive windows

    :returns:
      - np.ndarray with one row of column means per window
        (empty when no window fits)
    '''
    steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
    return np.array([
        np.mean(feature[i * stepsize:i * stepsize + winlen, :], axis=0)
        for i in range(steps)
    ])
mi@0 288
mi@0 289
def getStd(feature, winlen, stepsize):
    '''Compute the standard deviation of a feature matrix over sliding windows.

    Windows of ``winlen`` rows are taken every ``stepsize`` rows; only
    windows that fit entirely inside ``feature`` are used.

    :parameters:
      - feature : np.ndarray, 2-D
          one frame per row
      - winlen : int > 0
          number of rows per window
      - stepsize : int > 0
          number of rows between the starts of consecutive windows

    :returns:
      - np.ndarray with one row of column standard deviations per window
        (empty when no window fits)
    '''
    steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
    return np.array([
        np.std(feature[i * stepsize:i * stepsize + winlen, :], axis=0)
        for i in range(steps)
    ])
mi@0 296
mi@0 297
def getDelta(feature):
    '''Return the frame-to-frame difference of a 2-D feature matrix.

    The first output row is all zeros; every following row holds the
    difference between the corresponding row of ``feature`` and the row
    before it, so the output has the same shape as the input.
    '''
    first_row = np.zeros((1, feature.shape[1]))
    steps = np.diff(feature, axis=0)
    return np.concatenate([first_row, steps], axis=0)
mi@0 301
mi@0 302
def getSSM(feature_array, metric='cosine', norm='simple', reduce=False):
    '''Compute SSM given input feature array.

    Pairwise distances (NaNs replaced by 0) are min-max scaled and turned
    into similarities with ``1 - distance``.  When all distances are equal
    the scaling is undefined and a matrix of ones is returned.

    args: feature_array: feature matrix passed to ``pairwise_distances``.
          metric: distance metric passed to ``pairwise_distances``.
          norm: normalisation mode; only 'simple' is implemented.
          reduce: if True, post-process the matrix with ``reduceSSM``.
    returns: the self-similarity matrix.
    raises: ValueError for an unsupported ``norm`` (previously this failed
            later with an unbound local variable).
    '''
    if norm != 'simple':
        raise ValueError("unsupported norm %r: only 'simple' is implemented" % (norm,))

    dm = np.nan_to_num(pairwise_distances(feature_array, metric=metric))
    lowest = np.min(dm)
    span = np.max(dm) - lowest
    if span > 0:
        ssm = 1 - (dm - lowest) / span
    else:
        ssm = np.ones_like(dm, dtype=float)
    if reduce:
        ssm = reduceSSM(ssm)
    return ssm
mi@0 314
mi@0 315
def reduceSSM(ssm, maxfilter_size = 2, remove_size=50):
    '''Suppress weak entries of a self-similarity matrix.

    Entries below 0.75 are set to 0, then 0.6 times a min-max scaled local
    Otsu map is subtracted.  The thresholding is done on a copy, so the
    caller's matrix is not modified.

    args: ssm: self-similarity matrix.
          maxfilter_size, remove_size: currently unused; kept so existing
          calls keep working.
    returns: a new array holding the reduced matrix.
    '''
    reduced_ssm = np.array(ssm, copy=True)
    reduced_ssm[reduced_ssm < 0.75] = 0
    # NOTE(review): `otsu` and `disk` are not imported anywhere in this
    # file; presumably skimage.filters.rank.otsu and skimage.morphology.disk
    # -- confirm and add the import, otherwise this raises NameError.
    local_otsu = otsu(reduced_ssm, disk(5))
    local_otsu = (local_otsu.astype(float) - np.min(local_otsu)) / (np.max(local_otsu) - np.min(local_otsu))
    reduced_ssm = reduced_ssm - 0.6*local_otsu
    return reduced_ssm
mi@0 325
mi@0 326
def upSample(feature_array, step):
    '''Resample a downsized feature matrix along its first axis.

    The first axis is stretched by the factor ``step`` with
    ``scipy.ndimage.zoom``; the second axis keeps its size.

    args: feature_array: 2-D feature matrix.
          step: zoom factor applied to the first axis.
    returns: the resampled feature matrix.
    '''
    return zoom(feature_array, (step, 1))
mi@0 336
mi@0 337
def normaliseFeature(feature_array):
    '''Min-max normalise features along the last axis.

    NaN and infinite entries are first replaced by 0.  Each slice along the
    last axis (each row of a 2-D matrix) is then rescaled to [0, 1] using
    its own minimum and maximum.  Constant slices become all zeros.

    The work is done on a copy, so the caller's array is not modified.

    args: feature_array: array-like of feature values.
    returns: a new float array with the same shape as the input.
    '''
    feats = np.array(feature_array, dtype=float)
    feats[~np.isfinite(feats)] = 0.0

    lowest = feats.min(axis=-1, keepdims=True)
    span = feats.max(axis=-1, keepdims=True) - lowest

    # Divide only where the range is non-zero; the rest stays at 0.
    normalised = np.zeros_like(feats)
    np.divide(feats - lowest, span, out=normalised, where=span != 0)
    normalised[~np.isfinite(normalised)] = 0.0
    return normalised
mi@0 347
mi@0 348
def getRolloff(data, tpower, filterbank, thresh=0.9):
    '''Look up, per frame, the filterbank entry at which the accumulated
    band values exceed a fraction of the frame's total power.

    For every frame the band values are summed from the first band on until
    the running sum exceeds ``thresh * tpower[i]``; if that never happens
    the last band is used.  The result for the frame is
    ``filterbank[nFilters - band - 1]``.

    args: data: 2-D array, one frame per row and one band per column.
          tpower: total power of every frame.
          filterbank: sequence with one entry per band.
          thresh: fraction of the total power to reach.
    returns: 1-D float array with one value per frame (all zeros when the
             filterbank is empty).
    '''
    nFrames = data.shape[0]
    nFilters = len(filterbank)
    rolloff = np.zeros(nFrames)
    if nFilters == 0:
        return rolloff

    for i in range(nFrames):
        rolloffE = thresh * tpower[i]
        cumulative = 0.0
        band = nFilters - 1
        for idx in range(nFilters):
            cumulative += data[i][idx]
            if cumulative > rolloffE:
                band = idx
                break
        # NOTE(review): the band index is mirrored before the lookup, which
        # presumably assumes `filterbank` is ordered opposite to the columns
        # of `data` -- confirm against the caller.
        rolloff[i] = filterbank[nFilters - band - 1]

    return rolloff
mitian@12 363
mitian@12 364
def verifyPeaks(peak_canditates, dev_list):
    '''Verify peaks from the 1st round detection by thresholding the deviation list.

    The deviations of every candidate are averaged, and candidates whose
    average lies below the median of all averages are discarded.

    args: peak_canditates: sequence of candidate peaks.
          dev_list: one collection of deviations per candidate (one value
          per feature).
    returns: a new list with the retained candidates, in their original
             order; the input sequence is not modified.
    '''
    # get average of devs of different features
    mean_devs = np.array([np.mean(x) for x in dev_list])
    if mean_devs.size == 0:
        return list(peak_canditates)

    # Positive or zero after centring on the median means "keep".
    centred = mean_devs - np.percentile(mean_devs, 50)
    return [peak for peak, dev in zip(peak_canditates, centred) if dev >= 0]
mi@0 378
mi@0 379
def envelopeFollower(xc, AT, RT, prevG, scaler=1):
    '''Follows the amplitude envelope of input signal xc.

    For every sample the squared input is compared with the previous output:
    the output moves from the previous output towards the squared input by
    the fraction ``AT`` when the squared input is smaller and by ``RT``
    otherwise, and is then multiplied by ``scaler``.

    args: xc (1d array): input signal.
          AT: coefficient used when the squared input is below the previous output.
          RT: coefficient used otherwise.
          prevG: initial value of the previous output.
          scaler: factor applied to every output sample.
    returns: 1d array with the envelope; floating point even for integer
             input, so the envelope is not truncated.
    '''
    xc = np.asarray(xc)
    out_dtype = xc.dtype if np.issubdtype(xc.dtype, np.inexact) else float
    g = np.zeros(xc.shape, dtype=out_dtype)

    for i in range(len(xc)):
        xSquared = xc[i] ** 2
        # if input is less than the previous output use attack, otherwise use the release
        coeff = AT if xSquared < prevG else RT
        g[i] = ((xSquared - prevG) * coeff + prevG) * scaler
        prevG = g[i]

    return g
mi@0 398
mi@0 399
mi@0 400 def getEnvPeaks(sig, sig_env, size=1):
mi@0 401 '''Finds peaks in the signal envelope.
mi@0 402 args: sig (1d array): orignal input signal
mi@0 403 sig_env (list): position of the signal envelope.
mi@0 404 size: ranges to locate local maxima in the envelope as peaks.
mi@0 405 '''
mi@0 406 envelope = sig[sig_env]
mi@0 407 peaks = []
mi@0 408 if len(envelope) > 1 and envelope[0] > envelope[1]:
mi@0 409 peaks.append(sig_env[0])
mi@0 410 for i in xrange(size, len(envelope)-size-1):
mi@0 411 if envelope[i] > np.max(envelope[i-size:i]) and envelope[i] > np.max(envelope[i+1:i+size+1]):
mi@0 412 peaks.append(sig_env[i])
mi@0 413 return peaks