annotate utils/plotSSM.py @ 19:890cfe424f4a tip

added annotations
author mitian
date Fri, 11 Dec 2015 09:47:40 +0000
parents 8b814fe5781d
children
rev   line source
mi@0 1 #!/usr/bin/env python
mi@0 2 # encoding: utf-8
mi@0 3 """
mi@0 4 plotSSM.py
mi@0 5
mi@0 6 A helper util to plot SSMs from different features.
mi@0 7 """
mi@0 8
mitian@12 9 import matplotlib
mitian@12 10 # matplotlib.use('Agg')
mitian@12 11 import sys, os, optparse, csv
mitian@12 12 from itertools import combinations
mitian@12 13 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
mitian@12 14 from copy import copy
mitian@12 15
mitian@12 16 import matplotlib.pyplot as plt
mitian@12 17
mi@0 18 import numpy as np
mitian@12 19 from scipy.signal import correlate2d, convolve2d, filtfilt, resample
mitian@12 20 from scipy.stats import mode, kurtosis, skew
mitian@12 21 from scipy.ndimage import zoom
mitian@12 22 from scipy.ndimage.morphology import binary_fill_holes
mitian@12 23 from scipy.ndimage.filters import *
mitian@12 24 from scipy.spatial.distance import squareform, pdist
mitian@12 25 from sklearn.decomposition import PCA
mitian@12 26 from sklearn.mixture import GMM
mitian@12 27 from sklearn.cluster import KMeans
mitian@12 28 from sklearn.preprocessing import normalize
mitian@12 29 from sklearn.metrics.pairwise import pairwise_distances
mitian@12 30 from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line
mitian@12 31 from skimage.filter import canny, sobel
mitian@12 32 from skimage.filter.rank import otsu
mitian@12 33 from skimage import data, measure, segmentation, morphology
mitian@12 34 from skimage.morphology import disk
mi@0 35
mitian@12 36 from PeakPickerUtil import PeakPicker
mitian@16 37 from SegUtil import getMean, getStd, getDelta, getSSM, enhanceSSM, upSample, normaliseFeature
mitian@12 38
mitian@12 39 def parse_args():
mitian@12 40 # define parser
mitian@12 41 op = optparse.OptionParser()
mitian@12 42 # IO options
mitian@12 43 op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading features from.." )
mitian@12 44 op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading features from.." )
mitian@12 45 op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading features from.." )
mitian@12 46 op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ")
mitian@12 47 op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ")
mitian@13 48 op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots")
mitian@12 49 op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode")
mitian@12 50 op.add_option('-v', '--verbose-output', action="store_true", dest="VERBOSE", default=False, help="Exported raw detections.")
mitian@12 51
mitian@12 52 return op.parse_args()
mitian@12 53 options, args = parse_args()
mi@0 54
mi@0 55 class FeatureObj() :
mitian@12 56 __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'fp_features', 'lpc_features' 'harmonic_features', 'gammatone_ssm', 'fp_ssm', 'tempo_ssm', 'timbre_ssm', 'lpc_ssm', 'harmonic_ssm', 'ssm_timestamps']
mi@0 57
mitian@12 58 class AudioObj():
mitian@12 59 __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'fp_features', 'lpc_features', 'harmonic_features', 'combined_features',\
mitian@12 60 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'lpc_ssm', 'harmonic_ssm', 'fp_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps']
mi@0 61
mitian@12 62 class EvalObj():
mitian@12 63 __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA']
mi@0 64
mitian@12 65 class SSMseg(object):
mitian@12 66 '''The main segmentation object'''
mitian@12 67 def __init__(self):
mitian@12 68 self.SampleRate = 44100
mitian@12 69 self.NqHz = self.SampleRate/2
mitian@12 70 self.timestamp = []
mitian@12 71 self.previousSample = 0.0
mitian@12 72 self.featureWindow = 6.0
mitian@12 73 self.featureStep = 3.0
mitian@12 74 self.kernel_size = 64 # Adjust this param according to the feature resolution.pq
mitian@12 75 self.blockSize = 2048
mitian@12 76 self.stepSize = 1024
mitian@12 77
mitian@12 78 '''NOTE: Match the following params with those used for feature extraction!'''
mitian@12 79
mitian@12 80 '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without an FFT or any overlap. The windowing is done under the purpose of chunking
mitian@12 81 the audio to facilitate the gammatone filtering. Despite of the overlap in the time domain, only the first half after the filtering is returned,
mitian@12 82 resulting in no overlapping effect in the extracted features. To obtain features for overlapped audio input, make the gammatoneLen equal to blockSize
mitian@12 83 and return the whole filter output.'''
mitian@12 84 self.gammatoneLen = 2048
mitian@12 85 self.gammatoneBandGroups = [0, 16, 34, 50, 64]
mitian@12 86 self.nGammatoneBands = 30
mitian@12 87 self.histRes = 40
mitian@12 88 self.lowFreq = 100
mitian@12 89 self.highFreq = self.SampleRate / 4
mitian@12 90
mitian@12 91 '''Settings for extracting tempogram features.'''
mitian@12 92 self.tempoWindow = 6.0
mitian@12 93 self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]
mitian@12 94
mitian@12 95 '''Peak picking settings'''
mitian@12 96 self.threshold = 30
mitian@12 97 self.confidence_threshold = 0.5
mitian@12 98 self.delta_threshold = 0.0
mitian@12 99 self.backtracking_threshold = 1.9
mitian@12 100 self.polyfitting_on = True
mitian@12 101 self.medfilter_on = True
mitian@12 102 self.LPfilter_on = True
mitian@12 103 self.whitening_on = False
mitian@12 104 self.aCoeffs = [1.0000, -0.5949, 0.2348]
mitian@12 105 self.bCoeffs = [0.1600, 0.3200, 0.1600]
mitian@12 106 self.cutoff = 0.34
mitian@12 107 self.medianWin = 7
mitian@12 108
mitian@12 109 def getGaussianParams(self, length, featureRate, timeWindow):
mitian@12 110
mitian@12 111 win_len = round(timeWindow * featureRate)
mitian@12 112 win_len = win_len + (win_len % 2) - 1
mitian@12 113
mitian@12 114 # a 50% overlap between windows
mitian@12 115 stepsize = ceil(win_len * 0.5)
mitian@12 116 num_win = int(floor( (length) / stepsize))
mitian@12 117 gaussian_rate = featureRate / stepsize
mitian@12 118
mitian@12 119 return stepsize, num_win, win_len, gaussian_rate
mitian@12 120
mitian@12 121 def GaussianDistance(self, feature, featureRate, timeWindow):
mitian@12 122
mitian@12 123 stepsize, num_win, win_len, gr = self.getGaussianParams(feature.shape[0], featureRate, timeWindow)
mitian@12 124 print 'stepsize, num_win, feature', stepsize, num_win, feature.shape, featureRate, timeWindow
mitian@12 125 gaussian_list = []
mitian@12 126 gaussian_timestamps = []
mitian@12 127 tsi = 0
mitian@12 128
mitian@12 129 # f = open('/Users/mitian/Documents/experiments/features.txt','w')
mitian@12 130 # print 'divergence computing..'
mitian@12 131 for num in xrange(num_win):
mitian@12 132 # print num, num * stepsize , (num * stepsize) + win_len
mitian@12 133 gf=GaussianFeature(feature[int(num * stepsize) : int((num * stepsize) + win_len), :],2)
mitian@12 134 # f.write("\n%s" %str(gf))
mitian@12 135 gaussian_list.append(gf)
mitian@12 136 tsi = int(floor( num * stepsize + 1))
mitian@12 137 gaussian_timestamps.append(self.timestamp[tsi])
mitian@12 138
mitian@12 139 # f.close()
mitian@12 140
mitian@12 141 # print 'gaussian_list', len(gaussian_list), len(gaussian_timestamps)
mitian@12 142 dm = np.zeros((len(gaussian_list), len(gaussian_list)))
mitian@12 143
mitian@12 144 for v1, v2 in combinations(gaussian_list, 2):
mitian@12 145 i, j = gaussian_list.index(v1), gaussian_list.index(v2)
mitian@12 146 dm[i, j] = v1.distance(v2)
mitian@12 147 dm[j, i] = v2.distance(v1)
mitian@12 148 # print 'dm[i,j]',dm[i,j]
mitian@12 149 # sio.savemat("/Users/mitian/Documents/experiments/dm-from-segmenter.mat",{"dm":dm})
mitian@12 150 return dm, gaussian_timestamps
mitian@12 151
mitian@12 152 def gaussian_kernel(self, size):
mitian@12 153 '''Create a gaussian tapered 45 degrees rotated checkerboard kernel.
mitian@12 154 TODO: Unit testing: Should produce this with kernel size 3:
mitian@12 155 0.1353 -0.3679 0.1353
mitian@12 156 0.3679 1.0000 0.3679
mitian@12 157 0.1353 -0.3679 0.1353
mitian@12 158 '''
mitian@12 159 n = float(np.ceil(size / 2.0))
mitian@12 160 kernel = np.zeros((size,size))
mitian@12 161 for i in xrange(1,size+1) :
mitian@12 162 for j in xrange(1,size+1) :
mitian@12 163 gauss = np.exp( -4.0 * (np.square( (i-n)/n ) + np.square( (j-n)/n )) )
mitian@12 164 # gauss = 1
mitian@12 165 if np.logical_xor( j - n > np.floor((i-n) / 2.0), j - n > np.floor((n-i) / 2.0) ) :
mitian@12 166 kernel[i-1,j-1] = -gauss
mitian@12 167 else:
mitian@12 168 kernel[i-1,j-1] = gauss
mitian@12 169 return kernel
mitian@12 170
mitian@12 171 def getDiagonalSlice(self, ssm, width):
mitian@12 172 ''' Return a diagonal slice of the ssm given its width, with 45 degrees rotation.
mitian@12 173 Note: requres 45 degrees rotated kernel also.'''
mitian@12 174 w = int(np.floor(width/2.0))
mitian@12 175 length = len(np.diagonal(ssm))
mitian@12 176 slice = np.zeros((2*w+1,length))
mitian@12 177 # print 'diagonal', length, w, slice.shape
mitian@12 178 for i in xrange(-w, w+1) :
mitian@12 179 slice[w+i,:] = np.hstack(( np.zeros(int(np.floor(abs(i)/2.0))), np.diagonal(ssm,i), np.zeros(int(np.ceil(abs(i)/2.0))) ))
mitian@12 180 return slice
mitian@12 181
mitian@12 182 def getNoveltyCurve(self,dm, kernel_size):
mitian@12 183 '''Return novelty score from distance matrix.'''
mitian@12 184
mitian@12 185 kernel_size = int(np.floor(kernel_size/2.0)+1)
mitian@12 186 slice = self.getDiagonalSlice(dm, kernel_size)
mitian@12 187 kernel = self.gaussian_kernel(kernel_size)
mitian@12 188 xc = convolve2d(slice,kernel,mode='same')
mitian@12 189 xc[abs(xc)>1e+10]=0.00001
mitian@12 190 # print 'xc', xc.shape, xc
mitian@12 191 return xc[int(np.floor(xc.shape[0]/2.0)),:]
mi@0 192
mitian@12 193 def mergeBlocks(self, SSM, thresh=0.9, size=5):
mitian@12 194 '''Merge consequtive small blocks along the diagonal.'''
mitian@12 195 # found = False
mitian@12 196 # start = 0
mitian@12 197 # i = 0
mitian@12 198 # while i < len(SSM):
mitian@12 199 # j = i + 1
mitian@12 200 # if found: start = i
mitian@12 201 # while(j < len(SSM) and SSM[i, j]):
mitian@12 202 # if (j-i) > size:
mitian@12 203 # found = True
mitian@12 204 # i = j
mitian@12 205 # # print 'start,end', start, i
mitian@12 206 # start = i
mitian@12 207 # else:
mitian@12 208 # found = False
mitian@12 209 # j += 1
mitian@12 210 # if not found:
mitian@12 211 # print 'start,end', start, i
mitian@12 212 # SSM[start:i, start:i] = 0.9
mitian@12 213 # i = j
mitian@12 214 idx = 1
mitian@12 215 while idx < len(SSM):
mitian@12 216 i = 0
mitian@12 217 # if ((idx-1-i) > 0 and (idx+1+i) < len(SSM)):
mitian@12 218 while ((idx-1-i) > 0 and (idx+1+i) < len(SSM) and SSM[idx-1-i, idx] > 0 and SSM[idx+1+i, idx] > 0):
mitian@12 219 i += 1
mitian@12 220 if i > size/2:
mitian@12 221 SSM[idx-1-i:min(idx+i,len(SSM)), idx-1-i:min(idx+i,len(SSM))] = 1.0
mitian@12 222 idx += max(1, i)
mitian@12 223 return SSM
mitian@12 224
mitian@12 225 def getGMMs(self, feature, segment_boundaries):
mitian@12 226 '''Return GMMs for located segments'''
mitian@12 227 gmm_list = []
mitian@12 228 gmm_list.append(GmmDistance(feature[: segment_boundaries[0], :], components = 1))
mitian@12 229 for i in xrange(1, len(segment_boundaries)):
mitian@12 230 gmm_list.append(GmmDistance(feature[segment_boundaries[i-1] : segment_boundaries[i], :], components = 1))
mitian@12 231 return gmm_list
mitian@12 232
mitian@12 233 def normaliseFeature(self, feature_array):
mitian@12 234
mitian@12 235 feature_array = np.array(feature_array)
mitian@12 236 feature_array[np.isnan(feature_array)] = 0.0
mitian@12 237 feature_array[np.isinf(feature_array)] = 0.0
mitian@12 238
mitian@12 239 if len(feature_array.shape) == 1:
mitian@12 240 feature_array = (feature_array - feature_array.min()) / (feature_array.max() - feature_array.min())
mitian@12 241 else:
mitian@12 242 mins = feature_array.min(axis=1)
mitian@12 243 maxs = feature_array.max(axis=1)
mitian@12 244 feature_array = (feature_array - mins[:, np.newaxis]) / (maxs - mins)[:, np.newaxis]
mitian@12 245 feature_array[np.isnan(feature_array)] = 0.0
mitian@12 246 return feature_array
mitian@12 247
mitian@12 248 def upSample(self, feature_array, step):
mitian@12 249 '''Resample downsized tempogram features, tempoWindo should be in accordance with input features'''
mitian@12 250 # print feature_array.shape
mitian@12 251 sampleRate = 44100
mitian@12 252 stepSize = 1024.0
mitian@12 253 # step = np.ceil(sampleRate/stepSize/5.0)
mitian@12 254 feature_array = zoom(feature_array, (step,1))
mitian@12 255 # print 'resampled', feature_array.shape
mitian@12 256 return feature_array
mitian@12 257
mitian@12 258 def stripeDistance(self, feature_array, feature_len, step, metric='cosine'):
mitian@12 259 '''Return distance matrix calculated for 2d time invariant features.'''
mitian@12 260 size = feature_array.shape[0] / feature_len
mitian@12 261 dm = np.zeros((size, size))
mi@0 262
mitian@12 263 for i in xrange(size):
mitian@12 264 for j in xrange(i, size):
mitian@12 265 dm[i, j] = np.sum(pairwise_distances(feature_array[i*step:(i+1)*step, :], feature_array[j*step:(j+1)*step, :], metric))
mitian@12 266 dm[j, i] = dm[i, j]
mitian@12 267 # print 'np.nanmax(dm)', np.nanmax(dm)
mitian@12 268 dm[np.isnan(dm)] = np.nanmax(dm)
mitian@12 269 ssm = 1 - (dm - dm.min()) / (dm.max() - dm.min())
mitian@12 270 np.fill_diagonal(ssm, 1)
mitian@12 271 return ssm
mi@0 272
mitian@12 273
mitian@12 274 def getMean(self, feature, winlen, stepsize):
mitian@12 275 means = []
mitian@12 276 steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
mitian@12 277 for i in xrange(steps):
mitian@12 278 means.append(np.mean(feature[i*stepsize:(i*stepsize+winlen), :], axis=0))
mitian@12 279 return np.array(means)
mitian@12 280
mitian@12 281 def getStd(self, feature, winlen, stepsize):
mitian@12 282 std = []
mitian@12 283 steps = int((feature.shape[0] - winlen + stepsize) / stepsize)
mitian@12 284 for i in xrange(steps):
mitian@12 285 std.append(np.std(feature[i*stepsize:(i*stepsize+winlen), :], axis=0))
mitian@12 286 return np.array(std)
mitian@12 287
mitian@12 288 def getDelta(self, feature):
mitian@12 289 delta_feature = np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0)))
mitian@12 290 return delta_feature
mitian@12 291
mitian@12 292 def getSkew(self, feature, axis=-1):
mitian@12 293 return skew(feature, axis=axis)
mitian@12 294
mitian@12 295 def getKurtosis(self, feature, axis=-1):
mitian@12 296 return kurtosis(feature, axis=axis)
mitian@12 297
mitian@12 298 def getRolloff(self, data, thresh=0.9):
mitian@12 299 nFrames, nChannels, nBands = data.shape
mitian@12 300 rolloff = np.zeros((nFrames, nChannels))
mitian@12 301 tpower = np.sum(data, axis=-1)
mitian@12 302 freq = np.linspace(0,10,nBands)
mitian@12 303 for i in xrange(nFrames):
mitian@12 304 rolloffE = thresh * tpower[i]
mitian@12 305 for j in xrange(nChannels):
mitian@12 306 temp = 0.0
mitian@12 307 tempE = 0.0
mitian@12 308 for band in xrange(nBands):
mitian@12 309 temp += data[i, j, band]
mitian@12 310 if temp > rolloffE[j]: break
mitian@12 311 rolloff[i, j] = freq[band]
mitian@12 312 # rolloff[i, j] = band
mitian@12 313 return rolloff
mitian@12 314
mitian@12 315 def getCentroid(self, X):
mitian@12 316 nFrames, nChannels, nBands = X.shape
mitian@12 317
mitian@12 318 centroid = np.zeros((nFrames, nChannels))
mitian@12 319 freq = np.linspace(0,10,nBands)
mitian@12 320 freq = np.vstack([freq for i in xrange(nChannels)])
mitian@12 321 for i in xrange(nFrames):
mitian@12 322 centroid[i, :] = np.sum(X[i, :, :] * freq, axis=-1) / (np.sum(X[i, :, :], axis=-1) + 0.0005*np.ones(nChannels))
mitian@12 323
mitian@12 324 return centroid
mitian@12 325
mitian@12 326 def trackDF(self, onset1_index, df2):
mitian@12 327 '''In the second round of detection, remove the known onsets from the DF by tracking from the peak given by the first round
mitian@12 328 to a valley to deminish the recognised peaks on top of which to start new detection.'''
mitian@12 329 for idx in xrange(len(onset1_index)) :
mitian@12 330 remove = True
mitian@12 331 for i in xrange(onset1_index[idx], 1, -1) :
mitian@12 332 if remove :
mitian@12 333 if df2[i] >= df2[i-1] :
mitian@12 334 df2[i] == 0.0
mitian@12 335 else:
mitian@12 336 remove = False
mitian@12 337 return df2
mitian@12 338
mitian@12 339 def getSSM(self, feature_array, metric='cosine', norm='simple'):
mitian@12 340 '''Compute SSM given input feature array.
mitian@12 341 args: norm: ['simple', 'remove_noise']
mitian@12 342 '''
mitian@12 343 dm = pairwise_distances(feature_array, metric=metric)
mitian@12 344 dm = np.nan_to_num(dm)
mitian@12 345 if norm == 'simple':
mitian@12 346 ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm))
mitian@12 347 return ssm
mitian@12 348
mitian@12 349 def reduceSSM(self, ssm, maxfilter_size = 2, remove_size=50):
mitian@12 350 reduced_ssm = ssm
mitian@12 351 reduced_ssm[reduced_ssm<0.75] = 0
mitian@12 352 # # reduced_ssm = maximum_filter(reduced_ssm,size=maxfilter_size)
mitian@12 353 # # reduced_ssm = morphology.remove_small_objects(reduced_ssm.astype(bool), min_size=remove_size)
mitian@12 354 local_otsu = otsu(reduced_ssm, disk(5))
mitian@12 355 local_otsu = (local_otsu.astype(float) - np.min(local_otsu)) / (np.max(local_otsu) - np.min(local_otsu))
mitian@12 356 reduced_ssm = reduced_ssm - 0.6*local_otsu
mitian@12 357 return reduced_ssm
mitian@12 358
mitian@12 359 def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
mitian@12 360 '''Pairwise F measure evaluation of detection rates.'''
mitian@12 361
mitian@12 362 # print 'detection', detection
mitian@12 363 detection = np.append(detection, annotation[-1])
mitian@12 364 res = EvalObj()
mitian@12 365 res.TP = 0 # Total number of matched ground truth and experimental data points
mitian@12 366 gt = len(annotation) # Total number of ground truth data points
mitian@12 367 dt = len(detection) # Total number of experimental data points
mitian@12 368 foundIdx = []
mitian@12 369 D_AD = np.zeros(gt)
mitian@12 370 D_DA = np.zeros(dt)
mitian@12 371
mitian@12 372 for dtIdx in xrange(dt):
mitian@12 373 # print detection[dtIdx], abs(detection[dtIdx] - annotation)
mitian@12 374 D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation))
mitian@12 375 # D_DA[dtIdx] = min([abs(annot - detection[dtIdx]) for annot in annotation])
mitian@12 376 for gtIdx in xrange(gt):
mitian@12 377 D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection))
mitian@12 378 # D_AD[gtIdx] = min([abs(det - annotation[gtIdx]) for det in detection])
mitian@12 379 for dtIdx in xrange(dt):
mitian@12 380 if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0):
mitian@12 381 res.TP = res.TP + 1.0
mitian@12 382 foundIdx.append(gtIdx)
mitian@12 383 foundIdx = list(set(foundIdx))
mitian@12 384 res.TP = len(foundIdx)
mitian@12 385 res.FP = max(0, dt - res.TP)
mitian@12 386 res.FN = max(0, gt - res.TP)
mitian@12 387
mitian@12 388 res.AD = np.mean(D_AD)
mitian@12 389 res.DA = np.mean(D_DA)
mi@0 390
mitian@12 391 res.P, res.R, res.F = 0.0, 0.0, 0.0
mi@0 392
mitian@12 393 if res.TP == 0:
mitian@12 394 return res
mitian@12 395
mitian@12 396 res.P = res.TP / float(dt)
mitian@12 397 res.R = res.TP / float(gt)
mitian@12 398 res.F = 2 * res.P * res.R / (res.P + res.R)
mitian@12 399 # return TP3, FP3, FN3, pairwisePrecision3, pairwiseRecall3, pairwiseFValue3, TP05, FP05, FN05, pairwisePrecision05, pairwiseRecall05, pairwiseFValue05
mitian@12 400 return res
mi@0 401
mitian@12 402 def plotDetection(self, ssm, novelty, smoothed_novelty, gt, det, filename):
mitian@12 403 '''Plot performance curve.
mitian@12 404 x axis: distance threshold for feature selection; y axis: f measure'''
mitian@12 405
mitian@12 406 plt.figure(figsize=(10,16))
mitian@12 407 gt_plot = gt / gt[-1] * len(novelty)
mitian@12 408 det_plot = det / gt[-1] * len(novelty)
mitian@12 409
mitian@12 410 gs = gridspec.GridSpec(2, 1, height_ratios=[3,1])
mitian@12 411 ax0 = plt.subplot(gs[0])
mitian@12 412 ax1 = plt.subplot(gs[1], sharex=ax0)
mitian@12 413
mitian@12 414 ax0.imshow(ssm)
mitian@12 415 ax0.vlines(gt_plot, 0, len(ssm), colors ='w', linestyles='solid')
mitian@12 416 ax0.vlines(det_plot, 0, len(ssm), colors='k', linestyles='dashed')
mitian@12 417 ax1.plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty, 'g', np.linspace(0, len(novelty)-1, len(novelty)), smoothed_novelty,'b')
mitian@12 418 y_min, y_max = min([min(novelty), min(smoothed_novelty)]), max([max(novelty), max(smoothed_novelty)])
mitian@12 419 ax1.vlines(gt_plot, y_min, y_max, colors ='r', linestyles='solid')
mitian@12 420 ax1.vlines(det_plot, y_min, y_max, colors='k', linestyles='dashed')
mitian@12 421
mitian@12 422 # f, ax = plt.subplots(2, sharex=True)
mitian@12 423 # ax[0].imshow(ssm)
mitian@12 424 # ax[1].plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty)
mitian@12 425 # ax[1].vlines(gt_plot, 0, len(novelty), colors ='r', linestyles='solid')
mitian@12 426 # ax[1].vlines(det_plot, 0, len(novelty), colors='b', linestyles='dashed')
mitian@12 427 #
mitian@12 428 # plt.show()
mitian@12 429 plt.savefig(filename)
mitian@12 430
mitian@12 431 return None
mitian@12 432
mitian@12 433 def process(self):
mitian@12 434 '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending.
mitian@12 435 In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection
mitian@12 436 threshold - segmentation f measure curve.
mitian@12 437 '''
mitian@12 438
mitian@12 439 peak_picker = PeakPicker()
mitian@12 440 peak_picker.params.alpha = 9.0 # Alpha norm
mitian@12 441 peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
mitian@12 442 peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
mitian@12 443 peak_picker.params.QuadThresh_b = 0.0
mitian@12 444 peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
mitian@12 445 peak_picker.params.rawSensitivity = 20
mitian@12 446 peak_picker.params.aCoeffs = self.aCoeffs
mitian@12 447 peak_picker.params.bCoeffs = self.bCoeffs
mitian@12 448 peak_picker.params.preWin = self.medianWin
mitian@12 449 peak_picker.params.postWin = self.medianWin + 1
mitian@12 450 peak_picker.params.LP_on = self.LPfilter_on
mitian@12 451 peak_picker.params.Medfilt_on = self.medfilter_on
mitian@12 452 peak_picker.params.Polyfit_on = self.polyfitting_on
mitian@12 453 peak_picker.params.isMedianPositive = False
mitian@12 454
mitian@12 455 # Settings used for feature extraction
mitian@12 456 feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow)
mitian@12 457 feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep)
mitian@12 458 aggregation_window, aggregation_step = 100, 50
mitian@12 459 featureRate = float(self.SampleRate) / self.stepSize
mitian@12 460
mitian@12 461 audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ]
mitian@12 462 audio_files.sort()
mitian@12 463 audio_files = audio_files
mitian@12 464 audio_list = []
mitian@12 465
mitian@12 466 gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')]
mitian@12 467 gammatone_feature_list = ['dct', 'pcamean', 'contrast6']
mitian@12 468 tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')]
mitian@13 469 tempo_feature_list = ['ti_percussive_cdsf', 'tir_percussive_cdsf']
mitian@12 470 timbre_feature_list = ['mfcc_harmonic']
mitian@12 471 lpc_feature_list = ['lpcc']
mitian@12 472 harmonic_feature_list = ['chromagram_harmonic']
mitian@12 473
mitian@12 474 gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list]
mitian@12 475 timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list]
mitian@13 476 # lpc_feature_list = [join(options.SF, f) for f in lpc_feature_list]
mitian@12 477 tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list]
mitian@12 478 harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list]
mitian@12 479
mitian@12 480 fobj_list = []
mitian@12 481
mitian@12 482 # For each audio file, load specific features
mitian@12 483 for audio in audio_files:
mitian@12 484 ao = AudioObj()
mitian@12 485 ao.name = splitext(audio)[0]
mitian@16 486 # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
mitian@16 487 # ao.gt = np.genfromtxt(annotation_file, usecols=0)
mitian@16 488 # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
mitian@13 489
mitian@14 490 # annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
mitian@14 491 # ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
mitian@14 492 # ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
mitian@13 493
mitian@16 494 annotation_file = join(options.GT, ao.name+'.lab') # beatles
mitian@16 495 ao.gt = np.genfromtxt(annotation_file, usecols=(0,1))
mitian@16 496 ao.gt = np.unique(np.ndarray.flatten(ao.gt))
mitian@16 497 ao.label = np.genfromtxt(annotation_file, usecols=2, dtype=str)
mitian@12 498
mitian@12 499 gammatone_featureset, timbre_featureset, lpc_featureset, tempo_featureset, harmonic_featureset = [], [], [], [], []
mitian@12 500 for feature in gammatone_feature_list:
mitian@12 501 for f in os.listdir(feature):
mitian@12 502 if f[:f.find('_vamp')]==ao.name:
mitian@13 503 data = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]
mitian@13 504 if len(data) == 0: continue
mitian@13 505 gammatone_featureset.append(data)
mitian@12 506 break
mitian@12 507 if len(gammatone_feature_list) > 1:
mitian@12 508 n_frame = np.min([x.shape[0] for x in gammatone_featureset])
mitian@12 509 gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset]
mitian@12 510 ao.gammatone_features = np.hstack((gammatone_featureset))
mitian@12 511 else:
mitian@12 512 ao.gammatone_features = gammatone_featureset[0]
mitian@12 513
mitian@12 514 for feature in timbre_feature_list:
mitian@12 515 for f in os.listdir(feature):
mitian@12 516 if f[:f.find('_vamp')]==ao.name:
mitian@13 517 data = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]
mitian@13 518 if len(data) == 0: continue
mitian@13 519 timbre_featureset.append(data)
mitian@12 520 break
mitian@12 521 if len(timbre_feature_list) > 1:
mitian@12 522 n_frame = np.min([x.shape[0] for x in timbre_featureset])
mitian@12 523 timbre_featureset = [x[:n_frame,:] for x in timbre_featureset]
mitian@12 524 ao.timbre_features = np.hstack((timbre_featureset))
mitian@12 525 else:
mitian@12 526 ao.timbre_features = timbre_featureset[0]
mitian@12 527
mitian@13 528 # for feature in lpc_feature_list:
mitian@13 529 # for f in os.listdir(feature):
mitian@13 530 # if f[:f.find('_vamp')]==ao.name:
mitian@13 531 # lpc_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
mitian@13 532 # break
mitian@13 533 # if len(lpc_feature_list) > 1:
mitian@13 534 # n_frame = np.min([x.shape[0] for x in lpc_featureset])
mitian@13 535 # lpc_featureset = [x[:n_frame,:] for x in lpc_featureset]
mitian@13 536 # ao.lpc_features = np.hstack((lpc_featureset))
mitian@13 537 # else:
mitian@13 538 # ao.lpc_features = lpc_featureset[0]
mitian@12 539
mitian@12 540 for feature in tempo_feature_list:
mitian@12 541 for f in os.listdir(feature):
mitian@13 542 if f[:f.find('_vamp')]==ao.name:
mitian@13 543 data = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]
mitian@13 544 if len(data) == 0: continue
mitian@13 545 tempo_featureset.append(data)
mitian@12 546 ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0]
mitian@12 547 break
mitian@12 548 if len(tempo_feature_list) > 1:
mitian@12 549 n_frame = np.min([x.shape[0] for x in tempo_featureset])
mitian@12 550 tempo_featureset = [x[:n_frame,:] for x in tempo_featureset]
mitian@12 551 ao.tempo_features = np.hstack((tempo_featureset))
mitian@12 552 else:
mitian@12 553 ao.tempo_features = tempo_featureset[0]
mitian@12 554 for feature in harmonic_feature_list:
mitian@12 555 for f in os.listdir(feature):
mitian@12 556 if f[:f.find('_vamp')]==ao.name:
mitian@13 557 data = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]
mitian@13 558 if len(data) == 0: continue
mitian@13 559 harmonic_featureset.append(data)
mitian@12 560 break
mitian@12 561 if len(harmonic_feature_list) > 1:
mitian@12 562 n_frame = np.min([x.shape[0] for x in harmonic_featureset])
mitian@12 563 harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset]
mitian@12 564 ao.harmonic_features = np.hstack((harmonic_featureset))
mitian@12 565 else:
mitian@12 566 ao.harmonic_features = harmonic_featureset[0]
mitian@12 567
mitian@12 568 # Reshape features (downsample) to match tempogram ones
mitian@12 569 step = ao.tempo_features.shape[0]
mitian@12 570 # aggregation_step = (n_frames / (step+1.0))
mitian@12 571 # Get aggregated features for computing ssm
mitian@12 572 aggregation_window, aggregation_step = 1,1
mitian@12 573 featureRate = float(self.SampleRate) /self.stepSize
mitian@12 574 pca = PCA(n_components=5)
mitian@12 575
mitian@14 576 ao.gammatone_features = normaliseFeature(ao.gammatone_features)
mitian@12 577 ao.gammatone_features = resample(ao.gammatone_features, step)
mitian@12 578 ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0
mitian@12 579 ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0
mitian@14 580 ao.timbre_features = normaliseFeature(ao.timbre_features)
mitian@12 581 ao.timbre_features = resample(ao.timbre_features, step)
mitian@12 582 ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0
mitian@12 583 ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0
mitian@13 584 # ao.lpc_features = self.normaliseFeature(ao.lpc_features)
mitian@13 585 # ao.lpc_features = resample(ao.lpc_features, step)
mitian@13 586 # ao.lpc_features[np.isnan(ao.lpc_features)] = 0.0
mitian@13 587 # ao.lpc_features[np.isinf(ao.lpc_features)] = 0.0
mitian@14 588 ao.harmonic_features = normaliseFeature(ao.harmonic_features)
mitian@12 589 ao.harmonic_features = resample(ao.harmonic_features, step)
mitian@14 590 ao.tempo_features = normaliseFeature(ao.tempo_features)
mitian@12 591 ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0
mitian@12 592 ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0
mitian@12 593 ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0
mitian@12 594
mitian@14 595 ao.gammatone_features = getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step)
mitian@12 596 pca.fit(ao.gammatone_features)
mitian@12 597 ao.gammatone_features = pca.transform(ao.gammatone_features)
mitian@14 598 ao.gammatone_ssm = getSSM(ao.gammatone_features)
mitian@16 599 ao.gammatone_ssm = enhanceSSM(ao.gammatone_ssm)
mitian@12 600
mitian@14 601 ao.tempo_features = getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step)
mitian@12 602 pca.fit(ao.tempo_features)
mitian@12 603 ao.tempo_features = pca.transform(ao.tempo_features)
mitian@14 604 ao.tempo_ssm = getSSM(ao.tempo_features)
mitian@16 605 ao.tempo_ssm = enhanceSSM(ao.tempo_ssm)
mitian@12 606
mitian@14 607 ao.timbre_features = getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step)
mitian@12 608 pca.fit(ao.timbre_features)
mitian@12 609 ao.timbre_features = pca.transform(ao.timbre_features)
mitian@14 610 ao.timbre_ssm = getSSM(ao.timbre_features)
mitian@16 611 ao.timbre_ssm = enhanceSSM(ao.timbre_ssm)
mitian@12 612
mitian@13 613 # ao.lpc_features = self.getMean(ao.lpc_features, winlen=aggregation_window, stepsize=aggregation_step)
mitian@13 614 # pca.fit(ao.lpc_features)
mitian@13 615 # ao.lpc_features = pca.transform(ao.lpc_features)
mitian@13 616 # ao.lpc_ssm = self.getSSM(ao.lpc_features)
mitian@12 617
mitian@14 618 ao.harmonic_features = getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step)
mitian@12 619 pca.fit(ao.harmonic_features)
mitian@12 620 ao.harmonic_features = pca.transform(ao.harmonic_features)
mitian@14 621 ao.harmonic_ssm = getSSM(ao.harmonic_features)
mitian@16 622 ao.harmonic_ssm = enhanceSSM(ao.harmonic_ssm)
mitian@12 623
mitian@12 624 ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0])))
mitian@12 625
mitian@12 626 # Single feature SSMs.
mitian@12 627 # gammatone_ssm = self.reduceSSM(ao.gammatone_ssm)
mitian@12 628 plt.figure(figsize=(10, 10))
mitian@13 629 plt.vlines(ao.gt / ao.gt[-1] * ao.gammatone_ssm.shape[0], 0, ao.gammatone_ssm.shape[0])
mitian@12 630 plt.imshow(ao.gammatone_ssm)
mitian@16 631 plt.savefig(join(options.OUTPUT, ao.name+'-enhanced-gammatone.pdf'),format='pdf')
mitian@12 632 plt.close()
mitian@12 633
mitian@12 634 # tempo_ssm = self.reduceSSM(ao.tempo_ssm)
mitian@12 635 plt.figure(figsize=(10, 10))
mitian@13 636 plt.vlines(ao.gt / ao.gt[-1] * ao.tempo_ssm.shape[0], 0, ao.tempo_ssm.shape[0])
mitian@12 637 plt.imshow(ao.tempo_ssm)
mitian@16 638 plt.savefig(join(options.OUTPUT, ao.name+'-enhanced-hpss_tempo.pdf'),format='pdf')
mitian@12 639 plt.close()
mitian@12 640
mitian@12 641 # timbre_ssm = self.reduceSSM(ao.timbre_ssm)
mitian@12 642 plt.figure(figsize=(10, 10))
mitian@13 643 plt.vlines(ao.gt / ao.gt[-1] * ao.timbre_ssm.shape[0], 0, ao.timbre_ssm.shape[0])
mitian@12 644 plt.imshow(ao.timbre_ssm)
mitian@16 645 plt.savefig(join(options.OUTPUT, ao.name+'-enhanced-hpss_mfcc.pdf'),format='pdf')
mitian@12 646 plt.close()
mitian@12 647
mitian@13 648 # # lpc_ssm = self.reduceSSM(ao.lpc_ssm)
mitian@13 649 # plt.figure(figsize=(10, 10))
mitian@13 650 # plt.vlines(ao.gt / ao.gt[-1] * ao.lpc_ssm.shape[0], 0, ao.lpc_ssm.shape[0], colors='k')
mitian@13 651 # plt.imshow(ao.lpc_ssm)
mitian@13 652 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-lpcc.pdf'),format='pdf')
mitian@13 653 # plt.close()
mitian@13 654 # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-lpcc.txt'), np.array(ao.lpc_ssm), delimiter=',')
mitian@12 655
mitian@12 656 # harmonic_ssm = self.reduceSSM(ao.harmonic_ssm)
mitian@12 657 plt.figure(figsize=(10, 10))
mitian@13 658 plt.vlines(ao.gt / ao.gt[-1] * ao.harmonic_ssm.shape[0], 0, ao.harmonic_ssm.shape[0])
mitian@12 659 plt.imshow(ao.harmonic_ssm)
mitian@16 660 plt.savefig(join(options.OUTPUT, ao.name+'-enhanced-hpss_chroma.pdf'),format='pdf')
mitian@12 661 plt.close()
mitian@12 662
mitian@12 663 if options.VERBOSE:
mitian@12 664 np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-gammatone.txt'), np.array(ao.gammatone_ssm), delimiter=',')
mitian@12 665 np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-tempo.txt'), np.array(ao.tempo_ssm), delimiter=',')
mitian@13 666 # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-hpss_tempo.txt'), np.array(ao.tempo_ssm), delimiter=',')
mitian@13 667 # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-hpss_mfcc.txt'), np.array(ao.timbre_ssm), delimiter=',')
mitian@13 668 np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-mfcc.txt'), np.array(ao.timbre_ssm), delimiter=',')
mitian@13 669 # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-lpc.txt'), np.array(ao.lpc_ssm), delimiter=',')
mitian@13 670 np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',')
mitian@13 671 # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-hpss_chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',')
mitian@12 672
mitian@14 673 # audio_list.append(ao)
mitian@12 674
mi@0 675
mi@0 676 def main():
mitian@12 677 segmenter = SSMseg()
mitian@12 678 segmenter.process()
mi@0 679
mi@0 680
mi@0 681 if __name__ == '__main__':
mi@0 682 main()
mi@0 683