annotate utils/som_seg.py @ 19:890cfe424f4a tip

added annotations
author mitian
date Fri, 11 Dec 2015 09:47:40 +0000
parents 26838b1f560f
children
rev   line source
mi@0 1 #!/usr/bin/env python
mi@0 2 # encoding: utf-8
mi@0 3 """
mi@0 4 feature_combine_seg.py
mi@0 5
mi@0 6 A script for evaluating segmentation results using combinations of different features.
mi@0 7 """
mi@0 8
mi@0 9 import matplotlib
mi@0 10 # matplotlib.use('Agg')
mi@0 11 import sys, os, optparse, csv
mi@0 12 from itertools import combinations
mi@0 13 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
mi@0 14 from copy import copy
mi@0 15 from mvpa2.suite import *
mi@0 16
mi@0 17 import matplotlib.pyplot as plt
mi@0 18 import matplotlib.gridspec as gridspec
mi@0 19 import numpy as np
mi@0 20 from scipy.signal import correlate2d, convolve2d, filtfilt, resample
mi@0 21 from scipy.stats import mode
mi@0 22 from scipy.ndimage import zoom
mi@0 23 from scipy.ndimage.morphology import binary_fill_holes
mi@0 24 from scipy.ndimage.filters import *
mi@0 25 from scipy.spatial.distance import squareform, pdist
mi@0 26 from sklearn.decomposition import PCA
mi@0 27 from sklearn.mixture import GMM
mi@0 28 from sklearn.preprocessing import normalize
mi@0 29 from sklearn.metrics.pairwise import pairwise_distances
mi@0 30 from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line
mi@0 31 from skimage.filter import canny, sobel
mi@0 32 from skimage import data, measure, segmentation, morphology
mi@0 33
mi@0 34 from PeakPickerUtil import PeakPicker
mi@0 35 from gmmdist import *
mi@0 36 from GmmMetrics import GmmDistance
mi@0 37 from RankClustering import rClustering
mi@0 38 from kmeans import Kmeans
mi@0 39
def parse_args():
    '''Build the command-line parser and parse sys.argv.

    Returns the (options, positional_args) pair produced by optparse.
    '''
    # (short flag, long flag, dest, default, help) for options taking a path.
    path_options = (
        ('-g', '--gammatonegram-features', 'GF',
         '/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048',
         "Loading features from.."),
        ('-s', '--spectrogram-features', 'SF',
         '/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048',
         "Loading features from.."),
        ('-t', '--tempogram-features', 'TF',
         '/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s',
         "Loading features from.."),
        ('-a', '--annotations', 'GT',
         '/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase',
         "Loading annotation files from.. "),
        ('-o', '--ouput', 'OUTPUT',
         '/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng',
         "Write segmentation results to "),
    )
    # (short flag, long flag, dest, help) for boolean switches, all off by default.
    switch_options = (
        ('-p', '--plot-novelty', 'PLOT', "Save novelty curev plot"),
        ('-e', '--test-mode', 'TEST', "Test mode"),
        ('-v', '--verbose-output', 'VERBOSE', "Exported raw detections."),
    )

    parser = optparse.OptionParser()
    for short_flag, long_flag, dest, default, help_text in path_options:
        parser.add_option(short_flag, long_flag, action="store", dest=dest,
                          default=default, type="str", help=help_text)
    for short_flag, long_flag, dest, help_text in switch_options:
        parser.add_option(short_flag, long_flag, action="store_true", dest=dest,
                          default=False, help=help_text)
    return parser.parse_args()
# Parse the command line once, at import time; `options` is then read as a
# module-level global (e.g. options.GT / options.GF inside SSMseg.process).
options, args = parse_args()
mi@0 55
class FeatureObj():
    '''Plain record holding the features and self-similarity matrices of one item.'''
    # 'timbre_ssm' replaces a second, duplicated 'timbre_features' entry so
    # that every feature family has both a *_features and a *_ssm slot.
    __slots__ = ['key', 'audio', 'timestamps',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
                 'ssm_timestamps']
mi@0 58
class AudioObj():
    '''Plain record for one audio file: annotation, features and similarity matrices.'''
    __slots__ = (
        # identity and annotation
        ['name', 'feature_list', 'gt', 'label']
        # per-family feature matrices
        + ['gammatone_features', 'tempo_features', 'timbre_features',
           'harmonic_features', 'combined_features']
        # per-family self-similarity matrices
        + ['gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm']
        # generic matrix and time axes
        + ['ssm', 'ssm_timestamps', 'tempo_timestamps']
    )
mi@0 62
class EvalObj():
    '''Plain record for evaluation results.

    Fields: TP/FP/FN counts, P/R/F scores, and the AD/DA mean distances
    filled in by SSMseg.pairwiseF.
    '''
    __slots__ = 'TP FP FN P R F AD DA'.split()
mi@0 65
mi@0 66 class SSMseg(object):
mi@0 67 '''The main segmentation object'''
mi@0 68 def __init__(self):
mi@0 69 self.SampleRate = 44100
mi@0 70 self.NqHz = self.SampleRate/2
mi@0 71 self.timestamp = []
mi@0 72 self.previousSample = 0.0
mi@0 73 self.featureWindow = 6.0
mi@0 74 self.featureStep = 3.0
mi@0 75 self.kernel_size = 80 # Adjust this param according to the feature resolution.
mi@0 76 self.blockSize = 4094
mi@0 77 self.stepSize = 2048
mi@0 78
mi@0 79 '''NOTE: Match the following params with those used for feature extraction!'''
mi@0 80
mi@0 81 '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without an FFT or any overlap. The windowing is done under the purpose of chunking
mi@0 82 the audio to facilitate the gammatone filtering. Despite of the overlap in the time domain, only the first half after the filtering is returned,
mi@0 83 resulting in no overlapping effect in the extracted features. To obtain features for overlapped audio input, make the gammatoneLen equal to blockSize
mi@0 84 and return the whole filter output.'''
mi@0 85 self.gammatoneLen = 2048
mi@0 86 self.gammatoneBandGroups = [0, 16, 34, 50, 64]
mi@0 87 self.nGammatoneBands = 20
mi@0 88 self.histRes = 40
mi@0 89 self.lowFreq = 100
mi@0 90 self.highFreq = self.SampleRate / 4
mi@0 91
mi@0 92 '''Settings for extracting tempogram features.'''
mi@0 93 self.tempoWindow = 6.0
mi@0 94 self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]
mi@0 95
mi@0 96 '''Peak picking settings'''
mi@0 97 self.threshold = 30
mi@0 98 self.delta_threshold = 0.5
mi@0 99 self.backtracking_threshold = 2.4
mi@0 100 self.polyfitting_on = True
mi@0 101 self.medfilter_on = True
mi@0 102 self.LPfilter_on = True
mi@0 103 self.whitening_on = False
mi@0 104 self.aCoeffs = [1.0000, -0.5949, 0.2348]
mi@0 105 self.bCoeffs = [0.1600, 0.3200, 0.1600]
mi@0 106 self.cutoff = 0.5
mi@0 107 self.medianWin = 5
mi@0 108
def getGaussianParams(self, length, featureRate, timeWindow):
    '''Derive the windowing used to fit Gaussians over a feature sequence.

    Returns (stepsize, num_win, win_len, gaussian_rate): the hop in frames,
    the number of windows that fit in `length` frames, the (odd) window
    length in frames, and the resulting rate of windows per time unit.
    '''
    frames_per_window = round(timeWindow * featureRate)
    # Even lengths are shortened by one frame so the window length is odd.
    win_len = frames_per_window + (frames_per_window % 2) - 1

    # Consecutive windows overlap by 50%.
    stepsize = ceil(win_len * 0.5)
    num_win = int(floor(length / stepsize))
    return stepsize, num_win, win_len, featureRate / stepsize
mi@0 120
def GaussianDistance(self, feature, featureRate, timeWindow):
    '''Fit a Gaussian per half-overlapping window and return their pairwise distances.

    args: feature: 2-d array, one row per frame.
          featureRate, timeWindow: passed to getGaussianParams to derive the
          window length and hop.
    return: (dm, gaussian_timestamps) where dm[i, j] is
            gaussian_list[i].distance(gaussian_list[j]) and
            gaussian_timestamps[k] is the entry of self.timestamp at the
            frame following the start of window k.
    '''
    stepsize, num_win, win_len, _ = self.getGaussianParams(feature.shape[0], featureRate, timeWindow)
    # Same output as the former print statement, valid on Python 2 and 3.
    print(' '.join(str(item) for item in (
        'stepsize, num_win, feature', stepsize, num_win, feature.shape, featureRate, timeWindow)))

    gaussian_list = []
    gaussian_timestamps = []
    for num in range(num_win):
        start = int(num * stepsize)
        stop = int((num * stepsize) + win_len)
        gaussian_list.append(GaussianFeature(feature[start:stop, :], 2))
        # NOTE(review): the +1 offset can index past the end of self.timestamp
        # for the last window if timestamps and frames have equal length - confirm.
        gaussian_timestamps.append(self.timestamp[int(floor(num * stepsize + 1))])

    n_gaussians = len(gaussian_list)
    dm = np.zeros((n_gaussians, n_gaussians))

    # Carry the indices along with the pairs instead of searching the list
    # for each element: list.index() is O(n) per lookup and returns the first
    # *equal* element, which is the wrong row if two windows compare equal.
    for (i, v1), (j, v2) in combinations(enumerate(gaussian_list), 2):
        dm[i, j] = v1.distance(v2)
        dm[j, i] = v2.distance(v1)
    return dm, gaussian_timestamps
mi@0 151
def gaussian_kernel(self, size):
    '''Create a gaussian tapered 45 degrees rotated checkerboard kernel.

    For size 3 the result is:
        0.1353 -0.3679  0.1353
        0.3679  1.0000  0.3679
        0.1353 -0.3679  0.1353
    '''
    centre = float(np.ceil(size / 2.0))
    kernel = np.zeros((size, size))
    for row in range(size):
        # Offsets are measured on the 1-based grid used by the definition.
        dy = (row + 1) - centre
        for col in range(size):
            dx = (col + 1) - centre
            taper = np.exp(-4.0 * (np.square(dy / centre) + np.square(dx / centre)))
            # Cells lying between the two rotated boundaries get a negative sign.
            past_first = dx > np.floor(dy / 2.0)
            past_second = dx > np.floor(-dy / 2.0)
            kernel[row, col] = -taper if past_first != past_second else taper
    return kernel
mi@0 170
def getDiagonalSlice(self, ssm, width):
    '''Return a band of diagonals around the main diagonal of the ssm.

    Row half+k of the result holds the k-th diagonal, zero-padded to the
    length of the main diagonal with floor(|k|/2) zeros in front and
    ceil(|k|/2) behind (the 45 degrees rotation).
    Note: requires a 45 degrees rotated kernel as well.
    '''
    half = int(np.floor(width / 2.0))
    n_cols = len(np.diagonal(ssm))
    band = np.zeros((2 * half + 1, n_cols))
    for offset in range(-half, half + 1):
        shift = abs(offset)
        lead = np.zeros(int(np.floor(shift / 2.0)))
        tail = np.zeros(int(np.ceil(shift / 2.0)))
        band[half + offset, :] = np.hstack((lead, np.diagonal(ssm, offset), tail))
    return band
mi@0 181
mi@0 182 def getNoveltyCurve(self,dm, kernel_size):
mi@0 183 '''Return novelty score from distance matrix.'''
mi@0 184
mi@0 185 kernel_size = int(np.floor(kernel_size/2.0)+1)
mi@0 186 slice = self.getDiagonalSlice(dm, kernel_size)
mi@0 187 kernel = self.gaussian_kernel(kernel_size)
mi@0 188 xc = convolve2d(slice,kernel,mode='same')
mi@0 189 xc[abs(xc)>1e+10]=0.00001
mi@0 190 # print 'xc', xc.shape, xc
mi@0 191 return xc[int(np.floor(xc.shape[0]/2.0)),:]
mi@0 192
def mergeBlocks(self, SSM, thresh=0.9, size=5):
    '''Merge consecutive small blocks along the diagonal (in place).

    For each visited column the method grows a symmetric span above and
    below the diagonal while both cells in that column are positive; spans
    longer than size/2 are filled with 1.0 as a square block. `thresh` is
    accepted but not used. The modified SSM is returned.
    '''
    n = len(SSM)
    idx = 1
    while idx < n:
        reach = 0
        while True:
            upper = idx - 1 - reach
            lower = idx + 1 + reach
            if upper <= 0 or lower >= n:
                break
            if not (SSM[upper, idx] > 0 and SSM[lower, idx] > 0):
                break
            reach += 1
        if reach > size/2:
            start = idx - 1 - reach
            stop = min(idx + reach, n)
            SSM[start:stop, start:stop] = 1.0
        # Skip past the span just examined, but always advance.
        idx += max(1, reach)
    return SSM
mi@0 224
def getGMMs(self, feature, segment_boundaries):
    '''Return one single-component GmmDistance model per located segment.

    The first segment runs from the start of `feature` to the first
    boundary; each following segment lies between consecutive boundaries.
    '''
    models = [GmmDistance(feature[: segment_boundaries[0], :], components = 1)]
    models.extend(
        GmmDistance(feature[begin : end, :], components = 1)
        for begin, end in zip(segment_boundaries[:-1], segment_boundaries[1:])
    )
    return models
mi@0 232
def trackValley(self, onset_index, smoothed_df):
    '''Back track from a detected peak to the valley that precedes it.

    Walks backwards while index > 1 and stops at the first frame whose
    slope is negative while the previously seen slope was at least 10% of
    the current value. Returns the index reached.
    '''
    previous_slope = 0.0
    while onset_index > 1:
        current = smoothed_df[onset_index]
        slope = current - smoothed_df[onset_index - 1]
        if slope < 0 and previous_slope >= 0.1 * current:
            break
        previous_slope = slope
        onset_index -= 1
    return onset_index
mi@0 244
def normaliseFeature(self, feature_array):
    '''Min-max normalise a feature vector, or each row of a feature matrix, to [0, 1].

    NaN and infinite entries are replaced by 0.0 before scaling. A constant
    vector (or constant row) has no range to scale by and is mapped to
    zeros. The input is copied, never modified.
    '''
    # Work in floating point: integer input would otherwise be floor-divided
    # under Python 2.
    feature_array = np.array(feature_array, dtype=float)
    feature_array[~np.isfinite(feature_array)] = 0.0

    if feature_array.ndim == 1:
        lowest = feature_array.min()
        span = feature_array.max() - lowest
        if span == 0:
            return np.zeros_like(feature_array)
        return (feature_array - lowest) / span

    mins = feature_array.min(axis=1)[:, np.newaxis]
    spans = feature_array.max(axis=1)[:, np.newaxis] - mins
    # Constant rows: the numerator is already 0, so dividing by 1 yields the
    # zeros that the original obtained by patching NaNs after a 0/0 division.
    spans = np.where(spans == 0, 1.0, spans)
    return (feature_array - mins) / spans
mi@0 259
def upSample(self, feature_array, step):
    '''Resample downsized tempogram features along the first axis.

    The first axis is zoomed by the factor `step`; the second axis is left
    at its original size.
    '''
    zoom_factors = (step, 1)
    return zoom(feature_array, zoom_factors)
mi@0 269
def stripeDistance(self, feature_array, feature_len, step, metric='cosine'):
    '''Return a similarity matrix computed between consecutive blocks of rows.

    args: feature_array: 2-d array whose rows are split into blocks.
          feature_len: used to derive the number of blocks.
          step: number of rows per block when slicing.
          metric: metric name passed to pairwise_distances.
    return: square matrix in [0, 1] with ones on the diagonal.

    NOTE(review): the block count comes from feature_len while the blocks are
    sliced with step; callers presumably pass equal values - confirm.
    '''
    # Floor division keeps `size` an integer under true division as well.
    size = feature_array.shape[0] // feature_len
    dm = np.zeros((size, size))

    for i in range(size):
        block_i = feature_array[i*step:(i+1)*step, :]
        for j in range(i, size):
            block_j = feature_array[j*step:(j+1)*step, :]
            dm[i, j] = dm[j, i] = np.sum(pairwise_distances(block_i, block_j, metric=metric))

    # Undefined distances are treated as the largest observed distance.
    dm[np.isnan(dm)] = np.nanmax(dm)
    lowest, highest = dm.min(), dm.max()
    if highest == lowest:
        # All distances identical: avoid 0/0 and report full similarity.
        ssm = np.ones_like(dm)
    else:
        ssm = 1 - (dm - lowest) / (highest - lowest)
    np.fill_diagonal(ssm, 1)
    return ssm
mi@0 284
mi@0 285
def getMean(self, feature, winlen, stepsize):
    '''Return the column-wise mean of `feature` over sliding windows.

    Windows are `winlen` rows long and start every `stepsize` rows; one
    output row is produced per window.
    '''
    n_windows = int((feature.shape[0] - winlen + stepsize) / stepsize)
    window_starts = [k * stepsize for k in range(n_windows)]
    return np.array([np.mean(feature[begin:begin + winlen, :], axis=0)
                     for begin in window_starts])
mi@0 292
def getStd(self, feature, winlen, stepsize):
    '''Return the column-wise standard deviation of `feature` over sliding windows.

    Windows are `winlen` rows long and start every `stepsize` rows; one
    output row is produced per window.
    '''
    n_windows = int((feature.shape[0] - winlen + stepsize) / stepsize)
    window_starts = [k * stepsize for k in range(n_windows)]
    return np.array([np.std(feature[begin:begin + winlen, :], axis=0)
                     for begin in window_starts])
mi@0 299
def getDelta(self, feature):
    '''Return frame-to-frame differences of `feature`, with a leading row of zeros.

    The output has the same shape as the input.
    '''
    leading_zeros = np.zeros((1, feature.shape[1]))
    frame_differences = np.diff(feature, axis=0)
    return np.vstack((leading_zeros, frame_differences))
mi@0 303
mi@0 304 def backtrack(self, onset_index, smoothed_df):
mi@0 305 '''Backtrack the onsets to an earlier 'perceived' location from the actually detected peak...
mi@0 306 This is based on the rationale that the perceived onset tends to be a few frames before the detected peak.
mi@0 307 This tracks the position in the detection function back to where the peak is startng to build up.
mi@0 308 Notice the "out of the blue" parameter: 0.9. (Ideally, this should be tested, evaluated and reported...)'''
mi@0 309 prevDiff = 0.0
mi@0 310 while (onset_index > 1) :
mi@0 311 diff = smoothed_df[onset_index] - smoothed_df[onset_index-1]
mi@0 312 if diff < prevDiff * self.backtracking_threshold : break
mi@0 313 prevDiff = diff
mi@0 314 onset_index -= 1
mi@0 315 return onset_index
mi@0 316
def trackDF(self, onset1_index, df2):
    '''Remove already-known onsets from a detection function (in place).

    For every first-round onset index, walk backwards from the peak while
    the function is non-decreasing towards the peak and zero those samples,
    stopping at the first sample that is lower than its predecessor (or at
    index 2). New peaks can then be detected on what remains.
    The modified df2 is returned.
    '''
    for onset in onset1_index:
        for i in range(onset, 1, -1):
            if not (df2[i] >= df2[i-1]):
                # Reached the valley side of the peak: nothing more to remove.
                break
            # Assignment, not comparison: the original `==` left df2 untouched.
            df2[i] = 0.0
    return df2
mi@0 329
def getSSM(self, feature_array, metric='cosine', norm='simple'):
    '''Compute a self-similarity matrix for the rows of the input feature array.

    args: metric: metric name passed to pairwise_distances.
          norm: only 'simple' (min-max scaling of the distances, inverted
                into similarities) is implemented.
    raises: ValueError for any other `norm`; previously such a call failed
            with an unbound local variable after the distances had been
            computed.
    '''
    if norm != 'simple':
        raise ValueError("Unsupported norm %r: only 'simple' is implemented" % (norm,))

    dm = np.nan_to_num(pairwise_distances(feature_array, metric=metric))
    lowest, highest = np.min(dm), np.max(dm)
    if highest == lowest:
        # All distances identical: avoid 0/0 and report full similarity.
        return np.ones_like(dm)
    return 1 - (dm - lowest) / (highest - lowest)
mi@0 339
def reduceSSM(self, ssm, maxfilter_size = 2, remove_size=50):
    '''Threshold and clean an SSM, returning a boolean mask.

    Entries below 0.8 are zeroed (this modifies the input array), the result
    is passed through a maximum filter of size `maxfilter_size`, and
    connected objects smaller than `remove_size` are removed.
    '''
    weak = ssm < 0.8
    ssm[weak] = 0
    dilated = maximum_filter(ssm, size=maxfilter_size)
    return morphology.remove_small_objects(dilated.astype(bool), min_size=remove_size)
mi@0 345
def getPeakFeatures(self, peak_candidates, featureset, winlen):
    '''Collect the feature segments before and after each candidate peak.

    args: peak_candidates: frame indices of the candidate peaks, in order.
          featureset: a list of 2-d audio feature arrays (one row per frame).
          winlen: accepted for compatibility; not used, because segments are
                  bounded by the neighbouring peaks rather than a fixed window.

    return: (prev_features, post_features), one entry per peak. Each entry
            is a tuple with one array per feature type: the rows between the
            previous peak (or the start) and this peak, and the rows between
            this peak and the next peak (or the end).

    A single peak or an empty candidate list is handled as well; the
    original indexed peak_candidates[1] / [-2] unconditionally and raised
    IndexError in those cases.
    '''
    # None as outer bounds makes the first segment start at the beginning
    # and the last segment run to the end of the features.
    bounds = [None] + list(peak_candidates) + [None]
    prev_features = []
    post_features = []
    for k in range(1, len(bounds) - 1):
        before = slice(bounds[k - 1], bounds[k])
        after = slice(bounds[k], bounds[k + 1])
        prev_features.append(tuple(feature[before, :] for feature in featureset))
        post_features.append(tuple(feature[after, :] for feature in featureset))
    return prev_features, post_features
mi@0 371
def segmentDev(self, prev_features, post_features):
    '''Measure, per feature type, the deviation between the parts before and after each peak.

    prev_features / post_features: per-peak tuples of feature arrays, as
    returned by getPeakFeatures.
    Returns a list with one tuple per peak holding, for each feature type,
    the skl_distance_full between single-component GmmDistance models of
    the two parts.
    '''
    n_features = len(prev_features[0])

    def divergence(before, after):
        # Distance from the model of the preceding part to that of the following part.
        return GmmDistance(before, components=1).skl_distance_full(GmmDistance(after, components=1))

    dev_list = []
    for peak in range(len(prev_features)):
        before, after = prev_features[peak], post_features[peak]
        dev_list.append(tuple(divergence(before[i], after[i]) for i in range(n_features)))
    return dev_list
mi@0 386
def verifyPeaks(self, peak_canditates, dev_list):
    '''Verify first-round peaks by adaptive thresholding of the deviation list.

    The per-peak deviations are averaged over feature types and compared
    with their running median (window of 5); peaks whose deviation falls
    below the median are dropped. Returns a filtered copy of the candidates.
    '''
    final_peaks = copy(peak_canditates)
    mean_dev = np.array([np.mean(per_feature) for per_feature in dev_list])
    excess = mean_dev - median_filter(mean_dev, size=5)
    for position in np.flatnonzero(excess < 0):
        final_peaks.remove(peak_canditates[position])
    return final_peaks
mi@0 400
def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
    '''Pairwise F measure evaluation of detection rates.

    args: annotation: ground-truth boundary times.
          detection: detected boundary times.
          tolerance: full width of the matching window; an annotation counts
                     as found when some detection lies within tolerance/2.
          combine: accepted for compatibility; not used.

    return: EvalObj with
            TP: number of annotations with at least one detection in range,
            FP: number of detections minus TP, FN: number of annotations minus TP,
            P, R, F: precision, recall and their harmonic mean (0.0 if TP is 0),
            AD / DA: mean distance from each annotation to its nearest
                     detection, and from each detection to its nearest annotation.

    Matching is not one-to-one: one detection may satisfy several
    annotations. Empty (or scalar) inputs no longer raise; with nothing to
    compare, the scores are 0.0 and AD / DA are NaN.
    '''
    # atleast_1d also covers the 0-d array numpy returns for single-row files.
    annotation = np.atleast_1d(np.asarray(annotation, dtype=float))
    detection = np.atleast_1d(np.asarray(detection, dtype=float))
    gt = len(annotation)    # Total number of ground truth data points
    dt = len(detection)     # Total number of experimental data points

    res = EvalObj()
    res.P, res.R, res.F = 0.0, 0.0, 0.0

    if gt == 0 or dt == 0:
        res.TP = 0
        res.FP = dt
        res.FN = gt
        res.AD = np.nan
        res.DA = np.nan
        return res

    # gaps[g, d] is the absolute time difference between annotation g and detection d.
    ann_col = annotation[:, np.newaxis]
    det_row = detection[np.newaxis, :]
    gaps = np.abs(ann_col - det_row)
    res.AD = np.mean(gaps.min(axis=1))
    res.DA = np.mean(gaps.min(axis=0))

    half = tolerance/2.0
    in_range = (ann_col >= det_row - half) & (ann_col <= det_row + half)
    res.TP = int(in_range.any(axis=1).sum())
    res.FP = dt - res.TP
    res.FN = gt - res.TP

    if res.TP == 0:
        return res

    res.P = res.TP / float(dt)
    res.R = res.TP / float(gt)
    res.F = 2.0 / (1.0/res.P + 1.0/res.R)
    return res
mi@0 443
def plotDetection(self, ssm, novelty, smoothed_novelty, gt, det, filename):
    '''Plot the SSM above the novelty curves and save the figure to `filename`.

    Ground-truth and detected boundaries are drawn as vertical lines on both
    panels, after rescaling the times to the frame axis of the novelty curve
    using the last ground-truth time as the total duration.
    '''
    n_frames = len(novelty)
    plt.figure(figsize=(10,16))
    gt_plot = gt / gt[-1] * n_frames
    det_plot = det / gt[-1] * n_frames

    layout = gridspec.GridSpec(2, 1, height_ratios=[3,1])
    ssm_axis = plt.subplot(layout[0])
    curve_axis = plt.subplot(layout[1], sharex=ssm_axis)

    # Upper panel: similarity matrix with boundaries.
    ssm_axis.imshow(ssm)
    ssm_axis.vlines(gt_plot, 0, len(ssm), colors ='w', linestyles='solid')
    ssm_axis.vlines(det_plot, 0, len(ssm), colors='k', linestyles='dashed')

    # Lower panel: raw (green) and smoothed (blue) novelty with boundaries.
    frame_axis = np.linspace(0, n_frames-1, n_frames)
    curve_axis.plot(frame_axis, novelty, 'g', np.linspace(0, n_frames-1, n_frames), smoothed_novelty,'b')
    y_min = min([min(novelty), min(smoothed_novelty)])
    y_max = max([max(novelty), max(smoothed_novelty)])
    curve_axis.vlines(gt_plot, y_min, y_max, colors ='r', linestyles='solid')
    curve_axis.vlines(det_plot, y_min, y_max, colors='k', linestyles='dashed')

    plt.savefig(filename)

    return None
mi@0 474
mi@0 475 def process(self):
mi@0 476 '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending.
mi@0 477 In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection
mi@0 478 threshold - segmentation f measure curve.
mi@0 479 '''
mi@0 480 ssom = SimpleSOMMapper((30,30), 800, learning_rate=0.001)
mi@0 481
mi@0 482 peak_picker = PeakPicker()
mi@0 483 peak_picker.params.alpha = 9.0 # Alpha norm
mi@0 484 peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
mi@0 485 peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
mi@0 486 peak_picker.params.QuadThresh_b = 0.0
mi@0 487 peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
mi@0 488 peak_picker.params.rawSensitivity = 20
mi@0 489 peak_picker.params.aCoeffs = self.aCoeffs
mi@0 490 peak_picker.params.bCoeffs = self.bCoeffs
mi@0 491 peak_picker.params.preWin = self.medianWin
mi@0 492 peak_picker.params.postWin = self.medianWin + 1
mi@0 493 peak_picker.params.LP_on = self.LPfilter_on
mi@0 494 peak_picker.params.Medfilt_on = self.medfilter_on
mi@0 495 peak_picker.params.Polyfit_on = self.polyfitting_on
mi@0 496 peak_picker.params.isMedianPositive = False
mi@0 497
mi@0 498 # Settings used for feature extraction
mi@0 499 feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow)
mi@0 500 feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep)
mi@0 501 aggregation_window, aggregation_step = 100, 50
mi@0 502 featureRate = float(self.SampleRate) / self.stepSize
mi@0 503
mi@0 504 audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ]
mi@0 505 # audio_files = audio_files[:2]
mi@0 506 audio_files.sort()
mi@0 507 audio_list = []
mi@0 508
mi@0 509 gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')]
mi@0 510 gammatone_feature_list = ['rolloff', 'contrast']
mi@0 511 tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')]
mi@0 512 # tempo_feature_list = ['intensity_bpm_renamed', 'loudness_bpm_renamed']
mi@0 513 timbre_feature_list = ['mfcc']
mi@0 514 harmonic_feature_list = ['nnls']
mi@0 515
mi@0 516 gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list]
mi@0 517 timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list]
mi@0 518 tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list]
mi@0 519 harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list]
mi@0 520
mi@0 521 fobj_list = []
mi@0 522
mi@0 523 # For each audio file, load specific features
mi@0 524 for audio in audio_files:
mi@0 525 ao = AudioObj()
mi@0 526 ao.name = splitext(audio)[0]
mi@0 527 # print 'audio:', ao.name
mi@0 528 # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
mi@0 529 # ao.gt = np.genfromtxt(annotation_file, usecols=0)
mi@0 530 # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
mi@0 531 annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
mi@0 532 ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
mi@0 533 ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
mi@0 534
mi@0 535 gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], []
mi@0 536 for feature in gammatone_feature_list:
mi@0 537 for f in os.listdir(feature):
mi@0 538 if f[:f.find('_vamp')]==ao.name:
mi@0 539 gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
mi@0 540 break
mi@0 541 if len(gammatone_feature_list) > 1:
mi@0 542 n_frame = np.min([x.shape[0] for x in gammatone_featureset])
mi@0 543 gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset]
mi@0 544 ao.gammatone_features = np.hstack((gammatone_featureset))
mi@0 545 else:
mi@0 546 ao.gammatone_features = gammatone_featureset[0]
mi@0 547
mi@0 548 for feature in timbre_feature_list:
mi@0 549 for f in os.listdir(feature):
mi@0 550 if f[:f.find('_vamp')]==ao.name:
mi@0 551 timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
mi@0 552 break
mi@0 553 if len(timbre_feature_list) > 1:
mi@0 554 n_frame = np.min([x.shape[0] for x in timbre_featureset])
mi@0 555 timbre_featureset = [x[:n_frame,:] for x in timbre_featureset]
mi@0 556 ao.timbre_features = np.hstack((timbre_featureset))
mi@0 557 else:
mi@0 558 ao.timbre_features = timbre_featureset[0]
mi@0 559 for feature in tempo_feature_list:
mi@0 560 for f in os.listdir(feature):
mi@0 561 if f[:f.find('_vamp')]==ao.name:
mi@0 562 tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:])
mi@0 563 ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0]
mi@0 564 break
mi@0 565 if len(tempo_feature_list) > 1:
mi@0 566 n_frame = np.min([x.shape[0] for x in tempo_featureset])
mi@0 567 tempo_featureset = [x[:n_frame,:] for x in tempo_featureset]
mi@0 568 ao.tempo_features = np.hstack((tempo_featureset))
mi@0 569 else:
mi@0 570 ao.tempo_features = tempo_featureset[0]
mi@0 571 for feature in harmonic_feature_list:
mi@0 572 for f in os.listdir(feature):
mi@0 573 if f[:f.find('_vamp')]==ao.name:
mi@0 574 harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
mi@0 575 break
mi@0 576 if len(harmonic_feature_list) > 1:
mi@0 577 n_frame = np.min([x.shape[0] for x in harmonic_featureset])
mi@0 578 harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset]
mi@0 579 ao.harmonic_features = np.hstack((harmonic_featureset))
mi@0 580 else:
mi@0 581 ao.harmonic_features = harmonic_featureset[0]
mi@0 582
mi@0 583 # # Reshaping features to keep identical dimension
mi@0 584 # n_frames = np.array([ao.gammatone_features.shape[0], ao.harmonic_features.shape[0], ao.timbre_features.shape[0]]).min()
mi@0 585 # step = n_frames / float(ao.tempo_features.shape[0])
mi@0 586 # # ao.tempo_features = self.upSample(ao.tempo_features, step)
mi@0 587 # ao.gammatone_features = ao.gammatone_features[:n_frames, :]
mi@0 588 # ao.harmonic_features = ao.harmonic_features[:n_frames, :]
mi@0 589 # ao.timbre_features = ao.timbre_features[:n_frames, :]
mi@0 590 # print ao.gammatone_features.shape, ao.harmonic_features.shape, ao.tempo_features.shape, ao.timbre_features.shape
mi@0 591
mi@0 592 # Reshape features (downsample) to match tempogram ones
mi@0 593 step = ao.tempo_features.shape[0]
mi@0 594 # aggregation_step = (n_frames / (step+1.0))
mi@0 595 # Get aggregated features for computing ssm
mi@0 596 aggregation_window, aggregation_step = 1,1
mi@0 597 featureRate = float(self.SampleRate) /self.stepSize
mi@0 598 pca = PCA(n_components=5)
mi@0 599
mi@0 600 ao.gammatone_features = resample(ao.gammatone_features, step)
mi@0 601 ao.gammatone_features = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis]
mi@0 602 ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0
mi@0 603 ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0
mi@0 604 ao.timbre_features = resample(ao.timbre_features, step)
mi@0 605 ao.timbre_features = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis]
mi@0 606 ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0
mi@0 607 ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0
mi@0 608 ao.harmonic_features = resample(ao.harmonic_features, step)
mi@0 609 ao.harmonic_features = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis]
mi@0 610 ao.harmonic_features[np.isnan(ao.harmonic_features)] = 0.0
mi@0 611 ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0
mi@0 612 ao.tempo_features = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis]
mi@0 613 ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0
mi@0 614 ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0
mi@0 615 # print 'resampled', ao.gammatone_features.shape, ao.timbre_features.shape, ao.harmonic_features.shape
mi@0 616 # gt_feature_matrix = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis]
mi@0 617 # gt_feature_matrix[np.isnan(gt_feature_matrix)] = 0.0
mi@0 618 # mean_gt_feature = self.getMean(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 619 # std_gt_feature = self.getStd(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 620 # delta_gt_feature = self.getDelta(gt_feature_matrix)
mi@0 621 # mean_dgt_feature = self.getMean(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 622 # std_dgt_feature = self.getStd(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 623 # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature))
mi@0 624 # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature, mean_dgt_feature, std_dgt_feature))
mi@0 625 # aggregated_gt_feature = ao.gammatone_features
mi@0 626 aggregated_gt_feature = self.getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 627 pca.fit(aggregated_gt_feature)
mi@0 628 aggregated_gt_feature = pca.transform(aggregated_gt_feature)
mi@0 629 distance_gt_matrix = pairwise_distances(aggregated_gt_feature, metric = 'cosine')
mi@0 630 distance_gt_matrix = np.nan_to_num(distance_gt_matrix)
mi@0 631 ao.gammatone_ssm = 1 - (distance_gt_matrix - distance_gt_matrix.min()) / (distance_gt_matrix.max() - distance_gt_matrix.min())
mi@0 632
mi@0 633 # tempo_feature_matrix = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis]
mi@0 634 # tempo_feature_matrix[np.isnan(tempo_feature_matrix)] = 0.0
mi@0 635 # mean_tempo_feature = self.getMean(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 636 # std_tempo_feature = self.getStd(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 637 # delta_tempo_feature = self.getDelta(tempo_feature_matrix)
mi@0 638 # mean_dtempo_feature = self.getMean(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 639 # std_dtempo_feature = self.getStd(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 640 # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature))
mi@0 641 # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature, mean_dtempo_feature, std_dtempo_feature))
mi@0 642 # aggregated_tempo_feature = ao.tempo_features
mi@0 643 aggregated_tempo_feature = self.getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 644 pca.fit(aggregated_tempo_feature)
mi@0 645 aggregated_tempo_feature = pca.transform(aggregated_tempo_feature)
mi@0 646 distance_tempo_matrix = pairwise_distances(aggregated_tempo_feature, metric = 'cosine')
mi@0 647 distance_tempo_matrix = np.nan_to_num(distance_tempo_matrix)
mi@0 648 ao.tempo_ssm = 1 - (distance_tempo_matrix - distance_tempo_matrix.min()) / (distance_tempo_matrix.max() - distance_tempo_matrix.min())
mi@0 649
mi@0 650 # timbre_feature_matrix = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis]
mi@0 651 # timbre_feature_matrix[np.isnan(timbre_feature_matrix)] = 0.0
mi@0 652 # mean_timbre_feature = self.getMean(timbre_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 653 # std_timbre_feature = self.getStd(timbre_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 654 # delta_timbre_feature = self.getDelta(timbre_feature_matrix)
mi@0 655 # mean_dtimbre_feature = self.getMean(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 656 # std_dtimbre_feature = self.getStd(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 657 # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature)
mi@0 658 # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature, mean_dtimbre_feature, std_dtimbre_feature))
mi@0 659 # aggregated_timbre_feature = ao.timbre_features
mi@0 660 aggregated_timbre_feature = self.getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 661 pca.fit(aggregated_timbre_feature)
mi@0 662 aggregated_timbre_feature = pca.transform(aggregated_timbre_feature)
mi@0 663 distance_timbre_matrix = pairwise_distances(aggregated_timbre_feature, metric = 'cosine')
mi@0 664 distance_timbre_matrix = np.nan_to_num(distance_timbre_matrix)
mi@0 665 ao.timbre_ssm = 1 - (distance_timbre_matrix - distance_timbre_matrix.min()) / (distance_timbre_matrix.max() - distance_timbre_matrix.min())
mi@0 666
mi@0 667 # harmonic_feature_matrix = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis]
mi@0 668 # harmonic_feature_matrix[np.isnan(harmonic_feature_matrix)] = 0.0
mi@0 669 # mean_harmonic_feature = self.getMean(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 670 # std_harmonic_feature = self.getStd(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 671 # delta_harmonic_feature = self.getDelta(harmonic_feature_matrix)
mi@0 672 # mean_dharmonic_feature = self.getMean(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 673 # std_dharmonic_feature = self.getStd(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 674 # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature))
mi@0 675 # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature, mean_dharmonic_feature, std_dharmonic_feature))
mi@0 676 aggregated_harmonic_feature = ao.harmonic_features
mi@0 677 aggregated_harmonic_feature = self.getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step)
mi@0 678 pca.fit(aggregated_harmonic_feature)
mi@0 679 aggregated_harmonic_feature = pca.transform(aggregated_harmonic_feature)
mi@0 680 distance_harmonic_matrix = pairwise_distances(aggregated_harmonic_feature, metric = 'cosine')
mi@0 681 distance_harmonic_matrix = np.nan_to_num(distance_harmonic_matrix)
mi@0 682 ao.harmonic_ssm = 1 - (distance_harmonic_matrix - distance_harmonic_matrix.min()) / (distance_harmonic_matrix.max() - distance_harmonic_matrix.min())
mi@0 683
mi@0 684 ao.combined_features = np.hstack((aggregated_gt_feature, aggregated_harmonic_feature, aggregated_timbre_feature, aggregated_tempo_feature))
mi@0 685 pca.fit(ao.combined_features)
mi@0 686 ao.combined_features = pca.transform(ao.combined_features)
mi@0 687 distance_combined_matrix = pairwise_distances(ao.combined_features, metric = 'cosine')
mi@0 688 distance_combined_matrix = np.nan_to_num(distance_combined_matrix)
mi@0 689 ao.combined_ssm = 1 - (distance_combined_matrix - distance_combined_matrix.min()) / (distance_combined_matrix.max() - distance_combined_matrix.min())
mi@0 690
mi@0 691 # Resample timestamps
mi@0 692 # ao.ssm_timestamps = np.array(map(lambda step: step * aggregation_step / featureRate, np.arange(0.0, aggregated_gt_feature.shape[0])))
mi@0 693 ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0])))
mi@0 694 # print ao.gammatone_ssm.shape, ao.tempo_ssm.shape, ao.timbre_ssm.shape, ao.harmonic_ssm.shape, len(ao.ssm_timestamps)
mi@0 695
mi@0 696 # # Save SSMs.
mi@0 697 # gammatone_ssm = copy(ao.gammatone_ssm)
mi@0 698 # gammatone_ssm[gammatone_ssm<0.8]=0.0
mi@0 699 # plt.figure(figsize=(10, 10))
mi@0 700 # plt.vlines(ao.gt / ao.gt[-1] * gammatone_ssm.shape[0], 0, gammatone_ssm.shape[0], colors='r')
mi@0 701 # plt.imshow(gammatone_ssm, cmap='Greys')
mi@0 702 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-gammatone.pdf'),format='pdf')
mi@0 703 #
mi@0 704 # tempo_ssm = copy(ao.tempo_ssm)
mi@0 705 # tempo_ssm[tempo_ssm<0.8]=0.0
mi@0 706 # plt.figure(figsize=(10, 10))
mi@0 707 # plt.vlines(ao.gt / ao.gt[-1] * tempo_ssm.shape[0], 0, tempo_ssm.shape[0], colors='r')
mi@0 708 # plt.imshow(tempo_ssm, cmap='Greys')
mi@0 709 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-tempo.pdf'),format='pdf')
mi@0 710 #
mi@0 711 # timbre_ssm = copy(ao.timbre_ssm)
mi@0 712 # timbre_ssm[timbre_ssm<0.8]=0.0
mi@0 713 # plt.figure(figsize=(10, 10))
mi@0 714 # plt.vlines(ao.gt / ao.gt[-1] * timbre_ssm.shape[0], 0, timbre_ssm.shape[0], colors='r')
mi@0 715 # plt.imshow(timbre_ssm, cmap='Greys')
mi@0 716 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-timbre.pdf'),format='pdf')
mi@0 717 #
mi@0 718 # harmonic_ssm = copy(ao.harmonic_ssm)
mi@0 719 # harmonic_ssm[harmonic_ssm<0.8]=0.0
mi@0 720 # plt.figure(figsize=(10, 10))
mi@0 721 # plt.vlines(ao.gt / ao.gt[-1] * harmonic_ssm.shape[0], 0, harmonic_ssm.shape[0], colors='r')
mi@0 722 # plt.imshow(harmonic_ssm, cmap='Greys')
mi@0 723 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-harmonic.pdf'),format='pdf')
mi@0 724 #
mi@0 725 # ssm_cleaned = copy(ao.combined_ssm)
mi@0 726 # ssm_cleaned[ssm_cleaned<0.8] = 0
mi@0 727 # plt.figure(figsize=(10, 10))
mi@0 728 # plt.vlines(ao.gt / ao.gt[-1] * ssm_cleaned.shape[0], 0, ssm_cleaned.shape[0], colors='r')
mi@0 729 # plt.imshow(ssm_cleaned, cmap='Greys')
mi@0 730 # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-combined.pdf'),format='pdf')
mi@0 731
mi@0 732 audio_list.append(ao)
mi@0 733
mi@0 734 # Evaluate individual segmentation results.
mi@0 735 outfile1 = join(options.OUTPUT, 'individualSOM.csv')
mi@0 736 with open(outfile1, 'a') as f:
mi@0 737 csvwriter = csv.writer(f, delimiter=',')
mi@0 738 csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \
mi@0 739 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \
mi@0 740 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \
mi@0 741 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \
mi@0 742 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \
mi@0 743 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3'])
mi@0 744
mi@0 745 # outfile4 = join(options.OUTPUT, 'individualResDF.csv')
mi@0 746 # with open(outfile4, 'a') as f:
mi@0 747 # csvwriter = csv.writer(f, delimiter=',')
mi@0 748 # csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \
mi@0 749 # 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \
mi@0 750 # 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \
mi@0 751 # 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \
mi@0 752 # 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \
mi@0 753 # 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3'])
mi@0 754
mi@0 755 # Fuse novelty curves from individual segmentation results.
mi@0 756 outfile2 = join(options.OUTPUT, 'individualFuseSOM.csv')
mi@0 757 with open(outfile2, 'a') as f:
mi@0 758 csvwriter = csv.writer(f, delimiter=',')
mi@0 759 csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\
mi@0 760 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \
mi@0 761 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \
mi@0 762 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \
mi@0 763 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3'])
mi@0 764
mi@0 765
mi@0 766 for i,ao in enumerate(audio_list):
mi@0 767
mi@0 768 print 'processing self organizing maps for %s' %ao.name
mi@0 769
mi@0 770 # 1.Novelty based segmentation.
mi@0 771 # Correlate a Gaussian on the diagonal to construct the novelty curve
mi@0 772 # print 'ssm', ao.gammatone_ssm.shape, ao.timbre_ssm.shape, ao.tempo_ssm.shape, ao.harmonic_ssm.shape
mi@0 773 ssom.train(ao.gammatone_features)
mi@0 774 gammatone_som = ssom(ao.gammatone_features)
mi@0 775 ssom.train(ao.timbre_features)
mi@0 776 timbre_som = ssom(ao.timbre_features)
mi@0 777 ssom.train(ao.tempo_features)
mi@0 778 tempo_som = ssom(ao.tempo_features)
mi@0 779 ssom.train(ao.harmonic_features)
mi@0 780 harmonic_som = ssom(ao.harmonic_features)
mi@0 781
mi@0 782 gammatone_harmonic_features = np.hstack((ao.gammatone_features, ao.harmonic_features))
mi@0 783 gammatone_timbre_features = np.hstack((ao.gammatone_features, ao.timbre_features))
mi@0 784 gammatone_tempo_features = np.hstack((ao.gammatone_features, ao.tempo_features))
mi@0 785 harmonic_timbre_features = np.hstack((ao.harmonic_features, ao.timbre_features))
mi@0 786 harmonic_tempo_features = np.hstack((ao.harmonic_features, ao.tempo_features))
mi@0 787 timbre_tempo_features = np.hstack((ao.timbre_features, ao.tempo_features))
mi@0 788
mi@0 789 gammatone_harmonic_timbre_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features))
mi@0 790 gammatone_harmonic_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.tempo_features))
mi@0 791 gammatone_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.timbre_features, ao.tempo_features))
mi@0 792 harmonic_timbre_tempo_features = np.hstack((ao.harmonic_features, ao.timbre_features, ao.tempo_features))
mi@0 793
mi@0 794 gammatone_harmonic_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features))
mi@0 795
mi@0 796 ssom.train(gammatone_harmonic_features)
mi@0 797 gammatone_harmonic_som = ssom(gammatone_harmonic_features)
mi@0 798 ssom.train(gammatone_timbre_features)
mi@0 799 gammatone_timbre_som = ssom(gammatone_timbre_features)
mi@0 800 ssom.train(gammatone_tempo_features)
mi@0 801 gammatone_tempo_som = ssom(gammatone_tempo_features)
mi@0 802 ssom.train(harmonic_timbre_features)
mi@0 803 harmonic_timbre_som = ssom(harmonic_timbre_features)
mi@0 804 ssom.train(harmonic_timbre_features)
mi@0 805 harmonic_timbre_som = ssom(harmonic_timbre_features)
mi@0 806 ssom.train(harmonic_tempo_features)
mi@0 807 harmonic_tempo_som = ssom(harmonic_tempo_features)
mi@0 808 ssom.train(timbre_tempo_features)
mi@0 809 timbre_tempo_som = ssom(timbre_tempo_features)
mi@0 810
mi@0 811 ssom.train(gammatone_harmonic_timbre_features)
mi@0 812 gammatone_harmonic_timbre_som = ssom(gammatone_harmonic_timbre_features)
mi@0 813 ssom.train(gammatone_harmonic_tempo_features)
mi@0 814 gammatone_harmonic_tempo_som = ssom(gammatone_harmonic_tempo_features)
mi@0 815 ssom.train(gammatone_timbre_tempo_features)
mi@0 816 gammatone_timbre_tempo_som = ssom(gammatone_timbre_tempo_features)
mi@0 817 ssom.train(harmonic_timbre_tempo_features)
mi@0 818 harmonic_timbre_tempo_som = ssom(harmonic_timbre_tempo_features)
mi@0 819
mi@0 820 ssom.train(gammatone_harmonic_timbre_tempo_features)
mi@0 821 gammatone_harmonic_timbre_tempo_som = ssom(gammatone_harmonic_timbre_tempo_features)
mi@0 822
mi@0 823 gammatone_ssm = self.getSSM(gammatone_som)
mi@0 824 harmonic_ssm = self.getSSM(harmonic_som)
mi@0 825 timbre_ssm = self.getSSM(timbre_som)
mi@0 826 tempo_ssm = self.getSSM(tempo_som)
mi@0 827 gammatone_harmonic_ssm = self.getSSM(gammatone_harmonic_som)
mi@0 828 gammatone_timbre_ssm = self.getSSM(gammatone_timbre_som)
mi@0 829 gammatone_tempo_ssm = self.getSSM(gammatone_tempo_som)
mi@0 830 harmonic_timbre_ssm = self.getSSM(harmonic_timbre_som)
mi@0 831 harmonic_tempo_ssm = self.getSSM(harmonic_tempo_som)
mi@0 832 timbre_tempo_ssm = self.getSSM(timbre_tempo_som)
mi@0 833 gammatone_harmonic_timbre_ssm = self.getSSM(gammatone_harmonic_timbre_som)
mi@0 834 gammatone_harmonic_tempo_ssm = self.getSSM(gammatone_harmonic_tempo_som)
mi@0 835 gammatone_timbre_tempo_ssm = self.getSSM(gammatone_timbre_tempo_som)
mi@0 836 harmonic_timbre_tempo_ssm = self.getSSM(harmonic_timbre_tempo_som)
mi@0 837 gammatone_harmonic_timbre_tempo_ssm = self.getSSM(gammatone_harmonic_timbre_tempo_som)
mi@0 838
mi@0 839
mi@0 840 # Noise removal in ssm
mi@0 841 reduced_gammatone_ssm = self.reduceSSM(gammatone_ssm)
mi@0 842 reduced_timbre_ssm = self.reduceSSM(timbre_ssm)
mi@0 843 reduced_tempo_ssm = self.reduceSSM(ao.tempo_ssm)
mi@0 844 reduced_harmonic_ssm = self.reduceSSM(ao.harmonic_ssm)
mi@0 845 reduced_gammatone_harmonic_ssm = self.reduceSSM(gammatone_harmonic_ssm)
mi@0 846 reduced_gammatone_timbre_ssm = self.reduceSSM(gammatone_timbre_ssm)
mi@0 847 reduced_gammatone_tempo_ssm = self.reduceSSM(gammatone_tempo_ssm)
mi@0 848 reduced_harmonic_timbre_ssm = self.reduceSSM(harmonic_timbre_ssm)
mi@0 849 reduced_harmonic_tempo_ssm = self.reduceSSM(harmonic_tempo_ssm)
mi@0 850 reduced_timbre_tempo_ssm = self.reduceSSM(timbre_tempo_ssm)
mi@0 851 reduced_gammatone_harmonic_timbre_ssm = self.reduceSSM(gammatone_harmonic_timbre_ssm)
mi@0 852 reduced_gammatone_harmonic_tempo_ssm = self.reduceSSM(gammatone_harmonic_tempo_ssm)
mi@0 853 reduced_gammatone_timbre_tempo_ssm = self.reduceSSM(gammatone_timbre_tempo_ssm)
mi@0 854 reduced_harmonic_timbre_tempo_ssm = self.reduceSSM(harmonic_timbre_tempo_ssm)
mi@0 855 reduced_gammatone_harmonic_timbre_tempo_ssm = self.reduceSSM(gammatone_harmonic_timbre_tempo_ssm)
mi@0 856
mi@0 857
mi@0 858 gammatone_novelty = self.getNoveltyCurve(reduced_gammatone_ssm, self.kernel_size)
mi@0 859 gammatone_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gammatone_novelty]
mi@0 860 timbre_novelty = self.getNoveltyCurve(reduced_timbre_ssm, self.kernel_size)
mi@0 861 timbre_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in timbre_novelty]
mi@0 862 tempo_novelty = self.getNoveltyCurve(reduced_tempo_ssm, self.kernel_size)
mi@0 863 tempo_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tempo_novelty]
mi@0 864 harmonic_novelty = self.getNoveltyCurve(reduced_harmonic_ssm, self.kernel_size)
mi@0 865 harmonic_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in harmonic_novelty]
mi@0 866
mi@0 867 # Peak picking from the novelty curve
mi@0 868 smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
mi@0 869 gammatone_detection = [ao.ssm_timestamps[int(i)] for i in gammatone_novelty_peaks] + [ao.gt[-1]]
mi@0 870 smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
mi@0 871 timbre_detection = [ao.ssm_timestamps[int(i)] for i in timbre_novelty_peaks] + [ao.gt[-1]]
mi@0 872 smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
mi@0 873 harmonic_detection = [ao.ssm_timestamps[int(i)] for i in harmonic_novelty_peaks] + [ao.gt[-1]]
mi@0 874 smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
mi@0 875 tempo_detection = [ao.ssm_timestamps[int(i)] for i in tempo_novelty_peaks] + [ao.gt[-1]]
mi@0 876
mi@0 877 gt_res_05 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=0.5, combine=1.0)
mi@0 878 gt_res_3 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=3, combine=1.0)
mi@0 879 harmonic_res_05 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=0.5, combine=1.0)
mi@0 880 harmonic_res_3 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=3, combine=1.0)
mi@0 881 tempo_res_05 = self.pairwiseF(ao.gt, tempo_detection, tolerance=0.5, combine=1.0)
mi@0 882 tempo_res_3 = self.pairwiseF(ao.gt, tempo_detection, tolerance=3, combine=1.0)
mi@0 883 timbre_res_05 = self.pairwiseF(ao.gt, timbre_detection, tolerance=0.5, combine=1.0)
mi@0 884 timbre_res_3 = self.pairwiseF(ao.gt, timbre_detection, tolerance=3, combine=1.0)
mi@0 885
mi@0 886 with open(outfile1, 'a') as f:
mi@0 887 csvwriter = csv.writer(f, delimiter=',')
mi@0 888 csvwriter.writerow([ao.name, gt_res_05.TP, gt_res_05.FP, gt_res_05.FN, gt_res_05.P, gt_res_05.R, gt_res_05.F, gt_res_05.AD, gt_res_05.DA, gt_res_3.TP, gt_res_3.FP, gt_res_3.FN, gt_res_3.P, \
mi@0 889 gt_res_3.R, gt_res_3.F, gt_res_3.AD, gt_res_3.DA, harmonic_res_05.TP, harmonic_res_05.FP, harmonic_res_05.FN, harmonic_res_05.P, harmonic_res_05.R, harmonic_res_05.F, harmonic_res_05.AD, harmonic_res_05.DA, \
mi@0 890 harmonic_res_3.TP, harmonic_res_3.FP, harmonic_res_3.FN, harmonic_res_3.P, harmonic_res_3.R, harmonic_res_3.F, harmonic_res_3.AD, harmonic_res_3.DA, timbre_res_05.TP, timbre_res_05.FP, \
mi@0 891 timbre_res_05.FN, timbre_res_05.P, timbre_res_05.R, timbre_res_05.F, timbre_res_05.AD, timbre_res_05.DA, timbre_res_3.TP, timbre_res_3.FP, timbre_res_3.FN, timbre_res_3.P, timbre_res_3.R, timbre_res_3.F, \
mi@0 892 timbre_res_3.AD, timbre_res_3.DA, tempo_res_05.TP, tempo_res_05.FP, tempo_res_05.FN, tempo_res_05.P, tempo_res_05.R, tempo_res_05.F, tempo_res_05.AD, tempo_res_05.DA, tempo_res_3.TP, tempo_res_3.FP, \
mi@0 893 tempo_res_3.FN, tempo_res_3.P, tempo_res_3.R, tempo_res_3.F, tempo_res_3.AD, tempo_res_3.DA])
mi@0 894
mi@0 895 gt_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_ssm, self.kernel_size)
mi@0 896 gt_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_novelty]
mi@0 897 gt_tb_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_ssm, self.kernel_size)
mi@0 898 gt_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_novelty]
mi@0 899 gt_tp_novelty = self.getNoveltyCurve(reduced_gammatone_tempo_ssm, self.kernel_size)
mi@0 900 gt_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_novelty]
mi@0 901 hm_tb_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_ssm, self.kernel_size)
mi@0 902 hm_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_novelty]
mi@0 903 hm_tp_novelty = self.getNoveltyCurve(reduced_harmonic_tempo_ssm, self.kernel_size)
mi@0 904 hm_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tp_novelty]
mi@0 905 tb_tp_novelty = self.getNoveltyCurve(reduced_timbre_tempo_ssm, self.kernel_size)
mi@0 906 tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_novelty]
mi@0 907
mi@0 908 smoothed_gt_tb_novelty, gt_tb_novelty_peaks = peak_picker.process(gt_tb_novelty)
mi@0 909 gt_tb_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_novelty_peaks] + [ao.gt[-1]]
mi@0 910 smoothed_gt_tp_novelty, gt_tp_novelty_peaks = peak_picker.process(gt_tp_novelty)
mi@0 911 gt_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_novelty_peaks] + [ao.gt[-1]]
mi@0 912 smoothed_gt_hm_novelty, gt_hm_novelty_peaks = peak_picker.process(gt_hm_novelty)
mi@0 913 gt_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 914 smoothed_tb_tp_novelty, tb_tp_novelty_peaks = peak_picker.process(tb_tp_novelty)
mi@0 915 tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_novelty_peaks] + [ao.gt[-1]]
mi@0 916 smoothed_tb_hm_novelty, tb_hm_novelty_peaks = peak_picker.process(tb_hm_novelty)
mi@0 917 tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 918 smoothed_tp_hm_novelty, tp_hm_novelty_peaks = peak_picker.process(tp_hm_novelty)
mi@0 919 tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 920
mi@0 921 gt_tb_tp_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_tempo_ssm, self.kernel_size)
mi@0 922 gt_tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_novelty]
mi@0 923 gt_tb_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_ssm, self.kernel_size)
mi@0 924 gt_tb_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_hm_novelty]
mi@0 925 gt_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_tempo_ssm, self.kernel_size)
mi@0 926 gt_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_hm_novelty]
mi@0 927 tb_tp_hm_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_tempo_ssm, self.kernel_size)
mi@0 928 tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_hm_novelty]
mi@0 929 gt_tb_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_tempo_ssm, self.kernel_size)
mi@0 930 gt_tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_hm_novelty]
mi@0 931
mi@0 932 smoothed_gt_tb_tp_novelty, gt_tb_tp_novelty_peaks = peak_picker.process(gt_tb_tp_novelty)
mi@0 933 gt_tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_novelty_peaks] + [ao.gt[-1]]
mi@0 934 smoothed_gt_tb_hm_novelty, gt_tb_hm_novelty_peaks = peak_picker.process(gt_tb_hm_novelty)
mi@0 935 gt_tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 936 smoothed_gt_tp_hm_novelty, gt_tp_hm_novelty_peaks = peak_picker.process(gt_tp_hm_novelty)
mi@0 937 gt_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 938 smoothed_tb_tp_hm_novelty, tb_tp_hm_novelty_peaks = peak_picker.process(tb_tp_hm_novelty)
mi@0 939 tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 940 smoothed_gt_tb_tp_hm_novelty, gt_tb_tp_hm_novelty_peaks = peak_picker.process(gt_tb_tp_hm_novelty)
mi@0 941 gt_tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_hm_novelty_peaks] + [ao.gt[-1]]
mi@0 942
mi@0 943 # novelty_peaks = gt_tb_tp_hm_novelty_peaks
mi@0 944 # novelty_detection = [ao.ssm_timestamps[int(i)] for i in novelty_peaks] + [ao.gt[-1]]
mi@0 945
mi@0 946 if options.PLOT:
mi@0 947 self.plotDetection(ao.ssm, novelty, smoothed_novelty, ao.gt, detection, filename=join(options.OUTPUT+ ao.name)+'.pdf')
mi@0 948
mi@0 949 gt_tb_res_05 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=0.5, combine=1.0)
mi@0 950 gt_tb_res_3 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=3, combine=1.0)
mi@0 951 gt_tp_res_05 = self.pairwiseF(ao.gt, gt_tp_detection, tolerance=0.5, combine=1.0)
mi@0 952 gt_tp_res_3 = self.pairwiseF(ao.gt, gt_tp_detection, tolerance=3, combine=1.0)
mi@0 953 gt_hm_res_05 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=0.5, combine=1.0)
mi@0 954 gt_hm_res_3 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=3, combine=1.0)
mi@0 955 tb_tp_res_05 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=0.5, combine=1.0)
mi@0 956 tb_tp_res_3 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=3, combine=1.0)
mi@0 957 tb_hm_res_05 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=0.5, combine=1.0)
mi@0 958 tb_hm_res_3 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=3, combine=1.0)
mi@0 959 tp_hm_res_05 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=0.5, combine=1.0)
mi@0 960 tp_hm_res_3 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=3, combine=1.0)
mi@0 961
mi@0 962 gt_tb_tp_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=0.5, combine=1.0)
mi@0 963 gt_tb_tp_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=3, combine=1.0)
mi@0 964 gt_tb_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=0.5, combine=1.0)
mi@0 965 gt_tb_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=3, combine=1.0)
mi@0 966 gt_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=0.5, combine=1.0)
mi@0 967 gt_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=3, combine=1.0)
mi@0 968 tb_tp_hm_res_05 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=0.5, combine=1.0)
mi@0 969 tb_tp_hm_res_3 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=3, combine=1.0)
mi@0 970
mi@0 971 gt_tb_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=0.5, combine=1.0)
mi@0 972 gt_tb_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=3, combine=1.0)
mi@0 973
mi@0 974
mi@0 975 # Output detected segment locations.
mi@0 976 if options.VERBOSE:
mi@0 977 outdir = join(options.OUTPUT, 'detection', ao.name)
mi@0 978 if not isdir(outdir):
mi@0 979 os.mkdir(outdir)
mi@0 980 np.savetxt(join(outdir, 'gammatone.csv'), gammatone_detection)
mi@0 981 np.savetxt(join(outdir, 'timbre.csv'), timbre_detection)
mi@0 982 np.savetxt(join(outdir, 'tempo.csv'), tempo_detection)
mi@0 983 np.savetxt(join(outdir, 'harmonic.csv'), harmonic_detection)
mi@0 984
mi@0 985 np.savetxt(join(outdir, 'gammatone_timbre_novelty.csv'), gt_tb_detection)
mi@0 986 np.savetxt(join(outdir, 'gammatone_tempo_novelty.csv'), gt_tp_detection)
mi@0 987 np.savetxt(join(outdir, 'gammatone_harmonic_novelty.csv'), gt_hm_detection)
mi@0 988 np.savetxt(join(outdir, 'timbre_tempo_novelty.csv'), tb_tp_detection)
mi@0 989 np.savetxt(join(outdir, 'timbre_harmonic_novelty.csv'), tb_hm_detection)
mi@0 990 np.savetxt(join(outdir, 'tempo_harmonic_novelty.csv'), tp_hm_detection)
mi@0 991
mi@0 992 np.savetxt(join(outdir, 'gammatone_timbre_tempo_novelty.csv'), gt_tb_tp_detection)
mi@0 993 np.savetxt(join(outdir, 'gammatone_timbre_harmonic_novelty.csv'), gt_tb_hm_detection)
mi@0 994 np.savetxt(join(outdir, 'gammatone_tempo_harmonic_novelty.csv'), gt_tp_hm_detection)
mi@0 995 np.savetxt(join(outdir, 'timbre_tempo_harmonic_novelty.csv'), tb_tp_hm_detection)
mi@0 996 np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_novelty.csv'), gt_tb_tp_hm_detection)
mi@0 997
mi@0 998 # with open(outfile4, 'a') as f:
mi@0 999 # csvwriter = csv.writer(f, delimiter=',')
mi@0 1000 # csvwriter.writerow([ao.name, gt_df_05.TP, gt_df_05.FP, gt_df_05.FN, gt_df_05.P, gt_df_05.R, gt_df_05.F, gt_df_05.AD, gt_df_05.DA, gt_df_3.TP, gt_df_3.FP, gt_df_3.FN, gt_df_3.P, \
mi@0 1001 # gt_df_3.R, gt_df_3.F, gt_df_3.AD, gt_df_3.DA, harmonic_df_05.TP, harmonic_df_05.FP, harmonic_df_05.FN, harmonic_df_05.P, harmonic_df_05.R, harmonic_df_05.F, harmonic_df_05.AD, harmonic_df_05.DA, \
mi@0 1002 # harmonic_df_3.TP, harmonic_df_3.FP, harmonic_df_3.FN, harmonic_df_3.P, harmonic_df_3.R, harmonic_df_3.F, harmonic_df_3.AD, harmonic_df_3.DA, timbre_df_05.TP, timbre_df_05.FP, \
mi@0 1003 # timbre_df_05.FN, timbre_df_05.P, timbre_df_05.R, timbre_df_05.F, timbre_df_05.AD, timbre_df_05.DA, timbre_df_3.TP, timbre_df_3.FP, timbre_df_3.FN, timbre_df_3.P, timbre_df_3.R, timbre_df_3.F, \
mi@0 1004 # timbre_df_3.AD, timbre_df_3.DA, tempo_df_05.TP, tempo_df_05.FP, tempo_df_05.FN, tempo_df_05.P, tempo_df_05.R, tempo_df_05.F, tempo_df_05.AD, tempo_df_05.DA, tempo_df_3.TP, tempo_df_3.FP, \
mi@0 1005 # tempo_df_3.FN, tempo_df_3.P, tempo_df_3.R, tempo_df_3.F, tempo_df_3.AD, tempo_df_3.DA])
mi@0 1006
mi@0 1007 with open(outfile2, 'a') as f:
mi@0 1008 csvwriter = csv.writer(f, delimiter=',')
mi@0 1009 csvwriter.writerow([ao.name, gt_tb_res_05.P, gt_tb_res_05.R, gt_tb_res_05.F, gt_tb_res_3.P, gt_tb_res_3.R, gt_tb_res_3.F, gt_tp_res_05.P, gt_tp_res_05.R, gt_tp_res_05.F, gt_tp_res_3.P, gt_tp_res_3.R, gt_tp_res_3.F, \
mi@0 1010 gt_hm_res_05.P, gt_hm_res_05.R, gt_hm_res_05.F, gt_hm_res_3.P, gt_hm_res_3.R, gt_hm_res_3.F, tb_tp_res_05.P, tb_tp_res_05.R, tb_tp_res_05.F, tb_tp_res_3.P, tb_tp_res_3.R, tb_tp_res_3.F, \
mi@0 1011 tb_hm_res_05.P, tb_hm_res_05.R, tb_hm_res_05.F, tb_hm_res_3.P, tb_hm_res_3.R, tb_hm_res_3.F, tp_hm_res_05.P, tp_hm_res_05.R, tp_hm_res_05.F, tp_hm_res_3.P, tp_hm_res_3.R, tp_hm_res_3.F, \
mi@0 1012 gt_tb_tp_res_05.P, gt_tb_tp_res_05.R, gt_tb_tp_res_05.F, gt_tb_tp_res_3.P, gt_tb_tp_res_3.R, gt_tb_tp_res_3.F, gt_tb_hm_res_05.P, gt_tb_hm_res_05.R, gt_tb_hm_res_05.F, gt_tb_hm_res_3.P, gt_tb_hm_res_3.R, gt_tb_hm_res_3.F, \
mi@0 1013 gt_tp_hm_res_05.P, gt_tp_hm_res_05.R, gt_tp_hm_res_05.F, gt_tp_hm_res_3.P, gt_tp_hm_res_3.R, gt_tp_hm_res_3.F, tb_tp_hm_res_05.P, tb_tp_hm_res_05.R, tb_tp_hm_res_05.F, tb_tp_hm_res_3.P, tb_tp_hm_res_3.R, tb_tp_hm_res_3.F, \
mi@0 1014 gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F])
mi@0 1015
mi@0 1016
mi@0 1017 # Verification of detected boundaries by novelty fusion from the first round
mi@0 1018 # ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
mi@0 1019 # winlen = 1.5 * self.SampleRate / self.stepSize
mi@0 1020 # prev_features, post_features = self.getPeakFeatures(gt_tb_tp_hm_novelty_peaks, ao_featureset, winlen=10)
mi@0 1021 # dev_list = self.segmentDev(prev_features, post_features)
mi@0 1022 # gt_tb_tp_hm_novelty_peaks = gt_tb_tp_hm_novelty_peaks[:len(dev_list)]
mi@0 1023 # # print 'len(dev_list)', len(dev_list), len(gt_tb_tp_hm_novelty_peaks)
mi@0 1024 # # print gt_tb_tp_hm_novelty_peaks, dev_list
mi@0 1025 # dev_mean = [np.mean(x) for x in dev_list]
mi@0 1026 # np.savetxt(join(options.OUTPUT, 'dev', ao.name+'.csv'), np.vstack((gt_tb_tp_hm_detection[:len(dev_list)], dev_mean)).T, delimiter=',')
mi@0 1027 # peak_verified = self.verifyPeaks(gt_tb_tp_hm_novelty_peaks, dev_list)
mi@0 1028 #
mi@0 1029 # verified_detection = [ao.ssm_timestamps[int(i)] for i in peak_verified] + [ao.gt[-1]]
mi@0 1030 # verified_detection_05 = self.pairwiseF(ao.gt, verified_detection, tolerance=0.5, combine=1.0)
mi@0 1031 # verified_detection_3 = self.pairwiseF(ao.gt, verified_detection, tolerance=3, combine=1.0)
mi@0 1032 #
mi@0 1033 # print gt_tb_tp_hm_res_05.TP, gt_tb_tp_hm_res_05.FP, gt_tb_tp_hm_res_05.FN, gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F
mi@0 1034 # print gt_tb_tp_hm_res_3.TP, gt_tb_tp_hm_res_3.FP, gt_tb_tp_hm_res_3.FN, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F
mi@0 1035 #
mi@0 1036 # print verified_detection_05.TP, verified_detection_05.FP, verified_detection_05.FN, verified_detection_05.P, verified_detection_05.R, verified_detection_05.F
mi@0 1037 # print verified_detection_3.TP, verified_detection_3.FP, verified_detection_3.FN, verified_detection_3.P, verified_detection_3.R, verified_detection_3.F
mi@0 1038
mi@0 1039 # if len(novelty_peaks):
mi@0 1040 # ao.gammatone_gmm = self.getGMMs(ao.gammatone_features, novelty_peaks)
mi@0 1041 # ao.harmonic_gmm = self.getGMMs(ao.harmonic_features, novelty_peaks)
mi@0 1042 # ao.tempo_gmm = self.getGMMs(ao.tempo_features, novelty_peaks)
mi@0 1043 # ao.timbre_gmm = self.getGMMs(ao.timbre_features, novelty_peaks)
mi@0 1044 #
mi@0 1045 # rc = rClustering(eps=1., k=8, rank='max_neighbors')
mi@0 1046 # rc.fit(ao.gammatone_gmm)
mi@0 1047 # gammatone_clf = rc.classification
mi@0 1048 # gammatone_neighborhood_size, gammatone_average_div, gammatone_node_rank = rc.getNodeRank()
mi@0 1049 # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_clf)).T, delimiter=',')
mi@0 1050 # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_neighborhood_size)).T, delimiter=',')
mi@0 1051 # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_average_div)).T, delimiter=',')
mi@0 1052 # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_node_rank)).T, delimiter=',')
mi@0 1053 #
mi@0 1054 # rc = rClustering(eps=1., k=8, rank='max_neighbors')
mi@0 1055 # rc.fit(ao.harmonic_gmm)
mi@0 1056 # harmonic_clf = rc.classification
mi@0 1057 # harmonic_neighborhood_size, harmonic_average_div, harmonic_node_rank = rc.getNodeRank()
mi@0 1058 # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_clf)).T, delimiter=',')
mi@0 1059 # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_neighborhood_size)).T, delimiter=',')
mi@0 1060 # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_average_div)).T, delimiter=',')
mi@0 1061 # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_node_rank)).T, delimiter=',')
mi@0 1062 #
mi@0 1063 # rc = rClustering(eps=1., k=8, rank='max_neighbors')
mi@0 1064 # rc.fit(ao.tempo_gmm)
mi@0 1065 # tempo_clf = rc.classification
mi@0 1066 # tempo_neighborhood_size, tempo_average_div, tempo_node_rank = rc.getNodeRank()
mi@0 1067 # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_clf)).T, delimiter=',')
mi@0 1068 # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_neighborhood_size)).T, delimiter=',')
mi@0 1069 # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_average_div)).T, delimiter=',')
mi@0 1070 # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_node_rank)).T, delimiter=',')
mi@0 1071 #
mi@0 1072 # rc = rClustering(eps=1., k=8, rank='max_neighbors')
mi@0 1073 # rc.fit(ao.timbre_gmm)
mi@0 1074 # timbre_clf = rc.classification
mi@0 1075 # timbre_neighborhood_size, timbre_average_div, timbre_node_rank = rc.getNodeRank()
mi@0 1076 # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_clf)).T, delimiter=',')
mi@0 1077 # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_neighborhood_size)).T, delimiter=',')
mi@0 1078 # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_average_div)).T, delimiter=',')
mi@0 1079 # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_node_rank)).T, delimiter=',')
mi@0 1080
mi@0 1081
mi@0 1082 # # Evaluate segmentation results using combined SSMs.
mi@0 1083 # outfile3 = join(options.OUTPUT, 'combinedSSMRes.csv')
mi@0 1084 # with open(outfile3, 'a') as f:
mi@0 1085 # csvwriter = csv.writer(f, delimiter=',')
mi@0 1086 # csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\
mi@0 1087 # 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \
mi@0 1088 # 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \
mi@0 1089 # 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \
mi@0 1090 # 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3'])
mi@0 1091 #
mi@0 1092 # for i,ao in enumerate(audio_list):
mi@0 1093 # # Combine SSMs computed from different features
mi@0 1094 # gt_hm_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm)
mi@0 1095 # gt_tb_ssm = np.multiply(ao.gammatone_ssm, ao.timbre_ssm)
mi@0 1096 # gt_tp_ssm = np.multiply(ao.gammatone_ssm, ao.tempo_ssm)
mi@0 1097 # tb_tp_ssm = np.multiply(ao.timbre_ssm, ao.tempo_ssm)
mi@0 1098 # tb_hm_ssm = np.multiply(ao.timbre_ssm, ao.harmonic_ssm)
mi@0 1099 # tp_hm_ssm = np.multiply(ao.tempo_ssm, ao.harmonic_ssm)
mi@0 1100 #
mi@0 1101 # gt_hm_tb_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.harmonic_ssm), ao.timbre_ssm)
mi@0 1102 # gt_hm_tp_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.harmonic_ssm), ao.tempo_ssm)
mi@0 1103 # gt_tb_tp_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.timbre_ssm), ao.tempo_ssm)
mi@0 1104 # hm_tb_tp_ssm = np.multiply(np.multiply(ao.harmonic_ssm, ao.timbre_ssm), ao.tempo_ssm)
mi@0 1105 #
mi@0 1106 # gt_hm_tb_tp_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.harmonic_ssm), np.multiply(ao.timbre_ssm, ao.tempo_ssm))
mi@0 1107 #
mi@0 1108 # gt_hm_ssm_novelty = self.getNoveltyCurve(gt_hm_ssm, self.kernel_size)
mi@0 1109 # gt_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_ssm_novelty]
mi@0 1110 # gt_tb_ssm_novelty = self.getNoveltyCurve(gt_tb_ssm, self.kernel_size)
mi@0 1111 # gt_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_ssm_novelty]
mi@0 1112 # gt_tp_ssm_novelty = self.getNoveltyCurve(gt_tp_ssm, self.kernel_size)
mi@0 1113 # gt_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_ssm_novelty]
mi@0 1114 # tb_tp_ssm_novelty = self.getNoveltyCurve(tb_tp_ssm, self.kernel_size)
mi@0 1115 # tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_ssm_novelty]
mi@0 1116 # tb_hm_ssm_novelty = self.getNoveltyCurve(tb_hm_ssm, self.kernel_size)
mi@0 1117 # tb_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_hm_ssm_novelty]
mi@0 1118 # tp_hm_ssm_novelty = self.getNoveltyCurve(tp_hm_ssm, self.kernel_size)
mi@0 1119 # tp_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tp_hm_ssm_novelty]
mi@0 1120 #
mi@0 1121 # gt_hm_tb_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_ssm, self.kernel_size)
mi@0 1122 # gt_hm_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tb_ssm_novelty]
mi@0 1123 # gt_hm_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tp_ssm, self.kernel_size)
mi@0 1124 # gt_hm_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tp_ssm_novelty]
mi@0 1125 # gt_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_tb_tp_ssm, self.kernel_size)
mi@0 1126 # gt_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_ssm_novelty]
mi@0 1127 # hm_tb_tp_ssm_novelty = self.getNoveltyCurve(hm_tb_tp_ssm, self.kernel_size)
mi@0 1128 # hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_tp_ssm_novelty]
mi@0 1129 #
mi@0 1130 # gt_hm_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_tp_ssm, self.kernel_size)
mi@0 1131 # gt_hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tb_tp_ssm_novelty]
mi@0 1132 #
mi@0 1133 # smoothed_gt_hm_ssm_novelty, gt_hm_ssm_novelty_peaks = peak_picker.process(gt_hm_ssm_novelty)
mi@0 1134 # gt_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1135 # smoothed_gt_tb_ssm_novelty, gt_tb_ssm_novelty_peaks = peak_picker.process(gt_tb_ssm_novelty)
mi@0 1136 # gt_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1137 # smoothed_gt_tp_ssm_novelty, gt_tp_ssm_novelty_peaks = peak_picker.process(gt_tp_ssm_novelty)
mi@0 1138 # gt_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1139 # smoothed_tb_tp_ssm_novelty, tb_tp_ssm_novelty_peaks = peak_picker.process(tb_tp_ssm_novelty)
mi@0 1140 # tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1141 # smoothed_tb_hm_ssm_novelty, tb_hm_ssm_novelty_peaks = peak_picker.process(tb_hm_ssm_novelty)
mi@0 1142 # tb_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1143 # smoothed_tp_hm_ssm_novelty, tp_hm_ssm_novelty_peaks = peak_picker.process(tp_hm_ssm_novelty)
mi@0 1144 # tp_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1145 #
mi@0 1146 # smoothed_gt_hm_tb_ssm_novelty, gt_hm_tb_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_ssm_novelty)
mi@0 1147 # gt_hm_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1148 # smoothed_gt_hm_tp_ssm_novelty, gt_hm_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tp_ssm_novelty)
mi@0 1149 # gt_hm_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1150 # smoothed_gt_tb_tp_ssm_novelty, gt_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_tb_tp_ssm_novelty)
mi@0 1151 # gt_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1152 # smoothed_hm_tb_tp_ssm_novelty, hm_tb_tp_ssm_novelty_peaks = peak_picker.process(hm_tb_tp_ssm_novelty)
mi@0 1153 # hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1154 #
mi@0 1155 # smoothed_gt_hm_tb_tp_ssm_novelty, gt_hm_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_tp_ssm_novelty)
mi@0 1156 # gt_hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]]
mi@0 1157 #
mi@0 1158 # # Output detected segment locations.
mi@0 1159 # if options.VERBOSE:
mi@0 1160 # outdir = join(options.OUTPUT, 'detection', ao.name)
mi@0 1161 # if not isdir(outdir):
mi@0 1162 # os.mkdir(outdir)
mi@0 1163 #
mi@0 1164 # np.savetxt(join(outdir, 'gammatone_timbre_ssm.csv'), gt_tb_ssm_detection)
mi@0 1165 # np.savetxt(join(outdir, 'gammatone_tempo_ssm.csv'), gt_tp_ssm_detection)
mi@0 1166 # np.savetxt(join(outdir, 'gammatone_harmonic_ssm.csv'), gt_hm_ssm_detection)
mi@0 1167 # np.savetxt(join(outdir, 'timbre_tempo_ssm.csv'), tb_tp_ssm_detection)
mi@0 1168 # np.savetxt(join(outdir, 'timbre_harmonic_ssm.csv'), tb_hm_ssm_detection)
mi@0 1169 # np.savetxt(join(outdir, 'tempo_harmonic_ssm.csv'), tp_hm_ssm_detection)
mi@0 1170 #
mi@0 1171 # np.savetxt(join(outdir, 'gammatone_timbre_tempo_ssm.csv'), gt_tb_tp_ssm_detection)
mi@0 1172 # np.savetxt(join(outdir, 'gammatone_timbre_harmonic_ssm.csv'), gt_hm_tb_ssm_detection)
mi@0 1173 # np.savetxt(join(outdir, 'gammatone_tempo_harmonic_ssm.csv'), gt_hm_tp_ssm_detection)
mi@0 1174 # np.savetxt(join(outdir, 'timbre_tempo_harmonic_ssm.csv'), hm_tb_tp_ssm_detection)
mi@0 1175 # np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_ssm.csv'), gt_hm_tb_tp_ssm_detection)
mi@0 1176 #
mi@0 1177 # gt_hm_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1178 # gt_hm_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=3, combine=1.0)
mi@0 1179 # gt_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1180 # gt_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=3, combine=1.0)
mi@0 1181 # gt_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1182 # gt_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1183 # tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1184 # tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1185 # tb_hm_ssm_res_05 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1186 # tb_hm_ssm_res_3 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=3, combine=1.0)
mi@0 1187 # tp_hm_ssm_res_05 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1188 # tp_hm_ssm_res_3 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=3, combine=1.0)
mi@0 1189 #
mi@0 1190 # gt_hm_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1191 # gt_hm_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=3, combine=1.0)
mi@0 1192 # gt_hm_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1193 # gt_hm_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1194 # gt_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1195 # gt_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1196 # hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1197 # hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1198 #
mi@0 1199 # gt_hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0)
mi@0 1200 # gt_hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=3, combine=1.0)
mi@0 1201 #
mi@0 1202 # with open(outfile3, 'a') as f:
mi@0 1203 # csvwriter = csv.writer(f, delimiter=',')
mi@0 1204 # csvwriter.writerow([ao.name, gt_tb_ssm_res_05.P, gt_tb_ssm_res_05.R, gt_tb_ssm_res_05.F, gt_tb_ssm_res_3.P, gt_tb_ssm_res_3.R, gt_tb_ssm_res_3.F, gt_tp_ssm_res_05.P, gt_tp_ssm_res_05.R, gt_tp_ssm_res_05.F, \
mi@0 1205 # gt_tp_ssm_res_3.P, gt_tp_ssm_res_3.R, gt_tp_ssm_res_3.F, gt_hm_ssm_res_05.P, gt_hm_ssm_res_05.R, gt_hm_ssm_res_05.F, gt_hm_ssm_res_3.P, gt_hm_ssm_res_3.R, gt_hm_ssm_res_3.F, \
mi@0 1206 # tb_tp_ssm_res_05.P, tb_tp_ssm_res_05.R, tb_tp_ssm_res_05.F, tb_tp_ssm_res_3.P, tb_tp_ssm_res_3.R, tb_tp_ssm_res_3.F, tb_hm_ssm_res_05.P, tb_hm_ssm_res_05.R, tb_hm_ssm_res_05.F, \
mi@0 1207 # tb_hm_ssm_res_3.P, tb_hm_ssm_res_3.R, tb_hm_ssm_res_3.F, tp_hm_ssm_res_05.P, tp_hm_ssm_res_05.R, tp_hm_ssm_res_05.F, tp_hm_ssm_res_3.P, tp_hm_ssm_res_3.R, tp_hm_ssm_res_3.F, \
mi@0 1208 # gt_tb_tp_ssm_res_05.P, gt_tb_tp_ssm_res_05.R, gt_tb_tp_ssm_res_05.F, gt_tb_tp_ssm_res_3.P, gt_tb_tp_ssm_res_3.R, gt_tb_tp_ssm_res_3.F, gt_hm_tb_ssm_res_05.P, gt_hm_tb_ssm_res_05.R, gt_hm_tb_ssm_res_05.F, \
mi@0 1209 # gt_hm_tb_ssm_res_3.P, gt_hm_tb_ssm_res_3.R, gt_hm_tb_ssm_res_3.F, gt_hm_tp_ssm_res_05.P, gt_hm_tp_ssm_res_05.R, gt_hm_tp_ssm_res_05.F, gt_hm_tp_ssm_res_3.P, gt_hm_tp_ssm_res_3.R, gt_hm_tp_ssm_res_3.F, \
mi@0 1210 # hm_tb_tp_ssm_res_05.P, hm_tb_tp_ssm_res_05.R, hm_tb_tp_ssm_res_05.F, hm_tb_tp_ssm_res_3.P, hm_tb_tp_ssm_res_3.R, hm_tb_tp_ssm_res_3.F, gt_hm_tb_tp_ssm_res_05.P, gt_hm_tb_tp_ssm_res_05.R, gt_hm_tb_tp_ssm_res_05.F, \
mi@0 1211 # gt_hm_tb_tp_ssm_res_3.P, gt_hm_tb_tp_ssm_res_3.R, gt_hm_tb_tp_ssm_res_3.F])
mi@0 1212
mi@0 1213
def main():
    """Script entry point: build an SSMseg object and invoke its process() method."""
    SSMseg().process()
mi@0 1217
mi@0 1218
# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
mi@0 1221