Mercurial > hg > segmentation
view utils/som_seg.py @ 0:26838b1f560f
initial commit of a segmenter project
author | mi tian |
---|---|
date | Thu, 02 Apr 2015 18:09:27 +0100 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python # encoding: utf-8 """ feature_combine_seg.py A script to evaluation script for the segmentation results using combinations of different features. """ import matplotlib # matplotlib.use('Agg') import sys, os, optparse, csv from itertools import combinations from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext from copy import copy from mvpa2.suite import * import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import numpy as np from scipy.signal import correlate2d, convolve2d, filtfilt, resample from scipy.stats import mode from scipy.ndimage import zoom from scipy.ndimage.morphology import binary_fill_holes from scipy.ndimage.filters import * from scipy.spatial.distance import squareform, pdist from sklearn.decomposition import PCA from sklearn.mixture import GMM from sklearn.preprocessing import normalize from sklearn.metrics.pairwise import pairwise_distances from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line from skimage.filter import canny, sobel from skimage import data, measure, segmentation, morphology from PeakPickerUtil import PeakPicker from gmmdist import * from GmmMetrics import GmmDistance from RankClustering import rClustering from kmeans import Kmeans def parse_args(): # define parser op = optparse.OptionParser() # IO options op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading features from.." ) op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading features from.." ) op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading features from.." ) op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") op.add_option('-p', '--plot-novelty', action="store_true", dest="PLOT", default=False, help="Save novelty curev plot") op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") op.add_option('-v', '--verbose-output', action="store_true", dest="VERBOSE", default=False, help="Exported raw detections.") return op.parse_args() options, args = parse_args() class FeatureObj() : __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'gammatone_ssm', 'tempo_ssm', 'timbre_features', 'harmonic_ssm', 'ssm_timestamps'] class AudioObj(): __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',\ 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps'] class EvalObj(): __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA'] class SSMseg(object): '''The main segmentation object''' def __init__(self): self.SampleRate = 44100 self.NqHz = self.SampleRate/2 self.timestamp = [] self.previousSample = 0.0 self.featureWindow = 6.0 self.featureStep = 3.0 self.kernel_size = 80 # Adjust this param according to the feature resolution. self.blockSize = 4094 self.stepSize = 2048 '''NOTE: Match the following params with those used for feature extraction!''' '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without an FFT or any overlap. The windowing is done under the purpose of chunking the audio to facilitate the gammatone filtering. Despite of the overlap in the time domain, only the first half after the filtering is returned, resulting in no overlapping effect in the extracted features. To obtain features for overlapped audio input, make the gammatoneLen equal to blockSize and return the whole filter output.''' self.gammatoneLen = 2048 self.gammatoneBandGroups = [0, 16, 34, 50, 64] self.nGammatoneBands = 20 self.histRes = 40 self.lowFreq = 100 self.highFreq = self.SampleRate / 4 '''Settings for extracting tempogram features.''' self.tempoWindow = 6.0 self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600] '''Peak picking settings''' self.threshold = 30 self.delta_threshold = 0.5 self.backtracking_threshold = 2.4 self.polyfitting_on = True self.medfilter_on = True self.LPfilter_on = True self.whitening_on = False self.aCoeffs = [1.0000, -0.5949, 0.2348] self.bCoeffs = [0.1600, 0.3200, 0.1600] self.cutoff = 0.5 self.medianWin = 5 def getGaussianParams(self, length, featureRate, timeWindow): win_len = round(timeWindow * featureRate) win_len = win_len + (win_len % 2) - 1 # a 50% overlap between windows stepsize = ceil(win_len * 0.5) num_win = int(floor( (length) / stepsize)) gaussian_rate = featureRate / stepsize return stepsize, num_win, win_len, gaussian_rate def GaussianDistance(self, feature, featureRate, timeWindow): stepsize, num_win, win_len, gr = self.getGaussianParams(feature.shape[0], featureRate, timeWindow) print 'stepsize, num_win, feature', stepsize, num_win, feature.shape, featureRate, timeWindow gaussian_list = [] gaussian_timestamps = [] tsi = 0 # f = open('/Users/mitian/Documents/experiments/features.txt','w') # print 'divergence computing..' for num in xrange(num_win): # print num, num * stepsize , (num * stepsize) + win_len gf=GaussianFeature(feature[int(num * stepsize) : int((num * stepsize) + win_len), :],2) # f.write("\n%s" %str(gf)) gaussian_list.append(gf) tsi = int(floor( num * stepsize + 1)) gaussian_timestamps.append(self.timestamp[tsi]) # f.close() # print 'gaussian_list', len(gaussian_list), len(gaussian_timestamps) dm = np.zeros((len(gaussian_list), len(gaussian_list))) for v1, v2 in combinations(gaussian_list, 2): i, j = gaussian_list.index(v1), gaussian_list.index(v2) dm[i, j] = v1.distance(v2) dm[j, i] = v2.distance(v1) # print 'dm[i,j]',dm[i,j] # sio.savemat("/Users/mitian/Documents/experiments/dm-from-segmenter.mat",{"dm":dm}) return dm, gaussian_timestamps def gaussian_kernel(self, size): '''Create a gaussian tapered 45 degrees rotated checkerboard kernel. TODO: Unit testing: Should produce this with kernel size 3: 0.1353 -0.3679 0.1353 0.3679 1.0000 0.3679 0.1353 -0.3679 0.1353 ''' n = float(np.ceil(size / 2.0)) kernel = np.zeros((size,size)) for i in xrange(1,size+1) : for j in xrange(1,size+1) : gauss = np.exp( -4.0 * (np.square( (i-n)/n ) + np.square( (j-n)/n )) ) # gauss = 1 if np.logical_xor( j - n > np.floor((i-n) / 2.0), j - n > np.floor((n-i) / 2.0) ) : kernel[i-1,j-1] = -gauss else: kernel[i-1,j-1] = gauss return kernel def getDiagonalSlice(self, ssm, width): ''' Return a diagonal slice of the ssm given its width, with 45 degrees rotation. Note: requres 45 degrees rotated kernel also.''' w = int(np.floor(width/2.0)) length = len(np.diagonal(ssm)) slice = np.zeros((2*w+1,length)) # print 'diagonal', length, w, slice.shape for i in xrange(-w, w+1) : slice[w+i,:] = np.hstack(( np.zeros(int(np.floor(abs(i)/2.0))), np.diagonal(ssm,i), np.zeros(int(np.ceil(abs(i)/2.0))) )) return slice def getNoveltyCurve(self,dm, kernel_size): '''Return novelty score from distance matrix.''' kernel_size = int(np.floor(kernel_size/2.0)+1) slice = self.getDiagonalSlice(dm, kernel_size) kernel = self.gaussian_kernel(kernel_size) xc = convolve2d(slice,kernel,mode='same') xc[abs(xc)>1e+10]=0.00001 # print 'xc', xc.shape, xc return xc[int(np.floor(xc.shape[0]/2.0)),:] def mergeBlocks(self, SSM, thresh=0.9, size=5): '''Merge consequtive small blocks along the diagonal.''' # found = False # start = 0 # i = 0 # while i < len(SSM): # j = i + 1 # if found: start = i # while(j < len(SSM) and SSM[i, j]): # if (j-i) > size: # found = True # i = j # # print 'start,end', start, i # start = i # else: # found = False # j += 1 # if not found: # print 'start,end', start, i # SSM[start:i, start:i] = 0.9 # i = j idx = 1 while idx < len(SSM): i = 0 # if ((idx-1-i) > 0 and (idx+1+i) < len(SSM)): while ((idx-1-i) > 0 and (idx+1+i) < len(SSM) and SSM[idx-1-i, idx] > 0 and SSM[idx+1+i, idx] > 0): i += 1 if i > size/2: SSM[idx-1-i:min(idx+i,len(SSM)), idx-1-i:min(idx+i,len(SSM))] = 1.0 idx += max(1, i) return SSM def getGMMs(self, feature, segment_boundaries): '''Return GMMs for located segments''' gmm_list = [] gmm_list.append(GmmDistance(feature[: segment_boundaries[0], :], components = 1)) for i in xrange(1, len(segment_boundaries)): gmm_list.append(GmmDistance(feature[segment_boundaries[i-1] : segment_boundaries[i], :], components = 1)) return gmm_list def trackValley(self, onset_index, smoothed_df): '''Back track to the valley location of detected peaks''' prevDiff = oldDiff = 0.0 while (onset_index > 1) : diff = smoothed_df[onset_index] - smoothed_df[onset_index-1] # if (diff < 0.0 and 0 <= prevDiff < oldDiff * self.backtracking_threshold) : break if (diff < 0 and prevDiff >= 0.1 * smoothed_df[onset_index]) : break prevDiff = diff oldDiff = prevDiff onset_index -= 1 return onset_index def normaliseFeature(self, feature_array): feature_array = np.array(feature_array) feature_array[np.isnan(feature_array)] = 0.0 feature_array[np.isinf(feature_array)] = 0.0 if len(feature_array.shape) == 1: feature_array = (feature_array - feature_array.min()) / (feature_array.max() - feature_array.min()) else: mins = feature_array.min(axis=1) maxs = feature_array.max(axis=1) feature_array = (feature_array - mins[:, np.newaxis]) / (maxs - mins)[:, np.newaxis] feature_array[np.isnan(feature_array)] = 0.0 return feature_array def upSample(self, feature_array, step): '''Resample downsized tempogram features, tempoWindo should be in accordance with input features''' # print feature_array.shape sampleRate = 44100 stepSize = 1024.0 # step = np.ceil(sampleRate/stepSize/5.0) feature_array = zoom(feature_array, (step,1)) # print 'resampled', feature_array.shape return feature_array def stripeDistance(self, feature_array, feature_len, step, metric='cosine'): '''Return distance matrix calculated for 2d time invariant features.''' size = feature_array.shape[0] / feature_len dm = np.zeros((size, size)) for i in xrange(size): for j in xrange(i, size): dm[i, j] = np.sum(pairwise_distances(feature_array[i*step:(i+1)*step, :], feature_array[j*step:(j+1)*step, :], metric)) dm[j, i] = dm[i, j] # print 'np.nanmax(dm)', np.nanmax(dm) dm[np.isnan(dm)] = np.nanmax(dm) ssm = 1 - (dm - dm.min()) / (dm.max() - dm.min()) np.fill_diagonal(ssm, 1) return ssm def getMean(self, feature, winlen, stepsize): means = [] steps = int((feature.shape[0] - winlen + stepsize) / stepsize) for i in xrange(steps): means.append(np.mean(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) return np.array(means) def getStd(self, feature, winlen, stepsize): std = [] steps = int((feature.shape[0] - winlen + stepsize) / stepsize) for i in xrange(steps): std.append(np.std(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) return np.array(std) def getDelta(self, feature): delta_feature = np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0))) return delta_feature def backtrack(self, onset_index, smoothed_df): '''Backtrack the onsets to an earlier 'perceived' location from the actually detected peak... This is based on the rationale that the perceived onset tends to be a few frames before the detected peak. This tracks the position in the detection function back to where the peak is startng to build up. Notice the "out of the blue" parameter: 0.9. (Ideally, this should be tested, evaluated and reported...)''' prevDiff = 0.0 while (onset_index > 1) : diff = smoothed_df[onset_index] - smoothed_df[onset_index-1] if diff < prevDiff * self.backtracking_threshold : break prevDiff = diff onset_index -= 1 return onset_index def trackDF(self, onset1_index, df2): '''In the second round of detection, remove the known onsets from the DF by tracking from the peak given by the first round to a valley to deminish the recognised peaks on top of which to start new detection.''' for idx in xrange(len(onset1_index)) : remove = True for i in xrange(onset1_index[idx], 1, -1) : if remove : if df2[i] >= df2[i-1] : df2[i] == 0.0 else: remove = False return df2 def getSSM(self, feature_array, metric='cosine', norm='simple'): '''Compute SSM given input feature array. args: norm: ['simple', 'remove_noise'] ''' dm = pairwise_distances(feature_array, metric=metric) dm = np.nan_to_num(dm) if norm == 'simple': ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm)) return ssm def reduceSSM(self, ssm, maxfilter_size = 2, remove_size=50): ssm[ssm<0.8] = 0 ssm = maximum_filter(ssm,size=maxfilter_size) ssm = morphology.remove_small_objects(ssm.astype(bool), min_size=remove_size) return ssm def getPeakFeatures(self, peak_candidates, featureset, winlen): ''' args: winlen: length of feature window before and after an investigated peak featureset: A list of audio features for measuring the dissimilarity. return: peak_features A list of tuples of features for windows before and after each peak. ''' prev_features = [] post_features = [] feature_types = len(featureset) # print peak_candidates[-1], winlen, featureset[0].shape # if peak_candidates[-1] + winlen > featureset[0].shape[0]: # peak_candidates = peak_candidates[:-1] # for x in peak_candidates: # prev_features.append(tuple([featureset[i][x-winlen:x, :] for i in xrange(feature_types)])) # post_features.append(tuple([featureset[i][x:x+winlen, :] for i in xrange(feature_types)])) prev_features.append(tuple([featureset[i][:peak_candidates[0], :] for i in xrange(feature_types)])) post_features.append(tuple([featureset[i][peak_candidates[0]:peak_candidates[1], :] for i in xrange(feature_types)])) for idx in xrange(1, len(peak_candidates)-1): prev_features.append(tuple([featureset[i][peak_candidates[idx-1]:peak_candidates[idx], :] for i in xrange(feature_types)])) post_features.append(tuple([featureset[i][peak_candidates[idx]:peak_candidates[idx+1], :] for i in xrange(feature_types)])) prev_features.append(tuple([featureset[i][peak_candidates[-2]:peak_candidates[-1], :] for i in xrange(feature_types)])) post_features.append(tuple([featureset[i][peak_candidates[-1]:, :] for i in xrange(feature_types)])) return prev_features, post_features def segmentDev(self, prev_features, post_features): '''Deviations are measured for each given feature type. peak_candidates: peaks from the 1st round detection peak_features: Features for measuring the dissimilarity for parts before and after each peak. dtype: tuple. ''' dev_list = [] n_peaks = len(prev_features) n_features = len(prev_features[0]) # print 'n_peaks, n_features', n_peaks, n_features for x in xrange(n_peaks): f1, f2 = prev_features[x], post_features[x] dev_list.append(tuple([GmmDistance(f1[i], components=1).skl_distance_full(GmmDistance(f2[i], components=1)) for i in xrange(n_features)])) return dev_list def verifyPeaks(self, peak_canditates, dev_list): '''Verify peaks from the 1st round detection by applying adaptive thresholding to the deviation list.''' final_peaks = copy(peak_canditates) dev_list = np.array([np.mean(x) for x in dev_list]) # get average of devs of different features med_dev = median_filter(dev_list, size=5) # print dev_list, np.min(dev_list), np.median(dev_list), np.mean(dev_list), np.std(dev_list) dev = dev_list - med_dev # print dev for i, x in enumerate(dev): if x < 0: final_peaks.remove(peak_canditates[i]) return final_peaks def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0): '''Pairwise F measure evaluation of detection rates.''' # print 'detection', detection res = EvalObj() res.TP = 0 # Total number of matched ground truth and experimental data points gt = len(annotation) # Total number of ground truth data points dt = len(detection) # Total number of experimental data points foundIdx = [] D_AD = np.zeros(gt) D_DA = np.zeros(dt) for dtIdx in xrange(dt): # print detection[dtIdx], abs(detection[dtIdx] - annotation) D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation)) # D_DA[dtIdx] = min([abs(annot - detection[dtIdx]) for annot in annotation]) for gtIdx in xrange(gt): D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection)) # D_AD[gtIdx] = min([abs(det - annotation[gtIdx]) for det in detection]) for dtIdx in xrange(dt): if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0): res.TP = res.TP + 1.0 foundIdx.append(gtIdx) foundIdx = list(set(foundIdx)) res.TP = len(foundIdx) res.FP = dt - res.TP res.FN = gt - res.TP res.AD = np.mean(D_AD) res.DA = np.mean(D_DA) res.P, res.R, res.F = 0.0, 0.0, 0.0 if res.TP == 0: return res res.P = res.TP / float(dt) res.R = res.TP / float(gt) # res.F = 2 * res.P * res.R / (res.P + res.F) res.F = 2.0 / (1.0/res.P + 1.0/res.R) # return TP3, FP3, FN3, pairwisePrecision3, pairwiseRecall3, pairwiseFValue3, TP05, FP05, FN05, pairwisePrecision05, pairwiseRecall05, pairwiseFValue05 return res def plotDetection(self, ssm, novelty, smoothed_novelty, gt, det, filename): '''Plot performance curve. x axis: distance threshold for feature selection; y axis: f measure''' plt.figure(figsize=(10,16)) gt_plot = gt / gt[-1] * len(novelty) det_plot = det / gt[-1] * len(novelty) gs = gridspec.GridSpec(2, 1, height_ratios=[3,1]) ax0 = plt.subplot(gs[0]) ax1 = plt.subplot(gs[1], sharex=ax0) ax0.imshow(ssm) ax0.vlines(gt_plot, 0, len(ssm), colors ='w', linestyles='solid') ax0.vlines(det_plot, 0, len(ssm), colors='k', linestyles='dashed') ax1.plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty, 'g', np.linspace(0, len(novelty)-1, len(novelty)), smoothed_novelty,'b') y_min, y_max = min([min(novelty), min(smoothed_novelty)]), max([max(novelty), max(smoothed_novelty)]) ax1.vlines(gt_plot, y_min, y_max, colors ='r', linestyles='solid') ax1.vlines(det_plot, y_min, y_max, colors='k', linestyles='dashed') # f, ax = plt.subplots(2, sharex=True) # ax[0].imshow(ssm) # ax[1].plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty) # ax[1].vlines(gt_plot, 0, len(novelty), colors ='r', linestyles='solid') # ax[1].vlines(det_plot, 0, len(novelty), colors='b', linestyles='dashed') # # plt.show() plt.savefig(filename) return None def process(self): '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending. In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection threshold - segmentation f measure curve. ''' ssom = SimpleSOMMapper((30,30), 800, learning_rate=0.001) peak_picker = PeakPicker() peak_picker.params.alpha = 9.0 # Alpha norm peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0 peak_picker.params.QuadThresh_b = 0.0 peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0 peak_picker.params.rawSensitivity = 20 peak_picker.params.aCoeffs = self.aCoeffs peak_picker.params.bCoeffs = self.bCoeffs peak_picker.params.preWin = self.medianWin peak_picker.params.postWin = self.medianWin + 1 peak_picker.params.LP_on = self.LPfilter_on peak_picker.params.Medfilt_on = self.medfilter_on peak_picker.params.Polyfit_on = self.polyfitting_on peak_picker.params.isMedianPositive = False # Settings used for feature extraction feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow) feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep) aggregation_window, aggregation_step = 100, 50 featureRate = float(self.SampleRate) / self.stepSize audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] # audio_files = audio_files[:2] audio_files.sort() audio_list = [] gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')] gammatone_feature_list = ['rolloff', 'contrast'] tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')] # tempo_feature_list = ['intensity_bpm_renamed', 'loudness_bpm_renamed'] timbre_feature_list = ['mfcc'] harmonic_feature_list = ['nnls'] gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list] timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list] tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list] harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list] fobj_list = [] # For each audio file, load specific features for audio in audio_files: ao = AudioObj() ao.name = splitext(audio)[0] # print 'audio:', ao.name # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami # ao.gt = np.genfromtxt(annotation_file, usecols=0) # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], [] for feature in gammatone_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(gammatone_feature_list) > 1: n_frame = np.min([x.shape[0] for x in gammatone_featureset]) gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset] ao.gammatone_features = np.hstack((gammatone_featureset)) else: ao.gammatone_features = gammatone_featureset[0] for feature in timbre_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(timbre_feature_list) > 1: n_frame = np.min([x.shape[0] for x in timbre_featureset]) timbre_featureset = [x[:n_frame,:] for x in timbre_featureset] ao.timbre_features = np.hstack((timbre_featureset)) else: ao.timbre_features = timbre_featureset[0] for feature in tempo_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]) ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0] break if len(tempo_feature_list) > 1: n_frame = np.min([x.shape[0] for x in tempo_featureset]) tempo_featureset = [x[:n_frame,:] for x in tempo_featureset] ao.tempo_features = np.hstack((tempo_featureset)) else: ao.tempo_features = tempo_featureset[0] for feature in harmonic_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(harmonic_feature_list) > 1: n_frame = np.min([x.shape[0] for x in harmonic_featureset]) harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset] ao.harmonic_features = np.hstack((harmonic_featureset)) else: ao.harmonic_features = harmonic_featureset[0] # # Reshaping features to keep identical dimension # n_frames = np.array([ao.gammatone_features.shape[0], ao.harmonic_features.shape[0], ao.timbre_features.shape[0]]).min() # step = n_frames / float(ao.tempo_features.shape[0]) # # ao.tempo_features = self.upSample(ao.tempo_features, step) # ao.gammatone_features = ao.gammatone_features[:n_frames, :] # ao.harmonic_features = ao.harmonic_features[:n_frames, :] # ao.timbre_features = ao.timbre_features[:n_frames, :] # print ao.gammatone_features.shape, ao.harmonic_features.shape, ao.tempo_features.shape, ao.timbre_features.shape # Reshape features (downsample) to match tempogram ones step = ao.tempo_features.shape[0] # aggregation_step = (n_frames / (step+1.0)) # Get aggregated features for computing ssm aggregation_window, aggregation_step = 1,1 featureRate = float(self.SampleRate) /self.stepSize pca = PCA(n_components=5) ao.gammatone_features = resample(ao.gammatone_features, step) ao.gammatone_features = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis] ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0 ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0 ao.timbre_features = resample(ao.timbre_features, step) ao.timbre_features = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis] ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0 ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0 ao.harmonic_features = resample(ao.harmonic_features, step) ao.harmonic_features = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis] ao.harmonic_features[np.isnan(ao.harmonic_features)] = 0.0 ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0 ao.tempo_features = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis] ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0 ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0 # print 'resampled', ao.gammatone_features.shape, ao.timbre_features.shape, ao.harmonic_features.shape # gt_feature_matrix = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis] # gt_feature_matrix[np.isnan(gt_feature_matrix)] = 0.0 # mean_gt_feature = self.getMean(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # std_gt_feature = self.getStd(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # delta_gt_feature = self.getDelta(gt_feature_matrix) # mean_dgt_feature = self.getMean(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step) # std_dgt_feature = self.getStd(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step) # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature)) # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature, mean_dgt_feature, std_dgt_feature)) # aggregated_gt_feature = ao.gammatone_features aggregated_gt_feature = self.getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(aggregated_gt_feature) aggregated_gt_feature = pca.transform(aggregated_gt_feature) distance_gt_matrix = pairwise_distances(aggregated_gt_feature, metric = 'cosine') distance_gt_matrix = np.nan_to_num(distance_gt_matrix) ao.gammatone_ssm = 1 - (distance_gt_matrix - distance_gt_matrix.min()) / (distance_gt_matrix.max() - distance_gt_matrix.min()) # tempo_feature_matrix = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis] # tempo_feature_matrix[np.isnan(tempo_feature_matrix)] = 0.0 # mean_tempo_feature = self.getMean(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # std_tempo_feature = self.getStd(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # delta_tempo_feature = self.getDelta(tempo_feature_matrix) # mean_dtempo_feature = self.getMean(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step) # std_dtempo_feature = self.getStd(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step) # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature)) # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature, mean_dtempo_feature, std_dtempo_feature)) # aggregated_tempo_feature = ao.tempo_features aggregated_tempo_feature = self.getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(aggregated_tempo_feature) aggregated_tempo_feature = pca.transform(aggregated_tempo_feature) distance_tempo_matrix = pairwise_distances(aggregated_tempo_feature, metric = 'cosine') distance_tempo_matrix = np.nan_to_num(distance_tempo_matrix) ao.tempo_ssm = 1 - (distance_tempo_matrix - distance_tempo_matrix.min()) / (distance_tempo_matrix.max() - distance_tempo_matrix.min()) # timbre_feature_matrix = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis] # timbre_feature_matrix[np.isnan(timbre_feature_matrix)] = 0.0 # mean_timbre_feature = self.getMean(timbre_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # std_timbre_feature = self.getStd(timbre_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # delta_timbre_feature = self.getDelta(timbre_feature_matrix) # mean_dtimbre_feature = self.getMean(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step) # std_dtimbre_feature = self.getStd(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step) # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature) # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature, mean_dtimbre_feature, std_dtimbre_feature)) # aggregated_timbre_feature = ao.timbre_features aggregated_timbre_feature = self.getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(aggregated_timbre_feature) aggregated_timbre_feature = pca.transform(aggregated_timbre_feature) distance_timbre_matrix = pairwise_distances(aggregated_timbre_feature, metric = 'cosine') distance_timbre_matrix = np.nan_to_num(distance_timbre_matrix) ao.timbre_ssm = 1 - (distance_timbre_matrix - distance_timbre_matrix.min()) / (distance_timbre_matrix.max() - distance_timbre_matrix.min()) # harmonic_feature_matrix = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis] # harmonic_feature_matrix[np.isnan(harmonic_feature_matrix)] = 0.0 # mean_harmonic_feature = self.getMean(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # std_harmonic_feature = self.getStd(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) # delta_harmonic_feature = self.getDelta(harmonic_feature_matrix) # mean_dharmonic_feature = self.getMean(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step) # std_dharmonic_feature = self.getStd(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step) # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature)) # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature, mean_dharmonic_feature, std_dharmonic_feature)) aggregated_harmonic_feature = ao.harmonic_features aggregated_harmonic_feature = self.getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(aggregated_harmonic_feature) aggregated_harmonic_feature = pca.transform(aggregated_harmonic_feature) distance_harmonic_matrix = pairwise_distances(aggregated_harmonic_feature, metric = 'cosine') distance_harmonic_matrix = np.nan_to_num(distance_harmonic_matrix) ao.harmonic_ssm = 1 - (distance_harmonic_matrix - distance_harmonic_matrix.min()) / (distance_harmonic_matrix.max() - distance_harmonic_matrix.min()) ao.combined_features = np.hstack((aggregated_gt_feature, aggregated_harmonic_feature, aggregated_timbre_feature, aggregated_tempo_feature)) pca.fit(ao.combined_features) ao.combined_features = pca.transform(ao.combined_features) distance_combined_matrix = pairwise_distances(ao.combined_features, metric = 'cosine') distance_combined_matrix = np.nan_to_num(distance_combined_matrix) ao.combined_ssm = 1 - (distance_combined_matrix - distance_combined_matrix.min()) / (distance_combined_matrix.max() - distance_combined_matrix.min()) # Resample timestamps # ao.ssm_timestamps = np.array(map(lambda step: step * aggregation_step / featureRate, np.arange(0.0, aggregated_gt_feature.shape[0]))) ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) # print ao.gammatone_ssm.shape, ao.tempo_ssm.shape, ao.timbre_ssm.shape, ao.harmonic_ssm.shape, len(ao.ssm_timestamps) # # Save SSMs. # gammatone_ssm = copy(ao.gammatone_ssm) # gammatone_ssm[gammatone_ssm<0.8]=0.0 # plt.figure(figsize=(10, 10)) # plt.vlines(ao.gt / ao.gt[-1] * gammatone_ssm.shape[0], 0, gammatone_ssm.shape[0], colors='r') # plt.imshow(gammatone_ssm, cmap='Greys') # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-gammatone.pdf'),format='pdf') # # tempo_ssm = copy(ao.tempo_ssm) # tempo_ssm[tempo_ssm<0.8]=0.0 # plt.figure(figsize=(10, 10)) # plt.vlines(ao.gt / ao.gt[-1] * tempo_ssm.shape[0], 0, tempo_ssm.shape[0], colors='r') # plt.imshow(tempo_ssm, cmap='Greys') # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-tempo.pdf'),format='pdf') # # timbre_ssm = copy(ao.timbre_ssm) # timbre_ssm[timbre_ssm<0.8]=0.0 # plt.figure(figsize=(10, 10)) # plt.vlines(ao.gt / ao.gt[-1] * timbre_ssm.shape[0], 0, timbre_ssm.shape[0], colors='r') # plt.imshow(timbre_ssm, cmap='Greys') # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-timbre.pdf'),format='pdf') # # harmonic_ssm = copy(ao.harmonic_ssm) # harmonic_ssm[harmonic_ssm<0.8]=0.0 # plt.figure(figsize=(10, 10)) # plt.vlines(ao.gt / ao.gt[-1] * harmonic_ssm.shape[0], 0, harmonic_ssm.shape[0], colors='r') # plt.imshow(harmonic_ssm, cmap='Greys') # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-harmonic.pdf'),format='pdf') # # ssm_cleaned = copy(ao.combined_ssm) # ssm_cleaned[ssm_cleaned<0.8] = 0 # plt.figure(figsize=(10, 10)) # plt.vlines(ao.gt / ao.gt[-1] * ssm_cleaned.shape[0], 0, ssm_cleaned.shape[0], colors='r') # plt.imshow(ssm_cleaned, cmap='Greys') # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-combined.pdf'),format='pdf') audio_list.append(ao) # Evaluate individual segmentation results. outfile1 = join(options.OUTPUT, 'individualSOM.csv') with open(outfile1, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \ 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \ 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \ 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \ 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \ 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3']) # outfile4 = join(options.OUTPUT, 'individualResDF.csv') # with open(outfile4, 'a') as f: # csvwriter = csv.writer(f, delimiter=',') # csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \ # 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \ # 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \ # 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \ # 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \ # 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3']) # Fuse novelty curves from individual segmentation results. outfile2 = join(options.OUTPUT, 'individualFuseSOM.csv') with open(outfile2, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\ 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \ 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \ 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \ 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3']) for i,ao in enumerate(audio_list): print 'processing self organizing maps for %s' %ao.name # 1.Novelty based segmentation. # Correlate an Gaussian on the diagonal to contruct the novelty curve # print 'ssm', ao.gammatone_ssm.shape, ao.timbre_ssm.shape, ao.tempo_ssm.shape, ao.harmonic_ssm.shape ssom.train(ao.gammatone_features) gammatone_som = ssom(ao.gammatone_features) ssom.train(ao.timbre_features) timbre_som = ssom(ao.timbre_features) ssom.train(ao.tempo_features) tempo_som = ssom(ao.tempo_features) ssom.train(ao.harmonic_features) harmonic_som = ssom(ao.harmonic_features) gammatone_harmonic_features = np.hstack((ao.gammatone_features, ao.harmonic_features)) gammatone_timbre_features = np.hstack((ao.gammatone_features, ao.timbre_features)) gammatone_tempo_features = np.hstack((ao.gammatone_features, ao.tempo_features)) harmonic_timbre_features = np.hstack((ao.harmonic_features, ao.timbre_features)) harmonic_tempo_features = np.hstack((ao.harmonic_features, ao.tempo_features)) timbre_tempo_features = np.hstack((ao.timbre_features, ao.tempo_features)) gammatone_harmonic_timbre_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features)) gammatone_harmonic_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.tempo_features)) gammatone_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.timbre_features, ao.tempo_features)) harmonic_timbre_tempo_features = np.hstack((ao.harmonic_features, ao.timbre_features, ao.tempo_features)) gammatone_harmonic_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features)) ssom.train(gammatone_harmonic_features) gammatone_harmonic_som = ssom(gammatone_harmonic_features) ssom.train(gammatone_timbre_features) gammatone_timbre_som = ssom(gammatone_timbre_features) ssom.train(gammatone_tempo_features) gammatone_tempo_som = ssom(gammatone_tempo_features) ssom.train(harmonic_timbre_features) harmonic_timbre_som = ssom(harmonic_timbre_features) ssom.train(harmonic_timbre_features) harmonic_timbre_som = ssom(harmonic_timbre_features) ssom.train(harmonic_tempo_features) harmonic_tempo_som = ssom(harmonic_tempo_features) ssom.train(timbre_tempo_features) timbre_tempo_som = ssom(timbre_tempo_features) ssom.train(gammatone_harmonic_timbre_features) gammatone_harmonic_timbre_som = ssom(gammatone_harmonic_timbre_features) ssom.train(gammatone_harmonic_tempo_features) gammatone_harmonic_tempo_som = ssom(gammatone_harmonic_tempo_features) ssom.train(gammatone_timbre_tempo_features) gammatone_timbre_tempo_som = ssom(gammatone_timbre_tempo_features) ssom.train(harmonic_timbre_tempo_features) harmonic_timbre_tempo_som = ssom(harmonic_timbre_tempo_features) ssom.train(gammatone_harmonic_timbre_tempo_features) gammatone_harmonic_timbre_tempo_som = ssom(gammatone_harmonic_timbre_tempo_features) gammatone_ssm = self.getSSM(gammatone_som) harmonic_ssm = self.getSSM(harmonic_som) timbre_ssm = self.getSSM(timbre_som) tempo_ssm = self.getSSM(tempo_som) gammatone_harmonic_ssm = self.getSSM(gammatone_harmonic_som) gammatone_timbre_ssm = self.getSSM(gammatone_timbre_som) gammatone_tempo_ssm = self.getSSM(gammatone_tempo_som) harmonic_timbre_ssm = self.getSSM(harmonic_timbre_som) harmonic_tempo_ssm = self.getSSM(harmonic_tempo_som) timbre_tempo_ssm = self.getSSM(timbre_tempo_som) gammatone_harmonic_timbre_ssm = self.getSSM(gammatone_harmonic_timbre_som) gammatone_harmonic_tempo_ssm = self.getSSM(gammatone_harmonic_tempo_som) gammatone_timbre_tempo_ssm = self.getSSM(gammatone_timbre_tempo_som) harmonic_timbre_tempo_ssm = self.getSSM(harmonic_timbre_tempo_som) gammatone_harmonic_timbre_tempo_ssm = self.getSSM(gammatone_harmonic_timbre_tempo_som) # Noise removal in ssm reduced_gammatone_ssm = self.reduceSSM(gammatone_ssm) reduced_timbre_ssm = self.reduceSSM(timbre_ssm) reduced_tempo_ssm = self.reduceSSM(ao.tempo_ssm) reduced_harmonic_ssm = self.reduceSSM(ao.harmonic_ssm) reduced_gammatone_harmonic_ssm = self.reduceSSM(gammatone_harmonic_ssm) reduced_gammatone_timbre_ssm = self.reduceSSM(gammatone_timbre_ssm) reduced_gammatone_tempo_ssm = self.reduceSSM(gammatone_tempo_ssm) reduced_harmonic_timbre_ssm = self.reduceSSM(harmonic_timbre_ssm) reduced_harmonic_tempo_ssm = self.reduceSSM(harmonic_tempo_ssm) reduced_timbre_tempo_ssm = self.reduceSSM(timbre_tempo_ssm) reduced_gammatone_harmonic_timbre_ssm = self.reduceSSM(gammatone_harmonic_timbre_ssm) reduced_gammatone_harmonic_tempo_ssm = self.reduceSSM(gammatone_harmonic_tempo_ssm) reduced_gammatone_timbre_tempo_ssm = self.reduceSSM(gammatone_timbre_tempo_ssm) reduced_harmonic_timbre_tempo_ssm = self.reduceSSM(harmonic_timbre_tempo_ssm) reduced_gammatone_harmonic_timbre_tempo_ssm = self.reduceSSM(gammatone_harmonic_timbre_tempo_ssm) gammatone_novelty = self.getNoveltyCurve(reduced_gammatone_ssm, self.kernel_size) gammatone_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gammatone_novelty] timbre_novelty = self.getNoveltyCurve(reduced_timbre_ssm, self.kernel_size) timbre_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in timbre_novelty] tempo_novelty = self.getNoveltyCurve(reduced_tempo_ssm, self.kernel_size) tempo_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tempo_novelty] harmonic_novelty = self.getNoveltyCurve(reduced_harmonic_ssm, self.kernel_size) harmonic_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in harmonic_novelty] # Peak picking from the novelty curve smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty) gammatone_detection = [ao.ssm_timestamps[int(i)] for i in gammatone_novelty_peaks] + [ao.gt[-1]] smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty) timbre_detection = [ao.ssm_timestamps[int(i)] for i in timbre_novelty_peaks] + [ao.gt[-1]] smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty) harmonic_detection = [ao.ssm_timestamps[int(i)] for i in harmonic_novelty_peaks] + [ao.gt[-1]] smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty) tempo_detection = [ao.ssm_timestamps[int(i)] for i in tempo_novelty_peaks] + [ao.gt[-1]] gt_res_05 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=0.5, combine=1.0) gt_res_3 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=3, combine=1.0) harmonic_res_05 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=0.5, combine=1.0) harmonic_res_3 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=3, combine=1.0) tempo_res_05 = self.pairwiseF(ao.gt, tempo_detection, tolerance=0.5, combine=1.0) tempo_res_3 = self.pairwiseF(ao.gt, tempo_detection, tolerance=3, combine=1.0) timbre_res_05 = self.pairwiseF(ao.gt, timbre_detection, tolerance=0.5, combine=1.0) timbre_res_3 = self.pairwiseF(ao.gt, timbre_detection, tolerance=3, combine=1.0) with open(outfile1, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow([ao.name, gt_res_05.TP, gt_res_05.FP, gt_res_05.FN, gt_res_05.P, gt_res_05.R, gt_res_05.F, gt_res_05.AD, gt_res_05.DA, gt_res_3.TP, gt_res_3.FP, gt_res_3.FN, gt_res_3.P, \ gt_res_3.R, gt_res_3.F, gt_res_3.AD, gt_res_3.DA, harmonic_res_05.TP, harmonic_res_05.FP, harmonic_res_05.FN, harmonic_res_05.P, harmonic_res_05.R, harmonic_res_05.F, harmonic_res_05.AD, harmonic_res_05.DA, \ harmonic_res_3.TP, harmonic_res_3.FP, harmonic_res_3.FN, harmonic_res_3.P, harmonic_res_3.R, harmonic_res_3.F, harmonic_res_3.AD, harmonic_res_3.DA, timbre_res_05.TP, timbre_res_05.FP, \ timbre_res_05.FN, timbre_res_05.P, timbre_res_05.R, timbre_res_05.F, timbre_res_05.AD, timbre_res_05.DA, timbre_res_3.TP, timbre_res_3.FP, timbre_res_3.FN, timbre_res_3.P, timbre_res_3.R, timbre_res_3.F, \ timbre_res_3.AD, timbre_res_3.DA, tempo_res_05.TP, tempo_res_05.FP, tempo_res_05.FN, tempo_res_05.P, tempo_res_05.R, tempo_res_05.F, tempo_res_05.AD, tempo_res_05.DA, tempo_res_3.TP, tempo_res_3.FP, \ tempo_res_3.FN, tempo_res_3.P, tempo_res_3.R, tempo_res_3.F, tempo_res_3.AD, tempo_res_3.DA]) gt_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_ssm, self.kernel_size) gt_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_novelty] gt_tb_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_ssm, self.kernel_size) gt_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_novelty] gt_tp_novelty = self.getNoveltyCurve(reduced_gammatone_tempo_ssm, self.kernel_size) gt_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_novelty] hm_tb_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_ssm, self.kernel_size) hm_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_novelty] hm_tp_novelty = self.getNoveltyCurve(reduced_harmonic_tempo_ssm, self.kernel_size) hm_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tp_novelty] tb_tp_novelty = self.getNoveltyCurve(reduced_timbre_tempo_ssm, self.kernel_size) tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_novelty] smoothed_gt_tb_novelty, gt_tb_novelty_peaks = peak_picker.process(gt_tb_novelty) gt_tb_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_novelty_peaks] + [ao.gt[-1]] smoothed_gt_tp_novelty, gt_tp_novelty_peaks = peak_picker.process(gt_tp_novelty) gt_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_novelty_peaks] + [ao.gt[-1]] smoothed_gt_hm_novelty, gt_hm_novelty_peaks = peak_picker.process(gt_hm_novelty) gt_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_novelty_peaks] + [ao.gt[-1]] smoothed_tb_tp_novelty, tb_tp_novelty_peaks = peak_picker.process(tb_tp_novelty) tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_novelty_peaks] + [ao.gt[-1]] smoothed_tb_hm_novelty, tb_hm_novelty_peaks = peak_picker.process(tb_hm_novelty) tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_novelty_peaks] + [ao.gt[-1]] smoothed_tp_hm_novelty, tp_hm_novelty_peaks = peak_picker.process(tp_hm_novelty) tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_novelty_peaks] + [ao.gt[-1]] gt_tb_tp_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_tempo_ssm, self.kernel_size) gt_tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_novelty] gt_tb_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_ssm, self.kernel_size) gt_tb_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_hm_novelty] gt_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_tempo_ssm, self.kernel_size) gt_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_hm_novelty] tb_tp_hm_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_tempo_ssm, self.kernel_size) tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_hm_novelty] gt_tb_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_tempo_ssm, self.kernel_size) gt_tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_hm_novelty] smoothed_gt_tb_tp_novelty, gt_tb_tp_novelty_peaks = peak_picker.process(gt_tb_tp_novelty) gt_tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_novelty_peaks] + [ao.gt[-1]] smoothed_gt_tb_hm_novelty, gt_tb_hm_novelty_peaks = peak_picker.process(gt_tb_hm_novelty) gt_tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_hm_novelty_peaks] + [ao.gt[-1]] smoothed_gt_tp_hm_novelty, gt_tp_hm_novelty_peaks = peak_picker.process(gt_tp_hm_novelty) gt_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_hm_novelty_peaks] + [ao.gt[-1]] smoothed_tb_tp_hm_novelty, tb_tp_hm_novelty_peaks = peak_picker.process(tb_tp_hm_novelty) tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_hm_novelty_peaks] + [ao.gt[-1]] smoothed_gt_tb_tp_hm_novelty, gt_tb_tp_hm_novelty_peaks = peak_picker.process(gt_tb_tp_hm_novelty) gt_tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_hm_novelty_peaks] + [ao.gt[-1]] # novelty_peaks = gt_tb_tp_hm_novelty_peaks # novelty_detection = [ao.ssm_timestamps[int(i)] for i in novelty_peaks] + [ao.gt[-1]] if options.PLOT: self.plotDetection(ao.ssm, novelty, smoothed_novelty, ao.gt, detection, filename=join(options.OUTPUT+ ao.name)+'.pdf') gt_tb_res_05 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=0.5, combine=1.0) gt_tb_res_3 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=3, combine=1.0) gt_tp_res_05 = self.pairwiseF(ao.gt, gt_tp_detection, tolerance=0.5, combine=1.0) gt_tp_res_3 = self.pairwiseF(ao.gt, gt_tp_detection, tolerance=3, combine=1.0) gt_hm_res_05 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=0.5, combine=1.0) gt_hm_res_3 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=3, combine=1.0) tb_tp_res_05 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=0.5, combine=1.0) tb_tp_res_3 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=3, combine=1.0) tb_hm_res_05 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=0.5, combine=1.0) tb_hm_res_3 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=3, combine=1.0) tp_hm_res_05 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=0.5, combine=1.0) tp_hm_res_3 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=3, combine=1.0) gt_tb_tp_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=0.5, combine=1.0) gt_tb_tp_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=3, combine=1.0) gt_tb_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=0.5, combine=1.0) gt_tb_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=3, combine=1.0) gt_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=0.5, combine=1.0) gt_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=3, combine=1.0) tb_tp_hm_res_05 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=0.5, combine=1.0) tb_tp_hm_res_3 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=3, combine=1.0) gt_tb_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=0.5, combine=1.0) gt_tb_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=3, combine=1.0) # Output detected segment locations. if options.VERBOSE: outdir = join(options.OUTPUT, 'detection', ao.name) if not isdir(outdir): os.mkdir(outdir) np.savetxt(join(outdir, 'gammatone.csv'), gammatone_detection) np.savetxt(join(outdir, 'timbre.csv'), timbre_detection) np.savetxt(join(outdir, 'tempo.csv'), tempo_detection) np.savetxt(join(outdir, 'harmonic.csv'), harmonic_detection) np.savetxt(join(outdir, 'gammatone_timbre_novelty.csv'), gt_tb_detection) np.savetxt(join(outdir, 'gammatone_tempo_novelty.csv'), gt_tp_detection) np.savetxt(join(outdir, 'gammatone_harmonic_novelty.csv'), gt_hm_detection) np.savetxt(join(outdir, 'timbre_tempo_novelty.csv'), tb_tp_detection) np.savetxt(join(outdir, 'timbre_harmonic_novelty.csv'), tb_hm_detection) np.savetxt(join(outdir, 'tempo_harmonic_novelty.csv'), tp_hm_detection) np.savetxt(join(outdir, 'gammatone_timbre_tempo_novelty.csv'), gt_tb_tp_detection) np.savetxt(join(outdir, 'gammatone_timbre_harmonic_novelty.csv'), gt_tb_hm_detection) np.savetxt(join(outdir, 'gammatone_tempo_harmonic_novelty.csv'), gt_tp_hm_detection) np.savetxt(join(outdir, 'timbre_tempo_harmonic_novelty.csv'), tb_tp_hm_detection) np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_novelty.csv'), gt_tb_tp_hm_detection) # with open(outfile4, 'a') as f: # csvwriter = csv.writer(f, delimiter=',') # csvwriter.writerow([ao.name, gt_df_05.TP, gt_df_05.FP, gt_df_05.FN, gt_df_05.P, gt_df_05.R, gt_df_05.F, gt_df_05.AD, gt_df_05.DA, gt_df_3.TP, gt_df_3.FP, gt_df_3.FN, gt_df_3.P, \ # gt_df_3.R, gt_df_3.F, gt_df_3.AD, gt_df_3.DA, harmonic_df_05.TP, harmonic_df_05.FP, harmonic_df_05.FN, harmonic_df_05.P, harmonic_df_05.R, harmonic_df_05.F, harmonic_df_05.AD, harmonic_df_05.DA, \ # harmonic_df_3.TP, harmonic_df_3.FP, harmonic_df_3.FN, harmonic_df_3.P, harmonic_df_3.R, harmonic_df_3.F, harmonic_df_3.AD, harmonic_df_3.DA, timbre_df_05.TP, timbre_df_05.FP, \ # timbre_df_05.FN, timbre_df_05.P, timbre_df_05.R, timbre_df_05.F, timbre_df_05.AD, timbre_df_05.DA, timbre_df_3.TP, timbre_df_3.FP, timbre_df_3.FN, timbre_df_3.P, timbre_df_3.R, timbre_df_3.F, \ # timbre_df_3.AD, timbre_df_3.DA, tempo_df_05.TP, tempo_df_05.FP, tempo_df_05.FN, tempo_df_05.P, tempo_df_05.R, tempo_df_05.F, tempo_df_05.AD, tempo_df_05.DA, tempo_df_3.TP, tempo_df_3.FP, \ # tempo_df_3.FN, tempo_df_3.P, tempo_df_3.R, tempo_df_3.F, tempo_df_3.AD, tempo_df_3.DA]) with open(outfile2, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow([ao.name, gt_tb_res_05.P, gt_tb_res_05.R, gt_tb_res_05.F, gt_tb_res_3.P, gt_tb_res_3.R, gt_tb_res_3.F, gt_tp_res_05.P, gt_tp_res_05.R, gt_tp_res_05.F, gt_tp_res_3.P, gt_tp_res_3.R, gt_tp_res_3.F, \ gt_hm_res_05.P, gt_hm_res_05.R, gt_hm_res_05.F, gt_hm_res_3.P, gt_hm_res_3.R, gt_hm_res_3.F, tb_tp_res_05.P, tb_tp_res_05.R, tb_tp_res_05.F, tb_tp_res_3.P, tb_tp_res_3.R, tb_tp_res_3.F, \ tb_hm_res_05.P, tb_hm_res_05.R, tb_hm_res_05.F, tb_hm_res_3.P, tb_hm_res_3.R, tb_hm_res_3.F, tp_hm_res_05.P, tp_hm_res_05.R, tp_hm_res_05.F, tp_hm_res_3.P, tp_hm_res_3.R, tp_hm_res_3.F, \ gt_tb_tp_res_05.P, gt_tb_tp_res_05.R, gt_tb_tp_res_05.F, gt_tb_tp_res_3.P, gt_tb_tp_res_3.R, gt_tb_tp_res_3.F, gt_tb_hm_res_05.P, gt_tb_hm_res_05.R, gt_tb_hm_res_05.F, gt_tb_hm_res_3.P, gt_tb_hm_res_3.R, gt_tb_hm_res_3.F, \ gt_tp_hm_res_05.P, gt_tp_hm_res_05.R, gt_tp_hm_res_05.F, gt_tp_hm_res_3.P, gt_tp_hm_res_3.R, gt_tp_hm_res_3.F, tb_tp_hm_res_05.P, tb_tp_hm_res_05.R, tb_tp_hm_res_05.F, tb_tp_hm_res_3.P, tb_tp_hm_res_3.R, tb_tp_hm_res_3.F, \ gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F]) # Verification of detected boundaries by novelty fusion from the first round # ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] # winlen = 1.5 * self.SampleRate / self.stepSize # prev_features, post_features = self.getPeakFeatures(gt_tb_tp_hm_novelty_peaks, ao_featureset, winlen=10) # dev_list = self.segmentDev(prev_features, post_features) # gt_tb_tp_hm_novelty_peaks = gt_tb_tp_hm_novelty_peaks[:len(dev_list)] # # print 'len(dev_list)', len(dev_list), len(gt_tb_tp_hm_novelty_peaks) # # print gt_tb_tp_hm_novelty_peaks, dev_list # dev_mean = [np.mean(x) for x in dev_list] # np.savetxt(join(options.OUTPUT, 'dev', ao.name+'.csv'), np.vstack((gt_tb_tp_hm_detection[:len(dev_list)], dev_mean)).T, delimiter=',') # peak_verified = self.verifyPeaks(gt_tb_tp_hm_novelty_peaks, dev_list) # # verified_detection = [ao.ssm_timestamps[int(i)] for i in peak_verified] + [ao.gt[-1]] # verified_detection_05 = self.pairwiseF(ao.gt, verified_detection, tolerance=0.5, combine=1.0) # verified_detection_3 = self.pairwiseF(ao.gt, verified_detection, tolerance=3, combine=1.0) # # print gt_tb_tp_hm_res_05.TP, gt_tb_tp_hm_res_05.FP, gt_tb_tp_hm_res_05.FN, gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F # print gt_tb_tp_hm_res_3.TP, gt_tb_tp_hm_res_3.FP, gt_tb_tp_hm_res_3.FN, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F # # print verified_detection_05.TP, verified_detection_05.FP, verified_detection_05.FN, verified_detection_05.P, verified_detection_05.R, verified_detection_05.F # print verified_detection_3.TP, verified_detection_3.FP, verified_detection_3.FN, verified_detection_3.P, verified_detection_3.R, verified_detection_3.F # if len(novelty_peaks): # ao.gammatone_gmm = self.getGMMs(ao.gammatone_features, novelty_peaks) # ao.harmonic_gmm = self.getGMMs(ao.harmonic_features, novelty_peaks) # ao.tempo_gmm = self.getGMMs(ao.tempo_features, novelty_peaks) # ao.timbre_gmm = self.getGMMs(ao.timbre_features, novelty_peaks) # # rc = rClustering(eps=1., k=8, rank='max_neighbors') # rc.fit(ao.gammatone_gmm) # gammatone_clf = rc.classification # gammatone_neighborhood_size, gammatone_average_div, gammatone_node_rank = rc.getNodeRank() # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_clf)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_neighborhood_size)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_average_div)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_node_rank)).T, delimiter=',') # # rc = rClustering(eps=1., k=8, rank='max_neighbors') # rc.fit(ao.harmonic_gmm) # harmonic_clf = rc.classification # harmonic_neighborhood_size, harmonic_average_div, harmonic_node_rank = rc.getNodeRank() # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_clf)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_neighborhood_size)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_average_div)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_node_rank)).T, delimiter=',') # # rc = rClustering(eps=1., k=8, rank='max_neighbors') # rc.fit(ao.tempo_gmm) # tempo_clf = rc.classification # tempo_neighborhood_size, tempo_average_div, tempo_node_rank = rc.getNodeRank() # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_clf)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_neighborhood_size)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_average_div)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_node_rank)).T, delimiter=',') # # rc = rClustering(eps=1., k=8, rank='max_neighbors') # rc.fit(ao.timbre_gmm) # timbre_clf = rc.classification # timbre_neighborhood_size, timbre_average_div, timbre_node_rank = rc.getNodeRank() # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_clf)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_neighborhood_size)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_average_div)).T, delimiter=',') # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_node_rank)).T, delimiter=',') # # Evaluate segmentation results using combined SSMs. # outfile3 = join(options.OUTPUT, 'combinedSSMRes.csv') # with open(outfile3, 'a') as f: # csvwriter = csv.writer(f, delimiter=',') # csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\ # 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \ # 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \ # 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \ # 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3']) # # for i,ao in enumerate(audio_list): # # Combine SSMs computed from different features # gt_hm_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm) # gt_tb_ssm = np.multiply(ao.gammatone_ssm, ao.timbre_ssm) # gt_tp_ssm = np.multiply(ao.gammatone_ssm, ao.tempo_ssm) # tb_tp_ssm = np.multiply(ao.timbre_ssm, ao.tempo_ssm) # tb_hm_ssm = np.multiply(ao.timbre_ssm, ao.harmonic_ssm) # tp_hm_ssm = np.multiply(ao.tempo_ssm, ao.harmonic_ssm) # # gt_hm_tb_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm, ao.timbre_ssm) # gt_hm_tp_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm, ao.tempo_ssm) # gt_tb_tp_ssm = np.multiply(ao.gammatone_ssm, ao.timbre_ssm, ao.tempo_ssm) # hm_tb_tp_ssm = np.multiply(ao.harmonic_ssm, ao.timbre_ssm, ao.tempo_ssm) # # gt_hm_tb_tp_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.harmonic_ssm), np.multiply(ao.timbre_ssm, ao.tempo_ssm)) # # gt_hm_ssm_novelty = self.getNoveltyCurve(gt_hm_ssm, self.kernel_size) # gt_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_ssm_novelty] # gt_tb_ssm_novelty = self.getNoveltyCurve(gt_tb_ssm, self.kernel_size) # gt_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_ssm_novelty] # gt_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_ssm, self.kernel_size) # gt_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_ssm_novelty] # tb_tp_ssm_novelty = self.getNoveltyCurve(tb_tp_ssm, self.kernel_size) # tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_ssm_novelty] # tb_hm_ssm_novelty = self.getNoveltyCurve(tb_hm_ssm, self.kernel_size) # tb_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_hm_ssm_novelty] # tp_hm_ssm_novelty = self.getNoveltyCurve(tp_hm_ssm, self.kernel_size) # tp_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tp_hm_ssm_novelty] # # gt_hm_tb_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_ssm, self.kernel_size) # gt_hm_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tb_ssm_novelty] # gt_hm_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tp_ssm, self.kernel_size) # gt_hm_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tp_ssm_novelty] # gt_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_tb_tp_ssm, self.kernel_size) # gt_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_ssm_novelty] # hm_tb_tp_ssm_novelty = self.getNoveltyCurve(hm_tb_tp_ssm, self.kernel_size) # hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_tp_ssm_novelty] # # gt_hm_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_tp_ssm, self.kernel_size) # gt_hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tb_tp_ssm_novelty] # # smoothed_gt_hm_ssm_novelty, gt_hm_ssm_novelty_peaks = peak_picker.process(gt_hm_ssm_novelty) # gt_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_gt_tb_ssm_novelty, gt_tb_ssm_novelty_peaks = peak_picker.process(gt_tb_ssm_novelty) # gt_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_gt_tp_ssm_novelty, gt_tp_ssm_novelty_peaks = peak_picker.process(gt_tp_ssm_novelty) # gt_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_tb_tp_ssm_novelty, tb_tp_ssm_novelty_peaks = peak_picker.process(tb_tp_ssm_novelty) # tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_tb_hm_ssm_novelty, tb_hm_ssm_novelty_peaks = peak_picker.process(tb_hm_ssm_novelty) # tb_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_tp_hm_ssm_novelty, tp_hm_ssm_novelty_peaks = peak_picker.process(tp_hm_ssm_novelty) # tp_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_ssm_novelty_peaks] + [ao.gt[-1]] # # smoothed_gt_hm_tb_ssm_novelty, gt_hm_tb_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_ssm_novelty) # gt_hm_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_gt_hm_tp_ssm_novelty, gt_hm_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tp_ssm_novelty) # gt_hm_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tp_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_gt_tb_tp_ssm_novelty, gt_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_tb_tp_ssm_novelty) # gt_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] # smoothed_hm_tb_tp_ssm_novelty, hm_tb_tp_ssm_novelty_peaks = peak_picker.process(hm_tb_tp_ssm_novelty) # hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] # # smoothed_gt_hm_tb_tp_ssm_novelty, gt_hm_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_tp_ssm_novelty) # gt_hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] # # # Output detected segment locations. # if options.VERBOSE: # outdir = join(options.OUTPUT, 'detection', ao.name) # if not isdir(outdir): # os.mkdir(outdir) # # np.savetxt(join(outdir, 'gammatone_timbre_ssm.csv'), gt_tb_ssm_detection) # np.savetxt(join(outdir, 'gammatone_tempo_ssm.csv'), gt_tp_ssm_detection) # np.savetxt(join(outdir, 'gammatone_harmonic_ssm.csv'), gt_hm_ssm_detection) # np.savetxt(join(outdir, 'timbre_tempo_ssm.csv'), tb_tp_ssm_detection) # np.savetxt(join(outdir, 'timbre_harmonic_ssm.csv'), tb_hm_ssm_detection) # np.savetxt(join(outdir, 'tempo_harmonic_ssm.csv'), tp_hm_ssm_detection) # # np.savetxt(join(outdir, 'gammatone_timbre_tempo_ssm.csv'), gt_tb_tp_ssm_detection) # np.savetxt(join(outdir, 'gammatone_timbre_harmonic_ssm.csv'), gt_hm_tb_ssm_detection) # np.savetxt(join(outdir, 'gammatone_tempo_harmonic_ssm.csv'), gt_hm_tp_ssm_detection) # np.savetxt(join(outdir, 'timbre_tempo_harmonic_ssm.csv'), hm_tb_tp_ssm_detection) # np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_ssm.csv'), gt_hm_tb_tp_ssm_detection) # # gt_hm_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=0.5, combine=1.0) # gt_hm_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=3, combine=1.0) # gt_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=0.5, combine=1.0) # gt_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=3, combine=1.0) # gt_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=0.5, combine=1.0) # gt_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=3, combine=1.0) # tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=0.5, combine=1.0) # tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=3, combine=1.0) # tb_hm_ssm_res_05 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=0.5, combine=1.0) # tb_hm_ssm_res_3 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=3, combine=1.0) # tp_hm_ssm_res_05 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=0.5, combine=1.0) # tp_hm_ssm_res_3 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=3, combine=1.0) # # gt_hm_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=0.5, combine=1.0) # gt_hm_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=3, combine=1.0) # gt_hm_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=0.5, combine=1.0) # gt_hm_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=3, combine=1.0) # gt_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) # gt_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=3, combine=1.0) # hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) # hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=3, combine=1.0) # # gt_hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) # gt_hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=3, combine=1.0) # # with open(outfile3, 'a') as f: # csvwriter = csv.writer(f, delimiter=',') # csvwriter.writerow([ao.name, gt_tb_ssm_res_05.P, gt_tb_ssm_res_05.R, gt_tb_ssm_res_05.F, gt_tb_ssm_res_3.P, gt_tb_ssm_res_3.R, gt_tb_ssm_res_3.F, gt_tp_ssm_res_05.P, gt_tp_ssm_res_05.R, gt_tp_ssm_res_05.F, \ # gt_tp_ssm_res_3.P, gt_tp_ssm_res_3.R, gt_tp_ssm_res_3.F, gt_hm_ssm_res_05.P, gt_hm_ssm_res_05.R, gt_hm_ssm_res_05.F, gt_hm_ssm_res_3.P, gt_hm_ssm_res_3.R, gt_hm_ssm_res_3.F, \ # tb_tp_ssm_res_05.P, tb_tp_ssm_res_05.R, tb_tp_ssm_res_05.F, tb_tp_ssm_res_3.P, tb_tp_ssm_res_3.R, tb_tp_ssm_res_3.F, tb_hm_ssm_res_05.P, tb_hm_ssm_res_05.R, tb_hm_ssm_res_05.F, \ # tb_hm_ssm_res_3.P, tb_hm_ssm_res_3.R, tb_hm_ssm_res_3.F, tp_hm_ssm_res_05.P, tp_hm_ssm_res_05.R, tp_hm_ssm_res_05.F, tp_hm_ssm_res_3.P, tp_hm_ssm_res_3.R, tp_hm_ssm_res_3.F, \ # gt_tb_tp_ssm_res_05.P, gt_tb_tp_ssm_res_05.R, gt_tb_tp_ssm_res_05.F, gt_tb_tp_ssm_res_3.P, gt_tb_tp_ssm_res_3.R, gt_tb_tp_ssm_res_3.F, gt_hm_tb_ssm_res_05.P, gt_hm_tb_ssm_res_05.R, gt_hm_tb_ssm_res_05.F, \ # gt_hm_tb_ssm_res_3.P, gt_hm_tb_ssm_res_3.R, gt_hm_tb_ssm_res_3.F, gt_hm_tp_ssm_res_05.P, gt_hm_tp_ssm_res_05.R, gt_hm_tp_ssm_res_05.F, gt_hm_tp_ssm_res_3.P, gt_hm_tp_ssm_res_3.R, gt_hm_tp_ssm_res_3.F, \ # hm_tb_tp_ssm_res_05.P, hm_tb_tp_ssm_res_05.R, hm_tb_tp_ssm_res_05.F, hm_tb_tp_ssm_res_3.P, hm_tb_tp_ssm_res_3.R, hm_tb_tp_ssm_res_3.F, gt_hm_tb_tp_ssm_res_05.P, gt_hm_tb_tp_ssm_res_05.R, gt_hm_tb_tp_ssm_res_05.F, \ # gt_hm_tb_tp_ssm_res_3.P, gt_hm_tb_tp_ssm_res_3.R, gt_hm_tb_tp_ssm_res_3.F]) def main(): segmenter = SSMseg() segmenter.process() if __name__ == '__main__': main()