#!/usr/bin/env python
# encoding: utf-8
"""
plotSSM.py

A helper util to plot SSMs from different features.
"""

import matplotlib
# Uncomment to render without a display (e.g. on a headless server).
# matplotlib.use('Agg')
import sys
import os
import optparse
import csv
from itertools import combinations
from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
from copy import copy

import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
from scipy.signal import correlate2d, convolve2d, filtfilt, resample
from scipy.stats import mode, kurtosis, skew
from scipy.ndimage import zoom
from scipy.ndimage.morphology import binary_fill_holes
from scipy.ndimage.filters import *
from scipy.spatial.distance import squareform, pdist
from sklearn.decomposition import PCA
from sklearn.mixture import GMM
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import pairwise_distances
from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line
from skimage.filter import canny, sobel
from skimage.filter.rank import otsu
from skimage import data, measure, segmentation, morphology
from skimage.morphology import disk

from PeakPickerUtil import PeakPicker
from SegUtil import getMean, getStd, getDelta, getSSM, enhanceSSM, upSample, normaliseFeature


def parse_args():
    '''Parse the command line and return the (options, args) pair of optparse.

    Options: feature directories (GF, SF, TF), annotation directory (GT),
    output directory (OUTPUT) and the PLOT, TEST and VERBOSE switches.
    '''
    op = optparse.OptionParser()
    # IO options
    op.add_option('-g', '--gammatonegram-features', action="store", dest="GF",
                  default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048',
                  type="str", help="Loading features from..")
    op.add_option('-s', '--spectrogram-features', action="store", dest="SF",
                  default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048',
                  type="str", help="Loading features from..")
    op.add_option('-t', '--tempogram-features', action="store", dest="TF",
                  default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s',
                  type="str", help="Loading features from..")
    op.add_option('-a', '--annotations', action="store", dest="GT",
                  default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase',
                  type="str", help="Loading annotation files from.. ")
    # NOTE: the long flag is spelled '--ouput'; it is kept as is so existing
    # command lines keep working.
    op.add_option('-o', '--ouput', action="store", dest="OUTPUT",
                  default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng',
                  type="str", help="Write segmentation results to ")
    op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode")
    op.add_option('-v', '--verbose-output', action="store_true", dest="VERBOSE", default=False,
                  help="Exported raw detections.")

    return op.parse_args()


# NOTE: the options are parsed at import time because SSMseg.process() reads
# the module-level `options`; importing this module therefore reads sys.argv.
options, args = parse_args()


class FeatureObj():
    # Attribute names carried by a per-feature record.
    __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features',
                 'fp_features', 'lpc_features', 'harmonic_features', 'gammatone_ssm', 'fp_ssm', 'tempo_ssm',
                 'timbre_ssm', 'lpc_ssm', 'harmonic_ssm', 'ssm_timestamps']


class AudioObj():
    # Attribute names carried by a per-track record (features, SSMs, ground truth).
    __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features',
                 'timbre_features', 'fp_features', 'lpc_features', 'harmonic_features', 'combined_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'lpc_ssm', 'harmonic_ssm', 'fp_ssm',
                 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps']


class EvalObj():
    # Evaluation record filled in by SSMseg.pairwiseF().
    __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA']


class SSMseg(object):
    '''The main segmentation object'''

    def __init__(self):
        self.SampleRate = 44100
        self.NqHz = self.SampleRate // 2
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 64 # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        # NOTE: Match the following params with those used for feature extraction!

        # NOTE: Unlike spectrogram ones, Gammatone features are extracted without an FFT or any overlap.
        # The windowing is done for the purpose of chunking the audio to facilitate the gammatone
        # filtering. Despite the overlap in the time domain, only the first half after the filtering is
        # returned, resulting in no overlapping effect in the extracted features. To obtain features for
        # overlapped audio input, make the gammatoneLen equal to blockSize and return the whole filter
        # output.
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 16, 34, 50, 64]
        self.nGammatoneBands = 30
        self.histRes = 40
        self.lowFreq = 100
        self.highFreq = self.SampleRate // 4

        # Settings for extracting tempogram features.
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        # Peak picking settings
        self.threshold = 30
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 7

    def getGaussianParams(self, length, featureRate, timeWindow):
        '''Return (stepsize, num_win, win_len, gaussian_rate) for windowed Gaussian modelling.

        length: number of feature frames; featureRate: frames per second;
        timeWindow: window duration in seconds. Windows have an odd length and
        overlap by 50%.
        '''
        win_len = int(round(timeWindow * featureRate))
        # Force an odd window length, and never let it drop below one frame.
        win_len = max(1, win_len + (win_len % 2) - 1)

        # a 50% overlap between windows
        stepsize = max(1, int(np.ceil(win_len * 0.5)))
        num_win = int(np.floor(length / float(stepsize)))
        gaussian_rate = featureRate / float(stepsize)

        return stepsize, num_win, win_len, gaussian_rate

    def GaussianDistance(self, feature, featureRate, timeWindow):
        '''Return (dm, gaussian_timestamps): pairwise distances between windowed Gaussian models.

        Each window of `feature` is modelled by a GaussianFeature; dm[i, j] holds
        model_i.distance(model_j). Timestamps are read from self.timestamp.
        '''
        stepsize, num_win, win_len, _ = self.getGaussianParams(feature.shape[0], featureRate, timeWindow)
        print('stepsize, num_win, feature %s %s %s %s %s'
              % (stepsize, num_win, feature.shape, featureRate, timeWindow))
        gaussian_list = []
        gaussian_timestamps = []

        for num in range(num_win):
            start = num * stepsize
            # NOTE(review): GaussianFeature is neither defined nor imported in this
            # module -- confirm where it is expected to come from.
            gaussian_list.append(GaussianFeature(feature[start:start + win_len, :], 2))
            gaussian_timestamps.append(self.timestamp[start + 1])

        n_models = len(gaussian_list)
        dm = np.zeros((n_models, n_models))

        # Iterate over index pairs rather than looking items up with list.index(),
        # which is quadratic and picks the wrong slot for equal-comparing models.
        for i, j in combinations(range(n_models), 2):
            dm[i, j] = gaussian_list[i].distance(gaussian_list[j])
            dm[j, i] = gaussian_list[j].distance(gaussian_list[i])
        return dm, gaussian_timestamps

    def gaussian_kernel(self, size):
        '''Create a gaussian tapered 45 degrees rotated checkerboard kernel.

        With kernel size 3 this produces:
         0.1353 -0.3679  0.1353
         0.3679  1.0000  0.3679
         0.1353 -0.3679  0.1353
        '''
        n = float(np.ceil(size / 2.0))
        offsets = np.arange(1, size + 1, dtype=float) - n
        di = offsets[:, np.newaxis]   # row offset from the kernel centre
        dj = offsets[np.newaxis, :]   # column offset from the kernel centre
        gauss = np.exp(-4.0 * (np.square(di / n) + np.square(dj / n)))
        # Cells between the two half-slope diagonals take the negative sign.
        negative = np.logical_xor(dj > np.floor(di / 2.0), dj > np.floor(-di / 2.0))
        return np.where(negative, -gauss, gauss)

    def getDiagonalSlice(self, ssm, width):
        '''Return a diagonal slice of the ssm given its width, with 45 degrees rotation.
        Note: requires 45 degrees rotated kernel also.'''
        w = int(np.floor(width / 2.0))
        length = len(np.diagonal(ssm))
        band = np.zeros((2 * w + 1, length))
        for i in range(-w, w + 1):
            # Off-diagonals are shorter than the main one: pad both ends so that
            # every row has `length` entries.
            head = np.zeros(int(np.floor(abs(i) / 2.0)))
            tail = np.zeros(int(np.ceil(abs(i) / 2.0)))
            band[w + i, :] = np.hstack((head, np.diagonal(ssm, i), tail))
        return band

    def getNoveltyCurve(self, dm, kernel_size):
        '''Return novelty score from distance matrix.'''
        kernel_size = int(np.floor(kernel_size / 2.0) + 1)
        band = self.getDiagonalSlice(dm, kernel_size)
        kernel = self.gaussian_kernel(kernel_size)
        xc = convolve2d(band, kernel, mode='same')
        # Flatten numerically exploded values.
        xc[abs(xc) > 1e+10] = 0.00001
        # The centre row of the convolved band is the novelty curve.
        return xc[int(np.floor(xc.shape[0] / 2.0)), :]

    def mergeBlocks(self, SSM, thresh=0.9, size=5):
        '''Merge consecutive small blocks along the diagonal.

        SSM is modified in place and also returned. `thresh` is accepted for
        interface compatibility but is not used.
        '''
        n = len(SSM)
        idx = 1
        while idx < n:
            # Grow symmetrically around idx while column idx stays positive on both sides.
            i = 0
            while ((idx - 1 - i) > 0 and (idx + 1 + i) < n
                   and SSM[idx - 1 - i, idx] > 0 and SSM[idx + 1 + i, idx] > 0):
                i += 1
            if i > size // 2:
                lo, hi = idx - 1 - i, min(idx + i, n)
                SSM[lo:hi, lo:hi] = 1.0
            idx += max(1, i)
        return SSM

    def getGMMs(self, feature, segment_boundaries):
        '''Return GMMs for located segments

        One single-component model is built per segment ending at each boundary;
        the first segment starts at frame 0. No boundaries gives an empty list.
        '''
        boundaries = list(segment_boundaries)
        starts = [None] + boundaries[:-1]
        # NOTE(review): GmmDistance is neither defined nor imported in this
        # module -- confirm where it is expected to come from.
        return [GmmDistance(feature[start:end, :], components=1)
                for start, end in zip(starts, boundaries)]

    def normaliseFeature(self, feature_array):
        '''Return a min-max normalised float copy of feature_array.

        1-d input is scaled as a whole, otherwise each row is scaled
        independently. NaN/Inf entries and constant rows map to 0.
        '''
        feature_array = np.array(feature_array, dtype=float)
        feature_array[np.isnan(feature_array)] = 0.0
        feature_array[np.isinf(feature_array)] = 0.0
        if feature_array.size == 0:
            return feature_array

        # A constant vector/row gives 0/0; silence the warning and zero it below.
        with np.errstate(divide='ignore', invalid='ignore'):
            if len(feature_array.shape) == 1:
                lo, hi = feature_array.min(), feature_array.max()
                feature_array = (feature_array - lo) / (hi - lo)
            else:
                mins = feature_array.min(axis=1)
                maxs = feature_array.max(axis=1)
                feature_array = (feature_array - mins[:, np.newaxis]) / (maxs - mins)[:, np.newaxis]
        feature_array[np.isnan(feature_array)] = 0.0
        return feature_array

    def upSample(self, feature_array, step):
        '''Resample downsized tempogram features along the first axis by the zoom factor `step`.'''
        return zoom(feature_array, (step, 1))

    def stripeDistance(self, feature_array, feature_len, step, metric='cosine'):
        '''Return distance matrix calculated for 2d time invariant features.

        feature_array is cut into blocks of `step` rows; the result is a
        similarity matrix (1 = identical) with shape[0] // feature_len rows.
        '''
        size = feature_array.shape[0] // feature_len
        dm = np.zeros((size, size))
        if size == 0:
            return dm

        for i in range(size):
            block_i = feature_array[i * step:(i + 1) * step, :]
            for j in range(i, size):
                block_j = feature_array[j * step:(j + 1) * step, :]
                dm[i, j] = np.sum(pairwise_distances(block_i, block_j, metric=metric))
                dm[j, i] = dm[i, j]
        dm[np.isnan(dm)] = np.nanmax(dm)
        span = dm.max() - dm.min()
        # All distances equal: every block is equally similar.
        ssm = 1 - (dm - dm.min()) / span if span != 0 else np.ones_like(dm)
        np.fill_diagonal(ssm, 1)
        return ssm

    def _aggregate(self, feature, winlen, stepsize, reducer):
        '''Apply reducer(window, axis=0) to sliding windows of `feature` and stack the results.'''
        steps = int((feature.shape[0] - winlen + stepsize) // stepsize)
        return np.array([reducer(feature[i * stepsize:i * stepsize + winlen, :], axis=0)
                         for i in range(steps)])

    def getMean(self, feature, winlen, stepsize):
        '''Return the per-window mean of `feature` (windows of winlen rows, hop of stepsize rows).'''
        return self._aggregate(feature, winlen, stepsize, np.mean)

    def getStd(self, feature, winlen, stepsize):
        '''Return the per-window standard deviation of `feature` (windows of winlen rows, hop of stepsize rows).'''
        return self._aggregate(feature, winlen, stepsize, np.std)

    def getDelta(self, feature):
        '''Return the first-order difference of `feature` along axis 0, zero-padded at the first row.'''
        return np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0)))

    def getSkew(self, feature, axis=-1):
        '''Return the skewness of `feature` along `axis`.'''
        return skew(feature, axis=axis)

    def getKurtosis(self, feature, axis=-1):
        '''Return the kurtosis of `feature` along `axis`.'''
        return kurtosis(feature, axis=axis)

    def getRolloff(self, data, thresh=0.9):
        '''Return the rolloff position for a (frames, channels, bands) array.

        For every frame/channel this is the position (on a 0..10 scale across the
        bands) of the first band at which the cumulative sum exceeds `thresh`
        times the total; the last band is used when that never happens.
        '''
        data = np.asarray(data)
        nFrames, nChannels, nBands = data.shape
        if nBands == 0:
            return np.zeros((nFrames, nChannels))
        freq = np.linspace(0, 10, nBands)
        tpower = np.sum(data, axis=-1)
        cumulative = np.cumsum(data, axis=-1, dtype=float)
        exceeded = cumulative > (thresh * tpower)[:, :, np.newaxis]
        band = np.argmax(exceeded, axis=-1)
        # argmax returns 0 when nothing exceeds the threshold; use the last band instead.
        band[~exceeded.any(axis=-1)] = nBands - 1
        return freq[band]

    def getCentroid(self, X):
        '''Return the centroid of a (frames, channels, bands) array on a 0..10 band scale.'''
        X = np.asarray(X)
        nFrames, nChannels, nBands = X.shape
        freq = np.linspace(0, 10, nBands)
        # The small constant keeps silent frames from dividing by zero.
        return np.sum(X * freq, axis=-1) / (np.sum(X, axis=-1) + 0.0005)

    def trackDF(self, onset1_index, df2):
        '''In the second round of detection, remove the known onsets from the DF by tracking from the peak
        given by the first round to a valley, to diminish the recognised peaks on top of which to start
        new detection. df2 is modified in place and returned.'''
        for onset in onset1_index:
            for i in range(onset, 1, -1):
                if df2[i] < df2[i - 1]:
                    # Reached the valley before this peak.
                    break
                df2[i] = 0.0
        return df2

    def getSSM(self, feature_array, metric='cosine', norm='simple'):
        '''Compute SSM given input feature array.

        args: norm: only 'simple' (min-max scaling of the distances) is
        implemented; any other value raises ValueError.
        '''
        if norm != 'simple':
            raise ValueError("Unsupported norm %r: only 'simple' is implemented" % (norm,))
        dm = np.nan_to_num(pairwise_distances(feature_array, metric=metric))
        lo = np.min(dm)
        span = np.max(dm) - lo
        if span == 0:
            # All frames are equidistant: everything is maximally similar.
            return np.ones_like(dm)
        return 1 - (dm - lo) / span

    def reduceSSM(self, ssm, maxfilter_size = 2, remove_size=50):
        '''Return a thresholded copy of ssm with a scaled local Otsu map subtracted.

        The input matrix is left untouched. `maxfilter_size` and `remove_size`
        are accepted for interface compatibility but are not used.
        '''
        reduced_ssm = np.array(ssm, dtype=float)
        reduced_ssm[reduced_ssm < 0.75] = 0
        local_otsu = otsu(reduced_ssm, disk(5)).astype(float)
        span = np.max(local_otsu) - np.min(local_otsu)
        if span != 0:
            local_otsu = (local_otsu - np.min(local_otsu)) / span
        else:
            local_otsu = np.zeros_like(local_otsu)
        return reduced_ssm - 0.6 * local_otsu

    def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
        '''Pairwise F measure evaluation of detection rates.

        annotation, detection: boundary times. The last annotated boundary is
        appended to the detections before matching. An annotation counts as
        found when a detection lies within tolerance/2 of it. Returns an EvalObj
        with TP, FP, FN, P, R, F and the mean annotation-to-detection (AD) and
        detection-to-annotation (DA) distances. `combine` is not used.
        '''
        annotation = np.asarray(annotation, dtype=float)
        detection = np.append(np.asarray(detection, dtype=float), annotation[-1])
        res = EvalObj()
        gt = len(annotation) # Total number of ground truth data points
        dt = len(detection) # Total number of experimental data points

        half_tol = tolerance / 2.0
        ann_col = annotation[:, np.newaxis]
        gap = np.abs(ann_col - detection[np.newaxis, :])
        matched = ((ann_col >= detection - half_tol) & (ann_col <= detection + half_tol)).any(axis=1)

        res.TP = int(matched.sum()) # Number of annotations matched by at least one detection
        res.FP = max(0, dt - res.TP)
        res.FN = max(0, gt - res.TP)

        res.AD = np.mean(gap.min(axis=1))
        res.DA = np.mean(gap.min(axis=0))

        res.P, res.R, res.F = 0.0, 0.0, 0.0

        if res.TP == 0:
            return res

        res.P = res.TP / float(dt)
        res.R = res.TP / float(gt)
        res.F = 2 * res.P * res.R / (res.P + res.R)
        return res

    def plotDetection(self, ssm, novelty, smoothed_novelty, gt, det, filename):
        '''Plot the SSM above the raw (green) and smoothed (blue) novelty curves and save to filename.

        Ground truth boundaries are drawn solid, detections dashed; both are
        rescaled from time to frame positions using the last ground truth value.
        '''
        gt = np.asarray(gt, dtype=float)
        det = np.asarray(det, dtype=float)
        n = len(novelty)
        gt_plot = gt / gt[-1] * n
        det_plot = det / gt[-1] * n

        fig = plt.figure(figsize=(10, 16))
        gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
        ax0 = plt.subplot(gs[0])
        ax1 = plt.subplot(gs[1], sharex=ax0)

        ax0.imshow(ssm)
        ax0.vlines(gt_plot, 0, len(ssm), colors ='w', linestyles='solid')
        ax0.vlines(det_plot, 0, len(ssm), colors='k', linestyles='dashed')

        frames = np.linspace(0, n - 1, n)
        ax1.plot(frames, novelty, 'g', frames, smoothed_novelty, 'b')
        y_min = min(min(novelty), min(smoothed_novelty))
        y_max = max(max(novelty), max(smoothed_novelty))
        ax1.vlines(gt_plot, y_min, y_max, colors ='r', linestyles='solid')
        ax1.vlines(det_plot, y_min, y_max, colors='k', linestyles='dashed')

        plt.savefig(filename)
        # Release the figure; otherwise they pile up when plotting many tracks.
        plt.close(fig)

        return None

    def _makePeakPicker(self):
        '''Return a PeakPicker configured from the peak picking settings of this object.'''
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0 # Alpha norm
        peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False
        return peak_picker

    def _loadFeatureGroup(self, feature_dirs, name, skip_rows=0):
        '''Load and column-stack the feature files of track `name` found in feature_dirs.

        In every directory the first file named '<name>_vamp...' that parses into
        a non-empty 2-d table is used. The first column holds timestamps, the
        remaining ones the features; `skip_rows` leading rows are dropped. All
        tables are truncated to the shortest one before stacking.
        Returns (features, timestamps of the last table loaded).
        Raises IOError when no usable file is found in any directory.
        '''
        featureset = []
        timestamps = None
        for feature_dir in feature_dirs:
            for f in sorted(os.listdir(feature_dir)):
                marker = f.find('_vamp')
                if marker < 0 or f[:marker] != name:
                    continue
                table = np.genfromtxt(join(feature_dir, f), delimiter=',', filling_values=0.0)
                if table.ndim != 2:
                    continue
                values = table[skip_rows:, 1:]
                if values.size == 0:
                    continue
                featureset.append(values)
                timestamps = table[skip_rows:, 0]
                break
        if not featureset:
            raise IOError("No feature file found for '%s' in: %s" % (name, ', '.join(feature_dirs)))
        n_frame = min(x.shape[0] for x in featureset)
        return np.hstack([x[:n_frame, :] for x in featureset]), timestamps[:n_frame]

    def _prepareFeature(self, feature, n_frames=None):
        '''Normalise `feature`, optionally resample it to n_frames rows, and zero NaN/Inf entries.'''
        feature = normaliseFeature(feature)
        if n_frames is not None:
            feature = resample(feature, n_frames)
        feature[np.isnan(feature)] = 0.0
        feature[np.isinf(feature)] = 0.0
        return feature

    def _featureToSSM(self, feature, winlen, stepsize, n_components=5):
        '''Aggregate `feature`, project it with PCA and return (projected feature, enhanced SSM).'''
        feature = getMean(feature, winlen=winlen, stepsize=stepsize)
        pca = PCA(n_components=n_components)
        pca.fit(feature)
        feature = pca.transform(feature)
        return feature, enhanceSSM(getSSM(feature))

    def _saveSSMPlot(self, ssm, gt, filename):
        '''Save `ssm` as a PDF image with the ground truth boundaries `gt` drawn as vertical lines.'''
        plt.figure(figsize=(10, 10))
        plt.vlines(gt / gt[-1] * ssm.shape[0], 0, ssm.shape[0])
        plt.imshow(ssm)
        plt.savefig(filename, format='pdf')
        plt.close()

    def process(self):
        '''Plot the enhanced SSM of each feature group for every annotated track.

        For every annotation file in options.GT the gammatone, timbre (MFCC),
        tempo and harmonic (chroma) features are loaded, normalised, resampled
        to the tempogram frame count, reduced with PCA and turned into enhanced
        SSMs. One PDF per feature group is written to options.OUTPUT; with
        options.VERBOSE the SSMs are also dumped as text to
        options.OUTPUT/ssm_data. LPC features are currently not processed.
        '''
        gammatone_dirs = [join(options.GF, f) for f in ('dct', 'pcamean', 'contrast6')]
        timbre_dirs = [join(options.SF, f) for f in ('mfcc_harmonic',)]
        tempo_dirs = [join(options.TF, f) for f in ('ti_percussive_cdsf', 'tir_percussive_cdsf')]
        harmonic_dirs = [join(options.SF, f) for f in ('chromagram_harmonic',)]

        # No temporal aggregation: the features are already at the tempogram resolution.
        aggregation_window, aggregation_step = 1, 1

        ssm_data_dir = join(options.OUTPUT, 'ssm_data')
        out_dirs = [options.OUTPUT, ssm_data_dir] if options.VERBOSE else [options.OUTPUT]
        for out_dir in out_dirs:
            if not isdir(out_dir):
                os.makedirs(out_dir)

        audio_files = sorted(x for x in os.listdir(options.GT) if not x.startswith("."))

        # For each audio file, load specific features
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]

            # Beatles style '.lab' annotations: start time, end time, label.
            # (Other datasets use '.txt' or comma separated '.csv' files with the
            # boundary time in column 0 and the label in column 1.)
            annotation_file = join(options.GT, ao.name + '.lab')
            ao.gt = np.unique(np.genfromtxt(annotation_file, usecols=(0, 1)))
            ao.label = np.genfromtxt(annotation_file, usecols=2, dtype=str)

            ao.gammatone_features, _ = self._loadFeatureGroup(gammatone_dirs, ao.name)
            ao.timbre_features, _ = self._loadFeatureGroup(timbre_dirs, ao.name)
            # The first row of the tempogram files is skipped.
            ao.tempo_features, ao.tempo_timestamps = self._loadFeatureGroup(tempo_dirs, ao.name, skip_rows=1)
            ao.harmonic_features, _ = self._loadFeatureGroup(harmonic_dirs, ao.name)

            # Reshape features (downsample) to match tempogram ones
            step = ao.tempo_features.shape[0]
            ao.gammatone_features = self._prepareFeature(ao.gammatone_features, step)
            ao.timbre_features = self._prepareFeature(ao.timbre_features, step)
            ao.harmonic_features = self._prepareFeature(ao.harmonic_features, step)
            ao.tempo_features = self._prepareFeature(ao.tempo_features)

            # Get aggregated features for computing ssm
            ao.gammatone_features, ao.gammatone_ssm = self._featureToSSM(
                ao.gammatone_features, aggregation_window, aggregation_step)
            ao.tempo_features, ao.tempo_ssm = self._featureToSSM(
                ao.tempo_features, aggregation_window, aggregation_step)
            ao.timbre_features, ao.timbre_ssm = self._featureToSSM(
                ao.timbre_features, aggregation_window, aggregation_step)
            ao.harmonic_features, ao.harmonic_ssm = self._featureToSSM(
                ao.harmonic_features, aggregation_window, aggregation_step)

            frame_index = aggregation_step * np.arange(0, ao.gammatone_ssm.shape[0])
            ao.ssm_timestamps = np.asarray(ao.tempo_timestamps)[frame_index]

            # Single feature SSMs.
            self._saveSSMPlot(ao.gammatone_ssm, ao.gt, join(options.OUTPUT, ao.name+'-enhanced-gammatone.pdf'))
            self._saveSSMPlot(ao.tempo_ssm, ao.gt, join(options.OUTPUT, ao.name+'-enhanced-hpss_tempo.pdf'))
            self._saveSSMPlot(ao.timbre_ssm, ao.gt, join(options.OUTPUT, ao.name+'-enhanced-hpss_mfcc.pdf'))
            self._saveSSMPlot(ao.harmonic_ssm, ao.gt, join(options.OUTPUT, ao.name+'-enhanced-hpss_chroma.pdf'))

            if options.VERBOSE:
                np.savetxt(join(ssm_data_dir, ao.name+'-gammatone.txt'), np.array(ao.gammatone_ssm), delimiter=',')
                np.savetxt(join(ssm_data_dir, ao.name+'-tempo.txt'), np.array(ao.tempo_ssm), delimiter=',')
                np.savetxt(join(ssm_data_dir, ao.name+'-mfcc.txt'), np.array(ao.timbre_ssm), delimiter=',')
                np.savetxt(join(ssm_data_dir, ao.name+'-chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',')


def main():
    '''Run the SSM plotting over all annotated tracks.'''
    segmenter = SSMseg()
    segmenter.process()


if __name__ == '__main__':
    main()