Mercurial > hg > segmentation
view SegEval.py @ 12:c23658e8ae38
fp feature notebook
author | mitian |
---|---|
date | Mon, 25 May 2015 17:27:48 +0100 |
parents | 6d1c6639f5db |
children | 6dae41887406 |
line wrap: on
line source
#!/usr/bin/env python # encoding: utf-8 """ SegEval.py The main segmentation program. Created by mi tian on 2015-04-02. Copyright (c) 2015 __MyCompanyName__. All rights reserved. """ # Load starndard python libs import sys, os, optparse, csv from itertools import combinations from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext from copy import copy import matplotlib # matplotlib.use('Agg') import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import numpy as np import scipy as sp from scipy.signal import correlate2d, convolve2d, filtfilt, resample from scipy.ndimage.filters import * from sklearn.decomposition import PCA from sklearn.mixture import GMM from sklearn.cluster import KMeans from sklearn.preprocessing import normalize from sklearn.metrics.pairwise import pairwise_distances # Load dependencies from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature from utils.PeakPickerUtil import PeakPicker from utils.gmmdist import * from utils.GmmMetrics import GmmDistance from utils.RankClustering import rClustering from utils.kmeans import Kmeans from utils.PathTracker import PathTracker from utils.OnsetPlotProc import onset_plot, plot_on # Load bourdary retrieval utilities import cnmf as cnmf_S import foote as foote_S import sf as sf_S import fmc2d as fmc2d_S import novelty as novelty_S # Algorithm params h = 8 # Size of median filter for features in C-NMF R = 15 # Size of the median filter for the activation matrix C-NMF rank = 4 # Rank of decomposition for the boundaries rank_labels = 6 # Rank of decomposition for the labels R_labels = 6 # Size of the median filter for the labels # Foote M = 2 # Median filter for the audio features (in beats) Mg = 32 # Gaussian kernel size L = 16 # Size of the median filter for the adaptive threshold # 2D-FMC N = 8 # Size of the fixed length segments (for 2D-FMC) # Define arg parser def parse_args(): op = optparse.OptionParser() # IO options op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading gammatone features from.." ) op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading spectral features from.." ) op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading tempogram features from.." ) op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." ) op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") # boundary retrieval options op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", type='choice', choices=['novelty', 'cnmf', 'foote', 'sf'], default='novelty', help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." ) op.add_option('-l', '--labeling-method', action="store", dest="LABEL", type='choice', choices=['cnmf', 'fmc2d'], default='cnmf', help="Choose boundary labeling algorithm ('cnmf', 'fmc2d')." ) # Plot/print/mode options op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots") op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.") return op.parse_args() options, args = parse_args() class FeatureObj() : __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'gammatone_ssm', 'tempo_ssm', 'timbre_features', 'harmonic_ssm', 'ssm_timestamps'] class AudioObj(): __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',\ 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps'] class EvalObj(): __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA'] class SSMseg(object): '''The main segmentation object''' def __init__(self): self.SampleRate = 44100 self.NqHz = self.SampleRate/2 self.timestamp = [] self.previousSample = 0.0 self.featureWindow = 6.0 self.featureStep = 3.0 self.kernel_size = 64 # Adjust this param according to the feature resolution.pq self.blockSize = 2048 self.stepSize = 1024 '''NOTE: Match the following params with those used for feature extraction!''' '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without taking an FFT. The windowing is done under the purpose of chunking the audio to facilitate the gammatone filtering with the specified blockSize and stepSize. The resulting gammatonegram is aggregated every gammatoneLen without overlap.''' self.gammatoneLen = 2048 self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20] self.nGammatoneBands = 20 self.lowFreq = 100 self.highFreq = self.SampleRate / 4 '''Settings for extracting tempogram features.''' self.tempoWindow = 6.0 self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600] '''Peak picking settings for novelty based method''' self.threshold = 30 self.confidence_threshold = 0.5 self.delta_threshold = 0.0 self.backtracking_threshold = 1.9 self.polyfitting_on = True self.medfilter_on = True self.LPfilter_on = True self.whitening_on = False self.aCoeffs = [1.0000, -0.5949, 0.2348] self.bCoeffs = [0.1600, 0.3200, 0.1600] self.cutoff = 0.34 self.medianWin = 7 if plot_on : onset_plot.reset() def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0, idx2time=None): '''Pairwise F measure evaluation of detection rates.''' res = EvalObj() res.TP, res.FP, res.FN = 0, 0, 0 res.P, res.R, res.F = 0.0, 0.0, 0.0 res.AD, res.DA = 0.0, 0.0 if len(detection) == 0: return res gt = len(annotation) # Total number of ground truth data points dt = len(detection) # Total number of experimental data points foundIdx = [] D_AD = np.zeros(gt) D_DA = np.zeros(dt) if idx2time != None: # Map detected idxs to real time detection = [idx2time[int(np.rint(i))] for i in detection] + [annotation[-1]] # print 'detection', detection detection = np.append(detection, annotation[-1]) for dtIdx in xrange(dt): D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation)) for gtIdx in xrange(gt): D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection)) for dtIdx in xrange(dt): if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0): res.TP = res.TP + 1.0 foundIdx.append(gtIdx) foundIdx = list(set(foundIdx)) res.TP = len(foundIdx) res.FP = max(0, dt - res.TP) res.FN = max(0, gt - res.TP) res.AD = np.mean(D_AD) res.DA = np.mean(D_DA) if res.TP == 0: return res res.P = res.TP / float(dt) res.R = res.TP / float(gt) res.F = 2 * res.P * res.R / (res.P + res.R) return res def writeIndividualHeader(self, filename): '''Write header of output files for individual features.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \ 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \ 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \ 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \ 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \ 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3']) def writeIndividualRes(self, filename, ao_name, gt_res_05, gt_res_3, harmonic_res_05, harmonic_res_3, timbre_res_05, timbre_res_3, tempo_res_05, tempo_res_3): '''Write result of single detection for individual features.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow([ao_name, gt_res_05.TP, gt_res_05.FP, gt_res_05.FN, gt_res_05.P, gt_res_05.R, gt_res_05.F, gt_res_05.AD, gt_res_05.DA, gt_res_3.TP, gt_res_3.FP, gt_res_3.FN, gt_res_3.P, \ gt_res_3.R, gt_res_3.F, gt_res_3.AD, gt_res_3.DA, harmonic_res_05.TP, harmonic_res_05.FP, harmonic_res_05.FN, harmonic_res_05.P, harmonic_res_05.R, harmonic_res_05.F, harmonic_res_05.AD, harmonic_res_05.DA, \ harmonic_res_3.TP, harmonic_res_3.FP, harmonic_res_3.FN, harmonic_res_3.P, harmonic_res_3.R, harmonic_res_3.F, harmonic_res_3.AD, harmonic_res_3.DA, timbre_res_05.TP, timbre_res_05.FP, \ timbre_res_05.FN, timbre_res_05.P, timbre_res_05.R, timbre_res_05.F, timbre_res_05.AD, timbre_res_05.DA, timbre_res_3.TP, timbre_res_3.FP, timbre_res_3.FN, timbre_res_3.P, timbre_res_3.R, timbre_res_3.F, \ timbre_res_3.AD, timbre_res_3.DA, tempo_res_05.TP, tempo_res_05.FP, tempo_res_05.FN, tempo_res_05.P, tempo_res_05.R, tempo_res_05.F, tempo_res_05.AD, tempo_res_05.DA, tempo_res_3.TP, tempo_res_3.FP, \ tempo_res_3.FN, tempo_res_3.P, tempo_res_3.R, tempo_res_3.F, tempo_res_3.AD, tempo_res_3.DA]) def writeCombinedHeader(self, filename): '''Write header of output files for combined features.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\ 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'hm_tb_P_0.5', 'hm_tb_R_0.5', 'hm_tb_F_0.5', \ 'hm_tb_P_3', 'hm_tb_R_3', 'hm_tb_F_3', 'hm_tp_P_0.5', 'hm_tp_R_0.5', 'hm_tp_F_0.5', 'hm_tp_P_3', 'hm_tp_R_3', 'hm_tp_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \ 'gt_tb_tp_F_3', 'gt_hm_tb_P_0.5', 'gt_hm_tb_R_0.5', 'gt_hm_tb_F_0.5', 'gt_hm_tb_P_3', 'gt_hm_tb_R_3', 'gt_hm_tb_F_3', 'gt_hm_tp_P_0.5', 'gt_hm_tp_R_0.5', 'gt_hm_tp_F_0.5', 'gt_hm_tp_P_3', 'gt_hm_tp_R_3', 'gt_hm_tp_F_3', \ 'hm_tb_tp_P_0.5', 'hm_tb_tp_R_0.5', 'hm_tb_tp_F_0.5', 'hm_tb_tp_P_3', 'hm_tb_tp_R_3', 'hm_tb_tp_F_3', 'gt_hm_tb_tp_P_0.5', 'gt_hm_tb_tp_R_0.5', 'gt_hm_tb_tp_F_0.5', 'gt_hm_tb_tp_P_3', 'gt_hm_tb_tp_R_3', 'gt_hm_tb_tp_F_3']) def writeCombinedRes(self, filename, ao_name, gt_hm_res_05, gt_hm_res_3, gt_tb_res_05, gt_tb_res_3, gt_tp_res_05, gt_tp_res_3, hm_tb_res_05, hm_tb_res_3, hm_tp_res_05, hm_tp_res_3, \ tb_tp_res_05, tb_tp_res_3, gt_hm_tb_res_05, gt_hm_tb_res_3, gt_hm_tp_res_05, gt_hm_tp_res_3, gt_tb_tp_res_05, gt_tb_tp_res_3, hm_tb_tp_res_05, hm_tb_tp_res_3, gt_hm_tb_tp_res_05, gt_hm_tb_tp_res_3): '''Write result of single detection for combined features.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow([ao_name, gt_tb_res_05.P, gt_tb_res_05.R, gt_tb_res_05.F, gt_tb_res_3.P, gt_tb_res_3.R, gt_tb_res_3.F, gt_tp_res_05.P, gt_tp_res_05.R, gt_tp_res_05.F, gt_tp_res_3.P, gt_tp_res_3.R, gt_tp_res_3.F, \ gt_hm_res_05.P, gt_hm_res_05.R, gt_hm_res_05.F, gt_hm_res_3.P, gt_hm_res_3.R, gt_hm_res_3.F, tb_tp_res_05.P, tb_tp_res_05.R, tb_tp_res_05.F, tb_tp_res_3.P, tb_tp_res_3.R, tb_tp_res_3.F, \ hm_tb_res_05.P, hm_tb_res_05.R, hm_tb_res_05.F, hm_tb_res_3.P, hm_tb_res_3.R, hm_tb_res_3.F, hm_tp_res_05.P, hm_tp_res_05.R, hm_tp_res_05.F, hm_tp_res_3.P, hm_tp_res_3.R, hm_tp_res_3.F, \ gt_tb_tp_res_05.P, gt_tb_tp_res_05.R, gt_tb_tp_res_05.F, gt_tb_tp_res_3.P, gt_tb_tp_res_3.R, gt_tb_tp_res_3.F, gt_hm_tb_res_05.P, gt_hm_tb_res_05.R, gt_hm_tb_res_05.F, gt_hm_tb_res_3.P, gt_hm_tb_res_3.R, gt_hm_tb_res_3.F, \ gt_hm_tp_res_05.P, gt_hm_tp_res_05.R, gt_hm_tp_res_05.F, gt_hm_tp_res_3.P, gt_hm_tp_res_3.R, gt_hm_tp_res_3.F, hm_tb_tp_res_05.P, hm_tb_tp_res_05.R, hm_tb_tp_res_05.F, hm_tb_tp_res_3.P, hm_tb_tp_res_3.R, hm_tb_tp_res_3.F, \ gt_hm_tb_tp_res_05.P, gt_hm_tb_tp_res_05.R, gt_hm_tb_tp_res_05.F, gt_hm_tb_tp_res_3.P, gt_hm_tb_tp_res_3.R, gt_hm_tb_tp_res_3.F]) def writeMergedHeader(self, filename): '''Write header of output files merging individual detections.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow(['audio', 'merged_tp_05', 'merged_fp_05', 'merged_fn_05', 'merged_P_05', 'merged_R_05', 'merged_F_05', 'merged_AD_05', 'merged_DA_05', 'merged_tp_3', \ 'merged_fp_3', 'merged_fn_3', 'merged_P_3', 'merged_R_3', 'merged_F_3', 'merged_AD_3', 'merged_DA_3']) def writeMergedRes(self, filename, ao_name, merged_res_05, merged_res_3): '''Write results by merging individual detections.''' with open(filename, 'a') as f: csvwriter = csv.writer(f, delimiter=',') csvwriter.writerow([ao_name, merged_res_05.TP, merged_res_05.FP, merged_res_05.FN, merged_res_05.P, merged_res_05.R, merged_res_05.F, merged_res_05.AD, merged_res_05.DA, \ merged_res_3.TP, merged_res_3.FP, merged_res_3.FN, merged_res_3.P, merged_res_3.R, merged_res_3.F, merged_res_3.AD, merged_res_3.DA]) def process(self): '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending. In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection threshold - segmentation f measure curve. ''' peak_picker = PeakPicker() peak_picker.params.alpha = 9.0 # Alpha norm peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0 peak_picker.params.QuadThresh_b = 0.0 peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0 peak_picker.params.rawSensitivity = 20 peak_picker.params.aCoeffs = self.aCoeffs peak_picker.params.bCoeffs = self.bCoeffs peak_picker.params.preWin = self.medianWin peak_picker.params.postWin = self.medianWin + 1 peak_picker.params.LP_on = self.LPfilter_on peak_picker.params.Medfilt_on = self.medfilter_on peak_picker.params.Polyfit_on = self.polyfitting_on peak_picker.params.isMedianPositive = False # Settings used for feature extraction feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow) feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep) aggregation_window, aggregation_step = 100, 50 featureRate = float(self.SampleRate) / self.stepSize audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] if options.TEST: audio_files = audio_files[:1] audio_files.sort() audio_list = [] gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')] gammatone_feature_list = ['contrast6', 'rolloff4', 'dct'] tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')] tempo_feature_list = ['ti', 'tir'] timbre_feature_list = ['mfcc_harmonic'] harmonic_feature_list = ['chromagram'] gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list] timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list] tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list] harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list] fobj_list = [] # For each audio file, load specific features for audio in audio_files: ao = AudioObj() ao.name = splitext(audio)[0] annotation_file = join(options.GT, ao.name+'.txt') # iso, salami ao.gt = np.genfromtxt(annotation_file, usecols=0) ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) # annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng # ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') # ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], [] for feature in gammatone_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(gammatone_feature_list) > 1: n_frame = np.min([x.shape[0] for x in gammatone_featureset]) gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset] ao.gammatone_features = np.hstack((gammatone_featureset)) else: ao.gammatone_features = gammatone_featureset[0] for feature in timbre_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(timbre_feature_list) > 1: n_frame = np.min([x.shape[0] for x in timbre_featureset]) timbre_featureset = [x[:n_frame,:] for x in timbre_featureset] ao.timbre_features = np.hstack((timbre_featureset)) else: ao.timbre_features = timbre_featureset[0] for feature in tempo_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]) ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0] break if len(tempo_feature_list) > 1: n_frame = np.min([x.shape[0] for x in tempo_featureset]) tempo_featureset = [x[:n_frame,:] for x in tempo_featureset] ao.tempo_features = np.hstack((tempo_featureset)) else: ao.tempo_features = tempo_featureset[0] for feature in harmonic_feature_list: for f in os.listdir(feature): if f[:f.find('_vamp')]==ao.name: harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) break if len(harmonic_feature_list) > 1: n_frame = np.min([x.shape[0] for x in harmonic_featureset]) harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset] ao.harmonic_features = np.hstack((harmonic_featureset)) else: ao.harmonic_features = harmonic_featureset[0] # Get aggregated features for computing ssm aggregation_window, aggregation_step = 1,1 featureRate = float(self.SampleRate) /self.stepSize pca = PCA(n_components=5) # Resample and normalise features step = ao.tempo_features.shape[0] ao.gammatone_features = resample(ao.gammatone_features, step) ao.gammatone_features = normaliseFeature(ao.gammatone_features) ao.timbre_features = resample(ao.timbre_features, step) ao.timbre_features = normaliseFeature(ao.timbre_features) ao.harmonic_features = resample(ao.harmonic_features, step) ao.harmonic_features = normaliseFeature(ao.harmonic_features) ao.tempo_features = normaliseFeature(ao.tempo_features) pca.fit(ao.gammatone_features) ao.gammatone_features = pca.transform(ao.gammatone_features) ao.gammatone_ssm = getSSM(ao.gammatone_features) pca.fit(ao.tempo_features) ao.tempo_features = pca.transform(ao.tempo_features) ao.tempo_ssm = getSSM(ao.tempo_features) pca.fit(ao.timbre_features) ao.timbre_features = pca.transform(ao.timbre_features) ao.timbre_ssm = getSSM(ao.timbre_features) pca.fit(ao.harmonic_features) ao.harmonic_features = pca.transform(ao.harmonic_features) ao.harmonic_ssm = getSSM(ao.harmonic_features) ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) audio_list.append(ao) # Prepare output files. outfile1 = join(options.OUTPUT, 'individual_novelty.csv') outfile2 = join(options.OUTPUT, 'individual_foote.csv') outfile3 = join(options.OUTPUT, 'individual_sf.csv') outfile4 = join(options.OUTPUT, 'individual_cnmf.csv') outfile5 = join(options.OUTPUT, 'combined_novelty.csv') outfile6 = join(options.OUTPUT, 'combined_foote.csv') outfile7 = join(options.OUTPUT, 'combined_sf.csv') outfile8 = join(options.OUTPUT, 'combined_cnmf.csv') outfile9 = join(options.OUTPUT, 'individual_merged.csv') self.writeIndividualHeader(outfile1) self.writeIndividualHeader(outfile2) self.writeIndividualHeader(outfile3) self.writeIndividualHeader(outfile4) # self.writeCombinedHeader(outfile5) # self.writeCombinedHeader(outfile6) self.writeCombinedHeader(outfile7) self.writeCombinedHeader(outfile8) self.writeMergedHeader(outfile9) print 'Segmenting using %s method' %options.BOUNDARY for i,ao in enumerate(audio_list): print 'processing: %s' %ao.name ############################################################################################################################################ # Experiment 1: segmentation using individual features. gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker) timbre_novelty, smoothed_timbre_novelty, timbre_novelty_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker) tempo_novelty, smoothed_tempo_novelty, tempo_novelty_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker) harmonic_novelty, smoothed_harmonic_novelty, harmonic_novelty_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker) gammatone_cnmf_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=h, niter=300) timbre_cnmf_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300) tempo_cnmf_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300) harmonic_cnmf_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300) gammatone_foote_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L) timbre_foote_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L) tempo_foote_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L) harmonic_foote_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L) gammatone_sf_idxs = sf_S.segmentation(ao.gammatone_features) timbre_sf_idxs = sf_S.segmentation(ao.timbre_features) tempo_sf_idxs = sf_S.segmentation(ao.tempo_features) harmonic_sf_idxs = sf_S.segmentation(ao.harmonic_features) # Evaluate and write results. gt_novelty_05 = self.pairwiseF(ao.gt, gammatone_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_novelty_3 = self.pairwiseF(ao.gt, gammatone_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_novelty_05 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_novelty_3 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tempo_novelty_05 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tempo_novelty_3 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) timbre_novelty_05 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) timbre_novelty_3 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_cnmf_05 = self.pairwiseF(ao.gt, gammatone_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_cnmf_3 = self.pairwiseF(ao.gt, gammatone_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_cnmf_05 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_cnmf_3 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tempo_cnmf_05 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tempo_cnmf_3 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) timbre_cnmf_05 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) timbre_cnmf_3 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_sf_05 = self.pairwiseF(ao.gt, gammatone_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_sf_3 = self.pairwiseF(ao.gt, gammatone_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_sf_05 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_sf_3 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tempo_sf_05 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tempo_sf_3 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) timbre_sf_05 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) timbre_sf_3 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_foote_05 = self.pairwiseF(ao.gt, gammatone_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_foote_3 = self.pairwiseF(ao.gt, gammatone_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_foote_05 = self.pairwiseF(ao.gt, harmonic_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) harmonic_foote_3 = self.pairwiseF(ao.gt, harmonic_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tempo_foote_05 = self.pairwiseF(ao.gt, tempo_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tempo_foote_3 = self.pairwiseF(ao.gt, tempo_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) timbre_foote_05 = self.pairwiseF(ao.gt, timbre_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) timbre_foote_3 = self.pairwiseF(ao.gt, timbre_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) self.writeIndividualRes(outfile1, ao.name, gt_novelty_05, gt_novelty_3, harmonic_novelty_05, harmonic_novelty_3, tempo_novelty_05, tempo_novelty_3, timbre_novelty_05, timbre_novelty_3) self.writeIndividualRes(outfile2, ao.name, gt_cnmf_05, gt_cnmf_3, harmonic_cnmf_05, harmonic_cnmf_3, tempo_cnmf_05, tempo_cnmf_3, timbre_cnmf_05, timbre_cnmf_3) self.writeIndividualRes(outfile3, ao.name, gt_sf_05, gt_sf_3, harmonic_sf_05, harmonic_sf_3, tempo_sf_05, tempo_sf_3, timbre_sf_05, timbre_sf_3) self.writeIndividualRes(outfile4, ao.name, gt_foote_05, gt_foote_3, harmonic_foote_05, harmonic_foote_3, tempo_foote_05, tempo_foote_3, timbre_foote_05, timbre_foote_3) ############################################################################################################################################ # Experiment 2: segmentation using combined features. # Dumping features. gt_hm = np.hstack([ao.gammatone_features, ao.harmonic_features]) gt_tb = np.hstack([ao.gammatone_features, ao.timbre_features]) gt_tp = np.hstack([ao.gammatone_features, ao.tempo_features]) hm_tb = np.hstack([ao.harmonic_features, ao.timbre_features]) hm_tp = np.hstack([ao.harmonic_features, ao.tempo_features]) tb_tp = np.hstack([ao.timbre_features, ao.tempo_features]) gt_hm_tb = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.timbre_features]) gt_hm_tp = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.tempo_features]) gt_tb_tp = np.hstack([ao.gammatone_features, ao.timbre_features, ao.tempo_features]) hm_tb_tp = np.hstack([ao.harmonic_features, ao.timbre_features, ao.tempo_features]) gt_hm_tb_tp = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]) # Evaluting and writing results. gt_hm_sf_idxs = sf_S.segmentation(gt_hm) gt_tb_sf_idxs = sf_S.segmentation(gt_tb) gt_tp_sf_idxs = sf_S.segmentation(gt_tp) hm_tb_sf_idxs = sf_S.segmentation(hm_tb) hm_tp_sf_idxs = sf_S.segmentation(hm_tp) tb_tp_sf_idxs = sf_S.segmentation(tb_tp) gt_hm_tb_sf_idxs = sf_S.segmentation(gt_hm_tb) gt_hm_tp_sf_idxs = sf_S.segmentation(gt_hm_tp) gt_tb_tp_sf_idxs = sf_S.segmentation(gt_tb_tp) hm_tb_tp_sf_idxs = sf_S.segmentation(hm_tb_tp) gt_hm_tb_tp_sf_idxs = sf_S.segmentation(gt_hm_tb_tp) gt_hm_cnmf_idxs = cnmf_S.segmentation(gt_hm, rank=4, R=R, h=h, niter=300) gt_tb_cnmf_idxs = cnmf_S.segmentation(gt_tb, rank=4, R=R, h=h, niter=300) gt_tp_cnmf_idxs = cnmf_S.segmentation(gt_tp, rank=4, R=R, h=h, niter=300) hm_tb_cnmf_idxs = cnmf_S.segmentation(hm_tb, rank=4, R=R, h=h, niter=300) hm_tp_cnmf_idxs = cnmf_S.segmentation(hm_tp, rank=4, R=R, h=h, niter=300) tb_tp_cnmf_idxs = cnmf_S.segmentation(tb_tp, rank=4, R=R, h=h, niter=300) gt_hm_tb_cnmf_idxs = cnmf_S.segmentation(gt_hm_tb, rank=6, R=R, h=h, niter=300) gt_hm_tp_cnmf_idxs = cnmf_S.segmentation(gt_hm_tp, rank=6, R=R, h=h, niter=300) gt_tb_tp_cnmf_idxs = cnmf_S.segmentation(gt_tb_tp, rank=6, R=R, h=h, niter=300) hm_tb_tp_cnmf_idxs = cnmf_S.segmentation(hm_tb_tp, rank=6, R=R, h=h, niter=300) gt_hm_tb_tp_cnmf_idxs = cnmf_S.segmentation(gt_hm_tb_tp, rank=8, R=R, h=h, niter=300) gt_hm_sf_05 = self.pairwiseF(ao.gt, gt_hm_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_sf_05 = self.pairwiseF(ao.gt, gt_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tp_sf_05 = self.pairwiseF(ao.gt, gt_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_sf_05 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tp_sf_05 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tb_tp_sf_05 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_sf_05 = self.pairwiseF(ao.gt, gt_hm_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tp_sf_05 = self.pairwiseF(ao.gt, gt_hm_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_tp_sf_05 = self.pairwiseF(ao.gt, gt_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_tp_sf_05 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_tp_sf_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_sf_3 = self.pairwiseF(ao.gt, gt_hm_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_sf_3 = self.pairwiseF(ao.gt, gt_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tp_sf_3 = self.pairwiseF(ao.gt, gt_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_sf_3 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tp_sf_3 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tb_tp_sf_3 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_sf_3 = self.pairwiseF(ao.gt, gt_hm_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tp_sf_3 = self.pairwiseF(ao.gt, gt_hm_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_tp_sf_3 = self.pairwiseF(ao.gt, gt_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_tp_sf_3 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_tp_sf_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_cnmf_05 = self.pairwiseF(ao.gt, gt_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) tb_tp_cnmf_05 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_cnmf_3 = self.pairwiseF(ao.gt, gt_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) tb_tp_cnmf_3 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) hm_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) gt_hm_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) self.writeCombinedRes(outfile7, ao.name, gt_hm_sf_05, gt_hm_sf_3, gt_tb_sf_05, gt_tb_sf_3, gt_tp_sf_05, gt_tp_sf_3, hm_tb_sf_05, hm_tb_sf_3, hm_tp_sf_05, hm_tp_sf_3, tb_tp_sf_05, tb_tp_sf_3,\ gt_hm_tb_sf_05, gt_hm_tb_sf_3, gt_hm_tp_sf_05, gt_hm_tp_sf_3, gt_tb_tp_sf_05, gt_tb_tp_sf_3, hm_tb_tp_sf_05, hm_tb_tp_sf_3, gt_hm_tb_tp_sf_05, gt_hm_tb_tp_sf_3) self.writeCombinedRes(outfile8, ao.name, gt_hm_cnmf_05, gt_hm_cnmf_3, gt_tb_cnmf_05, gt_tb_cnmf_3, gt_tp_cnmf_05, gt_tp_cnmf_3, hm_tb_cnmf_05, hm_tb_cnmf_3, hm_tp_cnmf_05, hm_tp_cnmf_3, tb_tp_cnmf_05, tb_tp_cnmf_3,\ gt_hm_tb_cnmf_05, gt_hm_tb_cnmf_3, gt_hm_tp_cnmf_05, gt_hm_tp_cnmf_3, gt_tb_tp_cnmf_05, gt_tb_tp_cnmf_3, hm_tb_tp_cnmf_05, hm_tb_tp_cnmf_3, gt_hm_tb_tp_cnmf_05, gt_hm_tb_tp_cnmf_3) ############################################################################################################################################ # Experiment 3: Pruning boundaries detected by individual boundary algorithms. # Use different boundary methods for different features gammatone_idxs, harmonic_idxs, timbre_idxs, tempo_idxs = gammatone_sf_idxs, harmonic_sf_idxs, timbre_sf_idxs, tempo_sf_idxs bound_candidates = list(gammatone_idxs) + list(harmonic_idxs) + list(timbre_idxs) + list(tempo_idxs) bound_candidates.sort() nBounds = len(bound_candidates) final_idxs = [] idx = 0 tol = 10 # tolerance window of merging boundary scores while idx < nBounds: temp = [bound_candidates[idx]] pos = [idx] idx += 1 while (idx + tol < nBounds and np.max(bound_candidates[idx: idx+tol]) > 0): temp += [bound_candidates[idx+delta] for delta in xrange(tol) if (bound_candidates[idx]+delta in bound_candidates)] pos += [idx+delta for delta in xrange(tol) if (bound_candidates[idx]+delta in bound_candidates)] idx += tol if len(temp) == 1: final_idxs.append(temp[0]) else: final_idxs.append(int(np.rint(np.mean(temp)))) merged_05 = self.pairwiseF(ao.gt, final_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) merged_3 = self.pairwiseF(ao.gt, final_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) self.writeMergedRes(outfile9, ao.name, merged_05, merged_3) # if options.BOUNDARY == 'novelty': # gammatone_novelty, smoothed_gammatone_novelty, gammatone_bound_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker) # timbre_novelty, smoothed_timbre_novelty, timbre_bound_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker) # tempo_novelty, smoothed_harmonic_novelty, tempo_bound_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker) # harmonic_novelty, smoothed_tempo_novelty, harmonic_bound_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker) # # if options.BOUNDARY == 'cnmf': # gammatone_cnmf_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=8, niter=300) # timbre_cnmf_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300) # tempo_cnmf_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300) # harmonic_cnmf_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300) # # if options.BOUNDARY == 'foote': # gammatone_foote_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L) # timbre_foote_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L) # tempo_foote_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L) # harmonic_foote_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L) # # if options.BOUNDARY == 'sf': # gammatone_sf_idxs = sf_S.segmentation(ao.gammatone_features) # timbre_sf_idxs = sf_S.segmentation(ao.timbre_features) # tempo_sf_idxs = sf_S.segmentation(ao.tempo_features) # harmonic_sf_idxs = sf_S.segmentation(ao.harmonic_features) # # gammatone_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks] # timbre_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks] # harmonic_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks] # tempo_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks] # # gammatone_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_cnmf_peaks] # timbre_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_cnmf_peaks] # harmonic_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_cnmf_peaks] # tempo_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_cnmf_peaks] # # gammatone_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_sf_peaks] # timbre_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_sf_peaks] # harmonic_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_sf_peaks] # tempo_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_sf_peaks] # # gammatone_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_foote_peaks] # timbre_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_foote_peaks] # harmonic_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_foote_peaks] # tempo_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_foote_peaks] # # # Experiment 2: Trying combined features using the best boundary retrieval method # ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] # feature_sel = [int(x) for x in options.FEATURES if x.isdigit()] # fused_featureset = [ao_featureset[i] for i in feature_sel] # if options.LABEL == 'fmc2d': # gammatone_fmc2d_labels = fmc2d_S.compute_similarity(gammatone_bound_idxs, xmeans=True, N=N) # timbre_fmc2d_labels = fmc2d_S.compute_similarity(timbre_bound_idxs, xmeans=True, N=N) # tempo_fmc2d_labels = fmc2d_S.compute_similarity(tempo_bound_idxs, xmeans=True, N=N) # harmonic_fmc2d_labels = fmc2d_S.compute_similarity(harmonic_bound_idxs, xmeans=True, N=N) # # if options.LABEL == 'cnmf': # gammatone_cnmf_labels = cnmf_S.compute_labels(gammatone_bound_idxs, est_bound_idxs, nFrames) # timbre_cnmf_labels = cnmf_S.compute_labels(timbre_bound_idxs, est_bound_idxs, nFrames) # tempo_cnmf_labels = cnmf_S.compute_labels(tempo_bound_idxs, est_bound_idxs, nFrames) # harmonic_cnmf_labels = cnmf_S.compute_labels(harmonic_bound_idxs, est_bound_idxs, nFrames) # # def main(): segmenter = SSMseg() segmenter.process() if __name__ == '__main__': main()