SegEval.py @ changeset 19:890cfe424f4a (tip)

commit message: added annotations
author:         mitian
date:           Fri, 11 Dec 2015 09:47:40 +0000
parents:        b4bf37f94e92
children:       (none)
#!/usr/bin/env python
# encoding: utf-8
"""
SegEval.py

The main segmentation program.

Created by mi tian on 2015-04-02.
Copyright (c) 2015 __MyCompanyName__. All rights reserved.
"""

# Load standard python libs
import sys, os, optparse, csv
from itertools import combinations
from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
from copy import copy

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import scipy as sp
from scipy.signal import correlate2d, convolve2d, filtfilt, resample
from sklearn.decomposition import PCA
from sklearn.mixture import GMM
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import pairwise_distances

# Load dependencies
from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature, normaliseArray
from utils.PeakPickerUtil import PeakPicker
from utils.gmmdist import *
from utils.GmmMetrics import GmmDistance
from utils.RankClustering import rClustering
from utils.kmeans import Kmeans
# from utils.PathTracker import PathTracker
from utils.OnsetPlotProc import onset_plot, plot_on

# Load boundary retrieval utilities
import cnmf as cnmf_S
import foote as foote_S
import sf as sf_S
import fmc2d as fmc2d_S
import novelty as novelty_S

# Algorithm params
# C-NMF
h = 8             # Size of median filter for features in C-NMF
R = 12            # Size of the median filter for the activation matrix in C-NMF
rank = 4          # Rank of decomposition for the boundaries
rank_labels = 16  # Rank of decomposition for the labels
R_labels = 4      # Size of the median filter for the labels
# Foote
M = 2             # Median filter for the audio features (in beats)
Mg = 32           # Gaussian kernel size
L = 16            # Size of the median filter for the adaptive threshold
# 2D-FMC
N = 8             # Size of the fixed-length segments (for 2D-FMC)
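# Illustrative usage (added note, not part of the original revision): with the default
# feature and annotation paths defined in parse_args below, the script is typically
# invoked along the lines of
#   python SegEval.py -d qupujicheng -b novelty -l cnmf -x all -v
# where -b/-l name the boundary retrieval and labeling algorithms and -x names the
# experiment to execute (see the option help strings for the available choices).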
# Define arg parser
def parse_args():
    op = optparse.OptionParser()
    # IO options
    op.add_option('-g', '--gammatonegram-features', action="store", dest="GF",
                  default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str",
                  help="Load gammatone features from this directory.")
    op.add_option('-s', '--spectrogram-features', action="store", dest="SF",
                  default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str",
                  help="Load spectral features from this directory.")
    op.add_option('-t', '--tempogram-features', action="store", dest="TF",
                  default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str",
                  help="Load tempogram features from this directory.")
    op.add_option('-a', '--annotations', action="store", dest="GT",
                  default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str",
                  help="Load annotation files from this directory.")
    op.add_option('-o', '--output', action="store", dest="OUTPUT",
                  default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str",
                  help="Write segmentation results to this directory.")
    op.add_option('-d', '--dataset', action="store", dest="DATASET", default='qupujicheng', type="str",
                  help="Specify dataset.")
    # Parameterisation options
    op.add_option('-b', '--boundary-method', action="store", dest="BOUNDARY", type='choice',
                  choices=['novelty', 'cnmf', 'foote', 'sf'], default='novelty',
                  help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'foote', 'sf').")
    op.add_option('-l', '--labeling-method', action="store", dest="LABEL", type='choice',
                  choices=['cnmf', 'fmc2d'], default='cnmf',
                  help="Choose boundary labeling algorithm ('cnmf', 'fmc2d').")
    op.add_option('-x', '--experiment', action="store", dest="EXPERIMENT", type='choice',
                  choices=['all', 'individual', 'fuse_feature', 'fuse_ssm', 'fuse_novelty', 'fuse_bounds'], default='all',
                  help="Specify experiment to execute.")
    # Plot/print/mode options
    op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots.")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode.")
    op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.")
    return op.parse_args()

options, args = parse_args()


class FeatureObj():
    __slots__ = ['key', 'audio', 'timestamps',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'ssm_timestamps']


class AudioObj():
    __slots__ = ['name', 'feature_list', 'gt', 'label',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm',
                 'ssm_timestamps', 'timestamps', 'tempo_timestamps']


class EvalObj():
    __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA', 'detection']
class SSMseg(object):
    '''The main segmentation object.'''

    def __init__(self):
        self.SampleRate = 44100.0
        self.NqHz = self.SampleRate / 2
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 100  # Adjust this param according to the feature resolution.
        self.blockSize = 2048.0
        self.stepSize = 1024.0
        '''NOTE: Match the following params with those used for feature extraction!'''
        '''NOTE: Unlike the spectrogram features, gammatone features are extracted without taking an FFT.
        The windowing is only used to chunk the audio so that the gammatone filtering can be run with the
        specified blockSize and stepSize. The resulting gammatonegram is aggregated every gammatoneLen
        samples without overlap.'''
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100
        self.highFreq = self.SampleRate / 4
        '''Settings for extracting tempogram features.'''
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]
        '''Peak picking settings for the novelty based method.'''
        self.threshold = 10
        self.confidence_threshold = 0.4
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = True
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 10
        self.lin = 0.5

        if plot_on:
            onset_plot.reset()

    def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0, idx2time=None):
        '''Pairwise F-measure evaluation of detection rates.'''
        res = EvalObj()
        res.TP, res.FP, res.FN = 0, 0, 0
        res.P, res.R, res.F = 0.0, 0.0, 0.0
        res.AD, res.DA = 0.0, 0.0
        if len(detection) == 0:
            return res
        if idx2time is not None:
            # Map detected idxs to real time
            detection.sort()
            if detection[-1] >= len(idx2time):
                # Drop detections falling beyond the last timestamp
                detection = detection[:-len(np.array(detection)[np.array(detection) - len(idx2time) >= 0])]
            detection = [idx2time[int(i)] for i in detection]
            detection = np.append(detection, annotation[-1])
        res.detection = detection
        gt = len(annotation)  # Total number of ground truth data points
        dt = len(detection)   # Total number of experimental data points
        foundIdx = []
        D_AD = np.zeros(gt)
        D_DA = np.zeros(dt)
        for dtIdx in xrange(dt):
            D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation))
        for gtIdx in xrange(gt):
            D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection))
            for dtIdx in xrange(dt):
                if (annotation[gtIdx] >= detection[dtIdx] - tolerance / 2.0) and \
                   (annotation[gtIdx] <= detection[dtIdx] + tolerance / 2.0):
                    foundIdx.append(gtIdx)
                    continue
        foundIdx = list(set(foundIdx))
        res.TP = len(foundIdx)
        # res.FP = dt - res.TP
        res.FP = max(0, dt - res.TP)
        res.FN = gt - res.TP
        res.AD = np.mean(D_AD)
        res.DA = np.mean(D_DA)
        if res.TP == 0:
            return res
        res.P = res.TP / float(res.TP + res.FP)
        res.R = res.TP / float(res.TP + res.FN)
        res.F = 2 * res.P * res.R / (res.P + res.R)
        return res
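    # Worked example (added for illustration, not in the original revision): with
    # idx2time=None, annotation = [10.0, 20.0, 30.0] s, detection = [9.0, 19.8] s and
    # tolerance = 3.0 s, both detections fall within +/- 1.5 s of an annotated boundary,
    # so TP = 2, FP = 0, FN = 1, giving P = 1.0, R = 2/3 and F = 2PR/(P+R) = 0.8.
    # AD and DA are the mean annotation-to-detection and detection-to-annotation
    # distances in seconds.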
    def writeIndividualHeader(self, filename):
        '''Write header of output files for individual features.'''
        with open(filename, 'a') as f:
            csvwriter = csv.writer(f, delimiter=',')
            csvwriter.writerow(['audio',
                'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05',
                'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3',
                'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05',
                'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3',
                'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05',
                'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3'])

    def writeIndividualRes(self, filename, ao_name, harmonic_res_05, harmonic_res_3, timbre_res_05, timbre_res_3, tempo_res_05, tempo_res_3):
        '''Write result of single detection for individual features.'''
        with open(filename, 'a') as f:
            csvwriter = csv.writer(f, delimiter=',')
            csvwriter.writerow([ao_name,
                harmonic_res_05.TP, harmonic_res_05.FP, harmonic_res_05.FN, harmonic_res_05.P, harmonic_res_05.R, harmonic_res_05.F, harmonic_res_05.AD, harmonic_res_05.DA,
                harmonic_res_3.TP, harmonic_res_3.FP, harmonic_res_3.FN, harmonic_res_3.P, harmonic_res_3.R, harmonic_res_3.F, harmonic_res_3.AD, harmonic_res_3.DA,
                timbre_res_05.TP, timbre_res_05.FP, timbre_res_05.FN, timbre_res_05.P, timbre_res_05.R, timbre_res_05.F, timbre_res_05.AD, timbre_res_05.DA,
                timbre_res_3.TP, timbre_res_3.FP, timbre_res_3.FN, timbre_res_3.P, timbre_res_3.R, timbre_res_3.F, timbre_res_3.AD, timbre_res_3.DA,
                tempo_res_05.TP, tempo_res_05.FP, tempo_res_05.FN, tempo_res_05.P, tempo_res_05.R, tempo_res_05.F, tempo_res_05.AD, tempo_res_05.DA,
                tempo_res_3.TP, tempo_res_3.FP, tempo_res_3.FN, tempo_res_3.P, tempo_res_3.R, tempo_res_3.F, tempo_res_3.AD, tempo_res_3.DA])

    def writeCombinedHeader(self, filename):
        '''Write header of output files for combined features.'''
        with open(filename, 'a') as f:
            csvwriter = csv.writer(f, delimiter=',')
            csvwriter.writerow(['audio',
                'hm_tb_P_0.5', 'hm_tb_R_0.5', 'hm_tb_F_0.5', 'hm_tb_P_3', 'hm_tb_R_3', 'hm_tb_F_3',
                'hm_tp_P_0.5', 'hm_tp_R_0.5', 'hm_tp_F_0.5', 'hm_tp_P_3', 'hm_tp_R_3', 'hm_tp_F_3',
                'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3',
                'hm_tb_tp_P_0.5', 'hm_tb_tp_R_0.5', 'hm_tb_tp_F_0.5', 'hm_tb_tp_P_3', 'hm_tb_tp_R_3', 'hm_tb_tp_F_3'])

    def writeCombinedRes(self, filename, ao_name, hm_tb_res_05, hm_tb_res_3, hm_tp_res_05, hm_tp_res_3, tb_tp_res_05, tb_tp_res_3, hm_tb_tp_res_05, hm_tb_tp_res_3):
        '''Write result of single detection for combined features.'''
        with open(filename, 'a') as f:
            csvwriter = csv.writer(f, delimiter=',')
            csvwriter.writerow([ao_name,
                hm_tb_res_05.P, hm_tb_res_05.R, hm_tb_res_05.F, hm_tb_res_3.P, hm_tb_res_3.R, hm_tb_res_3.F,
                hm_tp_res_05.P, hm_tp_res_05.R, hm_tp_res_05.F, hm_tp_res_3.P, hm_tp_res_3.R, hm_tp_res_3.F,
                tb_tp_res_05.P, tb_tp_res_05.R, tb_tp_res_05.F, tb_tp_res_3.P, tb_tp_res_3.R, tb_tp_res_3.F,
                hm_tb_tp_res_05.P, hm_tb_tp_res_05.R, hm_tb_tp_res_05.F, hm_tb_tp_res_3.P, hm_tb_tp_res_3.R, hm_tb_tp_res_3.F])

    def removeDuplicates(self, bounds, tol=1.0):
        '''Remove duplicates by averaging boundaries located in the tolerance window.'''
        new_bounds = []
        bounds = list(set(bounds))
        bounds.sort()
        tol_win = int(tol * self.SampleRate / self.stepSize)
        bound_idx = 0
        nBounds = len(bounds)
        while bound_idx < nBounds:
            start = bounds[bound_idx]
            cnt = 1
            temp = [start]
            while (bound_idx + cnt < nBounds and (bounds[bound_idx + cnt] - start <= tol_win)):
                temp.append(bounds[bound_idx + cnt])
                cnt += 1
            new_bounds.append(int(np.mean(temp)))
            bound_idx += cnt
        # print 'new_bounds', nBounds, len(new_bounds)
        return new_bounds

    def selectBounds(self, nc, bounds, thresh=0.5):
        '''Select bounds with novelty curve (nc) value above thresh.'''
        # return list(np.array(bounds)[np.where(np.array(nc) > thresh)[0]])
        bounds_keep = []
        nc = normaliseArray(nc)
        for i, x in enumerate(bounds):
            if nc[x] >= thresh:
                bounds_keep.append(x)
        # print 'bounds_keep', len(bounds), len(bounds_keep)
        return bounds_keep
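    # Worked example (added for illustration, not in the original revision): removeDuplicates
    # operates on frame indices. With SampleRate = 44100 and stepSize = 1024, tol = 1.0 s
    # gives tol_win = int(44100 / 1024) = 43 frames, so bounds [100, 120, 300] merge to
    # [110, 300] (100 and 120 are averaged). selectBounds instead keeps only those indices
    # whose normalised novelty value is at or above thresh.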
    def process(self):
        '''For the aggregated input features, discard a proportion each time as the pairwise
        distances within the feature space descend. Meanwhile, evaluate the segmentation result
        and track the trend of performance change by measuring the feature selection threshold
        versus segmentation F-measure curve.
        '''
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0                     # Alpha norm
        peak_picker.params.delta = self.delta_threshold    # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False

        # Settings used for feature extraction
        feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow)
        feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep)
        aggregation_window, aggregation_step = 100, 50
        featureRate = float(self.SampleRate) / self.stepSize

        audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".")]
        if options.TEST:
            audio_files = audio_files[:1]
        audio_files.sort()
        audio_list = []

        gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')]
        gammatone_feature_list = ['contrast6', 'rolloff4', 'dct']
        tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')]
        tempo_feature_list = ['ti', 'tir']
        timbre_feature_list = ['mfcc_harmonic']
        harmonic_feature_list = ['chromagram_harmonic']

        gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list]
        timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list]
        tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list]
        harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list]

        # Prepare output files.
        outfile1 = join(options.OUTPUT, 'individual_novelty.csv')
        outfile2 = join(options.OUTPUT, 'individual_cnmf.csv')
        outfile3 = join(options.OUTPUT, 'individual_sf.csv')
        outfile4 = join(options.OUTPUT, 'combinedFeatures_novelty.csv')
        outfile5 = join(options.OUTPUT, 'combinedFeatures_cnmf.csv')
        outfile6 = join(options.OUTPUT, 'combinedFeatures_sf.csv')
        # outfile7 = join(options.OUTPUT, 'combinedSSM_novelty.csv')
        # outfile8 = join(options.OUTPUT, 'combinedBounds_novelty.csv')
        # outfile9 = join(options.OUTPUT, 'combinedBounds_sf.csv')

        # self.writeIndividualHeader(outfile1)
        # self.writeIndividualHeader(outfile2)
        # self.writeIndividualHeader(outfile3)
        # self.writeCombinedHeader(outfile4)
        self.writeCombinedHeader(outfile5)
        self.writeCombinedHeader(outfile6)
        # self.writeCombinedHeader(outfile7)
        # self.writeCombinedHeader(outfile8)
        # self.writeCombinedHeader(outfile9)

        # For each audio file, load specific features
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]

            # Load annotations for the specified audio collection.
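            # Annotation formats assumed below (inferred from the genfromtxt calls):
            #   qupujicheng: <name>.csv with comma-separated "time,label" rows;
            #   salami:      <name>.txt with whitespace-separated "time label" rows;
            #   otherwise:   <name>.lab (e.g. Beatles) with "start end label" rows, whose
            #                start/end times are flattened into a single boundary list.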
            if options.DATASET == 'qupujicheng':
                annotation_file = join(options.GT, ao.name + '.csv')  # qupujicheng
                ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
                ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
            elif options.DATASET == 'salami':
                annotation_file = join(options.GT, ao.name + '.txt')  # iso, salami
                ao.gt = np.genfromtxt(annotation_file, usecols=0)
                ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
            else:
                annotation_file = join(options.GT, ao.name + '.lab')  # beatles
                ao.gt = np.genfromtxt(annotation_file, usecols=(0, 1))
                ao.gt = np.unique(np.ndarray.flatten(ao.gt))
                ao.label = np.genfromtxt(annotation_file, usecols=2, dtype=str)

            gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], []

            for feature in timbre_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[:, 0:14])
                        break
            if len(timbre_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in timbre_featureset])
                timbre_featureset = [x[:n_frame, :] for x in timbre_featureset]
                ao.timbre_features = np.hstack((timbre_featureset))
            else:
                ao.timbre_features = timbre_featureset[0]

            for feature in tempo_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[1:, 1:])
                        ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[1:, 0]
                        break
            if len(tempo_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in tempo_featureset])
                tempo_featureset = [x[:n_frame, :] for x in tempo_featureset]
                ao.tempo_features = np.hstack((tempo_featureset))
            else:
                ao.tempo_features = tempo_featureset[0]

            for feature in harmonic_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[:, 1:])
                        break
            if len(harmonic_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in harmonic_featureset])
                harmonic_featureset = [x[:n_frame, :] for x in harmonic_featureset]
                ao.harmonic_features = np.hstack((harmonic_featureset))
            else:
                ao.harmonic_features = harmonic_featureset[0]

            # Get aggregated features for computing the SSM
            aggregation_window, aggregation_step = 20, 10
            featureRate = float(self.SampleRate) / self.stepSize
            pca = PCA(n_components=6)

            # Resample and normalise features
            # step = ao.tempo_features.shape[0]
            # ao.timbre_features = resample(ao.timbre_features, step)
            ao.timbre_features = normaliseFeature(ao.timbre_features)
            # ao.harmonic_features = resample(ao.harmonic_features, step)
            ao.harmonic_features = normaliseFeature(ao.harmonic_features)
            nFrames = np.min([ao.timbre_features.shape[0], ao.harmonic_features.shape[0]])
            ao.timbre_features = ao.timbre_features[:nFrames, :]
            ao.harmonic_features = ao.harmonic_features[:nFrames, :]
            ao.tempo_features = normaliseFeature(ao.tempo_features)
            ao.tempo_features = upSample(ao.tempo_features, nFrames)
            ao.timestamps = np.array(map(lambda x: x / featureRate, np.arange(0, nFrames)))

            step = nFrames / 10
            ao.timbre_features = resample(ao.timbre_features, step)
            ao.harmonic_features = resample(ao.harmonic_features, step)
            ao.tempo_features = resample(ao.tempo_features, step)
            # Approximate timestamps of the ~10x downsampled frames, used by pairwiseF to map
            # detected frame indices back to seconds.
            ao.ssm_timestamps = ao.timestamps[::10]

            pca.fit(ao.tempo_features)
            ao.tempo_features = pca.transform(ao.tempo_features)
            ao.tempo_ssm = getSSM(ao.tempo_features)
            pca.fit(ao.timbre_features)
            ao.timbre_features = pca.transform(ao.timbre_features)
            ao.timbre_ssm = getSSM(ao.timbre_features)
            pca.fit(ao.harmonic_features)
            ao.harmonic_features = pca.transform(ao.harmonic_features)
            ao.harmonic_ssm = getSSM(ao.harmonic_features)
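            # Note (added; the exact definition of getSSM lives in utils.SegUtil): a
            # self-similarity matrix is typically built from pairwise distances between the
            # PCA-reduced frame vectors, e.g. something along the lines of
            #   D = pairwise_distances(X)   # sklearn, already imported above
            #   S = 1.0 - D / D.max()       # similarity in [0, 1]
            # so S[i, j] is large when frames i and j are similar; homogeneous sections show
            # up as blocks along the main diagonal.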
            ##########################################################################################
            # Experiment 1: segmentation using individual features.

            timbre_novelty, smoothed_timbre_novelty, timbre_novelty_idxs = novelty_S.process(ao.timbre_ssm, peak_picker, self.kernel_size)
            tempo_novelty, smoothed_tempo_novelty, tempo_novelty_idxs = novelty_S.process(ao.tempo_ssm, peak_picker, self.kernel_size)
            harmonic_novelty, smoothed_harmonic_novelty, harmonic_novelty_idxs = novelty_S.process(ao.harmonic_ssm, peak_picker, self.kernel_size)

            timbre_cnmf_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300)[-1]
            tempo_cnmf_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300)[-1]
            harmonic_cnmf_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300)[-1]

            timbre_sf_nc, timbre_sf_idxs = sf_S.segmentation(ao.timbre_features)
            tempo_sf_nc, tempo_sf_idxs = sf_S.segmentation(ao.tempo_features)
            harmonic_sf_nc, harmonic_sf_idxs = sf_S.segmentation(ao.harmonic_features)
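            # Note (added for orientation; details live in the imported modules): novelty_S
            # follows a Foote-style approach, correlating a checkerboard kernel of size
            # self.kernel_size along the SSM diagonal and peak-picking the resulting novelty
            # curve; cnmf_S derives boundaries from a C-NMF decomposition of the features
            # (rank, R, h set at the top of the file); sf_S returns a novelty curve plus
            # boundary indices from its structural-features method.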
            # Evaluate and write results.
            harmonic_novelty_05 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            harmonic_novelty_3 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_novelty_05 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_novelty_3 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_novelty_05 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_novelty_3 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            harmonic_cnmf_05 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            harmonic_cnmf_3 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_cnmf_05 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_cnmf_3 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_cnmf_05 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_cnmf_3 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            harmonic_sf_05 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            harmonic_sf_3 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_sf_05 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tempo_sf_3 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_sf_05 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            timbre_sf_3 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            # Argument order follows the writeIndividualRes signature: harmonic, timbre, tempo.
            self.writeIndividualRes(outfile1, ao.name, harmonic_novelty_05, harmonic_novelty_3, timbre_novelty_05, timbre_novelty_3, tempo_novelty_05, tempo_novelty_3)
            self.writeIndividualRes(outfile2, ao.name, harmonic_cnmf_05, harmonic_cnmf_3, timbre_cnmf_05, timbre_cnmf_3, tempo_cnmf_05, tempo_cnmf_3)
            self.writeIndividualRes(outfile3, ao.name, harmonic_sf_05, harmonic_sf_3, timbre_sf_05, timbre_sf_3, tempo_sf_05, tempo_sf_3)

            ##########################################################################################
            # Experiment 2: segmentation using combined features (early fusion).

            # Stack features.
            hm_tb = np.hstack([ao.harmonic_features, ao.timbre_features])
            hm_tp = np.hstack([ao.harmonic_features, ao.tempo_features])
            tb_tp = np.hstack([ao.timbre_features, ao.tempo_features])
            hm_tb_tp = np.hstack([ao.harmonic_features, ao.timbre_features, ao.tempo_features])

            hm_tb_feature_ssm = getSSM(hm_tb)
            hm_tp_feature_ssm = getSSM(hm_tp)
            tb_tp_feature_ssm = getSSM(tb_tp)
            hm_tb_tp_feature_ssm = getSSM(hm_tb_tp)

            # Evaluating and writing results.
            hm_tb_novelty_idxs = novelty_S.process(hm_tb_feature_ssm)[-1]
            hm_tp_novelty_idxs = novelty_S.process(hm_tp_feature_ssm)[-1]
            tb_tp_novelty_idxs = novelty_S.process(tb_tp_feature_ssm)[-1]
            hm_tb_tp_novelty_idxs = novelty_S.process(hm_tb_tp_feature_ssm)[-1]

            hm_tb_sf_idxs = sf_S.segmentation(hm_tb)[-1]
            hm_tp_sf_idxs = sf_S.segmentation(hm_tp)[-1]
            tb_tp_sf_idxs = sf_S.segmentation(tb_tp)[-1]
            hm_tb_tp_sf_idxs = sf_S.segmentation(hm_tb_tp)[-1]

            hm_tb_cnmf_idxs = cnmf_S.segmentation(hm_tb, rank=4, R=R, h=h, niter=300)[-1]
            hm_tp_cnmf_idxs = cnmf_S.segmentation(hm_tp, rank=4, R=R, h=h, niter=300)[-1]
            tb_tp_cnmf_idxs = cnmf_S.segmentation(tb_tp, rank=4, R=R, h=h, niter=300)[-1]
            hm_tb_tp_cnmf_idxs = cnmf_S.segmentation(hm_tb_tp, rank=6, R=R, h=h, niter=300)[-1]
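            # Note (added): Experiment 2 is early fusion -- features are concatenated
            # frame-wise before segmentation. With PCA(n_components=6) applied to each
            # stream above, the pairwise stacks are (n_frames, 12) and hm_tb_tp is
            # (n_frames, 18). Experiments 3 and 4 below (commented out) fuse later instead,
            # at the SSM level and at the boundary level respectively.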
            hm_tb_novelty_05 = self.pairwiseF(ao.gt, hm_tb_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_novelty_05 = self.pairwiseF(ao.gt, hm_tp_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_novelty_05 = self.pairwiseF(ao.gt, tb_tp_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_novelty_05 = self.pairwiseF(ao.gt, hm_tb_tp_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_novelty_3 = self.pairwiseF(ao.gt, hm_tb_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_novelty_3 = self.pairwiseF(ao.gt, hm_tp_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_novelty_3 = self.pairwiseF(ao.gt, tb_tp_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_novelty_3 = self.pairwiseF(ao.gt, hm_tb_tp_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            hm_tb_sf_05 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_sf_05 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_sf_05 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_sf_05 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_sf_3 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_sf_3 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_sf_3 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_sf_3 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            hm_tb_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_cnmf_05 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            tb_tp_cnmf_3 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            hm_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            self.writeCombinedRes(outfile4, ao.name, hm_tb_novelty_05, hm_tb_novelty_3, hm_tp_novelty_05, hm_tp_novelty_3, tb_tp_novelty_05, tb_tp_novelty_3, hm_tb_tp_novelty_05, hm_tb_tp_novelty_3)
            self.writeCombinedRes(outfile5, ao.name, hm_tb_cnmf_05, hm_tb_cnmf_3, hm_tp_cnmf_05, hm_tp_cnmf_3, tb_tp_cnmf_05, tb_tp_cnmf_3, hm_tb_tp_cnmf_05, hm_tb_tp_cnmf_3)
            self.writeCombinedRes(outfile6, ao.name, hm_tb_sf_05, hm_tb_sf_3, hm_tp_sf_05, hm_tp_sf_3, tb_tp_sf_05, tb_tp_sf_3, hm_tb_tp_sf_05, hm_tb_tp_sf_3)

            # ########################################################################################
            # # Experiment 3: late fusion -- segmentation using combined SSMs.
            #
            # hm_tb_ssm = self.lin * ao.harmonic_ssm + (1 - self.lin) * ao.timbre_ssm
            # hm_tp_ssm = self.lin * ao.harmonic_ssm + (1 - self.lin) * ao.tempo_ssm
            # tb_tp_ssm = self.lin * ao.timbre_ssm + (1 - self.lin) * ao.tempo_ssm
            # hm_tb_tp_ssm = (ao.harmonic_ssm + ao.timbre_ssm + ao.tempo_ssm) / 3.0
            #
            # hm_tb_ssm_novelty_idxs = novelty_S.process(hm_tb_ssm)[-1]
            # hm_tp_ssm_novelty_idxs = novelty_S.process(hm_tp_ssm)[-1]
            # tb_tp_ssm_novelty_idxs = novelty_S.process(tb_tp_ssm)[-1]
            # hm_tb_tp_ssm_novelty_idxs = novelty_S.process(hm_tb_tp_ssm)[-1]
            #
            # hm_tb_ssm_novelty_05 = self.pairwiseF(ao.gt, hm_tb_ssm_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_ssm_novelty_05 = self.pairwiseF(ao.gt, hm_tp_ssm_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_ssm_novelty_05 = self.pairwiseF(ao.gt, tb_tp_ssm_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_ssm_novelty_05 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # hm_tb_ssm_novelty_3 = self.pairwiseF(ao.gt, hm_tb_ssm_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_ssm_novelty_3 = self.pairwiseF(ao.gt, hm_tp_ssm_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_ssm_novelty_3 = self.pairwiseF(ao.gt, tb_tp_ssm_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_ssm_novelty_3 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # self.writeCombinedRes(outfile7, ao.name, hm_tb_ssm_novelty_05, hm_tb_ssm_novelty_3, hm_tp_ssm_novelty_05, hm_tp_ssm_novelty_3, tb_tp_ssm_novelty_05, tb_tp_ssm_novelty_3, hm_tb_tp_ssm_novelty_05, hm_tb_tp_ssm_novelty_3)

            # ########################################################################################
            # # Experiment 4: late fusion -- segmentation using combined boundaries.
            #
            # hm_novelty_bounds = self.selectBounds(smoothed_harmonic_novelty, harmonic_novelty_idxs, self.confidence_threshold)
            # tb_novelty_bounds = self.selectBounds(smoothed_timbre_novelty, timbre_novelty_idxs, self.confidence_threshold)
            # tp_novelty_bounds = self.selectBounds(smoothed_tempo_novelty, tempo_novelty_idxs, self.confidence_threshold)
            #
            # hm_tb_novelty_bounds = hm_novelty_bounds + tb_novelty_bounds
            # hm_tp_novelty_bounds = hm_novelty_bounds + tp_novelty_bounds
            # tb_tp_novelty_bounds = tb_novelty_bounds + tp_novelty_bounds
            # hm_tb_tp_novelty_bounds = hm_novelty_bounds + tb_novelty_bounds + tp_novelty_bounds
            #
            # hm_tb_novelty_bounds = self.removeDuplicates(hm_tb_novelty_bounds, tol=1.0)
            # hm_tp_novelty_bounds = self.removeDuplicates(hm_tp_novelty_bounds, tol=1.0)
            # tb_tp_novelty_bounds = self.removeDuplicates(tb_tp_novelty_bounds, tol=1.0)
            # hm_tb_tp_novelty_bounds = self.removeDuplicates(hm_tb_tp_novelty_bounds, tol=1.0)
            #
            # hm_sf_bounds = self.selectBounds(harmonic_sf_nc, harmonic_sf_idxs, self.confidence_threshold)
            # tb_sf_bounds = self.selectBounds(timbre_sf_nc, timbre_sf_idxs, self.confidence_threshold)
            # tp_sf_bounds = self.selectBounds(tempo_sf_nc, tempo_sf_idxs, self.confidence_threshold)
            #
            # hm_tb_sf_bounds = hm_sf_bounds + tb_sf_bounds
            # hm_tp_sf_bounds = hm_sf_bounds + tp_sf_bounds
            # tb_tp_sf_bounds = tb_sf_bounds + tp_sf_bounds
            # hm_tb_tp_sf_bounds = hm_sf_bounds + tb_sf_bounds + tp_sf_bounds
            #
            # hm_tb_sf_bounds = self.removeDuplicates(hm_tb_sf_bounds, tol=1.0)
            # hm_tp_sf_bounds = self.removeDuplicates(hm_tp_sf_bounds, tol=1.0)
            # tb_tp_sf_bounds = self.removeDuplicates(tb_tp_sf_bounds, tol=1.0)
            # hm_tb_tp_sf_bounds = self.removeDuplicates(hm_tb_tp_sf_bounds, tol=1.0)
            #
            # hm_tb_novelty_bounds_05 = self.pairwiseF(ao.gt, hm_tb_novelty_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_novelty_bounds_05 = self.pairwiseF(ao.gt, hm_tp_novelty_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_novelty_bounds_05 = self.pairwiseF(ao.gt, tb_tp_novelty_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_novelty_bounds_05 = self.pairwiseF(ao.gt, hm_tb_tp_novelty_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # hm_tb_sf_bounds_05 = self.pairwiseF(ao.gt, hm_tb_sf_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_sf_bounds_05 = self.pairwiseF(ao.gt, hm_tp_sf_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_sf_bounds_05 = self.pairwiseF(ao.gt, tb_tp_sf_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_sf_bounds_05 = self.pairwiseF(ao.gt, hm_tb_tp_sf_bounds, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # hm_tb_novelty_bounds_3 = self.pairwiseF(ao.gt, hm_tb_novelty_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_novelty_bounds_3 = self.pairwiseF(ao.gt, hm_tp_novelty_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_novelty_bounds_3 = self.pairwiseF(ao.gt, tb_tp_novelty_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_novelty_bounds_3 = self.pairwiseF(ao.gt, hm_tb_tp_novelty_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # hm_tb_sf_bounds_3 = self.pairwiseF(ao.gt, hm_tb_sf_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tp_sf_bounds_3 = self.pairwiseF(ao.gt, hm_tp_sf_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # tb_tp_sf_bounds_3 = self.pairwiseF(ao.gt, tb_tp_sf_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            # hm_tb_tp_sf_bounds_3 = self.pairwiseF(ao.gt, hm_tb_tp_sf_bounds, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)
            #
            # self.writeCombinedRes(outfile8, ao.name, hm_tb_novelty_bounds_05, hm_tb_novelty_bounds_3, hm_tp_novelty_bounds_05, hm_tp_novelty_bounds_3, tb_tp_novelty_bounds_05, tb_tp_novelty_bounds_3, hm_tb_tp_novelty_bounds_05, hm_tb_tp_novelty_bounds_3)
            # self.writeCombinedRes(outfile9, ao.name, hm_tb_sf_bounds_05, hm_tb_sf_bounds_3, hm_tp_sf_bounds_05, hm_tp_sf_bounds_3, tb_tp_sf_bounds_05, tb_tp_sf_bounds_3, hm_tb_tp_sf_bounds_05, hm_tb_tp_sf_bounds_3)


def main():
    segmenter = SSMseg()
    segmenter.process()


if __name__ == '__main__':
    main()