mi@0: #!/usr/bin/env python mi@0: # encoding: utf-8 mi@0: """ mi@0: SegEval.py mi@0: mi@0: The main segmentation program. mi@0: mi@0: Created by mi tian on 2015-04-02. mi@0: Copyright (c) 2015 __MyCompanyName__. All rights reserved. mi@0: """ mi@0: mi@0: # Load starndard python libs mi@0: import sys, os, optparse, csv mi@0: from itertools import combinations mi@0: from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext mi@0: from copy import copy mi@0: mi@0: import matplotlib mi@0: # matplotlib.use('Agg') mi@0: import matplotlib.pyplot as plt mi@0: import matplotlib.gridspec as gridspec mi@0: import numpy as np mi@0: import scipy as sp mi@0: from scipy.signal import correlate2d, convolve2d, filtfilt, resample mi@0: from scipy.ndimage.filters import * mi@0: from sklearn.decomposition import PCA mi@0: from sklearn.mixture import GMM mi@0: from sklearn.cluster import KMeans mi@0: from sklearn.preprocessing import normalize mi@0: from sklearn.metrics.pairwise import pairwise_distances mi@0: mi@0: # Load dependencies mi@0: from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature mi@0: from utils.PeakPickerUtil import PeakPicker mi@0: from utils.gmmdist import * mi@0: from utils.GmmMetrics import GmmDistance mi@0: from utils.RankClustering import rClustering mi@0: from utils.kmeans import Kmeans mi@0: from utils.PathTracker import PathTracker mi@0: mi@0: # Load bourdary retrieval utilities mi@0: import cnmf as cnmf_S mi@0: import foote as foote_S mi@0: import sf as sf_S mi@0: import fmc2d as fmc2d_S mitian@1: import novelty as novelty_S mitian@1: mitian@1: # Algorithm params mitian@1: h = 8 # Size of median filter for features in C-NMF mitian@1: R = 15 # Size of the median filter for the activation matrix C-NMF mitian@4: rank = 4 # Rank of decomposition for the boundaries mitian@1: rank_labels = 6 # Rank of decomposition for the labels mitian@1: R_labels = 6 # Size of the median filter for the labels 
# Foote
M = 2    # Median filter for the audio features (in beats)
Mg = 32  # Gaussian kernel size
L = 16   # Size of the median filter for the adaptive threshold
# 2D-FMC
N = 8    # Size of the fixed length segments (for 2D-FMC)


# Define arg parser
def parse_args(argv=None):
    '''Build the command line parser and parse the arguments.

    argv -- optional list of argument strings; when None (the default)
            optparse falls back to sys.argv[1:], which is what the
            original zero-argument call did.

    Returns the (options, args) tuple produced by OptionParser.parse_args.
    '''
    op = optparse.OptionParser()

    # IO options
    op.add_option('-g', '--gammatonegram-features', action="store", dest="GF",
                  default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048',
                  type="str", help="Loading gammatone features from..")
    op.add_option('-s', '--spectrogram-features', action="store", dest="SF",
                  default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048',
                  type="str", help="Loading spectral features from..")
    op.add_option('-t', '--tempogram-features', action="store", dest="TF",
                  default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s',
                  type="str", help="Loading tempogram features from..")
    op.add_option('-f', '--featureset', action="store", dest="FEATURES",
                  default='[0, 1, 2, 3]', type="str",
                  help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3.")
    op.add_option('-a', '--annotations', action="store", dest="GT",
                  default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase',
                  type="str", help="Loading annotation files from.. ")
    # '--ouput' is the historical (misspelled) name; it is kept as an alias
    # so existing command lines keep working next to the corrected '--output'.
    op.add_option('-o', '--output', '--ouput', action="store", dest="OUTPUT",
                  default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng',
                  type="str", help="Write segmentation results to ")

    # boundary retrieval options
    # '--bounrary-method' is likewise kept as a legacy alias. The help text
    # now lists exactly the values accepted by `choices`.
    op.add_option('-b', '--boundary-method', '--bounrary-method', action="store",
                  dest="BOUNDARY", type='choice',
                  choices=['novelty', 'cnmf', 'foote', 'sf'], default='novelty',
                  help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'foote', 'sf').")
    op.add_option('-l', '--labeling-method', action="store", dest="LABEL",
                  type='choice', choices=['cnmf', 'fmc2d'], default='cnmf',
                  help="Choose boundary labeling algorithm ('cnmf', 'fmc2d').")

    # Plot/print/mode options
    op.add_option('-p', '--plot', action="store_true", dest="PLOT",
                  default=False, help="Save plots")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST",
                  default=False, help="Test mode")
    op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE",
                  default=False, help="Print results in verbose mode.")

    return op.parse_args(argv)
# Parsed once at import time; the module reads `options` as a global.
options, args = parse_args()


class FeatureObj():
    # Attribute names carried by a per-file feature record.
    # The original list named 'timbre_features' twice and had no
    # 'timbre_ssm', although every other feature has a *_ssm entry.
    __slots__ = ['key', 'audio', 'timestamps',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
                 'ssm_timestamps']


class AudioObj():
    # Attribute names carried by a per-audio record (annotation, features, SSMs).
    __slots__ = ['name', 'feature_list', 'gt', 'label',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm',
                 'ssm', 'ssm_timestamps', 'tempo_timestamps']


class EvalObj():
    # Evaluation record: counts (TP/FP/FN), rates (P/R/F) and the two
    # mean boundary distances (AD, DA) filled in by SSMseg.pairwiseF.
    __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA']


class SSMseg(object):
    '''The main segmentation object'''

    def __init__(self):
        '''Set the analysis, feature extraction and peak picking defaults.'''
        self.SampleRate = 44100
        # Floor division keeps these integers on every Python version
        # (same values as the original `/` under Python 2).
        self.NqHz = self.SampleRate // 2
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 64  # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        # NOTE: Match the following params with those used for feature extraction!

        # NOTE: Unlike spectrogram ones, Gammatone features are extracted without
        # taking an FFT. The windowing is done under the purpose of chunking the
        # audio to facilitate the gammatone filtering with the specified blockSize
        # and stepSize. The resulting gammatonegram is aggregated every
        # gammatoneLen without overlap.
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100
        self.highFreq = self.SampleRate // 4

        # Settings for extracting tempogram features.
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        # Peak picking settings for novelty based method
        self.threshold = 30
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 7
# SSMseg method; SOURCE carries no indentation, so it is shown at column 0.
def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0, idx2time=None):
    '''Pairwise F measure evaluation of detection rates.

    annotation -- ground truth boundary positions (1-D sequence).
    detection  -- detected boundaries; positions in the same unit as
                  `annotation`, or indices into `idx2time` when that is given.
    tolerance  -- full width of the matching window: an annotation counts as
                  found when some detection lies within tolerance/2 of it.
    combine    -- accepted for interface compatibility; not used here.
    idx2time   -- optional lookup mapping a detection index to a position.

    Returns an EvalObj with
      TP     number of annotations matched by at least one detection,
      FP/FN  max(0, #detections - TP) and max(0, #annotations - TP),
      P/R/F  precision, recall and F measure (0.0 when TP is 0),
      AD     mean distance from each annotation to its closest detection,
             the last annotation being treated as an extra detection,
      DA     mean distance from each detection to its closest annotation.
    '''
    res = EvalObj()
    res.TP, res.FP, res.FN = 0, 0, 0
    res.P, res.R, res.F = 0.0, 0.0, 0.0
    res.AD, res.DA = 0.0, 0.0

    annotation = np.asarray(annotation, dtype=float).ravel()
    detection = np.asarray(detection, dtype=float).ravel()
    gt = annotation.size    # Total number of ground truth data points
    dt = detection.size     # Total number of experimental data points

    if dt == 0:
        # Nothing detected: every annotated boundary is a miss.
        res.FN = gt
        return res
    if gt == 0:
        # Nothing annotated: every detection is a false alarm.
        res.FP = dt
        return res

    # `is not None` on purpose: `!= None` is evaluated elementwise when
    # idx2time is a numpy array and cannot be used as a condition.
    if idx2time is not None:
        # Map detected idxs to real time
        lookup = np.asarray(idx2time, dtype=float)
        detection = lookup[np.rint(detection).astype(int)]

    # The last annotation is added once as an end marker. It only takes part
    # in the annotation -> detection distance and is never counted as a
    # detection, because the matching below is limited to the `dt` real ones.
    padded = np.append(detection, annotation[-1])

    D_DA = np.min(np.abs(detection[:, None] - annotation[None, :]), axis=1)
    D_AD = np.min(np.abs(annotation[:, None] - padded[None, :]), axis=1)

    half = tolerance / 2.0
    lower = detection[None, :] - half
    upper = detection[None, :] + half
    hit = (annotation[:, None] >= lower) & (annotation[:, None] <= upper)

    # Each annotation is counted at most once, however many detections hit it.
    res.TP = int(np.count_nonzero(hit.any(axis=1)))
    res.FP = max(0, dt - res.TP)
    res.FN = max(0, gt - res.TP)

    res.AD = np.mean(D_AD)
    res.DA = np.mean(D_DA)

    if res.TP == 0:
        return res

    res.P = res.TP / float(dt)
    res.R = res.TP / float(gt)
    res.F = 2 * res.P * res.R / (res.P + res.R)
    return res
# SSMseg method; SOURCE carries no indentation, so it is shown at column 0.
def writeIndividualHeader(self, filename):
    '''Append the CSV header row for single-feature results to `filename`.

    The row is 'audio' followed, for each of gammatone, harmonic, timbre and
    tempo, by tp/fp/fn/P/R/F/AD/DA at tolerance '05' and then at tolerance '3'.
    '''
    feature_names = ('gammatone', 'harmonic', 'timbre', 'tempo')
    tolerance_tags = ('05', '3')
    metric_names = ('tp', 'fp', 'fn', 'P', 'R', 'F', 'AD', 'DA')

    with open(filename, 'a') as f:
        columns = ['audio']
        for feature in feature_names:
            for tag in tolerance_tags:
                columns.extend('%s_%s_%s' % (feature, metric, tag) for metric in metric_names)
        csv.writer(f, delimiter=',').writerow(columns)
# SSMseg methods; SOURCE carries no indentation, so they are shown at column 0.
def writeIndividualRes(self, filename, ao_name, gt_res_05, gt_res_3, harmonic_res_05, harmonic_res_3, timbre_res_05, timbre_res_3, tempo_res_05, tempo_res_3):
    '''Append one single-feature result row for `ao_name` to `filename`.

    Each *_res_* argument must expose TP, FP, FN, P, R, F, AD and DA. The
    row is `ao_name` followed by those eight values for every argument, in
    signature order (gammatone, harmonic, timbre, tempo; '05' before '3').

    NOTE(review): the calls visible in process() pass the tempo results
    before the timbre results, so they land in the timbre columns and vice
    versa -- confirm and fix at the call sites.
    '''
    attribute_names = ('TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA')
    ordered_results = (gt_res_05, gt_res_3,
                       harmonic_res_05, harmonic_res_3,
                       timbre_res_05, timbre_res_3,
                       tempo_res_05, tempo_res_3)

    with open(filename, 'a') as f:
        row = [ao_name]
        for result in ordered_results:
            row.extend(getattr(result, attr) for attr in attribute_names)
        csv.writer(f, delimiter=',').writerow(row)


def writeCombinedHeader(self, filename):
    '''Append the CSV header row for combined-feature results to `filename`.

    The row is 'audio' followed, for each feature combination, by P/R/F at
    tolerance '0.5' and then P/R/F at tolerance '3'.
    '''
    combinations_in_order = ('gt_tb', 'gt_tp', 'gt_hm', 'tb_tp', 'hm_tb', 'hm_tp',
                             'gt_tb_tp', 'gt_hm_tb', 'gt_hm_tp', 'hm_tb_tp',
                             'gt_hm_tb_tp')
    tolerance_tags = ('0.5', '3')
    metric_names = ('P', 'R', 'F')

    with open(filename, 'a') as f:
        columns = ['audio']
        for combo in combinations_in_order:
            for tag in tolerance_tags:
                columns.extend('%s_%s_%s' % (combo, metric, tag) for metric in metric_names)
        csv.writer(f, delimiter=',').writerow(columns)
# SSMseg method; SOURCE carries no indentation, so it is shown at column 0.
def writeCombinedRes(self, filename, ao_name, gt_hm_res_05, gt_hm_res_3, gt_tb_res_05, gt_tb_res_3, gt_tp_res_05, gt_tp_res_3, hm_tb_res_05, hm_tb_res_3, hm_tp_res_05, hm_tp_res_3,
                     tb_tp_res_05, tb_tp_res_3, gt_hm_tb_res_05, gt_hm_tb_res_3, gt_hm_tp_res_05, gt_hm_tp_res_3, gt_tb_tp_res_05, gt_tb_tp_res_3, hm_tb_tp_res_05, hm_tb_tp_res_3, gt_hm_tb_tp_res_05, gt_hm_tb_tp_res_3):
    '''Append one combined-feature result row for `ao_name` to `filename`.

    Each *_res_* argument must expose P, R and F. The row is `ao_name`
    followed by P, R, F of the '05' result and then of the '3' result for
    every feature combination. The combinations are written in the column
    order of the combined header, which differs from the signature order.
    '''
    with open(filename, 'a') as f:
        # Listed in output (header) order, not in parameter order.
        result_pairs = (
            (gt_tb_res_05, gt_tb_res_3),
            (gt_tp_res_05, gt_tp_res_3),
            (gt_hm_res_05, gt_hm_res_3),
            (tb_tp_res_05, tb_tp_res_3),
            (hm_tb_res_05, hm_tb_res_3),
            (hm_tp_res_05, hm_tp_res_3),
            (gt_tb_tp_res_05, gt_tb_tp_res_3),
            (gt_hm_tb_res_05, gt_hm_tb_res_3),
            (gt_hm_tp_res_05, gt_hm_tp_res_3),
            (hm_tb_tp_res_05, hm_tb_tp_res_3),
            (gt_hm_tb_tp_res_05, gt_hm_tb_tp_res_3),
        )
        row = [ao_name]
        for pair in result_pairs:
            for result in pair:
                row += [result.P, result.R, result.F]
        csv.writer(f, delimiter=',').writerow(row)
# SSMseg methods; SOURCE carries no indentation, so they are shown at column 0.
def writeMergedHeader(self, filename):
    '''Append the CSV header row for merged detections to `filename`.

    The row is 'audio' followed by merged tp/fp/fn/P/R/F/AD/DA at tolerance
    '05' and then at tolerance '3'.
    '''
    metric_names = ('tp', 'fp', 'fn', 'P', 'R', 'F', 'AD', 'DA')
    with open(filename, 'a') as f:
        columns = ['audio']
        for tag in ('05', '3'):
            columns.extend('merged_%s_%s' % (metric, tag) for metric in metric_names)
        csv.writer(f, delimiter=',').writerow(columns)


def writeMergedRes(self, filename, ao_name, merged_res_05, merged_res_3):
    '''Append one merged-detection result row for `ao_name` to `filename`.

    Both result arguments must expose TP, FP, FN, P, R, F, AD and DA. The
    row is `ao_name`, the eight values of `merged_res_05`, then the eight
    values of `merged_res_3`.
    '''
    attribute_names = ('TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA')
    with open(filename, 'a') as f:
        row = [ao_name]
        for result in (merged_res_05, merged_res_3):
            row.extend(getattr(result, attr) for attr in attribute_names)
        csv.writer(f, delimiter=',').writerow(row)
features, discard a propertion each time as the pairwise distances within the feature space descending. mi@0: In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection mi@0: threshold - segmentation f measure curve. mi@0: ''' mi@0: mi@0: peak_picker = PeakPicker() mi@0: peak_picker.params.alpha = 9.0 # Alpha norm mi@0: peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta mi@0: peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0 mi@0: peak_picker.params.QuadThresh_b = 0.0 mi@0: peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0 mi@0: peak_picker.params.rawSensitivity = 20 mi@0: peak_picker.params.aCoeffs = self.aCoeffs mi@0: peak_picker.params.bCoeffs = self.bCoeffs mi@0: peak_picker.params.preWin = self.medianWin mi@0: peak_picker.params.postWin = self.medianWin + 1 mi@0: peak_picker.params.LP_on = self.LPfilter_on mi@0: peak_picker.params.Medfilt_on = self.medfilter_on mi@0: peak_picker.params.Polyfit_on = self.polyfitting_on mi@0: peak_picker.params.isMedianPositive = False mi@0: mi@0: # Settings used for feature extraction mi@0: feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow) mi@0: feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep) mi@0: aggregation_window, aggregation_step = 100, 50 mi@0: featureRate = float(self.SampleRate) / self.stepSize mi@0: mi@0: audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] mitian@4: if options.TEST: mitian@4: audio_files = audio_files[:1] mi@0: audio_files.sort() mi@0: audio_list = [] mi@0: mi@0: gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')] mitian@4: gammatone_feature_list = ['contrast6', 'rolloff4', 'dct'] mi@0: tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')] mitian@4: tempo_feature_list = ['ti', 'tir'] mitian@4: timbre_feature_list = 
['mfcc_harmonic'] mitian@4: harmonic_feature_list = ['chromagram'] mi@0: mi@0: gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list] mi@0: timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list] mi@0: tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list] mi@0: harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list] mi@0: mi@0: fobj_list = [] mi@0: mi@0: # For each audio file, load specific features mi@0: for audio in audio_files: mi@0: ao = AudioObj() mi@0: ao.name = splitext(audio)[0] mitian@5: annotation_file = join(options.GT, ao.name+'.txt') # iso, salami mitian@5: ao.gt = np.genfromtxt(annotation_file, usecols=0) mitian@5: ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) mitian@5: # annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng mitian@5: # ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') mitian@5: # ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) mi@0: mi@0: gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], [] mi@0: for feature in gammatone_feature_list: mi@0: for f in os.listdir(feature): mi@0: if f[:f.find('_vamp')]==ao.name: mi@0: gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) mi@0: break mi@0: if len(gammatone_feature_list) > 1: mi@0: n_frame = np.min([x.shape[0] for x in gammatone_featureset]) mi@0: gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset] mi@0: ao.gammatone_features = np.hstack((gammatone_featureset)) mi@0: else: mi@0: ao.gammatone_features = gammatone_featureset[0] mi@0: mi@0: for feature in timbre_feature_list: mi@0: for f in os.listdir(feature): mi@0: if f[:f.find('_vamp')]==ao.name: mi@0: timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) mi@0: break mi@0: if len(timbre_feature_list) > 1: mi@0: n_frame = np.min([x.shape[0] 
for x in timbre_featureset]) mi@0: timbre_featureset = [x[:n_frame,:] for x in timbre_featureset] mi@0: ao.timbre_features = np.hstack((timbre_featureset)) mi@0: else: mi@0: ao.timbre_features = timbre_featureset[0] mi@0: for feature in tempo_feature_list: mi@0: for f in os.listdir(feature): mi@0: if f[:f.find('_vamp')]==ao.name: mi@0: tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]) mi@0: ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0] mi@0: break mi@0: if len(tempo_feature_list) > 1: mi@0: n_frame = np.min([x.shape[0] for x in tempo_featureset]) mi@0: tempo_featureset = [x[:n_frame,:] for x in tempo_featureset] mi@0: ao.tempo_features = np.hstack((tempo_featureset)) mi@0: else: mi@0: ao.tempo_features = tempo_featureset[0] mi@0: for feature in harmonic_feature_list: mi@0: for f in os.listdir(feature): mi@0: if f[:f.find('_vamp')]==ao.name: mi@0: harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) mi@0: break mi@0: if len(harmonic_feature_list) > 1: mi@0: n_frame = np.min([x.shape[0] for x in harmonic_featureset]) mi@0: harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset] mi@0: ao.harmonic_features = np.hstack((harmonic_featureset)) mi@0: else: mi@0: ao.harmonic_features = harmonic_featureset[0] mi@0: mi@0: # Get aggregated features for computing ssm mi@0: aggregation_window, aggregation_step = 1,1 mi@0: featureRate = float(self.SampleRate) /self.stepSize mi@0: pca = PCA(n_components=5) mi@0: mi@0: # Resample and normalise features mitian@4: step = ao.tempo_features.shape[0] mi@0: ao.gammatone_features = resample(ao.gammatone_features, step) mi@0: ao.gammatone_features = normaliseFeature(ao.gammatone_features) mi@0: ao.timbre_features = resample(ao.timbre_features, step) mi@0: ao.timbre_features = normaliseFeature(ao.timbre_features) mi@0: ao.harmonic_features = resample(ao.harmonic_features, step) mi@0: 
ao.harmonic_features = normaliseFeature(ao.harmonic_features) mitian@4: ao.tempo_features = normaliseFeature(ao.tempo_features) mi@0: mi@0: pca.fit(ao.gammatone_features) mi@0: ao.gammatone_features = pca.transform(ao.gammatone_features) mi@0: ao.gammatone_ssm = getSSM(ao.gammatone_features) mi@0: mi@0: pca.fit(ao.tempo_features) mi@0: ao.tempo_features = pca.transform(ao.tempo_features) mi@0: ao.tempo_ssm = getSSM(ao.tempo_features) mi@0: mi@0: pca.fit(ao.timbre_features) mi@0: ao.timbre_features = pca.transform(ao.timbre_features) mi@0: ao.timbre_ssm = getSSM(ao.timbre_features) mi@0: mi@0: pca.fit(ao.harmonic_features) mi@0: ao.harmonic_features = pca.transform(ao.harmonic_features) mi@0: ao.harmonic_ssm = getSSM(ao.harmonic_features) mi@0: mi@0: ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) mi@0: mi@0: audio_list.append(ao) mi@0: mitian@3: # Prepare output files. mitian@3: outfile1 = join(options.OUTPUT, 'individual_novelty.csv') mitian@3: outfile2 = join(options.OUTPUT, 'individual_foote.csv') mitian@3: outfile3 = join(options.OUTPUT, 'individual_sf.csv') mitian@3: outfile4 = join(options.OUTPUT, 'individual_cnmf.csv') mitian@3: mitian@3: outfile5 = join(options.OUTPUT, 'combined_novelty.csv') mitian@3: outfile6 = join(options.OUTPUT, 'combined_foote.csv') mitian@3: outfile7 = join(options.OUTPUT, 'combined_sf.csv') mitian@3: outfile8 = join(options.OUTPUT, 'combined_cnmf.csv') mitian@3: mitian@4: outfile9 = join(options.OUTPUT, 'individual_merged.csv') mitian@4: mitian@4: self.writeIndividualHeader(outfile1) mitian@4: self.writeIndividualHeader(outfile2) mitian@4: self.writeIndividualHeader(outfile3) mitian@4: self.writeIndividualHeader(outfile4) mitian@3: mitian@4: # self.writeCombinedHeader(outfile5) mitian@4: # self.writeCombinedHeader(outfile6) mitian@4: self.writeCombinedHeader(outfile7) mitian@4: self.writeCombinedHeader(outfile8) mitian@4: mitian@4: 
self.writeMergedHeader(outfile9) mitian@3: mi@0: print 'Segmenting using %s method' %options.BOUNDARY mi@0: for i,ao in enumerate(audio_list): mi@0: print 'processing: %s' %ao.name mitian@3: mitian@3: ############################################################################################################################################ mitian@3: # Experiment 1: segmentation using individual features. mitian@3: mitian@3: gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker) mitian@3: timbre_novelty, smoothed_timbre_novelty, timbre_novelty_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker) mitian@3: tempo_novelty, smoothed_harmonic_novelty, tempo_novelty_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker) mitian@3: harmonic_novelty, smoothed_tempo_novelty, harmonic_novelty_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker) mitian@3: mitian@3: gammatone_cnmf_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=h, niter=300) mitian@3: timbre_cnmf_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300) mitian@3: tempo_cnmf_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300) mitian@3: harmonic_cnmf_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300) mitian@3: mitian@3: gammatone_foote_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L) mitian@3: timbre_foote_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L) mitian@3: tempo_foote_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L) mitian@3: harmonic_foote_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L) mitian@3: mitian@3: gammatone_sf_idxs = sf_S.segmentation(ao.gammatone_features) mitian@3: timbre_sf_idxs = sf_S.segmentation(ao.timbre_features) mitian@3: tempo_sf_idxs = sf_S.segmentation(ao.tempo_features) mitian@3: 
harmonic_sf_idxs = sf_S.segmentation(ao.harmonic_features) mitian@1: mitian@3: # Evaluate and write results. mitian@3: gt_novelty_05 = self.pairwiseF(ao.gt, gammatone_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: gt_novelty_3 = self.pairwiseF(ao.gt, gammatone_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_novelty_05 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_novelty_3 = self.pairwiseF(ao.gt, harmonic_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_novelty_05 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_novelty_3 = self.pairwiseF(ao.gt, tempo_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_novelty_05 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_novelty_3 = self.pairwiseF(ao.gt, timbre_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mi@0: mitian@3: gt_cnmf_05 = self.pairwiseF(ao.gt, gammatone_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: gt_cnmf_3 = self.pairwiseF(ao.gt, gammatone_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_cnmf_05 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_cnmf_3 = self.pairwiseF(ao.gt, harmonic_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_cnmf_05 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_cnmf_3 = self.pairwiseF(ao.gt, tempo_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_cnmf_05 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) 
mitian@3: timbre_cnmf_3 = self.pairwiseF(ao.gt, timbre_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: mitian@3: gt_sf_05 = self.pairwiseF(ao.gt, gammatone_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: gt_sf_3 = self.pairwiseF(ao.gt, gammatone_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_sf_05 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_sf_3 = self.pairwiseF(ao.gt, harmonic_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_sf_05 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_sf_3 = self.pairwiseF(ao.gt, tempo_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_sf_05 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_sf_3 = self.pairwiseF(ao.gt, timbre_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: mitian@3: gt_foote_05 = self.pairwiseF(ao.gt, gammatone_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: gt_foote_3 = self.pairwiseF(ao.gt, gammatone_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_foote_05 = self.pairwiseF(ao.gt, harmonic_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: harmonic_foote_3 = self.pairwiseF(ao.gt, harmonic_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_foote_05 = self.pairwiseF(ao.gt, tempo_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: tempo_foote_3 = self.pairwiseF(ao.gt, tempo_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_foote_05 = self.pairwiseF(ao.gt, timbre_foote_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: timbre_foote_3 = self.pairwiseF(ao.gt, 
timbre_foote_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@1: mitian@5: self.writeIndividualRes(outfile1, ao.name, gt_novelty_05, gt_novelty_3, harmonic_novelty_05, harmonic_novelty_3, tempo_novelty_05, tempo_novelty_3, timbre_novelty_05, timbre_novelty_3) mitian@5: self.writeIndividualRes(outfile2, ao.name, gt_cnmf_05, gt_cnmf_3, harmonic_cnmf_05, harmonic_cnmf_3, tempo_cnmf_05, tempo_cnmf_3, timbre_cnmf_05, timbre_cnmf_3) mitian@5: self.writeIndividualRes(outfile3, ao.name, gt_sf_05, gt_sf_3, harmonic_sf_05, harmonic_sf_3, tempo_sf_05, tempo_sf_3, timbre_sf_05, timbre_sf_3) mitian@5: self.writeIndividualRes(outfile4, ao.name, gt_foote_05, gt_foote_3, harmonic_foote_05, harmonic_foote_3, tempo_foote_05, tempo_foote_3, timbre_foote_05, timbre_foote_3) mitian@1: mitian@1: mitian@3: ############################################################################################################################################ mitian@3: # Experiment 2: segmentation using combined features. mi@2: mitian@3: # Dumping features. 
mitian@3: gt_hm = np.hstack([ao.gammatone_features, ao.harmonic_features]) mitian@3: gt_tb = np.hstack([ao.gammatone_features, ao.timbre_features]) mitian@3: gt_tp = np.hstack([ao.gammatone_features, ao.tempo_features]) mitian@3: hm_tb = np.hstack([ao.harmonic_features, ao.timbre_features]) mitian@3: hm_tp = np.hstack([ao.harmonic_features, ao.tempo_features]) mitian@3: tb_tp = np.hstack([ao.timbre_features, ao.tempo_features]) mitian@4: gt_hm_tb = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.timbre_features]) mitian@3: gt_hm_tp = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.tempo_features]) mitian@3: gt_tb_tp = np.hstack([ao.gammatone_features, ao.timbre_features, ao.tempo_features]) mitian@3: hm_tb_tp = np.hstack([ao.harmonic_features, ao.timbre_features, ao.tempo_features]) mitian@3: gt_hm_tb_tp = np.hstack([ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]) mitian@3: mitian@4: # Evaluting and writing results. mitian@3: gt_hm_sf_idxs = sf_S.segmentation(gt_hm) mitian@3: gt_tb_sf_idxs = sf_S.segmentation(gt_tb) mitian@3: gt_tp_sf_idxs = sf_S.segmentation(gt_tp) mitian@3: hm_tb_sf_idxs = sf_S.segmentation(hm_tb) mitian@3: hm_tp_sf_idxs = sf_S.segmentation(hm_tp) mitian@3: tb_tp_sf_idxs = sf_S.segmentation(tb_tp) mitian@3: gt_hm_tb_sf_idxs = sf_S.segmentation(gt_hm_tb) mitian@3: gt_hm_tp_sf_idxs = sf_S.segmentation(gt_hm_tp) mitian@3: gt_tb_tp_sf_idxs = sf_S.segmentation(gt_tb_tp) mitian@3: hm_tb_tp_sf_idxs = sf_S.segmentation(hm_tb_tp) mitian@3: gt_hm_tb_tp_sf_idxs = sf_S.segmentation(gt_hm_tb_tp) mitian@3: mitian@4: gt_hm_cnmf_idxs = cnmf_S.segmentation(gt_hm, rank=4, R=R, h=h, niter=300) mitian@4: gt_tb_cnmf_idxs = cnmf_S.segmentation(gt_tb, rank=4, R=R, h=h, niter=300) mitian@4: gt_tp_cnmf_idxs = cnmf_S.segmentation(gt_tp, rank=4, R=R, h=h, niter=300) mitian@4: hm_tb_cnmf_idxs = cnmf_S.segmentation(hm_tb, rank=4, R=R, h=h, niter=300) mitian@4: hm_tp_cnmf_idxs = cnmf_S.segmentation(hm_tp, rank=4, R=R, 
h=h, niter=300) mitian@4: tb_tp_cnmf_idxs = cnmf_S.segmentation(tb_tp, rank=4, R=R, h=h, niter=300) mitian@4: gt_hm_tb_cnmf_idxs = cnmf_S.segmentation(gt_hm_tb, rank=6, R=R, h=h, niter=300) mitian@4: gt_hm_tp_cnmf_idxs = cnmf_S.segmentation(gt_hm_tp, rank=6, R=R, h=h, niter=300) mitian@4: gt_tb_tp_cnmf_idxs = cnmf_S.segmentation(gt_tb_tp, rank=6, R=R, h=h, niter=300) mitian@4: hm_tb_tp_cnmf_idxs = cnmf_S.segmentation(hm_tb_tp, rank=6, R=R, h=h, niter=300) mitian@4: gt_hm_tb_tp_cnmf_idxs = cnmf_S.segmentation(gt_hm_tb_tp, rank=8, R=R, h=h, niter=300) mitian@3: mitian@4: gt_hm_sf_05 = self.pairwiseF(ao.gt, gt_hm_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_sf_05 = self.pairwiseF(ao.gt, gt_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tp_sf_05 = self.pairwiseF(ao.gt, gt_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_sf_05 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tp_sf_05 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: tb_tp_sf_05 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_sf_05 = self.pairwiseF(ao.gt, gt_hm_tb_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tp_sf_05 = self.pairwiseF(ao.gt, gt_hm_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_tp_sf_05 = self.pairwiseF(ao.gt, gt_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_tp_sf_05 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_tp_sf_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_sf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: mitian@4: gt_hm_sf_3 = self.pairwiseF(ao.gt, gt_hm_sf_idxs, tolerance=3, combine=1.0, 
idx2time=ao.ssm_timestamps) mitian@4: gt_tb_sf_3 = self.pairwiseF(ao.gt, gt_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tp_sf_3 = self.pairwiseF(ao.gt, gt_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_sf_3 = self.pairwiseF(ao.gt, hm_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tp_sf_3 = self.pairwiseF(ao.gt, hm_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: tb_tp_sf_3 = self.pairwiseF(ao.gt, tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_sf_3 = self.pairwiseF(ao.gt, gt_hm_tb_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tp_sf_3 = self.pairwiseF(ao.gt, gt_hm_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_tp_sf_3 = self.pairwiseF(ao.gt, gt_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_tp_sf_3 = self.pairwiseF(ao.gt, hm_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_tp_sf_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_sf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: mitian@4: gt_hm_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_cnmf_05 = self.pairwiseF(ao.gt, gt_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: tb_tp_cnmf_05 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_cnmf_05 = self.pairwiseF(ao.gt, 
gt_hm_tb_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_tp_cnmf_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_cnmf_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: mitian@4: gt_hm_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_cnmf_3 = self.pairwiseF(ao.gt, gt_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: tb_tp_cnmf_3 = self.pairwiseF(ao.gt, tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tb_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: hm_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, hm_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: gt_hm_tb_tp_cnmf_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_cnmf_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@3: mitian@4: self.writeCombinedRes(outfile7, ao.name, 
gt_hm_sf_05, gt_hm_sf_3, gt_tb_sf_05, gt_tb_sf_3, gt_tp_sf_05, gt_tp_sf_3, hm_tb_sf_05, hm_tb_sf_3, hm_tp_sf_05, hm_tp_sf_3, tb_tp_sf_05, tb_tp_sf_3,\ mitian@4: gt_hm_tb_sf_05, gt_hm_tb_sf_3, gt_hm_tp_sf_05, gt_hm_tp_sf_3, gt_tb_tp_sf_05, gt_tb_tp_sf_3, hm_tb_tp_sf_05, hm_tb_tp_sf_3, gt_hm_tb_tp_sf_05, gt_hm_tb_tp_sf_3) mitian@4: mitian@4: self.writeCombinedRes(outfile8, ao.name, gt_hm_cnmf_05, gt_hm_cnmf_3, gt_tb_cnmf_05, gt_tb_cnmf_3, gt_tp_cnmf_05, gt_tp_cnmf_3, hm_tb_cnmf_05, hm_tb_cnmf_3, hm_tp_cnmf_05, hm_tp_cnmf_3, tb_tp_cnmf_05, tb_tp_cnmf_3,\ mitian@4: gt_hm_tb_cnmf_05, gt_hm_tb_cnmf_3, gt_hm_tp_cnmf_05, gt_hm_tp_cnmf_3, gt_tb_tp_cnmf_05, gt_tb_tp_cnmf_3, hm_tb_tp_cnmf_05, hm_tb_tp_cnmf_3, gt_hm_tb_tp_cnmf_05, gt_hm_tb_tp_cnmf_3) mitian@3: mitian@3: ############################################################################################################################################ mitian@3: # Experiment 3: Pruning boundaries detected by individual boundary algorithms. mitian@3: mitian@4: # Use different boundary methods for different features mitian@4: gammatone_idxs, harmonic_idxs, timbre_idxs, tempo_idxs = gammatone_sf_idxs, harmonic_sf_idxs, timbre_sf_idxs, tempo_sf_idxs mitian@4: bound_candidates = list(gammatone_idxs) + list(harmonic_idxs) + list(timbre_idxs) + list(tempo_idxs) mitian@4: bound_candidates.sort() mitian@4: mitian@4: nBounds = len(bound_candidates) mitian@4: final_idxs = [] mitian@4: idx = 0 mitian@4: tol = 10 # tolerance window of merging boundary scores mitian@4: while idx < nBounds: mitian@4: temp = [bound_candidates[idx]] mitian@4: pos = [idx] mitian@4: idx += 1 mitian@4: while (idx + tol < nBounds and np.max(bound_candidates[idx: idx+tol]) > 0): mitian@4: temp += [bound_candidates[idx+delta] for delta in xrange(tol) if (bound_candidates[idx]+delta in bound_candidates)] mitian@4: pos += [idx+delta for delta in xrange(tol) if (bound_candidates[idx]+delta in bound_candidates)] mitian@4: idx += tol mitian@4: if len(temp) == 1: 
mitian@4: final_idxs.append(temp[0]) mitian@4: else: mitian@4: final_idxs.append(int(np.rint(np.mean(temp)))) mitian@4: mitian@4: merged_05 = self.pairwiseF(ao.gt, final_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: merged_3 = self.pairwiseF(ao.gt, final_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps) mitian@4: mitian@4: self.writeMergedRes(outfile9, ao.name, merged_05, merged_3) mitian@4: mitian@3: # if options.BOUNDARY == 'novelty': mitian@3: # gammatone_novelty, smoothed_gammatone_novelty, gammatone_bound_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker) mitian@3: # timbre_novelty, smoothed_timbre_novelty, timbre_bound_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker) mitian@3: # tempo_novelty, smoothed_harmonic_novelty, tempo_bound_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker) mitian@3: # harmonic_novelty, smoothed_tempo_novelty, harmonic_bound_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker) mitian@3: # mitian@3: # if options.BOUNDARY == 'cnmf': mitian@3: # gammatone_cnmf_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=8, niter=300) mitian@3: # timbre_cnmf_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300) mitian@3: # tempo_cnmf_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300) mitian@3: # harmonic_cnmf_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300) mitian@3: # mitian@3: # if options.BOUNDARY == 'foote': mitian@3: # gammatone_foote_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L) mitian@3: # timbre_foote_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L) mitian@3: # tempo_foote_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L) mitian@3: # harmonic_foote_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L) mitian@3: # mitian@3: # if options.BOUNDARY == 'sf': 
mitian@3: # gammatone_sf_idxs = sf_S.segmentation(ao.gammatone_features) mitian@3: # timbre_sf_idxs = sf_S.segmentation(ao.timbre_features) mitian@3: # tempo_sf_idxs = sf_S.segmentation(ao.tempo_features) mitian@3: # harmonic_sf_idxs = sf_S.segmentation(ao.harmonic_features) mitian@3: # mitian@3: # gammatone_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks] mitian@3: # timbre_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks] mitian@3: # harmonic_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks] mitian@3: # tempo_novelty_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks] mitian@3: # mitian@3: # gammatone_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_cnmf_peaks] mitian@3: # timbre_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_cnmf_peaks] mitian@3: # harmonic_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_cnmf_peaks] mitian@3: # tempo_cnmf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_cnmf_peaks] mitian@3: # mitian@3: # gammatone_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_sf_peaks] mitian@3: # timbre_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_sf_peaks] mitian@3: # harmonic_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_sf_peaks] mitian@3: # tempo_sf_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_sf_peaks] mitian@3: # mitian@3: # gammatone_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_foote_peaks] mitian@3: # timbre_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_foote_peaks] mitian@3: # harmonic_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_foote_peaks] 
# tempo_foote_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_foote_peaks]
#
# # Experiment 2: Trying combined features using the best boundary retrieval method
# ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
# feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
# fused_featureset = [ao_featureset[i] for i in feature_sel]

# if options.LABEL == 'fmc2d':
#     gammatone_fmc2d_labels = fmc2d_S.compute_similarity(gammatone_bound_idxs, xmeans=True, N=N)
#     timbre_fmc2d_labels = fmc2d_S.compute_similarity(timbre_bound_idxs, xmeans=True, N=N)
#     tempo_fmc2d_labels = fmc2d_S.compute_similarity(tempo_bound_idxs, xmeans=True, N=N)
#     harmonic_fmc2d_labels = fmc2d_S.compute_similarity(harmonic_bound_idxs, xmeans=True, N=N)
#
# if options.LABEL == 'cnmf':
#     gammatone_cnmf_labels = cnmf_S.compute_labels(gammatone_bound_idxs, est_bound_idxs, nFrames)
#     timbre_cnmf_labels = cnmf_S.compute_labels(timbre_bound_idxs, est_bound_idxs, nFrames)
#     tempo_cnmf_labels = cnmf_S.compute_labels(tempo_bound_idxs, est_bound_idxs, nFrames)
#     harmonic_cnmf_labels = cnmf_S.compute_labels(harmonic_bound_idxs, est_bound_idxs, nFrames)
#
#



def main():
    """Script entry point: create an SSMseg instance and call its process() method."""
    SSMseg().process()


# Only run when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()