view SegEval.py @ 0:26838b1f560f
initial commit of a segmenter project
| author | mi tian |
|---|---|
| date | Thu, 02 Apr 2015 18:09:27 +0100 |
| parents | |
| children | c11ea9e0357f |
line source
#!/usr/bin/env python
# encoding: utf-8
"""
SegEval.py

The main segmentation program.

Created by mi tian on 2015-04-02.
Copyright (c) 2015 __MyCompanyName__. All rights reserved.
"""

# Load standard python libs
import sys, os, optparse, csv
from itertools import combinations
from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
from copy import copy
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import scipy as sp
from scipy.signal import correlate2d, convolve2d, filtfilt, resample
from scipy.ndimage.filters import *
from sklearn.decomposition import PCA
from sklearn.mixture import GMM
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import pairwise_distances

# Load dependencies
from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature
from utils.PeakPickerUtil import PeakPicker
from utils.gmmdist import *
from utils.GmmMetrics import GmmDistance
from utils.RankClustering import rClustering
from utils.kmeans import Kmeans
from utils.PathTracker import PathTracker

# Load boundary retrieval utilities
import cnmf as cnmf_S
import foote as foote_S
import sf as sf_S
import fmc2d as fmc2d_S


# Define arg parser
def parse_args():
    op = optparse.OptionParser()
    # IO options
    op.add_option('-g', '--gammatonegram-features', action="store", dest="GF",
        default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str",
        help="Load gammatone features from this directory.")
    op.add_option('-s', '--spectrogram-features', action="store", dest="SF",
        default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str",
        help="Load spectral features from this directory.")
    op.add_option('-t', '--tempogram-features', action="store", dest="TF",
        default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str",
        help="Load tempogram features from this directory.")
    op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str",
        help="Choose feature subsets (a list of integers) used for segmentation -- gammatone, chroma, timbre, tempo -- 0, 1, 2, 3.")
    op.add_option('-a', '--annotations', action="store", dest="GT",
        default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str",
        help="Load annotation files from this directory.")
    op.add_option('-o', '--output', action="store", dest="OUTPUT",
        default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str",
        help="Write segmentation results to this directory.")
    # Boundary retrieval options
    op.add_option('-b', '--boundary-method', action="store", dest="BOUNDARY",
        default=['novelty', 'cnmf', 'sf', 'fmc2d'],
        help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d').")
    # Plot/print/mode options
    op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False,
        help="Save plots.")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False,
        help="Test mode.")
    op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False,
        help="Print results in verbose mode.")
    return op.parse_args()

options, args = parse_args()
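# Note on -f/--featureset: the option string is parsed character-wise further down as
#   feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
# and indexes into the [gammatone, harmonic, timbre, tempo] feature list, so e.g.
# passing -f '[0, 2]' keeps only the gammatone and timbre streams.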
class FeatureObj(object):
    __slots__ = ['key', 'audio', 'timestamps',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'ssm_timestamps']


class AudioObj(object):
    __slots__ = ['name', 'feature_list', 'gt', 'label',
                 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',
                 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm',
                 'ssm_timestamps', 'tempo_timestamps']


class EvalObj(object):
    __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA']


class SSMseg(object):
    '''The main segmentation object.'''

    def __init__(self):
        self.SampleRate = 44100
        self.NqHz = self.SampleRate / 2
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 64  # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        '''NOTE: Match the following params with those used for feature extraction!'''
        '''NOTE: Unlike the spectrogram features, gammatone features are extracted without taking
        an FFT. The windowing only serves to chunk the audio so that the gammatone filtering can
        be applied with the specified blockSize and stepSize. The resulting gammatonegram is
        aggregated every gammatoneLen samples without overlap.'''
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100
        self.highFreq = self.SampleRate / 4

        '''Settings for extracting tempogram features.'''
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        '''Peak picking settings.'''
        self.threshold = 50
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 7

    def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
        '''Pairwise F measure evaluation of detection rates.'''
        # print 'detection', detection
        detection = np.append(detection, annotation[-1])
        res = EvalObj()
        res.TP = 0               # Number of matched ground truth and detected boundaries
        gt = len(annotation)     # Total number of ground truth boundaries
        dt = len(detection)      # Total number of detected boundaries
        foundIdx = []
        D_AD = np.zeros(gt)
        D_DA = np.zeros(dt)
        for dtIdx in xrange(dt):
            D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation))
        for gtIdx in xrange(gt):
            D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection))
            for dtIdx in xrange(dt):
                if (annotation[gtIdx] >= detection[dtIdx] - tolerance / 2.0) and \
                   (annotation[gtIdx] <= detection[dtIdx] + tolerance / 2.0):
                    res.TP += 1.0
                    foundIdx.append(gtIdx)
        foundIdx = list(set(foundIdx))
        res.TP = len(foundIdx)
        res.FP = max(0, dt - res.TP)
        res.FN = max(0, gt - res.TP)
        res.AD = np.mean(D_AD)
        res.DA = np.mean(D_DA)
        res.P, res.R, res.F = 0.0, 0.0, 0.0
        if res.TP == 0:
            return res
        res.P = res.TP / float(dt)
        res.R = res.TP / float(gt)
        res.F = 2 * res.P * res.R / (res.P + res.R)
        return res
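    # Worked example (hypothetical numbers, not from the dataset): with
    # annotation = np.array([0., 10., 20., 30.]) and detection = np.array([9., 21.]),
    # the final annotation is appended to the detections, and the default 3 s tolerance
    # matches the boundaries near 10, 20 and 30 s, giving TP = 3, so
    # P = 3/3 = 1.0, R = 3/4 = 0.75 and F = 2PR/(P+R) ~ 0.86:
    #
    #   res = SSMseg().pairwiseF(np.array([0., 10., 20., 30.]), np.array([9., 21.]))
    #   print res.P, res.R, res.F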
    def process(self):
        '''For the aggregated input features, discard a proportion of the features each time,
        following the pairwise distances within the feature space in descending order. Meanwhile,
        evaluate the segmentation result and track how performance changes by measuring the
        feature selection threshold vs. segmentation F measure curve.
        '''
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0                    # Alpha norm
        peak_picker.params.delta = self.delta_threshold   # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False

        # Settings used for feature extraction
        feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow)
        feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep)
        aggregation_window, aggregation_step = 100, 50
        featureRate = float(self.SampleRate) / self.stepSize

        audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".")]
        # audio_files = audio_files[:2]
        audio_files.sort()
        audio_list = []

        gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')]
        gammatone_feature_list = ['contrast4', 'rolloff', 'dct']
        tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')]
        tempo_feature_list = ['intensity_bpm', 'loudness_bpm']
        timbre_feature_list = ['mfcc']
        harmonic_feature_list = ['nnls']

        gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list]
        timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list]
        tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list]
        harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list]

        fobj_list = []

        # For each audio file, load the specified features.
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]
            print ao.name

            # annotation_file = join(options.GT, ao.name+'.txt')  # iso, salami
            # ao.gt = np.genfromtxt(annotation_file, usecols=0)
            # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
            annotation_file = join(options.GT, ao.name+'.csv')  # qupujicheng
            ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
            ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)

            gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], []

            for feature in gammatone_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[:, 1:])
                        break
            if len(gammatone_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in gammatone_featureset])
                gammatone_featureset = [x[:n_frame, :] for x in gammatone_featureset]
                ao.gammatone_features = np.hstack(gammatone_featureset)
            else:
                ao.gammatone_features = gammatone_featureset[0]

            for feature in timbre_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[:, 1:])
                        break
            if len(timbre_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in timbre_featureset])
                timbre_featureset = [x[:n_frame, :] for x in timbre_featureset]
                ao.timbre_features = np.hstack(timbre_featureset)
            else:
                ao.timbre_features = timbre_featureset[0]

            for feature in tempo_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[1:, 1:])
                        ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[1:, 0]
                        break
            if len(tempo_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in tempo_featureset])
                tempo_featureset = [x[:n_frame, :] for x in tempo_featureset]
                ao.tempo_features = np.hstack(tempo_featureset)
            else:
                ao.tempo_features = tempo_featureset[0]

            for feature in harmonic_feature_list:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')] == ao.name:
                        harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',', filling_values=0.0)[:, 1:])
                        break
            if len(harmonic_feature_list) > 1:
                n_frame = np.min([x.shape[0] for x in harmonic_featureset])
                harmonic_featureset = [x[:n_frame, :] for x in harmonic_featureset]
                ao.harmonic_features = np.hstack(harmonic_featureset)
            else:
                ao.harmonic_features = harmonic_featureset[0]
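            # getSSM() is provided by utils.SegUtil and is not shown in this file. As a
            # rough illustration only (an assumption, not the project's implementation),
            # a self-similarity matrix over frame-wise features can be derived from the
            # pairwise distances imported above:
            #
            #   D = pairwise_distances(features, metric='cosine')
            #   ssm = 1.0 - D / (D.max() + 1e-9)
            #
            # i.e. high values mark pairs of frames with similar feature vectors.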
            # Get aggregated features for computing the SSMs
            aggregation_window, aggregation_step = 1, 1
            featureRate = float(self.SampleRate) / self.stepSize
            pca = PCA(n_components=5)

            # Resample and normalise features.
            # NOTE: the resampling target length is assumed to be the number of tempo
            # frames, so that all feature streams share the tempo timeline.
            step = ao.tempo_features.shape[0]
            ao.gammatone_features = resample(ao.gammatone_features, step)
            ao.gammatone_features = normaliseFeature(ao.gammatone_features)
            ao.timbre_features = resample(ao.timbre_features, step)
            ao.timbre_features = normaliseFeature(ao.timbre_features)
            ao.harmonic_features = resample(ao.harmonic_features, step)
            ao.harmonic_features = normaliseFeature(ao.harmonic_features)
            ao.tempo_features = normaliseFeature(ao.tempo_features)

            # Reduce each feature stream with PCA and compute its self-similarity matrix.
            pca.fit(ao.gammatone_features)
            ao.gammatone_features = pca.transform(ao.gammatone_features)
            ao.gammatone_ssm = getSSM(ao.gammatone_features)

            pca.fit(ao.tempo_features)
            ao.tempo_features = pca.transform(ao.tempo_features)
            ao.tempo_ssm = getSSM(ao.tempo_features)

            pca.fit(ao.timbre_features)
            ao.timbre_features = pca.transform(ao.timbre_features)
            ao.timbre_ssm = getSSM(ao.timbre_features)

            pca.fit(ao.harmonic_features)
            ao.harmonic_features = pca.transform(ao.harmonic_features)
            ao.harmonic_ssm = getSSM(ao.harmonic_features)

            ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step * x],
                                             np.arange(0, ao.gammatone_ssm.shape[0])))

            audio_list.append(ao)
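        # getNoveltyPeaks() is expected to come from the boundary retrieval helpers
        # imported above; it is not defined in this file. As a rough sketch (an
        # assumption, not the project's implementation), a Foote-style novelty curve
        # correlates a checkerboard kernel along the SSM diagonal before peak picking:
        #
        #   def checkerboard_novelty(ssm, kernel_size):
        #       half = kernel_size // 2
        #       edge = np.r_[-np.ones(half), np.ones(half)]
        #       kernel = np.outer(edge, edge)            # +1 within-segment, -1 across
        #       padded = np.pad(ssm, half, mode='constant')
        #       return np.array([np.sum(padded[i:i + 2 * half, i:i + 2 * half] * kernel)
        #                        for i in range(ssm.shape[0])])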
        # Segment the input audio using the specified boundary retrieval method.
        print 'Segmenting using %s method' % options.BOUNDARY

        for i, ao in enumerate(audio_list):
            print 'processing: %s' % ao.name

            ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
            feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
            ao_featureset = [ao_featureset[k] for k in feature_sel]

            gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_peaks = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker)
            timbre_novelty, smoothed_timbre_novelty, timbre_novelty_peaks = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker)
            tempo_novelty, smoothed_tempo_novelty, tempo_novelty_peaks = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker)
            harmonic_novelty, smoothed_harmonic_novelty, harmonic_novelty_peaks = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker)

            # Peak picking from the novelty curves; peak indices are mapped back to
            # boundary times via the SSM timestamps.
            smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
            gammatone_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in gammatone_novelty_peaks]
            smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
            timbre_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in timbre_novelty_peaks]
            smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
            harmonic_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in harmonic_novelty_peaks]
            smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
            tempo_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in tempo_novelty_peaks]

            if (len(gammatone_novelty_peaks) == 0 or len(harmonic_novelty_peaks) == 0 or
                    len(timbre_novelty_peaks) == 0 or len(tempo_novelty_peaks) == 0):
                print ao.name, len(gammatone_novelty_peaks), len(harmonic_novelty_peaks), len(timbre_novelty_peaks), len(tempo_novelty_peaks)

            # Offset each smoothed novelty curve to start from zero and sum them into a
            # combined segment detection function.
            smoothed_gammatone_novelty -= np.min(smoothed_gammatone_novelty)
            smoothed_harmonic_novelty -= np.min(smoothed_harmonic_novelty)
            smoothed_timbre_novelty -= np.min(smoothed_timbre_novelty)
            smoothed_tempo_novelty -= np.min(smoothed_tempo_novelty)
            combined_sdf = (np.array(smoothed_gammatone_novelty) + np.array(smoothed_harmonic_novelty) +
                            np.array(smoothed_timbre_novelty) + np.array(smoothed_tempo_novelty))


def main():
    segmenter = SSMseg()
    segmenter.process()


if __name__ == '__main__':
    main()
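# Example invocation (the default paths above will need adjusting to the local
# feature/annotation layout; shown as a sketch only):
#   python SegEval.py -g <gammatone_dir> -s <spectrogram_dir> -t <tempogram_dir> \
#                     -a <annotation_dir> -o <output_dir> -f '[0, 1, 2, 3]' -v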