Mercurial > hg > segmentation
comparison SegEval.py @ 0:26838b1f560f
initial commit of a segmenter project
author | mi tian |
---|---|
date | Thu, 02 Apr 2015 18:09:27 +0100 |
parents | |
children | c11ea9e0357f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26838b1f560f |
---|---|
1 #!/usr/bin/env python | |
2 # encoding: utf-8 | |
3 """ | |
4 SegEval.py | |
5 | |
6 The main segmentation program. | |
7 | |
8 Created by mi tian on 2015-04-02. | |
9 Copyright (c) 2015 __MyCompanyName__. All rights reserved. | |
10 """ | |
11 | |
12 # Load standard python libs | |
13 import sys, os, optparse, csv | |
14 from itertools import combinations | |
15 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext | |
16 from copy import copy | |
17 | |
18 import matplotlib | |
19 # matplotlib.use('Agg') | |
20 import matplotlib.pyplot as plt | |
21 import matplotlib.gridspec as gridspec | |
22 import numpy as np | |
23 import scipy as sp | |
24 from scipy.signal import correlate2d, convolve2d, filtfilt, resample | |
25 from scipy.ndimage.filters import * | |
26 from sklearn.decomposition import PCA | |
27 from sklearn.mixture import GMM | |
28 from sklearn.cluster import KMeans | |
29 from sklearn.preprocessing import normalize | |
30 from sklearn.metrics.pairwise import pairwise_distances | |
31 | |
32 # Load dependencies | |
33 from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature | |
34 from utils.PeakPickerUtil import PeakPicker | |
35 from utils.gmmdist import * | |
36 from utils.GmmMetrics import GmmDistance | |
37 from utils.RankClustering import rClustering | |
38 from utils.kmeans import Kmeans | |
39 from utils.PathTracker import PathTracker | |
40 | |
41 # Load boundary retrieval utilities | |
42 import cnmf as cnmf_S | |
43 import foote as foote_S | |
44 import sf as sf_S | |
45 import fmc2d as fmc2d_S | |
46 | |
47 # Define arg parser | |
def parse_args(argv=None):
    """Parse the command-line options of the segmentation evaluation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) optparse reads ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        The ``(options, args)`` pair returned by
        ``optparse.OptionParser.parse_args``.
    """
    op = optparse.OptionParser()
    # IO options
    op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading gammatone features from.." )
    op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading spectral features from.." )
    op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading tempogram features from.." )
    op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." )
    op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ")
    # '--ouput' is the original (misspelled) flag; it is kept as an alias so
    # existing invocations keep working next to the correct '--output'.
    op.add_option('-o', '--output', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ")

    # boundary retrieval options
    # Same treatment for the misspelled '--bounrary-method'.
    # Note: the default is a list of all methods, whereas a value given on
    # the command line arrives as a single string.
    op.add_option('-b', '--boundary-method', '--bounrary-method', action="store", dest="BOUNDARY", default=['novelty', 'cnmf', 'sf', 'fmc2d'], help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." )

    # Plot/print/mode options
    op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode")
    op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.")

    return op.parse_args(argv)
# Parse the command line once, at import time. The resulting `options` object
# is read as a module-level global by SSMseg.process() (input directories,
# feature selection, boundary method).
options, args = parse_args()
68 | |
class FeatureObj() :
    """Plain record for the features and self-similarity matrices of one track.

    Only the attribute names are declared here; nothing in this file
    instantiates the class. Under Python 2 this is an old-style class, so
    `__slots__` acts as documentation of the expected fields rather than an
    enforced restriction.
    """
    # The original list repeated 'timbre_features' and had no 'timbre_ssm',
    # leaving the timbre SSM as the only feature family without a slot.
    __slots__ = [
        'key', 'audio', 'timestamps',
        'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
        'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
        'ssm_timestamps',
    ]
71 | |
class AudioObj():
    """Per-track container filled in by SSMseg.process().

    Holds the track name, its annotation (`gt`, `label`), one feature matrix
    and one self-similarity matrix per feature family, and the timestamps
    used to map SSM rows back to time.
    """
    __slots__ = [
        # identification and annotation
        'name', 'feature_list', 'gt', 'label',
        # feature matrices
        'gammatone_features', 'tempo_features', 'timbre_features',
        'harmonic_features', 'combined_features',
        # self-similarity matrices
        'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
        'combined_ssm', 'ssm',
        # time axes
        'ssm_timestamps', 'tempo_timestamps',
    ]
75 | |
class EvalObj():
    """Result record returned by SSMseg.pairwiseF().

    TP / FP / FN are the match counts, P / R / F the precision, recall and
    F-measure derived from them, and AD / DA the mean
    annotation-to-detection and detection-to-annotation distances.
    """
    __slots__ = [
        'TP', 'FP', 'FN',
        'P', 'R', 'F',
        'AD', 'DA',
    ]
78 | |
79 | |
class SSMseg(object):
    '''The main segmentation object.

    Holds the feature-extraction and peak-picking settings, evaluates
    detected boundaries against annotations (`pairwiseF`) and runs the
    novelty-based boundary detection over every annotated track (`process`).
    '''

    def __init__(self):
        self.SampleRate = 44100
        self.NqHz = self.SampleRate/2
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 64 # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        # NOTE: Match the following params with those used for feature extraction!

        # NOTE: Unlike spectrogram ones, Gammatone features are extracted without
        # taking an FFT. The windowing is done under the purpose of chunking the
        # audio to facilitate the gammatone filtering with the specified blockSize
        # and stepSize. The resulting gammatonegram is aggregated every
        # gammatoneLen without overlap.
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100
        self.highFreq = self.SampleRate / 4

        # Settings for extracting tempogram features.
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        # Peak picking settings
        self.threshold = 50
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 7

    def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
        '''Pairwise F measure evaluation of detection rates.

        The last annotation value is appended to `detection` before
        scoring. An annotation counts as found when at least one detection
        lies within `tolerance / 2` of it; several detections hitting the
        same annotation still count as a single true positive.

        Args:
            annotation: ground-truth boundary positions (scalar or 1-D).
            detection: detected boundary positions, same unit.
            tolerance: full width of the matching window around a detection.
            combine: unused; kept for interface compatibility.

        Returns:
            An EvalObj with TP, FP, FN, P, R, F and the mean
            annotation-to-detection (AD) and detection-to-annotation (DA)
            distances.

        Raises:
            IndexError: if `annotation` is empty.
        '''
        # atleast_1d: np.genfromtxt yields a 0-d array for a one-line file.
        annotation = np.atleast_1d(np.asarray(annotation, dtype=float))
        detection = np.append(detection, annotation[-1])
        res = EvalObj()
        gt = len(annotation)	# Total number of ground truth data points
        dt = len(detection)		# Total number of experimental data points

        # Rows index detections, columns index annotations.
        det_col = detection[:, np.newaxis]
        ann_row = annotation[np.newaxis, :]
        distance = np.abs(det_col - ann_row)
        D_DA = distance.min(axis=1)
        D_AD = distance.min(axis=0)

        half = tolerance / 2.0
        hit = (ann_row >= det_col - half) & (ann_row <= det_col + half)
        # Count each matched annotation once, however many detections hit it.
        res.TP = int(hit.any(axis=0).sum())
        res.FP = max(0, dt - res.TP)
        res.FN = max(0, gt - res.TP)

        res.AD = np.mean(D_AD)
        res.DA = np.mean(D_DA)

        res.P, res.R, res.F = 0.0, 0.0, 0.0

        if res.TP == 0:
            return res

        res.P = res.TP / float(dt)
        res.R = res.TP / float(gt)
        res.F = 2 * res.P * res.R / (res.P + res.R)
        return res

    def _make_peak_picker(self):
        '''Return a PeakPicker configured from this object's peak-picking settings.'''
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0 # Alpha norm
        peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False
        return peak_picker

    @staticmethod
    def _load_featureset(feature_dirs, name, first_row=0):
        '''Load the feature files of track `name`, one per directory, and stack them.

        In each directory the first file whose name up to '_vamp' equals
        `name` is read as CSV. Column 0 is returned separately as
        timestamps, the remaining columns are the feature values; rows
        before `first_row` are dropped. With several directories the
        matrices are cut to the shortest one and joined column-wise.

        Returns:
            (features, timestamps); timestamps come from the last file read
            and are not truncated.

        Raises:
            ValueError: if no directory contains a file for `name`.
        '''
        matrices = []
        timestamps = None
        for feature_dir in feature_dirs:
            for f in os.listdir(feature_dir):
                if f[:f.find('_vamp')] == name:
                    data = np.genfromtxt(join(feature_dir, f), delimiter=',', filling_values=0.0)
                    matrices.append(data[first_row:, 1:])
                    timestamps = data[first_row:, 0]
                    break
        if not matrices:
            raise ValueError('No feature file found for %s in %s' % (name, feature_dirs))
        if len(feature_dirs) > 1:
            n_frame = np.min([x.shape[0] for x in matrices])
            return np.hstack([x[:n_frame, :] for x in matrices]), timestamps
        return matrices[0], timestamps

    @staticmethod
    def _reduce_and_ssm(features):
        '''Project `features` onto 5 principal components; return (reduced, SSM of reduced).'''
        pca = PCA(n_components=5)
        pca.fit(features)
        reduced = pca.transform(features)
        return reduced, getSSM(reduced)

    def process(self):
        '''Load the features of every annotated track and detect boundaries.

        For each annotation file in `options.GT` the gammatone, timbre,
        tempo and harmonic features are loaded, resampled, normalised,
        reduced with PCA and turned into self-similarity matrices. A novelty
        curve is then computed per SSM, peaks are picked from each curve,
        and the four smoothed curves are summed into one combined detection
        function.

        Reads the module-level `options`. Returns None.
        '''
        peak_picker = self._make_peak_picker()

        # SSM row k is mapped to tempo timestamp aggregation_step * k.
        aggregation_step = 1

        audio_files = sorted(x for x in os.listdir(options.GT) if not x.startswith("."))
        audio_list = []

        # Feature subsets are fixed here rather than taken from the directory listing.
        gammatone_dirs = [join(options.GF, f) for f in ['contrast4', 'rolloff', 'dct']]
        timbre_dirs = [join(options.SF, f) for f in ['mfcc']]
        tempo_dirs = [join(options.TF, f) for f in ['intensity_bpm', 'loudness_bpm']]
        harmonic_dirs = [join(options.SF, f) for f in ['nnls']]

        # For each audio file, load specific features
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]
            print(ao.name)
            # qupujicheng annotations are comma separated .csv files; the
            # iso / salami sets would use whitespace separated .txt instead.
            annotation_file = join(options.GT, ao.name+'.csv')
            ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
            ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)

            ao.gammatone_features, _ = self._load_featureset(gammatone_dirs, ao.name)
            ao.timbre_features, _ = self._load_featureset(timbre_dirs, ao.name)
            # Tempo files: the first row is skipped and column 0 gives the timestamps.
            ao.tempo_features, ao.tempo_timestamps = self._load_featureset(tempo_dirs, ao.name, first_row=1)
            ao.harmonic_features, _ = self._load_featureset(harmonic_dirs, ao.name)

            # Resample and normalise features
            # NOTE(review): the original passed an undefined name `step` to
            # resample(). The tempo frame count is used because ssm_timestamps
            # below indexes ao.tempo_timestamps by SSM row -- confirm this is
            # the intended target length.
            n_frames = ao.tempo_features.shape[0]
            ao.gammatone_features = normaliseFeature(resample(ao.gammatone_features, n_frames))
            ao.timbre_features = normaliseFeature(resample(ao.timbre_features, n_frames))
            ao.harmonic_features = normaliseFeature(resample(ao.harmonic_features, n_frames))
            # The original normalised the harmonic features into tempo_features here.
            ao.tempo_features = normaliseFeature(ao.tempo_features)

            ao.gammatone_features, ao.gammatone_ssm = self._reduce_and_ssm(ao.gammatone_features)
            ao.tempo_features, ao.tempo_ssm = self._reduce_and_ssm(ao.tempo_features)
            ao.timbre_features, ao.timbre_ssm = self._reduce_and_ssm(ao.timbre_features)
            ao.harmonic_features, ao.harmonic_ssm = self._reduce_and_ssm(ao.harmonic_features)

            ao.ssm_timestamps = ao.tempo_timestamps[aggregation_step * np.arange(ao.gammatone_ssm.shape[0])]

            audio_list.append(ao)

        # Segment input audio using specified boundary retrieval method.
        print('Segmenting using %s method' % (options.BOUNDARY,))
        # options.FEATURES is a string such as '[0, 1, 2, 3]'; every digit
        # character is taken as one index.
        feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
        for ao in audio_list:
            print('processing: %s' % ao.name)

            ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
            # NOTE(review): the selection is not consumed yet in this revision.
            ao_featureset = [ao_featureset[k] for k in feature_sel]

            # NOTE(review): getNoveltyPeaks is not imported by name in this
            # file -- confirm one of the wildcard imports provides it.
            # Only the raw novelty curve is kept; smoothing and peaks are
            # recomputed below.
            gammatone_novelty = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker)[0]
            timbre_novelty = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker)[0]
            tempo_novelty = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker)[0]
            harmonic_novelty = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker)[0]

            # Peak picking from the novelty curve
            smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
            gammatone_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in gammatone_novelty_peaks]
            smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
            timbre_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in timbre_novelty_peaks]
            smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
            harmonic_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in harmonic_novelty_peaks]
            smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
            tempo_detection = [ao.ssm_timestamps[int(np.rint(p))] for p in tempo_novelty_peaks]

            peak_counts = (len(gammatone_novelty_peaks), len(harmonic_novelty_peaks),
                           len(timbre_novelty_peaks), len(tempo_novelty_peaks))
            if 0 in peak_counts:
                # Report tracks where at least one feature produced no peak.
                print('%s %d %d %d %d' % ((ao.name,) + peak_counts))

            # Shift every smoothed curve so its minimum is zero, then sum them.
            smoothed_gammatone_novelty = np.asarray(smoothed_gammatone_novelty) - np.min(smoothed_gammatone_novelty)
            smoothed_harmonic_novelty = np.asarray(smoothed_harmonic_novelty) - np.min(smoothed_harmonic_novelty)
            smoothed_timbre_novelty = np.asarray(smoothed_timbre_novelty) - np.min(smoothed_timbre_novelty)
            smoothed_tempo_novelty = np.asarray(smoothed_tempo_novelty) - np.min(smoothed_tempo_novelty)
            combined_sdf = (smoothed_gammatone_novelty + smoothed_harmonic_novelty + smoothed_timbre_novelty + smoothed_tempo_novelty)
339 | |
340 | |
341 | |
def main():
    """Create the segmenter and run it over the configured dataset."""
    SSMseg().process()


if __name__ == '__main__':
    main()
350 |