comparison SegEval.py @ 0:26838b1f560f

initial commit of a segmenter project
author mi tian
date Thu, 02 Apr 2015 18:09:27 +0100
parents
children c11ea9e0357f
comparison
equal deleted inserted replaced
-1:000000000000 0:26838b1f560f
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 SegEval.py
5
6 The main segmentation program.
7
8 Created by mi tian on 2015-04-02.
9 Copyright (c) 2015 __MyCompanyName__. All rights reserved.
10 """
11
12 # Load standard python libs
13 import sys, os, optparse, csv
14 from itertools import combinations
15 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
16 from copy import copy
17
18 import matplotlib
19 # matplotlib.use('Agg')
20 import matplotlib.pyplot as plt
21 import matplotlib.gridspec as gridspec
22 import numpy as np
23 import scipy as sp
24 from scipy.signal import correlate2d, convolve2d, filtfilt, resample
25 from scipy.ndimage.filters import *
26 from sklearn.decomposition import PCA
27 from sklearn.mixture import GMM
28 from sklearn.cluster import KMeans
29 from sklearn.preprocessing import normalize
30 from sklearn.metrics.pairwise import pairwise_distances
31
32 # Load dependencies
33 from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature
34 from utils.PeakPickerUtil import PeakPicker
35 from utils.gmmdist import *
36 from utils.GmmMetrics import GmmDistance
37 from utils.RankClustering import rClustering
38 from utils.kmeans import Kmeans
39 from utils.PathTracker import PathTracker
40
41 # Load boundary retrieval utilities
42 import cnmf as cnmf_S
43 import foote as foote_S
44 import sf as sf_S
45 import fmc2d as fmc2d_S
46
47 # Define arg parser
def parse_args(argv=None):
    '''Build the command-line parser and parse the arguments.

    argv -- optional list of argument strings to parse instead of the
            process command line. When None (the default) optparse reads
            sys.argv[1:], which is the historical behaviour.

    Returns the (options, args) pair produced by OptionParser.parse_args.

    NOTE: the long option names '--ouput' and '--bounrary-method' are
    misspelt; they are kept unchanged so existing command lines (including
    abbreviated forms) continue to work.
    '''
    op = optparse.OptionParser()

    # IO options
    op.add_option(
        '-g', '--gammatonegram-features', action="store", dest="GF",
        default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048',
        type="str", help="Loading gammatone features from..")
    op.add_option(
        '-s', '--spectrogram-features', action="store", dest="SF",
        default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048',
        type="str", help="Loading spectral features from..")
    op.add_option(
        '-t', '--tempogram-features', action="store", dest="TF",
        default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s',
        type="str", help="Loading tempogram features from..")
    op.add_option(
        '-f', '--featureset', action="store", dest="FEATURES",
        default='[0, 1, 2, 3]', type="str",
        help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3.")
    op.add_option(
        '-a', '--annotations', action="store", dest="GT",
        default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase',
        type="str", help="Loading annotation files from.. ")
    op.add_option(
        '-o', '--ouput', action="store", dest="OUTPUT",
        default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng',
        type="str", help="Write segmentation results to ")

    # Boundary retrieval options. The default is a list of every method,
    # whereas a value given on the command line arrives as a single string.
    op.add_option(
        '-b', '--bounrary-method', action="store", dest="BOUNDARY",
        default=['novelty', 'cnmf', 'sf', 'fmc2d'],
        help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d').")

    # Plot/print/mode options
    op.add_option('-p', '--plot', action="store_true", dest="PLOT",
                  default=False, help="Save plots")
    op.add_option('-e', '--test-mode', action="store_true", dest="TEST",
                  default=False, help="Test mode")
    op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE",
                  default=False, help="Print results in verbose mode.")

    return op.parse_args(argv)
# Parse the command line once, at import time. SSMseg.process reads the
# module-level `options` for its input locations and settings.
(options, args) = parse_args()
68
class FeatureObj():
    '''Plain record holding the features and self-similarity matrices of one track.'''
    # 'timbre_ssm' was previously missing because 'timbre_features' was
    # listed twice; every feature family now has both a *_features and a
    # *_ssm slot.
    __slots__ = [
        'key', 'audio', 'timestamps',
        'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features',
        'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
        'ssm_timestamps',
    ]
71
class AudioObj():
    '''Plain record for one audio track: annotation, features and SSMs.'''
    __slots__ = [
        # identity and ground truth
        'name', 'feature_list', 'gt', 'label',
        # per-family feature matrices
        'gammatone_features', 'tempo_features', 'timbre_features',
        'harmonic_features', 'combined_features',
        # per-family self-similarity matrices
        'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm',
        'combined_ssm', 'ssm',
        # time axes
        'ssm_timestamps', 'tempo_timestamps',
    ]
75
class EvalObj():
    '''Plain record for one evaluation result, as filled in by SSMseg.pairwiseF.'''
    __slots__ = [
        'TP', 'FP', 'FN',   # hit / false-positive / false-negative counts
        'P', 'R', 'F',      # precision, recall, F measure
        'AD', 'DA',         # mean annotation->detection and detection->annotation distances
    ]
78
79
class SSMseg(object):
    '''The main segmentation object.

    Holds the analysis / peak-picking settings, evaluates boundary
    detections against annotations (pairwiseF) and runs the feature
    loading + novelty-based boundary detection pipeline (process).
    '''

    def __init__(self):
        '''Initialise analysis, feature-extraction and peak-picking settings.'''
        self.SampleRate = 44100
        # Floor division keeps the value an int on both Python 2 and 3.
        self.NqHz = self.SampleRate // 2
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0
        self.featureStep = 3.0
        self.kernel_size = 64  # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        # NOTE: Match the following params with those used for feature extraction!
        #
        # NOTE: Unlike spectrogram ones, Gammatone features are extracted
        # without taking an FFT. The windowing is done under the purpose of
        # chunking the audio to facilitate the gammatone filtering with the
        # specified blockSize and stepSize. The resulting gammatonegram is
        # aggregated every gammatoneLen without overlap.
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100
        self.highFreq = self.SampleRate // 4

        # Settings for extracting tempogram features.
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        # Peak picking settings
        self.threshold = 50
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True
        self.medfilter_on = True
        self.LPfilter_on = True
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]
        self.bCoeffs = [0.1600, 0.3200, 0.1600]
        self.cutoff = 0.34
        self.medianWin = 7

    def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
        '''Pairwise F measure evaluation of detection rates.

        annotation -- non-empty sequence of ground-truth boundary times.
        detection  -- sequence of detected boundary times (may be empty).
        tolerance  -- full width of the matching window: an annotation is
                      a hit when some detection lies within tolerance/2
                      on either side of it.
        combine    -- unused; kept so existing callers keep working.

        The last annotation is appended to the detections before scoring.

        Returns an EvalObj with TP (matched annotations), FP, FN, precision
        P, recall R, F measure F, and the mean nearest-neighbour distances
        AD (annotation -> detection) and DA (detection -> annotation).
        Raises IndexError if annotation is empty.
        '''
        annotation = np.asarray(annotation, dtype=float).ravel()
        detection = np.append(np.asarray(detection, dtype=float).ravel(), annotation[-1])

        gt = len(annotation)  # Total number of ground truth data points
        dt = len(detection)   # Total number of experimental data points

        # Work on the full (gt x dt) grid instead of nested Python loops.
        ann_col = annotation[:, np.newaxis]
        det_row = detection[np.newaxis, :]
        gaps = np.abs(ann_col - det_row)

        half = tolerance / 2.0
        matched = np.any((ann_col >= det_row - half) & (ann_col <= det_row + half), axis=1)

        res = EvalObj()
        # Each annotation counts at most once, however many detections hit it.
        res.TP = int(np.count_nonzero(matched))
        res.FP = max(0, dt - res.TP)
        res.FN = max(0, gt - res.TP)

        res.AD = np.mean(gaps.min(axis=1))
        res.DA = np.mean(gaps.min(axis=0))

        res.P, res.R, res.F = 0.0, 0.0, 0.0
        if res.TP == 0:
            return res

        res.P = res.TP / float(dt)
        res.R = res.TP / float(gt)
        res.F = 2 * res.P * res.R / (res.P + res.R)
        return res

    def _make_peak_picker(self):
        '''Return a PeakPicker configured from this object's peak-picking settings.'''
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0  # Alpha norm
        peak_picker.params.delta = self.delta_threshold  # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False
        return peak_picker

    def _load_feature_tables(self, feature_dirs, name):
        '''Load the raw CSV table of track `name` from each feature directory.

        In every directory the first file (in os.listdir order) whose name
        up to the '_vamp' marker equals `name` is parsed with missing
        values filled with 0.0. Directories without a matching file are
        skipped. Raises IOError when no directory contains a match.
        '''
        tables = []
        for feature_dir in feature_dirs:
            for fname in os.listdir(feature_dir):
                cut = fname.find('_vamp')
                # find() returns -1 when the marker is absent; slicing with
                # -1 would silently compare all but the last character.
                if cut >= 0 and fname[:cut] == name:
                    tables.append(np.genfromtxt(join(feature_dir, fname), delimiter=',', filling_values=0.0))
                    break
        if not tables:
            raise IOError('No feature file found for %r in %s' % (name, list(feature_dirs)))
        return tables

    @staticmethod
    def _stack_features(matrices):
        '''Truncate the matrices to their shortest row count and join them column-wise.'''
        n_frame = min(m.shape[0] for m in matrices)
        return np.hstack([m[:n_frame, :] for m in matrices])

    @staticmethod
    def _reduce_and_ssm(pca, features):
        '''Fit `pca` on `features`; return (projected features, their SSM from getSSM).'''
        pca.fit(features)
        reduced = pca.transform(features)
        return reduced, getSSM(reduced)

    @staticmethod
    def _peaks_to_times(timestamps, peaks):
        '''Map (possibly fractional) peak positions to the timestamp at the nearest index.'''
        return [timestamps[int(np.rint(p))] for p in peaks]

    def process(self):
        '''Load the features of every annotated track and detect novelty peaks.

        For each file in the annotation directory (options.GT) the
        gammatone, timbre, tempo and harmonic features are loaded,
        resampled to the tempo frame count, normalised, reduced with PCA
        and turned into self-similarity matrices. Each SSM then yields a
        novelty curve whose peaks are picked and mapped to timestamps, and
        the four baseline-shifted smoothed curves are summed.

        In this revision the detections and the summed curve are computed
        but not yet stored, evaluated or returned; the method returns None.
        '''
        peak_picker = self._make_peak_picker()

        audio_files = sorted(x for x in os.listdir(options.GT) if not x.startswith("."))

        # Feature sub-directories used for each feature family.
        gammatone_dirs = [join(options.GF, f) for f in ['contrast4', 'rolloff', 'dct']]
        timbre_dirs = [join(options.SF, f) for f in ['mfcc']]
        tempo_dirs = [join(options.TF, f) for f in ['intensity_bpm', 'loudness_bpm']]
        harmonic_dirs = [join(options.SF, f) for f in ['nnls']]

        # One SSM frame per (resampled) feature frame.
        aggregation_step = 1

        audio_list = []

        # For each audio file, load specific features
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]
            print(ao.name)
            annotation_file = join(options.GT, ao.name + '.csv')  # qupujicheng
            ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
            ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)

            # Column 0 of every table is dropped from the feature values
            # (for the tempo tables it is used as the timestamp column).
            ao.gammatone_features = self._stack_features(
                [t[:, 1:] for t in self._load_feature_tables(gammatone_dirs, ao.name)])
            ao.timbre_features = self._stack_features(
                [t[:, 1:] for t in self._load_feature_tables(timbre_dirs, ao.name)])
            ao.harmonic_features = self._stack_features(
                [t[:, 1:] for t in self._load_feature_tables(harmonic_dirs, ao.name)])

            # Tempo tables additionally skip their first row; the timestamps
            # come from the last tempo table loaded, as before. Each file is
            # now parsed once instead of twice.
            tempo_tables = self._load_feature_tables(tempo_dirs, ao.name)
            ao.tempo_timestamps = tempo_tables[-1][1:, 0]
            ao.tempo_features = self._stack_features([t[1:, 1:] for t in tempo_tables])

            # Resample and normalise features. The target length was an
            # undefined name (`step`) in the original; the tempo frame count
            # is used because ssm_timestamps below indexes tempo_timestamps
            # and the novelty curves are later summed element-wise.
            # NOTE(review): confirm this is the intended target length.
            n_frames = ao.tempo_features.shape[0]
            ao.gammatone_features = normaliseFeature(resample(ao.gammatone_features, n_frames))
            ao.timbre_features = normaliseFeature(resample(ao.timbre_features, n_frames))
            ao.harmonic_features = normaliseFeature(resample(ao.harmonic_features, n_frames))
            # Previously normalised the harmonic features here by mistake,
            # overwriting the tempo features.
            ao.tempo_features = normaliseFeature(ao.tempo_features)

            pca = PCA(n_components=5)
            ao.gammatone_features, ao.gammatone_ssm = self._reduce_and_ssm(pca, ao.gammatone_features)
            ao.tempo_features, ao.tempo_ssm = self._reduce_and_ssm(pca, ao.tempo_features)
            ao.timbre_features, ao.timbre_ssm = self._reduce_and_ssm(pca, ao.timbre_features)
            ao.harmonic_features, ao.harmonic_ssm = self._reduce_and_ssm(pca, ao.harmonic_features)

            # Array indexing instead of np.array(map(...)), which yields a
            # useless 0-d object array on Python 3.
            frame_idx = np.arange(0, ao.gammatone_ssm.shape[0]) * aggregation_step
            ao.ssm_timestamps = np.asarray(ao.tempo_timestamps)[frame_idx]

            audio_list.append(ao)

        # Segment input audio using specified boundary retrieval method.
        print('Segmenting using %s method' % options.BOUNDARY)
        for ao in audio_list:
            print('processing: %s' % ao.name)

            # Selected feature subset; every digit character in the option
            # string is taken as one index. Not consumed further in this
            # revision, but an out-of-range index still raises IndexError.
            ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
            feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
            ao_featureset = [ao_featureset[k] for k in feature_sel]

            # NOTE(review): getNoveltyPeaks is not defined or explicitly
            # imported in this file; presumably it comes from one of the
            # star imports -- confirm. Only the novelty curve (first item)
            # is used, since the peaks are re-picked just below.
            gammatone_novelty, _, _ = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker)
            timbre_novelty, _, _ = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker)
            tempo_novelty, _, _ = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker)
            harmonic_novelty, _, _ = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker)

            # Peak picking from the novelty curve
            smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
            gammatone_detection = self._peaks_to_times(ao.ssm_timestamps, gammatone_novelty_peaks)
            smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
            timbre_detection = self._peaks_to_times(ao.ssm_timestamps, timbre_novelty_peaks)
            smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
            harmonic_detection = self._peaks_to_times(ao.ssm_timestamps, harmonic_novelty_peaks)
            smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
            tempo_detection = self._peaks_to_times(ao.ssm_timestamps, tempo_novelty_peaks)

            # Report tracks for which any feature family produced no peaks.
            peak_counts = (len(gammatone_novelty_peaks), len(harmonic_novelty_peaks),
                           len(timbre_novelty_peaks), len(tempo_novelty_peaks))
            if 0 in peak_counts:
                print('%s %d %d %d %d' % ((ao.name,) + peak_counts))

            # Shift every smoothed curve so its minimum is zero, then sum.
            curves = [np.asarray(c, dtype=float) for c in (
                smoothed_gammatone_novelty, smoothed_harmonic_novelty,
                smoothed_timbre_novelty, smoothed_tempo_novelty)]
            combined_sdf = sum(c - np.min(c) for c in curves)
339
340
341
def main():
    '''Create an SSMseg with its default settings and run its process() step.'''
    SSMseg().process()
346
347
# Script entry point: run the segmenter only when executed directly.
if '__main__' == __name__:
    main()
350