Mercurial > hg > segmentation
comparison SegEval.py @ 1:c11ea9e0357f
adding funcs
author | mitian |
---|---|
date | Thu, 02 Apr 2015 22:16:38 +0100 |
parents | 26838b1f560f |
children | ef1fd8b0f3c4 |
comparison
equal
deleted
inserted
replaced
0:26838b1f560f | 1:c11ea9e0357f |
---|---|
41 # Load boundary retrieval utilities | 41 # Load boundary retrieval utilities |
42 import cnmf as cnmf_S | 42 import cnmf as cnmf_S |
43 import foote as foote_S | 43 import foote as foote_S |
44 import sf as sf_S | 44 import sf as sf_S |
45 import fmc2d as fmc2d_S | 45 import fmc2d as fmc2d_S |
46 import novelty as novelty_S | |
47 | |
48 # Algorithm params | |
49 h = 8 # Size of median filter for features in C-NMF | |
50 R = 15 # Size of the median filter for the activation matrix C-NMF | |
51 rank = 4 # Rank of decomposition for the boundaries | |
52 rank_labels = 6 # Rank of decomposition for the labels | |
53 R_labels = 6 # Size of the median filter for the labels | |
54 # Foote | |
55 M = 2 # Median filter for the audio features (in beats) | |
56 Mg = 32 # Gaussian kernel size | |
57 L = 16 # Size of the median filter for the adaptive threshold | |
58 # 2D-FMC | |
59 N = 8 # Size of the fixed length segments (for 2D-FMC) | |
60 | |
46 | 61 |
47 # Define arg parser | 62 # Define arg parser |
48 def parse_args(): | 63 def parse_args(): |
49 op = optparse.OptionParser() | 64 op = optparse.OptionParser() |
50 # IO options | 65 # IO options |
54 op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." ) | 69 op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." ) |
55 op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") | 70 op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") |
56 op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") | 71 op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") |
57 | 72 |
58 # boundary retrieval options | 73 # boundary retrieval options |
59 op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", default=['novelty', 'cnmf', 'sf', 'fmc2d'], help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." ) | 74 op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", type='choice', choices=['novelty', 'cnmf', 'foote', 'sf'], default='novelty', help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." ) |
75 op.add_option('-l', '--labeling-method', action="store", dest="LABEL", type='choice', choices=['cnmf', 'fmc2d'], default='cnmf', help="Choose boundary labeling algorithm ('cnmf', 'fmc2d')." ) | |
60 | 76 |
61 # Plot/print/mode options | 77 # Plot/print/mode options |
62 op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots") | 78 op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots") |
63 op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") | 79 op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") |
64 op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.") | 80 op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.") |
303 | 319 |
304 # Segment input audio using specified boundary retrieval method. | 320 # Segment input audio using specified boundary retrieval method. |
305 print 'Segmenting using %s method' %options.BOUNDARY | 321 print 'Segmenting using %s method' %options.BOUNDARY |
306 for i,ao in enumerate(audio_list): | 322 for i,ao in enumerate(audio_list): |
307 print 'processing: %s' %ao.name | 323 print 'processing: %s' %ao.name |
308 | 324 |
309 | 325 # Experiment 1: segmentation using individual features. |
310 | 326 if options.BOUNDARY == 'novelty': |
311 | 327 # Peak picking from the novelty curve |
328 gammatone_novelty, smoothed_gammatone_novelty, gammatone_bound_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker) | |
329 timbre_novelty, smoothed_timbre_novelty, timbre_bound_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker) | |
330 tempo_novelty, smoothed_harmonic_novelty, tempo_bound_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker) | |
331 harmonic_novelty, smoothed_tempo_novelty, harmonic_bound_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker) | |
332 | |
333 if options.BOUNDARY == 'cnmf': | |
334 gammatone_bound_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=8, niter=300) | |
335 timbre_bound_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300) | |
336 tempo_bound_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300) | |
337 harmonic_bound_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300) | |
338 | |
339 if options.BOUNDARY == 'foote': | |
340 gammatone_bound_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L) | |
341 timbre_bound_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L) | |
342 tempo_bound_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L) | |
343 harmonic_bound_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L) | |
344 | |
345 if options.BOUNDARY == 'sf': | |
346 gammatone_bound_idxs = sf_S.segmentation(ao.gammatone_features) | |
347 timbre_bound_idxs = sf_S.segmentation(ao.timbre_features) | |
348 tempo_bound_idxs = sf_S.segmentation(ao.tempo_features) | |
349 harmonic_bound_idxs = sf_S.segmentation(ao.harmonic_features) | |
350 | |
351 if options.LABEL == 'fmc2d': | |
352 gammatone_bound_labels = fmc2d_S.compute_similarity(gammatone_bound_idxs, xmeans=True, N=N) | |
353 timbre_bound_labels = fmc2d_S.compute_similarity(timbre_bound_idxs, xmeans=True, N=N) | |
354 tempo_bound_labels = fmc2d_S.compute_similarity(tempo_bound_idxs, xmeans=True, N=N) | |
355 harmonic_bound_labels = fmc2d_S.compute_similarity(harmonic_bound_idxs, xmeans=True, N=N) | |
356 | |
357 if options.LABEL == 'cnmf': | |
358 gammatone_bound_labels = cnmf_S.compute_labels(gammatone_bound_idxs, est_bound_idxs, nFrames) | |
359 timbre_bound_labels = cnmf_S.compute_labels(timbre_bound_idxs, est_bound_idxs, nFrames) | |
360 tempo_bound_labels = cnmf_S.compute_labels(tempo_bound_idxs, est_bound_idxs, nFrames) | |
361 harmonic_bound_labels = cnmf_S.compute_labels(harmonic_bound_idxs, est_bound_idxs, nFrames) | |
362 | |
363 gammatone_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks] | |
364 timbre_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks] | |
365 harmonic_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks] | |
366 tempo_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks] | |
367 | |
368 # Experiment 2: Trying combined features using the best boundary retrieval method | |
312 ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] | 369 ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] |
313 feature_sel = [int(x) for x in options.FEATURES if x.isdigit()] | 370 feature_sel = [int(x) for x in options.FEATURES if x.isdigit()] |
314 ao_featureset = [ao_featureset[i] for i in feature_sel] | 371 ao_featureset = [ao_featureset[i] for i in feature_sel] |
315 | 372 |
316 gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_peaks = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker) | |
317 timbre_novelty, smoothed_timbre_novelty, timbre_novelty_peaks = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker) | |
318 tempo_novelty, smoothed_harmonic_novelty, harmonic_novelty_peaks = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker) | |
319 harmonic_novelty, smoothed_tempo_novelty, tempo_novelty_peaks = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker) | |
320 | |
321 # Peak picking from the novelty curve | |
322 smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty) | |
323 gammatone_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks] | |
324 smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty) | |
325 timbre_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks] | |
326 smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty) | |
327 harmonic_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks] | |
328 smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty) | |
329 tempo_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks] | |
330 | |
331 if (len(gammatone_novelty_peaks) == 0 or len(harmonic_novelty_peaks)== 0 or len(timbre_novelty_peaks) == 0 or len(tempo_novelty_peaks) == 0): | |
332 print ao.name, len(gammatone_novelty_peaks), len(harmonic_novelty_peaks), len(timbre_novelty_peaks), len(tempo_novelty_peaks) | |
333 | |
334 smoothed_gammatone_novelty -= np.min(smoothed_gammatone_novelty) | |
335 smoothed_harmonic_novelty -= np.min(smoothed_harmonic_novelty) | |
336 smoothed_timbre_novelty -= np.min(smoothed_timbre_novelty) | |
337 smoothed_tempo_novelty -= np.min(smoothed_tempo_novelty) | |
338 combined_sdf = (np.array(smoothed_gammatone_novelty) + np.array(smoothed_harmonic_novelty) + np.array(smoothed_timbre_novelty) + np.array(smoothed_tempo_novelty)) | |
339 | |
340 | 373 |
341 | 374 |
342 def main(): | 375 def main(): |
343 | 376 |
344 segmenter = SSMseg() | 377 segmenter = SSMseg() |