comparison SegEval.py @ 1:c11ea9e0357f

adding funcs
author mitian
date Thu, 02 Apr 2015 22:16:38 +0100
parents 26838b1f560f
children ef1fd8b0f3c4
comparison
equal deleted inserted replaced
0:26838b1f560f 1:c11ea9e0357f
41 # Load boundary retrieval utilities 41 # Load boundary retrieval utilities
42 import cnmf as cnmf_S 42 import cnmf as cnmf_S
43 import foote as foote_S 43 import foote as foote_S
44 import sf as sf_S 44 import sf as sf_S
45 import fmc2d as fmc2d_S 45 import fmc2d as fmc2d_S
46 import novelty as novelty_S
47
48 # Algorithm params
49 h = 8 # Size of median filter for features in C-NMF
50 R = 15 # Size of the median filter for the activation matrix C-NMF
51 rank = 4 # Rank of decomposition for the boundaries
52 rank_labels = 6 # Rank of decomposition for the labels
53 R_labels = 6 # Size of the median filter for the labels
54 # Foote
55 M = 2 # Median filter for the audio features (in beats)
56 Mg = 32 # Gaussian kernel size
57 L = 16 # Size of the median filter for the adaptive threshold
58 # 2D-FMC
59 N = 8 # Size of the fixed length segments (for 2D-FMC)
60
46 61
47 # Define arg parser 62 # Define arg parser
48 def parse_args(): 63 def parse_args():
49 op = optparse.OptionParser() 64 op = optparse.OptionParser()
50 # IO options 65 # IO options
54 op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." ) 69 op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." )
55 op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") 70 op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ")
56 op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") 71 op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ")
57 72
58 # boundary retrieval options 73 # boundary retrieval options
59 op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", default=['novelty', 'cnmf', 'sf', 'fmc2d'], help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." ) 74 op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", type='choice', choices=['novelty', 'cnmf', 'foote', 'sf'], default='novelty', help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." )
75 op.add_option('-l', '--labeling-method', action="store", dest="LABEL", type='choice', choices=['cnmf', 'fmc2d'], default='cnmf', help="Choose boundary labeling algorithm ('cnmf', 'fmc2d')." )
60 76
61 # Plot/print/mode options 77 # Plot/print/mode options
62 op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots") 78 op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots")
63 op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") 79 op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode")
64 op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.") 80 op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.")
303 319
304 # Segment input audio using specified boundary retrieval method. 320 # Segment input audio using specified boundary retrieval method.
305 print 'Segmenting using %s method' %options.BOUNDARY 321 print 'Segmenting using %s method' %options.BOUNDARY
306 for i,ao in enumerate(audio_list): 322 for i,ao in enumerate(audio_list):
307 print 'processing: %s' %ao.name 323 print 'processing: %s' %ao.name
308 324
309 325 # Experiment 1: segmentation using individual features.
310 326 if options.BOUNDARY == 'novelty':
311 327 # Peak picking from the novelty curve
328 gammatone_novelty, smoothed_gammatone_novelty, gammatone_bound_idxs = novelty_S.process(ao.gammatone_ssm, self.kernel_size, peak_picker)
329 timbre_novelty, smoothed_timbre_novelty, timbre_bound_idxs = novelty_S.process(ao.timbre_ssm, self.kernel_size, peak_picker)
330 tempo_novelty, smoothed_harmonic_novelty, tempo_bound_idxs = novelty_S.process(ao.tempo_ssm, self.kernel_size, peak_picker)
331 harmonic_novelty, smoothed_tempo_novelty, harmonic_bound_idxs = novelty_S.process(ao.harmonic_ssm, self.kernel_size, peak_picker)
332
333 if options.BOUNDARY == 'cnmf':
334 gammatone_bound_idxs = cnmf_S.segmentation(ao.gammatone_features, rank=rank, R=R, h=8, niter=300)
335 timbre_bound_idxs = cnmf_S.segmentation(ao.timbre_features, rank=rank, R=R, h=h, niter=300)
336 tempo_bound_idxs = cnmf_S.segmentation(ao.tempo_features, rank=rank, R=R, h=h, niter=300)
337 harmonic_bound_idxs = cnmf_S.segmentation(ao.harmonic_features, rank=rank, R=R, h=h, niter=300)
338
339 if options.BOUNDARY == 'foote':
340 gammatone_bound_idxs = foote_S.segmentation(ao.gammatone_features, M=M, Mg=Mg, L=L)
341 timbre_bound_idxs = foote_S.segmentation(ao.timbre_features, M=M, Mg=Mg, L=L)
342 tempo_bound_idxs = foote_S.segmentation(ao.tempo_features, M=M, Mg=Mg, L=L)
343 harmonic_bound_idxs = foote_S.segmentation(ao.harmonic_features, M=M, Mg=Mg, L=L)
344
345 if options.BOUNDARY == 'sf':
346 gammatone_bound_idxs = sf_S.segmentation(ao.gammatone_features)
347 timbre_bound_idxs = sf_S.segmentation(ao.timbre_features)
348 tempo_bound_idxs = sf_S.segmentation(ao.tempo_features)
349 harmonic_bound_idxs = sf_S.segmentation(ao.harmonic_features)
350
351 if options.LABEL == 'fmc2d':
352 gammatone_bound_labels = fmc2d_S.compute_similarity(gammatone_bound_idxs, xmeans=True, N=N)
353 timbre_bound_labels = fmc2d_S.compute_similarity(timbre_bound_idxs, xmeans=True, N=N)
354 tempo_bound_labels = fmc2d_S.compute_similarity(tempo_bound_idxs, xmeans=True, N=N)
355 harmonic_bound_labels = fmc2d_S.compute_similarity(harmonic_bound_idxs, xmeans=True, N=N)
356
357 if options.LABEL == 'cnmf':
358 gammatone_bound_labels = cnmf_S.compute_labels(gammatone_bound_idxs, est_bound_idxs, nFrames)
359 timbre_bound_labels = cnmf_S.compute_labels(timbre_bound_idxs, est_bound_idxs, nFrames)
360 tempo_bound_labels = cnmf_S.compute_labels(tempo_bound_idxs, est_bound_idxs, nFrames)
361 harmonic_bound_labels = cnmf_S.compute_labels(harmonic_bound_idxs, est_bound_idxs, nFrames)
362
363 gammatone_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks]
364 timbre_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks]
365 harmonic_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks]
366 tempo_detection = [0.0] + [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks]
367
368 # Experiment 2: Trying combined features using the best boundary retrieval method
312 ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] 369 ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
313 feature_sel = [int(x) for x in options.FEATURES if x.isdigit()] 370 feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
314 ao_featureset = [ao_featureset[i] for i in feature_sel] 371 ao_featureset = [ao_featureset[i] for i in feature_sel]
315 372
316 gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_peaks = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker)
317 timbre_novelty, smoothed_timbre_novelty, timbre_novelty_peaks = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker)
318 tempo_novelty, smoothed_harmonic_novelty, harmonic_novelty_peaks = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker)
319 harmonic_novelty, smoothed_tempo_novelty, tempo_novelty_peaks = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker)
320
321 # Peak picking from the novelty curve
322 smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
323 gammatone_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks]
324 smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
325 timbre_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks]
326 smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
327 harmonic_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks]
328 smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
329 tempo_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks]
330
331 if (len(gammatone_novelty_peaks) == 0 or len(harmonic_novelty_peaks)== 0 or len(timbre_novelty_peaks) == 0 or len(tempo_novelty_peaks) == 0):
332 print ao.name, len(gammatone_novelty_peaks), len(harmonic_novelty_peaks), len(timbre_novelty_peaks), len(tempo_novelty_peaks)
333
334 smoothed_gammatone_novelty -= np.min(smoothed_gammatone_novelty)
335 smoothed_harmonic_novelty -= np.min(smoothed_harmonic_novelty)
336 smoothed_timbre_novelty -= np.min(smoothed_timbre_novelty)
337 smoothed_tempo_novelty -= np.min(smoothed_tempo_novelty)
338 combined_sdf = (np.array(smoothed_gammatone_novelty) + np.array(smoothed_harmonic_novelty) + np.array(smoothed_timbre_novelty) + np.array(smoothed_tempo_novelty))
339
340 373
341 374
342 def main(): 375 def main():
343 376
344 segmenter = SSMseg() 377 segmenter = SSMseg()