mitian@17
|
1 #!/usr/bin/env python
|
mitian@17
|
2 # encoding: utf-8
|
mitian@17
|
3 """
|
mitian@17
|
4 HSeg.py
|
mitian@17
|
5
|
mitian@17
|
6 Created by mi tian on 2015-08-14.
|
mitian@17
|
7 Copyright (c) 2015 __MyCompanyName__. All rights reserved.
|
mitian@17
|
8 """
|
mitian@17
|
9
|
mitian@17
|
10 import sys, os, optparse, csv
|
mitian@17
|
11 from itertools import combinations
|
mitian@17
|
12 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
|
mitian@17
|
13 from copy import copy
|
mitian@17
|
14
|
mitian@17
|
15 import matplotlib
|
mitian@17
|
16 matplotlib.use('Agg')
|
mitian@17
|
17 import matplotlib.pyplot as plt
|
mitian@17
|
18 import matplotlib.gridspec as gridspec
|
mitian@17
|
19 import numpy as np
|
mitian@17
|
20 import scipy as sp
|
mitian@17
|
21 from scipy.signal import correlate2d, convolve2d, filtfilt, resample
|
mitian@17
|
22 from scipy.ndimage.filters import *
|
mitian@17
|
23 from scipy.ndimage.interpolation import zoom
|
mitian@17
|
24 from sklearn.decomposition import PCA
|
mitian@17
|
25 from sklearn.mixture import GMM
|
mitian@17
|
26 from sklearn.cluster import KMeans
|
mitian@17
|
27 from sklearn.preprocessing import normalize
|
mitian@17
|
28 from sklearn.metrics.pairwise import pairwise_distances
|
mitian@17
|
29
|
mitian@17
|
30 # Load dependencies
|
mitian@17
|
31 from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature, normaliseArray
|
mitian@17
|
32 from utils.PeakPickerUtil import PeakPicker
|
mitian@17
|
33 from utils.gmmdist import *
|
mitian@17
|
34 from utils.GmmMetrics import GmmDistance
|
mitian@17
|
35 from utils.RankClustering import rClustering
|
mitian@17
|
36 from utils.kmeans import Kmeans
|
mitian@17
|
37
|
mitian@17
|
38 # Using the novelty based (Tian) boundary retrieval method
|
mitian@17
|
39 import novelty as novelty_S
|
mitian@17
|
40 import sf as sf_S
|
mitian@17
|
41 import cnmf as cnmf_S
|
mitian@17
|
42
|
mitian@17
|
# Algorithm params
# C-NMF boundary/label decomposition settings.
h = 8           # Size of median filter for features in C-NMF
R = 15          # Size of the median filter for the activation matrix C-NMF
rank = 4        # Rank of decomposition for the boundaries
rank_labels = 6 # Rank of decomposition for the labels
R_labels = 6    # Size of the median filter for the labels
# Foote (checkerboard-kernel novelty) settings.
M = 2           # Median filter for the audio features (in beats)
Mg = 32         # Gaussian kernel size
L = 16          # Size of the median filter for the adaptive threshold
# 2D-FMC settings.
N = 8           # Size of the fixed length segments (for 2D-FMC)
|
mitian@17
|
55
|
mitian@17
|
56 # Define arg parser
|
mitian@17
|
def parse_args():
    """Build the command-line option parser and parse sys.argv.

    Returns the ``(options, args)`` pair produced by optparse, where the
    options carry IO paths (features, annotations, output, cache) and
    boolean mode flags (boundary-all, plot, test, verbose).
    """
    parser = optparse.OptionParser()

    # IO options: each entry is (short, long, dest, default, help).
    string_options = [
        ('-f', '--features1', 'F1',
         '/Users/mitian/Documents/experiments/mit/features/gammatonegram_fft/qupujicheng/2048_1024',
         "Loading features from.."),
        ('-e', '--features2', 'F2',
         '/Users/mitian/Documents/experiments/mit/features/gammatonegram_fft/qupujicheng/2048_1024',
         "Loading features from.."),
        ('-a', '--annotations', 'GT',
         '/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase',
         "Loading annotation files from.. "),
        ('-d', '--dataset', 'DATASET', 'qupujicheng', "Specify datasets"),
        ('-o', '--output', 'OUTPUT',
         ' /Users/mitian/Documents/experiments/mit/gmm/gammatone_fft/qupujicheng',
         "Loading annotation files from.. "),
        ('-c', '--cache', 'CACHE',
         '/Users/mitian/Documents/experiments/mit/gmm/gammatone_fft/qupujicheng',
         "Saving temporary cache files to.. "),
        ('-n', '--name', 'NAME', None, "Save output under the name.."),
    ]
    for short_flag, long_flag, dest, default, help_text in string_options:
        parser.add_option(short_flag, long_flag, action="store", dest=dest,
                          default=default, type="str", help=help_text)

    # Plot/print/mode flags; all default to off.
    boolean_options = [
        ('-b', '--boundary-method', 'BOUNDARY_ALL', "Use all boundary method."),
        ('-p', '--plot', 'PLOT', "Save plots"),
        ('-t', '--test-mode', 'TEST', "Test mode"),
        ('-v', '--verbose-mode', 'VERBOSE', "Print results in verbose mode."),
    ]
    for short_flag, long_flag, dest, help_text in boolean_options:
        parser.add_option(short_flag, long_flag, action="store_true", dest=dest,
                          default=False, help=help_text)

    return parser.parse_args()
|
mitian@17
|
# Parse CLI options once at module import; `options` is read as a global by
# Seg.process. NOTE(review): parsing at import time makes this module unsafe
# to import from other code unless sys.argv matches these flags.
options, args = parse_args()
|
mitian@17
|
76
|
mitian@17
|
class AudioObj(object):
    """Lightweight per-track container: features, ground truth and timestamps.

    Inherits from ``object`` explicitly: on an old-style Python 2 class
    (``class AudioObj():``) ``__slots__`` is silently ignored, so the
    intended memory saving and attribute checking never happened.
    """
    __slots__ = ['name', 'feature_list', 'gt', 'label', 'features1',
                 'features2', 'ssm_timestamps']
|
mitian@17
|
79
|
mitian@17
|
class EvalObj(object):
    """Container for boundary-evaluation statistics of one detection run.

    Holds true/false positive/negative counts (TP, FP, FN), precision/recall/
    F-measure (P, R, F), mean annotation-to-detection and detection-to-
    annotation distances (AD, DA), and the mapped detection times.
    Inherits from ``object`` explicitly so that ``__slots__`` is effective
    in Python 2 (it is ignored on old-style classes).
    """
    __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA', 'detection']
|
mitian@17
|
82
|
mitian@17
|
83
|
mitian@17
|
class Seg(object):
    '''The main segmentation object.

    Detects section boundaries from precomputed audio features: a first,
    coarse novelty-based detection pass, followed by a second, finer-scale
    verification pass around each candidate using an alternative feature set.
    '''
|
mitian@17
|
    def __init__(self):
        '''Initialise sampling, feature-windowing and peak-picking parameters.'''
        # Basic audio / framing parameters.
        self.SampleRate = 44100
        self.NqHz = self.SampleRate/2   # Nyquist frequency (Hz)
        self.timestamp = []
        self.previousSample = 0.0
        self.featureWindow = 6.0        # feature aggregation window (s)
        self.featureStep = 3.0          # feature aggregation hop (s)
        self.kernel_size = 100 # Adjust this param according to the feature resolution.
        self.blockSize = 2048
        self.stepSize = 1024

        '''NOTE: Match the following params with those used for feature extraction!'''

        '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without taking an FFT. The windowing is done under the purpose of chunking
        the audio to facilitate the gammatone filtering with the specified blockSize and stepSize. The resulting gammatonegram is aggregated every
        gammatoneLen without overlap.'''
        self.gammatoneLen = 2048
        self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
        self.nGammatoneBands = 20
        self.lowFreq = 100                      # lowest gammatone band (Hz)
        self.highFreq = self.SampleRate / 4     # highest gammatone band (Hz)

        '''Settings for extracting tempogram features.'''
        self.tempoWindow = 6.0
        self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]

        '''Peak picking settings for novelty based method'''
        self.threshold = 30                 # raw sensitivity threshold (0-100)
        self.confidence_threshold = 0.5
        self.delta_threshold = 0.0          # adaptive-threshold delta
        self.backtracking_threshold = 1.9
        self.polyfitting_on = True          # smooth detection fn by polynomial fit
        self.medfilter_on = True            # median filtering of detection fn
        self.LPfilter_on = True             # low-pass filtering of detection fn
        self.whitening_on = False
        self.aCoeffs = [1.0000, -0.5949, 0.2348]    # low-pass filter denominator
        self.bCoeffs = [0.1600, 0.3200, 0.1600]     # low-pass filter numerator
        self.cutoff = 0.34
        self.medianWin = 7                  # median filter window (frames)
|
mitian@17
|
125
|
mitian@17
|
126
|
mitian@17
|
127 def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0, idx2time=None):
|
mitian@17
|
128 '''Pairwise F measure evaluation of detection rates.'''
|
mitian@17
|
129
|
mitian@17
|
130 res = EvalObj()
|
mitian@17
|
131 res.TP, res.FP, res.FN = 0, 0, 0
|
mitian@17
|
132 res.P, res.R, res.F = 0.0, 0.0, 0.0
|
mitian@17
|
133 res.AD, res.DA = 0.0, 0.0
|
mitian@17
|
134
|
mitian@17
|
135 if len(detection) == 0:
|
mitian@17
|
136 return res
|
mitian@18
|
137
|
mitian@17
|
138 if idx2time != None:
|
mitian@17
|
139 # Map detected idxs to real time
|
mitian@17
|
140 detection.sort()
|
mitian@17
|
141 if detection[-1] >= len(idx2time):
|
mitian@18
|
142 detection = detection[:-len(np.array(detection)[np.array(detection)-len(idx2time)>=0])]
|
mitian@17
|
143 detection = [idx2time[int(i)] for i in detection]
|
mitian@17
|
144 detection = np.append(detection, annotation[-1])
|
mitian@17
|
145 res.detection = detection
|
mitian@18
|
146
|
mitian@17
|
147 gt = len(annotation) # Total number of ground truth data points
|
mitian@17
|
148 dt = len(detection) # Total number of experimental data points
|
mitian@17
|
149 foundIdx = []
|
mitian@17
|
150 D_AD = np.zeros(gt)
|
mitian@17
|
151 D_DA = np.zeros(dt)
|
mitian@17
|
152
|
mitian@17
|
153 for dtIdx in xrange(dt):
|
mitian@17
|
154 D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation))
|
mitian@17
|
155
|
mitian@17
|
156 for gtIdx in xrange(gt):
|
mitian@17
|
157 D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection))
|
mitian@17
|
158 for dtIdx in xrange(dt):
|
mitian@17
|
159 if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0):
|
mitian@17
|
160 foundIdx.append(gtIdx)
|
mitian@17
|
161 continue
|
mitian@17
|
162 foundIdx = list(set(foundIdx))
|
mitian@17
|
163 res.TP = len(foundIdx)
|
mitian@17
|
164 # res.FP = dt - res.TP
|
mitian@17
|
165 res.FP = max(0, dt - res.TP)
|
mitian@17
|
166 res.FN = gt - res.TP
|
mitian@17
|
167
|
mitian@17
|
168 res.AD = np.mean(D_AD)
|
mitian@17
|
169 res.DA = np.mean(D_DA)
|
mitian@17
|
170
|
mitian@17
|
171 if res.TP == 0:
|
mitian@17
|
172 return res
|
mitian@17
|
173
|
mitian@17
|
174 res.P = res.TP / float(res.TP+res.FP)
|
mitian@17
|
175 res.R = res.TP / float(res.TP+res.FN)
|
mitian@17
|
176 res.F = 2 * res.P * res.R / (res.P + res.R)
|
mitian@17
|
177 return res
|
mitian@17
|
178
|
mitian@17
|
179 def writeVerifiedHeader(self, filename):
|
mitian@17
|
180 '''Write header of output files for verified segmentation.'''
|
mitian@17
|
181
|
mitian@17
|
182 with open(filename, 'a') as f:
|
mitian@17
|
183 csvwriter = csv.writer(f, delimiter=',')
|
mitian@17
|
184 csvwriter.writerow(['audio', 'novelty_05_TP', 'novelty_05_FP', 'novelty_05_FN', 'novelty_05_P', 'novelty_05_R', 'novelty_05_F', 'novelty_05_AD', 'novelty_05_DA', 'novelty_3_TP', 'novelty_3_FP', 'novelty_3_FN', 'novelty_3_P', 'novelty_3_R', 'novelty_3_F', 'novelty_3_AD', 'novelty_3_DA',\
|
mitian@17
|
185 'verified_novelty_05_TP', 'verified_novelty_05_FP', 'verified_novelty_05_FN', 'verified_novelty_05_P', 'verified_novelty_05_R', 'verified_novelty_05_F', 'verified_novelty_05_AD', 'verified_novelty_05_DA', 'verified_novelty_3_TP', 'verified_novelty_3_FP',\
|
mitian@17
|
186 'gt_verified_3_FN'])
|
mitian@17
|
187
|
mitian@17
|
188 def writeVerifiedRes(self, filename, ao_name, novelty_05, novelty_3, verified_novelty_05, verified_novelty_3):
|
mitian@17
|
189 '''Write result of single detection for verified segmentation.'''
|
mitian@17
|
190
|
mitian@17
|
191 with open(filename, 'a') as f:
|
mitian@17
|
192 csvwriter = csv.writer(f, delimiter=',')
|
mitian@17
|
193 csvwriter.writerow([ao_name, novelty_05.TP, novelty_05.FP, novelty_05.FN, novelty_05.P, novelty_05.R, novelty_05.F, novelty_05.AD, novelty_05.DA, novelty_3.TP, novelty_3.FP, novelty_3.FN, novelty_3.P, novelty_3.R,\
|
mitian@17
|
194 novelty_3.F, novelty_3.AD, novelty_3.DA, verified_novelty_05.TP, verified_novelty_05.FP, verified_novelty_05.FN, verified_novelty_05.P, verified_novelty_05.R, verified_novelty_05.F, verified_novelty_05.AD, verified_novelty_05.DA,
|
mitian@17
|
195 verified_novelty_3.TP, verified_novelty_3.FP, verified_novelty_3.FN, verified_novelty_3.P, verified_novelty_3.R, verified_novelty_3.F, verified_novelty_3.AD, verified_novelty_3.DA])
|
mitian@17
|
196
|
mitian@17
|
    def localBoundaries(self, bound_idx, feature, confidence, peak_picker, thresh=0.3, tol=3, metric='novelty'):
        '''Detect local bounderies within fixed-len window around a boudary from the first round detection.
        args:   bound_idx: index of boundary condidate for local inspection
                feature: an alternative feature for local pairwise distance measuring
                confidence: a list of confidence values assigned to all boundary candidates.
                thresh: threshold for boundary confidence
                tol: window length (L = 2*tol + 1) for extracting local features (unit=s)
                metric: 'novelty' (default), 'sf', 'cnmf'
        '''

        local_boundaries = []
        smoothed_local_novelty = None

        # Window half-width in frames: seconds -> frames at the file's hop rate.
        tol_win = float(tol) / self.stepSize * self.SampleRate

        # Candidate below the confidence threshold: nothing to inspect.
        if confidence[bound_idx] < thresh:
            return local_boundaries, smoothed_local_novelty
        # print 'bound_idx', bound_idx, len(confidence), feature.shape

        # If the boundary is of high relative confidence, keep it anyway.
        # NOTE(review): both sides of this `and` test confidence[bound_idx-1];
        # the second clause presumably should be confidence[bound_idx+1] —
        # confirm against the author's intent.
        # NOTE(review): local_boundaries is appended here but never merged
        # into the returned list below — looks like a dropped result.
        if (1 < bound_idx < len(confidence)-1):
            if confidence[bound_idx-1] / thresh <= confidence[bound_idx] and confidence[bound_idx-1] / thresh <= confidence[bound_idx]:
                local_boundaries.append(bound_idx)
        # Clip the local window to the valid frame range.
        pre_win = np.max([int(bound_idx-tol_win), 0])
        post_win = np.min([int(bound_idx+tol_win), len(confidence)])

        # Local self-similarity (cosine distance) over the alternative feature.
        local_feature = feature[pre_win: post_win, :]
        local_ssm = pairwise_distances(local_feature, metric='cosine')

        # Dispatch to the selected local boundary detector.
        # NOTE(review): if metric is none of these three, local_idxs is never
        # bound and the return below raises NameError.
        if metric == 'novelty': local_novelty, smoothed_local_novelty, local_idxs = novelty_S.process(local_ssm, peak_picker, 48)
        elif metric == 'sf': nc, local_idxs = sf_S.segmentation(local_ssm)
        elif metric == 'cnmf': G, local_idxs = cnmf_S.segmentation(local_ssm)

        # Shift window-relative peaks back to absolute frame indices.
        # NOTE(review): offsets by bound_idx rather than pre_win, and keeps
        # the unshifted indices too — confirm this is intended.
        if local_idxs: local_idxs += map(lambda x: x+bound_idx, local_idxs)
        # print local_idxs

        return list(set(local_idxs)), smoothed_local_novelty
|
mitian@17
|
234
|
mitian@17
|
235 def verifyPeaks(self, bound_idx, second_detection, thresh=0.6, tol = 3, second_detection_conf=None):
|
mitian@17
|
236 '''Pruning second round detection.
|
mitian@17
|
237 args: bound_idx: index of boundary condidate for local inspection
|
mitian@17
|
238 second_detection: a list of peaks detected in the second round near a boudary candidates.
|
mitian@17
|
239 thresh: confidence threshold for discarding detection
|
mitian@17
|
240 second_detection_conf: a list of confidence values assigned to all local peaks.
|
mitian@17
|
241 '''
|
mitian@17
|
242
|
mitian@17
|
243 tol_win = float(tol) / self.stepSize * self.SampleRate
|
mitian@17
|
244
|
mitian@17
|
245 # Select peak with the highest local confidence
|
mitian@17
|
246 if second_detection_conf:
|
mitian@17
|
247 if np.max(second_detection_conf) > thresh:
|
mitian@17
|
248 verified = bound_idx + np.argmax(second_detection_conf) - tol_win
|
mitian@17
|
249 return verified
|
mitian@17
|
250 else:
|
mitian@17
|
251 return None
|
mitian@17
|
252
|
mitian@17
|
253 # Select peak closest to the 1st round detection
|
mitian@17
|
254 elif second_detection:
|
mitian@17
|
255 # pos = np.argmin(abs(np.array(second_detection)-bound_idx))
|
mitian@17
|
256 pos = int(np.mean(np.where(abs(np.array(second_detection)-bound_idx) == abs(np.array(second_detection)-bound_idx).min())[0]))
|
mitian@17
|
257 verified_peak = second_detection[pos]
|
mitian@17
|
258 return verified_peak
|
mitian@17
|
259
|
mitian@17
|
260 # No peak is secured around bound_idx in the second round verification
|
mitian@17
|
261 else:
|
mitian@17
|
262 return None
|
mitian@17
|
263
|
mitian@18
|
    def secondRoundDetection(self, peak_candidates, candidates_conf, feature, peak_verifier=None, tol=3, thresh1=0.4, thresh2=0.5):
        '''Second round detection: re-examine each coarse peak candidate at a
        finer scale using an alternative feature, keeping high-confidence
        candidates directly and re-detecting questionable ones locally.

        args:   peak_candidates: candidate boundary frame indices (full rate)
                candidates_conf: per-frame confidence values (full rate)
                feature: alternative feature matrix (frames x dims)
                peak_verifier: PeakPicker used for the local re-detection
                tol: local window half-width in seconds
                thresh1: candidates below this confidence are discarded
                thresh2: local confidence needed to accept a re-detected peak
        returns a sorted list of verified peak frame indices.
        '''

        peaks = []

        # Window half-width in frames (seconds -> frames at the hop rate).
        # NOTE(review): tol_win is a float and is later used directly as an
        # index (local_conf[tol_win]) — works only on older numpy; also
        # assumes the window was not clipped at the start of the file.
        tol_win = float(tol) / self.stepSize * self.SampleRate
        for i, x in enumerate(peak_candidates):
            # Bypass peak candidates with low confidence
            if candidates_conf[x] < thresh1: continue

            # If with high confidence, keep it straight
            if candidates_conf[x] > 0.7:
                peaks.append(x)
                continue

            # 2nd round detection for questionable peaks: clip the local
            # window to the valid frame range.
            pre_win = np.max([int(x-tol_win), 0])
            post_win = np.min([int(x+tol_win), len(candidates_conf)])
            if pre_win == post_win: continue

            # Local cosine-distance self-similarity over the 2nd feature set.
            local_feature = feature[pre_win: post_win, :]
            local_ssm = pairwise_distances(local_feature, metric='cosine')

            # Last two outputs of novelty_S.process: smoothed novelty curve
            # and detected local peak indices.
            local_conf, local_peaks = novelty_S.process(local_ssm,peak_verifier,100)[-2:]
            if len(local_peaks)==0:
                continue

            local_conf = normaliseArray(local_conf)

            # Keep the one detected from the 2nd around with the highest confidence as final peak
            # if np.max(local_conf[local_peaks]) > thresh2:
            if local_conf[tol_win] >= thresh2: #v2
                local_bound = x - tol_win + np.argmax(local_conf)
                peaks.append(np.rint(local_bound))

        # remove very closely located peaks (duplicates from different rounds of detection)
        # NOTE(review): the comment above announces de-duplication, but only
        # a sort is performed here; callers de-duplicate via set() afterwards.
        peaks.sort()
        return peaks
|
mitian@17
|
303
|
mitian@17
|
    def process(self):
        '''Load precomputed features for all audio samples and make segmentation calls.

        Pipeline per audio file: load annotations and two feature sets,
        normalise + PCA-reduce them, run a coarse novelty-based boundary
        detection on an SSM of feature set 1, then verify each candidate at
        the original time scale with feature set 2, and write both raw and
        verified evaluation results to CSV. Reads the module-level `options`.
        '''

        # peak_picker for the 1st round boudary detection
        peak_picker = PeakPicker()
        peak_picker.params.alpha = 9.0 # Alpha norm
        peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
        peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
        peak_picker.params.QuadThresh_b = 0.0
        peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
        peak_picker.params.rawSensitivity = 20
        peak_picker.params.aCoeffs = self.aCoeffs
        peak_picker.params.bCoeffs = self.bCoeffs
        peak_picker.params.preWin = self.medianWin
        peak_picker.params.postWin = self.medianWin + 1
        peak_picker.params.LP_on = self.LPfilter_on
        peak_picker.params.Medfilt_on = self.medfilter_on
        peak_picker.params.Polyfit_on = self.polyfitting_on
        peak_picker.params.isMedianPositive = False

        # peak_picker for the second round boudary verification (might need lower sensitivity)
        # Uses a fixed sensitivity (20) and wider median windows than the 1st pass.
        peak_verifier = PeakPicker()
        peak_verifier.params.alpha = 9.0 # Alpha norm
        peak_verifier.params.delta = self.delta_threshold # Adaptive thresholding delta
        peak_verifier.params.QuadThresh_a = (100 - 20) / 1000.0
        peak_verifier.params.QuadThresh_b = 0.0
        peak_verifier.params.QuadThresh_c = (100 - 20) / 1500.0
        peak_verifier.params.rawSensitivity = 20
        peak_verifier.params.aCoeffs = self.aCoeffs
        peak_verifier.params.bCoeffs = self.bCoeffs
        peak_verifier.params.preWin = 20
        peak_verifier.params.postWin = 20 + 1
        peak_verifier.params.LP_on = self.LPfilter_on
        peak_verifier.params.Medfilt_on = self.medfilter_on
        peak_verifier.params.Polyfit_on = self.polyfitting_on
        peak_verifier.params.isMedianPositive = False

        # Getting aggregated features.
        featureRate = float(self.SampleRate) / self.stepSize
        aggregation_window, aggregation_step = 20, 10
        # Hop of the tempogram features (frames); magic constant is the
        # tempogram step in seconds — NOTE(review): confirm against extractor.
        tempo_step = 0.204995725 * featureRate

        # Audio collection is enumerated from the annotation directory.
        audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ]
        audio_files.sort()
        if options.TEST:
            # Test mode: only process the first two files.
            audio_files = audio_files[:2]
        audio_list = []

        # Use mfccs feature 1st round segmentation (coarse)
        feature_list1 =['mfcc_harmonic']
        # feature_list1 = ['ti_15_30_4_03_05', 'tir_15_30_4_03_05']
        feature_list1 = [join(options.F1, f) for f in feature_list1]
        # feature_list2 = ['gt_stft_lpc']
        feature_list2 = ['chromagram_harmonic']
        feature_list2 = [join(options.F2, f) for f in feature_list2]

        # Prepare output files.
        outfile1 = join(options.OUTPUT, 'tbhm_verified_novelty_tol1th103th205_07_v2.csv')
        self.writeVerifiedHeader(outfile1)

        # For each audio file, load specific features
        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]

            # Load annotations for specified audio collection; each dataset
            # uses a different file extension and column layout.
            if options.DATASET == 'qupujicheng':
                annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
                ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
                ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
            elif options.DATASET == 'salami':
                annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
                ao.gt = np.genfromtxt(annotation_file, usecols=0)
                ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
            else:
                annotation_file = join(options.GT, ao.name+'.lab') # beatles
                ao.gt = np.genfromtxt(annotation_file, usecols=(0,1))
                # (start, end) pairs -> unique sorted boundary times.
                ao.gt = np.unique(np.ndarray.flatten(ao.gt))
                ao.label = np.genfromtxt(annotation_file, usecols=2, dtype=str)

            featureset1, featureset2 = [], []
            # Features for 1st round segmentation: match files by basename
            # before the '_vamp' suffix; column 0 is timestamps, 1:14 data.
            for feature in feature_list1:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')]==ao.name:
                        ao.ssm_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0]
                        featureset1.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:14])
                        break
            if len(feature_list1) > 1:
                # Truncate all features to the shortest and stack columns.
                n_frame = np.min([x.shape[0] for x in featureset1])
                featureset1 = [x[:n_frame,:] for x in featureset1]
                ao.features1 = np.hstack((featureset1))
            else:
                ao.features1 = featureset1[0]

            # ao.ssm_timestamps = np.arange(0, ao.gt[-1], float(self.stepSize)/self.SampleRate)

            # Features for 2nd round verification
            for feature in feature_list2:
                for f in os.listdir(feature):
                    if f[:f.find('_vamp')]==ao.name:
                        featureset2.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:14])
                        break
            if len(feature_list2) > 1:
                n_frame = np.min([x.shape[0] for x in featureset2])
                featureset2 = [x[:n_frame,:] for x in featureset2]
                ao.features2 = np.hstack((featureset2))
            else:
                ao.features2 = featureset2[0]


            pca = PCA(n_components=6)

            # Normalise features.
            # Note that tempogram features are extracted from a different stepsize as the others.
            # Downsample feature set 1 by the aggregation step before the SSM.
            step = ao.features1.shape[0] / aggregation_step
            ao.features1 = resample(ao.features1, step)
            ao.features1 = normaliseFeature(ao.features1)
            ao.features2 = normaliseFeature(ao.features2)

            # Reduce both feature sets to 6 PCA components.
            pca.fit(ao.features1)
            ao.features1 = pca.transform(ao.features1)
            pca.fit(ao.features2)
            ao.features2 = pca.transform(ao.features2)


            ssm1 = getSSM(ao.features1)

            # Timestamps at the aggregated (downsampled) rate.
            ssm1_timestamps = ao.ssm_timestamps[::aggregation_step]

            # Take care with this! It gains memory pressure when processing large dataset.
            # audio_list.append(ao)

            # Segment the music at a coarse scale
            # Within the neighborhood of each peak candidate, verify the boundary location at a finer scale.
            novelty, smoothed_novelty, novelty_idxs = novelty_S.process(ssm1, peak_picker, self.kernel_size)

            # Evaluate the raw detection at 0.5 s and 3 s tolerances.
            novelty_05 = self.pairwiseF(ao.gt, novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ssm1_timestamps)
            novelty_3 = self.pairwiseF(ao.gt, novelty_idxs, tolerance=3, combine=1.0, idx2time=ssm1_timestamps)

            # Verification using different features at a finer scale.
            # Map to the orginal time scale
            peak_candidates = np.array(map(lambda x: int(np.rint(x*aggregation_step)), novelty_idxs))
            # peak_candidates = np.array(map(lambda x: int((np.rint(x*tempo_step))), novelty_idxs))

            peak_conf = normaliseArray(smoothed_novelty)
            if options.VERBOSE:
                np.savetxt(join(options.CACHE, ao.name+'-sn-raw.txt'), np.array(zip(ssm1_timestamps, peak_conf)), delimiter=',')
            # Upsample the confidence curve back to the original frame rate.
            peak_conf = zoom(peak_conf, aggregation_step)
            # peak_conf = zoom(peak_conf, tempo_step)
            peak_candidates = peak_candidates[:len(peak_conf)]

            verified_novelty_idxs = self.secondRoundDetection(peak_candidates, peak_conf, ao.features2, peak_verifier, tol=1, thresh1=0.3, thresh2=0.5)
            verified_novelty_idxs = list(set(verified_novelty_idxs))

            # Evaluate the verified detection at the full frame rate.
            verified_novelty_05 = self.pairwiseF(ao.gt, verified_novelty_idxs, tolerance=0.5, combine=1.0, idx2time=ao.ssm_timestamps)
            verified_novelty_3 = self.pairwiseF(ao.gt, verified_novelty_idxs, tolerance=3, combine=1.0, idx2time=ao.ssm_timestamps)

            # Write results.
            self.writeVerifiedRes(outfile1, ao.name, novelty_05, novelty_3, verified_novelty_05, verified_novelty_3)

            if options.VERBOSE:
                print ao.name, novelty_3.TP, novelty_3.FP, novelty_3.FN, novelty_3.P, novelty_3.R, novelty_3.F, verified_novelty_3.TP, verified_novelty_3.FP, verified_novelty_3.FN, verified_novelty_3.P, verified_novelty_3.R, verified_novelty_3.F
                np.savetxt(join(options.CACHE, ao.name+'-raw.txt'), novelty_3.detection, delimiter=',')
                np.savetxt(join(options.CACHE, ao.name+'-verified.txt'), verified_novelty_3.detection, delimiter=',')
|
mitian@17
|
469
|
mitian@17
|
def main():
    """Entry point: build a Seg instance and run the full pipeline."""
    Seg().process()


if __name__ == '__main__':
    main()
|
mitian@17
|
477
|