Mercurial > hg > chourdakisreiss2016
comparison experiment-reverb/code/panagiotakis.py @ 0:246d5546657c
initial commit, needs cleanup
author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
---|---|
date | Wed, 14 Dec 2016 13:15:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:246d5546657c |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Mon Jun 1 17:17:34 2015 | |
4 | |
5 @author: mmxgn | |
6 """ | |
7 | |
8 | |
9 from sys import argv | |
10 | |
11 | |
12 | |
13 def trmszc(audio, SR=21000): | |
14 # Segmentation based on 1 second segmentation | |
15 print("[II] Calculating features for %s, please wait..." % fname) | |
16 | |
17 rmsval = [] | |
18 zcrval = [] | |
19 rmszcrval = [] | |
20 meanrms = [] | |
21 varrms = [] | |
22 meanzcr = [] | |
23 varzcr = [] | |
24 histograms = [] | |
25 | |
26 | |
27 | |
28 # Parameters for the chi square distribution | |
29 alphas = [] | |
30 betas = [] | |
31 L = int(len(audio)/SR) | |
32 for n in range(0, L): | |
33 segment = audio[n*SR:(n+1)*SR] | |
34 framerms = [] | |
35 framezcr = [] | |
36 # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize): | |
37 for k in range(0, 50): | |
38 frame = segment[0.02*SR*k:0.02*SR*(k+1)] | |
39 | |
40 rmsv = sqrt(sum(frame**2)) | |
41 framerms.append(rmsv) | |
42 f1 = frame[0:-1] | |
43 f2 = frame[1:] | |
44 mm1 = f1*f2 | |
45 zc = 0.5*sum((abs(sign(mm1))-sign(mm1))) | |
46 | |
47 rmsval.append(rmsv) | |
48 zcrval.append(zc) | |
49 rmszcrval.append(rmsv*zc) | |
50 framezcr.append(zc) | |
51 | |
52 meanrms.append(mean(framerms)) | |
53 meanzcr.append(mean(framezcr)) | |
54 | |
55 mu = mean(framerms) | |
56 sigmasq = var(framerms, ddof=1) | |
57 | |
58 beta = sigmasq/mu | |
59 alpha = (mu/beta) - 1 | |
60 | |
61 if mu != 0 and sigmasq != 0: | |
62 betas.append(beta) | |
63 alphas.append(alpha) | |
64 else: | |
65 betas.append(1000000) | |
66 alphas.append(-1) | |
67 | |
68 histograms.append(histogram(framerms, 256)) | |
69 | |
70 varrms.append(var(framerms, ddof=1)) | |
71 varzcr.append(var(framerms, ddof=1)) | |
72 | |
73 | |
74 # Computation of KVRMS | |
75 | |
76 l = len(rmsval) | |
77 ftm = [mean(rmsval[0:50])] | |
78 ftv = [var(rmsval[0:50], ddof=1)] | |
79 kvrms = [0] | |
80 | |
81 for i in range(1,l-50): | |
82 ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50 | |
83 ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2 | |
84 ftm.append(ftm_) | |
85 ftv.append(ftv_) | |
86 | |
87 if ftm_ != 0: | |
88 kvrms.append(ftv_/ftm_**2) | |
89 else: | |
90 kvrms.append(0) | |
91 | |
92 | |
93 m = mean(kvrms) | |
94 v = var(kvrms, ddof=1) | |
95 | |
96 b = v/m | |
97 a = m/b - 1 | |
98 | |
99 # Computing Similarity | |
100 | |
101 sim = [] | |
102 from scipy.special import gamma | |
103 | |
104 for i in range(1,L): | |
105 k = (alphas[i]+alphas[i-1]+2)/2 | |
106 sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1))) | |
107 | |
108 # 0 If overflow | |
109 if isinf(sim_) or isnan(sim_): | |
110 sim_ = 0 | |
111 sim.append(sim_) | |
112 | |
113 sim2 = [] | |
114 | |
115 for i in range(2,L): | |
116 k = (alphas[i]+alphas[i-2]+2)/2 | |
117 sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1))) | |
118 | |
119 if isinf(sim_) or isnan(sim_): | |
120 sim_ = 0 | |
121 sim2.append(sim_) | |
122 | |
123 | |
124 p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1) | |
125 # | |
126 rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1) | |
127 | |
128 | |
129 P = [0] | |
130 for i in range(1, L-1): | |
131 P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1]) | |
132 P_ = (1-sim2[i-1]) | |
133 P.append(P_) | |
134 | |
135 | |
136 P.append(0) | |
137 P = array(P) | |
138 | |
139 M = mean(P) | |
140 V1 = mean(abs(P[0:L-3]-P[1:L-2])) | |
141 V2 = mean(abs(P[0:L-4]-P[2:L-2])) | |
142 # V = (V1+V2)*0.25; | |
143 V = 0.25*median(abs((P[0:L-4]-P[2:L-2]))) | |
144 | |
145 Pd = [P[0],P[1]] | |
146 | |
147 | |
148 for i in range(2,L-2): | |
149 m = mean(concatenate((P[i-2:i],P[i+1:i+3]))) | |
150 m1 = max(concatenate((P[i-2:i],P[i+1:i+3]))) | |
151 | |
152 if m<0.1*M: | |
153 m1 = 0.1*M | |
154 | |
155 d = (P[i]-m)/m1 | |
156 | |
157 if d < 0: | |
158 d = 0 | |
159 Pd.append(P[i]*d/V) | |
160 | |
161 Pd.append(P[L-2]) | |
162 Pd.append(P[L-1]) | |
163 | |
164 | |
165 # Selection of candidate windows where there is a change | |
166 | |
167 j = 0 | |
168 pos = [] | |
169 for i in range(2, L-3): | |
170 if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]: | |
171 j += 1 | |
172 pos.append(i) | |
173 | |
174 if len(pos) == 0: | |
175 sys.exit(0) | |
176 | |
177 | |
178 # Computation of Change with 20 ms accuracy | |
179 | |
180 k2 = 0 | |
181 c = 25 | |
182 | |
183 Sm = zeros((2*c+51,)) | |
184 posms = zeros((len(pos),)) | |
185 msec = zeros((len(pos),)) | |
186 mpos = zeros((len(pos), )) | |
187 | |
188 for k1 in range(0,len(pos)): | |
189 i = 50*(pos[k1] - 1); # ms | |
190 for j in range(-c,50+c+1): | |
191 if ftv[i+j] != 0 and ftm[i+j] != 0: | |
192 b1 = ftv[i+j]/ftm[i+j] | |
193 a1 = ftm[i+j]/b1 - 1 | |
194 else: | |
195 b1 = 1000 | |
196 a1 = -1 | |
197 | |
198 if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0: | |
199 b2 = ftv[i+j+50-1]/ftm[i+j+50-1] | |
200 a2 = ftm[i+j+50-1]/b2 - 1 | |
201 else: | |
202 b2 = 1000 | |
203 a2 = -1 | |
204 | |
205 k = (a2+a1+2)/2 | |
206 Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1))) | |
207 | |
208 if isnan(Sm[j+c]) or isinf(Sm[j+c]): | |
209 Sm[j+c] = 0 | |
210 | |
211 posms[k1] = argmin(Sm) | |
212 x = Sm[posms[k1]] | |
213 | |
214 h = 1 - Sm | |
215 | |
216 if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1: | |
217 msec[k2] = posms[k1]-1-c | |
218 mpos[k2] = pos[k1] | |
219 k2 = k2+1 | |
220 else: | |
221 msec[k2] = 0 | |
222 mpos[k2] = 0 | |
223 | |
224 | |
225 posms = msec | |
226 pos = mpos | |
227 | |
228 fpos = pos+0.02*posms | |
229 | |
230 pfpos = floor(50*pos+posms); | |
231 | |
232 bvoice = 0.14339738781836; | |
233 bmusic = 0.04399754659151; | |
234 amusic = 1.66349817725076; | |
235 avoice = 2.32677887950291; | |
236 | |
237 pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1]))) | |
238 | |
239 L = len(pfpos)-1 | |
240 V = zeros((L,2)) | |
241 Vg = zeros((L,2)) | |
242 | |
243 Pzero = [] | |
244 Fsp1 = [] | |
245 Type = zeros((L,)) | |
246 Typeg = zeros((L,)) | |
247 | |
248 | |
249 # Classification for each segment | |
250 | |
251 noise = zeros((L,)) | |
252 noise2 = zeros((L,)) | |
253 | |
254 pfpos = pfpos.astype(int) | |
255 Pmusicg = zeros((L,)) | |
256 Pvoiceg = zeros((L,)) | |
257 Pmusic = zeros((L,)) | |
258 Pvoice = zeros((L,)) | |
259 Pspace = zeros((L,)) | |
260 zpos = zeros((L,)) | |
261 Pmax = zeros((L,)) | |
262 Pmaxg = zeros((L,)) | |
263 Freq = zeros((L,6)) | |
264 | |
265 zcrval = array(zcrval) | |
266 rmsval = array(rmsval) | |
267 rmszcrval = array(rmszcrval) | |
268 i | |
269 for i in range(0, L): | |
270 d = 0 | |
271 | |
272 x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1]) | |
273 x = x1 | |
274 y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1] | |
275 y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1])) | |
276 noise[i] = 50*mean(exp(-y)) | |
277 | |
278 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
279 Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
280 | |
281 sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1]) | |
282 | |
283 Pspace[i] = 6*exp((-sm**2)/(2*0.6**2)) | |
284 | |
285 zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1])) | |
286 | |
287 if zpos[i] > 0.08 or x > 3: | |
288 Pvoice[i] = 10 | |
289 else: | |
290 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
291 | |
292 V[i,0] = Pmusic[i] | |
293 V[i,1] = Pvoice[i] | |
294 Type[i] = argmax(V[i,:]) | |
295 Pmax[i] = V[i,Type[i]] | |
296 | |
297 noise2[i] = 50 * mean(exp(-4*y)) | |
298 Pmusicg[i] = 0 | |
299 | |
300 if noise2[i] > 3.5: | |
301 Pvoiceg[i] = 10 | |
302 else: | |
303 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
304 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
305 | |
306 | |
307 Vg[i,0] = Pmusicg[i] | |
308 Vg[i,1] = Pvoiceg[i] | |
309 | |
310 Typeg[i] = argmax(Vg[i,:]) | |
311 Pmaxg[i] = Vg[i,Typeg[i]] | |
312 | |
313 Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1])) | |
314 tempV = rmsval[pfpos[i]:pfpos[i+1]+1] | |
315 rr = max(tempV)+0.001 | |
316 tempV1 = tempV/rr | |
317 Vk0 = Vk.copy() | |
318 | |
319 for j in range(0, len(Vk)): | |
320 if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1: | |
321 Vk[j] = 0 | |
322 # print "Changing %d" % j | |
323 Vk1 = Vk.copy() | |
324 | |
325 for j in range(0, len(Vk)): | |
326 if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV): | |
327 # print "Changing %d" % j | |
328 | |
329 Vk1[j] = 0 | |
330 | |
331 Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1]) | |
332 Pol = (ones((len(Vk),)) - sign(Vk))*Vk0 | |
333 | |
334 VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1] | |
335 dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1]) | |
336 | |
337 Freq[i, 0] = 50*sum(dVk)/(2*len(Vk)) | |
338 Freq[i, 1] = len(Vk)/50 | |
339 Freq[i, 2] = sum(Freq[:,2]) | |
340 Freq[i, 3] = zpos[i] | |
341 Freq[i, 4] = noise2[i] | |
342 Freq[i, 5] = max(Zca) | |
343 | |
344 Pmusicg[i] = 0 | |
345 Pvoiceg[i] = 0 | |
346 | |
347 if Freq[i,0] < 0.59 and Freq[i,1] > 2.5: | |
348 Pmusicg[i] = 10 | |
349 else: | |
350 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
351 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
352 | |
353 if x > 3: | |
354 Pvoiceg[i] = Pvoiceg[i] + 0.1 | |
355 | |
356 if x > 300: | |
357 Pmusicg[i] = 9 | |
358 | |
359 | |
360 if noise2[i] > 3.5: | |
361 Pvoiceg[i] = 11 | |
362 | |
363 if max(Zca) > 280: | |
364 Pmusicg[i] = 11 | |
365 | |
366 | |
367 if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ): | |
368 Pvoiceg[i] = 12 | |
369 | |
370 | |
371 if zpos[i] > 0.15: | |
372 Pvoiceg[i] = 13 | |
373 | |
374 | |
375 Vg[i,0] = Pmusicg[i] | |
376 Vg[i,1] = Pvoiceg[i] | |
377 | |
378 Typeg[i] = argmax(Vg[i,:]) | |
379 Pmaxg[i] = Vg[i, Typeg[i]] | |
380 | |
381 Fsp1.extend(Vk) | |
382 | |
383 if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5): | |
384 Type[i] = 2 | |
385 Typeg[i] = 2 | |
386 | |
387 | |
388 if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1: | |
389 Type[i] = Type[i-1] | |
390 | |
391 | |
392 | |
393 maxpos = pfpos[-1] | |
394 | |
395 tt = zeros((maxpos,)) | |
396 pt = zeros((maxpos,)) | |
397 pm = zeros((maxpos,)) | |
398 pv = zeros((maxpos,)) | |
399 ps = zeros((maxpos,)) | |
400 | |
401 for i in range(0, L): | |
402 tt[pfpos[i]:pfpos[i+1]] = Typeg[i] | |
403 pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i] | |
404 pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i] | |
405 pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i] | |
406 ps[pfpos[i]:pfpos[i+1]] = Pspace[i] | |
407 | |
408 | |
409 | |
410 | |
411 | |
412 | |
413 for k in range(0, len(rmsval)): | |
414 if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval): | |
415 zcrval[k] = 0 | |
416 | |
417 | |
418 classes = [] | |
419 positions = [] | |
420 | |
421 class_ = 5 | |
422 | |
423 for j in range(0, len(tt)): | |
424 if class_ != tt[j]: | |
425 class_ = tt[j] | |
426 classes.append(int(class_)) | |
427 positions.append(int(j*0.02*SR)) | |
428 | |
429 | |
430 | |
431 return positions, classes | |
432 | |
433 | |
434 | |
435 if __name__=="__main__": | |
436 if len(argv) != 3: | |
437 print("Incorrect number of arguments:") | |
438 print("Usage: ") | |
439 print("%s <input>") | |
440 print("") | |
441 print("Arguments:") | |
442 print("<input>\tThe input filename. Can be .wav, .mp3, etc...") | |
443 sys.exit(-1) | |
444 else: | |
445 print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator") | |
446 print("[II] Loading libraries") | |
447 | |
448 import essentia | |
449 from essentia import Pool | |
450 from essentia.standard import * | |
451 import csv | |
452 import yaml | |
453 | |
454 # reqyures matplotlib | |
455 from pylab import * | |
456 | |
457 #requires numpy | |
458 from numpy import * | |
459 | |
460 #requires scikit-learn | |
461 from sklearn.metrics import pairwise_distances | |
462 | |
463 d = {} | |
464 v = {} | |
465 | |
466 fname = argv[1] | |
467 outfname = argv[2] | |
468 | |
469 | |
470 | |
471 trackname = fname.split('.')[0].split('/')[-1] | |
472 | |
473 | |
474 if outfname.partition('.')[-1].lower() not in ['json', 'yaml']: | |
475 print("Please choose a .json or .yaml as an output file.") | |
476 sys.exit(-1) | |
477 else: | |
478 if outfname.partition('.')[-1].lower() == 'json': | |
479 output = YamlOutput(filename = outfname, format='json') | |
480 else: | |
481 output = YamlOutput(filename = outfname, format='yaml') | |
482 | |
483 print("Feature extraction of `%s\'" % fname) | |
484 | |
485 # Sampling Rate | |
486 SR = 21000.0 | |
487 | |
488 # Sampling Frequency | |
489 T = 1.0/SR | |
490 | |
491 # SegmentSize | |
492 tsegmentSize = 1000 #ms | |
493 segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \ | |
494 else int(floor(tsegmentSize*SR/1000)) | |
495 | |
496 | |
497 # FrameSize | |
498 tframeSize = 20 #ms | |
499 frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \ | |
500 else int(floor(tframeSize*SR/1000)) | |
501 | |
502 # HopSize | |
503 hopSize = frameSize | |
504 | |
505 # Load Audio | |
506 audio = MonoLoader(filename = fname, sampleRate=SR)() | |
507 | |
508 | |
509 | |
510 | |
511 | |
512 # # Zero Crossing Rate | |
513 # zcr = ZeroCrossingRate() | |
514 # | |
515 # # RMS | |
516 # rms = RMS() | |
517 # | |
518 # # Segmentation based on 1 second segmentation | |
519 # print("[II] Calculating features for %s, please wait..." % fname) | |
520 # | |
521 # rmsval = [] | |
522 # zcrval = [] | |
523 # rmszcrval = [] | |
524 # meanrms = [] | |
525 # varrms = [] | |
526 # meanzcr = [] | |
527 # varzcr = [] | |
528 # histograms = [] | |
529 # | |
530 # | |
531 # | |
532 # # Parameters for the chi square distribution | |
533 # alphas = [] | |
534 # betas = [] | |
535 # L = int(len(audio)/SR) | |
536 # for n in range(0, L): | |
537 # segment = audio[n*SR:(n+1)*SR] | |
538 # framerms = [] | |
539 # framezcr = [] | |
540 # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize): | |
541 # for k in range(0, 50): | |
542 # frame = segment[0.02*SR*k:0.02*SR*(k+1)] | |
543 # | |
544 # rmsv = sqrt(sum(frame**2)) | |
545 # framerms.append(rmsv) | |
546 # f1 = frame[0:-1] | |
547 # f2 = frame[1:] | |
548 # mm1 = f1*f2 | |
549 # zc = 0.5*sum((abs(sign(mm1))-sign(mm1))) | |
550 # | |
551 # rmsval.append(rmsv) | |
552 # zcrval.append(zc) | |
553 # rmszcrval.append(rmsv*zc) | |
554 # framezcr.append(zc) | |
555 # | |
556 # meanrms.append(mean(framerms)) | |
557 # meanzcr.append(mean(framezcr)) | |
558 # | |
559 # mu = mean(framerms) | |
560 # sigmasq = var(framerms, ddof=1) | |
561 # | |
562 # beta = sigmasq/mu | |
563 # alpha = (mu/beta) - 1 | |
564 # | |
565 # if mu != 0 and sigmasq != 0: | |
566 # betas.append(beta) | |
567 # alphas.append(alpha) | |
568 # else: | |
569 # betas.append(1000000) | |
570 # alphas.append(-1) | |
571 # | |
572 # histograms.append(histogram(framerms, 256)) | |
573 # | |
574 # varrms.append(var(framerms, ddof=1)) | |
575 # varzcr.append(var(framerms, ddof=1)) | |
576 # | |
577 # | |
578 # # Computation of KVRMS | |
579 # | |
580 # l = len(rmsval) | |
581 # ftm = [mean(rmsval[0:50])] | |
582 # ftv = [var(rmsval[0:50], ddof=1)] | |
583 # kvrms = [0] | |
584 # | |
585 # for i in range(1,l-50): | |
586 # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50 | |
587 # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2 | |
588 # ftm.append(ftm_) | |
589 # ftv.append(ftv_) | |
590 # | |
591 # if ftm_ != 0: | |
592 # kvrms.append(ftv_/ftm_**2) | |
593 # else: | |
594 # kvrms.append(0) | |
595 # | |
596 # | |
597 # m = mean(kvrms) | |
598 # v = var(kvrms, ddof=1) | |
599 # | |
600 # b = v/m | |
601 # a = m/b - 1 | |
602 # | |
603 # # Computing Similarity | |
604 # | |
605 # sim = [] | |
606 # from scipy.special import gamma | |
607 # | |
608 # for i in range(1,L): | |
609 # k = (alphas[i]+alphas[i-1]+2)/2 | |
610 # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1))) | |
611 # | |
612 # # 0 If overflow | |
613 # if isinf(sim_) or isnan(sim_): | |
614 # sim_ = 0 | |
615 # sim.append(sim_) | |
616 # | |
617 # sim2 = [] | |
618 # | |
619 # for i in range(2,L): | |
620 # k = (alphas[i]+alphas[i-2]+2)/2 | |
621 # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1))) | |
622 # | |
623 # if isinf(sim_) or isnan(sim_): | |
624 # sim_ = 0 | |
625 # sim2.append(sim_) | |
626 # | |
627 # | |
628 # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1) | |
629 ## | |
630 # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1) | |
631 # | |
632 # | |
633 # P = [0] | |
634 # for i in range(1, L-1): | |
635 # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1]) | |
636 # P_ = (1-sim2[i-1]) | |
637 # P.append(P_) | |
638 # | |
639 # | |
640 # P.append(0) | |
641 # P = array(P) | |
642 # | |
643 # M = mean(P) | |
644 # V1 = mean(abs(P[0:L-3]-P[1:L-2])) | |
645 # V2 = mean(abs(P[0:L-4]-P[2:L-2])) | |
646 ## V = (V1+V2)*0.25; | |
647 # V = 0.25*median(abs((P[0:L-4]-P[2:L-2]))) | |
648 # | |
649 # Pd = [P[0],P[1]] | |
650 # | |
651 # | |
652 # for i in range(2,L-2): | |
653 # m = mean(concatenate((P[i-2:i],P[i+1:i+3]))) | |
654 # m1 = max(concatenate((P[i-2:i],P[i+1:i+3]))) | |
655 # | |
656 # if m<0.1*M: | |
657 # m1 = 0.1*M | |
658 # | |
659 # d = (P[i]-m)/m1 | |
660 # | |
661 # if d < 0: | |
662 # d = 0 | |
663 # Pd.append(P[i]*d/V) | |
664 # | |
665 # Pd.append(P[L-2]) | |
666 # Pd.append(P[L-1]) | |
667 # | |
668 # | |
669 # # Selection of candidate windows where there is a change | |
670 # | |
671 # j = 0 | |
672 # pos = [] | |
673 # for i in range(2, L-3): | |
674 # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]: | |
675 # j += 1 | |
676 # pos.append(i) | |
677 # | |
678 # if len(pos) == 0: | |
679 # sys.exit(0) | |
680 # | |
681 # | |
682 # # Computation of Change with 20 ms accuracy | |
683 # | |
684 # k2 = 0 | |
685 # c = 25 | |
686 # | |
687 # Sm = zeros((2*c+51,)) | |
688 # posms = zeros((len(pos),)) | |
689 # msec = zeros((len(pos),)) | |
690 # mpos = zeros((len(pos), )) | |
691 # | |
692 # for k1 in range(0,len(pos)): | |
693 # i = 50*(pos[k1] - 1); # ms | |
694 # for j in range(-c,50+c+1): | |
695 # if ftv[i+j] != 0 and ftm[i+j] != 0: | |
696 # b1 = ftv[i+j]/ftm[i+j] | |
697 # a1 = ftm[i+j]/b1 - 1 | |
698 # else: | |
699 # b1 = 1000 | |
700 # a1 = -1 | |
701 # | |
702 # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0: | |
703 # b2 = ftv[i+j+50-1]/ftm[i+j+50-1] | |
704 # a2 = ftm[i+j+50-1]/b2 - 1 | |
705 # else: | |
706 # b2 = 1000 | |
707 # a2 = -1 | |
708 # | |
709 # k = (a2+a1+2)/2 | |
710 # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1))) | |
711 # | |
712 # if isnan(Sm[j+c]) or isinf(Sm[j+c]): | |
713 # Sm[j+c] = 0 | |
714 # | |
715 # posms[k1] = argmin(Sm) | |
716 # x = Sm[posms[k1]] | |
717 # | |
718 # h = 1 - Sm | |
719 # | |
720 # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1: | |
721 # msec[k2] = posms[k1]-1-c | |
722 # mpos[k2] = pos[k1] | |
723 # k2 = k2+1 | |
724 # else: | |
725 # msec[k2] = 0 | |
726 # mpos[k2] = 0 | |
727 # | |
728 # | |
729 # posms = msec | |
730 # pos = mpos | |
731 # | |
732 # fpos = pos+0.02*posms | |
733 # | |
734 # pfpos = floor(50*pos+posms); | |
735 # | |
736 # bvoice = 0.14339738781836; | |
737 # bmusic = 0.04399754659151; | |
738 # amusic = 1.66349817725076; | |
739 # avoice = 2.32677887950291; | |
740 # | |
741 # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1]))) | |
742 # | |
743 # L = len(pfpos)-1 | |
744 # V = zeros((L,2)) | |
745 # Vg = zeros((L,2)) | |
746 # | |
747 # Pzero = [] | |
748 # Fsp1 = [] | |
749 # Type = zeros((L,)) | |
750 # Typeg = zeros((L,)) | |
751 # | |
752 # | |
753 # # Classification for each segment | |
754 # | |
755 # noise = zeros((L,)) | |
756 # noise2 = zeros((L,)) | |
757 # | |
758 # pfpos = pfpos.astype(int) | |
759 # Pmusicg = zeros((L,)) | |
760 # Pvoiceg = zeros((L,)) | |
761 # Pmusic = zeros((L,)) | |
762 # Pvoice = zeros((L,)) | |
763 # Pspace = zeros((L,)) | |
764 # zpos = zeros((L,)) | |
765 # Pmax = zeros((L,)) | |
766 # Pmaxg = zeros((L,)) | |
767 # Freq = zeros((L,6)) | |
768 # | |
769 # zcrval = array(zcrval) | |
770 # rmsval = array(rmsval) | |
771 # rmszcrval = array(rmszcrval) | |
772 # i | |
773 # for i in range(0, L): | |
774 # d = 0 | |
775 # | |
776 # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1]) | |
777 # x = x1 | |
778 # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1] | |
779 # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1])) | |
780 # noise[i] = 50*mean(exp(-y)) | |
781 # | |
782 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
783 # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
784 # | |
785 # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1]) | |
786 # | |
787 # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2)) | |
788 # | |
789 # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1])) | |
790 # | |
791 # if zpos[i] > 0.08 or x > 3: | |
792 # Pvoice[i] = 10 | |
793 # else: | |
794 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
795 # | |
796 # V[i,0] = Pmusic[i] | |
797 # V[i,1] = Pvoice[i] | |
798 # Type[i] = argmax(V[i,:]) | |
799 # Pmax[i] = V[i,Type[i]] | |
800 # | |
801 # noise2[i] = 50 * mean(exp(-4*y)) | |
802 # Pmusicg[i] = 0 | |
803 # | |
804 # if noise2[i] > 3.5: | |
805 # Pvoiceg[i] = 10 | |
806 # else: | |
807 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
808 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
809 # | |
810 # | |
811 # Vg[i,0] = Pmusicg[i] | |
812 # Vg[i,1] = Pvoiceg[i] | |
813 # | |
814 # Typeg[i] = argmax(Vg[i,:]) | |
815 # Pmaxg[i] = Vg[i,Typeg[i]] | |
816 # | |
817 # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1])) | |
818 # tempV = rmsval[pfpos[i]:pfpos[i+1]+1] | |
819 # rr = max(tempV)+0.001 | |
820 # tempV1 = tempV/rr | |
821 # Vk0 = Vk.copy() | |
822 # | |
823 # for j in range(0, len(Vk)): | |
824 # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1: | |
825 # Vk[j] = 0 | |
826 # # print "Changing %d" % j | |
827 # Vk1 = Vk.copy() | |
828 # | |
829 # for j in range(0, len(Vk)): | |
830 # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV): | |
831 # # print "Changing %d" % j | |
832 # | |
833 # Vk1[j] = 0 | |
834 # | |
835 # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1]) | |
836 # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0 | |
837 # | |
838 # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1] | |
839 # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1]) | |
840 # | |
841 # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk)) | |
842 # Freq[i, 1] = len(Vk)/50 | |
843 # Freq[i, 2] = sum(Freq[:,2]) | |
844 # Freq[i, 3] = zpos[i] | |
845 # Freq[i, 4] = noise2[i] | |
846 # Freq[i, 5] = max(Zca) | |
847 # | |
848 # Pmusicg[i] = 0 | |
849 # Pvoiceg[i] = 0 | |
850 # | |
851 # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5: | |
852 # Pmusicg[i] = 10 | |
853 # else: | |
854 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) | |
855 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) | |
856 # | |
857 # if x > 3: | |
858 # Pvoiceg[i] = Pvoiceg[i] + 0.1 | |
859 # | |
860 # if x > 300: | |
861 # Pmusicg[i] = 9 | |
862 # | |
863 # | |
864 # if noise2[i] > 3.5: | |
865 # Pvoiceg[i] = 11 | |
866 # | |
867 # if max(Zca) > 280: | |
868 # Pmusicg[i] = 11 | |
869 # | |
870 # | |
871 # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ): | |
872 # Pvoiceg[i] = 12 | |
873 # | |
874 # | |
875 # if zpos[i] > 0.15: | |
876 # Pvoiceg[i] = 13 | |
877 # | |
878 # | |
879 # Vg[i,0] = Pmusicg[i] | |
880 # Vg[i,1] = Pvoiceg[i] | |
881 # | |
882 # Typeg[i] = argmax(Vg[i,:]) | |
883 # Pmaxg[i] = Vg[i, Typeg[i]] | |
884 # | |
885 # Fsp1.extend(Vk) | |
886 # | |
887 # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5): | |
888 # Type[i] = 2 | |
889 # Typeg[i] = 2 | |
890 # | |
891 # | |
892 # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1: | |
893 # Type[i] = Type[i-1] | |
894 # | |
895 # | |
896 # | |
897 # maxpos = pfpos[-1] | |
898 # | |
899 # tt = zeros((maxpos,)) | |
900 # pt = zeros((maxpos,)) | |
901 # pm = zeros((maxpos,)) | |
902 # pv = zeros((maxpos,)) | |
903 # ps = zeros((maxpos,)) | |
904 # | |
905 # for i in range(0, L): | |
906 # tt[pfpos[i]:pfpos[i+1]] = Typeg[i] | |
907 # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i] | |
908 # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i] | |
909 # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i] | |
910 # ps[pfpos[i]:pfpos[i+1]] = Pspace[i] | |
911 # | |
912 # | |
913 # | |
914 # | |
915 # | |
916 # | |
917 # for k in range(0, len(rmsval)): | |
918 # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval): | |
919 # zcrval[k] = 0 | |
920 # | |
921 # | |
922 # classes = [] | |
923 # positions = [] | |
924 # | |
925 # class_ = 5 | |
926 # | |
927 # for j in range(0, len(tt)): | |
928 # if class_ != tt[j]: | |
929 # class_ = tt[j] | |
930 # classes.append(int(class_)) | |
931 # positions.append(int(j*0.02*SR)) | |
932 # | |
933 # classvec = zeros((len(audio),)) | |
934 # | |
935 # if len(positions) == 1: | |
936 # classvec = classes[0]*ones((len(audio),)) | |
937 # for j in range(1, len(positions)): | |
938 # classvec[positions[j-1]:positions[j]] = classes[j-1] | |
939 # | |
940 # | |
941 # | |
942 # | |
943 # | |
944 # | |
945 # | |
946 # | |
947 # | |
948 # | |
949 # | |
950 | |
951 # | |
952 | |
953 | |
954 positions, classes = trmszc(audio, SR) | |
955 | |
956 classvec = zeros((len(audio),)) | |
957 | |
958 if len(positions) == 1: | |
959 classvec = classes[0]*ones((len(audio),)) | |
960 for j in range(1, len(positions)): | |
961 classvec[positions[j-1]:positions[j]] = classes[j-1] | |
962 | |
963 plot(audio) | |
964 plot(classvec) | |
965 |