comparison experiment-reverb/code/panagiotakis.py @ 0:246d5546657c

initial commit, needs cleanup
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 14 Dec 2016 13:15:48 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:246d5546657c
1 # -*- coding: utf-8 -*-
2 """
3 Created on Mon Jun 1 17:17:34 2015
4
5 @author: mmxgn
6 """
7
8
9 from sys import argv
10
11
12
13 def trmszc(audio, SR=21000):
14 # Segmentation based on 1 second segmentation
15 print("[II] Calculating features for %s, please wait..." % fname)
16
17 rmsval = []
18 zcrval = []
19 rmszcrval = []
20 meanrms = []
21 varrms = []
22 meanzcr = []
23 varzcr = []
24 histograms = []
25
26
27
28 # Parameters for the chi square distribution
29 alphas = []
30 betas = []
31 L = int(len(audio)/SR)
32 for n in range(0, L):
33 segment = audio[n*SR:(n+1)*SR]
34 framerms = []
35 framezcr = []
36 # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
37 for k in range(0, 50):
38 frame = segment[0.02*SR*k:0.02*SR*(k+1)]
39
40 rmsv = sqrt(sum(frame**2))
41 framerms.append(rmsv)
42 f1 = frame[0:-1]
43 f2 = frame[1:]
44 mm1 = f1*f2
45 zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
46
47 rmsval.append(rmsv)
48 zcrval.append(zc)
49 rmszcrval.append(rmsv*zc)
50 framezcr.append(zc)
51
52 meanrms.append(mean(framerms))
53 meanzcr.append(mean(framezcr))
54
55 mu = mean(framerms)
56 sigmasq = var(framerms, ddof=1)
57
58 beta = sigmasq/mu
59 alpha = (mu/beta) - 1
60
61 if mu != 0 and sigmasq != 0:
62 betas.append(beta)
63 alphas.append(alpha)
64 else:
65 betas.append(1000000)
66 alphas.append(-1)
67
68 histograms.append(histogram(framerms, 256))
69
70 varrms.append(var(framerms, ddof=1))
71 varzcr.append(var(framerms, ddof=1))
72
73
74 # Computation of KVRMS
75
76 l = len(rmsval)
77 ftm = [mean(rmsval[0:50])]
78 ftv = [var(rmsval[0:50], ddof=1)]
79 kvrms = [0]
80
81 for i in range(1,l-50):
82 ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
83 ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
84 ftm.append(ftm_)
85 ftv.append(ftv_)
86
87 if ftm_ != 0:
88 kvrms.append(ftv_/ftm_**2)
89 else:
90 kvrms.append(0)
91
92
93 m = mean(kvrms)
94 v = var(kvrms, ddof=1)
95
96 b = v/m
97 a = m/b - 1
98
99 # Computing Similarity
100
101 sim = []
102 from scipy.special import gamma
103
104 for i in range(1,L):
105 k = (alphas[i]+alphas[i-1]+2)/2
106 sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
107
108 # 0 If overflow
109 if isinf(sim_) or isnan(sim_):
110 sim_ = 0
111 sim.append(sim_)
112
113 sim2 = []
114
115 for i in range(2,L):
116 k = (alphas[i]+alphas[i-2]+2)/2
117 sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
118
119 if isinf(sim_) or isnan(sim_):
120 sim_ = 0
121 sim2.append(sim_)
122
123
124 p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
125 #
126 rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
127
128
129 P = [0]
130 for i in range(1, L-1):
131 P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
132 P_ = (1-sim2[i-1])
133 P.append(P_)
134
135
136 P.append(0)
137 P = array(P)
138
139 M = mean(P)
140 V1 = mean(abs(P[0:L-3]-P[1:L-2]))
141 V2 = mean(abs(P[0:L-4]-P[2:L-2]))
142 # V = (V1+V2)*0.25;
143 V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
144
145 Pd = [P[0],P[1]]
146
147
148 for i in range(2,L-2):
149 m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
150 m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
151
152 if m<0.1*M:
153 m1 = 0.1*M
154
155 d = (P[i]-m)/m1
156
157 if d < 0:
158 d = 0
159 Pd.append(P[i]*d/V)
160
161 Pd.append(P[L-2])
162 Pd.append(P[L-1])
163
164
165 # Selection of candidate windows where there is a change
166
167 j = 0
168 pos = []
169 for i in range(2, L-3):
170 if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
171 j += 1
172 pos.append(i)
173
174 if len(pos) == 0:
175 sys.exit(0)
176
177
178 # Computation of Change with 20 ms accuracy
179
180 k2 = 0
181 c = 25
182
183 Sm = zeros((2*c+51,))
184 posms = zeros((len(pos),))
185 msec = zeros((len(pos),))
186 mpos = zeros((len(pos), ))
187
188 for k1 in range(0,len(pos)):
189 i = 50*(pos[k1] - 1); # ms
190 for j in range(-c,50+c+1):
191 if ftv[i+j] != 0 and ftm[i+j] != 0:
192 b1 = ftv[i+j]/ftm[i+j]
193 a1 = ftm[i+j]/b1 - 1
194 else:
195 b1 = 1000
196 a1 = -1
197
198 if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
199 b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
200 a2 = ftm[i+j+50-1]/b2 - 1
201 else:
202 b2 = 1000
203 a2 = -1
204
205 k = (a2+a1+2)/2
206 Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
207
208 if isnan(Sm[j+c]) or isinf(Sm[j+c]):
209 Sm[j+c] = 0
210
211 posms[k1] = argmin(Sm)
212 x = Sm[posms[k1]]
213
214 h = 1 - Sm
215
216 if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
217 msec[k2] = posms[k1]-1-c
218 mpos[k2] = pos[k1]
219 k2 = k2+1
220 else:
221 msec[k2] = 0
222 mpos[k2] = 0
223
224
225 posms = msec
226 pos = mpos
227
228 fpos = pos+0.02*posms
229
230 pfpos = floor(50*pos+posms);
231
232 bvoice = 0.14339738781836;
233 bmusic = 0.04399754659151;
234 amusic = 1.66349817725076;
235 avoice = 2.32677887950291;
236
237 pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
238
239 L = len(pfpos)-1
240 V = zeros((L,2))
241 Vg = zeros((L,2))
242
243 Pzero = []
244 Fsp1 = []
245 Type = zeros((L,))
246 Typeg = zeros((L,))
247
248
249 # Classification for each segment
250
251 noise = zeros((L,))
252 noise2 = zeros((L,))
253
254 pfpos = pfpos.astype(int)
255 Pmusicg = zeros((L,))
256 Pvoiceg = zeros((L,))
257 Pmusic = zeros((L,))
258 Pvoice = zeros((L,))
259 Pspace = zeros((L,))
260 zpos = zeros((L,))
261 Pmax = zeros((L,))
262 Pmaxg = zeros((L,))
263 Freq = zeros((L,6))
264
265 zcrval = array(zcrval)
266 rmsval = array(rmsval)
267 rmszcrval = array(rmszcrval)
268 i
269 for i in range(0, L):
270 d = 0
271
272 x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
273 x = x1
274 y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
275 y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
276 noise[i] = 50*mean(exp(-y))
277
278 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
279 Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
280
281 sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
282
283 Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
284
285 zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
286
287 if zpos[i] > 0.08 or x > 3:
288 Pvoice[i] = 10
289 else:
290 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
291
292 V[i,0] = Pmusic[i]
293 V[i,1] = Pvoice[i]
294 Type[i] = argmax(V[i,:])
295 Pmax[i] = V[i,Type[i]]
296
297 noise2[i] = 50 * mean(exp(-4*y))
298 Pmusicg[i] = 0
299
300 if noise2[i] > 3.5:
301 Pvoiceg[i] = 10
302 else:
303 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
304 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
305
306
307 Vg[i,0] = Pmusicg[i]
308 Vg[i,1] = Pvoiceg[i]
309
310 Typeg[i] = argmax(Vg[i,:])
311 Pmaxg[i] = Vg[i,Typeg[i]]
312
313 Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
314 tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
315 rr = max(tempV)+0.001
316 tempV1 = tempV/rr
317 Vk0 = Vk.copy()
318
319 for j in range(0, len(Vk)):
320 if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
321 Vk[j] = 0
322 # print "Changing %d" % j
323 Vk1 = Vk.copy()
324
325 for j in range(0, len(Vk)):
326 if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
327 # print "Changing %d" % j
328
329 Vk1[j] = 0
330
331 Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
332 Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
333
334 VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
335 dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
336
337 Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
338 Freq[i, 1] = len(Vk)/50
339 Freq[i, 2] = sum(Freq[:,2])
340 Freq[i, 3] = zpos[i]
341 Freq[i, 4] = noise2[i]
342 Freq[i, 5] = max(Zca)
343
344 Pmusicg[i] = 0
345 Pvoiceg[i] = 0
346
347 if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
348 Pmusicg[i] = 10
349 else:
350 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
351 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
352
353 if x > 3:
354 Pvoiceg[i] = Pvoiceg[i] + 0.1
355
356 if x > 300:
357 Pmusicg[i] = 9
358
359
360 if noise2[i] > 3.5:
361 Pvoiceg[i] = 11
362
363 if max(Zca) > 280:
364 Pmusicg[i] = 11
365
366
367 if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
368 Pvoiceg[i] = 12
369
370
371 if zpos[i] > 0.15:
372 Pvoiceg[i] = 13
373
374
375 Vg[i,0] = Pmusicg[i]
376 Vg[i,1] = Pvoiceg[i]
377
378 Typeg[i] = argmax(Vg[i,:])
379 Pmaxg[i] = Vg[i, Typeg[i]]
380
381 Fsp1.extend(Vk)
382
383 if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
384 Type[i] = 2
385 Typeg[i] = 2
386
387
388 if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
389 Type[i] = Type[i-1]
390
391
392
393 maxpos = pfpos[-1]
394
395 tt = zeros((maxpos,))
396 pt = zeros((maxpos,))
397 pm = zeros((maxpos,))
398 pv = zeros((maxpos,))
399 ps = zeros((maxpos,))
400
401 for i in range(0, L):
402 tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
403 pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
404 pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
405 pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
406 ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
407
408
409
410
411
412
413 for k in range(0, len(rmsval)):
414 if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
415 zcrval[k] = 0
416
417
418 classes = []
419 positions = []
420
421 class_ = 5
422
423 for j in range(0, len(tt)):
424 if class_ != tt[j]:
425 class_ = tt[j]
426 classes.append(int(class_))
427 positions.append(int(j*0.02*SR))
428
429
430
431 return positions, classes
432
433
434
435 if __name__=="__main__":
436 if len(argv) != 3:
437 print("Incorrect number of arguments:")
438 print("Usage: ")
439 print("%s <input>")
440 print("")
441 print("Arguments:")
442 print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
443 sys.exit(-1)
444 else:
445 print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
446 print("[II] Loading libraries")
447
448 import essentia
449 from essentia import Pool
450 from essentia.standard import *
451 import csv
452 import yaml
453
454 # reqyures matplotlib
455 from pylab import *
456
457 #requires numpy
458 from numpy import *
459
460 #requires scikit-learn
461 from sklearn.metrics import pairwise_distances
462
463 d = {}
464 v = {}
465
466 fname = argv[1]
467 outfname = argv[2]
468
469
470
471 trackname = fname.split('.')[0].split('/')[-1]
472
473
474 if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
475 print("Please choose a .json or .yaml as an output file.")
476 sys.exit(-1)
477 else:
478 if outfname.partition('.')[-1].lower() == 'json':
479 output = YamlOutput(filename = outfname, format='json')
480 else:
481 output = YamlOutput(filename = outfname, format='yaml')
482
483 print("Feature extraction of `%s\'" % fname)
484
485 # Sampling Rate
486 SR = 21000.0
487
488 # Sampling Frequency
489 T = 1.0/SR
490
491 # SegmentSize
492 tsegmentSize = 1000 #ms
493 segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
494 else int(floor(tsegmentSize*SR/1000))
495
496
497 # FrameSize
498 tframeSize = 20 #ms
499 frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
500 else int(floor(tframeSize*SR/1000))
501
502 # HopSize
503 hopSize = frameSize
504
505 # Load Audio
506 audio = MonoLoader(filename = fname, sampleRate=SR)()
507
508
509
510
511
512 # # Zero Crossing Rate
513 # zcr = ZeroCrossingRate()
514 #
515 # # RMS
516 # rms = RMS()
517 #
518 # # Segmentation based on 1 second segmentation
519 # print("[II] Calculating features for %s, please wait..." % fname)
520 #
521 # rmsval = []
522 # zcrval = []
523 # rmszcrval = []
524 # meanrms = []
525 # varrms = []
526 # meanzcr = []
527 # varzcr = []
528 # histograms = []
529 #
530 #
531 #
532 # # Parameters for the chi square distribution
533 # alphas = []
534 # betas = []
535 # L = int(len(audio)/SR)
536 # for n in range(0, L):
537 # segment = audio[n*SR:(n+1)*SR]
538 # framerms = []
539 # framezcr = []
540 # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
541 # for k in range(0, 50):
542 # frame = segment[0.02*SR*k:0.02*SR*(k+1)]
543 #
544 # rmsv = sqrt(sum(frame**2))
545 # framerms.append(rmsv)
546 # f1 = frame[0:-1]
547 # f2 = frame[1:]
548 # mm1 = f1*f2
549 # zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
550 #
551 # rmsval.append(rmsv)
552 # zcrval.append(zc)
553 # rmszcrval.append(rmsv*zc)
554 # framezcr.append(zc)
555 #
556 # meanrms.append(mean(framerms))
557 # meanzcr.append(mean(framezcr))
558 #
559 # mu = mean(framerms)
560 # sigmasq = var(framerms, ddof=1)
561 #
562 # beta = sigmasq/mu
563 # alpha = (mu/beta) - 1
564 #
565 # if mu != 0 and sigmasq != 0:
566 # betas.append(beta)
567 # alphas.append(alpha)
568 # else:
569 # betas.append(1000000)
570 # alphas.append(-1)
571 #
572 # histograms.append(histogram(framerms, 256))
573 #
574 # varrms.append(var(framerms, ddof=1))
575 # varzcr.append(var(framerms, ddof=1))
576 #
577 #
578 # # Computation of KVRMS
579 #
580 # l = len(rmsval)
581 # ftm = [mean(rmsval[0:50])]
582 # ftv = [var(rmsval[0:50], ddof=1)]
583 # kvrms = [0]
584 #
585 # for i in range(1,l-50):
586 # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
587 # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
588 # ftm.append(ftm_)
589 # ftv.append(ftv_)
590 #
591 # if ftm_ != 0:
592 # kvrms.append(ftv_/ftm_**2)
593 # else:
594 # kvrms.append(0)
595 #
596 #
597 # m = mean(kvrms)
598 # v = var(kvrms, ddof=1)
599 #
600 # b = v/m
601 # a = m/b - 1
602 #
603 # # Computing Similarity
604 #
605 # sim = []
606 # from scipy.special import gamma
607 #
608 # for i in range(1,L):
609 # k = (alphas[i]+alphas[i-1]+2)/2
610 # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
611 #
612 # # 0 If overflow
613 # if isinf(sim_) or isnan(sim_):
614 # sim_ = 0
615 # sim.append(sim_)
616 #
617 # sim2 = []
618 #
619 # for i in range(2,L):
620 # k = (alphas[i]+alphas[i-2]+2)/2
621 # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
622 #
623 # if isinf(sim_) or isnan(sim_):
624 # sim_ = 0
625 # sim2.append(sim_)
626 #
627 #
628 # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
629 ##
630 # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
631 #
632 #
633 # P = [0]
634 # for i in range(1, L-1):
635 # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
636 # P_ = (1-sim2[i-1])
637 # P.append(P_)
638 #
639 #
640 # P.append(0)
641 # P = array(P)
642 #
643 # M = mean(P)
644 # V1 = mean(abs(P[0:L-3]-P[1:L-2]))
645 # V2 = mean(abs(P[0:L-4]-P[2:L-2]))
646 ## V = (V1+V2)*0.25;
647 # V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
648 #
649 # Pd = [P[0],P[1]]
650 #
651 #
652 # for i in range(2,L-2):
653 # m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
654 # m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
655 #
656 # if m<0.1*M:
657 # m1 = 0.1*M
658 #
659 # d = (P[i]-m)/m1
660 #
661 # if d < 0:
662 # d = 0
663 # Pd.append(P[i]*d/V)
664 #
665 # Pd.append(P[L-2])
666 # Pd.append(P[L-1])
667 #
668 #
669 # # Selection of candidate windows where there is a change
670 #
671 # j = 0
672 # pos = []
673 # for i in range(2, L-3):
674 # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
675 # j += 1
676 # pos.append(i)
677 #
678 # if len(pos) == 0:
679 # sys.exit(0)
680 #
681 #
682 # # Computation of Change with 20 ms accuracy
683 #
684 # k2 = 0
685 # c = 25
686 #
687 # Sm = zeros((2*c+51,))
688 # posms = zeros((len(pos),))
689 # msec = zeros((len(pos),))
690 # mpos = zeros((len(pos), ))
691 #
692 # for k1 in range(0,len(pos)):
693 # i = 50*(pos[k1] - 1); # ms
694 # for j in range(-c,50+c+1):
695 # if ftv[i+j] != 0 and ftm[i+j] != 0:
696 # b1 = ftv[i+j]/ftm[i+j]
697 # a1 = ftm[i+j]/b1 - 1
698 # else:
699 # b1 = 1000
700 # a1 = -1
701 #
702 # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
703 # b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
704 # a2 = ftm[i+j+50-1]/b2 - 1
705 # else:
706 # b2 = 1000
707 # a2 = -1
708 #
709 # k = (a2+a1+2)/2
710 # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
711 #
712 # if isnan(Sm[j+c]) or isinf(Sm[j+c]):
713 # Sm[j+c] = 0
714 #
715 # posms[k1] = argmin(Sm)
716 # x = Sm[posms[k1]]
717 #
718 # h = 1 - Sm
719 #
720 # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
721 # msec[k2] = posms[k1]-1-c
722 # mpos[k2] = pos[k1]
723 # k2 = k2+1
724 # else:
725 # msec[k2] = 0
726 # mpos[k2] = 0
727 #
728 #
729 # posms = msec
730 # pos = mpos
731 #
732 # fpos = pos+0.02*posms
733 #
734 # pfpos = floor(50*pos+posms);
735 #
736 # bvoice = 0.14339738781836;
737 # bmusic = 0.04399754659151;
738 # amusic = 1.66349817725076;
739 # avoice = 2.32677887950291;
740 #
741 # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
742 #
743 # L = len(pfpos)-1
744 # V = zeros((L,2))
745 # Vg = zeros((L,2))
746 #
747 # Pzero = []
748 # Fsp1 = []
749 # Type = zeros((L,))
750 # Typeg = zeros((L,))
751 #
752 #
753 # # Classification for each segment
754 #
755 # noise = zeros((L,))
756 # noise2 = zeros((L,))
757 #
758 # pfpos = pfpos.astype(int)
759 # Pmusicg = zeros((L,))
760 # Pvoiceg = zeros((L,))
761 # Pmusic = zeros((L,))
762 # Pvoice = zeros((L,))
763 # Pspace = zeros((L,))
764 # zpos = zeros((L,))
765 # Pmax = zeros((L,))
766 # Pmaxg = zeros((L,))
767 # Freq = zeros((L,6))
768 #
769 # zcrval = array(zcrval)
770 # rmsval = array(rmsval)
771 # rmszcrval = array(rmszcrval)
772 # i
773 # for i in range(0, L):
774 # d = 0
775 #
776 # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
777 # x = x1
778 # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
779 # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
780 # noise[i] = 50*mean(exp(-y))
781 #
782 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
783 # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
784 #
785 # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
786 #
787 # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
788 #
789 # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
790 #
791 # if zpos[i] > 0.08 or x > 3:
792 # Pvoice[i] = 10
793 # else:
794 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
795 #
796 # V[i,0] = Pmusic[i]
797 # V[i,1] = Pvoice[i]
798 # Type[i] = argmax(V[i,:])
799 # Pmax[i] = V[i,Type[i]]
800 #
801 # noise2[i] = 50 * mean(exp(-4*y))
802 # Pmusicg[i] = 0
803 #
804 # if noise2[i] > 3.5:
805 # Pvoiceg[i] = 10
806 # else:
807 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
808 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
809 #
810 #
811 # Vg[i,0] = Pmusicg[i]
812 # Vg[i,1] = Pvoiceg[i]
813 #
814 # Typeg[i] = argmax(Vg[i,:])
815 # Pmaxg[i] = Vg[i,Typeg[i]]
816 #
817 # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
818 # tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
819 # rr = max(tempV)+0.001
820 # tempV1 = tempV/rr
821 # Vk0 = Vk.copy()
822 #
823 # for j in range(0, len(Vk)):
824 # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
825 # Vk[j] = 0
826 # # print "Changing %d" % j
827 # Vk1 = Vk.copy()
828 #
829 # for j in range(0, len(Vk)):
830 # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
831 # # print "Changing %d" % j
832 #
833 # Vk1[j] = 0
834 #
835 # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
836 # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
837 #
838 # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
839 # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
840 #
841 # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
842 # Freq[i, 1] = len(Vk)/50
843 # Freq[i, 2] = sum(Freq[:,2])
844 # Freq[i, 3] = zpos[i]
845 # Freq[i, 4] = noise2[i]
846 # Freq[i, 5] = max(Zca)
847 #
848 # Pmusicg[i] = 0
849 # Pvoiceg[i] = 0
850 #
851 # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
852 # Pmusicg[i] = 10
853 # else:
854 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
855 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
856 #
857 # if x > 3:
858 # Pvoiceg[i] = Pvoiceg[i] + 0.1
859 #
860 # if x > 300:
861 # Pmusicg[i] = 9
862 #
863 #
864 # if noise2[i] > 3.5:
865 # Pvoiceg[i] = 11
866 #
867 # if max(Zca) > 280:
868 # Pmusicg[i] = 11
869 #
870 #
871 # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
872 # Pvoiceg[i] = 12
873 #
874 #
875 # if zpos[i] > 0.15:
876 # Pvoiceg[i] = 13
877 #
878 #
879 # Vg[i,0] = Pmusicg[i]
880 # Vg[i,1] = Pvoiceg[i]
881 #
882 # Typeg[i] = argmax(Vg[i,:])
883 # Pmaxg[i] = Vg[i, Typeg[i]]
884 #
885 # Fsp1.extend(Vk)
886 #
887 # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
888 # Type[i] = 2
889 # Typeg[i] = 2
890 #
891 #
892 # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
893 # Type[i] = Type[i-1]
894 #
895 #
896 #
897 # maxpos = pfpos[-1]
898 #
899 # tt = zeros((maxpos,))
900 # pt = zeros((maxpos,))
901 # pm = zeros((maxpos,))
902 # pv = zeros((maxpos,))
903 # ps = zeros((maxpos,))
904 #
905 # for i in range(0, L):
906 # tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
907 # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
908 # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
909 # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
910 # ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
911 #
912 #
913 #
914 #
915 #
916 #
917 # for k in range(0, len(rmsval)):
918 # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
919 # zcrval[k] = 0
920 #
921 #
922 # classes = []
923 # positions = []
924 #
925 # class_ = 5
926 #
927 # for j in range(0, len(tt)):
928 # if class_ != tt[j]:
929 # class_ = tt[j]
930 # classes.append(int(class_))
931 # positions.append(int(j*0.02*SR))
932 #
933 # classvec = zeros((len(audio),))
934 #
935 # if len(positions) == 1:
936 # classvec = classes[0]*ones((len(audio),))
937 # for j in range(1, len(positions)):
938 # classvec[positions[j-1]:positions[j]] = classes[j-1]
939 #
940 #
941 #
942 #
943 #
944 #
945 #
946 #
947 #
948 #
949 #
950
951 #
952
953
954 positions, classes = trmszc(audio, SR)
955
956 classvec = zeros((len(audio),))
957
958 if len(positions) == 1:
959 classvec = classes[0]*ones((len(audio),))
960 for j in range(1, len(positions)):
961 classvec[positions[j-1]:positions[j]] = classes[j-1]
962
963 plot(audio)
964 plot(classvec)
965