e@0
|
1 # -*- coding: utf-8 -*-
|
e@0
|
2 """
|
e@0
|
3 Created on Mon Jun 1 17:17:34 2015
|
e@0
|
4
|
e@0
|
5 @author: mmxgn
|
e@0
|
6 """
|
e@0
|
7
|
e@0
|
8
|
e@0
|
9 from sys import argv
|
e@0
|
10
|
e@0
|
11
|
e@0
|
12
|
e@0
|
13 def trmszc(audio, SR=21000):
|
e@0
|
14 # Segmentation based on 1 second segmentation
|
e@0
|
15 print("[II] Calculating features for %s, please wait..." % fname)
|
e@0
|
16
|
e@0
|
17 rmsval = []
|
e@0
|
18 zcrval = []
|
e@0
|
19 rmszcrval = []
|
e@0
|
20 meanrms = []
|
e@0
|
21 varrms = []
|
e@0
|
22 meanzcr = []
|
e@0
|
23 varzcr = []
|
e@0
|
24 histograms = []
|
e@0
|
25
|
e@0
|
26
|
e@0
|
27
|
e@0
|
28 # Parameters for the chi square distribution
|
e@0
|
29 alphas = []
|
e@0
|
30 betas = []
|
e@0
|
31 L = int(len(audio)/SR)
|
e@0
|
32 for n in range(0, L):
|
e@0
|
33 segment = audio[n*SR:(n+1)*SR]
|
e@0
|
34 framerms = []
|
e@0
|
35 framezcr = []
|
e@0
|
36 # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
|
e@0
|
37 for k in range(0, 50):
|
e@0
|
38 frame = segment[0.02*SR*k:0.02*SR*(k+1)]
|
e@0
|
39
|
e@0
|
40 rmsv = sqrt(sum(frame**2))
|
e@0
|
41 framerms.append(rmsv)
|
e@0
|
42 f1 = frame[0:-1]
|
e@0
|
43 f2 = frame[1:]
|
e@0
|
44 mm1 = f1*f2
|
e@0
|
45 zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
|
e@0
|
46
|
e@0
|
47 rmsval.append(rmsv)
|
e@0
|
48 zcrval.append(zc)
|
e@0
|
49 rmszcrval.append(rmsv*zc)
|
e@0
|
50 framezcr.append(zc)
|
e@0
|
51
|
e@0
|
52 meanrms.append(mean(framerms))
|
e@0
|
53 meanzcr.append(mean(framezcr))
|
e@0
|
54
|
e@0
|
55 mu = mean(framerms)
|
e@0
|
56 sigmasq = var(framerms, ddof=1)
|
e@0
|
57
|
e@0
|
58 beta = sigmasq/mu
|
e@0
|
59 alpha = (mu/beta) - 1
|
e@0
|
60
|
e@0
|
61 if mu != 0 and sigmasq != 0:
|
e@0
|
62 betas.append(beta)
|
e@0
|
63 alphas.append(alpha)
|
e@0
|
64 else:
|
e@0
|
65 betas.append(1000000)
|
e@0
|
66 alphas.append(-1)
|
e@0
|
67
|
e@0
|
68 histograms.append(histogram(framerms, 256))
|
e@0
|
69
|
e@0
|
70 varrms.append(var(framerms, ddof=1))
|
e@0
|
71 varzcr.append(var(framerms, ddof=1))
|
e@0
|
72
|
e@0
|
73
|
e@0
|
74 # Computation of KVRMS
|
e@0
|
75
|
e@0
|
76 l = len(rmsval)
|
e@0
|
77 ftm = [mean(rmsval[0:50])]
|
e@0
|
78 ftv = [var(rmsval[0:50], ddof=1)]
|
e@0
|
79 kvrms = [0]
|
e@0
|
80
|
e@0
|
81 for i in range(1,l-50):
|
e@0
|
82 ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
|
e@0
|
83 ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
|
e@0
|
84 ftm.append(ftm_)
|
e@0
|
85 ftv.append(ftv_)
|
e@0
|
86
|
e@0
|
87 if ftm_ != 0:
|
e@0
|
88 kvrms.append(ftv_/ftm_**2)
|
e@0
|
89 else:
|
e@0
|
90 kvrms.append(0)
|
e@0
|
91
|
e@0
|
92
|
e@0
|
93 m = mean(kvrms)
|
e@0
|
94 v = var(kvrms, ddof=1)
|
e@0
|
95
|
e@0
|
96 b = v/m
|
e@0
|
97 a = m/b - 1
|
e@0
|
98
|
e@0
|
99 # Computing Similarity
|
e@0
|
100
|
e@0
|
101 sim = []
|
e@0
|
102 from scipy.special import gamma
|
e@0
|
103
|
e@0
|
104 for i in range(1,L):
|
e@0
|
105 k = (alphas[i]+alphas[i-1]+2)/2
|
e@0
|
106 sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
|
e@0
|
107
|
e@0
|
108 # 0 If overflow
|
e@0
|
109 if isinf(sim_) or isnan(sim_):
|
e@0
|
110 sim_ = 0
|
e@0
|
111 sim.append(sim_)
|
e@0
|
112
|
e@0
|
113 sim2 = []
|
e@0
|
114
|
e@0
|
115 for i in range(2,L):
|
e@0
|
116 k = (alphas[i]+alphas[i-2]+2)/2
|
e@0
|
117 sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
|
e@0
|
118
|
e@0
|
119 if isinf(sim_) or isnan(sim_):
|
e@0
|
120 sim_ = 0
|
e@0
|
121 sim2.append(sim_)
|
e@0
|
122
|
e@0
|
123
|
e@0
|
124 p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
|
e@0
|
125 #
|
e@0
|
126 rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
|
e@0
|
127
|
e@0
|
128
|
e@0
|
129 P = [0]
|
e@0
|
130 for i in range(1, L-1):
|
e@0
|
131 P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
|
e@0
|
132 P_ = (1-sim2[i-1])
|
e@0
|
133 P.append(P_)
|
e@0
|
134
|
e@0
|
135
|
e@0
|
136 P.append(0)
|
e@0
|
137 P = array(P)
|
e@0
|
138
|
e@0
|
139 M = mean(P)
|
e@0
|
140 V1 = mean(abs(P[0:L-3]-P[1:L-2]))
|
e@0
|
141 V2 = mean(abs(P[0:L-4]-P[2:L-2]))
|
e@0
|
142 # V = (V1+V2)*0.25;
|
e@0
|
143 V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
|
e@0
|
144
|
e@0
|
145 Pd = [P[0],P[1]]
|
e@0
|
146
|
e@0
|
147
|
e@0
|
148 for i in range(2,L-2):
|
e@0
|
149 m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
|
e@0
|
150 m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
|
e@0
|
151
|
e@0
|
152 if m<0.1*M:
|
e@0
|
153 m1 = 0.1*M
|
e@0
|
154
|
e@0
|
155 d = (P[i]-m)/m1
|
e@0
|
156
|
e@0
|
157 if d < 0:
|
e@0
|
158 d = 0
|
e@0
|
159 Pd.append(P[i]*d/V)
|
e@0
|
160
|
e@0
|
161 Pd.append(P[L-2])
|
e@0
|
162 Pd.append(P[L-1])
|
e@0
|
163
|
e@0
|
164
|
e@0
|
165 # Selection of candidate windows where there is a change
|
e@0
|
166
|
e@0
|
167 j = 0
|
e@0
|
168 pos = []
|
e@0
|
169 for i in range(2, L-3):
|
e@0
|
170 if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
|
e@0
|
171 j += 1
|
e@0
|
172 pos.append(i)
|
e@0
|
173
|
e@0
|
174 if len(pos) == 0:
|
e@0
|
175 sys.exit(0)
|
e@0
|
176
|
e@0
|
177
|
e@0
|
178 # Computation of Change with 20 ms accuracy
|
e@0
|
179
|
e@0
|
180 k2 = 0
|
e@0
|
181 c = 25
|
e@0
|
182
|
e@0
|
183 Sm = zeros((2*c+51,))
|
e@0
|
184 posms = zeros((len(pos),))
|
e@0
|
185 msec = zeros((len(pos),))
|
e@0
|
186 mpos = zeros((len(pos), ))
|
e@0
|
187
|
e@0
|
188 for k1 in range(0,len(pos)):
|
e@0
|
189 i = 50*(pos[k1] - 1); # ms
|
e@0
|
190 for j in range(-c,50+c+1):
|
e@0
|
191 if ftv[i+j] != 0 and ftm[i+j] != 0:
|
e@0
|
192 b1 = ftv[i+j]/ftm[i+j]
|
e@0
|
193 a1 = ftm[i+j]/b1 - 1
|
e@0
|
194 else:
|
e@0
|
195 b1 = 1000
|
e@0
|
196 a1 = -1
|
e@0
|
197
|
e@0
|
198 if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
|
e@0
|
199 b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
|
e@0
|
200 a2 = ftm[i+j+50-1]/b2 - 1
|
e@0
|
201 else:
|
e@0
|
202 b2 = 1000
|
e@0
|
203 a2 = -1
|
e@0
|
204
|
e@0
|
205 k = (a2+a1+2)/2
|
e@0
|
206 Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
|
e@0
|
207
|
e@0
|
208 if isnan(Sm[j+c]) or isinf(Sm[j+c]):
|
e@0
|
209 Sm[j+c] = 0
|
e@0
|
210
|
e@0
|
211 posms[k1] = argmin(Sm)
|
e@0
|
212 x = Sm[posms[k1]]
|
e@0
|
213
|
e@0
|
214 h = 1 - Sm
|
e@0
|
215
|
e@0
|
216 if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
|
e@0
|
217 msec[k2] = posms[k1]-1-c
|
e@0
|
218 mpos[k2] = pos[k1]
|
e@0
|
219 k2 = k2+1
|
e@0
|
220 else:
|
e@0
|
221 msec[k2] = 0
|
e@0
|
222 mpos[k2] = 0
|
e@0
|
223
|
e@0
|
224
|
e@0
|
225 posms = msec
|
e@0
|
226 pos = mpos
|
e@0
|
227
|
e@0
|
228 fpos = pos+0.02*posms
|
e@0
|
229
|
e@0
|
230 pfpos = floor(50*pos+posms);
|
e@0
|
231
|
e@0
|
232 bvoice = 0.14339738781836;
|
e@0
|
233 bmusic = 0.04399754659151;
|
e@0
|
234 amusic = 1.66349817725076;
|
e@0
|
235 avoice = 2.32677887950291;
|
e@0
|
236
|
e@0
|
237 pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
|
e@0
|
238
|
e@0
|
239 L = len(pfpos)-1
|
e@0
|
240 V = zeros((L,2))
|
e@0
|
241 Vg = zeros((L,2))
|
e@0
|
242
|
e@0
|
243 Pzero = []
|
e@0
|
244 Fsp1 = []
|
e@0
|
245 Type = zeros((L,))
|
e@0
|
246 Typeg = zeros((L,))
|
e@0
|
247
|
e@0
|
248
|
e@0
|
249 # Classification for each segment
|
e@0
|
250
|
e@0
|
251 noise = zeros((L,))
|
e@0
|
252 noise2 = zeros((L,))
|
e@0
|
253
|
e@0
|
254 pfpos = pfpos.astype(int)
|
e@0
|
255 Pmusicg = zeros((L,))
|
e@0
|
256 Pvoiceg = zeros((L,))
|
e@0
|
257 Pmusic = zeros((L,))
|
e@0
|
258 Pvoice = zeros((L,))
|
e@0
|
259 Pspace = zeros((L,))
|
e@0
|
260 zpos = zeros((L,))
|
e@0
|
261 Pmax = zeros((L,))
|
e@0
|
262 Pmaxg = zeros((L,))
|
e@0
|
263 Freq = zeros((L,6))
|
e@0
|
264
|
e@0
|
265 zcrval = array(zcrval)
|
e@0
|
266 rmsval = array(rmsval)
|
e@0
|
267 rmszcrval = array(rmszcrval)
|
e@0
|
268 i
|
e@0
|
269 for i in range(0, L):
|
e@0
|
270 d = 0
|
e@0
|
271
|
e@0
|
272 x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
|
e@0
|
273 x = x1
|
e@0
|
274 y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
|
e@0
|
275 y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
|
e@0
|
276 noise[i] = 50*mean(exp(-y))
|
e@0
|
277
|
e@0
|
278 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
279 Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
280
|
e@0
|
281 sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
|
e@0
|
282
|
e@0
|
283 Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
|
e@0
|
284
|
e@0
|
285 zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
|
e@0
|
286
|
e@0
|
287 if zpos[i] > 0.08 or x > 3:
|
e@0
|
288 Pvoice[i] = 10
|
e@0
|
289 else:
|
e@0
|
290 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
291
|
e@0
|
292 V[i,0] = Pmusic[i]
|
e@0
|
293 V[i,1] = Pvoice[i]
|
e@0
|
294 Type[i] = argmax(V[i,:])
|
e@0
|
295 Pmax[i] = V[i,Type[i]]
|
e@0
|
296
|
e@0
|
297 noise2[i] = 50 * mean(exp(-4*y))
|
e@0
|
298 Pmusicg[i] = 0
|
e@0
|
299
|
e@0
|
300 if noise2[i] > 3.5:
|
e@0
|
301 Pvoiceg[i] = 10
|
e@0
|
302 else:
|
e@0
|
303 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
304 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
305
|
e@0
|
306
|
e@0
|
307 Vg[i,0] = Pmusicg[i]
|
e@0
|
308 Vg[i,1] = Pvoiceg[i]
|
e@0
|
309
|
e@0
|
310 Typeg[i] = argmax(Vg[i,:])
|
e@0
|
311 Pmaxg[i] = Vg[i,Typeg[i]]
|
e@0
|
312
|
e@0
|
313 Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
|
e@0
|
314 tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
|
e@0
|
315 rr = max(tempV)+0.001
|
e@0
|
316 tempV1 = tempV/rr
|
e@0
|
317 Vk0 = Vk.copy()
|
e@0
|
318
|
e@0
|
319 for j in range(0, len(Vk)):
|
e@0
|
320 if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
|
e@0
|
321 Vk[j] = 0
|
e@0
|
322 # print "Changing %d" % j
|
e@0
|
323 Vk1 = Vk.copy()
|
e@0
|
324
|
e@0
|
325 for j in range(0, len(Vk)):
|
e@0
|
326 if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
|
e@0
|
327 # print "Changing %d" % j
|
e@0
|
328
|
e@0
|
329 Vk1[j] = 0
|
e@0
|
330
|
e@0
|
331 Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
|
e@0
|
332 Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
|
e@0
|
333
|
e@0
|
334 VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
|
e@0
|
335 dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
|
e@0
|
336
|
e@0
|
337 Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
|
e@0
|
338 Freq[i, 1] = len(Vk)/50
|
e@0
|
339 Freq[i, 2] = sum(Freq[:,2])
|
e@0
|
340 Freq[i, 3] = zpos[i]
|
e@0
|
341 Freq[i, 4] = noise2[i]
|
e@0
|
342 Freq[i, 5] = max(Zca)
|
e@0
|
343
|
e@0
|
344 Pmusicg[i] = 0
|
e@0
|
345 Pvoiceg[i] = 0
|
e@0
|
346
|
e@0
|
347 if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
|
e@0
|
348 Pmusicg[i] = 10
|
e@0
|
349 else:
|
e@0
|
350 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
351 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
352
|
e@0
|
353 if x > 3:
|
e@0
|
354 Pvoiceg[i] = Pvoiceg[i] + 0.1
|
e@0
|
355
|
e@0
|
356 if x > 300:
|
e@0
|
357 Pmusicg[i] = 9
|
e@0
|
358
|
e@0
|
359
|
e@0
|
360 if noise2[i] > 3.5:
|
e@0
|
361 Pvoiceg[i] = 11
|
e@0
|
362
|
e@0
|
363 if max(Zca) > 280:
|
e@0
|
364 Pmusicg[i] = 11
|
e@0
|
365
|
e@0
|
366
|
e@0
|
367 if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
|
e@0
|
368 Pvoiceg[i] = 12
|
e@0
|
369
|
e@0
|
370
|
e@0
|
371 if zpos[i] > 0.15:
|
e@0
|
372 Pvoiceg[i] = 13
|
e@0
|
373
|
e@0
|
374
|
e@0
|
375 Vg[i,0] = Pmusicg[i]
|
e@0
|
376 Vg[i,1] = Pvoiceg[i]
|
e@0
|
377
|
e@0
|
378 Typeg[i] = argmax(Vg[i,:])
|
e@0
|
379 Pmaxg[i] = Vg[i, Typeg[i]]
|
e@0
|
380
|
e@0
|
381 Fsp1.extend(Vk)
|
e@0
|
382
|
e@0
|
383 if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
|
e@0
|
384 Type[i] = 2
|
e@0
|
385 Typeg[i] = 2
|
e@0
|
386
|
e@0
|
387
|
e@0
|
388 if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
|
e@0
|
389 Type[i] = Type[i-1]
|
e@0
|
390
|
e@0
|
391
|
e@0
|
392
|
e@0
|
393 maxpos = pfpos[-1]
|
e@0
|
394
|
e@0
|
395 tt = zeros((maxpos,))
|
e@0
|
396 pt = zeros((maxpos,))
|
e@0
|
397 pm = zeros((maxpos,))
|
e@0
|
398 pv = zeros((maxpos,))
|
e@0
|
399 ps = zeros((maxpos,))
|
e@0
|
400
|
e@0
|
401 for i in range(0, L):
|
e@0
|
402 tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
|
e@0
|
403 pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
|
e@0
|
404 pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
|
e@0
|
405 pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
|
e@0
|
406 ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
|
e@0
|
407
|
e@0
|
408
|
e@0
|
409
|
e@0
|
410
|
e@0
|
411
|
e@0
|
412
|
e@0
|
413 for k in range(0, len(rmsval)):
|
e@0
|
414 if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
|
e@0
|
415 zcrval[k] = 0
|
e@0
|
416
|
e@0
|
417
|
e@0
|
418 classes = []
|
e@0
|
419 positions = []
|
e@0
|
420
|
e@0
|
421 class_ = 5
|
e@0
|
422
|
e@0
|
423 for j in range(0, len(tt)):
|
e@0
|
424 if class_ != tt[j]:
|
e@0
|
425 class_ = tt[j]
|
e@0
|
426 classes.append(int(class_))
|
e@0
|
427 positions.append(int(j*0.02*SR))
|
e@0
|
428
|
e@0
|
429
|
e@0
|
430
|
e@0
|
431 return positions, classes
|
e@0
|
432
|
e@0
|
433
|
e@0
|
434
|
e@0
|
435 if __name__=="__main__":
|
e@0
|
436 if len(argv) != 3:
|
e@0
|
437 print("Incorrect number of arguments:")
|
e@0
|
438 print("Usage: ")
|
e@0
|
439 print("%s <input>")
|
e@0
|
440 print("")
|
e@0
|
441 print("Arguments:")
|
e@0
|
442 print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
|
e@0
|
443 sys.exit(-1)
|
e@0
|
444 else:
|
e@0
|
445 print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
|
e@0
|
446 print("[II] Loading libraries")
|
e@0
|
447
|
e@0
|
448 import essentia
|
e@0
|
449 from essentia import Pool
|
e@0
|
450 from essentia.standard import *
|
e@0
|
451 import csv
|
e@0
|
452 import yaml
|
e@0
|
453
|
e@0
|
454 # reqyures matplotlib
|
e@0
|
455 from pylab import *
|
e@0
|
456
|
e@0
|
457 #requires numpy
|
e@0
|
458 from numpy import *
|
e@0
|
459
|
e@0
|
460 #requires scikit-learn
|
e@0
|
461 from sklearn.metrics import pairwise_distances
|
e@0
|
462
|
e@0
|
463 d = {}
|
e@0
|
464 v = {}
|
e@0
|
465
|
e@0
|
466 fname = argv[1]
|
e@0
|
467 outfname = argv[2]
|
e@0
|
468
|
e@0
|
469
|
e@0
|
470
|
e@0
|
471 trackname = fname.split('.')[0].split('/')[-1]
|
e@0
|
472
|
e@0
|
473
|
e@0
|
474 if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
|
e@0
|
475 print("Please choose a .json or .yaml as an output file.")
|
e@0
|
476 sys.exit(-1)
|
e@0
|
477 else:
|
e@0
|
478 if outfname.partition('.')[-1].lower() == 'json':
|
e@0
|
479 output = YamlOutput(filename = outfname, format='json')
|
e@0
|
480 else:
|
e@0
|
481 output = YamlOutput(filename = outfname, format='yaml')
|
e@0
|
482
|
e@0
|
483 print("Feature extraction of `%s\'" % fname)
|
e@0
|
484
|
e@0
|
485 # Sampling Rate
|
e@0
|
486 SR = 21000.0
|
e@0
|
487
|
e@0
|
488 # Sampling Frequency
|
e@0
|
489 T = 1.0/SR
|
e@0
|
490
|
e@0
|
491 # SegmentSize
|
e@0
|
492 tsegmentSize = 1000 #ms
|
e@0
|
493 segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
|
e@0
|
494 else int(floor(tsegmentSize*SR/1000))
|
e@0
|
495
|
e@0
|
496
|
e@0
|
497 # FrameSize
|
e@0
|
498 tframeSize = 20 #ms
|
e@0
|
499 frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
|
e@0
|
500 else int(floor(tframeSize*SR/1000))
|
e@0
|
501
|
e@0
|
502 # HopSize
|
e@0
|
503 hopSize = frameSize
|
e@0
|
504
|
e@0
|
505 # Load Audio
|
e@0
|
506 audio = MonoLoader(filename = fname, sampleRate=SR)()
|
e@0
|
507
|
e@0
|
508
|
e@0
|
509
|
e@0
|
510
|
e@0
|
511
|
e@0
|
512 # # Zero Crossing Rate
|
e@0
|
513 # zcr = ZeroCrossingRate()
|
e@0
|
514 #
|
e@0
|
515 # # RMS
|
e@0
|
516 # rms = RMS()
|
e@0
|
517 #
|
e@0
|
518 # # Segmentation based on 1 second segmentation
|
e@0
|
519 # print("[II] Calculating features for %s, please wait..." % fname)
|
e@0
|
520 #
|
e@0
|
521 # rmsval = []
|
e@0
|
522 # zcrval = []
|
e@0
|
523 # rmszcrval = []
|
e@0
|
524 # meanrms = []
|
e@0
|
525 # varrms = []
|
e@0
|
526 # meanzcr = []
|
e@0
|
527 # varzcr = []
|
e@0
|
528 # histograms = []
|
e@0
|
529 #
|
e@0
|
530 #
|
e@0
|
531 #
|
e@0
|
532 # # Parameters for the chi square distribution
|
e@0
|
533 # alphas = []
|
e@0
|
534 # betas = []
|
e@0
|
535 # L = int(len(audio)/SR)
|
e@0
|
536 # for n in range(0, L):
|
e@0
|
537 # segment = audio[n*SR:(n+1)*SR]
|
e@0
|
538 # framerms = []
|
e@0
|
539 # framezcr = []
|
e@0
|
540 # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
|
e@0
|
541 # for k in range(0, 50):
|
e@0
|
542 # frame = segment[0.02*SR*k:0.02*SR*(k+1)]
|
e@0
|
543 #
|
e@0
|
544 # rmsv = sqrt(sum(frame**2))
|
e@0
|
545 # framerms.append(rmsv)
|
e@0
|
546 # f1 = frame[0:-1]
|
e@0
|
547 # f2 = frame[1:]
|
e@0
|
548 # mm1 = f1*f2
|
e@0
|
549 # zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
|
e@0
|
550 #
|
e@0
|
551 # rmsval.append(rmsv)
|
e@0
|
552 # zcrval.append(zc)
|
e@0
|
553 # rmszcrval.append(rmsv*zc)
|
e@0
|
554 # framezcr.append(zc)
|
e@0
|
555 #
|
e@0
|
556 # meanrms.append(mean(framerms))
|
e@0
|
557 # meanzcr.append(mean(framezcr))
|
e@0
|
558 #
|
e@0
|
559 # mu = mean(framerms)
|
e@0
|
560 # sigmasq = var(framerms, ddof=1)
|
e@0
|
561 #
|
e@0
|
562 # beta = sigmasq/mu
|
e@0
|
563 # alpha = (mu/beta) - 1
|
e@0
|
564 #
|
e@0
|
565 # if mu != 0 and sigmasq != 0:
|
e@0
|
566 # betas.append(beta)
|
e@0
|
567 # alphas.append(alpha)
|
e@0
|
568 # else:
|
e@0
|
569 # betas.append(1000000)
|
e@0
|
570 # alphas.append(-1)
|
e@0
|
571 #
|
e@0
|
572 # histograms.append(histogram(framerms, 256))
|
e@0
|
573 #
|
e@0
|
574 # varrms.append(var(framerms, ddof=1))
|
e@0
|
575 # varzcr.append(var(framerms, ddof=1))
|
e@0
|
576 #
|
e@0
|
577 #
|
e@0
|
578 # # Computation of KVRMS
|
e@0
|
579 #
|
e@0
|
580 # l = len(rmsval)
|
e@0
|
581 # ftm = [mean(rmsval[0:50])]
|
e@0
|
582 # ftv = [var(rmsval[0:50], ddof=1)]
|
e@0
|
583 # kvrms = [0]
|
e@0
|
584 #
|
e@0
|
585 # for i in range(1,l-50):
|
e@0
|
586 # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
|
e@0
|
587 # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
|
e@0
|
588 # ftm.append(ftm_)
|
e@0
|
589 # ftv.append(ftv_)
|
e@0
|
590 #
|
e@0
|
591 # if ftm_ != 0:
|
e@0
|
592 # kvrms.append(ftv_/ftm_**2)
|
e@0
|
593 # else:
|
e@0
|
594 # kvrms.append(0)
|
e@0
|
595 #
|
e@0
|
596 #
|
e@0
|
597 # m = mean(kvrms)
|
e@0
|
598 # v = var(kvrms, ddof=1)
|
e@0
|
599 #
|
e@0
|
600 # b = v/m
|
e@0
|
601 # a = m/b - 1
|
e@0
|
602 #
|
e@0
|
603 # # Computing Similarity
|
e@0
|
604 #
|
e@0
|
605 # sim = []
|
e@0
|
606 # from scipy.special import gamma
|
e@0
|
607 #
|
e@0
|
608 # for i in range(1,L):
|
e@0
|
609 # k = (alphas[i]+alphas[i-1]+2)/2
|
e@0
|
610 # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
|
e@0
|
611 #
|
e@0
|
612 # # 0 If overflow
|
e@0
|
613 # if isinf(sim_) or isnan(sim_):
|
e@0
|
614 # sim_ = 0
|
e@0
|
615 # sim.append(sim_)
|
e@0
|
616 #
|
e@0
|
617 # sim2 = []
|
e@0
|
618 #
|
e@0
|
619 # for i in range(2,L):
|
e@0
|
620 # k = (alphas[i]+alphas[i-2]+2)/2
|
e@0
|
621 # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
|
e@0
|
622 #
|
e@0
|
623 # if isinf(sim_) or isnan(sim_):
|
e@0
|
624 # sim_ = 0
|
e@0
|
625 # sim2.append(sim_)
|
e@0
|
626 #
|
e@0
|
627 #
|
e@0
|
628 # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
|
e@0
|
629 ##
|
e@0
|
630 # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
|
e@0
|
631 #
|
e@0
|
632 #
|
e@0
|
633 # P = [0]
|
e@0
|
634 # for i in range(1, L-1):
|
e@0
|
635 # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
|
e@0
|
636 # P_ = (1-sim2[i-1])
|
e@0
|
637 # P.append(P_)
|
e@0
|
638 #
|
e@0
|
639 #
|
e@0
|
640 # P.append(0)
|
e@0
|
641 # P = array(P)
|
e@0
|
642 #
|
e@0
|
643 # M = mean(P)
|
e@0
|
644 # V1 = mean(abs(P[0:L-3]-P[1:L-2]))
|
e@0
|
645 # V2 = mean(abs(P[0:L-4]-P[2:L-2]))
|
e@0
|
646 ## V = (V1+V2)*0.25;
|
e@0
|
647 # V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
|
e@0
|
648 #
|
e@0
|
649 # Pd = [P[0],P[1]]
|
e@0
|
650 #
|
e@0
|
651 #
|
e@0
|
652 # for i in range(2,L-2):
|
e@0
|
653 # m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
|
e@0
|
654 # m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
|
e@0
|
655 #
|
e@0
|
656 # if m<0.1*M:
|
e@0
|
657 # m1 = 0.1*M
|
e@0
|
658 #
|
e@0
|
659 # d = (P[i]-m)/m1
|
e@0
|
660 #
|
e@0
|
661 # if d < 0:
|
e@0
|
662 # d = 0
|
e@0
|
663 # Pd.append(P[i]*d/V)
|
e@0
|
664 #
|
e@0
|
665 # Pd.append(P[L-2])
|
e@0
|
666 # Pd.append(P[L-1])
|
e@0
|
667 #
|
e@0
|
668 #
|
e@0
|
669 # # Selection of candidate windows where there is a change
|
e@0
|
670 #
|
e@0
|
671 # j = 0
|
e@0
|
672 # pos = []
|
e@0
|
673 # for i in range(2, L-3):
|
e@0
|
674 # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
|
e@0
|
675 # j += 1
|
e@0
|
676 # pos.append(i)
|
e@0
|
677 #
|
e@0
|
678 # if len(pos) == 0:
|
e@0
|
679 # sys.exit(0)
|
e@0
|
680 #
|
e@0
|
681 #
|
e@0
|
682 # # Computation of Change with 20 ms accuracy
|
e@0
|
683 #
|
e@0
|
684 # k2 = 0
|
e@0
|
685 # c = 25
|
e@0
|
686 #
|
e@0
|
687 # Sm = zeros((2*c+51,))
|
e@0
|
688 # posms = zeros((len(pos),))
|
e@0
|
689 # msec = zeros((len(pos),))
|
e@0
|
690 # mpos = zeros((len(pos), ))
|
e@0
|
691 #
|
e@0
|
692 # for k1 in range(0,len(pos)):
|
e@0
|
693 # i = 50*(pos[k1] - 1); # ms
|
e@0
|
694 # for j in range(-c,50+c+1):
|
e@0
|
695 # if ftv[i+j] != 0 and ftm[i+j] != 0:
|
e@0
|
696 # b1 = ftv[i+j]/ftm[i+j]
|
e@0
|
697 # a1 = ftm[i+j]/b1 - 1
|
e@0
|
698 # else:
|
e@0
|
699 # b1 = 1000
|
e@0
|
700 # a1 = -1
|
e@0
|
701 #
|
e@0
|
702 # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
|
e@0
|
703 # b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
|
e@0
|
704 # a2 = ftm[i+j+50-1]/b2 - 1
|
e@0
|
705 # else:
|
e@0
|
706 # b2 = 1000
|
e@0
|
707 # a2 = -1
|
e@0
|
708 #
|
e@0
|
709 # k = (a2+a1+2)/2
|
e@0
|
710 # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
|
e@0
|
711 #
|
e@0
|
712 # if isnan(Sm[j+c]) or isinf(Sm[j+c]):
|
e@0
|
713 # Sm[j+c] = 0
|
e@0
|
714 #
|
e@0
|
715 # posms[k1] = argmin(Sm)
|
e@0
|
716 # x = Sm[posms[k1]]
|
e@0
|
717 #
|
e@0
|
718 # h = 1 - Sm
|
e@0
|
719 #
|
e@0
|
720 # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
|
e@0
|
721 # msec[k2] = posms[k1]-1-c
|
e@0
|
722 # mpos[k2] = pos[k1]
|
e@0
|
723 # k2 = k2+1
|
e@0
|
724 # else:
|
e@0
|
725 # msec[k2] = 0
|
e@0
|
726 # mpos[k2] = 0
|
e@0
|
727 #
|
e@0
|
728 #
|
e@0
|
729 # posms = msec
|
e@0
|
730 # pos = mpos
|
e@0
|
731 #
|
e@0
|
732 # fpos = pos+0.02*posms
|
e@0
|
733 #
|
e@0
|
734 # pfpos = floor(50*pos+posms);
|
e@0
|
735 #
|
e@0
|
736 # bvoice = 0.14339738781836;
|
e@0
|
737 # bmusic = 0.04399754659151;
|
e@0
|
738 # amusic = 1.66349817725076;
|
e@0
|
739 # avoice = 2.32677887950291;
|
e@0
|
740 #
|
e@0
|
741 # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
|
e@0
|
742 #
|
e@0
|
743 # L = len(pfpos)-1
|
e@0
|
744 # V = zeros((L,2))
|
e@0
|
745 # Vg = zeros((L,2))
|
e@0
|
746 #
|
e@0
|
747 # Pzero = []
|
e@0
|
748 # Fsp1 = []
|
e@0
|
749 # Type = zeros((L,))
|
e@0
|
750 # Typeg = zeros((L,))
|
e@0
|
751 #
|
e@0
|
752 #
|
e@0
|
753 # # Classification for each segment
|
e@0
|
754 #
|
e@0
|
755 # noise = zeros((L,))
|
e@0
|
756 # noise2 = zeros((L,))
|
e@0
|
757 #
|
e@0
|
758 # pfpos = pfpos.astype(int)
|
e@0
|
759 # Pmusicg = zeros((L,))
|
e@0
|
760 # Pvoiceg = zeros((L,))
|
e@0
|
761 # Pmusic = zeros((L,))
|
e@0
|
762 # Pvoice = zeros((L,))
|
e@0
|
763 # Pspace = zeros((L,))
|
e@0
|
764 # zpos = zeros((L,))
|
e@0
|
765 # Pmax = zeros((L,))
|
e@0
|
766 # Pmaxg = zeros((L,))
|
e@0
|
767 # Freq = zeros((L,6))
|
e@0
|
768 #
|
e@0
|
769 # zcrval = array(zcrval)
|
e@0
|
770 # rmsval = array(rmsval)
|
e@0
|
771 # rmszcrval = array(rmszcrval)
|
e@0
|
772 # i
|
e@0
|
773 # for i in range(0, L):
|
e@0
|
774 # d = 0
|
e@0
|
775 #
|
e@0
|
776 # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
|
e@0
|
777 # x = x1
|
e@0
|
778 # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
|
e@0
|
779 # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
|
e@0
|
780 # noise[i] = 50*mean(exp(-y))
|
e@0
|
781 #
|
e@0
|
782 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
783 # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
784 #
|
e@0
|
785 # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
|
e@0
|
786 #
|
e@0
|
787 # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
|
e@0
|
788 #
|
e@0
|
789 # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
|
e@0
|
790 #
|
e@0
|
791 # if zpos[i] > 0.08 or x > 3:
|
e@0
|
792 # Pvoice[i] = 10
|
e@0
|
793 # else:
|
e@0
|
794 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
795 #
|
e@0
|
796 # V[i,0] = Pmusic[i]
|
e@0
|
797 # V[i,1] = Pvoice[i]
|
e@0
|
798 # Type[i] = argmax(V[i,:])
|
e@0
|
799 # Pmax[i] = V[i,Type[i]]
|
e@0
|
800 #
|
e@0
|
801 # noise2[i] = 50 * mean(exp(-4*y))
|
e@0
|
802 # Pmusicg[i] = 0
|
e@0
|
803 #
|
e@0
|
804 # if noise2[i] > 3.5:
|
e@0
|
805 # Pvoiceg[i] = 10
|
e@0
|
806 # else:
|
e@0
|
807 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
808 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
809 #
|
e@0
|
810 #
|
e@0
|
811 # Vg[i,0] = Pmusicg[i]
|
e@0
|
812 # Vg[i,1] = Pvoiceg[i]
|
e@0
|
813 #
|
e@0
|
814 # Typeg[i] = argmax(Vg[i,:])
|
e@0
|
815 # Pmaxg[i] = Vg[i,Typeg[i]]
|
e@0
|
816 #
|
e@0
|
817 # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
|
e@0
|
818 # tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
|
e@0
|
819 # rr = max(tempV)+0.001
|
e@0
|
820 # tempV1 = tempV/rr
|
e@0
|
821 # Vk0 = Vk.copy()
|
e@0
|
822 #
|
e@0
|
823 # for j in range(0, len(Vk)):
|
e@0
|
824 # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
|
e@0
|
825 # Vk[j] = 0
|
e@0
|
826 # # print "Changing %d" % j
|
e@0
|
827 # Vk1 = Vk.copy()
|
e@0
|
828 #
|
e@0
|
829 # for j in range(0, len(Vk)):
|
e@0
|
830 # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
|
e@0
|
831 # # print "Changing %d" % j
|
e@0
|
832 #
|
e@0
|
833 # Vk1[j] = 0
|
e@0
|
834 #
|
e@0
|
835 # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
|
e@0
|
836 # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
|
e@0
|
837 #
|
e@0
|
838 # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
|
e@0
|
839 # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
|
e@0
|
840 #
|
e@0
|
841 # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
|
e@0
|
842 # Freq[i, 1] = len(Vk)/50
|
e@0
|
843 # Freq[i, 2] = sum(Freq[:,2])
|
e@0
|
844 # Freq[i, 3] = zpos[i]
|
e@0
|
845 # Freq[i, 4] = noise2[i]
|
e@0
|
846 # Freq[i, 5] = max(Zca)
|
e@0
|
847 #
|
e@0
|
848 # Pmusicg[i] = 0
|
e@0
|
849 # Pvoiceg[i] = 0
|
e@0
|
850 #
|
e@0
|
851 # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
|
e@0
|
852 # Pmusicg[i] = 10
|
e@0
|
853 # else:
|
e@0
|
854 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
|
e@0
|
855 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
|
e@0
|
856 #
|
e@0
|
857 # if x > 3:
|
e@0
|
858 # Pvoiceg[i] = Pvoiceg[i] + 0.1
|
e@0
|
859 #
|
e@0
|
860 # if x > 300:
|
e@0
|
861 # Pmusicg[i] = 9
|
e@0
|
862 #
|
e@0
|
863 #
|
e@0
|
864 # if noise2[i] > 3.5:
|
e@0
|
865 # Pvoiceg[i] = 11
|
e@0
|
866 #
|
e@0
|
867 # if max(Zca) > 280:
|
e@0
|
868 # Pmusicg[i] = 11
|
e@0
|
869 #
|
e@0
|
870 #
|
e@0
|
871 # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
|
e@0
|
872 # Pvoiceg[i] = 12
|
e@0
|
873 #
|
e@0
|
874 #
|
e@0
|
875 # if zpos[i] > 0.15:
|
e@0
|
876 # Pvoiceg[i] = 13
|
e@0
|
877 #
|
e@0
|
878 #
|
e@0
|
879 # Vg[i,0] = Pmusicg[i]
|
e@0
|
880 # Vg[i,1] = Pvoiceg[i]
|
e@0
|
881 #
|
e@0
|
882 # Typeg[i] = argmax(Vg[i,:])
|
e@0
|
883 # Pmaxg[i] = Vg[i, Typeg[i]]
|
e@0
|
884 #
|
e@0
|
885 # Fsp1.extend(Vk)
|
e@0
|
886 #
|
e@0
|
887 # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
|
e@0
|
888 # Type[i] = 2
|
e@0
|
889 # Typeg[i] = 2
|
e@0
|
890 #
|
e@0
|
891 #
|
e@0
|
892 # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
|
e@0
|
893 # Type[i] = Type[i-1]
|
e@0
|
894 #
|
e@0
|
895 #
|
e@0
|
896 #
|
e@0
|
897 # maxpos = pfpos[-1]
|
e@0
|
898 #
|
e@0
|
899 # tt = zeros((maxpos,))
|
e@0
|
900 # pt = zeros((maxpos,))
|
e@0
|
901 # pm = zeros((maxpos,))
|
e@0
|
902 # pv = zeros((maxpos,))
|
e@0
|
903 # ps = zeros((maxpos,))
|
e@0
|
904 #
|
e@0
|
905 # for i in range(0, L):
|
e@0
|
906 # tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
|
e@0
|
907 # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
|
e@0
|
908 # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
|
e@0
|
909 # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
|
e@0
|
910 # ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
|
e@0
|
911 #
|
e@0
|
912 #
|
e@0
|
913 #
|
e@0
|
914 #
|
e@0
|
915 #
|
e@0
|
916 #
|
e@0
|
917 # for k in range(0, len(rmsval)):
|
e@0
|
918 # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
|
e@0
|
919 # zcrval[k] = 0
|
e@0
|
920 #
|
e@0
|
921 #
|
e@0
|
922 # classes = []
|
e@0
|
923 # positions = []
|
e@0
|
924 #
|
e@0
|
925 # class_ = 5
|
e@0
|
926 #
|
e@0
|
927 # for j in range(0, len(tt)):
|
e@0
|
928 # if class_ != tt[j]:
|
e@0
|
929 # class_ = tt[j]
|
e@0
|
930 # classes.append(int(class_))
|
e@0
|
931 # positions.append(int(j*0.02*SR))
|
e@0
|
932 #
|
e@0
|
933 # classvec = zeros((len(audio),))
|
e@0
|
934 #
|
e@0
|
935 # if len(positions) == 1:
|
e@0
|
936 # classvec = classes[0]*ones((len(audio),))
|
e@0
|
937 # for j in range(1, len(positions)):
|
e@0
|
938 # classvec[positions[j-1]:positions[j]] = classes[j-1]
|
e@0
|
939 #
|
e@0
|
940 #
|
e@0
|
941 #
|
e@0
|
942 #
|
e@0
|
943 #
|
e@0
|
944 #
|
e@0
|
945 #
|
e@0
|
946 #
|
e@0
|
947 #
|
e@0
|
948 #
|
e@0
|
949 #
|
e@0
|
950
|
e@0
|
951 #
|
e@0
|
952
|
e@0
|
953
|
e@0
|
954 positions, classes = trmszc(audio, SR)
|
e@0
|
955
|
e@0
|
956 classvec = zeros((len(audio),))
|
e@0
|
957
|
e@0
|
958 if len(positions) == 1:
|
e@0
|
959 classvec = classes[0]*ones((len(audio),))
|
e@0
|
960 for j in range(1, len(positions)):
|
e@0
|
961 classvec[positions[j-1]:positions[j]] = classes[j-1]
|
e@0
|
962
|
e@0
|
963 plot(audio)
|
e@0
|
964 plot(classvec)
|
e@0
|
965 |