annotate experiment-reverb/code/panagiotakis.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
rev   line source
e@0 1 # -*- coding: utf-8 -*-
e@0 2 """
e@0 3 Created on Mon Jun 1 17:17:34 2015
e@0 4
e@0 5 @author: mmxgn
e@0 6 """
e@0 7
e@0 8
e@0 9 from sys import argv
e@0 10
e@0 11
e@0 12
e@0 13 def trmszc(audio, SR=21000):
e@0 14 # Segmentation based on 1 second segmentation
e@0 15 print("[II] Calculating features for %s, please wait..." % fname)
e@0 16
e@0 17 rmsval = []
e@0 18 zcrval = []
e@0 19 rmszcrval = []
e@0 20 meanrms = []
e@0 21 varrms = []
e@0 22 meanzcr = []
e@0 23 varzcr = []
e@0 24 histograms = []
e@0 25
e@0 26
e@0 27
e@0 28 # Parameters for the chi square distribution
e@0 29 alphas = []
e@0 30 betas = []
e@0 31 L = int(len(audio)/SR)
e@0 32 for n in range(0, L):
e@0 33 segment = audio[n*SR:(n+1)*SR]
e@0 34 framerms = []
e@0 35 framezcr = []
e@0 36 # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
e@0 37 for k in range(0, 50):
e@0 38 frame = segment[0.02*SR*k:0.02*SR*(k+1)]
e@0 39
e@0 40 rmsv = sqrt(sum(frame**2))
e@0 41 framerms.append(rmsv)
e@0 42 f1 = frame[0:-1]
e@0 43 f2 = frame[1:]
e@0 44 mm1 = f1*f2
e@0 45 zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
e@0 46
e@0 47 rmsval.append(rmsv)
e@0 48 zcrval.append(zc)
e@0 49 rmszcrval.append(rmsv*zc)
e@0 50 framezcr.append(zc)
e@0 51
e@0 52 meanrms.append(mean(framerms))
e@0 53 meanzcr.append(mean(framezcr))
e@0 54
e@0 55 mu = mean(framerms)
e@0 56 sigmasq = var(framerms, ddof=1)
e@0 57
e@0 58 beta = sigmasq/mu
e@0 59 alpha = (mu/beta) - 1
e@0 60
e@0 61 if mu != 0 and sigmasq != 0:
e@0 62 betas.append(beta)
e@0 63 alphas.append(alpha)
e@0 64 else:
e@0 65 betas.append(1000000)
e@0 66 alphas.append(-1)
e@0 67
e@0 68 histograms.append(histogram(framerms, 256))
e@0 69
e@0 70 varrms.append(var(framerms, ddof=1))
e@0 71 varzcr.append(var(framerms, ddof=1))
e@0 72
e@0 73
e@0 74 # Computation of KVRMS
e@0 75
e@0 76 l = len(rmsval)
e@0 77 ftm = [mean(rmsval[0:50])]
e@0 78 ftv = [var(rmsval[0:50], ddof=1)]
e@0 79 kvrms = [0]
e@0 80
e@0 81 for i in range(1,l-50):
e@0 82 ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
e@0 83 ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
e@0 84 ftm.append(ftm_)
e@0 85 ftv.append(ftv_)
e@0 86
e@0 87 if ftm_ != 0:
e@0 88 kvrms.append(ftv_/ftm_**2)
e@0 89 else:
e@0 90 kvrms.append(0)
e@0 91
e@0 92
e@0 93 m = mean(kvrms)
e@0 94 v = var(kvrms, ddof=1)
e@0 95
e@0 96 b = v/m
e@0 97 a = m/b - 1
e@0 98
e@0 99 # Computing Similarity
e@0 100
e@0 101 sim = []
e@0 102 from scipy.special import gamma
e@0 103
e@0 104 for i in range(1,L):
e@0 105 k = (alphas[i]+alphas[i-1]+2)/2
e@0 106 sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
e@0 107
e@0 108 # 0 If overflow
e@0 109 if isinf(sim_) or isnan(sim_):
e@0 110 sim_ = 0
e@0 111 sim.append(sim_)
e@0 112
e@0 113 sim2 = []
e@0 114
e@0 115 for i in range(2,L):
e@0 116 k = (alphas[i]+alphas[i-2]+2)/2
e@0 117 sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
e@0 118
e@0 119 if isinf(sim_) or isnan(sim_):
e@0 120 sim_ = 0
e@0 121 sim2.append(sim_)
e@0 122
e@0 123
e@0 124 p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
e@0 125 #
e@0 126 rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
e@0 127
e@0 128
e@0 129 P = [0]
e@0 130 for i in range(1, L-1):
e@0 131 P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
e@0 132 P_ = (1-sim2[i-1])
e@0 133 P.append(P_)
e@0 134
e@0 135
e@0 136 P.append(0)
e@0 137 P = array(P)
e@0 138
e@0 139 M = mean(P)
e@0 140 V1 = mean(abs(P[0:L-3]-P[1:L-2]))
e@0 141 V2 = mean(abs(P[0:L-4]-P[2:L-2]))
e@0 142 # V = (V1+V2)*0.25;
e@0 143 V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
e@0 144
e@0 145 Pd = [P[0],P[1]]
e@0 146
e@0 147
e@0 148 for i in range(2,L-2):
e@0 149 m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
e@0 150 m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
e@0 151
e@0 152 if m<0.1*M:
e@0 153 m1 = 0.1*M
e@0 154
e@0 155 d = (P[i]-m)/m1
e@0 156
e@0 157 if d < 0:
e@0 158 d = 0
e@0 159 Pd.append(P[i]*d/V)
e@0 160
e@0 161 Pd.append(P[L-2])
e@0 162 Pd.append(P[L-1])
e@0 163
e@0 164
e@0 165 # Selection of candidate windows where there is a change
e@0 166
e@0 167 j = 0
e@0 168 pos = []
e@0 169 for i in range(2, L-3):
e@0 170 if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
e@0 171 j += 1
e@0 172 pos.append(i)
e@0 173
e@0 174 if len(pos) == 0:
e@0 175 sys.exit(0)
e@0 176
e@0 177
e@0 178 # Computation of Change with 20 ms accuracy
e@0 179
e@0 180 k2 = 0
e@0 181 c = 25
e@0 182
e@0 183 Sm = zeros((2*c+51,))
e@0 184 posms = zeros((len(pos),))
e@0 185 msec = zeros((len(pos),))
e@0 186 mpos = zeros((len(pos), ))
e@0 187
e@0 188 for k1 in range(0,len(pos)):
e@0 189 i = 50*(pos[k1] - 1); # ms
e@0 190 for j in range(-c,50+c+1):
e@0 191 if ftv[i+j] != 0 and ftm[i+j] != 0:
e@0 192 b1 = ftv[i+j]/ftm[i+j]
e@0 193 a1 = ftm[i+j]/b1 - 1
e@0 194 else:
e@0 195 b1 = 1000
e@0 196 a1 = -1
e@0 197
e@0 198 if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
e@0 199 b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
e@0 200 a2 = ftm[i+j+50-1]/b2 - 1
e@0 201 else:
e@0 202 b2 = 1000
e@0 203 a2 = -1
e@0 204
e@0 205 k = (a2+a1+2)/2
e@0 206 Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
e@0 207
e@0 208 if isnan(Sm[j+c]) or isinf(Sm[j+c]):
e@0 209 Sm[j+c] = 0
e@0 210
e@0 211 posms[k1] = argmin(Sm)
e@0 212 x = Sm[posms[k1]]
e@0 213
e@0 214 h = 1 - Sm
e@0 215
e@0 216 if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
e@0 217 msec[k2] = posms[k1]-1-c
e@0 218 mpos[k2] = pos[k1]
e@0 219 k2 = k2+1
e@0 220 else:
e@0 221 msec[k2] = 0
e@0 222 mpos[k2] = 0
e@0 223
e@0 224
e@0 225 posms = msec
e@0 226 pos = mpos
e@0 227
e@0 228 fpos = pos+0.02*posms
e@0 229
e@0 230 pfpos = floor(50*pos+posms);
e@0 231
e@0 232 bvoice = 0.14339738781836;
e@0 233 bmusic = 0.04399754659151;
e@0 234 amusic = 1.66349817725076;
e@0 235 avoice = 2.32677887950291;
e@0 236
e@0 237 pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
e@0 238
e@0 239 L = len(pfpos)-1
e@0 240 V = zeros((L,2))
e@0 241 Vg = zeros((L,2))
e@0 242
e@0 243 Pzero = []
e@0 244 Fsp1 = []
e@0 245 Type = zeros((L,))
e@0 246 Typeg = zeros((L,))
e@0 247
e@0 248
e@0 249 # Classification for each segment
e@0 250
e@0 251 noise = zeros((L,))
e@0 252 noise2 = zeros((L,))
e@0 253
e@0 254 pfpos = pfpos.astype(int)
e@0 255 Pmusicg = zeros((L,))
e@0 256 Pvoiceg = zeros((L,))
e@0 257 Pmusic = zeros((L,))
e@0 258 Pvoice = zeros((L,))
e@0 259 Pspace = zeros((L,))
e@0 260 zpos = zeros((L,))
e@0 261 Pmax = zeros((L,))
e@0 262 Pmaxg = zeros((L,))
e@0 263 Freq = zeros((L,6))
e@0 264
e@0 265 zcrval = array(zcrval)
e@0 266 rmsval = array(rmsval)
e@0 267 rmszcrval = array(rmszcrval)
e@0 268 i
e@0 269 for i in range(0, L):
e@0 270 d = 0
e@0 271
e@0 272 x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
e@0 273 x = x1
e@0 274 y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
e@0 275 y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0 276 noise[i] = 50*mean(exp(-y))
e@0 277
e@0 278 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 279 Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 280
e@0 281 sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
e@0 282
e@0 283 Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
e@0 284
e@0 285 zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0 286
e@0 287 if zpos[i] > 0.08 or x > 3:
e@0 288 Pvoice[i] = 10
e@0 289 else:
e@0 290 Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 291
e@0 292 V[i,0] = Pmusic[i]
e@0 293 V[i,1] = Pvoice[i]
e@0 294 Type[i] = argmax(V[i,:])
e@0 295 Pmax[i] = V[i,Type[i]]
e@0 296
e@0 297 noise2[i] = 50 * mean(exp(-4*y))
e@0 298 Pmusicg[i] = 0
e@0 299
e@0 300 if noise2[i] > 3.5:
e@0 301 Pvoiceg[i] = 10
e@0 302 else:
e@0 303 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 304 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 305
e@0 306
e@0 307 Vg[i,0] = Pmusicg[i]
e@0 308 Vg[i,1] = Pvoiceg[i]
e@0 309
e@0 310 Typeg[i] = argmax(Vg[i,:])
e@0 311 Pmaxg[i] = Vg[i,Typeg[i]]
e@0 312
e@0 313 Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
e@0 314 tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
e@0 315 rr = max(tempV)+0.001
e@0 316 tempV1 = tempV/rr
e@0 317 Vk0 = Vk.copy()
e@0 318
e@0 319 for j in range(0, len(Vk)):
e@0 320 if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
e@0 321 Vk[j] = 0
e@0 322 # print "Changing %d" % j
e@0 323 Vk1 = Vk.copy()
e@0 324
e@0 325 for j in range(0, len(Vk)):
e@0 326 if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
e@0 327 # print "Changing %d" % j
e@0 328
e@0 329 Vk1[j] = 0
e@0 330
e@0 331 Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
e@0 332 Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
e@0 333
e@0 334 VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
e@0 335 dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
e@0 336
e@0 337 Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
e@0 338 Freq[i, 1] = len(Vk)/50
e@0 339 Freq[i, 2] = sum(Freq[:,2])
e@0 340 Freq[i, 3] = zpos[i]
e@0 341 Freq[i, 4] = noise2[i]
e@0 342 Freq[i, 5] = max(Zca)
e@0 343
e@0 344 Pmusicg[i] = 0
e@0 345 Pvoiceg[i] = 0
e@0 346
e@0 347 if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
e@0 348 Pmusicg[i] = 10
e@0 349 else:
e@0 350 Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 351 Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 352
e@0 353 if x > 3:
e@0 354 Pvoiceg[i] = Pvoiceg[i] + 0.1
e@0 355
e@0 356 if x > 300:
e@0 357 Pmusicg[i] = 9
e@0 358
e@0 359
e@0 360 if noise2[i] > 3.5:
e@0 361 Pvoiceg[i] = 11
e@0 362
e@0 363 if max(Zca) > 280:
e@0 364 Pmusicg[i] = 11
e@0 365
e@0 366
e@0 367 if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
e@0 368 Pvoiceg[i] = 12
e@0 369
e@0 370
e@0 371 if zpos[i] > 0.15:
e@0 372 Pvoiceg[i] = 13
e@0 373
e@0 374
e@0 375 Vg[i,0] = Pmusicg[i]
e@0 376 Vg[i,1] = Pvoiceg[i]
e@0 377
e@0 378 Typeg[i] = argmax(Vg[i,:])
e@0 379 Pmaxg[i] = Vg[i, Typeg[i]]
e@0 380
e@0 381 Fsp1.extend(Vk)
e@0 382
e@0 383 if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
e@0 384 Type[i] = 2
e@0 385 Typeg[i] = 2
e@0 386
e@0 387
e@0 388 if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
e@0 389 Type[i] = Type[i-1]
e@0 390
e@0 391
e@0 392
e@0 393 maxpos = pfpos[-1]
e@0 394
e@0 395 tt = zeros((maxpos,))
e@0 396 pt = zeros((maxpos,))
e@0 397 pm = zeros((maxpos,))
e@0 398 pv = zeros((maxpos,))
e@0 399 ps = zeros((maxpos,))
e@0 400
e@0 401 for i in range(0, L):
e@0 402 tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
e@0 403 pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
e@0 404 pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
e@0 405 pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
e@0 406 ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
e@0 407
e@0 408
e@0 409
e@0 410
e@0 411
e@0 412
e@0 413 for k in range(0, len(rmsval)):
e@0 414 if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
e@0 415 zcrval[k] = 0
e@0 416
e@0 417
e@0 418 classes = []
e@0 419 positions = []
e@0 420
e@0 421 class_ = 5
e@0 422
e@0 423 for j in range(0, len(tt)):
e@0 424 if class_ != tt[j]:
e@0 425 class_ = tt[j]
e@0 426 classes.append(int(class_))
e@0 427 positions.append(int(j*0.02*SR))
e@0 428
e@0 429
e@0 430
e@0 431 return positions, classes
e@0 432
e@0 433
e@0 434
e@0 435 if __name__=="__main__":
e@0 436 if len(argv) != 3:
e@0 437 print("Incorrect number of arguments:")
e@0 438 print("Usage: ")
e@0 439 print("%s <input>")
e@0 440 print("")
e@0 441 print("Arguments:")
e@0 442 print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
e@0 443 sys.exit(-1)
e@0 444 else:
e@0 445 print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
e@0 446 print("[II] Loading libraries")
e@0 447
e@0 448 import essentia
e@0 449 from essentia import Pool
e@0 450 from essentia.standard import *
e@0 451 import csv
e@0 452 import yaml
e@0 453
e@0 454 # reqyures matplotlib
e@0 455 from pylab import *
e@0 456
e@0 457 #requires numpy
e@0 458 from numpy import *
e@0 459
e@0 460 #requires scikit-learn
e@0 461 from sklearn.metrics import pairwise_distances
e@0 462
e@0 463 d = {}
e@0 464 v = {}
e@0 465
e@0 466 fname = argv[1]
e@0 467 outfname = argv[2]
e@0 468
e@0 469
e@0 470
e@0 471 trackname = fname.split('.')[0].split('/')[-1]
e@0 472
e@0 473
e@0 474 if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
e@0 475 print("Please choose a .json or .yaml as an output file.")
e@0 476 sys.exit(-1)
e@0 477 else:
e@0 478 if outfname.partition('.')[-1].lower() == 'json':
e@0 479 output = YamlOutput(filename = outfname, format='json')
e@0 480 else:
e@0 481 output = YamlOutput(filename = outfname, format='yaml')
e@0 482
e@0 483 print("Feature extraction of `%s\'" % fname)
e@0 484
e@0 485 # Sampling Rate
e@0 486 SR = 21000.0
e@0 487
e@0 488 # Sampling Frequency
e@0 489 T = 1.0/SR
e@0 490
e@0 491 # SegmentSize
e@0 492 tsegmentSize = 1000 #ms
e@0 493 segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
e@0 494 else int(floor(tsegmentSize*SR/1000))
e@0 495
e@0 496
e@0 497 # FrameSize
e@0 498 tframeSize = 20 #ms
e@0 499 frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
e@0 500 else int(floor(tframeSize*SR/1000))
e@0 501
e@0 502 # HopSize
e@0 503 hopSize = frameSize
e@0 504
e@0 505 # Load Audio
e@0 506 audio = MonoLoader(filename = fname, sampleRate=SR)()
e@0 507
e@0 508
e@0 509
e@0 510
e@0 511
e@0 512 # # Zero Crossing Rate
e@0 513 # zcr = ZeroCrossingRate()
e@0 514 #
e@0 515 # # RMS
e@0 516 # rms = RMS()
e@0 517 #
e@0 518 # # Segmentation based on 1 second segmentation
e@0 519 # print("[II] Calculating features for %s, please wait..." % fname)
e@0 520 #
e@0 521 # rmsval = []
e@0 522 # zcrval = []
e@0 523 # rmszcrval = []
e@0 524 # meanrms = []
e@0 525 # varrms = []
e@0 526 # meanzcr = []
e@0 527 # varzcr = []
e@0 528 # histograms = []
e@0 529 #
e@0 530 #
e@0 531 #
e@0 532 # # Parameters for the chi square distribution
e@0 533 # alphas = []
e@0 534 # betas = []
e@0 535 # L = int(len(audio)/SR)
e@0 536 # for n in range(0, L):
e@0 537 # segment = audio[n*SR:(n+1)*SR]
e@0 538 # framerms = []
e@0 539 # framezcr = []
e@0 540 # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
e@0 541 # for k in range(0, 50):
e@0 542 # frame = segment[0.02*SR*k:0.02*SR*(k+1)]
e@0 543 #
e@0 544 # rmsv = sqrt(sum(frame**2))
e@0 545 # framerms.append(rmsv)
e@0 546 # f1 = frame[0:-1]
e@0 547 # f2 = frame[1:]
e@0 548 # mm1 = f1*f2
e@0 549 # zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
e@0 550 #
e@0 551 # rmsval.append(rmsv)
e@0 552 # zcrval.append(zc)
e@0 553 # rmszcrval.append(rmsv*zc)
e@0 554 # framezcr.append(zc)
e@0 555 #
e@0 556 # meanrms.append(mean(framerms))
e@0 557 # meanzcr.append(mean(framezcr))
e@0 558 #
e@0 559 # mu = mean(framerms)
e@0 560 # sigmasq = var(framerms, ddof=1)
e@0 561 #
e@0 562 # beta = sigmasq/mu
e@0 563 # alpha = (mu/beta) - 1
e@0 564 #
e@0 565 # if mu != 0 and sigmasq != 0:
e@0 566 # betas.append(beta)
e@0 567 # alphas.append(alpha)
e@0 568 # else:
e@0 569 # betas.append(1000000)
e@0 570 # alphas.append(-1)
e@0 571 #
e@0 572 # histograms.append(histogram(framerms, 256))
e@0 573 #
e@0 574 # varrms.append(var(framerms, ddof=1))
e@0 575 # varzcr.append(var(framerms, ddof=1))
e@0 576 #
e@0 577 #
e@0 578 # # Computation of KVRMS
e@0 579 #
e@0 580 # l = len(rmsval)
e@0 581 # ftm = [mean(rmsval[0:50])]
e@0 582 # ftv = [var(rmsval[0:50], ddof=1)]
e@0 583 # kvrms = [0]
e@0 584 #
e@0 585 # for i in range(1,l-50):
e@0 586 # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
e@0 587 # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
e@0 588 # ftm.append(ftm_)
e@0 589 # ftv.append(ftv_)
e@0 590 #
e@0 591 # if ftm_ != 0:
e@0 592 # kvrms.append(ftv_/ftm_**2)
e@0 593 # else:
e@0 594 # kvrms.append(0)
e@0 595 #
e@0 596 #
e@0 597 # m = mean(kvrms)
e@0 598 # v = var(kvrms, ddof=1)
e@0 599 #
e@0 600 # b = v/m
e@0 601 # a = m/b - 1
e@0 602 #
e@0 603 # # Computing Similarity
e@0 604 #
e@0 605 # sim = []
e@0 606 # from scipy.special import gamma
e@0 607 #
e@0 608 # for i in range(1,L):
e@0 609 # k = (alphas[i]+alphas[i-1]+2)/2
e@0 610 # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
e@0 611 #
e@0 612 # # 0 If overflow
e@0 613 # if isinf(sim_) or isnan(sim_):
e@0 614 # sim_ = 0
e@0 615 # sim.append(sim_)
e@0 616 #
e@0 617 # sim2 = []
e@0 618 #
e@0 619 # for i in range(2,L):
e@0 620 # k = (alphas[i]+alphas[i-2]+2)/2
e@0 621 # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
e@0 622 #
e@0 623 # if isinf(sim_) or isnan(sim_):
e@0 624 # sim_ = 0
e@0 625 # sim2.append(sim_)
e@0 626 #
e@0 627 #
e@0 628 # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
e@0 629 ##
e@0 630 # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
e@0 631 #
e@0 632 #
e@0 633 # P = [0]
e@0 634 # for i in range(1, L-1):
e@0 635 # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
e@0 636 # P_ = (1-sim2[i-1])
e@0 637 # P.append(P_)
e@0 638 #
e@0 639 #
e@0 640 # P.append(0)
e@0 641 # P = array(P)
e@0 642 #
e@0 643 # M = mean(P)
e@0 644 # V1 = mean(abs(P[0:L-3]-P[1:L-2]))
e@0 645 # V2 = mean(abs(P[0:L-4]-P[2:L-2]))
e@0 646 ## V = (V1+V2)*0.25;
e@0 647 # V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
e@0 648 #
e@0 649 # Pd = [P[0],P[1]]
e@0 650 #
e@0 651 #
e@0 652 # for i in range(2,L-2):
e@0 653 # m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
e@0 654 # m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
e@0 655 #
e@0 656 # if m<0.1*M:
e@0 657 # m1 = 0.1*M
e@0 658 #
e@0 659 # d = (P[i]-m)/m1
e@0 660 #
e@0 661 # if d < 0:
e@0 662 # d = 0
e@0 663 # Pd.append(P[i]*d/V)
e@0 664 #
e@0 665 # Pd.append(P[L-2])
e@0 666 # Pd.append(P[L-1])
e@0 667 #
e@0 668 #
e@0 669 # # Selection of candidate windows where there is a change
e@0 670 #
e@0 671 # j = 0
e@0 672 # pos = []
e@0 673 # for i in range(2, L-3):
e@0 674 # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
e@0 675 # j += 1
e@0 676 # pos.append(i)
e@0 677 #
e@0 678 # if len(pos) == 0:
e@0 679 # sys.exit(0)
e@0 680 #
e@0 681 #
e@0 682 # # Computation of Change with 20 ms accuracy
e@0 683 #
e@0 684 # k2 = 0
e@0 685 # c = 25
e@0 686 #
e@0 687 # Sm = zeros((2*c+51,))
e@0 688 # posms = zeros((len(pos),))
e@0 689 # msec = zeros((len(pos),))
e@0 690 # mpos = zeros((len(pos), ))
e@0 691 #
e@0 692 # for k1 in range(0,len(pos)):
e@0 693 # i = 50*(pos[k1] - 1); # ms
e@0 694 # for j in range(-c,50+c+1):
e@0 695 # if ftv[i+j] != 0 and ftm[i+j] != 0:
e@0 696 # b1 = ftv[i+j]/ftm[i+j]
e@0 697 # a1 = ftm[i+j]/b1 - 1
e@0 698 # else:
e@0 699 # b1 = 1000
e@0 700 # a1 = -1
e@0 701 #
e@0 702 # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
e@0 703 # b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
e@0 704 # a2 = ftm[i+j+50-1]/b2 - 1
e@0 705 # else:
e@0 706 # b2 = 1000
e@0 707 # a2 = -1
e@0 708 #
e@0 709 # k = (a2+a1+2)/2
e@0 710 # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
e@0 711 #
e@0 712 # if isnan(Sm[j+c]) or isinf(Sm[j+c]):
e@0 713 # Sm[j+c] = 0
e@0 714 #
e@0 715 # posms[k1] = argmin(Sm)
e@0 716 # x = Sm[posms[k1]]
e@0 717 #
e@0 718 # h = 1 - Sm
e@0 719 #
e@0 720 # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
e@0 721 # msec[k2] = posms[k1]-1-c
e@0 722 # mpos[k2] = pos[k1]
e@0 723 # k2 = k2+1
e@0 724 # else:
e@0 725 # msec[k2] = 0
e@0 726 # mpos[k2] = 0
e@0 727 #
e@0 728 #
e@0 729 # posms = msec
e@0 730 # pos = mpos
e@0 731 #
e@0 732 # fpos = pos+0.02*posms
e@0 733 #
e@0 734 # pfpos = floor(50*pos+posms);
e@0 735 #
e@0 736 # bvoice = 0.14339738781836;
e@0 737 # bmusic = 0.04399754659151;
e@0 738 # amusic = 1.66349817725076;
e@0 739 # avoice = 2.32677887950291;
e@0 740 #
e@0 741 # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
e@0 742 #
e@0 743 # L = len(pfpos)-1
e@0 744 # V = zeros((L,2))
e@0 745 # Vg = zeros((L,2))
e@0 746 #
e@0 747 # Pzero = []
e@0 748 # Fsp1 = []
e@0 749 # Type = zeros((L,))
e@0 750 # Typeg = zeros((L,))
e@0 751 #
e@0 752 #
e@0 753 # # Classification for each segment
e@0 754 #
e@0 755 # noise = zeros((L,))
e@0 756 # noise2 = zeros((L,))
e@0 757 #
e@0 758 # pfpos = pfpos.astype(int)
e@0 759 # Pmusicg = zeros((L,))
e@0 760 # Pvoiceg = zeros((L,))
e@0 761 # Pmusic = zeros((L,))
e@0 762 # Pvoice = zeros((L,))
e@0 763 # Pspace = zeros((L,))
e@0 764 # zpos = zeros((L,))
e@0 765 # Pmax = zeros((L,))
e@0 766 # Pmaxg = zeros((L,))
e@0 767 # Freq = zeros((L,6))
e@0 768 #
e@0 769 # zcrval = array(zcrval)
e@0 770 # rmsval = array(rmsval)
e@0 771 # rmszcrval = array(rmszcrval)
e@0 772 # i
e@0 773 # for i in range(0, L):
e@0 774 # d = 0
e@0 775 #
e@0 776 # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
e@0 777 # x = x1
e@0 778 # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
e@0 779 # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0 780 # noise[i] = 50*mean(exp(-y))
e@0 781 #
e@0 782 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 783 # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 784 #
e@0 785 # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
e@0 786 #
e@0 787 # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
e@0 788 #
e@0 789 # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0 790 #
e@0 791 # if zpos[i] > 0.08 or x > 3:
e@0 792 # Pvoice[i] = 10
e@0 793 # else:
e@0 794 # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 795 #
e@0 796 # V[i,0] = Pmusic[i]
e@0 797 # V[i,1] = Pvoice[i]
e@0 798 # Type[i] = argmax(V[i,:])
e@0 799 # Pmax[i] = V[i,Type[i]]
e@0 800 #
e@0 801 # noise2[i] = 50 * mean(exp(-4*y))
e@0 802 # Pmusicg[i] = 0
e@0 803 #
e@0 804 # if noise2[i] > 3.5:
e@0 805 # Pvoiceg[i] = 10
e@0 806 # else:
e@0 807 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 808 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 809 #
e@0 810 #
e@0 811 # Vg[i,0] = Pmusicg[i]
e@0 812 # Vg[i,1] = Pvoiceg[i]
e@0 813 #
e@0 814 # Typeg[i] = argmax(Vg[i,:])
e@0 815 # Pmaxg[i] = Vg[i,Typeg[i]]
e@0 816 #
e@0 817 # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
e@0 818 # tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
e@0 819 # rr = max(tempV)+0.001
e@0 820 # tempV1 = tempV/rr
e@0 821 # Vk0 = Vk.copy()
e@0 822 #
e@0 823 # for j in range(0, len(Vk)):
e@0 824 # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
e@0 825 # Vk[j] = 0
e@0 826 # # print "Changing %d" % j
e@0 827 # Vk1 = Vk.copy()
e@0 828 #
e@0 829 # for j in range(0, len(Vk)):
e@0 830 # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
e@0 831 # # print "Changing %d" % j
e@0 832 #
e@0 833 # Vk1[j] = 0
e@0 834 #
e@0 835 # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
e@0 836 # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
e@0 837 #
e@0 838 # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
e@0 839 # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
e@0 840 #
e@0 841 # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
e@0 842 # Freq[i, 1] = len(Vk)/50
e@0 843 # Freq[i, 2] = sum(Freq[:,2])
e@0 844 # Freq[i, 3] = zpos[i]
e@0 845 # Freq[i, 4] = noise2[i]
e@0 846 # Freq[i, 5] = max(Zca)
e@0 847 #
e@0 848 # Pmusicg[i] = 0
e@0 849 # Pvoiceg[i] = 0
e@0 850 #
e@0 851 # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
e@0 852 # Pmusicg[i] = 10
e@0 853 # else:
e@0 854 # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0 855 # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0 856 #
e@0 857 # if x > 3:
e@0 858 # Pvoiceg[i] = Pvoiceg[i] + 0.1
e@0 859 #
e@0 860 # if x > 300:
e@0 861 # Pmusicg[i] = 9
e@0 862 #
e@0 863 #
e@0 864 # if noise2[i] > 3.5:
e@0 865 # Pvoiceg[i] = 11
e@0 866 #
e@0 867 # if max(Zca) > 280:
e@0 868 # Pmusicg[i] = 11
e@0 869 #
e@0 870 #
e@0 871 # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
e@0 872 # Pvoiceg[i] = 12
e@0 873 #
e@0 874 #
e@0 875 # if zpos[i] > 0.15:
e@0 876 # Pvoiceg[i] = 13
e@0 877 #
e@0 878 #
e@0 879 # Vg[i,0] = Pmusicg[i]
e@0 880 # Vg[i,1] = Pvoiceg[i]
e@0 881 #
e@0 882 # Typeg[i] = argmax(Vg[i,:])
e@0 883 # Pmaxg[i] = Vg[i, Typeg[i]]
e@0 884 #
e@0 885 # Fsp1.extend(Vk)
e@0 886 #
e@0 887 # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
e@0 888 # Type[i] = 2
e@0 889 # Typeg[i] = 2
e@0 890 #
e@0 891 #
e@0 892 # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
e@0 893 # Type[i] = Type[i-1]
e@0 894 #
e@0 895 #
e@0 896 #
e@0 897 # maxpos = pfpos[-1]
e@0 898 #
e@0 899 # tt = zeros((maxpos,))
e@0 900 # pt = zeros((maxpos,))
e@0 901 # pm = zeros((maxpos,))
e@0 902 # pv = zeros((maxpos,))
e@0 903 # ps = zeros((maxpos,))
e@0 904 #
e@0 905 # for i in range(0, L):
e@0 906 # tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
e@0 907 # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
e@0 908 # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
e@0 909 # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
e@0 910 # ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
e@0 911 #
e@0 912 #
e@0 913 #
e@0 914 #
e@0 915 #
e@0 916 #
e@0 917 # for k in range(0, len(rmsval)):
e@0 918 # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
e@0 919 # zcrval[k] = 0
e@0 920 #
e@0 921 #
e@0 922 # classes = []
e@0 923 # positions = []
e@0 924 #
e@0 925 # class_ = 5
e@0 926 #
e@0 927 # for j in range(0, len(tt)):
e@0 928 # if class_ != tt[j]:
e@0 929 # class_ = tt[j]
e@0 930 # classes.append(int(class_))
e@0 931 # positions.append(int(j*0.02*SR))
e@0 932 #
e@0 933 # classvec = zeros((len(audio),))
e@0 934 #
e@0 935 # if len(positions) == 1:
e@0 936 # classvec = classes[0]*ones((len(audio),))
e@0 937 # for j in range(1, len(positions)):
e@0 938 # classvec[positions[j-1]:positions[j]] = classes[j-1]
e@0 939 #
e@0 940 #
e@0 941 #
e@0 942 #
e@0 943 #
e@0 944 #
e@0 945 #
e@0 946 #
e@0 947 #
e@0 948 #
e@0 949 #
e@0 950
e@0 951 #
e@0 952
e@0 953
e@0 954 positions, classes = trmszc(audio, SR)
e@0 955
e@0 956 classvec = zeros((len(audio),))
e@0 957
e@0 958 if len(positions) == 1:
e@0 959 classvec = classes[0]*ones((len(audio),))
e@0 960 for j in range(1, len(positions)):
e@0 961 classvec[positions[j-1]:positions[j]] = classes[j-1]
e@0 962
e@0 963 plot(audio)
e@0 964 plot(classvec)
e@0 965