e@0: # -*- coding: utf-8 -*-
e@0: """
e@0: Created on Mon Jun 1 17:17:34 2015
e@0:
e@0: @author: mmxgn
e@0: """
e@0:
e@0:
e@0: from sys import argv
e@0:
e@0:
e@0:
e@0: def trmszc(audio, SR=21000):
e@0: # Segmentation based on 1 second segmentation
e@0: print("[II] Calculating features for %s, please wait..." % fname)
e@0:
e@0: rmsval = []
e@0: zcrval = []
e@0: rmszcrval = []
e@0: meanrms = []
e@0: varrms = []
e@0: meanzcr = []
e@0: varzcr = []
e@0: histograms = []
e@0:
e@0:
e@0:
e@0: # Parameters for the chi square distribution
e@0: alphas = []
e@0: betas = []
e@0: L = int(len(audio)/SR)
e@0: for n in range(0, L):
e@0: segment = audio[n*SR:(n+1)*SR]
e@0: framerms = []
e@0: framezcr = []
e@0: # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
e@0: for k in range(0, 50):
e@0: frame = segment[0.02*SR*k:0.02*SR*(k+1)]
e@0:
e@0: rmsv = sqrt(sum(frame**2))
e@0: framerms.append(rmsv)
e@0: f1 = frame[0:-1]
e@0: f2 = frame[1:]
e@0: mm1 = f1*f2
e@0: zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
e@0:
e@0: rmsval.append(rmsv)
e@0: zcrval.append(zc)
e@0: rmszcrval.append(rmsv*zc)
e@0: framezcr.append(zc)
e@0:
e@0: meanrms.append(mean(framerms))
e@0: meanzcr.append(mean(framezcr))
e@0:
e@0: mu = mean(framerms)
e@0: sigmasq = var(framerms, ddof=1)
e@0:
e@0: beta = sigmasq/mu
e@0: alpha = (mu/beta) - 1
e@0:
e@0: if mu != 0 and sigmasq != 0:
e@0: betas.append(beta)
e@0: alphas.append(alpha)
e@0: else:
e@0: betas.append(1000000)
e@0: alphas.append(-1)
e@0:
e@0: histograms.append(histogram(framerms, 256))
e@0:
e@0: varrms.append(var(framerms, ddof=1))
e@0: varzcr.append(var(framerms, ddof=1))
e@0:
e@0:
e@0: # Computation of KVRMS
e@0:
e@0: l = len(rmsval)
e@0: ftm = [mean(rmsval[0:50])]
e@0: ftv = [var(rmsval[0:50], ddof=1)]
e@0: kvrms = [0]
e@0:
e@0: for i in range(1,l-50):
e@0: ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
e@0: ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
e@0: ftm.append(ftm_)
e@0: ftv.append(ftv_)
e@0:
e@0: if ftm_ != 0:
e@0: kvrms.append(ftv_/ftm_**2)
e@0: else:
e@0: kvrms.append(0)
e@0:
e@0:
e@0: m = mean(kvrms)
e@0: v = var(kvrms, ddof=1)
e@0:
e@0: b = v/m
e@0: a = m/b - 1
e@0:
e@0: # Computing Similarity
e@0:
e@0: sim = []
e@0: from scipy.special import gamma
e@0:
e@0: for i in range(1,L):
e@0: k = (alphas[i]+alphas[i-1]+2)/2
e@0: sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
e@0:
e@0: # 0 If overflow
e@0: if isinf(sim_) or isnan(sim_):
e@0: sim_ = 0
e@0: sim.append(sim_)
e@0:
e@0: sim2 = []
e@0:
e@0: for i in range(2,L):
e@0: k = (alphas[i]+alphas[i-2]+2)/2
e@0: sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
e@0:
e@0: if isinf(sim_) or isnan(sim_):
e@0: sim_ = 0
e@0: sim2.append(sim_)
e@0:
e@0:
e@0: p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
e@0: #
e@0: rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
e@0:
e@0:
e@0: P = [0]
e@0: for i in range(1, L-1):
e@0: P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
e@0: P_ = (1-sim2[i-1])
e@0: P.append(P_)
e@0:
e@0:
e@0: P.append(0)
e@0: P = array(P)
e@0:
e@0: M = mean(P)
e@0: V1 = mean(abs(P[0:L-3]-P[1:L-2]))
e@0: V2 = mean(abs(P[0:L-4]-P[2:L-2]))
e@0: # V = (V1+V2)*0.25;
e@0: V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
e@0:
e@0: Pd = [P[0],P[1]]
e@0:
e@0:
e@0: for i in range(2,L-2):
e@0: m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
e@0: m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
e@0:
e@0: if m<0.1*M:
e@0: m1 = 0.1*M
e@0:
e@0: d = (P[i]-m)/m1
e@0:
e@0: if d < 0:
e@0: d = 0
e@0: Pd.append(P[i]*d/V)
e@0:
e@0: Pd.append(P[L-2])
e@0: Pd.append(P[L-1])
e@0:
e@0:
e@0: # Selection of candidate windows where there is a change
e@0:
e@0: j = 0
e@0: pos = []
e@0: for i in range(2, L-3):
e@0: if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
e@0: j += 1
e@0: pos.append(i)
e@0:
e@0: if len(pos) == 0:
e@0: sys.exit(0)
e@0:
e@0:
e@0: # Computation of Change with 20 ms accuracy
e@0:
e@0: k2 = 0
e@0: c = 25
e@0:
e@0: Sm = zeros((2*c+51,))
e@0: posms = zeros((len(pos),))
e@0: msec = zeros((len(pos),))
e@0: mpos = zeros((len(pos), ))
e@0:
e@0: for k1 in range(0,len(pos)):
e@0: i = 50*(pos[k1] - 1); # ms
e@0: for j in range(-c,50+c+1):
e@0: if ftv[i+j] != 0 and ftm[i+j] != 0:
e@0: b1 = ftv[i+j]/ftm[i+j]
e@0: a1 = ftm[i+j]/b1 - 1
e@0: else:
e@0: b1 = 1000
e@0: a1 = -1
e@0:
e@0: if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
e@0: b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
e@0: a2 = ftm[i+j+50-1]/b2 - 1
e@0: else:
e@0: b2 = 1000
e@0: a2 = -1
e@0:
e@0: k = (a2+a1+2)/2
e@0: Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
e@0:
e@0: if isnan(Sm[j+c]) or isinf(Sm[j+c]):
e@0: Sm[j+c] = 0
e@0:
e@0: posms[k1] = argmin(Sm)
e@0: x = Sm[posms[k1]]
e@0:
e@0: h = 1 - Sm
e@0:
e@0: if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
e@0: msec[k2] = posms[k1]-1-c
e@0: mpos[k2] = pos[k1]
e@0: k2 = k2+1
e@0: else:
e@0: msec[k2] = 0
e@0: mpos[k2] = 0
e@0:
e@0:
e@0: posms = msec
e@0: pos = mpos
e@0:
e@0: fpos = pos+0.02*posms
e@0:
e@0: pfpos = floor(50*pos+posms);
e@0:
e@0: bvoice = 0.14339738781836;
e@0: bmusic = 0.04399754659151;
e@0: amusic = 1.66349817725076;
e@0: avoice = 2.32677887950291;
e@0:
e@0: pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
e@0:
e@0: L = len(pfpos)-1
e@0: V = zeros((L,2))
e@0: Vg = zeros((L,2))
e@0:
e@0: Pzero = []
e@0: Fsp1 = []
e@0: Type = zeros((L,))
e@0: Typeg = zeros((L,))
e@0:
e@0:
e@0: # Classification for each segment
e@0:
e@0: noise = zeros((L,))
e@0: noise2 = zeros((L,))
e@0:
e@0: pfpos = pfpos.astype(int)
e@0: Pmusicg = zeros((L,))
e@0: Pvoiceg = zeros((L,))
e@0: Pmusic = zeros((L,))
e@0: Pvoice = zeros((L,))
e@0: Pspace = zeros((L,))
e@0: zpos = zeros((L,))
e@0: Pmax = zeros((L,))
e@0: Pmaxg = zeros((L,))
e@0: Freq = zeros((L,6))
e@0:
e@0: zcrval = array(zcrval)
e@0: rmsval = array(rmsval)
e@0: rmszcrval = array(rmszcrval)
e@0: i
e@0: for i in range(0, L):
e@0: d = 0
e@0:
e@0: x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
e@0: x = x1
e@0: y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
e@0: y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0: noise[i] = 50*mean(exp(-y))
e@0:
e@0: Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0:
e@0: sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
e@0:
e@0: Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
e@0:
e@0: zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0:
e@0: if zpos[i] > 0.08 or x > 3:
e@0: Pvoice[i] = 10
e@0: else:
e@0: Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0:
e@0: V[i,0] = Pmusic[i]
e@0: V[i,1] = Pvoice[i]
e@0: Type[i] = argmax(V[i,:])
e@0: Pmax[i] = V[i,Type[i]]
e@0:
e@0: noise2[i] = 50 * mean(exp(-4*y))
e@0: Pmusicg[i] = 0
e@0:
e@0: if noise2[i] > 3.5:
e@0: Pvoiceg[i] = 10
e@0: else:
e@0: Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0:
e@0:
e@0: Vg[i,0] = Pmusicg[i]
e@0: Vg[i,1] = Pvoiceg[i]
e@0:
e@0: Typeg[i] = argmax(Vg[i,:])
e@0: Pmaxg[i] = Vg[i,Typeg[i]]
e@0:
e@0: Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
e@0: tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
e@0: rr = max(tempV)+0.001
e@0: tempV1 = tempV/rr
e@0: Vk0 = Vk.copy()
e@0:
e@0: for j in range(0, len(Vk)):
e@0: if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
e@0: Vk[j] = 0
e@0: # print "Changing %d" % j
e@0: Vk1 = Vk.copy()
e@0:
e@0: for j in range(0, len(Vk)):
e@0: if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
e@0: # print "Changing %d" % j
e@0:
e@0: Vk1[j] = 0
e@0:
e@0: Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
e@0: Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
e@0:
e@0: VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
e@0: dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
e@0:
e@0: Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
e@0: Freq[i, 1] = len(Vk)/50
e@0: Freq[i, 2] = sum(Freq[:,2])
e@0: Freq[i, 3] = zpos[i]
e@0: Freq[i, 4] = noise2[i]
e@0: Freq[i, 5] = max(Zca)
e@0:
e@0: Pmusicg[i] = 0
e@0: Pvoiceg[i] = 0
e@0:
e@0: if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
e@0: Pmusicg[i] = 10
e@0: else:
e@0: Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0:
e@0: if x > 3:
e@0: Pvoiceg[i] = Pvoiceg[i] + 0.1
e@0:
e@0: if x > 300:
e@0: Pmusicg[i] = 9
e@0:
e@0:
e@0: if noise2[i] > 3.5:
e@0: Pvoiceg[i] = 11
e@0:
e@0: if max(Zca) > 280:
e@0: Pmusicg[i] = 11
e@0:
e@0:
e@0: if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
e@0: Pvoiceg[i] = 12
e@0:
e@0:
e@0: if zpos[i] > 0.15:
e@0: Pvoiceg[i] = 13
e@0:
e@0:
e@0: Vg[i,0] = Pmusicg[i]
e@0: Vg[i,1] = Pvoiceg[i]
e@0:
e@0: Typeg[i] = argmax(Vg[i,:])
e@0: Pmaxg[i] = Vg[i, Typeg[i]]
e@0:
e@0: Fsp1.extend(Vk)
e@0:
e@0: if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
e@0: Type[i] = 2
e@0: Typeg[i] = 2
e@0:
e@0:
e@0: if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
e@0: Type[i] = Type[i-1]
e@0:
e@0:
e@0:
e@0: maxpos = pfpos[-1]
e@0:
e@0: tt = zeros((maxpos,))
e@0: pt = zeros((maxpos,))
e@0: pm = zeros((maxpos,))
e@0: pv = zeros((maxpos,))
e@0: ps = zeros((maxpos,))
e@0:
e@0: for i in range(0, L):
e@0: tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
e@0: pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
e@0: pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
e@0: pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
e@0: ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
e@0:
e@0:
e@0:
e@0:
e@0:
e@0:
e@0: for k in range(0, len(rmsval)):
e@0: if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
e@0: zcrval[k] = 0
e@0:
e@0:
e@0: classes = []
e@0: positions = []
e@0:
e@0: class_ = 5
e@0:
e@0: for j in range(0, len(tt)):
e@0: if class_ != tt[j]:
e@0: class_ = tt[j]
e@0: classes.append(int(class_))
e@0: positions.append(int(j*0.02*SR))
e@0:
e@0:
e@0:
e@0: return positions, classes
e@0:
e@0:
e@0:
e@0: if __name__=="__main__":
e@0: if len(argv) != 3:
e@0: print("Incorrect number of arguments:")
e@0: print("Usage: ")
e@0: print("%s ")
e@0: print("")
e@0: print("Arguments:")
e@0: print("\tThe input filename. Can be .wav, .mp3, etc...")
e@0: sys.exit(-1)
e@0: else:
e@0: print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
e@0: print("[II] Loading libraries")
e@0:
e@0: import essentia
e@0: from essentia import Pool
e@0: from essentia.standard import *
e@0: import csv
e@0: import yaml
e@0:
e@0: # reqyures matplotlib
e@0: from pylab import *
e@0:
e@0: #requires numpy
e@0: from numpy import *
e@0:
e@0: #requires scikit-learn
e@0: from sklearn.metrics import pairwise_distances
e@0:
e@0: d = {}
e@0: v = {}
e@0:
e@0: fname = argv[1]
e@0: outfname = argv[2]
e@0:
e@0:
e@0:
e@0: trackname = fname.split('.')[0].split('/')[-1]
e@0:
e@0:
e@0: if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
e@0: print("Please choose a .json or .yaml as an output file.")
e@0: sys.exit(-1)
e@0: else:
e@0: if outfname.partition('.')[-1].lower() == 'json':
e@0: output = YamlOutput(filename = outfname, format='json')
e@0: else:
e@0: output = YamlOutput(filename = outfname, format='yaml')
e@0:
e@0: print("Feature extraction of `%s\'" % fname)
e@0:
e@0: # Sampling Rate
e@0: SR = 21000.0
e@0:
e@0: # Sampling Frequency
e@0: T = 1.0/SR
e@0:
e@0: # SegmentSize
e@0: tsegmentSize = 1000 #ms
e@0: segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
e@0: else int(floor(tsegmentSize*SR/1000))
e@0:
e@0:
e@0: # FrameSize
e@0: tframeSize = 20 #ms
e@0: frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
e@0: else int(floor(tframeSize*SR/1000))
e@0:
e@0: # HopSize
e@0: hopSize = frameSize
e@0:
e@0: # Load Audio
e@0: audio = MonoLoader(filename = fname, sampleRate=SR)()
e@0:
e@0:
e@0:
e@0:
e@0:
e@0: # # Zero Crossing Rate
e@0: # zcr = ZeroCrossingRate()
e@0: #
e@0: # # RMS
e@0: # rms = RMS()
e@0: #
e@0: # # Segmentation based on 1 second segmentation
e@0: # print("[II] Calculating features for %s, please wait..." % fname)
e@0: #
e@0: # rmsval = []
e@0: # zcrval = []
e@0: # rmszcrval = []
e@0: # meanrms = []
e@0: # varrms = []
e@0: # meanzcr = []
e@0: # varzcr = []
e@0: # histograms = []
e@0: #
e@0: #
e@0: #
e@0: # # Parameters for the chi square distribution
e@0: # alphas = []
e@0: # betas = []
e@0: # L = int(len(audio)/SR)
e@0: # for n in range(0, L):
e@0: # segment = audio[n*SR:(n+1)*SR]
e@0: # framerms = []
e@0: # framezcr = []
e@0: # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
e@0: # for k in range(0, 50):
e@0: # frame = segment[0.02*SR*k:0.02*SR*(k+1)]
e@0: #
e@0: # rmsv = sqrt(sum(frame**2))
e@0: # framerms.append(rmsv)
e@0: # f1 = frame[0:-1]
e@0: # f2 = frame[1:]
e@0: # mm1 = f1*f2
e@0: # zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
e@0: #
e@0: # rmsval.append(rmsv)
e@0: # zcrval.append(zc)
e@0: # rmszcrval.append(rmsv*zc)
e@0: # framezcr.append(zc)
e@0: #
e@0: # meanrms.append(mean(framerms))
e@0: # meanzcr.append(mean(framezcr))
e@0: #
e@0: # mu = mean(framerms)
e@0: # sigmasq = var(framerms, ddof=1)
e@0: #
e@0: # beta = sigmasq/mu
e@0: # alpha = (mu/beta) - 1
e@0: #
e@0: # if mu != 0 and sigmasq != 0:
e@0: # betas.append(beta)
e@0: # alphas.append(alpha)
e@0: # else:
e@0: # betas.append(1000000)
e@0: # alphas.append(-1)
e@0: #
e@0: # histograms.append(histogram(framerms, 256))
e@0: #
e@0: # varrms.append(var(framerms, ddof=1))
e@0: # varzcr.append(var(framerms, ddof=1))
e@0: #
e@0: #
e@0: # # Computation of KVRMS
e@0: #
e@0: # l = len(rmsval)
e@0: # ftm = [mean(rmsval[0:50])]
e@0: # ftv = [var(rmsval[0:50], ddof=1)]
e@0: # kvrms = [0]
e@0: #
e@0: # for i in range(1,l-50):
e@0: # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
e@0: # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
e@0: # ftm.append(ftm_)
e@0: # ftv.append(ftv_)
e@0: #
e@0: # if ftm_ != 0:
e@0: # kvrms.append(ftv_/ftm_**2)
e@0: # else:
e@0: # kvrms.append(0)
e@0: #
e@0: #
e@0: # m = mean(kvrms)
e@0: # v = var(kvrms, ddof=1)
e@0: #
e@0: # b = v/m
e@0: # a = m/b - 1
e@0: #
e@0: # # Computing Similarity
e@0: #
e@0: # sim = []
e@0: # from scipy.special import gamma
e@0: #
e@0: # for i in range(1,L):
e@0: # k = (alphas[i]+alphas[i-1]+2)/2
e@0: # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
e@0: #
e@0: # # 0 If overflow
e@0: # if isinf(sim_) or isnan(sim_):
e@0: # sim_ = 0
e@0: # sim.append(sim_)
e@0: #
e@0: # sim2 = []
e@0: #
e@0: # for i in range(2,L):
e@0: # k = (alphas[i]+alphas[i-2]+2)/2
e@0: # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
e@0: #
e@0: # if isinf(sim_) or isnan(sim_):
e@0: # sim_ = 0
e@0: # sim2.append(sim_)
e@0: #
e@0: #
e@0: # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
e@0: ##
e@0: # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
e@0: #
e@0: #
e@0: # P = [0]
e@0: # for i in range(1, L-1):
e@0: # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
e@0: # P_ = (1-sim2[i-1])
e@0: # P.append(P_)
e@0: #
e@0: #
e@0: # P.append(0)
e@0: # P = array(P)
e@0: #
e@0: # M = mean(P)
e@0: # V1 = mean(abs(P[0:L-3]-P[1:L-2]))
e@0: # V2 = mean(abs(P[0:L-4]-P[2:L-2]))
e@0: ## V = (V1+V2)*0.25;
e@0: # V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
e@0: #
e@0: # Pd = [P[0],P[1]]
e@0: #
e@0: #
e@0: # for i in range(2,L-2):
e@0: # m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
e@0: # m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
e@0: #
e@0: # if m<0.1*M:
e@0: # m1 = 0.1*M
e@0: #
e@0: # d = (P[i]-m)/m1
e@0: #
e@0: # if d < 0:
e@0: # d = 0
e@0: # Pd.append(P[i]*d/V)
e@0: #
e@0: # Pd.append(P[L-2])
e@0: # Pd.append(P[L-1])
e@0: #
e@0: #
e@0: # # Selection of candidate windows where there is a change
e@0: #
e@0: # j = 0
e@0: # pos = []
e@0: # for i in range(2, L-3):
e@0: # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
e@0: # j += 1
e@0: # pos.append(i)
e@0: #
e@0: # if len(pos) == 0:
e@0: # sys.exit(0)
e@0: #
e@0: #
e@0: # # Computation of Change with 20 ms accuracy
e@0: #
e@0: # k2 = 0
e@0: # c = 25
e@0: #
e@0: # Sm = zeros((2*c+51,))
e@0: # posms = zeros((len(pos),))
e@0: # msec = zeros((len(pos),))
e@0: # mpos = zeros((len(pos), ))
e@0: #
e@0: # for k1 in range(0,len(pos)):
e@0: # i = 50*(pos[k1] - 1); # ms
e@0: # for j in range(-c,50+c+1):
e@0: # if ftv[i+j] != 0 and ftm[i+j] != 0:
e@0: # b1 = ftv[i+j]/ftm[i+j]
e@0: # a1 = ftm[i+j]/b1 - 1
e@0: # else:
e@0: # b1 = 1000
e@0: # a1 = -1
e@0: #
e@0: # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
e@0: # b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
e@0: # a2 = ftm[i+j+50-1]/b2 - 1
e@0: # else:
e@0: # b2 = 1000
e@0: # a2 = -1
e@0: #
e@0: # k = (a2+a1+2)/2
e@0: # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
e@0: #
e@0: # if isnan(Sm[j+c]) or isinf(Sm[j+c]):
e@0: # Sm[j+c] = 0
e@0: #
e@0: # posms[k1] = argmin(Sm)
e@0: # x = Sm[posms[k1]]
e@0: #
e@0: # h = 1 - Sm
e@0: #
e@0: # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
e@0: # msec[k2] = posms[k1]-1-c
e@0: # mpos[k2] = pos[k1]
e@0: # k2 = k2+1
e@0: # else:
e@0: # msec[k2] = 0
e@0: # mpos[k2] = 0
e@0: #
e@0: #
e@0: # posms = msec
e@0: # pos = mpos
e@0: #
e@0: # fpos = pos+0.02*posms
e@0: #
e@0: # pfpos = floor(50*pos+posms);
e@0: #
e@0: # bvoice = 0.14339738781836;
e@0: # bmusic = 0.04399754659151;
e@0: # amusic = 1.66349817725076;
e@0: # avoice = 2.32677887950291;
e@0: #
e@0: # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
e@0: #
e@0: # L = len(pfpos)-1
e@0: # V = zeros((L,2))
e@0: # Vg = zeros((L,2))
e@0: #
e@0: # Pzero = []
e@0: # Fsp1 = []
e@0: # Type = zeros((L,))
e@0: # Typeg = zeros((L,))
e@0: #
e@0: #
e@0: # # Classification for each segment
e@0: #
e@0: # noise = zeros((L,))
e@0: # noise2 = zeros((L,))
e@0: #
e@0: # pfpos = pfpos.astype(int)
e@0: # Pmusicg = zeros((L,))
e@0: # Pvoiceg = zeros((L,))
e@0: # Pmusic = zeros((L,))
e@0: # Pvoice = zeros((L,))
e@0: # Pspace = zeros((L,))
e@0: # zpos = zeros((L,))
e@0: # Pmax = zeros((L,))
e@0: # Pmaxg = zeros((L,))
e@0: # Freq = zeros((L,6))
e@0: #
e@0: # zcrval = array(zcrval)
e@0: # rmsval = array(rmsval)
e@0: # rmszcrval = array(rmszcrval)
e@0: # i
e@0: # for i in range(0, L):
e@0: # d = 0
e@0: #
e@0: # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
e@0: # x = x1
e@0: # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
e@0: # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0: # noise[i] = 50*mean(exp(-y))
e@0: #
e@0: # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0: #
e@0: # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
e@0: #
e@0: # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
e@0: #
e@0: # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
e@0: #
e@0: # if zpos[i] > 0.08 or x > 3:
e@0: # Pvoice[i] = 10
e@0: # else:
e@0: # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: #
e@0: # V[i,0] = Pmusic[i]
e@0: # V[i,1] = Pvoice[i]
e@0: # Type[i] = argmax(V[i,:])
e@0: # Pmax[i] = V[i,Type[i]]
e@0: #
e@0: # noise2[i] = 50 * mean(exp(-4*y))
e@0: # Pmusicg[i] = 0
e@0: #
e@0: # if noise2[i] > 3.5:
e@0: # Pvoiceg[i] = 10
e@0: # else:
e@0: # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0: #
e@0: #
e@0: # Vg[i,0] = Pmusicg[i]
e@0: # Vg[i,1] = Pvoiceg[i]
e@0: #
e@0: # Typeg[i] = argmax(Vg[i,:])
e@0: # Pmaxg[i] = Vg[i,Typeg[i]]
e@0: #
e@0: # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
e@0: # tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
e@0: # rr = max(tempV)+0.001
e@0: # tempV1 = tempV/rr
e@0: # Vk0 = Vk.copy()
e@0: #
e@0: # for j in range(0, len(Vk)):
e@0: # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
e@0: # Vk[j] = 0
e@0: # # print "Changing %d" % j
e@0: # Vk1 = Vk.copy()
e@0: #
e@0: # for j in range(0, len(Vk)):
e@0: # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
e@0: # # print "Changing %d" % j
e@0: #
e@0: # Vk1[j] = 0
e@0: #
e@0: # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
e@0: # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
e@0: #
e@0: # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
e@0: # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
e@0: #
e@0: # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
e@0: # Freq[i, 1] = len(Vk)/50
e@0: # Freq[i, 2] = sum(Freq[:,2])
e@0: # Freq[i, 3] = zpos[i]
e@0: # Freq[i, 4] = noise2[i]
e@0: # Freq[i, 5] = max(Zca)
e@0: #
e@0: # Pmusicg[i] = 0
e@0: # Pvoiceg[i] = 0
e@0: #
e@0: # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
e@0: # Pmusicg[i] = 10
e@0: # else:
e@0: # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
e@0: # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
e@0: #
e@0: # if x > 3:
e@0: # Pvoiceg[i] = Pvoiceg[i] + 0.1
e@0: #
e@0: # if x > 300:
e@0: # Pmusicg[i] = 9
e@0: #
e@0: #
e@0: # if noise2[i] > 3.5:
e@0: # Pvoiceg[i] = 11
e@0: #
e@0: # if max(Zca) > 280:
e@0: # Pmusicg[i] = 11
e@0: #
e@0: #
e@0: # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
e@0: # Pvoiceg[i] = 12
e@0: #
e@0: #
e@0: # if zpos[i] > 0.15:
e@0: # Pvoiceg[i] = 13
e@0: #
e@0: #
e@0: # Vg[i,0] = Pmusicg[i]
e@0: # Vg[i,1] = Pvoiceg[i]
e@0: #
e@0: # Typeg[i] = argmax(Vg[i,:])
e@0: # Pmaxg[i] = Vg[i, Typeg[i]]
e@0: #
e@0: # Fsp1.extend(Vk)
e@0: #
e@0: # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
e@0: # Type[i] = 2
e@0: # Typeg[i] = 2
e@0: #
e@0: #
e@0: # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
e@0: # Type[i] = Type[i-1]
e@0: #
e@0: #
e@0: #
e@0: # maxpos = pfpos[-1]
e@0: #
e@0: # tt = zeros((maxpos,))
e@0: # pt = zeros((maxpos,))
e@0: # pm = zeros((maxpos,))
e@0: # pv = zeros((maxpos,))
e@0: # ps = zeros((maxpos,))
e@0: #
e@0: # for i in range(0, L):
e@0: # tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
e@0: # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
e@0: # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
e@0: # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
e@0: # ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: # for k in range(0, len(rmsval)):
e@0: # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
e@0: # zcrval[k] = 0
e@0: #
e@0: #
e@0: # classes = []
e@0: # positions = []
e@0: #
e@0: # class_ = 5
e@0: #
e@0: # for j in range(0, len(tt)):
e@0: # if class_ != tt[j]:
e@0: # class_ = tt[j]
e@0: # classes.append(int(class_))
e@0: # positions.append(int(j*0.02*SR))
e@0: #
e@0: # classvec = zeros((len(audio),))
e@0: #
e@0: # if len(positions) == 1:
e@0: # classvec = classes[0]*ones((len(audio),))
e@0: # for j in range(1, len(positions)):
e@0: # classvec[positions[j-1]:positions[j]] = classes[j-1]
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0: #
e@0:
e@0: #
e@0:
e@0:
e@0: positions, classes = trmszc(audio, SR)
e@0:
e@0: classvec = zeros((len(audio),))
e@0:
e@0: if len(positions) == 1:
e@0: classvec = classes[0]*ones((len(audio),))
e@0: for j in range(1, len(positions)):
e@0: classvec[positions[j-1]:positions[j]] = classes[j-1]
e@0:
e@0: plot(audio)
e@0: plot(classvec)
e@0: