chourdakisreiss2016: experiment-reverb/code/panagiotakis.py @ 2:c87a9505f294 (tip)

changeset: Added LICENSE for code, removed .wav files
author:    Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date:      Sat, 30 Sep 2017 13:25:50 +0100
parents:   246d5546657c
# -*- coding: utf-8 -*-
"""
Created on Mon Jun  1 17:17:34 2015

@author: mmxgn
"""

import sys
from sys import argv

# numpy names (sqrt, mean, var, zeros, ...) are used throughout trmszc()
from numpy import *


def trmszc(audio, SR=21000):
    """Speech/music/silence segmentation after Panagiotakis and Tziritas.

    The signal is analysed in 1 s segments of 50 non-overlapping 20 ms
    frames using RMS and zero-crossing features.  Returns two lists,
    `positions` (change points in samples) and `classes` (0 = music,
    1 = speech, 2 = silence), one entry per class change.
    """
    SR = int(SR)

    # Segmentation based on 1 second segments
    print("[II] Calculating features, please wait...")
    rmsval = []
    zcrval = []
    rmszcrval = []
    meanrms = []
    varrms = []
    meanzcr = []
    varzcr = []
    histograms = []

    # Parameters for the chi square (gamma) distribution of the frame RMS
    alphas = []
    betas = []
    L = int(len(audio)/SR)
    for n in range(0, L):
        segment = audio[n*SR:(n+1)*SR]
        framerms = []
        framezcr = []
        # for frame in FrameGenerator(segment, frameSize=frameSize, hopSize=frameSize):
        for k in range(0, 50):
            frame = segment[int(0.02*SR*k):int(0.02*SR*(k+1))]
            rmsv = sqrt(sum(frame**2))
            framerms.append(rmsv)
            f1 = frame[0:-1]
            f2 = frame[1:]
            mm1 = f1*f2
            zc = 0.5*sum((abs(sign(mm1)) - sign(mm1)))
            rmsval.append(rmsv)
            zcrval.append(zc)
            rmszcrval.append(rmsv*zc)
            framezcr.append(zc)
        meanrms.append(mean(framerms))
        meanzcr.append(mean(framezcr))
        mu = mean(framerms)
        sigmasq = var(framerms, ddof=1)
        beta = sigmasq/mu
        alpha = (mu/beta) - 1
        if mu != 0 and sigmasq != 0:
            betas.append(beta)
            alphas.append(alpha)
        else:
            betas.append(1000000)
            alphas.append(-1)
        histograms.append(histogram(framerms, 256))
        varrms.append(var(framerms, ddof=1))
        varzcr.append(var(framezcr, ddof=1))

    # Computation of KVRMS (running variance/mean**2 of the RMS over 1 s)
    l = len(rmsval)
    ftm = [mean(rmsval[0:50])]
    ftv = [var(rmsval[0:50], ddof=1)]
    kvrms = [0]
    for i in range(1, l-50):
        ftm_ = ftm[i-1] + (rmsval[i+49] - rmsval[i-1])/50
        ftv_ = ftv[i-1] + ftm[i-1]**2 + ((rmsval[i+49]**2 - rmsval[i-1]**2)/50) - ftm_**2
        ftm.append(ftm_)
        ftv.append(ftv_)
        if ftm_ != 0:
            kvrms.append(ftv_/ftm_**2)
        else:
            kvrms.append(0)

    m = mean(kvrms)
    v = var(kvrms, ddof=1)
    b = v/m
    a = m/b - 1

    # Computing similarity between neighbouring 1 s segments
    sim = []
    from scipy.special import gamma
    for i in range(1, L):
        k = (alphas[i] + alphas[i-1] + 2)/2
        sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
        # 0 if overflow
        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim.append(sim_)

    sim2 = []
    for i in range(2, L):
        k = (alphas[i] + alphas[i-2] + 2)/2
        sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim2.append(sim_)

    p = lambda x, a, b: x**a*exp(-x/b)/(b**(a+1)*gamma(a+1))
    # rho = lambda alpha1, beta1, alpha2, beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)

    P = [0]
    for i in range(1, L-1):
        P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
        P_ = (1-sim2[i-1])
        P.append(P_)
    P.append(0)
    P = array(P)

    M = mean(P)
    V1 = mean(abs(P[0:L-3] - P[1:L-2]))
    V2 = mean(abs(P[0:L-4] - P[2:L-2]))
    # V = (V1+V2)*0.25
    V = 0.25*median(abs((P[0:L-4] - P[2:L-2])))

    Pd = [P[0], P[1]]
    for i in range(2, L-2):
        m = mean(concatenate((P[i-2:i], P[i+1:i+3])))
        m1 = max(concatenate((P[i-2:i], P[i+1:i+3])))
        if m < 0.1*M:
            m1 = 0.1*M
        d = (P[i]-m)/m1
        if d < 0:
            d = 0
        Pd.append(P[i]*d/V)
    Pd.append(P[L-2])
    Pd.append(P[L-1])

    # Selection of candidate windows where there is a change
    j = 0
    pos = []
    for i in range(2, L-3):
        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
            j += 1
            pos.append(i)

    if len(pos) == 0:
        # no candidate change points at all
        sys.exit(0)
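    # The similarity used above (sim, sim2) and in the 20 ms refinement below
    # (Sm) is the Bhattacharyya coefficient of the two gamma densities fitted
    # to the frame RMS of the compared windows (cf. the commented-out rho
    # lambda above):
    #
    #   p(x; a, b) = x**a * exp(-x/b) / (gamma(a+1) * b**(a+1))
    #
    #   rho(a1, b1, a2, b2) = gamma((a1+a2)/2 + 1)
    #                         * (2/(b1+b2))**((a1+a2)/2 + 1)
    #                         * b1**((a2+1)/2) * b2**((a1+1)/2)
    #                         / sqrt(gamma(a1+1) * gamma(a2+1))
    #
    # rho is 1 for identical distributions and tends to 0 as they diverge,
    # so 1 - rho acts as the change score (P above, h below).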
    # Computation of the change position with 20 ms accuracy
    k2 = 0
    c = 25
    Sm = zeros((2*c+51,))
    posms = zeros((len(pos),))
    msec = zeros((len(pos),))
    mpos = zeros((len(pos),))
    for k1 in range(0, len(pos)):
        i = 50*(pos[k1] - 1)  # frame index of the candidate second
        for j in range(-c, 50+c+1):
            if ftv[i+j] != 0 and ftm[i+j] != 0:
                b1 = ftv[i+j]/ftm[i+j]
                a1 = ftm[i+j]/b1 - 1
            else:
                b1 = 1000
                a1 = -1
            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
                a2 = ftm[i+j+50-1]/b2 - 1
            else:
                b2 = 1000
                a2 = -1
            k = (a2+a1+2)/2
            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
                Sm[j+c] = 0
        posms[k1] = argmin(Sm)
        x = Sm[int(posms[k1])]
        h = 1 - Sm
        if mean(h[int(max(posms[k1]-25, 1)):int(min(2*c+51, posms[k1]+25))]) > 0.1:
            msec[k2] = posms[k1] - 1 - c
            mpos[k2] = pos[k1]
            k2 = k2 + 1
        else:
            msec[k2] = 0
            mpos[k2] = 0

    posms = msec
    pos = mpos
    fpos = pos + 0.02*posms
    pfpos = floor(50*pos + posms)

    # Gamma parameters of the KVRMS feature for the voice and music classes
    bvoice = 0.14339738781836
    bmusic = 0.04399754659151
    amusic = 1.66349817725076
    avoice = 2.32677887950291

    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
    L = len(pfpos) - 1
    V = zeros((L, 2))
    Vg = zeros((L, 2))
    Pzero = []
    Fsp1 = []
    Type = zeros((L,))
    Typeg = zeros((L,))

    # Classification for each segment
    noise = zeros((L,))
    noise2 = zeros((L,))
    pfpos = pfpos.astype(int)
    Pmusicg = zeros((L,))
    Pvoiceg = zeros((L,))
    Pmusic = zeros((L,))
    Pvoice = zeros((L,))
    Pspace = zeros((L,))
    zpos = zeros((L,))
    Pmax = zeros((L,))
    Pmaxg = zeros((L,))
    Freq = zeros((L, 6))
    zcrval = array(zcrval)
    rmsval = array(rmsval)
    rmszcrval = array(rmszcrval)

    for i in range(0, L):
        d = 0
        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
        x = x1
        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
        noise[i] = 50*mean(exp(-y))

        Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
        Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1]) + 0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))

        if zpos[i] > 0.08 or x > 3:
            Pvoice[i] = 10
        else:
            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))

        V[i, 0] = Pmusic[i]
        V[i, 1] = Pvoice[i]
        Type[i] = argmax(V[i, :])
        Pmax[i] = V[i, Type[i]]

        noise2[i] = 50*mean(exp(-4*y))
        Pmusicg[i] = 0
        if noise2[i] > 3.5:
            Pvoiceg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        Vg[i, 0] = Pmusicg[i]
        Vg[i, 1] = Pvoiceg[i]
        Typeg[i] = argmax(Vg[i, :])
        Pmaxg[i] = Vg[i, Typeg[i]]

        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
        rr = max(tempV) + 0.001
        tempV1 = tempV/rr
        Vk0 = Vk.copy()
        for j in range(0, len(Vk)):
            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
                Vk[j] = 0
        Vk1 = Vk.copy()
        for j in range(0, len(Vk)):
            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
                Vk1[j] = 0

        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])

        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
        Freq[i, 1] = len(Vk)/50.0
        Freq[i, 2] = sum(Freq[:, 2])
        Freq[i, 3] = zpos[i]
        Freq[i, 4] = noise2[i]
        Freq[i, 5] = max(Zca)

        Pmusicg[i] = 0
        Pvoiceg[i] = 0
        if Freq[i, 0] < 0.59 and Freq[i, 1] > 2.5:
            Pmusicg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
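        # Hand-tuned overrides: extreme KVRMS, a noisy envelope, a high
        # zero-crossing peak, or a dense run of low-ZCR frames force the
        # decision towards speech or music regardless of the two gamma
        # likelihoods computed above.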
        if x > 3:
            Pvoiceg[i] = Pvoiceg[i] + 0.1
        if x > 300:
            Pmusicg[i] = 9
        if noise2[i] > 3.5:
            Pvoiceg[i] = 11
        if max(Zca) > 280:
            Pmusicg[i] = 11
        if Freq[i, 0] > 4.62 and Freq[i, 1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05):
            Pvoiceg[i] = 12
        if zpos[i] > 0.15:
            Pvoiceg[i] = 13

        Vg[i, 0] = Pmusicg[i]
        Vg[i, 1] = Pvoiceg[i]
        Typeg[i] = argmax(Vg[i, :])
        Pmaxg[i] = Vg[i, Typeg[i]]
        Fsp1.extend(Vk)

        # Silence ("space") overrides both decisions
        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
            Type[i] = 2
            Typeg[i] = 2

        # Segments shorter than 0.5 s inherit the previous class
        if (pfpos[i+1] - pfpos[i])/50.0 < 0.5 and i >= 1:
            Type[i] = Type[i-1]

    # Expand the per-segment decisions to 20 ms resolution
    maxpos = pfpos[-1]
    tt = zeros((maxpos,))
    pt = zeros((maxpos,))
    pm = zeros((maxpos,))
    pv = zeros((maxpos,))
    ps = zeros((maxpos,))
    for i in range(0, L):
        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]

    for k in range(0, len(rmsval)):
        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
            zcrval[k] = 0

    # Collapse the frame-level labels into (position, class) change points
    classes = []
    positions = []
    class_ = 5
    for j in range(0, len(tt)):
        if class_ != tt[j]:
            class_ = tt[j]
            classes.append(int(class_))
            positions.append(int(j*0.02*SR))

    return positions, classes


if __name__ == "__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <input> <output>" % argv[0])
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
        print("<output>\tThe output filename (.json or .yaml).")
        sys.exit(-1)
    else:
        print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
        print("[II] Loading libraries")
        import essentia
        from essentia import Pool
        from essentia.standard import *
        import csv
        import yaml
        # requires matplotlib
        from pylab import *
        # requires numpy
        from numpy import *
        # requires scikit-learn
        from sklearn.metrics import pairwise_distances

        d = {}
        v = {}
        fname = argv[1]
        outfname = argv[2]
        trackname = fname.split('.')[0].split('/')[-1]

        if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
            print("Please choose a .json or .yaml as an output file.")
            sys.exit(-1)
        else:
            if outfname.partition('.')[-1].lower() == 'json':
                output = YamlOutput(filename=outfname, format='json')
            else:
                output = YamlOutput(filename=outfname, format='yaml')

        print("Feature extraction of `%s'" % fname)

        # Sampling rate
        SR = 21000.0
        # Sampling period
        T = 1.0/SR
        # Segment size
        tsegmentSize = 1000  # ms
        segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000), 2) == 0 \
            else int(floor(tsegmentSize*SR/1000))
        # Frame size
        tframeSize = 20  # ms
        frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000), 2) == 0 \
            else int(floor(tframeSize*SR/1000))
        # Hop size
        hopSize = frameSize

        # Load audio (mono, resampled to SR)
        audio = MonoLoader(filename=fname, sampleRate=SR)()
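        # If essentia is unavailable, a rough stand-in for the loader above
        # (an untested sketch assuming a mono 16-bit .wav input and scipy):
        #
        #   from fractions import Fraction
        #   from scipy.io import wavfile
        #   from scipy.signal import resample_poly
        #   insr, x = wavfile.read(fname)
        #   x = x.astype(float)/32768.0            # scale to [-1, 1]
        #   frac = Fraction(int(SR), int(insr))    # e.g. 21000/44100
        #   audio = resample_poly(x, frac.numerator, frac.denominator)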
        positions, classes = trmszc(audio, SR)

        # Per-sample class vector for plotting
        classvec = zeros((len(audio),))
        if len(positions) == 1:
            classvec = classes[0]*ones((len(audio),))
        for j in range(1, len(positions)):
            classvec[positions[j-1]:positions[j]] = classes[j-1]

        plot(audio)
        plot(classvec)
        show()
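        # A minimal sketch of how `positions` and `classes` could be written
        # out through the YamlOutput configured above (the descriptor names
        # below are illustrative, not part of the original script):
        #
        #   pool = Pool()
        #   for p, c in zip(positions, classes):
        #       pool.add('segmentation.positions', float(p))
        #       pool.add('segmentation.classes', float(c))
        #   output(pool)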