e@0: # -*- coding: utf-8 -*- e@0: """ e@0: Created on Mon Jun 1 17:17:34 2015 e@0: e@0: @author: mmxgn e@0: """ e@0: e@0: e@0: from sys import argv e@0: e@0: e@0: e@0: def trmszc(audio, SR=21000): e@0: # Segmentation based on 1 second segmentation e@0: print("[II] Calculating features for %s, please wait..." % fname) e@0: e@0: rmsval = [] e@0: zcrval = [] e@0: rmszcrval = [] e@0: meanrms = [] e@0: varrms = [] e@0: meanzcr = [] e@0: varzcr = [] e@0: histograms = [] e@0: e@0: e@0: e@0: # Parameters for the chi square distribution e@0: alphas = [] e@0: betas = [] e@0: L = int(len(audio)/SR) e@0: for n in range(0, L): e@0: segment = audio[n*SR:(n+1)*SR] e@0: framerms = [] e@0: framezcr = [] e@0: # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize): e@0: for k in range(0, 50): e@0: frame = segment[0.02*SR*k:0.02*SR*(k+1)] e@0: e@0: rmsv = sqrt(sum(frame**2)) e@0: framerms.append(rmsv) e@0: f1 = frame[0:-1] e@0: f2 = frame[1:] e@0: mm1 = f1*f2 e@0: zc = 0.5*sum((abs(sign(mm1))-sign(mm1))) e@0: e@0: rmsval.append(rmsv) e@0: zcrval.append(zc) e@0: rmszcrval.append(rmsv*zc) e@0: framezcr.append(zc) e@0: e@0: meanrms.append(mean(framerms)) e@0: meanzcr.append(mean(framezcr)) e@0: e@0: mu = mean(framerms) e@0: sigmasq = var(framerms, ddof=1) e@0: e@0: beta = sigmasq/mu e@0: alpha = (mu/beta) - 1 e@0: e@0: if mu != 0 and sigmasq != 0: e@0: betas.append(beta) e@0: alphas.append(alpha) e@0: else: e@0: betas.append(1000000) e@0: alphas.append(-1) e@0: e@0: histograms.append(histogram(framerms, 256)) e@0: e@0: varrms.append(var(framerms, ddof=1)) e@0: varzcr.append(var(framerms, ddof=1)) e@0: e@0: e@0: # Computation of KVRMS e@0: e@0: l = len(rmsval) e@0: ftm = [mean(rmsval[0:50])] e@0: ftv = [var(rmsval[0:50], ddof=1)] e@0: kvrms = [0] e@0: e@0: for i in range(1,l-50): e@0: ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50 e@0: ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2 e@0: ftm.append(ftm_) e@0: ftv.append(ftv_) e@0: e@0: if ftm_ != 0: e@0: kvrms.append(ftv_/ftm_**2) e@0: else: e@0: kvrms.append(0) e@0: e@0: e@0: m = mean(kvrms) e@0: v = var(kvrms, ddof=1) e@0: e@0: b = v/m e@0: a = m/b - 1 e@0: e@0: # Computing Similarity e@0: e@0: sim = [] e@0: from scipy.special import gamma e@0: e@0: for i in range(1,L): e@0: k = (alphas[i]+alphas[i-1]+2)/2 e@0: sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1))) e@0: e@0: # 0 If overflow e@0: if isinf(sim_) or isnan(sim_): e@0: sim_ = 0 e@0: sim.append(sim_) e@0: e@0: sim2 = [] e@0: e@0: for i in range(2,L): e@0: k = (alphas[i]+alphas[i-2]+2)/2 e@0: sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1))) e@0: e@0: if isinf(sim_) or isnan(sim_): e@0: sim_ = 0 e@0: sim2.append(sim_) e@0: e@0: e@0: p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1) e@0: # e@0: rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1) e@0: e@0: e@0: P = [0] e@0: for i in range(1, L-1): e@0: P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1]) e@0: P_ = (1-sim2[i-1]) e@0: P.append(P_) e@0: e@0: e@0: P.append(0) e@0: P = array(P) e@0: e@0: M = mean(P) e@0: V1 = mean(abs(P[0:L-3]-P[1:L-2])) e@0: V2 = mean(abs(P[0:L-4]-P[2:L-2])) e@0: # V = (V1+V2)*0.25; e@0: V = 0.25*median(abs((P[0:L-4]-P[2:L-2]))) e@0: e@0: Pd = [P[0],P[1]] e@0: e@0: e@0: for i in range(2,L-2): e@0: m = mean(concatenate((P[i-2:i],P[i+1:i+3]))) e@0: m1 = max(concatenate((P[i-2:i],P[i+1:i+3]))) e@0: e@0: if m<0.1*M: e@0: m1 = 0.1*M e@0: e@0: d = (P[i]-m)/m1 e@0: e@0: if d < 0: e@0: d = 0 e@0: Pd.append(P[i]*d/V) e@0: e@0: Pd.append(P[L-2]) e@0: Pd.append(P[L-1]) e@0: e@0: e@0: # Selection of candidate windows where there is a change e@0: e@0: j = 0 e@0: pos = [] e@0: for i in range(2, L-3): e@0: if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]: e@0: j += 1 e@0: pos.append(i) e@0: e@0: if len(pos) == 0: e@0: sys.exit(0) e@0: e@0: e@0: # Computation of Change with 20 ms accuracy e@0: e@0: k2 = 0 e@0: c = 25 e@0: e@0: Sm = zeros((2*c+51,)) e@0: posms = zeros((len(pos),)) e@0: msec = zeros((len(pos),)) e@0: mpos = zeros((len(pos), )) e@0: e@0: for k1 in range(0,len(pos)): e@0: i = 50*(pos[k1] - 1); # ms e@0: for j in range(-c,50+c+1): e@0: if ftv[i+j] != 0 and ftm[i+j] != 0: e@0: b1 = ftv[i+j]/ftm[i+j] e@0: a1 = ftm[i+j]/b1 - 1 e@0: else: e@0: b1 = 1000 e@0: a1 = -1 e@0: e@0: if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0: e@0: b2 = ftv[i+j+50-1]/ftm[i+j+50-1] e@0: a2 = ftm[i+j+50-1]/b2 - 1 e@0: else: e@0: b2 = 1000 e@0: a2 = -1 e@0: e@0: k = (a2+a1+2)/2 e@0: Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1))) e@0: e@0: if isnan(Sm[j+c]) or isinf(Sm[j+c]): e@0: Sm[j+c] = 0 e@0: e@0: posms[k1] = argmin(Sm) e@0: x = Sm[posms[k1]] e@0: e@0: h = 1 - Sm e@0: e@0: if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1: e@0: msec[k2] = posms[k1]-1-c e@0: mpos[k2] = pos[k1] e@0: k2 = k2+1 e@0: else: e@0: msec[k2] = 0 e@0: mpos[k2] = 0 e@0: e@0: e@0: posms = msec e@0: pos = mpos e@0: e@0: fpos = pos+0.02*posms e@0: e@0: pfpos = floor(50*pos+posms); e@0: e@0: bvoice = 0.14339738781836; e@0: bmusic = 0.04399754659151; e@0: amusic = 1.66349817725076; e@0: avoice = 2.32677887950291; e@0: e@0: pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1]))) e@0: e@0: L = len(pfpos)-1 e@0: V = zeros((L,2)) e@0: Vg = zeros((L,2)) e@0: e@0: Pzero = [] e@0: Fsp1 = [] e@0: Type = zeros((L,)) e@0: Typeg = zeros((L,)) e@0: e@0: e@0: # Classification for each segment e@0: e@0: noise = zeros((L,)) e@0: noise2 = zeros((L,)) e@0: e@0: pfpos = pfpos.astype(int) e@0: Pmusicg = zeros((L,)) e@0: Pvoiceg = zeros((L,)) e@0: Pmusic = zeros((L,)) e@0: Pvoice = zeros((L,)) e@0: Pspace = zeros((L,)) e@0: zpos = zeros((L,)) e@0: Pmax = zeros((L,)) e@0: Pmaxg = zeros((L,)) e@0: Freq = zeros((L,6)) e@0: e@0: zcrval = array(zcrval) e@0: rmsval = array(rmsval) e@0: rmszcrval = array(rmszcrval) e@0: i e@0: for i in range(0, L): e@0: d = 0 e@0: e@0: x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1]) e@0: x = x1 e@0: y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1] e@0: y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1])) e@0: noise[i] = 50*mean(exp(-y)) e@0: e@0: Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: e@0: sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1]) e@0: e@0: Pspace[i] = 6*exp((-sm**2)/(2*0.6**2)) e@0: e@0: zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1])) e@0: e@0: if zpos[i] > 0.08 or x > 3: e@0: Pvoice[i] = 10 e@0: else: e@0: Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: e@0: V[i,0] = Pmusic[i] e@0: V[i,1] = Pvoice[i] e@0: Type[i] = argmax(V[i,:]) e@0: Pmax[i] = V[i,Type[i]] e@0: e@0: noise2[i] = 50 * mean(exp(-4*y)) e@0: Pmusicg[i] = 0 e@0: e@0: if noise2[i] > 3.5: e@0: Pvoiceg[i] = 10 e@0: else: e@0: Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: e@0: e@0: Vg[i,0] = Pmusicg[i] e@0: Vg[i,1] = Pvoiceg[i] e@0: e@0: Typeg[i] = argmax(Vg[i,:]) e@0: Pmaxg[i] = Vg[i,Typeg[i]] e@0: e@0: Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1])) e@0: tempV = rmsval[pfpos[i]:pfpos[i+1]+1] e@0: rr = max(tempV)+0.001 e@0: tempV1 = tempV/rr e@0: Vk0 = Vk.copy() e@0: e@0: for j in range(0, len(Vk)): e@0: if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1: e@0: Vk[j] = 0 e@0: # print "Changing %d" % j e@0: Vk1 = Vk.copy() e@0: e@0: for j in range(0, len(Vk)): e@0: if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV): e@0: # print "Changing %d" % j e@0: e@0: Vk1[j] = 0 e@0: e@0: Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1]) e@0: Pol = (ones((len(Vk),)) - sign(Vk))*Vk0 e@0: e@0: VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1] e@0: dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1]) e@0: e@0: Freq[i, 0] = 50*sum(dVk)/(2*len(Vk)) e@0: Freq[i, 1] = len(Vk)/50 e@0: Freq[i, 2] = sum(Freq[:,2]) e@0: Freq[i, 3] = zpos[i] e@0: Freq[i, 4] = noise2[i] e@0: Freq[i, 5] = max(Zca) e@0: e@0: Pmusicg[i] = 0 e@0: Pvoiceg[i] = 0 e@0: e@0: if Freq[i,0] < 0.59 and Freq[i,1] > 2.5: e@0: Pmusicg[i] = 10 e@0: else: e@0: Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: e@0: if x > 3: e@0: Pvoiceg[i] = Pvoiceg[i] + 0.1 e@0: e@0: if x > 300: e@0: Pmusicg[i] = 9 e@0: e@0: e@0: if noise2[i] > 3.5: e@0: Pvoiceg[i] = 11 e@0: e@0: if max(Zca) > 280: e@0: Pmusicg[i] = 11 e@0: e@0: e@0: if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ): e@0: Pvoiceg[i] = 12 e@0: e@0: e@0: if zpos[i] > 0.15: e@0: Pvoiceg[i] = 13 e@0: e@0: e@0: Vg[i,0] = Pmusicg[i] e@0: Vg[i,1] = Pvoiceg[i] e@0: e@0: Typeg[i] = argmax(Vg[i,:]) e@0: Pmaxg[i] = Vg[i, Typeg[i]] e@0: e@0: Fsp1.extend(Vk) e@0: e@0: if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5): e@0: Type[i] = 2 e@0: Typeg[i] = 2 e@0: e@0: e@0: if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1: e@0: Type[i] = Type[i-1] e@0: e@0: e@0: e@0: maxpos = pfpos[-1] e@0: e@0: tt = zeros((maxpos,)) e@0: pt = zeros((maxpos,)) e@0: pm = zeros((maxpos,)) e@0: pv = zeros((maxpos,)) e@0: ps = zeros((maxpos,)) e@0: e@0: for i in range(0, L): e@0: tt[pfpos[i]:pfpos[i+1]] = Typeg[i] e@0: pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i] e@0: pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i] e@0: pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i] e@0: ps[pfpos[i]:pfpos[i+1]] = Pspace[i] e@0: e@0: e@0: e@0: e@0: e@0: e@0: for k in range(0, len(rmsval)): e@0: if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval): e@0: zcrval[k] = 0 e@0: e@0: e@0: classes = [] e@0: positions = [] e@0: e@0: class_ = 5 e@0: e@0: for j in range(0, len(tt)): e@0: if class_ != tt[j]: e@0: class_ = tt[j] e@0: classes.append(int(class_)) e@0: positions.append(int(j*0.02*SR)) e@0: e@0: e@0: e@0: return positions, classes e@0: e@0: e@0: e@0: if __name__=="__main__": e@0: if len(argv) != 3: e@0: print("Incorrect number of arguments:") e@0: print("Usage: ") e@0: print("%s ") e@0: print("") e@0: print("Arguments:") e@0: print("\tThe input filename. Can be .wav, .mp3, etc...") e@0: sys.exit(-1) e@0: else: e@0: print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator") e@0: print("[II] Loading libraries") e@0: e@0: import essentia e@0: from essentia import Pool e@0: from essentia.standard import * e@0: import csv e@0: import yaml e@0: e@0: # reqyures matplotlib e@0: from pylab import * e@0: e@0: #requires numpy e@0: from numpy import * e@0: e@0: #requires scikit-learn e@0: from sklearn.metrics import pairwise_distances e@0: e@0: d = {} e@0: v = {} e@0: e@0: fname = argv[1] e@0: outfname = argv[2] e@0: e@0: e@0: e@0: trackname = fname.split('.')[0].split('/')[-1] e@0: e@0: e@0: if outfname.partition('.')[-1].lower() not in ['json', 'yaml']: e@0: print("Please choose a .json or .yaml as an output file.") e@0: sys.exit(-1) e@0: else: e@0: if outfname.partition('.')[-1].lower() == 'json': e@0: output = YamlOutput(filename = outfname, format='json') e@0: else: e@0: output = YamlOutput(filename = outfname, format='yaml') e@0: e@0: print("Feature extraction of `%s\'" % fname) e@0: e@0: # Sampling Rate e@0: SR = 21000.0 e@0: e@0: # Sampling Frequency e@0: T = 1.0/SR e@0: e@0: # SegmentSize e@0: tsegmentSize = 1000 #ms e@0: segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \ e@0: else int(floor(tsegmentSize*SR/1000)) e@0: e@0: e@0: # FrameSize e@0: tframeSize = 20 #ms e@0: frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \ e@0: else int(floor(tframeSize*SR/1000)) e@0: e@0: # HopSize e@0: hopSize = frameSize e@0: e@0: # Load Audio e@0: audio = MonoLoader(filename = fname, sampleRate=SR)() e@0: e@0: e@0: e@0: e@0: e@0: # # Zero Crossing Rate e@0: # zcr = ZeroCrossingRate() e@0: # e@0: # # RMS e@0: # rms = RMS() e@0: # e@0: # # Segmentation based on 1 second segmentation e@0: # print("[II] Calculating features for %s, please wait..." % fname) e@0: # e@0: # rmsval = [] e@0: # zcrval = [] e@0: # rmszcrval = [] e@0: # meanrms = [] e@0: # varrms = [] e@0: # meanzcr = [] e@0: # varzcr = [] e@0: # histograms = [] e@0: # e@0: # e@0: # e@0: # # Parameters for the chi square distribution e@0: # alphas = [] e@0: # betas = [] e@0: # L = int(len(audio)/SR) e@0: # for n in range(0, L): e@0: # segment = audio[n*SR:(n+1)*SR] e@0: # framerms = [] e@0: # framezcr = [] e@0: # # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize): e@0: # for k in range(0, 50): e@0: # frame = segment[0.02*SR*k:0.02*SR*(k+1)] e@0: # e@0: # rmsv = sqrt(sum(frame**2)) e@0: # framerms.append(rmsv) e@0: # f1 = frame[0:-1] e@0: # f2 = frame[1:] e@0: # mm1 = f1*f2 e@0: # zc = 0.5*sum((abs(sign(mm1))-sign(mm1))) e@0: # e@0: # rmsval.append(rmsv) e@0: # zcrval.append(zc) e@0: # rmszcrval.append(rmsv*zc) e@0: # framezcr.append(zc) e@0: # e@0: # meanrms.append(mean(framerms)) e@0: # meanzcr.append(mean(framezcr)) e@0: # e@0: # mu = mean(framerms) e@0: # sigmasq = var(framerms, ddof=1) e@0: # e@0: # beta = sigmasq/mu e@0: # alpha = (mu/beta) - 1 e@0: # e@0: # if mu != 0 and sigmasq != 0: e@0: # betas.append(beta) e@0: # alphas.append(alpha) e@0: # else: e@0: # betas.append(1000000) e@0: # alphas.append(-1) e@0: # e@0: # histograms.append(histogram(framerms, 256)) e@0: # e@0: # varrms.append(var(framerms, ddof=1)) e@0: # varzcr.append(var(framerms, ddof=1)) e@0: # e@0: # e@0: # # Computation of KVRMS e@0: # e@0: # l = len(rmsval) e@0: # ftm = [mean(rmsval[0:50])] e@0: # ftv = [var(rmsval[0:50], ddof=1)] e@0: # kvrms = [0] e@0: # e@0: # for i in range(1,l-50): e@0: # ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50 e@0: # ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2 e@0: # ftm.append(ftm_) e@0: # ftv.append(ftv_) e@0: # e@0: # if ftm_ != 0: e@0: # kvrms.append(ftv_/ftm_**2) e@0: # else: e@0: # kvrms.append(0) e@0: # e@0: # e@0: # m = mean(kvrms) e@0: # v = var(kvrms, ddof=1) e@0: # e@0: # b = v/m e@0: # a = m/b - 1 e@0: # e@0: # # Computing Similarity e@0: # e@0: # sim = [] e@0: # from scipy.special import gamma e@0: # e@0: # for i in range(1,L): e@0: # k = (alphas[i]+alphas[i-1]+2)/2 e@0: # sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1))) e@0: # e@0: # # 0 If overflow e@0: # if isinf(sim_) or isnan(sim_): e@0: # sim_ = 0 e@0: # sim.append(sim_) e@0: # e@0: # sim2 = [] e@0: # e@0: # for i in range(2,L): e@0: # k = (alphas[i]+alphas[i-2]+2)/2 e@0: # sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1))) e@0: # e@0: # if isinf(sim_) or isnan(sim_): e@0: # sim_ = 0 e@0: # sim2.append(sim_) e@0: # e@0: # e@0: # p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1) e@0: ## e@0: # rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1) e@0: # e@0: # e@0: # P = [0] e@0: # for i in range(1, L-1): e@0: # P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1]) e@0: # P_ = (1-sim2[i-1]) e@0: # P.append(P_) e@0: # e@0: # e@0: # P.append(0) e@0: # P = array(P) e@0: # e@0: # M = mean(P) e@0: # V1 = mean(abs(P[0:L-3]-P[1:L-2])) e@0: # V2 = mean(abs(P[0:L-4]-P[2:L-2])) e@0: ## V = (V1+V2)*0.25; e@0: # V = 0.25*median(abs((P[0:L-4]-P[2:L-2]))) e@0: # e@0: # Pd = [P[0],P[1]] e@0: # e@0: # e@0: # for i in range(2,L-2): e@0: # m = mean(concatenate((P[i-2:i],P[i+1:i+3]))) e@0: # m1 = max(concatenate((P[i-2:i],P[i+1:i+3]))) e@0: # e@0: # if m<0.1*M: e@0: # m1 = 0.1*M e@0: # e@0: # d = (P[i]-m)/m1 e@0: # e@0: # if d < 0: e@0: # d = 0 e@0: # Pd.append(P[i]*d/V) e@0: # e@0: # Pd.append(P[L-2]) e@0: # Pd.append(P[L-1]) e@0: # e@0: # e@0: # # Selection of candidate windows where there is a change e@0: # e@0: # j = 0 e@0: # pos = [] e@0: # for i in range(2, L-3): e@0: # if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]: e@0: # j += 1 e@0: # pos.append(i) e@0: # e@0: # if len(pos) == 0: e@0: # sys.exit(0) e@0: # e@0: # e@0: # # Computation of Change with 20 ms accuracy e@0: # e@0: # k2 = 0 e@0: # c = 25 e@0: # e@0: # Sm = zeros((2*c+51,)) e@0: # posms = zeros((len(pos),)) e@0: # msec = zeros((len(pos),)) e@0: # mpos = zeros((len(pos), )) e@0: # e@0: # for k1 in range(0,len(pos)): e@0: # i = 50*(pos[k1] - 1); # ms e@0: # for j in range(-c,50+c+1): e@0: # if ftv[i+j] != 0 and ftm[i+j] != 0: e@0: # b1 = ftv[i+j]/ftm[i+j] e@0: # a1 = ftm[i+j]/b1 - 1 e@0: # else: e@0: # b1 = 1000 e@0: # a1 = -1 e@0: # e@0: # if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0: e@0: # b2 = ftv[i+j+50-1]/ftm[i+j+50-1] e@0: # a2 = ftm[i+j+50-1]/b2 - 1 e@0: # else: e@0: # b2 = 1000 e@0: # a2 = -1 e@0: # e@0: # k = (a2+a1+2)/2 e@0: # Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1))) e@0: # e@0: # if isnan(Sm[j+c]) or isinf(Sm[j+c]): e@0: # Sm[j+c] = 0 e@0: # e@0: # posms[k1] = argmin(Sm) e@0: # x = Sm[posms[k1]] e@0: # e@0: # h = 1 - Sm e@0: # e@0: # if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1: e@0: # msec[k2] = posms[k1]-1-c e@0: # mpos[k2] = pos[k1] e@0: # k2 = k2+1 e@0: # else: e@0: # msec[k2] = 0 e@0: # mpos[k2] = 0 e@0: # e@0: # e@0: # posms = msec e@0: # pos = mpos e@0: # e@0: # fpos = pos+0.02*posms e@0: # e@0: # pfpos = floor(50*pos+posms); e@0: # e@0: # bvoice = 0.14339738781836; e@0: # bmusic = 0.04399754659151; e@0: # amusic = 1.66349817725076; e@0: # avoice = 2.32677887950291; e@0: # e@0: # pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1]))) e@0: # e@0: # L = len(pfpos)-1 e@0: # V = zeros((L,2)) e@0: # Vg = zeros((L,2)) e@0: # e@0: # Pzero = [] e@0: # Fsp1 = [] e@0: # Type = zeros((L,)) e@0: # Typeg = zeros((L,)) e@0: # e@0: # e@0: # # Classification for each segment e@0: # e@0: # noise = zeros((L,)) e@0: # noise2 = zeros((L,)) e@0: # e@0: # pfpos = pfpos.astype(int) e@0: # Pmusicg = zeros((L,)) e@0: # Pvoiceg = zeros((L,)) e@0: # Pmusic = zeros((L,)) e@0: # Pvoice = zeros((L,)) e@0: # Pspace = zeros((L,)) e@0: # zpos = zeros((L,)) e@0: # Pmax = zeros((L,)) e@0: # Pmaxg = zeros((L,)) e@0: # Freq = zeros((L,6)) e@0: # e@0: # zcrval = array(zcrval) e@0: # rmsval = array(rmsval) e@0: # rmszcrval = array(rmszcrval) e@0: # i e@0: # for i in range(0, L): e@0: # d = 0 e@0: # e@0: # x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1]) e@0: # x = x1 e@0: # y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1] e@0: # y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1])) e@0: # noise[i] = 50*mean(exp(-y)) e@0: # e@0: # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: # Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: # e@0: # sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1]) e@0: # e@0: # Pspace[i] = 6*exp((-sm**2)/(2*0.6**2)) e@0: # e@0: # zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1])) e@0: # e@0: # if zpos[i] > 0.08 or x > 3: e@0: # Pvoice[i] = 10 e@0: # else: e@0: # Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: # e@0: # V[i,0] = Pmusic[i] e@0: # V[i,1] = Pvoice[i] e@0: # Type[i] = argmax(V[i,:]) e@0: # Pmax[i] = V[i,Type[i]] e@0: # e@0: # noise2[i] = 50 * mean(exp(-4*y)) e@0: # Pmusicg[i] = 0 e@0: # e@0: # if noise2[i] > 3.5: e@0: # Pvoiceg[i] = 10 e@0: # else: e@0: # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: # e@0: # e@0: # Vg[i,0] = Pmusicg[i] e@0: # Vg[i,1] = Pvoiceg[i] e@0: # e@0: # Typeg[i] = argmax(Vg[i,:]) e@0: # Pmaxg[i] = Vg[i,Typeg[i]] e@0: # e@0: # Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1])) e@0: # tempV = rmsval[pfpos[i]:pfpos[i+1]+1] e@0: # rr = max(tempV)+0.001 e@0: # tempV1 = tempV/rr e@0: # Vk0 = Vk.copy() e@0: # e@0: # for j in range(0, len(Vk)): e@0: # if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1: e@0: # Vk[j] = 0 e@0: # # print "Changing %d" % j e@0: # Vk1 = Vk.copy() e@0: # e@0: # for j in range(0, len(Vk)): e@0: # if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV): e@0: # # print "Changing %d" % j e@0: # e@0: # Vk1[j] = 0 e@0: # e@0: # Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1]) e@0: # Pol = (ones((len(Vk),)) - sign(Vk))*Vk0 e@0: # e@0: # VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1] e@0: # dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1]) e@0: # e@0: # Freq[i, 0] = 50*sum(dVk)/(2*len(Vk)) e@0: # Freq[i, 1] = len(Vk)/50 e@0: # Freq[i, 2] = sum(Freq[:,2]) e@0: # Freq[i, 3] = zpos[i] e@0: # Freq[i, 4] = noise2[i] e@0: # Freq[i, 5] = max(Zca) e@0: # e@0: # Pmusicg[i] = 0 e@0: # Pvoiceg[i] = 0 e@0: # e@0: # if Freq[i,0] < 0.59 and Freq[i,1] > 2.5: e@0: # Pmusicg[i] = 10 e@0: # else: e@0: # Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1)) e@0: # Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1)) e@0: # e@0: # if x > 3: e@0: # Pvoiceg[i] = Pvoiceg[i] + 0.1 e@0: # e@0: # if x > 300: e@0: # Pmusicg[i] = 9 e@0: # e@0: # e@0: # if noise2[i] > 3.5: e@0: # Pvoiceg[i] = 11 e@0: # e@0: # if max(Zca) > 280: e@0: # Pmusicg[i] = 11 e@0: # e@0: # e@0: # if Freq[i,0] > 4.62 and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ): e@0: # Pvoiceg[i] = 12 e@0: # e@0: # e@0: # if zpos[i] > 0.15: e@0: # Pvoiceg[i] = 13 e@0: # e@0: # e@0: # Vg[i,0] = Pmusicg[i] e@0: # Vg[i,1] = Pvoiceg[i] e@0: # e@0: # Typeg[i] = argmax(Vg[i,:]) e@0: # Pmaxg[i] = Vg[i, Typeg[i]] e@0: # e@0: # Fsp1.extend(Vk) e@0: # e@0: # if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5): e@0: # Type[i] = 2 e@0: # Typeg[i] = 2 e@0: # e@0: # e@0: # if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1: e@0: # Type[i] = Type[i-1] e@0: # e@0: # e@0: # e@0: # maxpos = pfpos[-1] e@0: # e@0: # tt = zeros((maxpos,)) e@0: # pt = zeros((maxpos,)) e@0: # pm = zeros((maxpos,)) e@0: # pv = zeros((maxpos,)) e@0: # ps = zeros((maxpos,)) e@0: # e@0: # for i in range(0, L): e@0: # tt[pfpos[i]:pfpos[i+1]] = Typeg[i] e@0: # pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i] e@0: # pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i] e@0: # pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i] e@0: # ps[pfpos[i]:pfpos[i+1]] = Pspace[i] e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # for k in range(0, len(rmsval)): e@0: # if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval): e@0: # zcrval[k] = 0 e@0: # e@0: # e@0: # classes = [] e@0: # positions = [] e@0: # e@0: # class_ = 5 e@0: # e@0: # for j in range(0, len(tt)): e@0: # if class_ != tt[j]: e@0: # class_ = tt[j] e@0: # classes.append(int(class_)) e@0: # positions.append(int(j*0.02*SR)) e@0: # e@0: # classvec = zeros((len(audio),)) e@0: # e@0: # if len(positions) == 1: e@0: # classvec = classes[0]*ones((len(audio),)) e@0: # for j in range(1, len(positions)): e@0: # classvec[positions[j-1]:positions[j]] = classes[j-1] e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: # e@0: e@0: # e@0: e@0: e@0: positions, classes = trmszc(audio, SR) e@0: e@0: classvec = zeros((len(audio),)) e@0: e@0: if len(positions) == 1: e@0: classvec = classes[0]*ones((len(audio),)) e@0: for j in range(1, len(positions)): e@0: classvec[positions[j-1]:positions[j]] = classes[j-1] e@0: e@0: plot(audio) e@0: plot(classvec) e@0: