diff experiment-reverb/code/panagiotakis.py @ 0:246d5546657c
initial commit, needs cleanup
author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Wed, 14 Dec 2016 13:15:48 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiment-reverb/code/panagiotakis.py	Wed Dec 14 13:15:48 2016 +0000
@@ -0,0 +1,965 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jun  1 17:17:34 2015

@author: mmxgn
"""

import sys  # needed for the sys.exit() calls below
from sys import argv
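
The segmentation in trmszc() below models the 20 ms frame RMS values of each one-second window with a two-parameter Gamma density fitted by moment matching (beta = variance/mean, alpha = mean/beta - 1), and compares neighbouring windows with a closed-form coefficient that works out to the Bhattacharyya coefficient of the two fitted densities. A minimal sketch of those two formulas, using plain NumPy/SciPy and hypothetical helper names that do not appear in this file:

# --- illustrative sketch, not part of panagiotakis.py ---
import numpy as np
from scipy.special import gamma as gamma_fn

def fit_gamma_moments(x):
    # Moment matching as in trmszc(): beta = var/mean, alpha = mean/beta - 1.
    mu, sigmasq = np.mean(x), np.var(x, ddof=1)
    if mu == 0 or sigmasq == 0:
        return -1.0, 1e6  # sentinel values, mirroring the function below
    beta = sigmasq / mu
    return mu / beta - 1.0, beta

def gamma_similarity(a1, b1, a2, b2):
    # Similarity of two Gamma densities p(x) ~ x**a * exp(-x/b);
    # this is the expression computed in the `sim` and `sim2` loops below.
    k = (a1 + a2 + 2) / 2.0
    s = ((2.0 / (b1 + b2)) ** k) * (b1 ** ((a2 + 1) / 2.0)) * (b2 ** ((a1 + 1) / 2.0)) \
        * gamma_fn(k) / np.sqrt(gamma_fn(a1 + 1) * gamma_fn(a2 + 1))
    return 0.0 if not np.isfinite(s) else s

print(fit_gamma_moments([1.0, 2.0, 3.0, 4.0]))  # (2.75, 0.666...)
print(gamma_similarity(2.0, 1.0, 2.0, 1.0))     # 1.0: identical fits
print(gamma_similarity(2.0, 1.0, 2.0, 10.0))    # ~0.19: same shape, very different scale
# --- end of sketch ---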

def trmszc(audio, SR=21000):
    # Segmentation based on 1 second segmentation
    print("[II] Calculating features, please wait...")

    rmsval = []
    zcrval = []
    rmszcrval = []
    meanrms = []
    varrms = []
    meanzcr = []
    varzcr = []
    histograms = []

    # Parameters for the chi square distribution
    alphas = []
    betas = []
    L = int(len(audio)/SR)
    for n in range(0, L):
        segment = audio[int(n*SR):int((n+1)*SR)]
        framerms = []
        framezcr = []
        # for frame in FrameGenerator(segment, frameSize=frameSize, hopSize=frameSize):
        for k in range(0, 50):
            # 20 ms frames, 50 per one-second segment
            frame = segment[int(0.02*SR*k):int(0.02*SR*(k+1))]

            rmsv = sqrt(sum(frame**2))
            framerms.append(rmsv)
            f1 = frame[0:-1]
            f2 = frame[1:]
            mm1 = f1*f2
            zc = 0.5*sum((abs(sign(mm1)) - sign(mm1)))  # zero-crossing count of the frame

            rmsval.append(rmsv)
            zcrval.append(zc)
            rmszcrval.append(rmsv*zc)
            framezcr.append(zc)

        meanrms.append(mean(framerms))
        meanzcr.append(mean(framezcr))

        mu = mean(framerms)
        sigmasq = var(framerms, ddof=1)

        if mu != 0 and sigmasq != 0:
            beta = sigmasq/mu
            alpha = (mu/beta) - 1
            betas.append(beta)
            alphas.append(alpha)
        else:
            betas.append(1000000)
            alphas.append(-1)

        histograms.append(histogram(framerms, 256))

        varrms.append(var(framerms, ddof=1))
        varzcr.append(var(framezcr, ddof=1))
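
    # The next block maintains a sliding 50-frame (1 s) window over the frame
    # RMS sequence. ftm/ftv are the running window mean and variance, updated
    # recursively:
    #     mean_t   = mean_{t-1} + (x_{t+49} - x_{t-1}) / 50
    #     E[x^2]_t = E[x^2]_{t-1} + (x_{t+49}^2 - x_{t-1}^2) / 50
    #     var_t    = E[x^2]_t - mean_t^2
    # kvrms is the squared coefficient of variation var/mean^2 of the window,
    # which later feeds the music/voice scores.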

    # Computation of KVRMS
    l = len(rmsval)
    ftm = [mean(rmsval[0:50])]
    ftv = [var(rmsval[0:50], ddof=1)]
    kvrms = [0]

    for i in range(1, l-50):
        ftm_ = ftm[i-1] + (rmsval[i+49] - rmsval[i-1])/50
        ftv_ = ftv[i-1] + ftm[i-1]**2 + ((rmsval[i+49]**2 - rmsval[i-1]**2)/50) - ftm_**2
        ftm.append(ftm_)
        ftv.append(ftv_)

        if ftm_ != 0:
            kvrms.append(ftv_/ftm_**2)
        else:
            kvrms.append(0)

    m = mean(kvrms)
    v = var(kvrms, ddof=1)

    b = v/m
    a = m/b - 1

    # Computing Similarity
    sim = []
    from scipy.special import gamma

    # Similarity between the Gamma fits of neighbouring one-second windows
    # (sim) and of windows two apart (sim2).
    for i in range(1, L):
        k = (alphas[i]+alphas[i-1]+2)/2
        sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))

        # 0 if overflow
        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim.append(sim_)

    sim2 = []

    for i in range(2, L):
        k = (alphas[i]+alphas[i-2]+2)/2
        sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))

        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim2.append(sim_)

    p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
    rho = lambda alpha1, beta1, alpha2, beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)

    # Change-detection score per window boundary
    P = [0]
    for i in range(1, L-1):
        P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
        P_ = (1-sim2[i-1])
        P.append(P_)

    P.append(0)
    P = array(P)

    M = mean(P)
    V1 = mean(abs(P[0:L-3]-P[1:L-2]))
    V2 = mean(abs(P[0:L-4]-P[2:L-2]))
    # V = (V1+V2)*0.25
    V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))

    Pd = [P[0], P[1]]

    for i in range(2, L-2):
        m = mean(concatenate((P[i-2:i], P[i+1:i+3])))
        m1 = max(concatenate((P[i-2:i], P[i+1:i+3])))

        if m < 0.1*M:
            m1 = 0.1*M

        d = (P[i]-m)/m1

        if d < 0:
            d = 0
        Pd.append(P[i]*d/V)

    Pd.append(P[L-2])
    Pd.append(P[L-1])

    # Selection of candidate windows where there is a change
    j = 0
    pos = []
    for i in range(2, L-3):
        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
            j += 1
            pos.append(i)

    if len(pos) == 0:
        sys.exit(0)

    # Computation of Change with 20 ms accuracy
    k2 = 0
    c = 25

    Sm = zeros((2*c+51,))
    posms = zeros((len(pos),))
    msec = zeros((len(pos),))
    mpos = zeros((len(pos),))

    for k1 in range(0, len(pos)):
        i = 50*(pos[k1] - 1)  # ms
        for j in range(-c, 50+c+1):
            if ftv[i+j] != 0 and ftm[i+j] != 0:
                b1 = ftv[i+j]/ftm[i+j]
                a1 = ftm[i+j]/b1 - 1
            else:
                b1 = 1000
                a1 = -1

            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
                a2 = ftm[i+j+50-1]/b2 - 1
            else:
                b2 = 1000
                a2 = -1

            k = (a2+a1+2)/2
            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))

            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
                Sm[j+c] = 0

        posms[k1] = argmin(Sm)
        x = Sm[int(posms[k1])]

        h = 1 - Sm

        if mean(h[max(int(posms[k1]) - 25, 1):min(2*c+51, int(posms[k1])+25)]) > 0.1:
            msec[k2] = posms[k1]-1-c
            mpos[k2] = pos[k1]
            k2 = k2+1
        else:
            msec[k2] = 0
            mpos[k2] = 0

    posms = msec
    pos = mpos

    fpos = pos + 0.02*posms

    pfpos = floor(50*pos + posms)

    # Pre-computed Gamma parameters for the voice and music models
    bvoice = 0.14339738781836
    bmusic = 0.04399754659151
    amusic = 1.66349817725076
    avoice = 2.32677887950291

    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))

    L = len(pfpos)-1
    V = zeros((L, 2))
    Vg = zeros((L, 2))

    Pzero = []
    Fsp1 = []
    Type = zeros((L,))
    Typeg = zeros((L,))

    # Classification for each segment
    noise = zeros((L,))
    noise2 = zeros((L,))

    pfpos = pfpos.astype(int)
    Pmusicg = zeros((L,))
    Pvoiceg = zeros((L,))
    Pmusic = zeros((L,))
    Pvoice = zeros((L,))
    Pspace = zeros((L,))
    zpos = zeros((L,))
    Pmax = zeros((L,))
    Pmaxg = zeros((L,))
    Freq = zeros((L, 6))

    zcrval = array(zcrval)
    rmsval = array(rmsval)
    rmszcrval = array(rmszcrval)

    for i in range(0, L):
        d = 0

        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
        x = x1
        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
        noise[i] = 50*mean(exp(-y))

        Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
        Pvoice[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1]) + 0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])

        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))

        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))

        if zpos[i] > 0.08 or x > 3:
            Pvoice[i] = 10
        else:
            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))

        V[i, 0] = Pmusic[i]
        V[i, 1] = Pvoice[i]
        Type[i] = argmax(V[i, :])
        Pmax[i] = V[i, int(Type[i])]

        noise2[i] = 50*mean(exp(-4*y))
        Pmusicg[i] = 0

        if noise2[i] > 3.5:
            Pvoiceg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        Vg[i, 0] = Pmusicg[i]
        Vg[i, 1] = Pvoiceg[i]

        Typeg[i] = argmax(Vg[i, :])
        Pmaxg[i] = Vg[i, int(Typeg[i])]

        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
        rr = max(tempV)+0.001
        tempV1 = tempV/rr
        Vk0 = Vk.copy()

        for j in range(0, len(Vk)):
            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
                Vk[j] = 0
                # print "Changing %d" % j
        Vk1 = Vk.copy()

        for j in range(0, len(Vk)):
            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j] < mean(tempV):
                # print "Changing %d" % j
                Vk1[j] = 0

        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0

        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])

        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
        Freq[i, 1] = len(Vk)/50
        Freq[i, 2] = sum(Freq[:, 2])
        Freq[i, 3] = zpos[i]
        Freq[i, 4] = noise2[i]
        Freq[i, 5] = max(Zca)

        Pmusicg[i] = 0
        Pvoiceg[i] = 0

        if Freq[i, 0] < 0.59 and Freq[i, 1] > 2.5:
            Pmusicg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        if x > 3:
            Pvoiceg[i] = Pvoiceg[i] + 0.1

        if x > 300:
            Pmusicg[i] = 9

        if noise2[i] > 3.5:
            Pvoiceg[i] = 11

        if max(Zca) > 280:
            Pmusicg[i] = 11

        if Freq[i, 0] > 4.62 and Freq[i, 1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05):
            Pvoiceg[i] = 12

        if zpos[i] > 0.15:
            Pvoiceg[i] = 13

        Vg[i, 0] = Pmusicg[i]
        Vg[i, 1] = Pvoiceg[i]

        Typeg[i] = argmax(Vg[i, :])
        Pmaxg[i] = Vg[i, int(Typeg[i])]

        Fsp1.extend(Vk)

        # Class 2 marks silence ("space") segments
        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
            Type[i] = 2
            Typeg[i] = 2

        # Segments shorter than 0.5 s inherit the previous label
        if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
            Type[i] = Type[i-1]

    maxpos = pfpos[-1]

    tt = zeros((maxpos,))
    pt = zeros((maxpos,))
    pm = zeros((maxpos,))
    pv = zeros((maxpos,))
    ps = zeros((maxpos,))

    for i in range(0, L):
        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]

    for k in range(0, len(rmsval)):
        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
            zcrval[k] = 0

    # Collapse the per-frame labels into (position, class) change points
    classes = []
    positions = []

    class_ = 5

    for j in range(0, len(tt)):
        if class_ != tt[j]:
            class_ = tt[j]
            classes.append(int(class_))
            positions.append(int(j*0.02*SR))

    return positions, classes
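
trmszc() returns a list of sample positions at which the label changes and the integer label that starts at each position (0 = music, 1 = voice, 2 = silence, following the argmax over the [Pmusic, Pvoice] scores and the Pspace override above). A small illustrative helper, not part of this file, that turns that pair of lists into human-readable segments:

# --- illustrative sketch, not part of panagiotakis.py ---
def segments_from_changes(positions, classes, sr=21000.0):
    """Pair each change point with its label and convert samples to seconds."""
    names = {0: 'music', 1: 'voice', 2: 'silence'}
    return [(pos / sr, names.get(cls, 'unknown')) for pos, cls in zip(positions, classes)]

# e.g. positions=[0, 210000], classes=[0, 1]  ->  [(0.0, 'music'), (10.0, 'voice')]
# --- end of sketch ---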

if __name__ == "__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage:")
        print("%s <input> <output>" % argv[0])
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
        print("<output>\tThe output filename (.json or .yaml).")
        sys.exit(-1)
    else:
        print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
        print("[II] Loading libraries")

        import essentia
        from essentia import Pool
        from essentia.standard import *
        import csv
        import yaml

        # requires matplotlib
        from pylab import *

        # requires numpy
        from numpy import *

        # requires scikit-learn
        from sklearn.metrics import pairwise_distances

        d = {}
        v = {}

        fname = argv[1]
        outfname = argv[2]

        trackname = fname.split('.')[0].split('/')[-1]

        if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
            print("Please choose a .json or .yaml as an output file.")
            sys.exit(-1)
        else:
            if outfname.partition('.')[-1].lower() == 'json':
                output = YamlOutput(filename=outfname, format='json')
            else:
                output = YamlOutput(filename=outfname, format='yaml')

        print("Feature extraction of `%s'" % fname)

        # Sampling rate
        SR = 21000.0

        # Sampling period
        T = 1.0/SR

        # Segment size
        tsegmentSize = 1000  # ms
        segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000), 2) == 0 \
            else int(floor(tsegmentSize*SR/1000))

        # Frame size
        tframeSize = 20  # ms
        frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000), 2) == 0 \
            else int(floor(tframeSize*SR/1000))

        # Hop size
        hopSize = frameSize
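
        # With SR = 21000 Hz both values come out even, so the ceil branch is kept:
        # 1000 ms -> segmentSize = 21000 samples, 20 ms -> frameSize = 420 samples.
        # Note that trmszc() above does not take these as arguments; it hard-codes
        # 50 frames of 0.02*SR samples per one-second segment, so these values are
        # informational only.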

        # Load Audio
        audio = MonoLoader(filename=fname, sampleRate=SR)()

        positions, classes = trmszc(audio, SR)

        # Expand the change points into a per-sample class signal for plotting
        classvec = zeros((len(audio),))

        if len(positions) == 1:
            classvec = classes[0]*ones((len(audio),))
        for j in range(1, len(positions)):
            classvec[positions[j-1]:positions[j]] = classes[j-1]

        plot(audio)
        plot(classvec)
        show()
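
The script plots the waveform and the per-sample class signal, but it never writes the `output` YamlOutput it constructs. A possible way to persist the result, appended at the end of the main block and assuming Essentia's standard Pool/YamlOutput usage (a sketch, not part of the committed file):

# --- illustrative sketch, not part of panagiotakis.py ---
pool = Pool()
for p, c in zip(positions, classes):
    pool.add('segmentation.position', float(p)/SR)  # segment start in seconds
    pool.add('segmentation.class', float(c))        # 0 = music, 1 = voice, 2 = silence
output(pool)                                        # writes outfname as .json or .yaml
# --- end of sketch ---

A typical invocation (filenames hypothetical) would then be:

python panagiotakis.py mixture.wav mixture_segments.yaml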