view experiment-reverb/code/panagiotakis.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Mon Jun  1 17:17:34 2015

@author: mmxgn
"""


from sys import argv



def trmszc(audio, SR=21000):
    # Segmentation based on 1 second segmentation
    print("[II] Calculating features for %s, please wait..." % fname)        
    
    rmsval = []
    zcrval = []
    rmszcrval = []
    meanrms = []
    varrms = []
    meanzcr = []     
    varzcr = []
    histograms = []
    
    
    
    # Parameters for the chi square distribution
    alphas = []
    betas = []
    L = int(len(audio)/SR)
    for n in range(0, L):
        segment = audio[n*SR:(n+1)*SR]
        framerms = []
        framezcr = []
       # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
        for k in range(0, 50):
            frame = segment[0.02*SR*k:0.02*SR*(k+1)]
            
            rmsv = sqrt(sum(frame**2))
            framerms.append(rmsv)
            f1 = frame[0:-1]  
            f2 = frame[1:]
            mm1 = f1*f2
            zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
            
            rmsval.append(rmsv)
            zcrval.append(zc)
            rmszcrval.append(rmsv*zc)
            framezcr.append(zc)
            
        meanrms.append(mean(framerms))
        meanzcr.append(mean(framezcr))
        
        mu = mean(framerms)
        sigmasq = var(framerms, ddof=1)
        
        beta = sigmasq/mu
        alpha = (mu/beta) - 1

        if mu != 0 and sigmasq != 0:
            betas.append(beta)
            alphas.append(alpha)            
        else:
            betas.append(1000000)
            alphas.append(-1)
        
        histograms.append(histogram(framerms, 256))
        
        varrms.append(var(framerms, ddof=1))
        varzcr.append(var(framerms, ddof=1))
        

    # Computation of KVRMS
    
    l = len(rmsval)
    ftm = [mean(rmsval[0:50])]
    ftv = [var(rmsval[0:50], ddof=1)]
    kvrms = [0]
    
    for i in range(1,l-50):
        ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
        ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
        ftm.append(ftm_)
        ftv.append(ftv_)
        
        if ftm_ != 0:
            kvrms.append(ftv_/ftm_**2)
        else:
            kvrms.append(0)
            
            
    m = mean(kvrms)
    v = var(kvrms, ddof=1)
    
    b = v/m
    a = m/b - 1
    
    # Computing Similarity
    
    sim = []
    from scipy.special import gamma

    for i in range(1,L):
        k = (alphas[i]+alphas[i-1]+2)/2
        sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
        
        # 0 If overflow
        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim.append(sim_)          
        
    sim2 = []
    
    for i in range(2,L):
        k = (alphas[i]+alphas[i-2]+2)/2
        sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
        
        if isinf(sim_) or isnan(sim_):
            sim_ = 0            
        sim2.append(sim_)             


    p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
#
    rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
    
    
    P = [0]
    for i in range(1, L-1):
        P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
        P_ = (1-sim2[i-1])
        P.append(P_)
        
        
    P.append(0)
    P = array(P)
    
    M = mean(P)
    V1 = mean(abs(P[0:L-3]-P[1:L-2]))
    V2 = mean(abs(P[0:L-4]-P[2:L-2]))
#        V = (V1+V2)*0.25;        
    V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
    
    Pd = [P[0],P[1]]
    
    
    for i in range(2,L-2):
        m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
        m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
        
        if m<0.1*M:
            m1 = 0.1*M
            
        d = (P[i]-m)/m1
        
        if d < 0:
            d = 0
        Pd.append(P[i]*d/V)

    Pd.append(P[L-2])
    Pd.append(P[L-1])
    
    
    # Selection of candidate windows where there is a change 
    
    j = 0
    pos = []
    for i in range(2, L-3):
        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
            j += 1
            pos.append(i)
            
    if len(pos) == 0:
        sys.exit(0)
        
        
    # Computation of Change with 20 ms accuracy
        
    k2 = 0
    c = 25
    
    Sm = zeros((2*c+51,))
    posms = zeros((len(pos),))
    msec = zeros((len(pos),))
    mpos = zeros((len(pos), ))
    
    for k1 in range(0,len(pos)):
        i = 50*(pos[k1] - 1); # ms
        for j in range(-c,50+c+1):
            if ftv[i+j] != 0 and ftm[i+j] != 0:
                b1 = ftv[i+j]/ftm[i+j]
                a1 = ftm[i+j]/b1 - 1
            else:
                b1 = 1000
                a1 = -1            
    
            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
                a2 = ftm[i+j+50-1]/b2 - 1
            else:
                b2 = 1000
                a2 = -1
    
            k = (a2+a1+2)/2
            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
            
            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
                Sm[j+c] = 0
                
            posms[k1] = argmin(Sm)
            x = Sm[posms[k1]]
            
            h = 1 - Sm
            
        if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
            msec[k2] = posms[k1]-1-c
            mpos[k2] = pos[k1]
            k2 = k2+1
        else:
            msec[k2] = 0
            mpos[k2] = 0
                
            
    posms = msec
    pos = mpos
    
    fpos = pos+0.02*posms
    
    pfpos = floor(50*pos+posms);
    
    bvoice = 0.14339738781836;
    bmusic = 0.04399754659151;
    amusic = 1.66349817725076;
    avoice = 2.32677887950291;    
            
    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
        
    L = len(pfpos)-1
    V = zeros((L,2))
    Vg = zeros((L,2))
    
    Pzero = []
    Fsp1 = []
    Type = zeros((L,))
    Typeg = zeros((L,))
    
    
    # Classification for each segment
    
    noise = zeros((L,))
    noise2 = zeros((L,))
    
    pfpos = pfpos.astype(int)
    Pmusicg = zeros((L,))
    Pvoiceg = zeros((L,))
    Pmusic = zeros((L,))
    Pvoice = zeros((L,))   
    Pspace = zeros((L,)) 
    zpos = zeros((L,))    
    Pmax = zeros((L,))
    Pmaxg = zeros((L,))
    Freq = zeros((L,6))
    
    zcrval = array(zcrval)
    rmsval = array(rmsval)
    rmszcrval = array(rmszcrval)
    i
    for i in range(0, L):
        d = 0
        
        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
        x = x1
        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
        noise[i] = 50*mean(exp(-y))
        
        Pmusic[i]  = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
        Pvoice[i]  = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
        
        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
        
        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
        
        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
        
        if zpos[i] > 0.08 or x > 3:
            Pvoice[i] = 10
        else:
            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            
        V[i,0] = Pmusic[i]
        V[i,1] = Pvoice[i]
        Type[i] = argmax(V[i,:])
        Pmax[i] = V[i,Type[i]]
        
        noise2[i] = 50 * mean(exp(-4*y))
        Pmusicg[i] = 0
        
        if noise2[i] > 3.5:
            Pvoiceg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
            
            
        Vg[i,0] = Pmusicg[i]
        Vg[i,1] = Pvoiceg[i]
        
        Typeg[i] = argmax(Vg[i,:])
        Pmaxg[i] = Vg[i,Typeg[i]]
        
        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
        rr = max(tempV)+0.001
        tempV1 = tempV/rr
        Vk0 = Vk.copy()
        
        for j in range(0, len(Vk)):
            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
                Vk[j] = 0
             #   print "Changing %d" % j
        Vk1 = Vk.copy()
        
        for j in range(0, len(Vk)):
            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j]  < mean(tempV):
               # print "Changing %d" % j
    
                Vk1[j] = 0
        
        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
        
        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
        
        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
        Freq[i, 1] = len(Vk)/50
        Freq[i, 2] = sum(Freq[:,2])
        Freq[i, 3] = zpos[i]
        Freq[i, 4] = noise2[i]
        Freq[i, 5] = max(Zca)
        
        Pmusicg[i] = 0
        Pvoiceg[i] = 0
        
        if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
            Pmusicg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
            
            if x > 3:
                Pvoiceg[i] = Pvoiceg[i] + 0.1
                
            if x > 300:
                Pmusicg[i] = 9
                
                
            if noise2[i] > 3.5:
                Pvoiceg[i] = 11
            
            if max(Zca) > 280:
                Pmusicg[i] = 11
            
            
            if Freq[i,0] > 4.62  and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
                Pvoiceg[i] = 12
                
            
            if zpos[i] > 0.15:
                Pvoiceg[i] = 13
                
        
        Vg[i,0] = Pmusicg[i]
        Vg[i,1] = Pvoiceg[i]
        
        Typeg[i] = argmax(Vg[i,:])
        Pmaxg[i] = Vg[i, Typeg[i]]
        
        Fsp1.extend(Vk)
        
        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
            Type[i] = 2
            Typeg[i] = 2
            
        
        if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
            Type[i] = Type[i-1]
        
        
        
    maxpos = pfpos[-1]
                
    tt = zeros((maxpos,))
    pt = zeros((maxpos,))    
    pm = zeros((maxpos,))    
    pv = zeros((maxpos,))    
    ps = zeros((maxpos,))    
    
    for i in range(0, L):
        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
    
        
        
        
        
        
    for k in range(0, len(rmsval)):
        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
            zcrval[k] = 0
     
    
    classes = []
    positions = []
    
    class_ = 5
               
    for j in range(0, len(tt)):
        if class_ != tt[j]:
            class_ = tt[j]
            classes.append(int(class_))
            positions.append(int(j*0.02*SR))
            

    
    return positions, classes
        
            

if __name__=="__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <input>")
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")     
        sys.exit(-1)
    else:
        print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
        print("[II] Loading libraries")
        
        import essentia
        from essentia import Pool
        from essentia.standard import  *
        import csv
        import yaml
        
        # reqyures matplotlib
        from pylab import *
        
        #requires numpy
        from numpy import *
        
        #requires scikit-learn
        from sklearn.metrics import pairwise_distances        
        
        d = {}
        v = {}
        
        fname = argv[1]        
        outfname = argv[2]
        


        trackname = fname.split('.')[0].split('/')[-1]  

        
        if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
            print("Please choose a .json or .yaml as an output file.")
            sys.exit(-1)
        else:
            if outfname.partition('.')[-1].lower() == 'json':
                output = YamlOutput(filename = outfname, format='json') 
            else:
                output = YamlOutput(filename = outfname, format='yaml')
        
        print("Feature extraction of `%s\'" % fname)

        # Sampling Rate
        SR = 21000.0
        
        # Sampling Frequency
        T = 1.0/SR

        # SegmentSize
        tsegmentSize = 1000 #ms    
        segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
                                                  else int(floor(tsegmentSize*SR/1000))
      
        
        # FrameSize
        tframeSize = 20 #ms    
        frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
                                                  else int(floor(tframeSize*SR/1000))        
                             
        # HopSize
        hopSize = frameSize
        
        # Load Audio
        audio = MonoLoader(filename = fname, sampleRate=SR)()   
        
        
        
        
        
#        # Zero Crossing Rate
#        zcr = ZeroCrossingRate()        
#
#        # RMS 
#        rms = RMS()        
#        
#        # Segmentation based on 1 second segmentation
#        print("[II] Calculating features for %s, please wait..." % fname)        
#        
#        rmsval = []
#        zcrval = []
#        rmszcrval = []
#        meanrms = []
#        varrms = []
#        meanzcr = []     
#        varzcr = []
#        histograms = []
#        
#        
#        
#        # Parameters for the chi square distribution
#        alphas = []
#        betas = []
#        L = int(len(audio)/SR)
#        for n in range(0, L):
#            segment = audio[n*SR:(n+1)*SR]
#            framerms = []
#            framezcr = []
#           # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
#            for k in range(0, 50):
#                frame = segment[0.02*SR*k:0.02*SR*(k+1)]
#                
#                rmsv = sqrt(sum(frame**2))
#                framerms.append(rmsv)
#                f1 = frame[0:-1]  
#                f2 = frame[1:]
#                mm1 = f1*f2
#                zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
#                
#                rmsval.append(rmsv)
#                zcrval.append(zc)
#                rmszcrval.append(rmsv*zc)
#                framezcr.append(zc)
#                
#            meanrms.append(mean(framerms))
#            meanzcr.append(mean(framezcr))
#            
#            mu = mean(framerms)
#            sigmasq = var(framerms, ddof=1)
#            
#            beta = sigmasq/mu
#            alpha = (mu/beta) - 1
#
#            if mu != 0 and sigmasq != 0:
#                betas.append(beta)
#                alphas.append(alpha)            
#            else:
#                betas.append(1000000)
#                alphas.append(-1)
#            
#            histograms.append(histogram(framerms, 256))
#            
#            varrms.append(var(framerms, ddof=1))
#            varzcr.append(var(framerms, ddof=1))
#            
#
#        # Computation of KVRMS
#        
#        l = len(rmsval)
#        ftm = [mean(rmsval[0:50])]
#        ftv = [var(rmsval[0:50], ddof=1)]
#        kvrms = [0]
#        
#        for i in range(1,l-50):
#            ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
#            ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
#            ftm.append(ftm_)
#            ftv.append(ftv_)
#            
#            if ftm_ != 0:
#                kvrms.append(ftv_/ftm_**2)
#            else:
#                kvrms.append(0)
#                
#                
#        m = mean(kvrms)
#        v = var(kvrms, ddof=1)
#        
#        b = v/m
#        a = m/b - 1
#        
#        # Computing Similarity
#        
#        sim = []
#        from scipy.special import gamma
#
#        for i in range(1,L):
#            k = (alphas[i]+alphas[i-1]+2)/2
#            sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
#            
#            # 0 If overflow
#            if isinf(sim_) or isnan(sim_):
#                sim_ = 0
#            sim.append(sim_)          
#            
#        sim2 = []
#        
#        for i in range(2,L):
#            k = (alphas[i]+alphas[i-2]+2)/2
#            sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
#            
#            if isinf(sim_) or isnan(sim_):
#                sim_ = 0            
#            sim2.append(sim_)             
#    
#    
#        p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
##
#        rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
#        
#        
#        P = [0]
#        for i in range(1, L-1):
#            P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
#            P_ = (1-sim2[i-1])
#            P.append(P_)
#            
#            
#        P.append(0)
#        P = array(P)
#        
#        M = mean(P)
#        V1 = mean(abs(P[0:L-3]-P[1:L-2]))
#        V2 = mean(abs(P[0:L-4]-P[2:L-2]))
##        V = (V1+V2)*0.25;        
#        V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
#        
#        Pd = [P[0],P[1]]
#        
#        
#        for i in range(2,L-2):
#            m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
#            m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
#            
#            if m<0.1*M:
#                m1 = 0.1*M
#                
#            d = (P[i]-m)/m1
#            
#            if d < 0:
#                d = 0
#            Pd.append(P[i]*d/V)
#    
#    Pd.append(P[L-2])
#    Pd.append(P[L-1])
#    
#    
#    # Selection of candidate windows where there is a change 
#    
#    j = 0
#    pos = []
#    for i in range(2, L-3):
#        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
#            j += 1
#            pos.append(i)
#            
#    if len(pos) == 0:
#        sys.exit(0)
#        
#        
#    # Computation of Change with 20 ms accuracy
#        
#    k2 = 0
#    c = 25
#    
#    Sm = zeros((2*c+51,))
#    posms = zeros((len(pos),))
#    msec = zeros((len(pos),))
#    mpos = zeros((len(pos), ))
#    
#    for k1 in range(0,len(pos)):
#        i = 50*(pos[k1] - 1); # ms
#        for j in range(-c,50+c+1):
#            if ftv[i+j] != 0 and ftm[i+j] != 0:
#                b1 = ftv[i+j]/ftm[i+j]
#                a1 = ftm[i+j]/b1 - 1
#            else:
#                b1 = 1000
#                a1 = -1            
#
#            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
#                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
#                a2 = ftm[i+j+50-1]/b2 - 1
#            else:
#                b2 = 1000
#                a2 = -1
#
#            k = (a2+a1+2)/2
#            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
#            
#            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
#                Sm[j+c] = 0
#                
#            posms[k1] = argmin(Sm)
#            x = Sm[posms[k1]]
#            
#            h = 1 - Sm
#            
#        if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
#            msec[k2] = posms[k1]-1-c
#            mpos[k2] = pos[k1]
#            k2 = k2+1
#        else:
#            msec[k2] = 0
#            mpos[k2] = 0
#                
#            
#    posms = msec
#    pos = mpos
#    
#    fpos = pos+0.02*posms
#    
#    pfpos = floor(50*pos+posms);
#    
#    bvoice = 0.14339738781836;
#    bmusic = 0.04399754659151;
#    amusic = 1.66349817725076;
#    avoice = 2.32677887950291;    
#            
#    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
#        
#    L = len(pfpos)-1
#    V = zeros((L,2))
#    Vg = zeros((L,2))
#    
#    Pzero = []
#    Fsp1 = []
#    Type = zeros((L,))
#    Typeg = zeros((L,))
#    
#    
#    # Classification for each segment
#    
#    noise = zeros((L,))
#    noise2 = zeros((L,))
#    
#    pfpos = pfpos.astype(int)
#    Pmusicg = zeros((L,))
#    Pvoiceg = zeros((L,))
#    Pmusic = zeros((L,))
#    Pvoice = zeros((L,))   
#    Pspace = zeros((L,)) 
#    zpos = zeros((L,))    
#    Pmax = zeros((L,))
#    Pmaxg = zeros((L,))
#    Freq = zeros((L,6))
#    
#    zcrval = array(zcrval)
#    rmsval = array(rmsval)
#    rmszcrval = array(rmszcrval)
#    i
#    for i in range(0, L):
#        d = 0
#        
#        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
#        x = x1
#        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
#        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
#        noise[i] = 50*mean(exp(-y))
#        
#        Pmusic[i]  = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#        Pvoice[i]  = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#        
#        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
#        
#        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
#        
#        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
#        
#        if zpos[i] > 0.08 or x > 3:
#            Pvoice[i] = 10
#        else:
#            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#            
#        V[i,0] = Pmusic[i]
#        V[i,1] = Pvoice[i]
#        Type[i] = argmax(V[i,:])
#        Pmax[i] = V[i,Type[i]]
#        
#        noise2[i] = 50 * mean(exp(-4*y))
#        Pmusicg[i] = 0
#        
#        if noise2[i] > 3.5:
#            Pvoiceg[i] = 10
#        else:
#            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#            
#            
#        Vg[i,0] = Pmusicg[i]
#        Vg[i,1] = Pvoiceg[i]
#        
#        Typeg[i] = argmax(Vg[i,:])
#        Pmaxg[i] = Vg[i,Typeg[i]]
#        
#        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
#        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
#        rr = max(tempV)+0.001
#        tempV1 = tempV/rr
#        Vk0 = Vk.copy()
#        
#        for j in range(0, len(Vk)):
#            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
#                Vk[j] = 0
#             #   print "Changing %d" % j
#        Vk1 = Vk.copy()
#        
#        for j in range(0, len(Vk)):
#            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j]  < mean(tempV):
#               # print "Changing %d" % j
#
#                Vk1[j] = 0
#        
#        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
#        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
#        
#        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
#        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
#        
#        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
#        Freq[i, 1] = len(Vk)/50
#        Freq[i, 2] = sum(Freq[:,2])
#        Freq[i, 3] = zpos[i]
#        Freq[i, 4] = noise2[i]
#        Freq[i, 5] = max(Zca)
#        
#        Pmusicg[i] = 0
#        Pvoiceg[i] = 0
#        
#        if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
#            Pmusicg[i] = 10
#        else:
#            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#            
#            if x > 3:
#                Pvoiceg[i] = Pvoiceg[i] + 0.1
#                
#            if x > 300:
#                Pmusicg[i] = 9
#                
#                
#            if noise2[i] > 3.5:
#                Pvoiceg[i] = 11
#            
#            if max(Zca) > 280:
#                Pmusicg[i] = 11
#            
#            
#            if Freq[i,0] > 4.62  and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
#                Pvoiceg[i] = 12
#                
#            
#            if zpos[i] > 0.15:
#                Pvoiceg[i] = 13
#                
#        
#        Vg[i,0] = Pmusicg[i]
#        Vg[i,1] = Pvoiceg[i]
#        
#        Typeg[i] = argmax(Vg[i,:])
#        Pmaxg[i] = Vg[i, Typeg[i]]
#        
#        Fsp1.extend(Vk)
#        
#        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
#            Type[i] = 2
#            Typeg[i] = 2
#            
#        
#        if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
#            Type[i] = Type[i-1]
#        
#        
#        
#    maxpos = pfpos[-1]
#                
#    tt = zeros((maxpos,))
#    pt = zeros((maxpos,))    
#    pm = zeros((maxpos,))    
#    pv = zeros((maxpos,))    
#    ps = zeros((maxpos,))    
#    
#    for i in range(0, L):
#        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
#        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
#        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
#        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
#        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
#
#        
#        
#        
#        
#        
#    for k in range(0, len(rmsval)):
#        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
#            zcrval[k] = 0
# 
#    
#    classes = []
#    positions = []
#    
#    class_ = 5
#               
#    for j in range(0, len(tt)):
#        if class_ != tt[j]:
#            class_ = tt[j]
#            classes.append(int(class_))
#            positions.append(int(j*0.02*SR))
#            
#    classvec = zeros((len(audio),))
#
#    if len(positions) == 1:
#        classvec = classes[0]*ones((len(audio),))
#    for j in range(1, len(positions)):
#        classvec[positions[j-1]:positions[j]] = classes[j-1]
#        
#    
#    
#        
#        
#        
#            
#        
#    
#                    
#    

#        
        
            
    positions, classes = trmszc(audio, SR)   
        
    classvec = zeros((len(audio),))
    
    if len(positions) == 1:
        classvec = classes[0]*ones((len(audio),))
    for j in range(1, len(positions)):
        classvec[positions[j-1]:positions[j]] = classes[j-1]
        
    plot(audio)
    plot(classvec)