Mercurial > hg > chourdakisreiss2016

# -*- coding: utf-8 -*-
"""
Created on Mon Jun  1 17:17:34 2015

@author: mmxgn
"""


from sys import argv


def trmszc(audio, SR=21000):
    # Segmentation based on 1 second segmentation
    print("[II] Calculating features for %s, please wait..." % fname)

    rmsval = []
    zcrval = []
    rmszcrval = []
    meanrms = []
    varrms = []
    meanzcr = []
    varzcr = []
    histograms = []


    # Parameters for the chi square distribution
    alphas = []
    betas = []
    L = int(len(audio)/SR)
    for n in range(0, L):
        segment = audio[n*SR:(n+1)*SR]
        framerms = []
        framezcr = []
       # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
        for k in range(0, 50):
            frame = segment[0.02*SR*k:0.02*SR*(k+1)]

            rmsv = sqrt(sum(frame**2))
            framerms.append(rmsv)
            f1 = frame[0:-1]
            f2 = frame[1:]
            mm1 = f1*f2
            zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))

            rmsval.append(rmsv)
            zcrval.append(zc)
            rmszcrval.append(rmsv*zc)
            framezcr.append(zc)

        meanrms.append(mean(framerms))
        meanzcr.append(mean(framezcr))

        mu = mean(framerms)
        sigmasq = var(framerms, ddof=1)

        beta = sigmasq/mu
        alpha = (mu/beta) - 1

        if mu != 0 and sigmasq != 0:
            betas.append(beta)
            alphas.append(alpha)
        else:
            betas.append(1000000)
            alphas.append(-1)

        histograms.append(histogram(framerms, 256))

        varrms.append(var(framerms, ddof=1))
        varzcr.append(var(framerms, ddof=1))


    # Computation of KVRMS

    l = len(rmsval)
    ftm = [mean(rmsval[0:50])]
    ftv = [var(rmsval[0:50], ddof=1)]
    kvrms = [0]

    for i in range(1,l-50):
        ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
        ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
        ftm.append(ftm_)
        ftv.append(ftv_)

        if ftm_ != 0:
            kvrms.append(ftv_/ftm_**2)
        else:
            kvrms.append(0)


    m = mean(kvrms)
    v = var(kvrms, ddof=1)

    b = v/m
    a = m/b - 1

    # Computing Similarity

    sim = []
    from scipy.special import gamma

    for i in range(1,L):
        k = (alphas[i]+alphas[i-1]+2)/2
        sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))

        # 0 If overflow
        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim.append(sim_)

    sim2 = []

    for i in range(2,L):
        k = (alphas[i]+alphas[i-2]+2)/2
        sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))

        if isinf(sim_) or isnan(sim_):
            sim_ = 0
        sim2.append(sim_)


    p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
#
    rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)


    P = [0]
    for i in range(1, L-1):
        P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
        P_ = (1-sim2[i-1])
        P.append(P_)


    P.append(0)
    P = array(P)

    M = mean(P)
    V1 = mean(abs(P[0:L-3]-P[1:L-2]))
    V2 = mean(abs(P[0:L-4]-P[2:L-2]))
#        V = (V1+V2)*0.25;
    V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))

    Pd = [P[0],P[1]]


    for i in range(2,L-2):
        m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
        m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))

        if m<0.1*M:
            m1 = 0.1*M

        d = (P[i]-m)/m1

        if d < 0:
            d = 0
        Pd.append(P[i]*d/V)

    Pd.append(P[L-2])
    Pd.append(P[L-1])


    # Selection of candidate windows where there is a change

    j = 0
    pos = []
    for i in range(2, L-3):
        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
            j += 1
            pos.append(i)

    if len(pos) == 0:
        sys.exit(0)


    # Computation of Change with 20 ms accuracy

    k2 = 0
    c = 25

    Sm = zeros((2*c+51,))
    posms = zeros((len(pos),))
    msec = zeros((len(pos),))
    mpos = zeros((len(pos), ))

    for k1 in range(0,len(pos)):
        i = 50*(pos[k1] - 1); # ms
        for j in range(-c,50+c+1):
            if ftv[i+j] != 0 and ftm[i+j] != 0:
                b1 = ftv[i+j]/ftm[i+j]
                a1 = ftm[i+j]/b1 - 1
            else:
                b1 = 1000
                a1 = -1

            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
                a2 = ftm[i+j+50-1]/b2 - 1
            else:
                b2 = 1000
                a2 = -1

            k = (a2+a1+2)/2
            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))

            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
                Sm[j+c] = 0

            posms[k1] = argmin(Sm)
            x = Sm[posms[k1]]

            h = 1 - Sm

        if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
            msec[k2] = posms[k1]-1-c
            mpos[k2] = pos[k1]
            k2 = k2+1
        else:
            msec[k2] = 0
            mpos[k2] = 0


    posms = msec
    pos = mpos

    fpos = pos+0.02*posms

    pfpos = floor(50*pos+posms);

    bvoice = 0.14339738781836;
    bmusic = 0.04399754659151;
    amusic = 1.66349817725076;
    avoice = 2.32677887950291;

    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))

    L = len(pfpos)-1
    V = zeros((L,2))
    Vg = zeros((L,2))

    Pzero = []
    Fsp1 = []
    Type = zeros((L,))
    Typeg = zeros((L,))


    # Classification for each segment

    noise = zeros((L,))
    noise2 = zeros((L,))

    pfpos = pfpos.astype(int)
    Pmusicg = zeros((L,))
    Pvoiceg = zeros((L,))
    Pmusic = zeros((L,))
    Pvoice = zeros((L,))
    Pspace = zeros((L,))
    zpos = zeros((L,))
    Pmax = zeros((L,))
    Pmaxg = zeros((L,))
    Freq = zeros((L,6))

    zcrval = array(zcrval)
    rmsval = array(rmsval)
    rmszcrval = array(rmszcrval)
    i
    for i in range(0, L):
        d = 0

        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
        x = x1
        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
        noise[i] = 50*mean(exp(-y))

        Pmusic[i]  = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
        Pvoice[i]  = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])

        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))

        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))

        if zpos[i] > 0.08 or x > 3:
            Pvoice[i] = 10
        else:
            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))

        V[i,0] = Pmusic[i]
        V[i,1] = Pvoice[i]
        Type[i] = argmax(V[i,:])
        Pmax[i] = V[i,Type[i]]

        noise2[i] = 50 * mean(exp(-4*y))
        Pmusicg[i] = 0

        if noise2[i] > 3.5:
            Pvoiceg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))


        Vg[i,0] = Pmusicg[i]
        Vg[i,1] = Pvoiceg[i]

        Typeg[i] = argmax(Vg[i,:])
        Pmaxg[i] = Vg[i,Typeg[i]]

        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
        rr = max(tempV)+0.001
        tempV1 = tempV/rr
        Vk0 = Vk.copy()

        for j in range(0, len(Vk)):
            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
                Vk[j] = 0
             #   print "Changing %d" % j
        Vk1 = Vk.copy()

        for j in range(0, len(Vk)):
            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j]  < mean(tempV):
               # print "Changing %d" % j

                Vk1[j] = 0

        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0

        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])

        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
        Freq[i, 1] = len(Vk)/50
        Freq[i, 2] = sum(Freq[:,2])
        Freq[i, 3] = zpos[i]
        Freq[i, 4] = noise2[i]
        Freq[i, 5] = max(Zca)

        Pmusicg[i] = 0
        Pvoiceg[i] = 0

        if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
            Pmusicg[i] = 10
        else:
            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))

            if x > 3:
                Pvoiceg[i] = Pvoiceg[i] + 0.1

            if x > 300:
                Pmusicg[i] = 9


            if noise2[i] > 3.5:
                Pvoiceg[i] = 11

            if max(Zca) > 280:
                Pmusicg[i] = 11


            if Freq[i,0] > 4.62  and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
                Pvoiceg[i] = 12


            if zpos[i] > 0.15:
                Pvoiceg[i] = 13


        Vg[i,0] = Pmusicg[i]
        Vg[i,1] = Pvoiceg[i]

        Typeg[i] = argmax(Vg[i,:])
        Pmaxg[i] = Vg[i, Typeg[i]]

        Fsp1.extend(Vk)

        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
            Type[i] = 2
            Typeg[i] = 2


        if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
            Type[i] = Type[i-1]


    maxpos = pfpos[-1]

    tt = zeros((maxpos,))
    pt = zeros((maxpos,))
    pm = zeros((maxpos,))
    pv = zeros((maxpos,))
    ps = zeros((maxpos,))

    for i in range(0, L):
        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]


    for k in range(0, len(rmsval)):
        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
            zcrval[k] = 0


    classes = []
    positions = []

    class_ = 5

    for j in range(0, len(tt)):
        if class_ != tt[j]:
            class_ = tt[j]
            classes.append(int(class_))
            positions.append(int(j*0.02*SR))


    return positions, classes


if __name__=="__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <input>")
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
        sys.exit(-1)
    else:
        print("[II] Panagiotakis and Tziritas RMS/ZC Speech/Music/Silence Discriminator")
        print("[II] Loading libraries")

        import essentia
        from essentia import Pool
        from essentia.standard import  *
        import csv
        import yaml

        # reqyures matplotlib
        from pylab import *

        #requires numpy
        from numpy import *

        #requires scikit-learn
        from sklearn.metrics import pairwise_distances

        d = {}
        v = {}

        fname = argv[1]
        outfname = argv[2]


        trackname = fname.split('.')[0].split('/')[-1]


        if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
            print("Please choose a .json or .yaml as an output file.")
            sys.exit(-1)
        else:
            if outfname.partition('.')[-1].lower() == 'json':
                output = YamlOutput(filename = outfname, format='json')
            else:
                output = YamlOutput(filename = outfname, format='yaml')

        print("Feature extraction of `%s\'" % fname)

        # Sampling Rate
        SR = 21000.0

        # Sampling Frequency
        T = 1.0/SR

        # SegmentSize
        tsegmentSize = 1000 #ms
        segmentSize = int(ceil(tsegmentSize*SR/1000)) if mod(ceil(tsegmentSize*SR/1000),2) == 0 \
                                                  else int(floor(tsegmentSize*SR/1000))


        # FrameSize
        tframeSize = 20 #ms
        frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
                                                  else int(floor(tframeSize*SR/1000))

        # HopSize
        hopSize = frameSize

        # Load Audio
        audio = MonoLoader(filename = fname, sampleRate=SR)()


#        # Zero Crossing Rate
#        zcr = ZeroCrossingRate()
#
#        # RMS
#        rms = RMS()
#
#        # Segmentation based on 1 second segmentation
#        print("[II] Calculating features for %s, please wait..." % fname)
#
#        rmsval = []
#        zcrval = []
#        rmszcrval = []
#        meanrms = []
#        varrms = []
#        meanzcr = []
#        varzcr = []
#        histograms = []
#
#
#
#        # Parameters for the chi square distribution
#        alphas = []
#        betas = []
#        L = int(len(audio)/SR)
#        for n in range(0, L):
#            segment = audio[n*SR:(n+1)*SR]
#            framerms = []
#            framezcr = []
#           # for frame in FrameGenerator(segment, frameSize = frameSize, hopSize = frameSize):
#            for k in range(0, 50):
#                frame = segment[0.02*SR*k:0.02*SR*(k+1)]
#
#                rmsv = sqrt(sum(frame**2))
#                framerms.append(rmsv)
#                f1 = frame[0:-1]
#                f2 = frame[1:]
#                mm1 = f1*f2
#                zc = 0.5*sum((abs(sign(mm1))-sign(mm1)))
#
#                rmsval.append(rmsv)
#                zcrval.append(zc)
#                rmszcrval.append(rmsv*zc)
#                framezcr.append(zc)
#
#            meanrms.append(mean(framerms))
#            meanzcr.append(mean(framezcr))
#
#            mu = mean(framerms)
#            sigmasq = var(framerms, ddof=1)
#
#            beta = sigmasq/mu
#            alpha = (mu/beta) - 1
#
#            if mu != 0 and sigmasq != 0:
#                betas.append(beta)
#                alphas.append(alpha)
#            else:
#                betas.append(1000000)
#                alphas.append(-1)
#
#            histograms.append(histogram(framerms, 256))
#
#            varrms.append(var(framerms, ddof=1))
#            varzcr.append(var(framerms, ddof=1))
#
#
#        # Computation of KVRMS
#
#        l = len(rmsval)
#        ftm = [mean(rmsval[0:50])]
#        ftv = [var(rmsval[0:50], ddof=1)]
#        kvrms = [0]
#
#        for i in range(1,l-50):
#            ftm_ = ftm[i-1]+(rmsval[i+49] - rmsval[i-1])/50
#            ftv_ = ftv[i-1]+ftm[i-1]**2 + ((rmsval[i+49]**2-rmsval[i-1]**2)/50) - ftm_**2
#            ftm.append(ftm_)
#            ftv.append(ftv_)
#
#            if ftm_ != 0:
#                kvrms.append(ftv_/ftm_**2)
#            else:
#                kvrms.append(0)
#
#
#        m = mean(kvrms)
#        v = var(kvrms, ddof=1)
#
#        b = v/m
#        a = m/b - 1
#
#        # Computing Similarity
#
#        sim = []
#        from scipy.special import gamma
#
#        for i in range(1,L):
#            k = (alphas[i]+alphas[i-1]+2)/2
#            sim_ = ((2/(betas[i-1]+betas[i]))**k)*(betas[i-1]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-1]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-1]+1)*gamma(alphas[i]+1)))
#
#            # 0 If overflow
#            if isinf(sim_) or isnan(sim_):
#                sim_ = 0
#            sim.append(sim_)
#
#        sim2 = []
#
#        for i in range(2,L):
#            k = (alphas[i]+alphas[i-2]+2)/2
#            sim_ = ((2/(betas[i-2]+betas[i]))**k)*(betas[i-2]**((alphas[i]+1)/2))*(betas[i]**((alphas[i-2]+1)/2))*gamma(k)/(sqrt(gamma(alphas[i-2]+1)*gamma(alphas[i]+1)))
#
#            if isinf(sim_) or isnan(sim_):
#                sim_ = 0
#            sim2.append(sim_)
#
#
#        p = lambda x, a, b: x**a*exp(-b*x)/b**(a+1)/gamma(a+1)
##
#        rho = lambda alpha1,beta1,alpha2,beta2: gamma((alpha1+alpha2)/2 + 1)/sqrt(gamma(alpha1+1)*gamma(alpha2+1))*2**((alpha1+alpha2)/2+1)*beta1**((alpha2+1)/2)*beta2**((alpha1+1)/2)/(beta1+beta2)**((alpha1+alpha2)/2+1)
#
#
#        P = [0]
#        for i in range(1, L-1):
#            P_ = (1-sim2[i-1])*(1-sim[i-1]+1-sim[i])*(1-sim2[i-1])
#            P_ = (1-sim2[i-1])
#            P.append(P_)
#
#
#        P.append(0)
#        P = array(P)
#
#        M = mean(P)
#        V1 = mean(abs(P[0:L-3]-P[1:L-2]))
#        V2 = mean(abs(P[0:L-4]-P[2:L-2]))
##        V = (V1+V2)*0.25;
#        V = 0.25*median(abs((P[0:L-4]-P[2:L-2])))
#
#        Pd = [P[0],P[1]]
#
#
#        for i in range(2,L-2):
#            m = mean(concatenate((P[i-2:i],P[i+1:i+3])))
#            m1 = max(concatenate((P[i-2:i],P[i+1:i+3])))
#
#            if m<0.1*M:
#                m1 = 0.1*M
#
#            d = (P[i]-m)/m1
#
#            if d < 0:
#                d = 0
#            Pd.append(P[i]*d/V)
#
#    Pd.append(P[L-2])
#    Pd.append(P[L-1])
#
#
#    # Selection of candidate windows where there is a change
#
#    j = 0
#    pos = []
#    for i in range(2, L-3):
#        if Pd[i] > 5 and P[i] > P[i+1] and P[i] > P[i-1]:
#            j += 1
#            pos.append(i)
#
#    if len(pos) == 0:
#        sys.exit(0)
#
#
#    # Computation of Change with 20 ms accuracy
#
#    k2 = 0
#    c = 25
#
#    Sm = zeros((2*c+51,))
#    posms = zeros((len(pos),))
#    msec = zeros((len(pos),))
#    mpos = zeros((len(pos), ))
#
#    for k1 in range(0,len(pos)):
#        i = 50*(pos[k1] - 1); # ms
#        for j in range(-c,50+c+1):
#            if ftv[i+j] != 0 and ftm[i+j] != 0:
#                b1 = ftv[i+j]/ftm[i+j]
#                a1 = ftm[i+j]/b1 - 1
#            else:
#                b1 = 1000
#                a1 = -1
#
#            if ftv[i+j+50-1] != 0 and ftm[i+j+50-1] != 0:
#                b2 = ftv[i+j+50-1]/ftm[i+j+50-1]
#                a2 = ftm[i+j+50-1]/b2 - 1
#            else:
#                b2 = 1000
#                a2 = -1
#
#            k = (a2+a1+2)/2
#            Sm[j+c] = ((2/(b1+b2))**k)*(b1**((a2+1)/2))*(b2**((a1+1)/2))*gamma(k)/(sqrt(gamma(a1+1)*gamma(a2+1)))
#
#            if isnan(Sm[j+c]) or isinf(Sm[j+c]):
#                Sm[j+c] = 0
#
#            posms[k1] = argmin(Sm)
#            x = Sm[posms[k1]]
#
#            h = 1 - Sm
#
#        if mean(h[max(posms[k1] - 25,1):min(2*c+51,posms[k1]+25)]) > 0.1:
#            msec[k2] = posms[k1]-1-c
#            mpos[k2] = pos[k1]
#            k2 = k2+1
#        else:
#            msec[k2] = 0
#            mpos[k2] = 0
#
#
#    posms = msec
#    pos = mpos
#
#    fpos = pos+0.02*posms
#
#    pfpos = floor(50*pos+posms);
#
#    bvoice = 0.14339738781836;
#    bmusic = 0.04399754659151;
#    amusic = 1.66349817725076;
#    avoice = 2.32677887950291;
#
#    pfpos = concatenate((array([0]), pfpos, array([len(kvrms)-1])))
#
#    L = len(pfpos)-1
#    V = zeros((L,2))
#    Vg = zeros((L,2))
#
#    Pzero = []
#    Fsp1 = []
#    Type = zeros((L,))
#    Typeg = zeros((L,))
#
#
#    # Classification for each segment
#
#    noise = zeros((L,))
#    noise2 = zeros((L,))
#
#    pfpos = pfpos.astype(int)
#    Pmusicg = zeros((L,))
#    Pvoiceg = zeros((L,))
#    Pmusic = zeros((L,))
#    Pvoice = zeros((L,))
#    Pspace = zeros((L,))
#    zpos = zeros((L,))
#    Pmax = zeros((L,))
#    Pmaxg = zeros((L,))
#    Freq = zeros((L,6))
#
#    zcrval = array(zcrval)
#    rmsval = array(rmsval)
#    rmszcrval = array(rmszcrval)
#    i
#    for i in range(0, L):
#        d = 0
#
#        x1 = mean(kvrms[int(pfpos[i]):int(pfpos[i+1])+1])
#        x = x1
#        y = rmszcrval[int(pfpos[i])+d:int(pfpos[i+1])-d+1]
#        y = y/(2*max(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - min(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]) - median(rmsval[pfpos[i]+d:pfpos[i+1]-d+1]))
#        noise[i] = 50*mean(exp(-y))
#
#        Pmusic[i]  = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#        Pvoice[i]  = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#
#        sm = 0.7*median(ftm[pfpos[i]:pfpos[i+1]+1])+0.3*mean(ftm[pfpos[i]:pfpos[i+1]+1])
#
#        Pspace[i] = 6*exp((-sm**2)/(2*0.6**2))
#
#        zpos[i] = sum(exp(-10*zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))/float(len(zcrval[pfpos[i]+d:pfpos[i+1]-d+1]))
#
#        if zpos[i] > 0.08 or x > 3:
#            Pvoice[i] = 10
#        else:
#            Pmusic[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#
#        V[i,0] = Pmusic[i]
#        V[i,1] = Pvoice[i]
#        Type[i] = argmax(V[i,:])
#        Pmax[i] = V[i,Type[i]]
#
#        noise2[i] = 50 * mean(exp(-4*y))
#        Pmusicg[i] = 0
#
#        if noise2[i] > 3.5:
#            Pvoiceg[i] = 10
#        else:
#            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#
#
#        Vg[i,0] = Pmusicg[i]
#        Vg[i,1] = Pvoiceg[i]
#
#        Typeg[i] = argmax(Vg[i,:])
#        Pmaxg[i] = Vg[i,Typeg[i]]
#
#        Vk = array(sign(zcrval[pfpos[i]:pfpos[i+1]+1]))
#        tempV = rmsval[pfpos[i]:pfpos[i+1]+1]
#        rr = max(tempV)+0.001
#        tempV1 = tempV/rr
#        Vk0 = Vk.copy()
#
#        for j in range(0, len(Vk)):
#            if (tempV1[j] < 0.1 and tempV[j] < 1.5) or tempV[j] < 1:
#                Vk[j] = 0
#             #   print "Changing %d" % j
#        Vk1 = Vk.copy()
#
#        for j in range(0, len(Vk)):
#            if tempV1[j] < 0.4 or tempV[j] < 2 or tempV[j]  < mean(tempV):
#               # print "Changing %d" % j
#
#                Vk1[j] = 0
#
#        Zca = Vk1*(zcrval[pfpos[i]:pfpos[i+1]+1])
#        Pol = (ones((len(Vk),)) - sign(Vk))*Vk0
#
#        VZC = Pol*zcrval[pfpos[i]:pfpos[i+1]+1]
#        dVk = abs(Vk[2:len(Vk)] - Vk[1:len(Vk)-1])
#
#        Freq[i, 0] = 50*sum(dVk)/(2*len(Vk))
#        Freq[i, 1] = len(Vk)/50
#        Freq[i, 2] = sum(Freq[:,2])
#        Freq[i, 3] = zpos[i]
#        Freq[i, 4] = noise2[i]
#        Freq[i, 5] = max(Zca)
#
#        Pmusicg[i] = 0
#        Pvoiceg[i] = 0
#
#        if Freq[i,0] < 0.59 and Freq[i,1] > 2.5:
#            Pmusicg[i] = 10
#        else:
#            Pmusicg[i] = (x**amusic)*exp(-x/bmusic)/(gamma(amusic+1)*bmusic**(amusic+1))
#            Pvoiceg[i] = 0.5*(x**avoice)*exp(-x/bvoice)/(gamma(avoice+1)*bvoice**(avoice+1))
#
#            if x > 3:
#                Pvoiceg[i] = Pvoiceg[i] + 0.1
#
#            if x > 300:
#                Pmusicg[i] = 9
#
#
#            if noise2[i] > 3.5:
#                Pvoiceg[i] = 11
#
#            if max(Zca) > 280:
#                Pmusicg[i] = 11
#
#
#            if Freq[i,0] > 4.62  and Freq[i,1] > 2.5 and (noise2[i] > 0.1 or zpos[i] > 0.05 ):
#                Pvoiceg[i] = 12
#
#
#            if zpos[i] > 0.15:
#                Pvoiceg[i] = 13
#
#
#        Vg[i,0] = Pmusicg[i]
#        Vg[i,1] = Pvoiceg[i]
#
#        Typeg[i] = argmax(Vg[i,:])
#        Pmaxg[i] = Vg[i, Typeg[i]]
#
#        Fsp1.extend(Vk)
#
#        if (Pspace[i] > 4 and noise2[i] < 1) or (Pspace[i] > 4.5):
#            Type[i] = 2
#            Typeg[i] = 2
#
#
#        if (pfpos[i+1] - pfpos[i])/50 < 0.5 and i >= 1:
#            Type[i] = Type[i-1]
#
#
#
#    maxpos = pfpos[-1]
#
#    tt = zeros((maxpos,))
#    pt = zeros((maxpos,))
#    pm = zeros((maxpos,))
#    pv = zeros((maxpos,))
#    ps = zeros((maxpos,))
#
#    for i in range(0, L):
#        tt[pfpos[i]:pfpos[i+1]] = Typeg[i]
#        pt[pfpos[i]:pfpos[i+1]] = Pmaxg[i]
#        pm[pfpos[i]:pfpos[i+1]] = Pmusicg[i]
#        pv[pfpos[i]:pfpos[i+1]] = Pvoiceg[i]
#        ps[pfpos[i]:pfpos[i+1]] = Pspace[i]
#
#
#
#
#
#
#    for k in range(0, len(rmsval)):
#        if rmsval[k] < 2 or rmsval[k] < 0.1*max(rmsval):
#            zcrval[k] = 0
#
#
#    classes = []
#    positions = []
#
#    class_ = 5
#
#    for j in range(0, len(tt)):
#        if class_ != tt[j]:
#            class_ = tt[j]
#            classes.append(int(class_))
#            positions.append(int(j*0.02*SR))
#
#    classvec = zeros((len(audio),))
#
#    if len(positions) == 1:
#        classvec = classes[0]*ones((len(audio),))
#    for j in range(1, len(positions)):
#        classvec[positions[j-1]:positions[j]] = classes[j-1]
#
#
#
#
#
#
#
#
#
#
#

#


    positions, classes = trmszc(audio, SR)

    classvec = zeros((len(audio),))

    if len(positions) == 1:
        classvec = classes[0]*ones((len(audio),))
    for j in range(1, len(positions)):
        classvec[positions[j-1]:positions[j]] = classes[j-1]

    plot(audio)
    plot(classvec)
author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Sat, 30 Sep 2017 13:25:50 +0100
parents	246d5546657c
children