view experiment-reverb/code/selfsim.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Mon Jun  8 11:19:15 2015

@author: mmxgn
"""
# Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py



if __name__=="__main__":
    from sys import argv
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <input>")
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")     
        print("<output_folder>\tThe output folders. Segments will be stored under names 'name_segN'")
        sys.exit(-1)
    else:
        print("[II] Applying the method found in: ")
        print("[II] Automatic Audio Segmentation using a measure of Audio Novelty")
        print("[II] - Jonathar Foote ")
        print("[II] Loading libraries")
        
        import essentia
        from essentia import Pool
        from essentia.standard import  *
        import csv
        import yaml
        
        # reqyures matplotlib
        from pylab import *
        
        #requires numpy
        from numpy import *
        
        import wave
       
        
        from scipy.spatial import distance
        from scipy.ndimage import filters
        d = {}
        v = {}
        
        fname = argv[1]        
        outfdir = argv[2]
        
        print "[II] Using filename: %s" % fname
        print "[II] Using output folder: %s" % outfdir
        
        name = fname.split('.')[-2].split('/')[-1]
        
        print "[II] Segments will be saved in the form '%s/%s_segN.mp3'" % (outfdir, name)


        trackname = fname.split('.')[0].split('/')[-1]  

        
#        if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
#            print("Please choose a .json or .yaml as an output file.")
#            sys.exit(-1)
#        else:
#            if outfname.partition('.')[-1].lower() == 'json':
#                output = YamlOutput(filename = outfname, format='json') 
#            else:
#                output = YamlOutput(filename = outfname, format='yaml')
        
        print("Feature extraction of `%s\'" % fname)

        # Sampling Rate
        SR = 21000.0
        
        
        # Audio Loader
        loader = MonoLoader(filename = fname, sampleRate=SR)
        
        # Lowpass audio         
        lp = LowPass(cutoffFrequency=SR/4, sampleRate=SR)
        
        # Audio
        audio = lp(loader())
        
        
        
        # For MFCCs
        
        w_hanning = Windowing(type = "hann")
        spectrum = Spectrum()
        mfcc = MFCC()
        
        
        frameSize = int(0.2 * SR) # Change this depending whether it's music or sound
        
        pool = essentia.Pool()
        
        
        
        for frame in FrameGenerator(audio, frameSize = frameSize, hopSize = frameSize/2):
            mfcc_bands, mfcc_coeffs = mfcc(spectrum(w_hanning(frame)))
            pool.add("lowlevel.mfcc_selfsim", mfcc_coeffs)
            
        mfcc_coeffs = pool['lowlevel.mfcc_selfsim']
        
      #  selfsim = 1 - pairwise_distances(mfcc_coeffs)#, metric = "cosine")
        selfsim = distance.pdist(mfcc_coeffs, metric='seuclidean')
        selfsim = distance.squareform(selfsim)
        selfsim /= selfsim.max()
        selfsim = 1 - selfsim
        # Calculating cosine distances as a better metric
        
        C = array([[1,-1],[-1,1]])        
        
        def Novelty(S, C = array([[1, -1],[-1, 1]])):
            L = C.shape[0]
            
            horconcat = concatenate((S[:, 0:L/2], S, S[:,-L/2:]), axis=1)
            verconcat = concatenate((horconcat[0:L/2,:], horconcat, horconcat[-L/2:,:]), axis=0)
            
                    
            N = zeros((S.shape[0],))
            
            for i in range(0, len(N)):
                S_ = 0
                for m in range(-L/2, L/2):
                    for n in range(-L/2, L/2):
                       # print (m,n), (L/2+m, L/2+n)
                        S_ += C[L/2+m, L/2+n]*verconcat[i+m+L/2, i+n-L/2]     
                       # S_ += verconcat[i+m+L/2, i+m-L/2]
                        
              #  print S_        
                N[i] = S_
                
            return N
                
        def novel(S, C = array([[1, -1], [-1, 1]])):
            N = S.shape[0]
            M = C.shape[0]
            
            novelty = zeros(N)
            
            for i in xrange(M/2, N-M/2+1):
                novelty[i] = sum(S[i-M/2:i+M/2,i-M/2:i+M/2] * C)
                
            novelty += novelty.min()
            novelty /= novelty.max()
            
            return novelty
            
            
                    
        def pick_peaks(nc, L=32):
            # Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py
            
            """Obtain peaks from a novelty curve using an adaptive threshold."""
            offset = nc.mean() / 20.
        
            nc = filters.gaussian_filter1d(nc, sigma=4)  # Smooth out nc
        
            th = filters.median_filter(nc, size=L) + offset
            #th = filters.gaussian_filter(nc, sigma=L/2., mode="nearest") + offset
        
            peaks = []
            for i in xrange(1, nc.shape[0] - 1):
                # is it a peak?
                if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]:
                    # is it above the threshold?
                    if nc[i] > th[i]:
                        peaks.append(i)
            #plt.plot(nc)
            #plt.plot(th)
            #for peak in peaks:
                #plt.axvline(peak)
            #plt.show()
        
            return peaks

        from scipy import signal
        def compute_gaussian_krnl(M):
            """Creates a gaussian kernel following Foote's paper."""
            g = signal.gaussian(M, M / 3., sym=True)
            G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
            G[M / 2:, :M / 2] = -G[M / 2:, :M / 2]
            G[:M / 2, M / 2:] = -G[:M / 2, M / 2:]
            return G        
            
        K = compute_gaussian_krnl(96)
        def kernelMatrix(L):
            k1 = concatenate((ones((L/2,L/2)), -1*ones((L/2,L/2))))
            k1 = concatenate((k1,-k1),axis=1)
            return k1
            
        N = novel(selfsim, K)
        peaks = pick_peaks(N)

        boundaries = array(peaks)*frameSize/2 
        
        sampleRate = SR
        
        audio = MonoLoader(filename=fname, sampleRate = sampleRate)()
        
        from scipy.io.wavfile import write as wavwrite 
        
        for b in range(1, len(boundaries)):
            outname = '%s/%s_seg%d.wav' % (outfdir, name, b)
            segment = audio[boundaries[b-1]:boundaries[b]]
            if len(segment) >= 5*SR:
                #audioout = MonoWriter(sampleRate = SR, filename=outname)
                #audioout(segment)
                
                wavwrite(outname, SR, segment)
                print "[II] Saving %s" % outname