view experiment-reverb/code/segmentation2.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
# -*- coding: utf-8 -*-
"""
Created on Mon Jun  1 11:42:06 2015

@author: Emmanouil Theofanis Chourdakis
"""

# Note, reference everything!
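
# Example invocation (a hypothetical sketch; the filenames below are
# placeholders, not files shipped with this repository):
#
#     python segmentation2.py input_track.wav input_track_features.yaml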

import sys
from sys import argv


if __name__=="__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments.")
        print("Usage:")
        print("%s <input> <output>" % argv[0])
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc.")
        print("<output>\tThe output filename, in .json or .yaml format")
        sys.exit(-1)
    else:
        
        print("[II] Loading libraries")
        
        import essentia
        from essentia import Pool
        from essentia.standard import  *
        import yaml
        
        
        # requires matplotlib
        from pylab import *
        
        #requires numpy
        from numpy import *
        
        #requires scikit-learn
        from sklearn.metrics import pairwise_distances        
        
        d = {}
        v = {}
        
        fname = argv[1]
        outfname = argv[2]
        


        trackname = fname.split('.')[0].split('/')[-1]  

        
        ext = outfname.rsplit('.', 1)[-1].lower()
        if ext not in ['json', 'yaml']:
            print("Please choose a .json or .yaml file as the output file.")
            sys.exit(-1)
        else:
            # YamlOutput writes either YAML or JSON, depending on 'format'.
            output = YamlOutput(filename=outfname, format=ext)
        
        print("Feature extraction of `%s\'" % fname)

        # Sampling Rate
        SR = 16000.0
        
        # Sampling period (seconds per sample)
        T = 1.0/SR

        # Frame size: 23 ms, rounded to an even number of samples
        tframeSize = 23  # ms
        frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000), 2) == 0 \
                                                  else int(floor(tframeSize*SR/1000))

        # Hop size: half a frame (integer division so Essentia gets an int)
        hopSize = frameSize // 2
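        # Worked example with the values above: 23 ms at 16 kHz gives
        # ceil(23*16000/1000) = 368 samples, which is already even, so
        # frameSize = 368 and hopSize = 184.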
        
        # Load Audio
        audio = MonoLoader(filename = fname, sampleRate=SR)()
        
        
        #Window Frames
        w = Windowing(size = frameSize, type = 'hamming')    
        
        # Spectrum
        spec = Spectrum(size=1024)
        
        # Pool to append mean and variance
        pool = Pool()
        globalPool = Pool()
        
        # Features used in the feature extraction stage:
        # Spectral Contrast, MFCCs, Zero-Crossing Rate, RMS,
        # Crest Factor, Spectral Centroid, Spectral Roll-Off, Spectral Flux
        
        # Spectral Contrast
        sc =  SpectralContrast(frameSize = frameSize, highFrequencyBound = 8000, sampleRate = SR)        
        
        # MFCCs
        mfccs = MFCC(highFrequencyBound = 8000, sampleRate = SR)

        # Spectral Centroid
        centroid = Centroid(range = SR/2)
        
        # Spectral Roll-Off
        rolloff = RollOff(sampleRate = SR, cutoff = 0.9)
        
        # Spectral Flux
        flux = Flux()
        
        # Zero Crossing Rate
        zcr = ZeroCrossingRate()        

        # RMS 
        rms = RMS()
        
        # Crest Factor
        crest = Crest()

        

        
        # Segmentation based on onset detection
        print("[II] Calculating features for %s, please wait..." % fname)        
        # Onset Detection
        
        print("[II] Splitting to onsets...")
    
        onsetdetection = OnsetDetectionGlobal(frameSize = frameSize, hopSize = hopSize, sampleRate = SR)(audio)
        onsets = Onsets()(essentia.array([onsetdetection]), [1])
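
        # Onsets() returns onset times in seconds; the loop below converts each
        # pair of consecutive onsets to sample indices (time * SR) and slices the
        # audio into inter-onset intervals.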

            
        
        print("[II] done, extracting features...")
        for o in range(0, len(onsets)-1):
            # Inter-onset interval (IOI), converted from seconds to samples
            IOI = audio[int(onsets[o]*SR):int(onsets[o+1]*SR)]

            if len(IOI) == 0:
                break
            
            for frame in FrameGenerator(IOI, frameSize, hopSize):
                # Temporal Features
            
                zerocrossingrate = zcr(frame)              
                rmsvalues = rms(frame)
                
                # Spectral features
                framespectrum = spec(w(frame))
                framecontrast = sc(framespectrum)
                mfcc_coeffs = mfccs(framespectrum)[1]
                spectralcentroid = centroid(framespectrum)
                spectralrolloff = rolloff(framespectrum)
                spectralflux = flux(framespectrum)
                                
                
                
                pool.add('lowlevel.zcr', zerocrossingrate)
                pool.add('lowlevel.rms', rmsvalues)
                pool.add('lowlevel.spectrum.centroid', spectralcentroid)
                pool.add('lowlevel.spectrum.rolloff', spectralrolloff)
                pool.add('lowlevel.mfcc.coeffs', mfcc_coeffs)
                pool.add('lowlevel.spectrum.magnitude', framespectrum)
                pool.add('lowlevel.contrast.contrast',  framecontrast[0])  
                pool.add('lowlevel.contrast.valleys',  framecontrast[1])            
                pool.add('lowlevel.spectrum.flux', spectralflux)
                
                
            
            spectrumfull = pool['lowlevel.spectrum.magnitude']
            spectralcontrast = pool['lowlevel.contrast.contrast']
            spectralvalleys = pool['lowlevel.contrast.valleys']
            spectralcentroidfeature = pool['lowlevel.spectrum.centroid']
            spectralrollofffeature = pool['lowlevel.spectrum.rolloff']
            spectralfluxfeature = pool['lowlevel.spectrum.flux']
            
            spectralfeature = concatenate((spectralcontrast,spectralvalleys),1)
            mfccfeature = pool['lowlevel.mfcc.coeffs']
            zcrfeature = pool['lowlevel.zcr']
            rmsfeature = pool['lowlevel.rms']
            crestfeature = crest(rmsfeature)
            
            meanspectralfeature = mean(spectralfeature, 0)
            for i in range(0, shape(spectralfeature)[1]):
                globalPool.add('spectralcontrast_%d' % i , meanspectralfeature[i])
            globalPool.add('spectralcentroid', mean(spectralcentroidfeature, 0))
            globalPool.add('spectralrolloff', mean(spectralrollofffeature, 0))
            globalPool.add('spectralflux', mean(spectralfluxfeature, 0))
            
            # Expand mfccs
            meanmfcc = mean(mfccfeature, 0)
            for i in range(0, shape(mfccfeature)[1]):
                globalPool.add('mfcc_%d' % i, meanmfcc[i])
                
            
            globalPool.add('zcr', mean(zcrfeature, 0))
            globalPool.add('rms', mean(rmsfeature, 0))
            globalPool.add('crest', crestfeature)
            
            pool.clear()

        print("[II] done.")  
        
        
        print("[II] Saving data to %s:" % outfname)
        globalPool.add("metadata.filename", fname)
        output(globalPool)
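
# A rough sketch of the resulting .yaml/.json layout (assuming YamlOutput nests
# dotted descriptor names and writes each pool entry as a list; the exact layout
# may differ between Essentia versions):
#
#   metadata:
#       filename: ["<input filename>"]
#   spectralcontrast_0: [<one mean value per inter-onset interval>, ...]
#   ...
#   mfcc_0: [...]
#   spectralcentroid: [...]
#   spectralrolloff: [...]
#   spectralflux: [...]
#   zcr: [...]
#   rms: [...]
#   crest: [...]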