diff experiment-reverb/code/features_extract.py @ 0:246d5546657c

initial commit, needs cleanup
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Wed, 14 Dec 2016 13:15:48 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiment-reverb/code/features_extract.py	Wed Dec 14 13:15:48 2016 +0000
@@ -0,0 +1,231 @@
+#!/usr/bin/python2
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr 17 10:32:20 2015
+
+@author: Emmanouil Chourdakis
+"""
+
+# Note, reference everything!
+
+from sys import argv
+import sys
+
+
+if __name__=="__main__":
+    if len(argv) != 2:
+        print("Incorrect number of arguments:")
+        print("Usage: ")
+        print("%s <trackdir>")
+        print("")
+        print("Arguments:")
+        print("<trackdir>\tThe directory containing the tracks, features will be stored in the same directory as .yaml files")
+      
+        sys.exit(-1)
+    else:
+        
+        print("[II] Loading libraries")
+        
+        import essentia
+        from essentia import Pool
+        from essentia.standard import  *
+        import csv
+        import yaml
+        
+
+
+                
+        
+        # reqyures matplotlib
+        from pylab import *
+        
+        #requires numpy
+        from numpy import *
+        
+        #requires scikit-learn
+        from sklearn.metrics import pairwise_distances        
+        
+        # For searching the directory
+        from glob import glob
+        
+        traindir = argv[1]
+        
+        songs_in_dir = glob("%s/*.wav" % traindir)
+        
+        print "[II] Using files: %s" % songs_in_dir
+
+        
+        for f_ in songs_in_dir:
+            d = {}
+            v = {}
+            
+            fname = f_
+            
+            
+            outfname = "%s_features.yaml" % f_.split('.')[0]
+            
+            print "[II] Using: %s" % f_
+            print "[II] and output: %s" % outfname
+            
+
+            
+            if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
+                print("Please choose a .json or .yaml as an output file.")
+                sys.exit(-1)
+            else:
+                if outfname.partition('.')[-1].lower() == 'json':
+                    output = YamlOutput(filename = outfname, format='json') 
+                else:
+                    output = YamlOutput(filename = outfname, format='yaml')
+
+            print("[II] Feature extraction of `%s\'" % fname)
+    
+            # Sampling Rate
+            SR = 16000.0
+            
+            # Sampling Frequency
+            T = 1.0/SR
+    
+            # FrameSize
+            tframeSize = 23 #ms    
+            frameSize = int(ceil(tframeSize*SR/1000)) if mod(ceil(tframeSize*SR/1000),2) == 0 \
+                                                      else int(floor(tframeSize*SR/1000))
+                                                      
+            # HopSize
+            hopSize = frameSize/2
+            
+            # Load Audio
+            audio = MonoLoader(filename = fname, sampleRate=16000)()
+            
+                            
+            #Window Frames
+            w = Windowing(size = frameSize, type = 'hamming')    
+            
+            # Spectrum
+            spec = Spectrum(size=1024)
+            
+            # Pool to append mean and variance
+            pool = Pool()
+            globalPool = Pool()
+            
+            # Below are Features to be used in the feature extraction stage
+            # We use, Spectral Contrast, MFCCs, Zero-Crossing rate, RMS, 
+            # Crest Factor, Spectral Centroid, Spectral Occupation, Spectral Flux
+            
+            # Spectral Contrast
+            sc =  SpectralContrast(frameSize = frameSize, highFrequencyBound = 8000, sampleRate = SR)        
+            
+            # MFCCs
+            mfccs = MFCC(highFrequencyBound = 8000, sampleRate = SR)
+    
+            # Spectral Centroid
+            centroid = Centroid(range = SR/2)
+            
+            # Spectral Roll-Off
+            rolloff = RollOff(sampleRate = SR, cutoff = 0.9)
+            
+            # Spectral Flux
+            flux = Flux()
+            
+            # Zero Crossing Rate
+            zcr = ZeroCrossingRate()        
+    
+            # RMS 
+            rms = RMS()
+            
+            # Crest Factor
+            crest = Crest()
+    
+            
+    
+            
+            # Segmentation based on Onset detection-based temporal modeling
+            print("[II] Calculating features for %s, please wait..." % fname)        
+            # Onset Detection
+            
+            print("[II] Splitting to onsets...")
+        
+            onsetdetection = OnsetDetectionGlobal(frameSize = frameSize, hopSize = hopSize, sampleRate = SR)(audio)
+            onsets = Onsets()(essentia.array([onsetdetection]), [1])
+    
+
+            
+            print("[II] done, extracting features...")
+            for o in range(0, len(onsets)-1): 
+                IOI = audio[onsets[o]*SR:onsets[o+1]*SR]
+                
+                
+        
+                
+                if len(IOI) == 0:
+                    break;
+                    
+                
+                for frame in FrameGenerator(IOI, frameSize, hopSize):
+                    # Temporal Features
+                
+                    zerocrossingrate = zcr(frame)              
+                    rmsvalues = rms(frame)
+                    
+                    # Spectral features
+                    framespectrum = spec(w(frame))
+                    framecontrast = sc(framespectrum)
+                    mfcc_coeffs = mfccs(framespectrum)[1]
+                    spectralcentroid = centroid(framespectrum)
+                    spectralrolloff = rolloff(framespectrum)
+                    spectralflux = rolloff(framespectrum)
+                                    
+                    
+                    
+                    pool.add('lowlevel.zcr', zerocrossingrate)
+                    pool.add('lowlevel.rms', rmsvalues)
+                    pool.add('lowlevel.spectrum.centroid', spectralcentroid)
+                    pool.add('lowlevel.spectrum.rolloff', spectralrolloff)
+                    pool.add('lowlevel.mfcc.coeffs', mfcc_coeffs)
+                    pool.add('lowlevel.spectrum.magnitude', framespectrum)
+                    pool.add('lowlevel.contrast.contrast',  framecontrast[0])  
+                    pool.add('lowlevel.contrast.valleys',  framecontrast[1])            
+                    pool.add('lowlevel.spectrum.flux', spectralflux)
+                    
+                    
+                
+                spectrumfull = pool['lowlevel.spectrum.magnitude']
+                spectralcontrast = pool['lowlevel.contrast.contrast']
+                spectralvalleys = pool['lowlevel.contrast.valleys']
+                spectralcentroidfeature = pool['lowlevel.spectrum.centroid']
+                spectralrollofffeature = pool['lowlevel.spectrum.rolloff']
+                spectralfluxfeature = pool['lowlevel.spectrum.flux']
+                
+                spectralfeature = concatenate((spectralcontrast,spectralvalleys),1)
+                mfccfeature = pool['lowlevel.mfcc.coeffs']
+                zcrfeature = pool['lowlevel.zcr']
+                rmsfeature = pool['lowlevel.rms']
+                crestfeature = crest(rmsfeature)
+                
+                
+                meanspectralfeature = mean(spectralfeature, 0)
+                for i in range(0, shape(spectralfeature)[1]):
+                    globalPool.add('spectralcontrast_%d' % i , meanspectralfeature[i])
+                globalPool.add('spectralcentroid', mean(spectralcentroidfeature, 0))
+                globalPool.add('spectralrolloff', mean(spectralrollofffeature, 0))
+                globalPool.add('spectralflux', mean(spectralfluxfeature, 0))
+
+                # Expand mfccs
+                meanmfcc = mean(mfccfeature, 0)
+                for i in range(0, shape(mfccfeature)[1]):
+                    globalPool.add('mfcc_%d' % i, meanmfcc[i])
+                    
+                
+                globalPool.add('zcr', mean(zcrfeature, 0))
+                globalPool.add('rms', mean(rmsfeature, 0))
+                globalPool.add('crest', crestfeature)
+                
+                pool.clear()
+    
+            print("[II] done.")  
+            
+            
+            print("[II] Saving data to %s:" % outfname)
+            globalPool.add("metadata.filename", fname)
+            output(globalPool)
+