annotate experiment-reverb/code/features_extract.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
rev   line source
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 17 10:32:20 2015

@author: Emmanouil Chourdakis

Extract audio features from every ``.wav`` file in a directory.

Each track is split at detected onsets; within every inter-onset segment
frame-level features are computed and then averaged, and the per-segment
values are written next to the track as ``<track>_features.yaml``.

Usage: ``features_extract.py <trackdir>``
"""

# Note, reference everything!

from sys import argv
import sys
import math
import os
from glob import glob

# Sampling rate in Hz used for loading and for every rate-dependent algorithm.
SAMPLE_RATE = 16000.0

# Nominal analysis frame duration in milliseconds.
FRAME_DURATION_MS = 23


def _frame_size(duration_ms, sample_rate):
    """Return the analysis frame length in samples.

    The duration is converted to samples and rounded up; if that value is
    odd, the rounded-down value is used instead (same rule as the original
    inline expression).
    """
    samples = duration_ms * sample_rate / 1000
    rounded_up = int(math.ceil(samples))
    if rounded_up % 2 == 0:
        return rounded_up
    return int(math.floor(samples))


def _features_path(track_path):
    """Return the output file name for ``track_path``.

    Only the final extension is stripped, so directories or file names that
    contain additional dots (e.g. ``./tracks/a.wav``) are handled correctly.
    """
    return "%s_features.yaml" % os.path.splitext(track_path)[0]


def _extract_track(fname, outfname):
    """Compute per-segment features for one track and save them as YAML.

    Arguments:
    fname     -- path of the audio file to analyse
    outfname  -- path of the YAML file to write
    """
    # Heavy third-party libraries are imported lazily so that the usage
    # message can be shown even when they are not installed.
    import essentia
    import numpy as np
    from essentia import Pool
    from essentia.standard import (
        Centroid,
        Crest,
        Flux,
        FrameGenerator,
        MFCC,
        MonoLoader,
        OnsetDetectionGlobal,
        Onsets,
        RMS,
        RollOff,
        SpectralContrast,
        Spectrum,
        Windowing,
        YamlOutput,
        ZeroCrossingRate,
    )

    # The output name is always derived with a .yaml suffix, so the format
    # is fixed and no extension validation is needed.
    output = YamlOutput(filename=outfname, format='yaml')

    print("[II] Feature extraction of `%s\'" % fname)

    frame_size = _frame_size(FRAME_DURATION_MS, SAMPLE_RATE)
    hop_size = frame_size // 2

    # Load Audio
    audio = MonoLoader(filename=fname, sampleRate=SAMPLE_RATE)()

    # Window Frames
    window = Windowing(size=frame_size, type='hamming')

    # Spectrum
    spectrum = Spectrum(size=1024)

    # `pool` collects frame-level values of the current segment and is
    # cleared after every segment; `global_pool` collects one value per
    # segment and is what gets written to disk.
    pool = Pool()
    global_pool = Pool()

    # Below are Features to be used in the feature extraction stage
    # We use, Spectral Contrast, MFCCs, Zero-Crossing rate, RMS,
    # Crest Factor, Spectral Centroid, Spectral Roll-Off, Spectral Flux
    contrast = SpectralContrast(frameSize=frame_size,
                                highFrequencyBound=8000,
                                sampleRate=SAMPLE_RATE)
    mfcc = MFCC(highFrequencyBound=8000, sampleRate=SAMPLE_RATE)
    centroid = Centroid(range=SAMPLE_RATE / 2)
    rolloff = RollOff(sampleRate=SAMPLE_RATE, cutoff=0.9)
    flux = Flux()
    zcr = ZeroCrossingRate()
    rms = RMS()
    crest = Crest()

    # Segmentation based on Onset detection-based temporal modeling
    print("[II] Calculating features for %s, please wait..." % fname)

    print("[II] Splitting to onsets...")

    detection = OnsetDetectionGlobal(frameSize=frame_size,
                                     hopSize=hop_size,
                                     sampleRate=SAMPLE_RATE)(audio)
    # NOTE(review): Onsets() is used with its default frame rate, which may
    # not match SAMPLE_RATE / hop_size -- confirm the onset times are scaled
    # as intended.
    onsets = Onsets()(essentia.array([detection]), [1])

    print("[II] done, extracting features...")
    for o in range(len(onsets) - 1):
        # Onset times are in seconds; array slicing needs integer samples.
        start = int(onsets[o] * SAMPLE_RATE)
        stop = int(onsets[o + 1] * SAMPLE_RATE)
        segment = audio[start:stop]

        # An empty segment stops the analysis of this track (kept from the
        # original; presumably it means the onsets ran past the audio end).
        if len(segment) == 0:
            break

        for frame in FrameGenerator(segment, frame_size, hop_size):
            # Temporal features
            pool.add('lowlevel.zcr', zcr(frame))
            pool.add('lowlevel.rms', rms(frame))

            # Spectral features
            frame_spectrum = spectrum(window(frame))
            frame_contrast = contrast(frame_spectrum)

            pool.add('lowlevel.spectrum.centroid', centroid(frame_spectrum))
            pool.add('lowlevel.spectrum.rolloff', rolloff(frame_spectrum))
            # Second output of MFCC holds the coefficients.
            pool.add('lowlevel.mfcc.coeffs', mfcc(frame_spectrum)[1])
            pool.add('lowlevel.contrast.contrast', frame_contrast[0])
            pool.add('lowlevel.contrast.valleys', frame_contrast[1])
            # Fixed: this used to call rolloff(), duplicating the roll-off
            # values under the flux key.
            pool.add('lowlevel.spectrum.flux', flux(frame_spectrum))

        # Collapse the frame-level values of this segment into one value
        # per feature dimension.
        contrast_and_valleys = np.concatenate(
            (pool['lowlevel.contrast.contrast'],
             pool['lowlevel.contrast.valleys']), 1)
        for i, value in enumerate(np.mean(contrast_and_valleys, 0)):
            global_pool.add('spectralcontrast_%d' % i, value)

        global_pool.add('spectralcentroid',
                        np.mean(pool['lowlevel.spectrum.centroid'], 0))
        global_pool.add('spectralrolloff',
                        np.mean(pool['lowlevel.spectrum.rolloff'], 0))
        global_pool.add('spectralflux',
                        np.mean(pool['lowlevel.spectrum.flux'], 0))

        # Expand mfccs
        for i, value in enumerate(np.mean(pool['lowlevel.mfcc.coeffs'], 0)):
            global_pool.add('mfcc_%d' % i, value)

        rms_per_frame = pool['lowlevel.rms']
        global_pool.add('zcr', np.mean(pool['lowlevel.zcr'], 0))
        global_pool.add('rms', np.mean(rms_per_frame, 0))
        # Crest factor is taken over the sequence of per-frame RMS values.
        global_pool.add('crest', crest(rms_per_frame))

        pool.clear()

    print("[II] done.")

    print("[II] Saving data to %s:" % outfname)
    global_pool.add("metadata.filename", fname)
    output(global_pool)


def main(args):
    """Run the extraction for every ``.wav`` file in ``args[1]``.

    Arguments:
    args -- command line in ``sys.argv`` form: program name followed by
            exactly one argument, the track directory

    Exits with status -1 after printing usage when the argument count is
    wrong.
    """
    if len(args) != 2:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <trackdir>" % args[0])
        print("")
        print("Arguments:")
        print("<trackdir>\tThe directory containing the tracks, features will be stored in the same directory as .yaml files")

        sys.exit(-1)

    print("[II] Loading libraries")

    # Imported here only to fail early, before any file is touched, when
    # Essentia is missing.
    import essentia.standard  # noqa: F401

    traindir = args[1]

    songs_in_dir = glob("%s/*.wav" % traindir)

    print("[II] Using files: %s" % songs_in_dir)

    for fname in songs_in_dir:
        outfname = _features_path(fname)

        print("[II] Using: %s" % fname)
        print("[II] and output: %s" % outfname)

        _extract_track(fname, outfname)


if __name__ == "__main__":
    main(argv)