#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 17 10:32:20 2015

@author: Emmanouil Chourdakis

Command-line script: for every ``.wav`` file in the directory given as the
single argument, split the audio at detected onsets, compute frame-level
features for each inter-onset segment, average them per segment and write
the result next to the track as ``<track>_features.yaml``.
"""

# Note, reference everything!

from sys import argv
import os
import sys


if __name__ == "__main__":
    if len(argv) != 2:
        print("Incorrect number of arguments:")
        print("Usage: ")
        # The original printed a literal "%s"; fill in the script name.
        print("%s <trackdir>" % argv[0])
        print("")
        print("Arguments:")
        print("<trackdir>\tThe directory containing the tracks, features will be stored in the same directory as .yaml files")

        sys.exit(-1)

    print("[II] Loading libraries")

    # Third-party imports are deferred until the arguments are validated so
    # that the usage message is shown even when these packages are missing.
    # The star imports must stay at module level (not inside a function).
    import essentia
    from essentia import Pool
    from essentia.standard import *
    import csv
    import yaml

    # requires matplotlib
    from pylab import *

    # requires numpy
    from numpy import *

    # requires scikit-learn
    from sklearn.metrics import pairwise_distances

    # For searching the directory
    from glob import glob

    traindir = argv[1]

    songs_in_dir = glob("%s/*.wav" % traindir)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("[II] Using files: %s" % songs_in_dir)

    for fname in songs_in_dir:
        # Strip only the final extension; splitting on the first '.' broke
        # for paths such as "./tracks/a.wav" or "my.dir/a.wav".
        outfname = "%s_features.yaml" % os.path.splitext(fname)[0]

        print("[II] Using: %s" % fname)
        print("[II] and output: %s" % outfname)

        # The output name always ends in ".yaml", so the former json/yaml
        # extension check could never select anything else.
        output = YamlOutput(filename=outfname, format='yaml')

        print("[II] Feature extraction of `%s\'" % fname)

        # Sampling rate (Hz)
        SR = 16000.0

        # Frame size: tframeSize milliseconds expressed in samples. If the
        # rounded-up length is odd, the rounded-down length is used instead.
        tframeSize = 23  # ms
        frameSize = int(ceil(tframeSize * SR / 1000))
        if frameSize % 2 != 0:
            frameSize = int(floor(tframeSize * SR / 1000))

        # Hop size: half a frame. Floor division keeps it an integer on
        # Python 3 as well as Python 2.
        hopSize = frameSize // 2

        # Load audio as mono at the analysis sampling rate
        audio = MonoLoader(filename=fname, sampleRate=SR)()

        # Window applied to each frame
        w = Windowing(size=frameSize, type='hamming')

        # Spectrum
        spec = Spectrum(size=1024)

        # `pool` collects the frame-level values of the current segment and
        # is cleared after each segment; `globalPool` collects one averaged
        # value per segment and is what gets written to disk.
        pool = Pool()
        globalPool = Pool()

        # Below are the features used in the feature extraction stage:
        # Spectral Contrast, MFCCs, Zero-Crossing Rate, RMS, Crest Factor,
        # Spectral Centroid, Spectral Roll-Off, Spectral Flux

        # Spectral Contrast
        sc = SpectralContrast(frameSize=frameSize, highFrequencyBound=8000, sampleRate=SR)

        # MFCCs
        mfccs = MFCC(highFrequencyBound=8000, sampleRate=SR)

        # Spectral Centroid
        centroid = Centroid(range=SR / 2)

        # Spectral Roll-Off
        rolloff = RollOff(sampleRate=SR, cutoff=0.9)

        # Spectral Flux
        flux = Flux()

        # Zero Crossing Rate
        zcr = ZeroCrossingRate()

        # RMS
        rms = RMS()

        # Crest Factor
        crest = Crest()

        # Segmentation based on onset detection-based temporal modeling
        print("[II] Calculating features for %s, please wait..." % fname)

        print("[II] Splitting to onsets...")

        onsetdetection = OnsetDetectionGlobal(frameSize=frameSize, hopSize=hopSize, sampleRate=SR)(audio)
        onsets = Onsets()(essentia.array([onsetdetection]), [1])

        print("[II] done, extracting features...")
        for o in range(len(onsets) - 1):
            # Onset values are multiplied by SR to obtain sample positions;
            # they must be cast to int because NumPy rejects float indices.
            seg_start = int(onsets[o] * SR)
            seg_end = int(onsets[o + 1] * SR)
            IOI = audio[seg_start:seg_end]

            # Skip an empty inter-onset segment instead of abandoning all
            # the segments that follow it.
            if len(IOI) == 0:
                continue

            for frame in FrameGenerator(IOI, frameSize, hopSize):
                # Temporal features
                zerocrossingrate = zcr(frame)
                rmsvalues = rms(frame)

                # Spectral features
                framespectrum = spec(w(frame))
                framecontrast = sc(framespectrum)
                mfcc_coeffs = mfccs(framespectrum)[1]
                spectralcentroid = centroid(framespectrum)
                spectralrolloff = rolloff(framespectrum)
                # Was rolloff(framespectrum): the flux feature duplicated
                # the roll-off and the Flux algorithm was never called.
                spectralflux = flux(framespectrum)

                pool.add('lowlevel.zcr', zerocrossingrate)
                pool.add('lowlevel.rms', rmsvalues)
                pool.add('lowlevel.spectrum.centroid', spectralcentroid)
                pool.add('lowlevel.spectrum.rolloff', spectralrolloff)
                pool.add('lowlevel.mfcc.coeffs', mfcc_coeffs)
                pool.add('lowlevel.spectrum.magnitude', framespectrum)
                pool.add('lowlevel.contrast.contrast', framecontrast[0])
                pool.add('lowlevel.contrast.valleys', framecontrast[1])
                pool.add('lowlevel.spectrum.flux', spectralflux)

            # Frame-level values of this segment, one row/entry per frame
            spectrumfull = pool['lowlevel.spectrum.magnitude']
            spectralcontrast = pool['lowlevel.contrast.contrast']
            spectralvalleys = pool['lowlevel.contrast.valleys']
            spectralcentroidfeature = pool['lowlevel.spectrum.centroid']
            spectralrollofffeature = pool['lowlevel.spectrum.rolloff']
            spectralfluxfeature = pool['lowlevel.spectrum.flux']

            # Contrast and valley coefficients side by side, per frame
            spectralfeature = concatenate((spectralcontrast, spectralvalleys), 1)
            mfccfeature = pool['lowlevel.mfcc.coeffs']
            zcrfeature = pool['lowlevel.zcr']
            rmsfeature = pool['lowlevel.rms']
            # Crest computed over the segment's per-frame RMS values
            crestfeature = crest(rmsfeature)

            # One averaged value per segment for each contrast/valley column
            meanspectralfeature = mean(spectralfeature, 0)
            for i, value in enumerate(meanspectralfeature):
                globalPool.add('spectralcontrast_%d' % i, value)
            globalPool.add('spectralcentroid', mean(spectralcentroidfeature, 0))
            globalPool.add('spectralrolloff', mean(spectralrollofffeature, 0))
            globalPool.add('spectralflux', mean(spectralfluxfeature, 0))

            # Expand mfccs: one key per coefficient
            meanmfcc = mean(mfccfeature, 0)
            for i, value in enumerate(meanmfcc):
                globalPool.add('mfcc_%d' % i, value)

            globalPool.add('zcr', mean(zcrfeature, 0))
            globalPool.add('rms', mean(rmsfeature, 0))
            globalPool.add('crest', crestfeature)

            # Start the next segment with an empty frame-level pool
            pool.clear()

        print("[II] done.")

        print("[II] Saving data to %s:" % outfname)
        globalPool.add("metadata.filename", fname)
        output(globalPool)