fazekasgy@37
|
1 '''PyMFCC_oldstyle.py - This example Vampy plugin demonstrates
|
fazekasgy@37
|
2 how to return spectrogram-like features.
|
fazekasgy@37
|
3
|
fazekasgy@37
|
4 This plugin uses backward compatible syntax and
|
fazekasgy@37
|
5 no extension module.
|
fazekasgy@37
|
6
|
fazekasgy@37
|
7 Centre for Digital Music, Queen Mary University of London.
|
fazekasgy@37
|
8 Copyright 2006 Gyorgy Fazekas, QMUL.
|
fazekasgy@37
|
9 (See Vamp API for licence information.)
|
fazekasgy@37
|
10
|
fazekasgy@37
|
11 Constants for Mel frequency conversion and filter
|
fazekasgy@37
|
12 centre calculation are taken from the GNU GPL licenced
|
fazekasgy@37
|
13 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
|
fazekasgy@37
|
14 '''
|
fazekasgy@37
|
15
|
fazekasgy@37
|
16 import sys,numpy
|
fazekasgy@37
|
17 from numpy import log,exp,floor,sum
|
fazekasgy@37
|
18 from numpy import *
|
fazekasgy@37
|
19 from numpy.fft import *
|
fazekasgy@37
|
20
|
fazekasgy@37
|
21
|
fazekasgy@37
|
class melScaling(object):
    '''Mel frequency warping and DCT helper used to compute MFCCs.

    The triangular Mel filter bank and the DCT matrix are not built by
    the constructor; they are created lazily by update(), which does the
    work only on its first call after construction.

    The code is written in syntax accepted by both Python 2 and Python 3
    and references numpy only through the qualified module name.
    '''

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Initialise frequency warping and DCT matrix parameters.

        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None:
            maxHz = self.NqHz
        self.maxHz = maxHz
        self.inputSize = inputSize
        self.numBands = numBands
        # valid:   the matrices exist and match the current parameters
        # updated: update() has already been attempted for this set-up
        self.valid = False
        self.updated = False

    @staticmethod
    def _hzToMel(hz):
        '''Convert a frequency in Hz to the Mel scale used by this class.'''
        return 1000*numpy.log(1+hz/700.0)/numpy.log(1+1000.0/700.0)

    def update(self):
        '''Build the filter and DCT matrices unless already attempted.

        Returns self.valid: True once the matrices have been built.
        Subsequent calls return the stored flag without recalculating.

        Raises ValueError (an Exception subclass) when maxHz is above
        the Nyquist frequency; in that case the object stays invalid and
        later calls return False.
        '''
        # make sure this will run only once if called from a vamp process
        if self.updated:
            return self.valid
        self.updated = True
        self.valid = False
        print('Updating parameters and recalculating filters: ')
        print('Nyquist:  %s' % self.NqHz)

        if self.maxHz > self.NqHz:
            raise ValueError('Maximum frequency must not exceed the Nyquist frequency')

        self.maxMel = self._hzToMel(self.maxHz)
        self.minMel = self._hzToMel(self.minHz)
        print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n' %(self.minHz,self.maxHz,self.minMel,self.maxMel))
        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # iterator over the rows of the (inputSize x numBands) filter matrix
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.

        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters. Requires self.minMel and self.maxMel, which
        are set by update().

        Returns an integer array of numBands+2 bin indices.
        '''
        # equally spaced points on the Mel axis between minMel and maxMel
        centresMel = numpy.arange(numBands+2) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
        # map back to Hz, scale to bin indices and round to nearest bin
        centresBin = numpy.floor(0.5 + 700.0*inputSize*(numpy.exp(centresMel*numpy.log(1+1000.0/700.0)/1000.0)-1)/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.

        Stores the band edges in self.filterCentres and returns an
        array of shape (inputSize, numBands), one triangular filter
        per column.
        '''
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for band in range(numBands):
            # each filter spans three consecutive centres
            start,centre,end = self.filterCentres[band:band+3]
            self.setFilter(filterMatrix[band],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.

        Writes a triangle into filt in place: rising from 0 at
        filterStart towards filterCentre, then falling from 1 at
        filterCentre towards filterEnd. Nothing is returned.
        '''
        riseWidth = float(filterCentre-filterStart)
        fallWidth = float(filterEnd-filterCentre)
        # an empty slope (zero width) produces an empty array, so the
        # division below is never evaluated for a zero denominator
        rising = (numpy.arange(filterStart,filterCentre)-filterStart)/riseWidth
        falling = (filterEnd-numpy.arange(filterCentre,filterEnd))/fallWidth
        filt[filterStart:filterCentre] = rising
        filt[filterCentre:filterEnd] = falling

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.

        Multiplies the magnitude spectrum by the filter matrix built
        by update().
        '''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) but with 64 bit precision.'''
        rowIndex = numpy.arange(size,dtype=numpy.float64).reshape(size,1)
        # angle for column j is pi*(j+0.5)/size; row k scales it by k
        columnAngle = (numpy.pi * (numpy.arange(size,dtype=numpy.float64)+0.5) / size).reshape(1,size)
        DCTmx = (1.0/numpy.sqrt( size / 2.0)) * numpy.cos(rowIndex * columnAngle)
        # the first row gets the additional 1/sqrt(2) normalisation
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0)/2.0)
        return DCTmx

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.

        Takes the logarithm of the warped spectrum and applies the DCT.
        When cn is False the first coefficient is set to zero.
        Note: bands whose value is exactly zero give -inf under the
        logarithm, which propagates into the result.
        '''
        mfccs=self.dct(numpy.log(warpedSpectrum))
        if cn is False:
            mfccs[0] = 0.0
        return mfccs
|
fazekasgy@37
|
129
|
fazekasgy@37
|
130
|
fazekasgy@37
|
class PyMFCC_oldstyle(melScaling):
    '''Example Vampy plugin returning MFCCs, the Mel scaled spectrum
    and the Mel filter matrix, using the old (dictionary based) syntax.

    The plugin derives from melScaling, so the warping parameters
    (minHz, maxHz, numBands) and the valid/updated flags live directly
    on the plugin instance.
    '''

    def __init__(self,inputSampleRate):
        '''Set default block/step sizes and default warping parameters.'''
        self.vampy_flags = 1 # vf_DEBUG = 1
        self.m_inputSampleRate = inputSampleRate
        self.m_stepSize = 1024
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # cnull is the index of the first cepstral coefficient returned:
        # 1 skips C0, 0 includes it (see setParameter)
        self.cnull = 1
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing set-up and reset the Mel scaling state.

        The frequency bounds currently held by the plugin are passed
        on explicitly so that values set through setParameter() before
        this call are kept rather than replaced by the defaults.
        Always returns True.
        '''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        '''Return the plugin maker string.'''
        return 'Vampy Test Plugins'

    def getCopyright(self):
        '''Return the copyright string.'''
        return 'Plugin By George Fazekas'

    def getName(self):
        '''Return the human readable plugin name.'''
        return 'Vampy Old Style MFCC Plugin'

    def getIdentifier(self):
        '''Return the plugin identifier.'''
        return 'vampy-mfcc-test-old'

    def getDescription(self):
        '''Return the plugin description.'''
        return 'A simple MFCC plugin. (using the old syntax)'

    def getMaxChannelCount(self):
        '''Return the maximum number of input channels (1).'''
        return 1

    def getInputDomain(self):
        '''Return the input domain; processN() performs its own FFT.'''
        return 'TimeDomain'

    def getPreferredBlockSize(self):
        '''Return the preferred block size in samples.'''
        return 2048

    def getPreferredStepSize(self):
        '''Return the preferred step size in samples.'''
        return 512

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, warped spectrum, filter matrix.

        The MFCC output has numBands - cnull bins because processN()
        drops the first cnull coefficients. The other two outputs always
        carry numBands values per feature, so their bin count is set to
        numBands.
        '''
        numBands = int(self.numBands)

        Generic={
        'hasFixedBinCount':True,
        'binCount':numBands-self.cnull,
        'hasKnownExtents':False,
        'isQuantized':True,
        'sampleType':'OneSamplePerStep'
        }

        MFCC=Generic.copy()
        MFCC.update({
        'identifier':'mfccs',
        'name':'MFCCs',
        'description':'MFCC Coefficients',
        'binNames':['C '+str(x) for x in range(self.cnull,numBands)],
        'unit':''
        })

        warpedSpectrum=Generic.copy()
        warpedSpectrum.update({
        'identifier':'warped-fft',
        'name':'Mel Scaled Spectrum',
        'description':'Mel Scaled Magnitide Spectrum',
        'binCount':numBands,
        'unit':'Mel'
        })

        melFilter=Generic.copy()
        melFilter.update({
        'identifier':'mel-filter',
        'name':'Mel Filter Matrix',
        'description':'Returns the created filter matrix.',
        'binCount':numBands,
        'sampleType':'FixedSampleRate',
        'sampleRate':self.m_inputSampleRate/self.m_stepSize,
        'unit':''
        })

        return [MFCC,warpedSpectrum,melFilter]

    def getParameterDescriptors(self):
        '''Describe the four parameters: melbands, minHz, maxHz, cnull.'''
        melbands = {
        'identifier':'melbands',
        'name':'Number of bands (coefficients)',
        'description':'Set the number of coefficients.',
        'unit':'',
        'minValue':2.0,
        'maxValue':128.0,
        'defaultValue':40.0,
        'isQuantized':True,
        'quantizeStep':1.0
        }

        cnull = {
        'identifier':'cnull',
        'name':'Return C0',
        'description':'Select if the DC coefficient is required.',
        'unit':'',
        'minValue':0.0,
        'maxValue':1.0,
        'defaultValue':0.0,
        'isQuantized':True,
        'quantizeStep':1.0
        }

        minHz = {
        'identifier':'minHz',
        'name':'minimum frequency',
        'description':'Set the lower frequency bound.',
        'unit':'Hz',
        'minValue':0.0,
        'maxValue':24000.0,
        'defaultValue':0.0,
        'isQuantized':True,
        'quantizeStep':1.0
        }

        maxHz = {
        'identifier':'maxHz',
        'name':'maximum frequency',
        'description':'Set the upper frequency bound.',
        'unit':'Hz',
        'minValue':100.0,
        'maxValue':24000.0,
        'defaultValue':11025.0,
        'isQuantized':True,
        'quantizeStep':100.0
        }

        return [melbands,minHz,maxHz,cnull]

    def setParameter(self,paramid,newval):
        '''Set one parameter and schedule the matrices for recalculation.

        minHz is accepted only when below both maxHz and the Nyquist
        frequency. maxHz is accepted only when below the Nyquist
        frequency and more than 1000 Hz above minHz; otherwise it is
        set to the Nyquist frequency. The 'cnull' parameter ("Return
        C0") is stored inverted, as the index of the first coefficient
        to return.
        '''
        self.valid = False
        # update() does nothing while 'updated' is set, so clear it as
        # well; otherwise a parameter change after the first update()
        # would leave the plugin permanently invalid
        self.updated = False
        if paramid == 'minHz':
            if newval < self.maxHz and newval < self.NqHz:
                self.minHz = float(newval)
            print('minHz:  %s' % self.minHz)
        elif paramid == 'maxHz':
            print('trying to set maxHz to:  %s' % newval)
            if newval < self.NqHz and newval > self.minHz+1000:
                self.maxHz = float(newval)
            else:
                self.maxHz = self.NqHz
            print('set to:  %s' % self.maxHz)
        elif paramid == 'cnull':
            self.cnull = int(not int(newval))
        elif paramid == 'melbands':
            self.numBands = int(newval)
        return

    def getParameter(self,paramid):
        '''Return the current value of a parameter as a float.

        Unknown identifiers yield 0.0.
        '''
        if paramid == 'minHz':
            return float(self.minHz)
        if paramid == 'maxHz':
            return float(self.maxHz)
        if paramid == 'cnull':
            # stored inverted, see setParameter
            return float(not int(self.cnull))
        if paramid == 'melbands':
            return float(self.numBands)
        return 0.0

    def processN(self,membuffer,frameSampleStart):
        '''Compute MFCCs and the Mel spectrum for one block of audio.

        membuffer[0] is read as a buffer of float32 samples. Returns a
        list with one feature list per output ([] for the filter matrix
        output), or [] when the Mel scaling set-up is not valid.
        '''
        # recalculate the filter and DCT matrices if needed
        if not self.update():
            return []

        fftsize = self.m_blockSize
        halfSize = fftsize//2
        audioSamples = numpy.frombuffer(membuffer[0],numpy.float32)

        complexSpectrum = numpy.fft.fft(self.window*audioSamples,fftsize)

        # keep the first half of the spectrum and normalise it
        magnitudeSpectrum = numpy.abs(complexSpectrum)[0:halfSize] / halfSize
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)

        output_melCepstrum = [{
        'hasTimestamp':False,
        'values':melCepstrum[self.cnull:].tolist()
        }]

        output_melSpectrum = [{
        'hasTimestamp':False,
        'values':melSpectrum.tolist()
        }]

        return [output_melCepstrum,output_melSpectrum,[]]

    def getRemainingFeatures(self):
        '''Return the rows of the Mel filter matrix as features.

        Each remaining row of the filter iterator becomes one
        timestamped feature, with timestamps advancing by the step
        size. Returns [] when the Mel scaling set-up is not valid.
        '''
        if not self.update():
            return []
        frameSampleStart = 0
        output_melFilter = []

        # filterIter is consumed here, so a second call yields no rows
        for melFilter in self.filterIter:
            output_melFilter.append({
            'hasTimestamp':True,
            'timeStamp':frameSampleStart,
            'values':melFilter.tolist()
            })
            frameSampleStart += self.m_stepSize

        return [[],[],output_melFilter]
|
fazekasgy@37
|
345
|
fazekasgy@37
|
346
|
fazekasgy@37
|
347 # ============================================
|
fazekasgy@37
|
348 # Simple Unit Tests
|
fazekasgy@37
|
349 # ============================================
|
fazekasgy@37
|
350
|
fazekasgy@37
|
def main():
    '''Build a 4-band melScaling object, print its DCT matrix and exit.

    Exits with status 0; an invalid set-up surfaces as the exception
    raised by update().
    '''
    dct = melScaling(44100,2048,numBands=4)
    dct.update()
    print(dct.DCTMatrix)
    # print(dct.getMFCCs(numpy.array([0.0,0.1,0.0,-0.1],numpy.float64)))
    sys.exit(0)


if __name__ == '__main__':
    main()
|
fazekasgy@37
|
361
|