comparison Example VamPy plugins/PyMFCC.py @ 37:27bab3a16c9a vampy2final

new branch Vampy2final
author fazekasgy
date Mon, 05 Oct 2009 11:28:00 +0000
parents
children d56f48aafb99
comparison
equal deleted inserted replaced
-1:000000000000 37:27bab3a16c9a
1 '''PyMFCC.py - This example Vampy plugin demonstrates
2 how to return spectrogram-like features and how to return
3 data using the getRemainingFeatures() function.
4
5 The plugin has frequency domain input and is using the
6 numpy array interface. (Flag: vf_ARRAY)
7
8 Outputs:
9 1) 2-128 MFCC coefficients
10 2) Mel-warped spectrum used for the MFCC computation
11 3) Filter matrix used for Mel scaling
12
13 Centre for Digital Music, Queen Mary University of London.
14 Copyright (C) 2009 Gyorgy Fazekas, QMUL. (See Vamp sources
15 for licence information.)
16
17 Constants for Mel frequency conversion and filter
18 centre calculation are taken from the GNU GPL licenced
19 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
20 '''
21
22 import sys,numpy,vampy
23 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack
24 from numpy.fft import *
25 from vampy import *
26
27
class melScaling(object):
    '''Mel frequency warping of magnitude spectra and MFCC computation.

    The filter bank and the DCT matrix are built lazily by update(),
    which has to be called (and has to return True) before
    warpSpectrum(), dct() or getMFCCs() are used.
    '''

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Initialise frequency warping and DCT matrix.
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None:
            maxHz = self.NqHz
        self.maxHz = maxHz
        self.inputSize = inputSize
        self.numBands = numBands
        # Nothing is computed here: update() builds the matrices on demand.
        self.valid = False
        self.updated = False

    def _hz2mel(self,hz):
        '''Convert a frequency in Hz to the Mel scale used by this class.'''
        return 1000*numpy.log(1+hz/700.0)/numpy.log(1+1000.0/700.0)

    def update(self):
        '''Compute the Mel filter matrix and the DCT matrix.

        The computation runs only once per initialisation; subsequent
        calls simply return the stored validity flag.

        Returns True if the matrices are available.
        Raises Exception if maxHz is above the Nyquist frequency.
        '''
        # make sure this will run only once
        # if called from a vamp process
        if self.updated:
            return self.valid
        self.updated = True
        self.valid = False
        # Single-argument print(...) produces the same text under
        # Python 2 (print statement) and Python 3 (print function).
        print('Updating parameters and recalculating filters: ')
        print('Nyquist:  %s' % self.NqHz)

        if self.maxHz > self.NqHz:
            raise Exception('Maximum frequency must be smaller than the Nyquist frequency')

        self.maxMel = self._hz2mel(self.maxHz)
        self.minMel = self._hz2mel(self.minHz)
        print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n'
              % (self.minHz,self.maxHz,self.minMel,self.maxMel))
        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # Row iterator over the filter matrix (one row per input bin).
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.
        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters.

        Returns an integer array of numBands+2 bin indices.'''
        melStep = self.maxMel-self.minMel
        centresMel = numpy.arange(numBands+2) * melStep/(numBands+1) + self.minMel
        # Inverse of the Mel mapping, scaled from Hz to bin index and
        # rounded to the nearest bin.
        centresHzOver700 = numpy.exp(centresMel*numpy.log(1+1000.0/700.0)/1000.0)-1
        centresBin = numpy.floor(0.5 + 700.0*inputSize*centresHzOver700/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.

        Returns an array of shape (inputSize, numBands); the computed
        filter centres are also stored in self.filterCentres.'''
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for band in range(numBands):
            # Each filter spans from the previous centre to the next one.
            start,centre,end = self.filterCentres[band:band+3]
            self.setFilter(filterMatrix[band],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.

        Writes a triangle into filt (in place): a rising ramp over
        [filterStart, filterCentre) and a falling ramp over
        [filterCentre, filterEnd).'''
        rise = float(filterCentre-filterStart)
        fall = float(filterEnd-filterCentre)
        # When two centres coincide the corresponding ramp is empty, so
        # the division by zero below operates on an empty array only.
        filt[filterStart:filterCentre] = (numpy.arange(filterStart,filterCentre)-filterStart)/rise
        filt[filterCentre:filterEnd] = (filterEnd-numpy.arange(filterCentre,filterEnd))/fall

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) with 64 bit precision.'''
        index = numpy.arange(size,dtype=numpy.float64)
        row = index.reshape(size,1)
        # angle[k][n] = k * pi * (n + 0.5) / size
        angle = row * (numpy.pi * (index.reshape(1,size)+0.5) / size)
        DCTmx = (1.0/numpy.sqrt(size / 2.0)) * numpy.cos(angle)
        # The first row gets an extra 1/sqrt(2) scaling.
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0)/2.0)
        return DCTmx

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.

        Parameters:
        warpedSpectrum: output of warpSpectrum()
        cn: if False, the first coefficient (C0) is set to zero
        '''
        # Bands with zero energy (silence, or empty filters) would give
        # log(0) = -inf and turn the coefficients into inf/NaN, so the
        # spectrum is floored at the smallest positive normal double.
        spectrum = numpy.asarray(warpedSpectrum,dtype=numpy.float64)
        floor = numpy.finfo(numpy.float64).tiny
        mfccs = self.dct(numpy.log(numpy.maximum(spectrum,floor)))
        if cn is False:
            mfccs[0] = 0.0
        return mfccs
120
121
class PyMFCC(melScaling):
    '''Vampy plugin returning MFCCs, the Mel scaled spectrum and
    (from getRemainingFeatures) the Mel filter matrix.'''

    def __init__(self,inputSampleRate):
        '''Set the Vampy flags and the default processing parameters.'''

        # flags for setting some Vampy options
        self.vampy_flags = vf_DEBUG | vf_ARRAY | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 1024
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # index of the first returned coefficient: 1 skips C0, 0 keeps it
        self.cnull = 1
        self.two_ch = False
        # '//' keeps the spectrum length an integer on Python 2 and 3
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing configuration and reset the Mel scaling
        so that the filters are rebuilt for the new block size.'''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds on: re-running the base
        # initialiser with its defaults would discard the minHz/maxHz
        # values already set through setParameter().
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,
                            self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        return 'Vampy Example Plugins'

    def getCopyright(self):
        return 'Plugin By George Fazekas'

    def getName(self):
        return 'Vampy MFCC Plugin'

    def getIdentifier(self):
        return 'vampy-mfcc'

    def getDescription(self):
        return 'A simple MFCC plugin'

    def getMaxChannelCount(self):
        return 2

    def getInputDomain(self):
        return FrequencyDomain #TimeDomain

    def getPreferredBlockSize(self):
        return 2048

    def getPreferredStepSize(self):
        return 1024

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, Mel scaled spectrum and
        the Mel filter matrix.'''
        numBands = int(self.numBands)
        stacked = self.two_ch and self.m_channels == 2

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount=True
        Generic.binCount=numBands-self.cnull
        Generic.hasKnownExtents=False
        Generic.isQuantized=True
        Generic.sampleType = OneSamplePerStep

        # note the inheritance of attributes (optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # a real list is needed here: it may be repeated below
        MFCC.binNames = ['C '+str(x) for x in range(self.cnull,numBands)]
        MFCC.unit = None
        if stacked :
            MFCC.binNames *= 2 #repeat the list
            MFCC.binCount = self.m_channels * (numBands-self.cnull)
        else :
            MFCC.binCount = self.numBands-self.cnull

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier='warped-fft'
        warpedSpectrum.name='Mel Scaled Spectrum'
        warpedSpectrum.description='Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit='Mel'
        if stacked :
            warpedSpectrum.binCount = self.m_channels * numBands
        else :
            warpedSpectrum.binCount = self.numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        # NOTE(review): a string is used here while the other outputs
        # use the OneSamplePerStep constant - confirm Vampy accepts both.
        melFilter.sampleType='FixedSampleRate'
        # one feature per step; float division avoids truncating the rate
        melFilter.sampleRate=float(self.m_inputSampleRate)/self.m_stepSize
        # every row of the filter matrix holds one value per Mel band,
        # independently of whether C0 is returned
        melFilter.binCount = numBands
        melFilter.name='Mel Filter Matrix'
        melFilter.description='Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None

        return OutputList(MFCC,warpedSpectrum,melFilter)


    def getParameterDescriptors(self):
        '''Describe the plugin parameters: melbands, minHz, maxHz,
        cnull and two_ch.'''

        melbands = ParameterDescriptor()
        melbands.identifier='melbands'
        melbands.name='Number of bands (coefficients)'
        melbands.description='Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier='cnull'
        cnull.name='Return C0'
        cnull.description='Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # two_ch copies the on/off attributes of cnull
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier='two_ch'
        two_ch.name='Process channels separately'
        two_ch.description='Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier='minHz'
        minHz.name='minimum frequency'
        minHz.description='Set the lower frequency bound.'
        minHz.unit='Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier='maxHz'
        maxHz.description='Set the upper frequency bound.'
        maxHz.name='maximum frequency'
        maxHz.unit='Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands,minHz,maxHz,cnull,two_ch)


    def setParameter(self,paramid,newval):
        '''Set the parameter identified by paramid; out of range
        frequency bounds are ignored (minHz) or replaced by the
        Nyquist frequency (maxHz).'''
        # Invalidate the matrices AND allow update() to run again;
        # with only 'valid' cleared, update() would keep returning
        # False and process() would return None from then on.
        self.valid = False
        self.updated = False
        if paramid == 'minHz' :
            if newval < self.maxHz and newval < self.NqHz :
                self.minHz = float(newval)
        elif paramid == 'maxHz' :
            if newval < self.NqHz and newval > self.minHz+1000 :
                self.maxHz = float(newval)
            else :
                self.maxHz = self.NqHz
        elif paramid == 'cnull' :
            # parameter 1 (return C0) -> start index 0, and vice versa
            self.cnull = int(not int(newval))
        elif paramid == 'melbands' :
            self.numBands = int(newval)
        elif paramid == 'two_ch' :
            self.two_ch = bool(newval)
        return None


    def getParameter(self,paramid):
        '''Return the current value of the parameter identified by
        paramid, or 0.0 for an unknown identifier.'''
        if paramid == 'minHz' :
            return self.minHz
        if paramid == 'maxHz' :
            return self.maxHz
        if paramid == 'cnull' :
            # inverse of the mapping used in setParameter
            return bool(not int(self.cnull))
        if paramid == 'melbands' :
            return self.numBands
        if paramid == 'two_ch' :
            return self.two_ch
        return 0.0

    # set numpy array process using the 'vf_ARRAY' flag in __init__()
    # and RealTime time stamps using the 'vf_REALTIME' flag
    def process(self,inputbuffers,timestamp):
        '''Compute the MFCCs (output 0) and the Mel scaled spectrum
        (output 1) of one block; returns None if the filters can not
        be computed.'''

        # calculate the filter and DCT matrices, check
        # if they are computable given a set of parameters
        # (we only do this once, when the process is called first)
        if not self.update() : return None

        # if two channel processing is set, use process2ch
        if self.m_channels == 2 and self.two_ch :
            return self.process2ch(inputbuffers,timestamp)

        # integer division: the value is used as a slice bound
        halfSize = self.m_blockSize // 2

        if self.m_channels > 1 :
            # take the average of two magnitude spectra
            mS0 = numpy.abs(inputbuffers[0])[0:halfSize]
            mS1 = numpy.abs(inputbuffers[1])[0:halfSize]
            magnitudeSpectrum = (mS0 + mS1) / 2
        else :
            complexSpectrum = inputbuffers[0]
            magnitudeSpectrum = numpy.abs(complexSpectrum)[0:halfSize]

        # do the frequency warping and MFCC computation
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)

        # returning the values:
        outputs = FeatureSet()

        # 1) full initialisation example using a FeatureList
        f_mfccs = Feature()
        f_mfccs.values = melCepstrum[self.cnull:]
        outputs[0] = FeatureList(f_mfccs)

        # 2) simplified: when only one feature is required,
        # the FeatureList() can be omitted
        outputs[1] = Feature(melSpectrum)

        # this is equivalent to writing :
        # outputs[1] = Feature()
        # outputs[1].values = melSpectrum
        # or using keyword args: Feature(values = melSpectrum)

        return outputs

    # process channels separately (stack the returned arrays)
    def process2ch(self,inputbuffers,timestamp):
        '''Process both channels separately and return the results
        stacked (second channel first) in one feature per output.'''

        halfSize = self.m_blockSize // 2

        magnitudeSpectrum0 = numpy.abs(inputbuffers[0])[0:halfSize]
        magnitudeSpectrum1 = numpy.abs(inputbuffers[1])[0:halfSize]

        # do the computations
        melSpectrum0 = self.warpSpectrum(magnitudeSpectrum0)
        melCepstrum0 = self.getMFCCs(melSpectrum0,cn=True)
        melSpectrum1 = self.warpSpectrum(magnitudeSpectrum1)
        melCepstrum1 = self.getMFCCs(melSpectrum1,cn=True)

        outputs = FeatureSet()
        outputs[0] = Feature(numpy.hstack((melCepstrum1[self.cnull:],melCepstrum0[self.cnull:])))
        outputs[1] = Feature(numpy.hstack((melSpectrum1,melSpectrum0)))

        return outputs


    def getRemainingFeatures(self):
        '''Return the rows of the Mel filter matrix as timestamped
        features of the third output, one row per step.'''
        if not self.update() : return []
        frameSampleStart = 0

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        # filterIter is created by update(); it yields each row once
        for filterRow in self.filterIter :
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart,self.m_inputSampleRate)
            f.values = filterRow
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet
397