comparison Example VamPy plugins/test/PyMFCC_freq.py @ 37:27bab3a16c9a vampy2final

new branch Vampy2final
author fazekasgy
date Mon, 05 Oct 2009 11:28:00 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 37:27bab3a16c9a
1 '''PyMFCC_freq.py - This example Vampy plugin demonstrates
2 how to return spectrogram-like features.
3
4 This plugin has frequency domain input and is using
5 the numpy array interface. Flag: vf_ARRAY
6
7 Centre for Digital Music, Queen Mary University of London.
8 Copyright 2006 Gyorgy Fazekas, QMUL.
9 (See Vamp API for licence information.)
10
11 Constants for Mel frequency conversion and filter
12 centre calculation are taken from the GNU GPL licenced
13 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
14 '''
15
16 import sys,numpy
17 from numpy import log,exp,floor,sum
18 from numpy import *
19 from numpy.fft import *
20 import vampy
21 from vampy import *
22
23
class melScaling(object):
    '''Mel filter bank and DCT matrix used to turn magnitude spectra
    into Mel scaled spectra and MFCCs.

    The filters and the DCT matrix are not built by the constructor;
    they are built by update(), which does the work once and caches
    the outcome in self.valid.
    '''

    def __init__(self, sampleRate, inputSize, numBands, minHz=0, maxHz=None):
        '''Initialise frequency warping and DCT matrix.
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        # None means "warp all the way up to the Nyquist frequency".
        self.maxHz = self.NqHz if maxHz is None else maxHz
        self.inputSize = inputSize
        self.numBands = numBands
        self.valid = False
        self.updated = False

    def update(self):
        '''Build the Mel filter matrix and the DCT matrix.

        The work is done on the first call only; later calls return
        the cached self.valid flag, so this is cheap to call from
        every process() invocation.

        Returns True when the filters are ready to use.
        Raises ValueError (an Exception subclass) when maxHz is above
        the Nyquist frequency.
        '''
        if self.updated:
            return self.valid
        self.updated = True
        self.valid = False
        # Single-argument print calls produce the same output under
        # Python 2's print statement and Python 3's print function.
        print('Updating parameters and recalculating filters: ')
        print('Nyquist:  %s' % self.NqHz)

        if self.maxHz > self.NqHz:
            raise ValueError('Maximum frequency must not exceed the Nyquist frequency')

        melPerLog = numpy.log(1 + 1000.0 / 700.0)
        self.maxMel = 1000 * numpy.log(1 + self.maxHz / 700.0) / melPerLog
        self.minMel = 1000 * numpy.log(1 + self.minHz / 700.0) / melPerLog
        print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n'
              % (self.minHz, self.maxHz, self.minMel, self.maxMel))
        self.filterMatrix = self.getFilterMatrix(self.inputSize, self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # Row iterator over the filter matrix, consumed by subclasses
        # that want to output the filters one row at a time.
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self, inputSize, numBands):
        '''Calculate Mel filter centres around FFT bins.
        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters.

        Returns an integer array of numBands+2 bin indices.
        '''
        melStep = self.maxMel - self.minMel
        centresMel = numpy.arange(numBands + 2) * melStep / (numBands + 1) + self.minMel
        # Map Mel back to Hz, scale Hz to a bin index, and round to
        # the nearest bin (floor of value + 0.5).
        centresHzOver700 = numpy.exp(centresMel * numpy.log(1 + 1000.0 / 700.0) / 1000.0) - 1
        centresBin = numpy.floor(0.5 + 700.0 * inputSize * centresHzOver700 / self.NqHz)
        return centresBin.astype(int)

    def getFilterMatrix(self, inputSize, numBands):
        '''Compose the Mel scaling matrix.

        Returns an array of shape (inputSize, numBands) and stores the
        bin centres used in self.filterCentres.
        '''
        filterMatrix = numpy.zeros((numBands, inputSize))
        self.filterCentres = self.getFilterCentres(inputSize, numBands)
        for band in range(numBands):
            # Each filter spans three consecutive centres.
            start, centre, end = self.filterCentres[band:band + 3]
            self.setFilter(filterMatrix[band], start, centre, end)
        return filterMatrix.transpose()

    def setFilter(self, filt, filterStart, filterCentre, filterEnd):
        '''Calculate a single Mel filter.

        Writes into filt in place: a ramp rising from 0 at filterStart
        towards 1 just below filterCentre, then a ramp falling from 1
        at filterCentre towards 0 just below filterEnd.
        '''
        risingWidth = float(filterCentre - filterStart)
        fallingWidth = float(filterEnd - filterCentre)
        # When two centres coincide the slice and the ramp are both
        # empty, so the zero width is never actually divided into a value.
        filt[filterStart:filterCentre] = (
            numpy.arange(filterStart, filterCentre) - filterStart) / risingWidth
        filt[filterCentre:filterEnd] = (
            filterEnd - numpy.arange(filterCentre, filterEnd)) / fallingWidth

    def warpSpectrum(self, magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum, self.filterMatrix)

    def getDCTMatrix(self, size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) but with 64 bit precision.'''
        index = numpy.arange(size, dtype=numpy.float64)
        # angles[j] = pi * (j + 0.5) / size; row i of the matrix is
        # cos(i * angles).
        angles = numpy.pi * (index + 0.5) / size
        DCTmx = (1.0 / numpy.sqrt(size / 2.0)) * numpy.cos(index[:, numpy.newaxis] * angles)
        # The first row gets the extra 1/sqrt(2) factor.
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0) / 2.0)
        return DCTmx

    def dct(self, data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix, data_matrix)

    def getMFCCs(self, warpedSpectrum, cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.

        Parameters:
        warpedSpectrum: Mel scaled magnitude spectrum
        cn: pass False to zero the first coefficient
        '''
        # A band with zero energy would give log(0) = -inf and turn
        # every coefficient into -inf/NaN after the DCT, so clamp the
        # spectrum to the smallest positive normal float first.
        floor = numpy.finfo(numpy.float64).tiny
        mfccs = self.dct(numpy.log(numpy.maximum(warpedSpectrum, floor)))
        if cn is False:
            mfccs[0] = 0.0
        return mfccs
129
130
class PyMFCC_freq(melScaling):
    '''Vampy example plugin computing MFCCs from frequency domain input.

    Outputs, in order: the MFCCs, the Mel scaled spectrum, and (from
    getRemainingFeatures) the rows of the Mel filter matrix.
    '''

    def __init__(self, inputSampleRate):
        '''Set defaults; initialise() supplies the real block geometry.'''
        # flags for setting some Vampy options
        self.vampy_flags = vf_DEBUG | vf_ARRAY | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 512
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # Index of the first cepstral coefficient that is returned:
        # 1 skips C0, 0 keeps it.
        self.cnull = 1
        self.two_ch = False
        melScaling.__init__(self, int(self.m_inputSampleRate),
                            self.m_blockSize // 2, self.numBands)

    def initialise(self, channels, stepSize, blockSize):
        '''Store the processing geometry and reset the Mel scaling state.

        Returns True.
        '''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds through: re-initialising
        # with the defaults would throw away any minHz/maxHz already
        # set through setParameter().
        melScaling.__init__(self, int(self.m_inputSampleRate),
                            self.m_blockSize // 2, self.numBands,
                            self.minHz, self.maxHz)
        return True

    def getMaker(self):
        return 'Vampy Test Plugins'

    def getCopyright(self):
        return 'Plugin By George Fazekas'

    def getName(self):
        return 'Vampy FrequencyDomain MFCC Plugin'

    def getIdentifier(self):
        return 'vampy-mfcc-test-freq'

    def getDescription(self):
        return 'A simple MFCC plugin. (FrequencyDomain)'

    def getMaxChannelCount(self):
        return 2

    def getInputDomain(self):
        return FrequencyDomain

    def getPreferredBlockSize(self):
        return 2048

    def getPreferredStepSize(self):
        return 512

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, warped spectrum, filter matrix.'''
        numBands = int(self.numBands)
        # Bin counts double when the two channels are reported
        # side by side instead of being averaged.
        separate = self.two_ch and self.m_channels == 2

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount = True
        Generic.binCount = numBands - self.cnull
        Generic.hasKnownExtents = False
        Generic.isQuantized = True
        Generic.sampleType = OneSamplePerStep

        # note the inheritance of attributes (use is optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # A real list, so this does not depend on map() returning one.
        MFCC.binNames = ['C ' + str(x) for x in range(self.cnull, numBands)]
        MFCC.unit = None
        if separate:
            MFCC.binCount = self.m_channels * (numBands - self.cnull)
        else:
            MFCC.binCount = self.numBands - self.cnull

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier = 'warped-fft'
        warpedSpectrum.name = 'Mel Scaled Spectrum'
        warpedSpectrum.description = 'Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit = 'Mel'
        if separate:
            warpedSpectrum.binCount = self.m_channels * numBands
        else:
            warpedSpectrum.binCount = self.numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        # NOTE(review): this is a string while Generic uses the
        # OneSamplePerStep constant - confirm Vampy accepts both forms.
        melFilter.sampleType = 'FixedSampleRate'
        # Float division: the filter rows are timestamped one step
        # apart, so the rate is generally not a whole number.
        melFilter.sampleRate = float(self.m_inputSampleRate) / self.m_stepSize
        melFilter.name = 'Mel Filter Matrix'
        melFilter.description = 'Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None

        return OutputList(MFCC, warpedSpectrum, melFilter)

    def getParameterDescriptors(self):
        '''Describe the five user parameters.'''
        melbands = ParameterDescriptor()
        melbands.identifier = 'melbands'
        melbands.name = 'Number of bands (coefficients)'
        melbands.description = 'Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier = 'cnull'
        cnull.name = 'Return C0'
        cnull.description = 'Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # Built from the cnull descriptor, then the differing fields
        # are overwritten.
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier = 'two_ch'
        two_ch.name = 'Process channels separately'
        two_ch.description = 'Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier = 'minHz'
        minHz.name = 'minimum frequency'
        minHz.description = 'Set the lower frequency bound.'
        minHz.unit = 'Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier = 'maxHz'
        maxHz.description = 'Set the upper frequency bound.'
        maxHz.name = 'maximum frequency'
        maxHz.unit = 'Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands, minHz, maxHz, cnull, two_ch)

    def setParameter(self, paramid, newval):
        '''Store a parameter value and invalidate the cached filters.'''
        self.valid = False
        # Also clear the run-once latch. Otherwise update() would keep
        # returning the False stored above and process() would yield
        # nothing after any parameter change made once filters exist.
        self.updated = False
        if paramid == 'minHz':
            # Out-of-range values are ignored and the old bound kept.
            if newval < self.maxHz and newval < self.NqHz:
                self.minHz = float(newval)
            print('minHz:  %s' % self.minHz)
        if paramid == 'maxHz':
            print('trying to set maxHz to:  %s' % newval)
            # Must stay below Nyquist and more than 1000 Hz above
            # minHz; otherwise fall back to the Nyquist frequency.
            if newval < self.NqHz and newval > self.minHz + 1000:
                self.maxHz = float(newval)
            else:
                self.maxHz = self.NqHz
            print('set to:  %s' % self.maxHz)
        if paramid == 'cnull':
            # The parameter means "return C0"; the attribute is the
            # first returned index, hence the inversion.
            self.cnull = int(not int(newval))
        if paramid == 'melbands':
            self.numBands = int(newval)
        if paramid == 'two_ch':
            self.two_ch = bool(newval)

        return

    def getParameter(self, paramid):
        '''Return the current value of a parameter as a float
        (0.0 for an unknown identifier).'''
        if paramid == 'minHz':
            return float(self.minHz)
        if paramid == 'maxHz':
            return float(self.maxHz)
        if paramid == 'cnull':
            return float(not int(self.cnull))
        if paramid == 'melbands':
            return float(self.numBands)
        if paramid == 'two_ch':
            return float(self.two_ch)
        return 0.0

    def _magnitude(self, complexSpectrum):
        '''Return the magnitudes of the first blockSize/2 input bins.'''
        return numpy.abs(complexSpectrum)[0:self.m_blockSize // 2]

    def _melFeatures(self, magnitudeSpectrum):
        '''Return (Mel spectrum, MFCCs) for one magnitude spectrum.'''
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        return melSpectrum, self.getMFCCs(melSpectrum, cn=True)

    # set numpy process using the 'use_numpy_interface' flag
    def process(self, inputbuffers, timestamp):
        '''Compute the MFCC and Mel spectrum features for one block.

        Returns a FeatureSet with outputs 0 and 1 filled, or None when
        update() reports that the filters are not valid.
        '''
        if not self.update():
            return None

        if self.m_channels == 2 and self.two_ch:
            return self.process2ch(inputbuffers, timestamp)

        if self.m_channels > 1:
            # take the mean of the two magnitude spectra
            magnitudeSpectrum = (self._magnitude(inputbuffers[0]) +
                                 self._magnitude(inputbuffers[1])) / 2
        else:
            magnitudeSpectrum = self._magnitude(inputbuffers[0])

        # do the computation
        melSpectrum, melCepstrum = self._melFeatures(magnitudeSpectrum)

        outputs = FeatureSet()
        outputs[0] = Feature(melCepstrum[self.cnull:])
        outputs[1] = Feature(melSpectrum)
        return outputs

    # process channels separately (stack the returned arrays)
    def process2ch(self, inputbuffers, timestamp):
        '''Compute the features per channel and stack them in one vector.'''
        melSpectrum0, melCepstrum0 = self._melFeatures(self._magnitude(inputbuffers[0]))
        melSpectrum1, melCepstrum1 = self._melFeatures(self._magnitude(inputbuffers[1]))

        outputs = FeatureSet()
        # NOTE(review): the second channel is stacked before the first;
        # kept as found - confirm this ordering is intended.
        outputs[0] = Feature(numpy.hstack((melCepstrum1[self.cnull:],
                                           melCepstrum0[self.cnull:])))
        outputs[1] = Feature(numpy.hstack((melSpectrum1, melSpectrum0)))
        return outputs

    def getRemainingFeatures(self):
        '''Return the rows of the Mel filter matrix as timestamped
        features on the third output ([] when the filters are invalid).'''
        if not self.update():
            return []
        frameSampleStart = 0

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        # filterIter is a one-shot iterator, so rows already consumed
        # by an earlier call are not emitted again.
        for filterRow in self.filterIter:
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart, self.m_inputSampleRate)
            f.values = filterRow
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet