comparison Example VamPy plugins/test/PyMFCC_time.py @ 37:27bab3a16c9a vampy2final

new branch Vampy2final
author fazekasgy
date Mon, 05 Oct 2009 11:28:00 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 37:27bab3a16c9a
1 '''PyMFCC_time.py - This example Vampy plugin demonstrates
2 how to return spectrogram-like features.
3
4 This plugin has time domain input and is using
5 the numpy array interface. Flag: vf_ARRAY
6
7 Centre for Digital Music, Queen Mary University of London.
8 Copyright 2006 Gyorgy Fazekas, QMUL.
9 (See Vamp API for licence information.)
10
11 Constants for Mel frequency conversion and filter
12 centre calculation are taken from the GNU GPL licenced
13 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
14 '''
15
16 import sys,numpy
17 from numpy import log,exp,floor,sum
18 from numpy import *
19 from numpy.fft import *
20 import vampy
21 from vampy import *
22
23
class melScaling(object):
    '''Mel frequency warping and DCT helper used to compute MFCCs.

    The constructor only stores the configuration.  The Mel filter bank
    and the DCT matrix are built lazily by update(), which does the work
    once and afterwards just reports whether the configuration was valid.
    '''

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Store the parameters for frequency warping and the DCT matrix.
        The matrices themselves are computed on the first call to update().
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None :
            maxHz = self.NqHz
        self.maxHz = maxHz
        # Both sizes are used as array dimensions; coerce them so a float
        # such as blockSize/2 under true division does not break numpy.zeros.
        self.inputSize = int(inputSize)
        self.numBands = int(numBands)
        self.valid = False
        self.updated = False

    def _hzToMel(self,hz):
        '''Convert a frequency in Hz to the Mel scale used by this class.'''
        return 1000*numpy.log(1+hz/700.0)/numpy.log(1+1000.0/700.0)

    def update(self):
        '''Build the filter and DCT matrices on the first call only.

        Returns True when the matrices are available.  Every later call
        returns the cached validity flag without recalculating anything.
        Raises ValueError (an Exception subclass) when maxHz is above the
        Nyquist frequency; after that, further calls return False.
        '''
        # make sure this will run only once if called from a vamp process
        if self.updated :
            return self.valid
        self.updated = True
        self.valid = False
        print('Updating parameters and recalculating filters: ')
        print('Nyquist:  %s' % (self.NqHz,))

        if self.maxHz > self.NqHz :
            raise ValueError('Maximum frequency must be smaller than the Nyquist frequency')

        self.maxMel = self._hzToMel(self.maxHz)
        self.minMel = self._hzToMel(self.minHz)
        print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n' %(self.minHz,self.maxHz,self.minMel,self.maxMel))
        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # Row iterator over the filter matrix; consumed by subclasses that
        # want to emit the matrix one row at a time.
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.
        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters.  Returns an integer array of numBands+2 bin
        indices.'''
        # numBands+2 points spaced evenly on the Mel axis ...
        centresMel = numpy.arange(numBands+2) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
        # ... mapped back to Hz and rounded to the nearest FFT bin.
        centresBin = numpy.floor(0.5 + 700.0*inputSize*(numpy.exp(centresMel*numpy.log(1+1000.0/700.0)/1000.0)-1)/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.
        Returns an array of shape (inputSize, numBands) so that a magnitude
        spectrum can be warped with a single dot product.  The centres used
        are kept in self.filterCentres.'''
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for band in range(numBands) :
            # consecutive centres give the start, peak and end of one filter
            start,centre,end = self.filterCentres[band:band+3]
            self.setFilter(filterMatrix[band],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.
        Writes a triangle into filt in place: it rises from 0 at
        filterStart to 1 at filterCentre and falls towards 0 at filterEnd.'''
        riseWidth = float(filterCentre-filterStart)
        fallWidth = float(filterEnd-filterCentre)
        # An empty range yields an empty array, so a zero width is harmless.
        rising = (numpy.arange(filterStart,filterCentre)-filterStart)/riseWidth
        falling = (filterEnd-numpy.arange(filterCentre,filterEnd))/fallWidth
        filt[filterStart:filterCentre] = rising
        filt[filterCentre:filterEnd] = falling

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) but with 64 bit precision.'''
        index = numpy.arange(size,dtype=numpy.float64)
        # angles[k, n] = k * pi * (n + 0.5) / size
        angles = index[:,numpy.newaxis] * (numpy.pi * (index[numpy.newaxis,:]+0.5) / size)
        DCTmx = (1.0/numpy.sqrt(size / 2.0)) * numpy.cos(angles)
        # the first row carries the extra 1/sqrt(2) scaling
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0)/2.0)
        return DCTmx

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.
        Pass cn=False to zero the first coefficient.'''
        # Floor the spectrum at the smallest positive float64 so that an
        # all-zero band (e.g. digital silence) does not produce log(0) = -inf,
        # which the DCT would spread into NaN/inf in every coefficient.
        tiny = numpy.finfo(numpy.float64).tiny
        safeSpectrum = numpy.maximum(numpy.asarray(warpedSpectrum,dtype=numpy.float64),tiny)
        mfccs = self.dct(numpy.log(safeSpectrum))
        # identity test kept on purpose: only the literal False clears C0
        if cn is False :
            mfccs[0] = 0.0
        return mfccs
129
130
class PyMFCC_time(melScaling):
    '''Example Vampy plugin computing MFCCs from time domain input.

    It provides three outputs: the MFCCs, the Mel scaled magnitude
    spectrum, and (from getRemainingFeatures) the Mel filter matrix
    itself, one row per feature.
    '''

    def __init__(self,inputSampleRate):
        '''Set the default configuration for the given sample rate.'''
        # flags for setting some Vampy options
        self.vampy_flags = vf_DEBUG | vf_ARRAY | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 512
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # cnull is the index of the first returned coefficient:
        # 1 drops C0, 0 keeps it (inverse of the 'cnull' parameter value)
        self.cnull = 1
        self.two_ch = False
        # '//' keeps the spectrum length an integer on Python 2 and 3
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing configuration and reset the Mel scaling.
        Always returns True.'''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds through: re-running the base
        # initialiser without them would reset minHz/maxHz to their defaults
        # and discard values set with setParameter before initialise.
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        '''Return the plugin maker string.'''
        return 'Vampy Test Plugins'

    def getCopyright(self):
        '''Return the copyright string.'''
        return 'Plugin By George Fazekas'

    def getName(self):
        '''Return the human readable plugin name.'''
        return 'Vampy TimeDomain MFCC Plugin'

    def getIdentifier(self):
        '''Return the plugin identifier.'''
        return 'vampy-mfcc-test-timedomain'

    def getDescription(self):
        '''Return a short description of the plugin.'''
        return 'A simple MFCC plugin. (TimeDomain)'

    def getMaxChannelCount(self):
        '''Return the maximum number of input channels (2).'''
        return 2

    def getInputDomain(self):
        '''Return the input domain of the plugin (TimeDomain).'''
        return TimeDomain

    def getPreferredBlockSize(self):
        '''Return the preferred block size in samples.'''
        return 2048

    def getPreferredStepSize(self):
        '''Return the preferred step size in samples.'''
        return 512

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, warped spectrum, filter matrix.'''
        numBands = int(self.numBands)
        # outputs are twice as wide when both channels are stacked
        stacked = self.two_ch and self.m_channels == 2

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount=True
        Generic.binCount=numBands-self.cnull
        Generic.hasKnownExtents=False
        Generic.isQuantized=True
        Generic.sampleType = OneSamplePerStep

        # note the inheritance of attributes (use is optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # a real list, so the result is the same on Python 2 and 3
        MFCC.binNames=['C '+str(x) for x in range(self.cnull,numBands)]
        MFCC.unit = None
        if stacked :
            MFCC.binCount = self.m_channels * (numBands-self.cnull)
        else :
            MFCC.binCount = self.numBands-self.cnull

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier='warped-fft'
        warpedSpectrum.name='Mel Scaled Spectrum'
        warpedSpectrum.description='Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit='Mel'
        if stacked :
            warpedSpectrum.binCount = self.m_channels * numBands
        else :
            warpedSpectrum.binCount = self.numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        melFilter.sampleType='FixedSampleRate'
        # integer division, as the original Python 2 expression performed
        melFilter.sampleRate=self.m_inputSampleRate//self.m_stepSize
        melFilter.name='Mel Filter Matrix'
        melFilter.description='Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None
        # Each emitted row of the filter matrix holds one value per band,
        # whether or not C0 is dropped from the MFCC output.
        melFilter.binCount = numBands

        return OutputList(MFCC,warpedSpectrum,melFilter)

    def getParameterDescriptors(self):
        '''Describe the adjustable parameters of the plugin.'''
        melbands = ParameterDescriptor()
        melbands.identifier='melbands'
        melbands.name='Number of bands (coefficients)'
        melbands.description='Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier='cnull'
        cnull.name='Return C0'
        cnull.description='Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # starts as a copy of the cnull descriptor (same 0..1 range)
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier='two_ch'
        two_ch.name='Process channels separately'
        two_ch.description='Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier='minHz'
        minHz.name='minimum frequency'
        minHz.description='Set the lower frequency bound.'
        minHz.unit='Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier='maxHz'
        maxHz.description='Set the upper frequency bound.'
        maxHz.name='maximum frequency'
        maxHz.unit='Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands,minHz,maxHz,cnull,two_ch)

    def setParameter(self,paramid,newval):
        '''Set one parameter by identifier; unknown identifiers are ignored.

        The filters are marked invalid here.  They are only rebuilt after
        the base class initialiser has run again (see initialise), because
        update() does its work once per initialisation.
        '''
        self.valid = False
        if paramid == 'minHz' :
            # out-of-range values are ignored and the old bound is kept
            if newval < self.maxHz and newval < self.NqHz :
                self.minHz = float(newval)
            print('minHz:  %s' % (self.minHz,))
        elif paramid == 'maxHz' :
            print('trying to set maxHz to:  %s' % (newval,))
            # must stay below Nyquist and more than 1 kHz above minHz,
            # otherwise fall back to the Nyquist frequency
            if newval < self.NqHz and newval > self.minHz+1000 :
                self.maxHz = float(newval)
            else :
                self.maxHz = self.NqHz
            print('set to:  %s' % (self.maxHz,))
        elif paramid == 'cnull' :
            # stored inverted: parameter 1 (return C0) -> first index 0
            self.cnull = int(not int(newval))
        elif paramid == 'melbands' :
            self.numBands = int(newval)
        elif paramid == 'two_ch' :
            self.two_ch = bool(newval)

        return

    def getParameter(self,paramid):
        '''Return the current value of a parameter as a float
        (0.0 for unknown identifiers).'''
        if paramid == 'minHz' :
            return float(self.minHz)
        if paramid == 'maxHz' :
            return float(self.maxHz)
        if paramid == 'cnull' :
            # undo the inversion applied in setParameter
            return float(not int(self.cnull))
        if paramid == 'melbands' :
            return float(self.numBands)
        if paramid == 'two_ch' :
            return float(self.two_ch)
        return 0.0

    def _melFeatures(self,audioSamples):
        '''Return (melSpectrum, melCepstrum) for one block of samples.'''
        fftsize = self.m_blockSize
        halfSize = fftsize//2
        complexSpectrum = numpy.fft.fft(self.window*audioSamples,fftsize)
        # keep the first half of the spectrum and normalise its magnitude
        magnitudeSpectrum = numpy.abs(complexSpectrum)[0:halfSize] / halfSize
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)
        return melSpectrum,melCepstrum

    # set numpy process using the 'use_numpy_interface' flag
    def process(self,inputbuffers,timestamp):
        '''Compute the MFCC and Mel spectrum features for one block.

        Multi-channel input is averaged over the first two channels unless
        separate channel processing is enabled, in which case process2ch
        does the work.  Returns None when the filters cannot be computed.
        '''
        if self.m_channels == 2 and self.two_ch :
            return self.process2ch(inputbuffers,timestamp)

        # calculate the filter and DCT matrices, check
        # if they are computable given a set of parameters
        # (we only do this once, when the process is called first)
        if not self.update() :
            return None

        if self.m_channels > 1 :
            audioSamples = (inputbuffers[0]+inputbuffers[1])/2
        else :
            audioSamples = inputbuffers[0]

        melSpectrum,melCepstrum = self._melFeatures(audioSamples)

        # output feature set (the builtin dict type can also be used)
        outputs = FeatureSet()
        outputs[0] = Feature(melCepstrum[self.cnull:])
        outputs[1] = Feature(melSpectrum)

        return outputs

    # process two channel files (stack the returned arrays)
    def process2ch(self,inputbuffers,timestamp):
        '''Compute the features for both channels and stack them.
        The second channel comes first in each stacked array.  Returns
        None when the filters cannot be computed.'''
        if not self.update() :
            return None

        melSpectrum0,melCepstrum0 = self._melFeatures(inputbuffers[0])
        melSpectrum1,melCepstrum1 = self._melFeatures(inputbuffers[1])

        outputs = FeatureSet()
        outputs[0] = Feature(numpy.hstack((melCepstrum1[self.cnull:],melCepstrum0[self.cnull:])))
        outputs[1] = Feature(numpy.hstack((melSpectrum1,melSpectrum0)))

        return outputs

    def getRemainingFeatures(self):
        '''Return the Mel filter matrix as timestamped features.

        One feature is emitted per remaining row of the filter matrix,
        spaced one step size apart.  Returns an empty list when the
        filters cannot be computed.
        '''
        if not self.update() :
            return []
        frameSampleStart = 0

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        # filterIter is a one-shot iterator, so the rows are emitted only
        # once per update(); the for loop works on both Python 2 and 3
        for filterRow in self.filterIter :
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart,self.m_inputSampleRate)
            f.values = filterRow
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet
398