annotate Example VamPy plugins/test/PyMFCC_buffer.py @ 53:7e59caea821b

* Make a better job of preloading Python, especially when it's in a framework. Go for the Python file in the frameworks directory in preference to any libpythonX.Y.dylib. Particularly, don't try to preload any library without an absolute path until we've exhausted all our framework possibilities (so as to avoid picking up an ancient system library).
author cannam
date Fri, 09 Oct 2009 13:48:25 +0000
parents 27bab3a16c9a
children
rev   line source
fazekasgy@37 1 '''PyMFCC_buffer.py - This example Vampy plugin demonstrates
fazekasgy@37 2 how to return spectrogram-like features.
fazekasgy@37 3
fazekasgy@37 4 This plugin uses the numpy BUFFER interface and
fazekasgy@37 5 frequency domain input. Flag: vf_BUFFER
fazekasgy@37 6
fazekasgy@37 7 Centre for Digital Music, Queen Mary University of London.
fazekasgy@37 8 Copyright 2006 Gyorgy Fazekas, QMUL.
fazekasgy@37 9 (See Vamp API for licence information.)
fazekasgy@37 10
fazekasgy@37 11 Constants for Mel frequency conversion and filter
fazekasgy@37 12 centre calculation are taken from the GNU GPL licenced
fazekasgy@37 13 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
fazekasgy@37 14 '''
fazekasgy@37 15
fazekasgy@37 16 import sys,numpy
fazekasgy@37 17 from numpy import log,exp,floor,sum
fazekasgy@37 18 from numpy import *
fazekasgy@37 19 from numpy.fft import *
fazekasgy@37 20 import vampy
fazekasgy@37 21 from vampy import *
fazekasgy@37 22
class melScaling(object):
    '''Mel scale filter bank and DCT matrix used to compute MFCCs.

    The object is configured by __init__() and the (comparatively
    expensive) filter and DCT matrices are built lazily by update().
    The constants of the Hz <-> Mel mapping (700 and 1000) are the
    ones used throughout the original implementation, which credits
    the Freespeech library for them.
    '''

    # Denominator of the Hz -> Mel mapping. With this scaling
    # 1000 Hz maps to exactly 1000 Mel.
    _MEL_LOG_SCALE = numpy.log(1+1000.0/700.0)

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Store the warping parameters; no matrices are built here.
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None:
            maxHz = self.NqHz
        self.maxHz = maxHz
        # Both values are used as array dimensions, so they must be
        # integers even if the caller computed them with true division.
        self.inputSize = int(inputSize)
        self.numBands = int(numBands)
        # valid:   the matrices match the current parameters
        # updated: update() has already been attempted
        self.valid = False
        self.updated = False

    def update(self):
        '''Build the filter and DCT matrices from the current parameters.

        The work is done only on the first call after __init__();
        later calls just return the stored validity flag.

        Returns True when the matrices are ready for use.
        Raises ValueError (an Exception subclass) if maxHz exceeds
        the Nyquist frequency.
        '''
        if self.updated:
            return self.valid
        self.updated = True
        self.valid = False
        print('Updating parameters and recalculating filters: ')
        print('Nyquist:  %s' % (self.NqHz,))

        if self.maxHz > self.NqHz:
            raise ValueError('Maximum frequency must be smaller than the Nyquist frequency')

        self.maxMel = self._hzToMel(self.maxHz)
        self.minMel = self._hzToMel(self.minHz)
        print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n' %(self.minHz,self.maxHz,self.minMel,self.maxMel))
        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # One-shot iterator over the rows of the filter matrix.
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def _hzToMel(self,hz):
        '''Convert a frequency in Hz to the Mel scale.'''
        return 1000*numpy.log(1+hz/700.0)/self._MEL_LOG_SCALE

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.
        Two extra centres are calculated at the edges; they give the
        starting point of the first and the end point of the last
        actual filter. Returns an integer array of numBands+2 bins.
        Requires minMel and maxMel, which are set by update().'''
        # Equally spaced points on the Mel scale ...
        centresMel = numpy.arange(numBands+2) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
        # ... mapped back to Hz and rounded to the nearest bin index.
        centresBin = numpy.floor(0.5 + 700.0*inputSize*(numpy.exp(centresMel*self._MEL_LOG_SCALE/1000.0)-1)/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.
        Returns an array of shape (inputSize, numBands), one triangular
        filter per column. The centres are kept in self.filterCentres.'''
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for band in range(numBands):
            # Neighbouring centres delimit each triangle.
            start,centre,end = self.filterCentres[band:band+3]
            self.setFilter(filterMatrix[band],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.
        Writes a triangle into filt (modified in place) that rises
        from 0 at filterStart to 1 at filterCentre and falls back
        towards 0 at filterEnd.'''
        rise = float(filterCentre-filterStart)
        fall = float(filterEnd-filterCentre)
        # When two centres coincide the corresponding slope is an empty
        # range, so nothing is divided by the zero width.
        filt[filterStart:filterCentre] = (numpy.arange(filterStart,filterCentre)-filterStart)/rise
        filt[filterCentre:filterEnd] = (filterEnd-numpy.arange(filterCentre,filterEnd))/fall

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) but with 64 bit precision.'''
        index = numpy.arange(size,dtype=numpy.float64)
        row = index.reshape(size,1)
        # angle[k][n] = k * pi * (n + 0.5) / size
        angle = row * (numpy.pi * (index+0.5) / size)
        DCTmx = (1.0/numpy.sqrt(size / 2.0)) * numpy.cos(angle)
        # The first row gets the extra 1/sqrt(2) scaling.
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0)/2.0)
        return DCTmx

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.
        If cn is false the first coefficient (C0) is set to zero.'''
        spectrum = numpy.asarray(warpedSpectrum,dtype=numpy.float64)
        # A silent band would give log(0) = -inf and the DCT would turn
        # that into NaNs, so floor the input at the smallest positive
        # normal double before taking the logarithm.
        floor = numpy.finfo(numpy.float64).tiny
        mfccs = self.dct(numpy.log(numpy.maximum(spectrum,floor)))
        if not cn:
            mfccs[0] = 0.0
        return mfccs
fazekasgy@37 127
fazekasgy@37 128
class PyMFCC_buffer(melScaling):
    '''Example Vampy plugin returning MFCCs and a Mel scaled spectrum.

    The plugin asks for frequency domain input and reads the input
    through numpy.frombuffer (the vf_BUFFER flag is set). It has
    three outputs: the MFCCs (0), the Mel scaled magnitude
    spectrum (1) and, from getRemainingFeatures(), the rows of the
    Mel filter matrix (2).
    '''

    def __init__(self,inputSampleRate):
        '''Set the default configuration for the given sample rate.'''

        # flags for setting some Vampy options
        self.vampy_flags = vf_DEBUG | vf_BUFFER | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 512
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # Index of the first cepstral coefficient that is returned:
        # 1 skips C0, 0 includes it (see the 'cnull' parameter).
        self.cnull = 1
        self.two_ch = False
        # Floor division keeps the spectrum length an integer on
        # Python 3 as well.
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing geometry and reset the Mel scaling.
        Always returns True.'''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds on explicitly: the
        # melScaling defaults would otherwise overwrite whatever was
        # set through setParameter() before initialise() was called.
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        return 'Vampy Test Plugins'

    def getCopyright(self):
        return 'Plugin By George Fazekas'

    def getName(self):
        return 'Vampy Buffer MFCC Plugin'

    def getIdentifier(self):
        return 'vampy-mfcc-test-buffer'

    def getDescription(self):
        return 'A simple MFCC plugin. (using the Buffer interface)'

    def getMaxChannelCount(self):
        return 2

    def getInputDomain(self):
        return FrequencyDomain

    def getPreferredBlockSize(self):
        return 2048

    def getPreferredStepSize(self):
        return 512

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, Mel spectrum, filter matrix.'''

        numBands = int(self.numBands)
        numCoeffs = numBands - self.cnull
        # True when the two channels are returned side by side.
        stacked = self.two_ch and self.m_channels == 2

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount=True
        Generic.binCount=numCoeffs
        Generic.hasKnownExtents=False
        Generic.isQuantized=True
        Generic.sampleType = OneSamplePerStep

        # note the inheritance of attributes (use is optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # A real list, so the value is the same on Python 2 and 3.
        MFCC.binNames=['C '+str(x) for x in range(self.cnull,numBands)]
        MFCC.unit = None
        if stacked:
            MFCC.binCount = self.m_channels * numCoeffs
        else:
            MFCC.binCount = numCoeffs

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier='warped-fft'
        warpedSpectrum.name='Mel Scaled Spectrum'
        warpedSpectrum.description='Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit='Mel'
        if stacked:
            warpedSpectrum.binCount = self.m_channels * numBands
        else:
            warpedSpectrum.binCount = numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        melFilter.sampleType='FixedSampleRate'
        # True division: integer division would truncate the rate
        # (e.g. 44100/512 to 86).
        melFilter.sampleRate=float(self.m_inputSampleRate)/self.m_stepSize
        # Every feature of this output is one row of the filter matrix,
        # which holds numBands values whether or not C0 is returned.
        melFilter.binCount=numBands
        melFilter.name='Mel Filter Matrix'
        melFilter.description='Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None

        return OutputList(MFCC,warpedSpectrum,melFilter)

    def getParameterDescriptors(self):
        '''Describe the five parameters of the plugin.'''

        melbands = ParameterDescriptor()
        melbands.identifier='melbands'
        melbands.name='Number of bands (coefficients)'
        melbands.description='Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier='cnull'
        cnull.name='Return C0'
        cnull.description='Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # two_ch starts as a copy of the cnull descriptor and only
        # overrides the fields that differ.
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier='two_ch'
        two_ch.name='Process channels separately'
        two_ch.description='Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier='minHz'
        minHz.name='minimum frequency'
        minHz.description='Set the lower frequency bound.'
        minHz.unit='Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier='maxHz'
        maxHz.description='Set the upper frequency bound.'
        maxHz.name='maximum frequency'
        maxHz.unit='Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands,minHz,maxHz,cnull,two_ch)

    def setParameter(self,paramid,newval):
        '''Set the parameter called paramid to newval.
        Out of range frequency bounds are not stored: an invalid minHz
        is ignored and an invalid maxHz falls back to the Nyquist
        frequency. Unknown identifiers are ignored.'''
        # Force update() to rebuild the matrices on the next call.
        # Clearing only 'valid' would leave update() returning False
        # forever once it has run, silencing the plugin.
        self.valid = False
        self.updated = False
        if paramid == 'minHz':
            if newval < self.maxHz and newval < self.NqHz:
                self.minHz = float(newval)
            print('minHz:  %s' % (self.minHz,))
        elif paramid == 'maxHz':
            print('trying to set maxHz to:  %s' % (newval,))
            if newval < self.NqHz and newval > self.minHz+1000:
                self.maxHz = float(newval)
            else:
                self.maxHz = self.NqHz
            print('set to:  %s' % (self.maxHz,))
        elif paramid == 'cnull':
            # The parameter means "return C0"; the attribute is the index
            # of the first returned coefficient, hence the inversion.
            self.cnull = int(not int(newval))
        elif paramid == 'melbands':
            self.numBands = int(newval)
        elif paramid == 'two_ch':
            self.two_ch = bool(newval)

        return

    def getParameter(self,paramid):
        '''Return the value of the parameter called paramid as a float
        (0.0 for unknown identifiers).'''
        if paramid == 'minHz':
            return float(self.minHz)
        if paramid == 'maxHz':
            return float(self.maxHz)
        if paramid == 'cnull':
            # Inverse of the mapping used in setParameter().
            return float(not int(self.cnull))
        if paramid == 'melbands':
            return float(self.numBands)
        if paramid == 'two_ch':
            return float(self.two_ch)
        return 0.0

    def _magnitudeSpectrum(self,inputbuffer):
        '''Return the first blockSize/2 magnitudes of one input buffer.
        The buffer is read as complex64 values.'''
        complexSpectrum = numpy.frombuffer(inputbuffer,numpy.complex64,-1,0)
        return numpy.abs(complexSpectrum)[0:self.m_blockSize//2]

    # numpy process using the buffer interface
    def process(self,inputbuffers,timestamp):
        '''Compute the features of one block.
        Returns None if the Mel scaling is not valid, otherwise a
        FeatureSet holding the MFCCs (0) and the Mel spectrum (1).
        With two channels the mean of the two magnitude spectra is
        used, unless separate processing (two_ch) is selected.'''

        if not self.update():
            return None

        if self.m_channels == 2 and self.two_ch:
            return self.process2ch(inputbuffers,timestamp)

        if self.m_channels > 1:
            # take the mean of the two magnitude spectra
            magnitudeSpectrum = (self._magnitudeSpectrum(inputbuffers[0]) + self._magnitudeSpectrum(inputbuffers[1])) / 2
        else:
            magnitudeSpectrum = self._magnitudeSpectrum(inputbuffers[0])

        # do the computation
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)

        # output feature set (the builtin dict type can also be used)
        outputs = FeatureSet()
        outputs[0] = Feature(melCepstrum[self.cnull:])
        outputs[1] = Feature(melSpectrum)

        return outputs

    # process two channel files (stack the returned arrays)
    def process2ch(self,inputbuffers,timestamp):
        '''Compute the features of both channels and stack them.
        The caller (process) is responsible for calling update().'''

        magnitudeSpectrum0 = self._magnitudeSpectrum(inputbuffers[0])
        magnitudeSpectrum1 = self._magnitudeSpectrum(inputbuffers[1])

        # do the computations
        melSpectrum0 = self.warpSpectrum(magnitudeSpectrum0)
        melCepstrum0 = self.getMFCCs(melSpectrum0,cn=True)
        melSpectrum1 = self.warpSpectrum(magnitudeSpectrum1)
        melCepstrum1 = self.getMFCCs(melSpectrum1,cn=True)

        outputs = FeatureSet()
        # NOTE(review): channel 1 is placed before channel 0 in both
        # outputs; this order is kept from the original - confirm that
        # it is intended.
        outputs[0] = Feature(numpy.hstack((melCepstrum1[self.cnull:],melCepstrum0[self.cnull:])))
        outputs[1] = Feature(numpy.hstack((melSpectrum1,melSpectrum0)))

        return outputs

    def getRemainingFeatures(self):
        '''Return the Mel filter matrix as the features of output 2.
        One feature is emitted per row still left in self.filterIter,
        with timestamps one step size apart starting from zero.
        Returns an empty list if the Mel scaling is not valid.'''
        if not self.update():
            return []

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        frameSampleStart = 0
        for filterRow in self.filterIter:
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart,self.m_inputSampleRate)
            f.values = filterRow
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet
fazekasgy@37 392