annotate Example VamPy plugins/PyMFCC.py @ 67:146d14ab15e7

Debug output: off by default, on with VAMPY_VERBOSE environment variable
author Chris Cannam
date Mon, 17 Nov 2014 10:03:44 +0000
parents d56f48aafb99
children 44d56a3d16b7
rev   line source
fazekasgy@37 1 '''PyMFCC.py - This example Vampy plugin demonstrates
fazekasgy@37 2 how to return spectrogram-like features and how to return
fazekasgy@37 3 data using the getRemainingFeatures() function.
fazekasgy@37 4
fazekasgy@37 5 The plugin has frequency domain input and is using the
fazekasgy@37 6 numpy array interface. (Flag: vf_ARRAY)
fazekasgy@37 7
fazekasgy@37 8 Outputs:
fazekasgy@37 9 1) 2-128 MFCC coefficients
fazekasgy@37 10 2) Mel-warped spectrum used for the MFCC computation
fazekasgy@37 11 3) Filter matrix used for Mel scaling
fazekasgy@37 12
fazekasgy@37 13 Centre for Digital Music, Queen Mary University of London.
fazekasgy@37 14 Copyright (C) 2009 Gyorgy Fazekas, QMUL. (See Vamp sources
fazekasgy@37 15 for licence information.)
fazekasgy@37 16
fazekasgy@37 17 Constants for Mel frequency conversion and filter
fazekasgy@37 18 centre calculation are taken from the GNU GPL licenced
fazekasgy@37 19 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
fazekasgy@37 20 '''
fazekasgy@37 21
fazekasgy@37 22 import sys,numpy,vampy
fazekasgy@37 23 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack
fazekasgy@37 24 from numpy.fft import *
fazekasgy@37 25 from vampy import *
fazekasgy@37 26
fazekasgy@37 27
class melScaling(object):
    '''Mel-scale warping of a magnitude spectrum and DCT-based MFCCs.

    The triangular filter bank and the DCT matrix are built lazily by
    update(), which has to be called (and return True) before
    warpSpectrum(), dct() or getMFCCs() are used.
    '''

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Initialise frequency warping and DCT matrix.
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = Nyquist frequency)
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None:
            maxHz = self.NqHz
        self.maxHz = maxHz
        # Both values are used as array dimensions, so they must be
        # integers even if the caller computed them with true division.
        self.inputSize = int(inputSize)
        self.numBands = int(numBands)
        self.valid = False
        self.updated = False

    def update(self):
        '''Build the filter and DCT matrices (first call only).

        Returns True when the matrices are available. Later calls do
        no work and just return the stored validity flag.
        Raises Exception if maxHz is above the Nyquist frequency.
        '''
        # make sure this will run only once
        # if called from a vamp process
        if self.updated:
            return self.valid
        self.updated = True
        self.valid = False

        if self.maxHz > self.NqHz:
            raise Exception('Maximum frequency must be smaller than the Nyquist frequency')

        # Hz -> Mel conversion of both warping bounds
        melScale = numpy.log(1+1000.0/700.0)
        self.maxMel = 1000*numpy.log(1+self.maxHz/700.0)/melScale
        self.minMel = 1000*numpy.log(1+self.minHz/700.0)/melScale

        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # row iterator over the filter matrix (one row per spectrum bin)
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.
        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters.
        Returns an integer array of numBands+2 bin indices.'''
        # evenly spaced points on the Mel axis between minMel and maxMel
        centresMel = numpy.arange(numBands+2) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
        # Mel -> Hz -> nearest bin index (the +0.5 before floor rounds)
        centresHz = 700.0*(numpy.exp(centresMel*numpy.log(1+1000.0/700.0)/1000.0)-1)
        centresBin = numpy.floor(0.5 + inputSize*centresHz/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.
        Returns an array of shape (inputSize, numBands) and stores the
        bin centres in self.filterCentres.'''
        inputSize = int(inputSize)
        numBands = int(numBands)
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for band in range(numBands):
            # each filter spans three consecutive centres
            start,centre,end = self.filterCentres[band:band+3]
            self.setFilter(filterMatrix[band],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.
        Writes a triangle into filt in place: rising from 0 at
        filterStart to 1 at filterCentre, then falling towards 0 at
        filterEnd (exclusive).'''
        rise = filterCentre-filterStart
        fall = filterEnd-filterCentre
        # A zero-width slope leaves that side of the filter untouched.
        if rise > 0:
            filt[filterStart:filterCentre] = (numpy.arange(filterStart,filterCentre)-filterStart)/float(rise)
        if fall > 0:
            filt[filterCentre:filterEnd] = (filterEnd-numpy.arange(filterCentre,filterEnd))/float(fall)

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) with 64 bit precision.'''
        # rowIndex[i] = i as a column vector; angles[j] = pi*(j+0.5)/size
        rowIndex = numpy.arange(size,dtype=numpy.float64).reshape(size,1)
        angles = numpy.pi * (numpy.arange(size,dtype=numpy.float64)+0.5) / size
        DCTmx = (1.0/numpy.sqrt(size/2.0)) * numpy.cos(rowIndex * angles)
        # the first row gets the extra 1/sqrt(2) scaling
        DCTmx[0] = DCTmx[0] * (numpy.sqrt(2.0)/2.0)
        return DCTmx

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.
        When cn is False the first coefficient is set to zero.'''
        # Floor the spectrum at the smallest positive normal float so
        # that empty bands (e.g. digital silence) give finite values
        # instead of -inf/NaN after the logarithm.
        smallest = numpy.finfo(numpy.float64).tiny
        mfccs = self.dct(numpy.log(numpy.maximum(warpedSpectrum,smallest)))
        if cn is False:
            mfccs[0] = 0.0
        return mfccs
fazekasgy@37 120
fazekasgy@37 121
class PyMFCC(melScaling):
    '''Vampy MFCC example plugin.

    Frequency domain input delivered as numpy arrays (vf_ARRAY).
    Outputs, by index:
    0) MFCC coefficients
    1) Mel scaled magnitude spectrum
    2) rows of the Mel filter matrix (from getRemainingFeatures)
    '''

    def __init__(self,inputSampleRate):
        '''Set the defaults used until the host calls initialise().'''

        # flags for setting some Vampy options
        self.vampy_flags = vf_ARRAY | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 1024
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # index of the first returned coefficient: 1 skips C0, 0 keeps it
        self.cnull = 1
        self.two_ch = False
        # floor division keeps the spectrum length an integer
        melScaling.__init__(self,self.m_inputSampleRate,self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing configuration and reset the Mel scaler.
        Always returns True.'''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds through: calling the base
        # initialiser without them would reset minHz/maxHz to their
        # defaults and discard values set with setParameter().
        melScaling.__init__(self,self.m_inputSampleRate,self.m_blockSize//2,
                            self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        return 'Vampy Example Plugins'

    def getCopyright(self):
        return 'Plugin By George Fazekas'

    def getName(self):
        return 'Vampy MFCC Plugin'

    def getIdentifier(self):
        return 'vampy-mfcc'

    def getDescription(self):
        return 'A simple MFCC plugin'

    def getMaxChannelCount(self):
        return 2

    def getInputDomain(self):
        return FrequencyDomain #TimeDomain

    def getPreferredBlockSize(self):
        return 2048

    def getPreferredStepSize(self):
        return 1024

    def getOutputDescriptors(self):
        '''Describe the three outputs (MFCCs, warped spectrum, filter).'''
        numBands = int(self.numBands)
        numCoeffs = numBands-self.cnull
        separateChannels = self.two_ch and self.m_channels == 2

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount=True
        Generic.binCount=numCoeffs
        Generic.hasKnownExtents=False
        Generic.isQuantized=True
        Generic.sampleType = OneSamplePerStep

        # note the inheritance of attributes (optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # a real list, so that it can be repeated below
        MFCC.binNames=['C '+str(x) for x in range(self.cnull,numBands)]
        MFCC.unit = None
        if separateChannels :
            MFCC.binNames *= 2 #repeat the list
            MFCC.binCount = self.m_channels * numCoeffs
        else :
            MFCC.binCount = self.numBands-self.cnull

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier='warped-fft'
        warpedSpectrum.name='Mel Scaled Spectrum'
        warpedSpectrum.description='Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit='Mel'
        if separateChannels :
            warpedSpectrum.binCount = self.m_channels * numBands
        else :
            warpedSpectrum.binCount = self.numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        # NOTE(review): given as a string while the other outputs use
        # the OneSamplePerStep constant - confirm Vampy accepts both.
        melFilter.sampleType='FixedSampleRate'
        # float division: one filter row is emitted per step
        melFilter.sampleRate=float(self.m_inputSampleRate)/self.m_stepSize
        # every row of the filter matrix holds one value per band,
        # regardless of whether C0 is returned
        melFilter.binCount = numBands
        melFilter.name='Mel Filter Matrix'
        melFilter.description='Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None

        return OutputList(MFCC,warpedSpectrum,melFilter)


    def getParameterDescriptors(self):
        '''Describe the five adjustable parameters.'''

        melbands = ParameterDescriptor()
        melbands.identifier='melbands'
        melbands.name='Number of bands (coefficients)'
        melbands.description='Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier='cnull'
        cnull.name='Return C0'
        cnull.description='Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # same 0/1 switch layout as cnull
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier='two_ch'
        two_ch.name='Process channels separately'
        two_ch.description='Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier='minHz'
        minHz.name='minimum frequency'
        minHz.description='Set the lower frequency bound.'
        minHz.unit='Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier='maxHz'
        maxHz.description='Set the upper frequency bound.'
        maxHz.name='maximum frequency'
        maxHz.unit='Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands,minHz,maxHz,cnull,two_ch)


    def setParameter(self,paramid,newval):
        '''Store a new parameter value; unknown identifiers are ignored.'''
        # Force update() to rebuild the matrices on the next call.
        # Clearing 'valid' alone is not enough: with 'updated' still
        # set, update() would return False forever.
        self.valid = False
        self.updated = False
        if paramid == 'minHz' :
            # values not below the upper bound are silently ignored
            if newval < self.maxHz and newval < self.NqHz :
                self.minHz = float(newval)
        elif paramid == 'maxHz' :
            # must leave more than 1 kHz above minHz, otherwise the
            # Nyquist frequency is used
            if newval < self.NqHz and newval > self.minHz+1000 :
                self.maxHz = float(newval)
            else :
                self.maxHz = self.NqHz
        elif paramid == 'cnull' :
            # the parameter means 'return C0'; the attribute stores the
            # index of the first returned coefficient, hence the inversion
            self.cnull = int(not int(newval))
        elif paramid == 'melbands' :
            self.numBands = int(newval)
        elif paramid == 'two_ch' :
            self.two_ch = bool(newval)
        return None


    def getParameter(self,paramid):
        '''Return the current value of a parameter (0.0 if unknown).'''
        if paramid == 'minHz' :
            return self.minHz
        if paramid == 'maxHz' :
            return self.maxHz
        if paramid == 'cnull' :
            # inverse of the mapping used in setParameter()
            return bool(not int(self.cnull))
        if paramid == 'melbands' :
            return self.numBands
        if paramid == 'two_ch' :
            return self.two_ch
        return 0.0

    # set numpy array process using the 'vf_ARRAY' flag in __init__()
    # and RealTime time stamps using the 'vf_REALTIME' flag
    def process(self,inputbuffers,timestamp):
        '''Compute MFCCs and the Mel spectrum for one input block.
        Returns a FeatureSet with outputs 0 and 1, or None when the
        filter matrices are not available.'''

        # calculate the filter and DCT matrices, check
        # if they are computable given a set of parameters
        # (we only do this once, when the process is called first)
        if not self.update() : return None

        # if two channel processing is set, use process2ch
        if self.m_channels == 2 and self.two_ch :
            return self.process2ch(inputbuffers,timestamp)

        # number of spectrum bins used (integer, usable as slice bound)
        numBins = self.m_blockSize//2

        if self.m_channels > 1 :
            # take the average of two magnitude spectra
            mS0 = abs(inputbuffers[0])[0:numBins]
            mS1 = abs(inputbuffers[1])[0:numBins]
            magnitudeSpectrum = (mS0 + mS1) / 2
        else :
            magnitudeSpectrum = abs(inputbuffers[0])[0:numBins]

        # do the frequency warping and MFCC computation
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)

        # returning the values:
        outputs = FeatureSet()

        # 1) full initialisation example using a FeatureList
        f_mfccs = Feature()
        f_mfccs.values = melCepstrum[self.cnull:]
        outputs[0] = FeatureList(f_mfccs)

        # 2) simplified: when only one feature is required,
        # the FeatureList() can be omitted
        outputs[1] = Feature(melSpectrum)

        # this is equivalent to writing :
        # outputs[1] = Feature()
        # outputs[1].values = melSpectrum
        # or using keyword args: Feature(values = melSpectrum)

        return outputs

    # process channels separately (stack the returned arrays)
    def process2ch(self,inputbuffers,timestamp):
        '''Compute MFCCs and Mel spectra for each of two channels and
        return them stacked into single features.'''

        numBins = self.m_blockSize//2

        magnitudeSpectrum0 = abs(inputbuffers[0])[0:numBins]
        magnitudeSpectrum1 = abs(inputbuffers[1])[0:numBins]

        # do the computations
        melSpectrum0 = self.warpSpectrum(magnitudeSpectrum0)
        melCepstrum0 = self.getMFCCs(melSpectrum0,cn=True)
        melSpectrum1 = self.warpSpectrum(magnitudeSpectrum1)
        melCepstrum1 = self.getMFCCs(melSpectrum1,cn=True)

        # NOTE(review): channel 1 is stacked before channel 0 -
        # confirm that this ordering is intended.
        outputs = FeatureSet()
        outputs[0] = Feature(hstack((melCepstrum1[self.cnull:],melCepstrum0[self.cnull:])))
        outputs[1] = Feature(hstack((melSpectrum1,melSpectrum0)))

        return outputs


    def getRemainingFeatures(self):
        '''Return the not yet consumed rows of the Mel filter matrix as
        timestamped features of the third output. Returns an empty
        list when the filter matrices are not available.'''
        if not self.update() : return []

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        # one feature per row, spaced one step apart
        frameSampleStart = 0
        for row in self.filterIter :
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart,self.m_inputSampleRate)
            f.values = row
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet
fazekasgy@37 398