annotate Example VamPy plugins/PyMFCC.py @ 92:a6718f9fe942

If a module appears to redefine one of our own types, refuse to load it. Also clear out the class dict for all refused modules now, so that we don't get stale names on the next scan due to not having cleared the module on unload
author Chris Cannam
date Mon, 14 Jan 2019 16:19:44 +0000
parents f5b8646494d2
children
rev   line source
fazekasgy@37 1 '''PyMFCC.py - This example Vampy plugin demonstrates
fazekasgy@37 2 how to return spectrogram-like features and how to return
fazekasgy@37 3 data using the getRemainingFeatures() function.
fazekasgy@37 4
fazekasgy@37 5 The plugin has frequency domain input and is using the
fazekasgy@37 6 numpy array interface. (Flag: vf_ARRAY)
fazekasgy@37 7
fazekasgy@37 8 Outputs:
fazekasgy@37 9 1) 2-128 MFCC coefficients
fazekasgy@37 10 2) Mel-warped spectrum used for the MFCC computation
fazekasgy@37 11 3) Filter matrix used for Mel scaling
fazekasgy@37 12
fazekasgy@37 13 Centre for Digital Music, Queen Mary University of London.
fazekasgy@37 14 Copyright (C) 2009 Gyorgy Fazekas, QMUL. (See Vamp sources
fazekasgy@37 15 for licence information.)
fazekasgy@37 16
fazekasgy@37 17 Constants for Mel frequency conversion and filter
fazekasgy@37 18 centre calculation are taken from the GNU GPL licenced
fazekasgy@37 19 Freespeech library. Copyright (C) 1999 Jean-Marc Valin
fazekasgy@37 20 '''
fazekasgy@37 21
fazekasgy@37 22 import sys,numpy,vampy
fazekasgy@37 23 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack
fazekasgy@37 24 from numpy.fft import *
fazekasgy@37 25 from vampy import *
fazekasgy@37 26
fazekasgy@37 27
class melScaling(object):
    '''Mel frequency warping of magnitude spectra and the DCT used for MFCCs.

    The filter bank and DCT matrix are built lazily by update(), which
    runs its computation once and then returns the cached validity flag
    until reset() is called.
    '''

    def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
        '''Initialise frequency warping and DCT matrix.
        Parameters:
        sampleRate: audio sample rate
        inputSize: length of magnitude spectrum (half of FFT size assumed)
        numBands: number of mel Bands (MFCCs)
        minHz: lower bound of warping (default = DC)
        maxHz: higher bound of warping (default = 11025 Hz; values above
               the Nyquist frequency are clamped to it in update())
        '''
        self.sampleRate = sampleRate
        self.NqHz = sampleRate / 2.0
        self.minHz = minHz
        if maxHz is None : maxHz = 11025
        self.maxHz = maxHz
        # Both values are used as array dimensions, so they must be integers
        # even if the caller computed them with true division (e.g. 2048/2).
        self.inputSize = int(inputSize)
        self.numBands = int(numBands)
        self.valid = False
        self.updated = False

    def reset(self):
        '''Allow the next update() call to recalculate the matrices.'''
        self.updated = False
        return None

    def update(self):
        '''Build the Mel filter matrix and the DCT matrix.

        The computation runs only on the first call after construction
        or reset(); later calls return the stored result.
        Returns True if the matrices are available, False otherwise.
        '''
        # make sure this will run only once
        # if called from a vamp process
        if self.updated: return self.valid
        self.updated = True
        # stays False if anything below raises
        self.valid = False
        # neither bound may exceed the Nyquist frequency
        maxHz = min(self.maxHz,self.NqHz)
        minHz = min(self.minHz,self.NqHz)
        melNorm = log(1+1000.0/700.0)
        self.maxMel = 1000*log(1+maxHz/700.0)/melNorm
        self.minMel = 1000*log(1+minHz/700.0)/melNorm
        self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
        self.DCTMatrix = self.getDCTMatrix(self.numBands)
        # row iterator over the filter matrix (one row per spectrum bin)
        self.filterIter = iter(self.filterMatrix)
        self.valid = True
        return self.valid

    def getFilterCentres(self,inputSize,numBands):
        '''Calculate Mel filter centres around FFT bins.
        This function calculates two extra bands at the edges for
        finding the starting and end point of the first and last
        actual filters.
        Returns an integer array of numBands+2 bin indices.'''
        melStep = (self.maxMel-self.minMel)/(numBands+1)
        centresMel = numpy.arange(numBands+2) * melStep + self.minMel
        # convert Mel back to Hz, then Hz to the nearest bin index
        centresHz = 700.0*(exp(centresMel*log(1+1000.0/700.0)/1000.0)-1)
        centresBin = numpy.floor(0.5 + inputSize*centresHz/self.NqHz)
        return numpy.array(centresBin,int)

    def getFilterMatrix(self,inputSize,numBands):
        '''Compose the Mel scaling matrix.
        Returns an array of shape (inputSize, numBands); the filter
        centres are stored in self.filterCentres as a side effect.'''
        filterMatrix = numpy.zeros((numBands,inputSize))
        self.filterCentres = self.getFilterCentres(inputSize,numBands)
        for i in range(numBands) :
            # each filter spans three consecutive centres
            start,centre,end = self.filterCentres[i:i+3]
            self.setFilter(filterMatrix[i],start,centre,end)
        return filterMatrix.transpose()

    def setFilter(self,filt,filterStart,filterCentre,filterEnd):
        '''Calculate a single Mel filter.
        Writes a triangle into filt (in place) that rises from 0 at
        filterStart to 1 at filterCentre and falls towards 0 at filterEnd.'''
        k1 = float(filterCentre-filterStart)
        k2 = float(filterEnd-filterCentre)
        # an empty range yields an empty array, so a zero-width slope
        # never divides by zero
        up = (numpy.arange(filterStart,filterCentre)-filterStart)/k1
        dn = (filterEnd-numpy.arange(filterCentre,filterEnd))/k2
        filt[filterStart:filterCentre] = up
        filt[filterCentre:filterEnd] = dn

    def warpSpectrum(self,magnitudeSpectrum):
        '''Compute the Mel scaled spectrum.'''
        return numpy.dot(magnitudeSpectrum,self.filterMatrix)

    def getDCTMatrix(self,size):
        '''Calculate the square DCT transform matrix. Results are
        equivalent to Matlab dctmtx(n) with 64 bit precision.'''
        # DCTmx[i,j] = i ; its transpose supplies the column index j
        DCTmx = numpy.arange(size,dtype=numpy.float64).repeat(size).reshape(size,size)
        DCTmxT = numpy.pi * (DCTmx.transpose()+0.5) / size
        DCTmxT = (1.0/sqrt( size / 2.0)) * cos(DCTmx * DCTmxT)
        # extra scaling of the first (DC) row
        DCTmxT[0] = DCTmxT[0] * (sqrt(2.0)/2.0)
        return DCTmxT

    def dct(self,data_matrix):
        '''Compute DCT of input matrix.'''
        return numpy.dot(self.DCTMatrix,data_matrix)

    def getMFCCs(self,warpedSpectrum,cn=True):
        '''Compute MFCC coefficients from Mel warped magnitude spectrum.
        If cn is False the first coefficient is set to zero.'''
        # small offset keeps log() finite for empty bands
        eps = 1e-8
        mfccs=self.dct(numpy.log(warpedSpectrum + eps))
        if cn is False : mfccs[0] = 0.0
        return mfccs
fazekasgy@37 126
fazekasgy@37 127
class PyMFCC(melScaling):
    '''Example Vampy plugin computing MFCCs from frequency domain input.

    Outputs (by index):
    0) MFCC coefficients
    1) Mel scaled magnitude spectrum
    2) the Mel filter matrix, returned from getRemainingFeatures()
    '''

    def __init__(self,inputSampleRate):

        # flags for setting some Vampy options
        self.vampy_flags = vf_ARRAY | vf_REALTIME

        self.m_inputSampleRate = int(inputSampleRate)
        self.m_stepSize = 1024
        self.m_blockSize = 2048
        self.m_channels = 1
        self.numBands = 40
        # index of the first coefficient returned: 1 skips C0, 0 keeps it
        self.cnull = 1
        self.two_ch = False
        # floor division keeps the spectrum length an integer
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,self.numBands)

    def initialise(self,channels,stepSize,blockSize):
        '''Store the processing configuration and re-initialise the
        Mel scaling for the given block size. Always returns True.'''
        self.m_channels = channels
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize
        self.window = numpy.hamming(blockSize)
        # Pass the current frequency bounds through: melScaling.__init__
        # would otherwise reset them to its defaults and discard values
        # already set via setParameter().
        melScaling.__init__(self,int(self.m_inputSampleRate),self.m_blockSize//2,
                            self.numBands,self.minHz,self.maxHz)
        return True

    def getMaker(self):
        return 'Vampy Example Plugins'

    def getCopyright(self):
        return 'Plugin By George Fazekas. Freely redistributable example plugin (BSD license)'

    def getName(self):
        return 'Vampy MFCC Plugin'

    def getIdentifier(self):
        return 'vampy-mfcc'

    def getDescription(self):
        return 'A simple MFCC plugin'

    def getMaxChannelCount(self):
        return 2

    def getInputDomain(self):
        return FrequencyDomain #TimeDomain

    def getPreferredBlockSize(self):
        return 2048

    def getPreferredStepSize(self):
        return 1024

    def getOutputDescriptors(self):
        '''Describe the three outputs: MFCCs, Mel spectrum, filter matrix.'''

        Generic = OutputDescriptor()
        Generic.hasFixedBinCount=True
        Generic.binCount=int(self.numBands)-self.cnull
        Generic.hasKnownExtents=False
        Generic.isQuantized=True
        Generic.sampleType = OneSamplePerStep

        separateChannels = self.two_ch and self.m_channels == 2

        # note the inheritance of attributes (optional)
        MFCC = OutputDescriptor(Generic)
        MFCC.identifier = 'mfccs'
        MFCC.name = 'MFCCs'
        MFCC.description = 'MFCC Coefficients'
        # a real list is needed here because it may be repeated below
        MFCC.binNames=['C '+str(x) for x in range(self.cnull,int(self.numBands))]
        if separateChannels :
            MFCC.binNames *= 2 #repeat the list
        MFCC.unit = None
        if separateChannels :
            MFCC.binCount = self.m_channels * (int(self.numBands)-self.cnull)
        else :
            MFCC.binCount = self.numBands-self.cnull

        warpedSpectrum = OutputDescriptor(Generic)
        warpedSpectrum.identifier='warped-fft'
        warpedSpectrum.name='Mel Scaled Spectrum'
        warpedSpectrum.description='Mel Scaled Magnitide Spectrum'
        warpedSpectrum.unit='Mel'
        if separateChannels :
            warpedSpectrum.binCount = self.m_channels * int(self.numBands)
        else :
            warpedSpectrum.binCount = self.numBands

        melFilter = OutputDescriptor(Generic)
        melFilter.identifier = 'mel-filter-matrix'
        melFilter.sampleType='FixedSampleRate'
        melFilter.sampleRate=self.m_inputSampleRate/self.m_stepSize
        melFilter.name='Mel Filter Matrix'
        melFilter.description='Returns the created filter matrix in getRemainingFeatures.'
        melFilter.unit = None
        # every row of the filter matrix holds one value per band,
        # whether or not C0 is dropped from the MFCC output
        melFilter.binCount = int(self.numBands)

        return OutputList(MFCC,warpedSpectrum,melFilter)


    def getParameterDescriptors(self):
        '''Describe the five user parameters of the plugin.'''

        melbands = ParameterDescriptor()
        melbands.identifier='melbands'
        melbands.name='Number of bands (coefficients)'
        melbands.description='Set the number of coefficients.'
        melbands.unit = ''
        melbands.minValue = 2
        melbands.maxValue = 128
        melbands.defaultValue = 40
        melbands.isQuantized = True
        melbands.quantizeStep = 1

        cnull = ParameterDescriptor()
        cnull.identifier='cnull'
        cnull.name='Return C0'
        cnull.description='Select if the DC coefficient is required.'
        cnull.unit = None
        cnull.minValue = 0
        cnull.maxValue = 1
        cnull.defaultValue = 0
        cnull.isQuantized = True
        cnull.quantizeStep = 1

        # starts from the cnull descriptor, then overrides what differs
        two_ch = ParameterDescriptor(cnull)
        two_ch.identifier='two_ch'
        two_ch.name='Process channels separately'
        two_ch.description='Process two channel files separately.'
        two_ch.defaultValue = False

        minHz = ParameterDescriptor()
        minHz.identifier='minHz'
        minHz.name='minimum frequency'
        minHz.description='Set the lower frequency bound.'
        minHz.unit='Hz'
        minHz.minValue = 0
        minHz.maxValue = 24000
        minHz.defaultValue = 0
        minHz.isQuantized = True
        minHz.quantizeStep = 1.0

        maxHz = ParameterDescriptor()
        maxHz.identifier='maxHz'
        maxHz.description='Set the upper frequency bound.'
        maxHz.name='maximum frequency'
        maxHz.unit='Hz'
        maxHz.minValue = 100
        maxHz.maxValue = 24000
        maxHz.defaultValue = 11025
        maxHz.isQuantized = True
        maxHz.quantizeStep = 100

        return ParameterList(melbands,minHz,maxHz,cnull,two_ch)


    def setParameter(self,paramid,newval):
        '''Store a parameter value; unknown identifiers are ignored.
        Any call marks the current matrices as invalid.'''
        self.valid = False
        if paramid == 'minHz' :
            self.minHz = float(newval)
        if paramid == 'maxHz' :
            self.maxHz = float(newval)
        if paramid == 'cnull' :
            # parameter 1 ("return C0") maps to start index 0 and vice versa
            self.cnull = int(not int(newval))
        if paramid == 'melbands' :
            self.numBands = int(newval)
        if paramid == 'two_ch' :
            self.two_ch = bool(newval)
        return None


    def getParameter(self,paramid):
        '''Return the current value of a parameter (0.0 if unknown).'''
        if paramid == 'minHz' :
            return self.minHz
        if paramid == 'maxHz' :
            return self.maxHz
        if paramid == 'cnull' :
            return bool(not int(self.cnull))
        if paramid == 'melbands' :
            return self.numBands
        if paramid == 'two_ch' :
            return self.two_ch
        return 0.0

    # set numpy array process using the 'vf_ARRAY' flag in __init__()
    # and RealTime time stamps using the 'vf_REALTIME' flag
    def process(self,inputbuffers,timestamp):
        '''Compute MFCCs and the Mel spectrum for one block.
        Returns a FeatureSet with outputs 0 and 1, or None if the
        filter and DCT matrices are not available.'''

        # calculate the filter and DCT matrices, check
        # if they are computable given a set of parameters
        # (we only do this once, when the process is called first)
        if not self.update() : return None

        # if two channel processing is set, use process2ch
        if self.m_channels == 2 and self.two_ch :
            return self.process2ch(inputbuffers,timestamp)

        # slice bounds must be integers, hence floor division
        halfSize = self.m_blockSize // 2

        if self.m_channels > 1 :
            # take the average of two magnitude spectra
            mS0 = abs(inputbuffers[0])[0:halfSize]
            mS1 = abs(inputbuffers[1])[0:halfSize]
            magnitudeSpectrum = (mS0 + mS1) / 2
        else :
            complexSpectrum = inputbuffers[0]
            magnitudeSpectrum = abs(complexSpectrum)[0:halfSize]

        # do the frequency warping and MFCC computation
        melSpectrum = self.warpSpectrum(magnitudeSpectrum)
        melCepstrum = self.getMFCCs(melSpectrum,cn=True)

        # returning the values:
        outputs = FeatureSet()

        # 1) full initialisation example using a FeatureList
        f_mfccs = Feature()
        f_mfccs.values = melCepstrum[self.cnull:]
        outputs[0] = FeatureList(f_mfccs)

        # 2) simplified: when only one feature is required,
        # the FeatureList() can be omitted
        outputs[1] = Feature(melSpectrum)

        # this is equivalent to writing :
        # outputs[1] = Feature()
        # outputs[1].values = melSpectrum
        # or using keyword args: Feature(values = melSpectrum)

        return outputs

    # process channels separately (stack the returned arrays)
    def process2ch(self,inputbuffers,timestamp):
        '''Compute MFCCs and Mel spectra for two channels separately.
        The results of channel 1 are stacked before those of channel 0.'''

        halfSize = self.m_blockSize // 2

        complexSpectrum0 = inputbuffers[0]
        complexSpectrum1 = inputbuffers[1]

        magnitudeSpectrum0 = abs(complexSpectrum0)[0:halfSize]
        magnitudeSpectrum1 = abs(complexSpectrum1)[0:halfSize]

        # do the computations
        melSpectrum0 = self.warpSpectrum(magnitudeSpectrum0)
        melCepstrum0 = self.getMFCCs(melSpectrum0,cn=True)
        melSpectrum1 = self.warpSpectrum(magnitudeSpectrum1)
        melCepstrum1 = self.getMFCCs(melSpectrum1,cn=True)

        outputs = FeatureSet()
        outputs[0] = Feature(hstack((melCepstrum1[self.cnull:],melCepstrum0[self.cnull:])))
        outputs[1] = Feature(hstack((melSpectrum1,melSpectrum0)))

        return outputs


    def getRemainingFeatures(self):
        '''Return the rows of the Mel filter matrix as output 2.
        Each remaining row becomes one timestamped Feature, spaced one
        step size apart. Returns an empty list if the matrices are not
        available.'''
        if not self.update() : return []
        frameSampleStart = 0

        output_featureSet = FeatureSet()

        # the filter is the third output (index starts from zero)
        output_featureSet[2] = flist = FeatureList()

        while True:
            f = Feature()
            f.hasTimestamp = True
            f.timestamp = frame2RealTime(frameSampleStart,self.m_inputSampleRate)
            try :
                # builtin next() works with both Python 2 and 3 iterators
                f.values = next(self.filterIter)
            except StopIteration :
                break
            flist.append(f)
            frameSampleStart += self.m_stepSize

        return output_featureSet
fazekasgy@37 400