annotate MFCC.py @ 1:d5d4981b58f1

import MFCC code by Gyorgy Fazekas
author Dan Stowell <danstowell@users.sourceforge.net>
date Wed, 14 Nov 2012 13:15:15 +0000
parents
children 7a20cff05bd6
rev   line source
danstowell@1 1 '''MFCC.py
danstowell@1 2
danstowell@1 3 Calculation of MFCC coefficients from frequency-domain data
danstowell@1 4
danstowell@1 5 Adapted from the Vampy example plugin "PyMFCC" by Gyorgy Fazekas
danstowell@1 6 http://code.soundsoftware.ac.uk/projects/vampy/repository/entry/Example%20VamPy%20plugins/PyMFCC.py
danstowell@1 7
danstowell@1 8 Centre for Digital Music, Queen Mary University of London.
danstowell@1 9 Copyright (C) 2009 Gyorgy Fazekas, QMUL.
danstowell@1 10 '''
danstowell@1 11
danstowell@1 12 import sys,numpy
danstowell@1 13 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack
danstowell@1 14 from numpy.fft import *
danstowell@1 15
danstowell@1 16 class melScaling(object):
danstowell@1 17
danstowell@1 18 def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
danstowell@1 19 '''Initialise frequency warping and DCT matrix.
danstowell@1 20 Parameters:
danstowell@1 21 sampleRate: audio sample rate
danstowell@1 22 inputSize: length of magnitude spectrum (half of FFT size assumed)
danstowell@1 23 numBands: number of mel Bands (MFCCs)
danstowell@1 24 minHz: lower bound of warping (default = DC)
danstowell@1 25 maxHz: higher bound of warping (default = Nyquist frequency)
danstowell@1 26 '''
danstowell@1 27 self.sampleRate = sampleRate
danstowell@1 28 self.NqHz = sampleRate / 2.0
danstowell@1 29 self.minHz = minHz
danstowell@1 30 if maxHz is None : maxHz = self.NqHz
danstowell@1 31 self.maxHz = maxHz
danstowell@1 32 self.inputSize = inputSize
danstowell@1 33 self.numBands = numBands
danstowell@1 34 self.valid = False
danstowell@1 35 self.updated = False
danstowell@1 36
danstowell@1 37 def update(self):
danstowell@1 38 # make sure this will run only once
danstowell@1 39 # if called from a vamp process
danstowell@1 40 if self.updated: return self.valid
danstowell@1 41 self.updated = True
danstowell@1 42 self.valid = False
danstowell@1 43 print 'Updating parameters and recalculating filters: '
danstowell@1 44 print 'Nyquist: ',self.NqHz
danstowell@1 45
danstowell@1 46 if self.maxHz > self.NqHz :
danstowell@1 47 raise Exception('Maximum frequency must be smaller than the Nyquist frequency')
danstowell@1 48
danstowell@1 49 self.maxMel = 1000*log(1+self.maxHz/700.0)/log(1+1000.0/700.0)
danstowell@1 50 self.minMel = 1000*log(1+self.minHz/700.0)/log(1+1000.0/700.0)
danstowell@1 51 print 'minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n' \
danstowell@1 52 %(self.minHz,self.maxHz,self.minMel,self.maxMel)
danstowell@1 53 self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
danstowell@1 54 self.DCTMatrix = self.getDCTMatrix(self.numBands)
danstowell@1 55 self.filterIter = self.filterMatrix.__iter__()
danstowell@1 56 self.valid = True
danstowell@1 57 return self.valid
danstowell@1 58
danstowell@1 59 def getFilterCentres(self,inputSize,numBands):
danstowell@1 60 '''Calculate Mel filter centres around FFT bins.
danstowell@1 61 This function calculates two extra bands at the edges for
danstowell@1 62 finding the starting and end point of the first and last
danstowell@1 63 actual filters.'''
danstowell@1 64 centresMel = numpy.array(xrange(numBands+2)) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
danstowell@1 65 centresBin = numpy.floor(0.5 + 700.0*inputSize*(exp(centresMel*log(1+1000.0/700.0)/1000.0)-1)/self.NqHz)
danstowell@1 66 return numpy.array(centresBin,int)
danstowell@1 67
danstowell@1 68 def getFilterMatrix(self,inputSize,numBands):
danstowell@1 69 '''Compose the Mel scaling matrix.'''
danstowell@1 70 filterMatrix = numpy.zeros((numBands,inputSize))
danstowell@1 71 self.filterCentres = self.getFilterCentres(inputSize,numBands)
danstowell@1 72 for i in xrange(numBands) :
danstowell@1 73 start,centre,end = self.filterCentres[i:i+3]
danstowell@1 74 self.setFilter(filterMatrix[i],start,centre,end)
danstowell@1 75 return filterMatrix.transpose()
danstowell@1 76
danstowell@1 77 def setFilter(self,filt,filterStart,filterCentre,filterEnd):
danstowell@1 78 '''Calculate a single Mel filter.'''
danstowell@1 79 k1 = numpy.float32(filterCentre-filterStart)
danstowell@1 80 k2 = numpy.float32(filterEnd-filterCentre)
danstowell@1 81 up = (numpy.array(xrange(filterStart,filterCentre))-filterStart)/k1
danstowell@1 82 dn = (filterEnd-numpy.array(xrange(filterCentre,filterEnd)))/k2
danstowell@1 83 filt[filterStart:filterCentre] = up
danstowell@1 84 filt[filterCentre:filterEnd] = dn
danstowell@1 85
danstowell@1 86 def warpSpectrum(self,magnitudeSpectrum):
danstowell@1 87 '''Compute the Mel scaled spectrum.'''
danstowell@1 88 return numpy.dot(magnitudeSpectrum,self.filterMatrix)
danstowell@1 89
danstowell@1 90 def getDCTMatrix(self,size):
danstowell@1 91 '''Calculate the square DCT transform matrix. Results are
danstowell@1 92 equivalent to Matlab dctmtx(n) with 64 bit precision.'''
danstowell@1 93 DCTmx = numpy.array(xrange(size),numpy.float64).repeat(size).reshape(size,size)
danstowell@1 94 DCTmxT = numpy.pi * (DCTmx.transpose()+0.5) / size
danstowell@1 95 DCTmxT = (1.0/sqrt( size / 2.0)) * cos(DCTmx * DCTmxT)
danstowell@1 96 DCTmxT[0] = DCTmxT[0] * (sqrt(2.0)/2.0)
danstowell@1 97 return DCTmxT
danstowell@1 98
danstowell@1 99 def dct(self,data_matrix):
danstowell@1 100 '''Compute DCT of input matrix.'''
danstowell@1 101 return numpy.dot(self.DCTMatrix,data_matrix)
danstowell@1 102
danstowell@1 103 def getMFCCs(self,warpedSpectrum,cn=True):
danstowell@1 104 '''Compute MFCC coefficients from Mel warped magnitude spectrum.'''
danstowell@1 105 mfccs=self.dct(numpy.log(warpedSpectrum))
danstowell@1 106 if cn is False : mfccs[0] = 0.0
danstowell@1 107 return mfccs
danstowell@1 108