annotate MFCC.py @ 35:f094fc50ff04 tip master

update readme, fix py3 note
author danstowell <danstowell@users.sourceforge.net>
date Wed, 15 Mar 2023 07:18:09 +0000
parents 34b4b44299be
children
rev   line source
danstowell@1 1 '''MFCC.py
danstowell@1 2
danstowell@1 3 Calculation of MFCC coefficients from frequency-domain data
danstowell@1 4
danstowell@1 5 Adapted from the Vampy example plugin "PyMFCC" by Gyorgy Fazekas
danstowell@1 6 http://code.soundsoftware.ac.uk/projects/vampy/repository/entry/Example%20VamPy%20plugins/PyMFCC.py
danstowell@1 7
danstowell@1 8 Centre for Digital Music, Queen Mary University of London.
danstowell@1 9 Copyright (C) 2009 Gyorgy Fazekas, QMUL.
danstowell@1 10 '''
danstowell@1 11
danstowell@1 12 import sys,numpy
danstowell@1 13 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack
danstowell@1 14 from numpy.fft import *
danstowell@1 15
danstowell@1 16 class melScaling(object):
danstowell@1 17
danstowell@1 18 def __init__(self,sampleRate,inputSize,numBands,minHz = 0,maxHz = None):
danstowell@1 19 '''Initialise frequency warping and DCT matrix.
danstowell@1 20 Parameters:
danstowell@1 21 sampleRate: audio sample rate
danstowell@1 22 inputSize: length of magnitude spectrum (half of FFT size assumed)
danstowell@1 23 numBands: number of mel Bands (MFCCs)
danstowell@1 24 minHz: lower bound of warping (default = DC)
danstowell@1 25 maxHz: higher bound of warping (default = Nyquist frequency)
danstowell@1 26 '''
danstowell@1 27 self.sampleRate = sampleRate
danstowell@1 28 self.NqHz = sampleRate / 2.0
danstowell@1 29 self.minHz = minHz
danstowell@1 30 if maxHz is None : maxHz = self.NqHz
danstowell@1 31 self.maxHz = maxHz
danstowell@1 32 self.inputSize = inputSize
danstowell@1 33 self.numBands = numBands
danstowell@1 34 self.valid = False
danstowell@1 35 self.updated = False
danstowell@1 36
danstowell@1 37 def update(self):
danstowell@1 38 # make sure this will run only once
danstowell@1 39 # if called from a vamp process
danstowell@1 40 if self.updated: return self.valid
danstowell@1 41 self.updated = True
danstowell@1 42 self.valid = False
danstowell@8 43 print('Updating parameters and recalculating filters: ')
danstowell@8 44 print('Nyquist: ',self.NqHz)
danstowell@1 45
danstowell@1 46 if self.maxHz > self.NqHz :
danstowell@1 47 raise Exception('Maximum frequency must be smaller than the Nyquist frequency')
danstowell@1 48
danstowell@1 49 self.maxMel = 1000*log(1+self.maxHz/700.0)/log(1+1000.0/700.0)
danstowell@1 50 self.minMel = 1000*log(1+self.minHz/700.0)/log(1+1000.0/700.0)
danstowell@8 51 print('minHz:%s\nmaxHz:%s\nminMel:%s\nmaxMel:%s\n' \
danstowell@8 52 %(self.minHz,self.maxHz,self.minMel,self.maxMel))
danstowell@1 53 self.filterMatrix = self.getFilterMatrix(self.inputSize,self.numBands)
danstowell@1 54 self.DCTMatrix = self.getDCTMatrix(self.numBands)
danstowell@1 55 self.valid = True
danstowell@1 56 return self.valid
danstowell@1 57
danstowell@1 58 def getFilterCentres(self,inputSize,numBands):
danstowell@1 59 '''Calculate Mel filter centres around FFT bins.
danstowell@1 60 This function calculates two extra bands at the edges for
danstowell@1 61 finding the starting and end point of the first and last
danstowell@1 62 actual filters.'''
danstowell@8 63 centresMel = numpy.array(range(numBands+2)) * (self.maxMel-self.minMel)/(numBands+1) + self.minMel
danstowell@1 64 centresBin = numpy.floor(0.5 + 700.0*inputSize*(exp(centresMel*log(1+1000.0/700.0)/1000.0)-1)/self.NqHz)
danstowell@1 65 return numpy.array(centresBin,int)
danstowell@1 66
danstowell@1 67 def getFilterMatrix(self,inputSize,numBands):
danstowell@1 68 '''Compose the Mel scaling matrix.'''
danstowell@1 69 filterMatrix = numpy.zeros((numBands,inputSize))
danstowell@1 70 self.filterCentres = self.getFilterCentres(inputSize,numBands)
danstowell@8 71 for i in range(numBands) :
danstowell@1 72 start,centre,end = self.filterCentres[i:i+3]
danstowell@1 73 self.setFilter(filterMatrix[i],start,centre,end)
danstowell@1 74 return filterMatrix.transpose()
danstowell@1 75
danstowell@1 76 def setFilter(self,filt,filterStart,filterCentre,filterEnd):
danstowell@1 77 '''Calculate a single Mel filter.'''
danstowell@1 78 k1 = numpy.float32(filterCentre-filterStart)
danstowell@1 79 k2 = numpy.float32(filterEnd-filterCentre)
danstowell@8 80 up = (numpy.array(range(filterStart,filterCentre))-filterStart)/k1
danstowell@8 81 dn = (filterEnd-numpy.array(range(filterCentre,filterEnd)))/k2
danstowell@1 82 filt[filterStart:filterCentre] = up
danstowell@1 83 filt[filterCentre:filterEnd] = dn
danstowell@3 84
danstowell@1 85 def warpSpectrum(self,magnitudeSpectrum):
danstowell@1 86 '''Compute the Mel scaled spectrum.'''
danstowell@1 87 return numpy.dot(magnitudeSpectrum,self.filterMatrix)
danstowell@1 88
danstowell@1 89 def getDCTMatrix(self,size):
danstowell@1 90 '''Calculate the square DCT transform matrix. Results are
danstowell@1 91 equivalent to Matlab dctmtx(n) with 64 bit precision.'''
danstowell@8 92 DCTmx = numpy.array(range(size),numpy.float64).repeat(size).reshape(size,size)
danstowell@1 93 DCTmxT = numpy.pi * (DCTmx.transpose()+0.5) / size
danstowell@1 94 DCTmxT = (1.0/sqrt( size / 2.0)) * cos(DCTmx * DCTmxT)
danstowell@1 95 DCTmxT[0] = DCTmxT[0] * (sqrt(2.0)/2.0)
danstowell@1 96 return DCTmxT
danstowell@1 97
danstowell@1 98 def dct(self,data_matrix):
danstowell@1 99 '''Compute DCT of input matrix.'''
danstowell@1 100 return numpy.dot(self.DCTMatrix,data_matrix)
danstowell@1 101
danstowell@1 102 def getMFCCs(self,warpedSpectrum,cn=True):
danstowell@1 103 '''Compute MFCC coefficients from Mel warped magnitude spectrum.'''
danstowell@24 104 mfccs=self.dct(numpy.log(numpy.clip(warpedSpectrum, 1e-9, numpy.inf)))
danstowell@1 105 if cn is False : mfccs[0] = 0.0
danstowell@1 106 return mfccs
danstowell@1 107