# -*- coding: utf-8 -*-
"""
Mel-frequency cepstral coefficient (MFCC) extraction built on librosa.

Created on Mon Oct 19 12:58:07 2015

@author: mariapanteli
"""
import librosa
import scipy.signal
import numpy


class MFCCs:
    """Compute MFCCs for an audio recording.

    Typical use is ``MFCCs().get_mfccs(filename)``.  Intermediate results
    are kept on the instance:

    y, sr     -- audio samples and their sample rate
    win1/hop1 -- STFT window and hop length in samples
    melspec   -- mel power spectrogram (mel bands x frames)
    melsr     -- frame rate of ``melspec`` in frames per second
    mfccs     -- the resulting coefficient matrix (coefficients x frames)
    """

    def __init__(self):
        self.y = None
        self.sr = None
        self.melspec = None
        self.melsr = None
        self.win1 = None
        self.hop1 = None
        self.mfccs = None

    def load_audiofile(self, filename='test.wav', sr=None, segment=True):
        """Load ``filename`` into ``self.y`` / ``self.sr``.

        Parameters
        ----------
        filename : str
            Path of the audio file, passed to ``librosa.load``.
        sr : number or None
            Target sample rate passed to ``librosa.load``.
        segment : bool
            If true and the track is longer than 90 seconds, keep only a
            45-second excerpt starting 20 seconds before the midpoint.
        """
        self.y, self.sr = librosa.load(filename, sr=sr)
        if not segment:
            return
        tracklength = self.y.shape[0] / float(self.sr)
        if tracklength > 90:
            start_sec = (tracklength / 2.) - 20
            # Slice bounds must be integers: round() yields a float on
            # Python 2 and the sample rate itself may be a float.
            start_sample = int(round(start_sec * self.sr))
            end_sample = start_sample + int(round(45 * self.sr))
            self.y = self.y[start_sample:end_sample]

    def mel_spectrogram(self, y=None, sr=None):
        """Compute the mel power spectrogram of the audio.

        ``y`` and ``sr`` are only used when the instance does not already
        hold audio / a sample rate.  The STFT uses a 40 ms Hamming window,
        a hop of one eighth of the window and an FFT size equal to the
        next power of two; the result is mapped onto 40 mel bands up to
        8000 Hz.  Sets ``self.melspec``, ``self.melsr``, ``self.win1`` and
        ``self.hop1``.

        Raises
        ------
        ValueError
            If no audio or sample rate is available.
        """
        if self.y is None:
            self.y = y
        if self.sr is None:
            self.sr = sr
        if self.y is None or self.sr is None:
            raise ValueError("audio samples and sample rate are required; "
                             "call load_audiofile() or pass y and sr")
        win1 = int(round(0.04 * self.sr))
        hop1 = int(round(win1 / 8.))
        nfft1 = int(2 ** numpy.ceil(numpy.log2(win1)))
        nmels = 40
        # Window functions were removed from the top-level scipy.signal
        # namespace in recent SciPy; prefer scipy.signal.windows and fall
        # back to the old location for old installations.
        windows = getattr(scipy.signal, 'windows', None)
        hamming = getattr(windows, 'hamming', None) or scipy.signal.hamming
        spectrum = librosa.stft(self.y, n_fft=nfft1, hop_length=hop1,
                                win_length=win1, window=hamming)
        power = numpy.abs(spectrum) ** 2
        self.melspec = librosa.feature.melspectrogram(
            S=power, sr=self.sr, n_mels=nmels, fmax=8000)
        self.melsr = self.sr / float(hop1)
        self.win1 = win1
        self.hop1 = hop1

    def calc_mfccs(self, y=None, sr=None):
        """Compute MFCCs from ``self.melspec`` and store them in ``self.mfccs``.

        ``y`` and ``sr`` are only used when the instance does not already
        hold audio / a sample rate.  If no mel spectrogram has been
        computed yet, it is computed first.  21 coefficients are requested
        and the first one is discarded.
        """
        if self.y is None:
            self.y = y
        if self.sr is None:
            self.sr = sr
        if self.melspec is None:
            self.mel_spectrogram()
        # librosa.logamplitude was replaced by power_to_db (same defaults);
        # use whichever this librosa version provides.
        to_db = getattr(librosa, 'power_to_db', None) or librosa.logamplitude
        # mfcc() requires a log-amplitude spectrogram
        coeffs = librosa.feature.mfcc(S=to_db(self.melspec), n_mfcc=21)
        # drop the 0th (DC) cepstral coefficient
        self.mfccs = coeffs[1:, :]

    def _average_frames(self, frames, win, hop):
        """Average the columns of ``frames`` over sliding windows.

        Returns an array with the same number of rows as ``frames`` and
        one column per complete window of ``win`` columns taken every
        ``hop`` columns (zero columns if ``frames`` is shorter than ``win``).
        """
        nbins, norigframes = frames.shape
        nframes = max(0, int(1 + numpy.floor((norigframes - win) / float(hop))))
        averaged = numpy.empty((nbins, nframes))
        for i in range(nframes):
            start = i * hop
            averaged[:, i] = numpy.mean(frames[:, start:start + win], axis=1)
        return averaged

    def get_mfccs(self, filename='test.wav', secondframedecomp=False):
        """Load ``filename`` and return its MFCC matrix.

        Parameters
        ----------
        filename : str
            Path of the audio file.
        secondframedecomp : bool
            If true, average the MFCCs over 8-second windows taken every
            0.5 seconds instead of returning the frame-level coefficients.
        """
        self.load_audiofile(filename=filename)
        self.mel_spectrogram()
        self.calc_mfccs()

        if secondframedecomp:
            win2 = int(round(8 * self.melsr))
            hop2 = int(round(0.5 * self.melsr))
            self.mfccs = self._average_frames(self.mfccs, win2, hop2)
        return self.mfccs

    def get_mfccs_from_melspec(self, melspec=None, melsr=None):
        """Return MFCCs computed from a precomputed mel spectrogram.

        Parameters
        ----------
        melspec : array-like
            Mel power spectrogram (mel bands x frames).
        melsr : number or None
            Frame rate of ``melspec``; stored on the instance unchanged.

        Raises
        ------
        ValueError
            If ``melspec`` is not provided.
        """
        if melspec is None:
            raise ValueError("melspec is required")
        self.melspec = melspec
        self.melsr = melsr
        self.calc_mfccs()
        return self.mfccs


if __name__ == '__main__':
    mfs = MFCCs()
    mfs.get_mfccs()