comparison smacpy.py @ 34:31fa7d0361df

Merge pull request #6 from danstowell/uselibrosa Modernisation: replace scikits-audio dependency with librosa, & python3 compat
author danstowell <danstowell@users.sourceforge.net>
date Wed, 15 Mar 2023 07:09:51 +0000
parents 469e69bdc354
children
comparison
equal deleted inserted replaced
32:659ebfa334e2 34:31fa7d0361df
12 12
13 import os.path 13 import os.path
14 import numpy as np 14 import numpy as np
15 import argparse 15 import argparse
16 from glob import glob 16 from glob import glob
17 from scikits.audiolab import Sndfile 17 import librosa
18 from scikits.audiolab import Format
19 from sklearn.mixture import GaussianMixture as GMM 18 from sklearn.mixture import GaussianMixture as GMM
20 19
21 from MFCC import melScaling 20 from MFCC import melScaling
22 21
23 ####################################################################### 22 #######################################################################
45 def __init__(self, wavfolder, trainingdata): 44 def __init__(self, wavfolder, trainingdata):
46 """Initialise the classifier and train it on some WAV files. 45 """Initialise the classifier and train it on some WAV files.
47 'wavfolder' is the base folder, to be prepended to all WAV paths. 46 'wavfolder' is the base folder, to be prepended to all WAV paths.
48 'trainingdata' is a dictionary of wavpath:label pairs.""" 47 'trainingdata' is a dictionary of wavpath:label pairs."""
49 48
50 self.mfccMaker = melScaling(int(fs), framelen/2, 40) 49 self.mfccMaker = melScaling(int(fs), int(framelen/2), 40)
51 self.mfccMaker.update() 50 self.mfccMaker.update()
52 51
53 allfeatures = {wavpath:self.file_to_features(os.path.join(wavfolder, wavpath)) for wavpath in trainingdata} 52 allfeatures = {wavpath:self.file_to_features(os.path.join(wavfolder, wavpath)) for wavpath in trainingdata}
54 53
55 # Determine the normalisation stats, and remember them 54 # Determine the normalisation stats, and remember them
100 99
101 def file_to_features(self, wavpath): 100 def file_to_features(self, wavpath):
102 "Reads through a mono WAV file, converting each frame to the required features. Returns a 2D array." 101 "Reads through a mono WAV file, converting each frame to the required features. Returns a 2D array."
103 if verbose: print("Reading %s" % wavpath) 102 if verbose: print("Reading %s" % wavpath)
104 if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath) 103 if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
105 sf = Sndfile(wavpath, "r") 104
106 #if (sf.channels != 1) and verbose: print(" Sound file has multiple channels (%i) - channels will be mixed to mono." % sf.channels) 105 audiodata, _ = librosa.load(wavpath, sr=fs, mono=True)
107 if sf.samplerate != fs: raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
108 window = np.hamming(framelen) 106 window = np.hamming(framelen)
109 features = [] 107 features = []
108 chunkpos = 0
110 while(True): 109 while(True):
111 try: 110 try:
112 chunk = sf.read_frames(framelen, dtype=np.float32) 111 chunk = audiodata[chunkpos:chunkpos+framelen]
113 if len(chunk) != framelen: 112 if len(chunk) != framelen:
114 print("Not read sufficient samples - returning") 113 #print("Not read sufficient samples - assuming end of file")
115 break 114 break
116 if sf.channels != 1:
117 chunk = np.mean(chunk, 1) # mixdown
118 framespectrum = np.fft.fft(window * chunk) 115 framespectrum = np.fft.fft(window * chunk)
119 magspec = abs(framespectrum[:framelen/2]) 116 magspec = abs(framespectrum[:int(framelen/2)])
120 117
121 # do the frequency warping and MFCC computation 118 # do the frequency warping and MFCC computation
122 melSpectrum = self.mfccMaker.warpSpectrum(magspec) 119 melSpectrum = self.mfccMaker.warpSpectrum(magspec)
123 melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True) 120 melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True)
124 melCepstrum = melCepstrum[1:] # exclude zeroth coefficient 121 melCepstrum = melCepstrum[1:] # exclude zeroth coefficient
125 melCepstrum = melCepstrum[:13] # limit to lower MFCCs 122 melCepstrum = melCepstrum[:13] # limit to lower MFCCs
126 123
127 framefeatures = melCepstrum # todo: include deltas? that can be your homework. 124 framefeatures = melCepstrum # todo: include deltas? that can be your homework.
128 125
129 features.append(framefeatures) 126 features.append(framefeatures)
127
128 chunkpos += framelen
130 except RuntimeError: 129 except RuntimeError:
131 break 130 break
132 sf.close() 131 if verbose: print(" Data shape: %s" % str(np.array(features).shape))
133 return np.array(features) 132 return np.array(features)
134 133
135 ####################################################################### 134 #######################################################################
136 def trainAndTest(trainpath, trainwavs, testpath, testwavs): 135 def trainAndTest(trainpath, trainwavs, testpath, testwavs):
137 "Handy function for evaluating your code: trains a model, tests it on wavs of known class. Returns (numcorrect, numtotal, numclasses)." 136 "Handy function for evaluating your code: trains a model, tests it on wavs of known class. Returns (numcorrect, numtotal, numclasses)."