Mercurial > hg > smacpy
comparison smacpy.py @ 34:31fa7d0361df
Merge pull request #6 from danstowell/uselibrosa
Modernisation: replace scikits-audio dependency with librosa, & python3 compat
author | danstowell <danstowell@users.sourceforge.net> |
---|---|
date | Wed, 15 Mar 2023 07:09:51 +0000 |
parents | 469e69bdc354 |
children |
comparison
equal
deleted
inserted
replaced
32:659ebfa334e2 | 34:31fa7d0361df |
---|---|
12 | 12 |
13 import os.path | 13 import os.path |
14 import numpy as np | 14 import numpy as np |
15 import argparse | 15 import argparse |
16 from glob import glob | 16 from glob import glob |
17 from scikits.audiolab import Sndfile | 17 import librosa |
18 from scikits.audiolab import Format | |
19 from sklearn.mixture import GaussianMixture as GMM | 18 from sklearn.mixture import GaussianMixture as GMM |
20 | 19 |
21 from MFCC import melScaling | 20 from MFCC import melScaling |
22 | 21 |
23 ####################################################################### | 22 ####################################################################### |
45 def __init__(self, wavfolder, trainingdata): | 44 def __init__(self, wavfolder, trainingdata): |
46 """Initialise the classifier and train it on some WAV files. | 45 """Initialise the classifier and train it on some WAV files. |
47 'wavfolder' is the base folder, to be prepended to all WAV paths. | 46 'wavfolder' is the base folder, to be prepended to all WAV paths. |
48 'trainingdata' is a dictionary of wavpath:label pairs.""" | 47 'trainingdata' is a dictionary of wavpath:label pairs.""" |
49 | 48 |
50 self.mfccMaker = melScaling(int(fs), framelen/2, 40) | 49 self.mfccMaker = melScaling(int(fs), int(framelen/2), 40) |
51 self.mfccMaker.update() | 50 self.mfccMaker.update() |
52 | 51 |
53 allfeatures = {wavpath:self.file_to_features(os.path.join(wavfolder, wavpath)) for wavpath in trainingdata} | 52 allfeatures = {wavpath:self.file_to_features(os.path.join(wavfolder, wavpath)) for wavpath in trainingdata} |
54 | 53 |
55 # Determine the normalisation stats, and remember them | 54 # Determine the normalisation stats, and remember them |
100 | 99 |
101 def file_to_features(self, wavpath): | 100 def file_to_features(self, wavpath): |
102 "Reads through a mono WAV file, converting each frame to the required features. Returns a 2D array." | 101 "Reads through a mono WAV file, converting each frame to the required features. Returns a 2D array." |
103 if verbose: print("Reading %s" % wavpath) | 102 if verbose: print("Reading %s" % wavpath) |
104 if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath) | 103 if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath) |
105 sf = Sndfile(wavpath, "r") | 104 |
106 #if (sf.channels != 1) and verbose: print(" Sound file has multiple channels (%i) - channels will be mixed to mono." % sf.channels) | 105 audiodata, _ = librosa.load(wavpath, sr=fs, mono=True) |
107 if sf.samplerate != fs: raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate)) | |
108 window = np.hamming(framelen) | 106 window = np.hamming(framelen) |
109 features = [] | 107 features = [] |
108 chunkpos = 0 | |
110 while(True): | 109 while(True): |
111 try: | 110 try: |
112 chunk = sf.read_frames(framelen, dtype=np.float32) | 111 chunk = audiodata[chunkpos:chunkpos+framelen] |
113 if len(chunk) != framelen: | 112 if len(chunk) != framelen: |
114 print("Not read sufficient samples - returning") | 113 #print("Not read sufficient samples - assuming end of file") |
115 break | 114 break |
116 if sf.channels != 1: | |
117 chunk = np.mean(chunk, 1) # mixdown | |
118 framespectrum = np.fft.fft(window * chunk) | 115 framespectrum = np.fft.fft(window * chunk) |
119 magspec = abs(framespectrum[:framelen/2]) | 116 magspec = abs(framespectrum[:int(framelen/2)]) |
120 | 117 |
121 # do the frequency warping and MFCC computation | 118 # do the frequency warping and MFCC computation |
122 melSpectrum = self.mfccMaker.warpSpectrum(magspec) | 119 melSpectrum = self.mfccMaker.warpSpectrum(magspec) |
123 melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True) | 120 melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True) |
124 melCepstrum = melCepstrum[1:] # exclude zeroth coefficient | 121 melCepstrum = melCepstrum[1:] # exclude zeroth coefficient |
125 melCepstrum = melCepstrum[:13] # limit to lower MFCCs | 122 melCepstrum = melCepstrum[:13] # limit to lower MFCCs |
126 | 123 |
127 framefeatures = melCepstrum # todo: include deltas? that can be your homework. | 124 framefeatures = melCepstrum # todo: include deltas? that can be your homework. |
128 | 125 |
129 features.append(framefeatures) | 126 features.append(framefeatures) |
127 | |
128 chunkpos += framelen | |
130 except RuntimeError: | 129 except RuntimeError: |
131 break | 130 break |
132 sf.close() | 131 if verbose: print(" Data shape: %s" % str(np.array(features).shape)) |
133 return np.array(features) | 132 return np.array(features) |
134 | 133 |
135 ####################################################################### | 134 ####################################################################### |
136 def trainAndTest(trainpath, trainwavs, testpath, testwavs): | 135 def trainAndTest(trainpath, trainwavs, testpath, testwavs): |
137 "Handy function for evaluating your code: trains a model, tests it on wavs of known class. Returns (numcorrect, numtotal, numclasses)." | 136 "Handy function for evaluating your code: trains a model, tests it on wavs of known class. Returns (numcorrect, numtotal, numclasses)." |