smacpy: smacpy.py comparison

comparison smacpy.py @ 34:31fa7d0361df

Merge pull request #6 from danstowell/uselibrosa — Modernisation: replace scikits-audio dependency with librosa, & python3 compat

author	danstowell <danstowell@users.sourceforge.net>
date	Wed, 15 Mar 2023 07:09:51 +0000
parents	469e69bdc354
children

comparison

Legend: equal, deleted (-), inserted (+), replaced

-:659ebfa334e2
+:31fa7d0361df
 import os.path
 import numpy as np
 import argparse
 from glob import glob
-from scikits.audiolab import Sndfile
+import librosa
-from scikits.audiolab import Format
 from sklearn.mixture import GaussianMixture as GMM
 from MFCC import melScaling
 #######################################################################
 	def __init__(self, wavfolder, trainingdata):
 		"""Initialise the classifier and train it on some WAV files.
 		'wavfolder' is the base folder, to be prepended to all WAV paths.
 		'trainingdata' is a dictionary of wavpath:label pairs."""
-		self.mfccMaker = melScaling(int(fs), framelen/2, 40)
+		self.mfccMaker = melScaling(int(fs), int(framelen/2), 40)
 		self.mfccMaker.update()
 		allfeatures = {wavpath:self.file_to_features(os.path.join(wavfolder, wavpath)) for wavpath in trainingdata}
 		# Determine the normalisation stats, and remember them
 	def file_to_features(self, wavpath):
 		"Reads through a mono WAV file, converting each frame to the required features. Returns a 2D array."
 		if verbose: print("Reading %s" % wavpath)
 		if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
-		sf = Sndfile(wavpath, "r")
-		#if (sf.channels != 1) and verbose: print(" Sound file has multiple channels (%i) - channels will be mixed to mono." % sf.channels)
+		audiodata, _ = librosa.load(wavpath, sr=fs, mono=True)
-		if sf.samplerate != fs:         raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
 		window = np.hamming(framelen)
 		features = []
+		chunkpos = 0
 		while(True):
 			try:
-				chunk = sf.read_frames(framelen, dtype=np.float32)
+				chunk = audiodata[chunkpos:chunkpos+framelen]
 				if len(chunk) != framelen:
-					print("Not read sufficient samples - returning")
+					#print("Not read sufficient samples - assuming end of file")
 					break
-				if sf.channels != 1:
-					chunk = np.mean(chunk, 1) # mixdown
 				framespectrum = np.fft.fft(window * chunk)
-				magspec = abs(framespectrum[:framelen/2])
+				magspec = abs(framespectrum[:int(framelen/2)])
 				# do the frequency warping and MFCC computation
 				melSpectrum = self.mfccMaker.warpSpectrum(magspec)
 				melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True)
 				melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
 				melCepstrum = melCepstrum[:13] # limit to lower MFCCs
 				framefeatures = melCepstrum   # todo: include deltas? that can be your homework.
 				features.append(framefeatures)
+				chunkpos += framelen
 			except RuntimeError:
 				break
-		sf.close()
+		if verbose: print("  Data shape: %s" % str(np.array(features).shape))
 		return np.array(features)
 #######################################################################
 def trainAndTest(trainpath, trainwavs, testpath, testwavs):
 	"Handy function for evaluating your code: trains a model, tests it on wavs of known class. Returns (numcorrect, numtotal, numclasses)."

Mercurial > hg > smacpy

comparison smacpy.py @ 34:31fa7d0361df