# -*- coding: utf-8 -*-
"""
Created on Mon Jun 1 11:42:06 2015

@author: Emmanouil Theofanis Chourdakis

Onset-segmented audio feature extraction.

Usage:
    <script> <input> <output>

The input audio file is loaded as mono at 16 kHz and split at detected
onsets.  For every inter-onset interval, frame-wise features (zero-crossing
rate, RMS, spectral centroid, roll-off, flux, contrast/valleys and MFCCs)
are computed, averaged over the interval, and appended to a global pool
which is finally written to <output> as JSON or YAML.
"""

# Note, reference everything!

import math
import os
import sys
from sys import argv


def output_format(outfname):
    """Return 'json' or 'yaml' according to the extension of *outfname*.

    Only the last extension is inspected (case-insensitively), so paths such
    as ``./out.yaml`` or ``track.v2.json`` are handled correctly.

    Returns None when the extension is neither ``.json`` nor ``.yaml``.
    """
    extension = os.path.splitext(outfname)[1].lower()
    if extension in ('.json', '.yaml'):
        return extension[1:]
    return None


def even_frame_size(duration_ms, sample_rate):
    """Return an even frame length, in samples, for a frame of *duration_ms*.

    The exact length ``duration_ms * sample_rate / 1000`` is rounded up if
    that yields an even number, otherwise rounded down; if the result is
    still odd (the exact length was an odd integer) one sample is dropped.
    An even size keeps ``frame_size // 2`` an exact half-frame hop.
    """
    exact = duration_ms * sample_rate / 1000.0
    size = int(math.ceil(exact))
    if size % 2:
        size = int(math.floor(exact))
    if size % 2:
        size -= 1
    return size


if __name__ == "__main__":
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        print("%s <input> <output>" % argv[0])
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
        print("<output>\tThe output filename in .json or .yaml format")
        sys.exit(-1)

    # Reject a bad output name before paying for the heavy library imports.
    outformat = output_format(argv[2])
    if outformat is None:
        print("Please choose a .json or .yaml as an output file.")
        sys.exit(-1)

    print("[II] Loading libraries")

    import essentia
    from essentia import Pool
    from essentia.standard import *
    import yaml

    # requires matplotlib
    from pylab import *

    # requires numpy
    from numpy import *

    # requires scikit-learn
    from sklearn.metrics import pairwise_distances

    fname = argv[1]
    outfname = argv[2]

    output = YamlOutput(filename=outfname, format=outformat)

    print("Feature extraction of `%s\'" % fname)

    # Sampling rate (Hz)
    SR = 16000.0

    # Frame size: 23 ms worth of samples, forced to an even count
    tframeSize = 23  # ms
    frameSize = even_frame_size(tframeSize, SR)

    # Hop size: half a frame.  Integer division so it stays an int.
    hopSize = frameSize // 2

    # Load audio
    audio = MonoLoader(filename=fname, sampleRate=SR)()

    # Window frames
    w = Windowing(size=frameSize, type='hamming')

    # Spectrum
    spec = Spectrum(size=1024)

    # `pool` collects per-frame values for the current segment only;
    # `globalPool` collects the per-segment summaries that are written out.
    pool = Pool()
    globalPool = Pool()

    # Below are Features to be used in the feature extraction stage
    # We use, Spectral Contrast, MFCCs, Zero-Crossing rate, RMS,
    # Crest Factor, Spectral Centroid, Spectral Roll-Off, Spectral Flux

    # Spectral Contrast
    sc = SpectralContrast(frameSize=frameSize, highFrequencyBound=8000,
                          sampleRate=SR)

    # MFCCs
    mfccs = MFCC(highFrequencyBound=8000, sampleRate=SR)

    # Spectral Centroid
    centroid = Centroid(range=SR / 2)

    # Spectral Roll-Off
    rolloff = RollOff(sampleRate=SR, cutoff=0.9)

    # Spectral Flux
    flux = Flux()

    # Zero Crossing Rate
    zcr = ZeroCrossingRate()

    # RMS
    rms = RMS()

    # Crest Factor
    crest = Crest()

    # Segmentation based on onset detection-based temporal modeling
    print("[II] Calculating features for %s, please wait..." % fname)

    # Onset detection
    print("[II] Splitting to onsets...")

    onsetdetection = OnsetDetectionGlobal(frameSize=frameSize,
                                          hopSize=hopSize,
                                          sampleRate=SR)(audio)
    # NOTE(review): Onsets is constructed with its default parameters;
    # confirm its frame rate matches SR / hopSize for this configuration.
    onsets = Onsets()(essentia.array([onsetdetection]), [1])

    print("[II] done, extracting features...")
    for start, end in zip(onsets[:-1], onsets[1:]):
        # Onset times are in seconds; slice bounds must be integer samples.
        IOI = audio[int(start * SR):int(end * SR)]

        if len(IOI) == 0:
            break

        for frame in FrameGenerator(IOI, frameSize, hopSize):
            # Temporal features
            zerocrossingrate = zcr(frame)
            rmsvalues = rms(frame)

            # Spectral features
            framespectrum = spec(w(frame))
            framecontrast = sc(framespectrum)
            mfcc_coeffs = mfccs(framespectrum)[1]
            spectralcentroid = centroid(framespectrum)
            spectralrolloff = rolloff(framespectrum)
            # Previously this called rolloff() a second time, so the
            # stored "flux" values were really roll-off values.
            spectralflux = flux(framespectrum)

            pool.add('lowlevel.zcr', zerocrossingrate)
            pool.add('lowlevel.rms', rmsvalues)
            pool.add('lowlevel.spectrum.centroid', spectralcentroid)
            pool.add('lowlevel.spectrum.rolloff', spectralrolloff)
            pool.add('lowlevel.mfcc.coeffs', mfcc_coeffs)
            pool.add('lowlevel.spectrum.magnitude', framespectrum)
            pool.add('lowlevel.contrast.contrast', framecontrast[0])
            pool.add('lowlevel.contrast.valleys', framecontrast[1])
            pool.add('lowlevel.spectrum.flux', spectralflux)

        spectralcontrast = pool['lowlevel.contrast.contrast']
        spectralvalleys = pool['lowlevel.contrast.valleys']
        spectralcentroidfeature = pool['lowlevel.spectrum.centroid']
        spectralrollofffeature = pool['lowlevel.spectrum.rolloff']
        spectralfluxfeature = pool['lowlevel.spectrum.flux']

        # Contrast and valley coefficients side by side, one row per frame
        spectralfeature = concatenate((spectralcontrast, spectralvalleys), 1)
        mfccfeature = pool['lowlevel.mfcc.coeffs']
        zcrfeature = pool['lowlevel.zcr']
        rmsfeature = pool['lowlevel.rms']
        crestfeature = crest(rmsfeature)

        # Expand the averaged contrast/valley vector into scalar entries
        meanspectralfeature = mean(spectralfeature, 0)
        for i, value in enumerate(meanspectralfeature):
            globalPool.add('spectralcontrast_%d' % i, value)
        globalPool.add('spectralcentroid', mean(spectralcentroidfeature, 0))
        globalPool.add('spectralrolloff', mean(spectralrollofffeature, 0))
        globalPool.add('spectralflux', mean(spectralfluxfeature, 0))

        # Expand mfccs
        meanmfcc = mean(mfccfeature, 0)
        for i, value in enumerate(meanmfcc):
            globalPool.add('mfcc_%d' % i, value)

        globalPool.add('zcr', mean(zcrfeature, 0))
        globalPool.add('rms', mean(rmsfeature, 0))
        globalPool.add('crest', crestfeature)

        # Start the next segment with an empty per-frame pool
        pool.clear()

    print("[II] done.")

    print("[II] Saving data to %s:" % outfname)
    globalPool.add("metadata.filename", fname)
    output(globalPool)