"""Compute intermediate time-frequency representations (log-mel spectrograms)
from audio signals and store them in an HDF5 feature file.

Source code:
https://github.com/sidsig/ICASSP-MLP-Code/blob/master/feature_extraction.py

Modified by:
Paulo Chiliguano
MSc candidate Sound and Music Computing
Queen Mary University of London
2015

References:
- Sigtia, S., & Dixon, S. (2014, May). Improved music feature learning with deep neural
  networks. In Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International
  Conference on (pp. 6959-6963). IEEE.
- Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music
  recommendation. In Advances in Neural Information Processing Systems (pp. 2643-2651).
"""

import os

import numpy as np
import tables
# LibROSA is a package that allows feature extraction for Music Information Retrieval
import librosa


def read_wav(filename):
    """Load an audio file as a mono signal resampled to 22050 Hz.

    NOTE(review): the removed sox pipeline this replaced padded/trimmed to
    30.0 s, but here only the first 3 s are read (duration=3) -- confirm
    which excerpt length is intended.

    Parameters
    ----------
    filename : str
        Path to the audio file to load.

    Returns
    -------
    np.ndarray
        1-D float waveform, mono, at 22050 Hz.
    """
    audio, _sr = librosa.load(filename, sr=22050, mono=True, offset=0, duration=3)
    return audio


def calc_specgram(x, fs, winSize):
    """Compute a time-major log-scaled mel spectrogram.

    Parameters
    ----------
    x : np.ndarray
        Mono audio signal.
    fs : int
        Sampling rate of ``x`` in Hz.
    winSize : int
        FFT window length (``n_fft``).

    Returns
    -------
    np.ndarray
        Array of shape (n_frames, 128): one row per analysis frame,
        one column per mel band, in decibels relative to peak power.
    """
    S = librosa.feature.melspectrogram(
        y=x,
        sr=fs,
        n_mels=128,
        S=None,
        n_fft=winSize,
        hop_length=512,
    )
    # librosa.logamplitude was removed in librosa 0.6; power_to_db is the
    # documented replacement (ref=np.max normalises to the peak power, as
    # ref_power=np.max did).
    log_S = librosa.power_to_db(S, ref=np.max)
    # Transpose so time is the leading axis (frames x mel bands).
    return np.transpose(log_S)


def make_4tensor(x):
    """Prepend singleton axes until ``x`` has exactly 4 dimensions.

    Raises AssertionError if ``x`` already has more than 4 dimensions.
    """
    assert x.ndim <= 4
    while x.ndim < 4:
        x = np.expand_dims(x, 0)
    return x


class FeatExtraction():
    """Extract log-mel features for a dataset and persist them in HDF5.

    Expects ``<dataset_dir>/lists/audio_files.txt`` to list one audio file
    path per line; writes ``<dataset_dir>/features/feats.h5`` containing an
    extendable array ``/x`` of 4-D feature tensors and a parallel string
    array ``/filenames``. The whole pipeline runs from ``__init__``.
    """

    def __init__(self, dataset_dir):
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        self.setup_h5()
        self.extract_features()
        self.close_h5()

    def get_filenames(self):
        """Read the audio file list into ``self.filenames``/``self.num_files``."""
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        # Use a context manager so the list file is always closed
        # (the original leaked the handle).
        with open(dataset_files, 'r') as f:
            self.filenames = [line.strip() for line in f]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        """Create the output features directory if it does not exist."""
        if not os.path.exists(self.feat_dir):
            print('Making output dir.')
            os.mkdir(self.feat_dir)
        else:
            print('Output dir already exists.')

    def make_h5(self):
        """Open the HDF5 feature file (write if new, append if it exists).

        NOTE(review): when the file already exists it is opened in append
        mode, but setup_h5 then tries to create ``/x`` and ``/filenames``
        again, which PyTables rejects -- confirm whether re-runs should
        overwrite instead.
        """
        # PyTables removed the camelCase API in 3.x; open_file is current.
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.open_file(self.h5_filename, 'w')
        else:
            print('Feature file already exists.')
            self.h5 = tables.open_file(self.h5_filename, 'a')

    def setup_h5(self):
        """Create the extendable arrays, sized from the first file's features."""
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        # Per-sample shape; leading 0 makes the EArray extendable along axis 0.
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.create_earray(
            '/', 'x', tables.FloatAtom(itemsize=4), self.x_earray_shape,
            chunkshape=self.chunkshape, expectedrows=self.num_files)
        self.h5_filenames = self.h5.create_earray(
            '/', 'filenames', tables.StringAtom(256), (0,),
            expectedrows=self.num_files)
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

    def extract_features(self):
        """Compute and append features for every remaining file (index 1+)."""
        for i in range(1, self.num_files):
            filename = self.filenames[i]
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        """Flush pending writes and close the HDF5 file."""
        self.h5.flush()
        self.h5.close()


if __name__ == '__main__':
    test = FeatExtraction('/home/paulo/Documents/msc_project/dataset/7digital')