"""Compute intermediate time-frequency representations (log-mel spectrograms)
from audio signals and store them in an HDF5 feature file.

Source code:
https://github.com/sidsig/ICASSP-MLP-Code/blob/master/feature_extraction.py

Modified by:
Paulo Chiliguano
MSc candidate Sound and Music Computing
Queen Mary University of London
2015

References:
- Sigtia, S., & Dixon, S. (2014, May). Improved music feature learning with deep neural
  networks. In Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International
  Conference on (pp. 6959-6963). IEEE.
- Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music
  recommendation. In Advances in Neural Information Processing Systems (pp. 2643-2651).
"""

import os

import numpy as np
import tables
# LibROSA is a package that allows feature extraction for Music Information Retrieval
import librosa


def read_wav(filename):
    """Load an audio file as a mono signal resampled to 22050 Hz.

    NOTE(review): the removed sox pipeline this replaced padded/trimmed to
    30.0 s, but here only the first 3 s are read (duration=3) -- confirm
    which excerpt length is intended.

    Parameters
    ----------
    filename : str
        Path to the audio file to load.

    Returns
    -------
    np.ndarray
        1-D float waveform, mono, at 22050 Hz.
    """
    audio, _sr = librosa.load(filename, sr=22050, mono=True, offset=0, duration=3)
    return audio


def calc_specgram(x, fs, winSize):
    """Compute a time-major log-scaled mel spectrogram.

    Parameters
    ----------
    x : np.ndarray
        Mono audio signal.
    fs : int
        Sampling rate of ``x`` in Hz.
    winSize : int
        FFT window length (``n_fft``).

    Returns
    -------
    np.ndarray
        Array of shape (n_frames, 128): one row per analysis frame,
        one column per mel band, in decibels relative to peak power.
    """
    S = librosa.feature.melspectrogram(
        y=x,
        sr=fs,
        n_mels=128,
        S=None,
        n_fft=winSize,
        hop_length=512,
    )
    # librosa.logamplitude was removed in librosa 0.6; power_to_db is the
    # documented replacement (ref=np.max normalises to the peak power, as
    # ref_power=np.max did).
    log_S = librosa.power_to_db(S, ref=np.max)
    # Transpose so time is the leading axis (frames x mel bands).
    return np.transpose(log_S)


def make_4tensor(x):
    """Prepend singleton axes until ``x`` has exactly 4 dimensions.

    Raises AssertionError if ``x`` already has more than 4 dimensions.
    """
    assert x.ndim <= 4
    while x.ndim < 4:
        x = np.expand_dims(x, 0)
    return x


class FeatExtraction():
    """Extract log-mel features for a dataset and persist them in HDF5.

    Expects ``<dataset_dir>/lists/audio_files.txt`` to list one audio file
    path per line; writes ``<dataset_dir>/features/feats.h5`` containing an
    extendable array ``/x`` of 4-D feature tensors and a parallel string
    array ``/filenames``. The whole pipeline runs from ``__init__``.
    """

    def __init__(self, dataset_dir):
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        self.setup_h5()
        self.extract_features()
        self.close_h5()

    def get_filenames(self):
        """Read the audio file list into ``self.filenames``/``self.num_files``."""
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        # Use a context manager so the list file is always closed
        # (the original leaked the handle).
        with open(dataset_files, 'r') as f:
            self.filenames = [line.strip() for line in f]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        """Create the output features directory if it does not exist."""
        if not os.path.exists(self.feat_dir):
            print('Making output dir.')
            os.mkdir(self.feat_dir)
        else:
            print('Output dir already exists.')

    def make_h5(self):
        """Open the HDF5 feature file (write if new, append if it exists).

        NOTE(review): when the file already exists it is opened in append
        mode, but setup_h5 then tries to create ``/x`` and ``/filenames``
        again, which PyTables rejects -- confirm whether re-runs should
        overwrite instead.
        """
        # PyTables removed the camelCase API in 3.x; open_file is current.
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.open_file(self.h5_filename, 'w')
        else:
            print('Feature file already exists.')
            self.h5 = tables.open_file(self.h5_filename, 'a')

    def setup_h5(self):
        """Create the extendable arrays, sized from the first file's features."""
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        # Per-sample shape; leading 0 makes the EArray extendable along axis 0.
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.create_earray(
            '/', 'x', tables.FloatAtom(itemsize=4), self.x_earray_shape,
            chunkshape=self.chunkshape, expectedrows=self.num_files)
        self.h5_filenames = self.h5.create_earray(
            '/', 'filenames', tables.StringAtom(256), (0,),
            expectedrows=self.num_files)
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

    def extract_features(self):
        """Compute and append features for every remaining file (index 1+)."""
        for i in range(1, self.num_files):
            filename = self.filenames[i]
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        """Flush pending writes and close the HDF5 file."""
        self.h5.flush()
        self.h5.close()


if __name__ == '__main__':
    test = FeatExtraction('/home/paulo/Documents/msc_project/dataset/7digital')