"""
This script computes an intermediate time-frequency representation (log-mel
spectrogram) from audio signals.

Source code:
https://github.com/sidsig/ICASSP-MLP-Code/blob/master/feature_extraction.py

Modified by:
Paulo Chiliguano
MSc candidate, Sound and Music Computing
Queen Mary University of London
2015

References:
- Sigtia, S., & Dixon, S. (2014, May). Improved music feature learning with deep
  neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE
  International Conference on (pp. 6959-6963). IEEE.
- Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music
  recommendation. In Advances in Neural Information Processing Systems
  (pp. 2643-2651).
"""

#import subprocess
#import sys
import os
#from spectrogram import SpecGram
import tables
#import pdb
# LibROSA is a package that allows feature extraction for Music Information Retrieval
import librosa
import numpy as np

def read_wav(filename):
    #bits_per_sample = '16'
    #cmd = ['sox',filename,'-t','raw','-e','unsigned-integer','-L','-c','1','-b',bits_per_sample,'-','pad','0','30.0','rate','22050.0','trim','0','30.0']
    #cmd = ' '.join(cmd)
    #print cmd
    #raw_audio = numpy.fromstring(subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True).communicate()[0],dtype='uint16')
    # Load the first 3 seconds of the file, resampled to 22050 Hz mono.
    audioFile, sr = librosa.load(filename, sr=22050, mono=True, offset=0, duration=3)
    #random.randint(0,audioFile.size)
    #max_amp = 2.**(int(bits_per_sample)-1)
    #raw_audio = (raw_audio- max_amp)/max_amp
    return audioFile

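# A minimal usage sketch (assumed helper, never called by the pipeline): read_wav
# returns a mono floating-point array covering the first 3 seconds of the file at
# 22050 Hz, i.e. about 22050 * 3 = 66150 samples for a clip at least 3 seconds long.
def _example_read_wav(filename):
    x = read_wav(filename)
    print x.shape, x.dtype  # expected: (66150,) float32
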
def calc_specgram(x, fs, winSize):
    # Mel-scaled power spectrogram with 128 bands.
    S = librosa.feature.melspectrogram(
        y=x,
        sr=fs,
        n_mels=128,
        S=None,
        n_fft=winSize,
        hop_length=512
    )
    # Convert to a log scale (dB) relative to the peak power, then transpose so
    # that rows are time frames and columns are mel bands.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    log_S = np.transpose(log_S)
    return log_S
    #spec = SpecGram(x,fs,winSize)
    #return spec.specMat

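# A rough shape sketch (assumed helper, never called by the pipeline): with
# sr=22050, a 3-second clip, n_fft=1024 and hop_length=512, melspectrogram yields
# on the order of 66150 // 512 + 1 = 130 frames, so the transposed log_S is
# roughly (130, 128): time frames along the rows, mel bands along the columns.
def _example_specgram_shape():
    fs = 22050
    x = np.zeros(fs * 3, dtype=np.float32)  # 3 seconds of silence as a stand-in signal
    log_S = calc_specgram(x, fs, 1024)
    print log_S.shape  # roughly (130, 128); the exact frame count depends on the librosa version
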
def make_4tensor(x):
    # Prepend singleton axes until the array is 4-D.
    assert x.ndim <= 4
    while x.ndim < 4:
        x = np.expand_dims(x, 0)
    return x

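# Illustrative sketch (assumed helper, never called by the pipeline): make_4tensor
# turns a (frames, mel_bands) matrix into a single 4-D example of shape
# (1, 1, frames, mel_bands), ready to be appended to the HDF5 feature array.
def _example_make_4tensor():
    mat = np.zeros((130, 128))
    print make_4tensor(mat).shape  # expected: (1, 1, 130, 128)
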
# Extracts log-mel spectrograms for every file listed in lists/audio_files.txt
# under dataset_dir and stores them in features/feats.h5.
class FeatExtraction():
    def __init__(self, dataset_dir):
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        self.setup_h5()
        self.extract_features()
        self.close_h5()

    def get_filenames(self):
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        self.filenames = [l.strip() for l in open(dataset_files, 'r').readlines()]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        if not os.path.exists(self.feat_dir):
            print 'Making output dir.'
            os.mkdir(self.feat_dir)
        else:
            print 'Output dir already exists.'

    def make_h5(self):
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.openFile(self.h5_filename, 'w')
        else:
            print 'Feature file already exists.'
            self.h5 = tables.openFile(self.h5_filename, 'a')

    def setup_h5(self):
        # Use the first file to determine the spectrogram shape, then create
        # extendable arrays for the features ('/x') and the file names ('/filenames').
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.createEArray('/', 'x', tables.FloatAtom(itemsize=4), self.x_earray_shape,
                                         chunkshape=self.chunkshape, expectedrows=self.num_files)
        self.h5_filenames = self.h5.createEArray('/', 'filenames', tables.StringAtom(256), (0,),
                                                 expectedrows=self.num_files)
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

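    # Design note on the arrays created in setup_h5 (descriptive only): '/x' is an
    # extendable EArray whose first axis grows by one row per audio file, with a
    # chunkshape matching a single example so each spectrogram is stored as one chunk;
    # '/filenames' grows in parallel and holds the matching audio path for each row.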
    def extract_features(self):
        # Process the remaining files; the first one was already handled in setup_h5.
        for i in xrange(1, self.num_files):
            filename = self.filenames[i]
            #print 'Filename: ',filename
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        self.h5.flush()
        self.h5.close()


if __name__ == '__main__':
    test = FeatExtraction('/home/paulo/Documents/msc_project/dataset/7digital')
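
# A minimal read-back sketch (assumed usage, not part of the original pipeline):
# once feats.h5 has been written, its contents can be inspected with PyTables.
def _example_read_features(h5_filename):
    h5 = tables.openFile(h5_filename, 'r')
    print h5.root.x.shape        # (num_files, 1, frames, mel_bands)
    print h5.root.filenames[:3]  # the first few audio file paths
    h5.close()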