annotate Code/time_freq_representation/feature_extraction.py @ 47:b0186d4a4496 tip

Move 7Digital dataset to Downloads
author Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk>
date Sat, 09 Jul 2022 00:50:43 -0500
parents 68a62ca32441
children
rev   line source
p@24 1 """
p@24 2 This script computes intermediate time-frequency representation (log-mel spectrogram)
p@24 3 from audio signals
p@24 4
p@24 5 Source code:
p@24 6 https://github.com/sidsig/ICASSP-MLP-Code/blob/master/feature_extraction.py
p@24 7
p@24 8 Modified by:
p@24 9 Paulo Chiliguano
p@24 10 MSc candidate Sound and Music Computing
p@24 11 Queen Mary University of London
p@24 12 2015
p@24 13
p@24 14 References:
p@24 15 - Sigtia, S., & Dixon, S. (2014, May). Improved music feature learning with deep neural
p@24 16 networks. In Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International
p@24 17 Conference on (pp. 6959-6963). IEEE.
p@24 18 - Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music
p@24 19 recommendation. In Advances in Neural Information Processing Systems (pp. 2643-2651).
p@24 20 """
p@24 21
p@24 22 #import subprocess
p@24 23 #import sys
p@24 24 import os
p@24 25 #from spectrogram import SpecGram
p@24 26 import tables
p@24 27 #import pdb
p@24 28 # LibROSA is a package that allows feature extraction for Music Information Retrieval
p@24 29 import librosa
p@24 30 import numpy as np
p@24 31
def read_wav(filename):
    """Load an audio file as a mono signal resampled to 22050 Hz.

    Parameters
    ----------
    filename : str
        Path to the audio file (any format librosa/audioread can decode).

    Returns
    -------
    np.ndarray
        1-D float waveform of at most 3 seconds, starting at offset 0.

    NOTE(review): only the first 3 seconds are loaded (duration=3); the
    sox pipeline this function replaced padded/trimmed to 30.0 s —
    confirm the shorter excerpt is intended.
    """
    # The sample rate is fixed by the sr=22050 argument, so the returned
    # rate is discarded.
    audio, _sr = librosa.load(filename, sr=22050, mono=True, offset=0, duration=3)
    return audio
p@24 43
def calc_specgram(x, fs, winSize):
    """Compute a log-scaled (dB) mel spectrogram of a waveform.

    Parameters
    ----------
    x : np.ndarray
        1-D audio signal.
    fs : int
        Sample rate of ``x`` in Hz.
    winSize : int
        FFT window length (``n_fft``); hop length is fixed at 512 samples.

    Returns
    -------
    np.ndarray
        Array of shape (frames, 128): one 128-band log-mel frame per row
        (the librosa output is transposed so time runs along axis 0).
    """
    S = librosa.feature.melspectrogram(
        y=x,
        sr=fs,
        n_mels=128,
        n_fft=winSize,
        hop_length=512,
    )
    # librosa renamed logamplitude() to power_to_db() in 0.6 and later
    # removed the old name; support both so the script runs on either.
    if hasattr(librosa, 'power_to_db'):
        log_S = librosa.power_to_db(S, ref=np.max)
    else:
        log_S = librosa.logamplitude(S, ref_power=np.max)
    return np.transpose(log_S)
p@24 58
def make_4tensor(x):
    """Return *x* promoted to exactly 4 dimensions.

    Leading singleton axes are prepended until ``x.ndim == 4``; an array
    that is already 4-D is returned unchanged. Inputs with more than
    four dimensions are rejected.
    """
    assert x.ndim <= 4
    while x.ndim < 4:
        # Equivalent to np.expand_dims(x, 0): add one axis at the front.
        x = x[np.newaxis, ...]
    return x
p@24 64
class FeatExtraction():
    """Extract log-mel spectrogram features for a dataset and store them
    in an HDF5 file via PyTables.

    Constructing an instance runs the whole pipeline: read the file list
    from ``<dataset_dir>/lists/audio_files.txt``, create
    ``<dataset_dir>/features/feats.h5``, and append one (1, 1, frames,
    128) spectrogram tensor per audio file to the ``/x`` earray, with
    the matching filename appended to ``/filenames``.
    """

    def __init__(self, dataset_dir):
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        self.setup_h5()
        self.extract_features()
        self.close_h5()

    def get_filenames(self):
        """Read the list of audio file paths, one per line."""
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        # Context manager closes the list file promptly (the original
        # left the handle open).
        with open(dataset_files, 'r') as f:
            self.filenames = [line.strip() for line in f]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        """Create the output feature directory if it does not exist."""
        if not os.path.exists(self.feat_dir):
            print('Making output dir.')
            os.mkdir(self.feat_dir)
        else:
            print('Output dir already exists.')

    def make_h5(self):
        """Open the HDF5 output file: write mode if new, append if it exists."""
        # PyTables 3 renamed openFile -> open_file (the camelCase alias
        # was later removed).
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.open_file(self.h5_filename, 'w')
        else:
            print('Feature file already exists.')
            self.h5 = tables.open_file(self.h5_filename, 'a')

    def setup_h5(self):
        """Create the extensible arrays, sized from the first file's features."""
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        # All subsequent appends must match this (1, frames, 128) shape.
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.create_earray(
            '/', 'x',
            tables.FloatAtom(itemsize=4),
            self.x_earray_shape,
            chunkshape=self.chunkshape,
            expectedrows=self.num_files,
        )
        self.h5_filenames = self.h5.create_earray(
            '/', 'filenames',
            tables.StringAtom(256),
            (0,),
            expectedrows=self.num_files,
        )
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

    def extract_features(self):
        """Append features for every remaining file (index 1 onward)."""
        for i in range(1, self.num_files):
            filename = self.filenames[i]
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        """Flush pending writes and close the HDF5 file."""
        self.h5.flush()
        self.h5.close()
p@24 124
if __name__ == '__main__':
    import sys
    # Allow the dataset directory to be given on the command line;
    # default to the original hard-coded path for backward compatibility.
    _default_dir = '/home/paulo/Documents/msc_project/dataset/7digital'
    dataset_dir = sys.argv[1] if len(sys.argv) > 1 else _default_dir
    FeatExtraction(dataset_dir)