"""
This script computes an intermediate time-frequency representation (log-mel
spectrogram) from audio signals.

Source code:
https://github.com/sidsig/ICASSP-MLP-Code/blob/master/feature_extraction.py

Modified by:
Paulo Chiliguano
MSc candidate, Sound and Music Computing
Queen Mary University of London
2015

References:
- Sigtia, S., & Dixon, S. (2014, May). Improved music feature learning with deep
  neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE
  International Conference on (pp. 6959-6963). IEEE.
- Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music
  recommendation. In Advances in Neural Information Processing Systems
  (pp. 2643-2651).
"""

#import subprocess
#import sys
import os
#from spectrogram import SpecGram
import tables
#import pdb
# LibROSA is a package that allows feature extraction for Music Information Retrieval
import librosa
import numpy as np

def read_wav(filename):
    #bits_per_sample = '16'
    #cmd = ['sox',filename,'-t','raw','-e','unsigned-integer','-L','-c','1','-b',bits_per_sample,'-','pad','0','30.0','rate','22050.0','trim','0','30.0']
    #cmd = ' '.join(cmd)
    #print cmd
    #raw_audio = numpy.fromstring(subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True).communicate()[0],dtype='uint16')
    # Load the first 3 seconds of the file, resampled to 22050 Hz mono.
    audioFile, sr = librosa.load(filename, sr=22050, mono=True, offset=0, duration=3)
    #random.randint(0,audioFile.size)
    #max_amp = 2.**(int(bits_per_sample)-1)
    #raw_audio = (raw_audio- max_amp)/max_amp
    return audioFile

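# A minimal usage sketch (assumed helper, never called by the pipeline): read_wav
# returns a mono floating-point array covering the first 3 seconds of the file at
# 22050 Hz, i.e. about 22050 * 3 = 66150 samples for a clip at least 3 seconds long.
def _example_read_wav(filename):
    x = read_wav(filename)
    print x.shape, x.dtype  # expected: (66150,) float32
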
def calc_specgram(x, fs, winSize):
    # Mel-scaled power spectrogram with 128 bands.
    S = librosa.feature.melspectrogram(
        y=x,
        sr=fs,
        n_mels=128,
        S=None,
        n_fft=winSize,
        hop_length=512
    )
    # Convert to a log scale (dB) relative to the peak power, then transpose so
    # that rows are time frames and columns are mel bands.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    log_S = np.transpose(log_S)
    return log_S
    #spec = SpecGram(x,fs,winSize)
    #return spec.specMat

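# A rough shape sketch (assumed helper, never called by the pipeline): with
# sr=22050, a 3-second clip, n_fft=1024 and hop_length=512, melspectrogram yields
# on the order of 66150 // 512 + 1 = 130 frames, so the transposed log_S is
# roughly (130, 128): time frames along the rows, mel bands along the columns.
def _example_specgram_shape():
    fs = 22050
    x = np.zeros(fs * 3, dtype=np.float32)  # 3 seconds of silence as a stand-in signal
    log_S = calc_specgram(x, fs, 1024)
    print log_S.shape  # roughly (130, 128); the exact frame count depends on the librosa version
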
def make_4tensor(x):
    # Prepend singleton axes until the array is 4-D.
    assert x.ndim <= 4
    while x.ndim < 4:
        x = np.expand_dims(x, 0)
    return x

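# Illustrative sketch (assumed helper, never called by the pipeline): make_4tensor
# turns a (frames, mel_bands) matrix into a single 4-D example of shape
# (1, 1, frames, mel_bands), ready to be appended to the HDF5 feature array.
def _example_make_4tensor():
    mat = np.zeros((130, 128))
    print make_4tensor(mat).shape  # expected: (1, 1, 130, 128)
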
# Extracts log-mel spectrograms for every file listed in lists/audio_files.txt
# under dataset_dir and stores them in features/feats.h5.
class FeatExtraction():
    def __init__(self, dataset_dir):
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        self.setup_h5()
        self.extract_features()
        self.close_h5()

    def get_filenames(self):
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        self.filenames = [l.strip() for l in open(dataset_files, 'r').readlines()]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        if not os.path.exists(self.feat_dir):
            print 'Making output dir.'
            os.mkdir(self.feat_dir)
        else:
            print 'Output dir already exists.'

    def make_h5(self):
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.openFile(self.h5_filename, 'w')
        else:
            print 'Feature file already exists.'
            self.h5 = tables.openFile(self.h5_filename, 'a')

    def setup_h5(self):
        # Use the first file to determine the spectrogram shape, then create
        # extendable arrays for the features ('/x') and the file names ('/filenames').
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.createEArray('/', 'x', tables.FloatAtom(itemsize=4), self.x_earray_shape,
                                         chunkshape=self.chunkshape, expectedrows=self.num_files)
        self.h5_filenames = self.h5.createEArray('/', 'filenames', tables.StringAtom(256), (0,),
                                                 expectedrows=self.num_files)
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

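    # Design note on the arrays created in setup_h5 (descriptive only): '/x' is an
    # extendable EArray whose first axis grows by one row per audio file, with a
    # chunkshape matching a single example so each spectrogram is stored as one chunk;
    # '/filenames' grows in parallel and holds the matching audio path for each row.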
    def extract_features(self):
        # Process the remaining files; the first one was already handled in setup_h5.
        for i in xrange(1, self.num_files):
            filename = self.filenames[i]
            #print 'Filename: ',filename
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        self.h5.flush()
        self.h5.close()


if __name__ == '__main__':
    test = FeatExtraction('/home/paulo/Documents/msc_project/dataset/7digital')
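
# A minimal read-back sketch (assumed usage, not part of the original pipeline):
# once feats.h5 has been written, its contents can be inspected with PyTables.
def _example_read_features(h5_filename):
    h5 = tables.openFile(h5_filename, 'r')
    print h5.root.x.shape        # (num_files, 1, frames, mel_bands)
    print h5.root.filenames[:3]  # the first few audio file paths
    h5.close()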