Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
changeset 5:14936b60b975
Code to extract log-mel-spectrogram
author | Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk> |
---|---|
date | Sat, 11 Jul 2015 20:49:17 +0100 |
parents | 9f187c06cd74 |
children | 41e14a539dd3 |
files | Code/feature_extraction.py Code/make_lists.py Code/utils.py |
diffstat | 3 files changed, 291 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Code/feature_extraction.py Sat Jul 11 20:49:17 2015 +0100 @@ -0,0 +1,106 @@
"""
Feature extraction.
Siddharth Sigia
Feb,2014
C4DM
"""
import numpy as np #Paulo: numpy as standard
#import subprocess
#import sys
import os
#from spectrogram import SpecGram
import tables
#import pdb
# Paulo Chiliguano: library for mel spectrogram
import librosa
#import random


def read_wav(filename):
    """
    Loads a short mono excerpt of an audio file.

    The file is decoded by librosa at 22050 Hz, mixed down to mono, and only
    the 3 seconds starting 20 seconds into the recording are kept. This call
    replaces an earlier sox-based loader (pad/trim to 30 s, unsigned 16-bit).

    filename: path of the audio file to read.
    Returns the sample array produced by librosa.load; the sample rate that
    librosa also returns is discarded.
    """
    # NOTE(review): recordings shorter than 20 s presumably yield an empty or
    # truncated excerpt, which would not match the HDF5 row shape - confirm.
    audio, _ = librosa.load(filename, sr=22050, mono=True, offset=20.0, duration=3)
    return audio


def calc_specgram(x, fs, winSize):
    """
    Computes the log-scaled mel spectrogram of a signal.

    x: audio samples.
    fs: sample rate of x.
    winSize: FFT window size (passed to librosa as n_fft).
    Returns the mel spectrogram (128 mel bands, hop of 512 samples) converted
    to a log scale relative to its maximum value.
    """
    S = librosa.feature.melspectrogram(y=x, sr=fs, n_mels=128, S=None, n_fft=winSize, hop_length=512)
    # NOTE(review): logamplitude/ref_power are the names used by old librosa
    # releases; newer ones presumably expose power_to_db/ref - confirm the
    # installed version before upgrading.
    return librosa.logamplitude(S, ref_power=np.max)


def make_4tensor(x):
    """
    Pads an array with leading singleton axes until it has 4 dimensions.

    x: numpy array with at most 4 dimensions (asserted).
    Returns x unchanged if it is already 4-D, otherwise a view with new axes
    inserted at position 0.
    """
    assert x.ndim <= 4
    while x.ndim < 4:
        x = np.expand_dims(x, 0)
    return x


class FeatExtraction():
    """
    Extracts log-mel spectrograms for every file of a dataset into HDF5.

    Constructing an instance runs the whole pipeline: it reads
    <dataset_dir>/lists/audio_files.txt, creates <dataset_dir>/features if
    needed, opens <dataset_dir>/features/feats.h5 and appends one spectrogram
    row ('/x') and one filename row ('/filenames') per listed audio file.
    """

    def __init__(self, dataset_dir):
        """
        Runs the complete extraction for the dataset rooted at dataset_dir.
        """
        self.dataset_dir = dataset_dir
        self.list_dir = os.path.join(self.dataset_dir, 'lists')
        self.get_filenames()
        self.feat_dir = os.path.join(self.dataset_dir, 'features')
        self.make_feat_dir()
        self.h5_filename = os.path.join(self.feat_dir, 'feats.h5')
        self.make_h5()
        # Close the HDF5 handle even when an audio file fails to decode, so
        # rows already appended are flushed and the file is not left open.
        try:
            self.setup_h5()
            self.extract_features()
        finally:
            self.close_h5()

    def get_filenames(self):
        """
        Reads the audio file list into self.filenames / self.num_files.
        """
        dataset_files = os.path.join(self.list_dir, 'audio_files.txt')
        with open(dataset_files, 'r') as handle:
            self.filenames = [line.strip() for line in handle]
        self.num_files = len(self.filenames)

    def make_feat_dir(self):
        """
        Creates the feature output directory unless it already exists.
        """
        if not os.path.exists(self.feat_dir):
            print('Making output dir.')
            os.mkdir(self.feat_dir)
        else:
            print('Output dir already exists.')

    def make_h5(self):
        """
        Opens the feature file: new file in 'w' mode, existing one in 'a'.
        """
        # NOTE(review): openFile/createEArray are the PyTables 2.x camelCase
        # names; later releases presumably only offer open_file/create_earray.
        if not os.path.exists(self.h5_filename):
            self.h5 = tables.openFile(self.h5_filename, 'w')
        else:
            print('Feature file already exists.')
            # NOTE(review): setup_h5 creates '/x' and '/filenames'
            # unconditionally, so reopening a populated file here presumably
            # fails on the existing nodes - confirm the intended behaviour.
            self.h5 = tables.openFile(self.h5_filename, 'a')

    def setup_h5(self):
        """
        Creates the extendable arrays, sized from the first audio file.

        The first file's spectrogram fixes the per-row shape of '/x'; that
        spectrogram and its filename are stored as the first rows.
        """
        filename = self.filenames[0]
        x = read_wav(filename)
        spec_x = calc_specgram(x, 22050, 1024)
        spec_x = make_4tensor(spec_x)
        # Axis 0 is the growable row axis; the remaining axes are one sample.
        self.data_shape = spec_x.shape[1:]
        self.x_earray_shape = (0,) + self.data_shape
        self.chunkshape = (1,) + self.data_shape
        self.h5_x = self.h5.createEArray('/', 'x', tables.FloatAtom(itemsize=4), self.x_earray_shape, chunkshape=self.chunkshape, expectedrows=self.num_files)
        self.h5_filenames = self.h5.createEArray('/', 'filenames', tables.StringAtom(256), (0,), expectedrows=self.num_files)
        self.h5_x.append(spec_x)
        self.h5_filenames.append([filename])

    def extract_features(self):
        """
        Appends the spectrogram and name of every file after the first one.
        """
        # The first file was already stored by setup_h5.
        for filename in self.filenames[1:]:
            print('Filename:  %s' % filename)
            x = read_wav(filename)
            spec_x = calc_specgram(x, 22050, 1024)
            spec_x = make_4tensor(spec_x)
            self.h5_x.append(spec_x)
            self.h5_filenames.append([filename])

    def close_h5(self):
        """
        Flushes pending rows and closes the HDF5 file.
        """
        self.h5.flush()
        self.h5.close()


if __name__ == '__main__':
    test = FeatExtraction('/media/paulo/01CFCE0971AA08B0/ilm10k-dataset')
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Code/make_lists.py Sat Jul 11 20:49:17 2015 +0100 @@ -0,0 +1,149 @@
"""
Builds the audio file list, ground truth files and cross-validation folds
for a dataset directory.
"""

import numpy
import numpy.random as random
import os
import pickle
import sys
import utils as U
#import pdb


def _write_lines(path, lines):
    """
    Writes lines to path and closes the file before returning.
    """
    with open(path, 'w') as handle:
        handle.writelines(lines)


def read_file(filename):
    """
    Loads a file into a list of whitespace-stripped lines
    """
    with open(filename, 'r') as handle:
        return [line.strip() for line in handle]


def get_folds(filelist, n_folds):
    """
    Splits filelist into n_folds consecutive slices.

    Every fold holds len(filelist) // n_folds items except the last one,
    which also receives the remainder.
    """
    # Floor division keeps the slice bounds integral under true division.
    n_per_fold = len(filelist) // n_folds
    folds = [filelist[i * n_per_fold:(i + 1) * n_per_fold]
             for i in range(n_folds - 1)]
    folds.append(filelist[(n_folds - 1) * n_per_fold:])
    return folds


def generate_mirex_list(train_list, annotations):
    """
    Formats songs as 'song<TAB>annotation<NEWLINE>' lines.

    Songs missing from annotations are reported on stdout and skipped.
    """
    out_list = []
    for song in train_list:
        annot = annotations.get(song)
        if annot is None:
            print('No annotations for song %s' % song)
            continue
        assert isinstance(annot, str)
        out_list.append('%s\t%s\n' % (song, annot))
    return out_list


def make_file_list(gtzan_path, n_folds=5):
    """
    Generates lists

    Collects the .au/.mp3/.wav files under <gtzan_path>/ilmaudio, shuffles
    them and writes into <gtzan_path>/lists: audio_files.txt,
    ground_truth.txt (plus tags.txt and ground_truth.pickle), a single
    '1_of_1' split for quick experiments and one train/valid/test split per
    fold.
    """
    audio_path = os.path.join(gtzan_path, 'ilmaudio')
    out_path = os.path.join(gtzan_path, 'lists')
    files_list = []
    for ext in ['.au', '.mp3', '.wav']:
        files_list.extend(U.getFiles(audio_path, ext))
    random.shuffle(files_list)

    if not os.path.exists(out_path):
        os.makedirs(out_path)

    audio_list_path = os.path.join(out_path, 'audio_files.txt')
    _write_lines(audio_list_path, ['%s\n' % f for f in files_list])

    annotations = get_annotations(files_list)

    # The ground truth file must be closed before it is read back below.
    ground_truth_path = os.path.join(out_path, 'ground_truth.txt')
    _write_lines(ground_truth_path, generate_mirex_list(files_list, annotations))
    generate_ground_truth_pickle(ground_truth_path)

    folds = get_folds(files_list, n_folds=n_folds)

    ### Single fold for quick experiments
    create_fold(0, 1, folds, annotations, out_path)

    for n in range(n_folds):
        create_fold(n, n_folds, folds, annotations, out_path)


def create_fold(n, n_folds, folds, annotations, out_path):
    """
    Writes the train/valid/test list files for fold n.

    Fold n is the test set; all other folds form the training set, whose
    last 20% of lines is then moved into the validation file.
    """
    train_path = os.path.join(out_path, 'train_%i_of_%i.txt' % (n+1, n_folds))
    valid_path = os.path.join(out_path, 'valid_%i_of_%i.txt' % (n+1, n_folds))
    test_path = os.path.join(out_path, 'test_%i_of_%i.txt' % (n+1, n_folds))

    test_list = folds[n]
    train_list = []
    for m, fold in enumerate(folds):
        if m != n:
            train_list.extend(fold)

    _write_lines(train_path, generate_mirex_list(train_list, annotations))
    _write_lines(test_path, generate_mirex_list(test_list, annotations))
    split_list_file(train_path, train_path, valid_path, ratio=0.8)


def split_list_file(input_file, out_file1, out_file2, ratio=0.8):
    """
    Splits the lines of input_file: the first ratio share goes to out_file1,
    the rest to out_file2. out_file1 may be input_file itself because the
    input is read completely (and closed) before anything is written.
    """
    with open(input_file, 'r') as handle:
        input_list = handle.readlines()

    nsplit = int(len(input_list) * ratio)

    _write_lines(out_file1, input_list[:nsplit])
    _write_lines(out_file2, input_list[nsplit:])


def get_annotation(filename):
    """
    Returns the name of the directory that directly contains filename.
    """
    return os.path.split(U.parseFile(filename)[0])[-1]


def get_annotations(files_list):
    """
    Maps every file in files_list to its annotation (see get_annotation).
    """
    annotations = {}
    for filename in files_list:
        annotations[filename] = get_annotation(filename)
    return annotations


def generate_ground_truth_pickle(gt_file):
    """
    Derives tags.txt and ground_truth.pickle from a ground truth list.

    gt_file holds 'filename<TAB>tag' lines. tags.txt receives the sorted
    distinct non-empty tags, one per line; ground_truth.pickle receives a
    dict mapping each filename to a vector with 1.0 at the index of each of
    its tags. Both are written next to gt_file.
    """
    gt_path, _ = os.path.split(gt_file)
    tag_file = os.path.join(gt_path, 'tags.txt')
    gt_pickle = os.path.join(gt_path, 'ground_truth.pickle')

    entries = []
    with open(gt_file, 'r') as handle:
        for line in handle:
            if not line.strip():
                continue
            # Only drop the line terminator: a full strip() would also remove
            # the trailing tab of an empty tag and break the two-way unpack.
            filename, tag = line.rstrip('\r\n').split('\t')
            entries.append((filename, tag))

    tag_list = sorted(set(tag for _, tag in entries if tag != ''))
    with open(tag_file, 'w') as handle:
        handle.write('\n'.join(tag_list + ['']))

    tag_dict = {tag: i for i, tag in enumerate(tag_list)}
    n_tags = len(tag_dict)

    mp3_dict = {}
    for filename, tag in entries:
        if filename not in mp3_dict:
            mp3_dict[filename] = numpy.zeros(n_tags)
        if tag != '':
            mp3_dict[filename][tag_dict[tag]] = 1.

    # Pickle data is binary; a text-mode handle can corrupt it.
    with open(gt_pickle, 'wb') as handle:
        pickle.dump(mp3_dict, handle)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python %s gtzan_path [n_folds=10]' % sys.argv[0])
        sys.exit()

    gtzan_path = os.path.abspath(sys.argv[1])
    if len(sys.argv) > 2:
        n_folds = int(sys.argv[2])
    else:
        n_folds = 10

    make_file_list(gtzan_path, n_folds)
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Code/utils.py Sat Jul 11 20:49:17 2015 +0100 @@ -0,0 +1,36 @@
"""
Small file-system helpers: file discovery, path parsing, list file I/O.
"""
import os


def getFiles(root_dir, ext='.mp3', verbose=True):
    """
    Returns a list of files

    Walks root_dir recursively and collects the full path of every file
    whose name ends with ext. With verbose set, a progress message and the
    number of matches are printed.
    """
    fileList = []
    if verbose:
        print("Populating %s files..." % ext)
    for root, _dirs, files in os.walk(root_dir):
        for f in files:
            if f.endswith(ext):
                fileList.append(os.path.join(root, f))
    if verbose:
        print("%i files found." % len(fileList))
    return fileList


def parseFile(filePath):
    """
    Parses the file path and returns (root,fileName,ext)
    """
    root, baseName = os.path.split(filePath)
    fileName, fileExt = os.path.splitext(baseName)
    return (root, fileName, fileExt)


def read_file(filename):
    """
    Loads a file into a list

    Every line is stripped of surrounding whitespace; the file is closed
    before returning.
    """
    with open(filename, 'r') as f:
        return [line.strip() for line in f]


def writeFile(dataList, filename):
    """
    Writes the strings in dataList to filename without adding separators.
    """
    with open(filename, 'w') as f:
        f.writelines(dataList)