changeset 5:14936b60b975

Code to extract log-mel-spectrogram
author Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk>
date Sat, 11 Jul 2015 20:49:17 +0100
parents 9f187c06cd74
children 41e14a539dd3
files Code/feature_extraction.py Code/make_lists.py Code/utils.py
diffstat 3 files changed, 291 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/feature_extraction.py	Sat Jul 11 20:49:17 2015 +0100
@@ -0,0 +1,106 @@
+"""
+Feature extraction.
+Siddharth Sigtia
+Feb,2014
+C4DM
+"""
+import numpy as np #Paulo: numpy as standard
+#import subprocess
+#import sys
+import os
+#from spectrogram import SpecGram
+import tables
+#import pdb
+# Paulo Chiliguano: library for mel spectrogram
+import librosa
+#import random
+
+def read_wav(filename, sr=22050, offset=20.0, duration=3):
+    """
+    Load an excerpt of an audio file as a mono signal.
+
+    The excerpt window used to be hard-coded; it is now configurable, with
+    defaults equal to the previous constants so existing callers are
+    unaffected.
+
+    filename : path of the audio file, handed to librosa.load
+    sr       : sampling rate requested from librosa.load (default 22050)
+    offset   : value passed as librosa.load's `offset` (default 20.0)
+    duration : value passed as librosa.load's `duration` (default 3)
+
+    Returns the audio array produced by librosa.load; the sampling rate
+    that librosa.load also returns is discarded.
+    """
+    audio, _ = librosa.load(filename, sr=sr, mono=True, offset=offset, duration=duration)
+    return audio
+
+def calc_specgram(x,fs,winSize,):
+    """
+    Compute a log-scaled mel spectrogram of a signal.
+
+    x       : audio signal, passed to librosa as `y`
+    fs      : sampling rate, passed as `sr`
+    winSize : passed as `n_fft`
+
+    The mel spectrogram is computed with n_mels=128 and hop_length=512 and
+    then converted with librosa.logamplitude, using np.max as `ref_power`.
+    """
+    mel_power = librosa.feature.melspectrogram(
+        y=x, sr=fs, n_mels=128, S=None, n_fft=winSize, hop_length=512)
+    return librosa.logamplitude(mel_power, ref_power=np.max)
+
+
+def make_4tensor(x):
+    """
+    Return `x` with leading singleton axes added until it has 4 dimensions.
+
+    An input that already has 4 dimensions is returned unchanged.
+    Raises AssertionError when `x` has more than 4 dimensions.
+    """
+    assert x.ndim <= 4
+    for _ in range(4 - x.ndim):
+        x = np.expand_dims(x, 0)
+    return x
+
+class FeatExtraction():
+    """
+    Extract log-mel-spectrogram features for every file of a dataset and
+    store them in an HDF5 file.
+
+    Reads <dataset_dir>/lists/audio_files.txt (one audio path per line) and
+    writes <dataset_dir>/features/feats.h5, which holds an extendable array
+    'x' (one 4-D-shaped spectrogram entry per file) and a parallel string
+    array 'filenames'.
+
+    All the work is done by the constructor.
+    """
+    def __init__(self,dataset_dir):
+        self.dataset_dir = dataset_dir
+        self.list_dir = os.path.join(self.dataset_dir,'lists')
+        self.get_filenames()
+        self.feat_dir = os.path.join(self.dataset_dir,'features')
+        self.make_feat_dir()
+        self.h5_filename = os.path.join(self.feat_dir,'feats.h5')
+        self.make_h5()
+        try:
+            self.setup_h5()
+            self.extract_features()
+        finally:
+            # Close the HDF5 file even when a file fails to load, so rows
+            # written so far are flushed and the handle is not leaked.
+            self.close_h5()
+
+    def get_filenames(self,):
+        # Read the list of audio paths, one per line, whitespace stripped.
+        dataset_files = os.path.join(self.list_dir,'audio_files.txt')
+        with open(dataset_files,'r') as f:
+            self.filenames = [l.strip() for l in f]
+        self.num_files = len(self.filenames)
+
+    def make_feat_dir(self,):
+        # Create the output directory unless it is already there.
+        if not os.path.exists(self.feat_dir):
+            print 'Making output dir.'
+            os.mkdir(self.feat_dir)
+        else:
+            print 'Output dir already exists.'
+
+    def make_h5(self,):
+        # Open the feature file: fresh in 'w' mode, existing in 'a' mode.
+        if not os.path.exists(self.h5_filename):
+            self.h5 = tables.openFile(self.h5_filename,'w')
+        else:
+            print 'Feature file already exists.'
+            # NOTE(review): setup_h5 unconditionally creates '/x' and
+            # '/filenames'; presumably that fails when a reopened file
+            # already contains them - confirm and decide on resume/overwrite.
+            self.h5 = tables.openFile(self.h5_filename,'a')
+
+    def _file_features(self, filename):
+        # Load one audio file and return its log-mel spectrogram as a 4-D array.
+        x = read_wav(filename)
+        spec_x = calc_specgram(x,22050,1024)
+        return make_4tensor(spec_x)
+
+    def setup_h5(self,):
+        # The first file fixes the per-entry shape of the extendable array,
+        # so it is processed here and stored as the first row.
+        filename = self.filenames[0]
+        spec_x = self._file_features(filename)
+        self.data_shape = spec_x.shape[1:]
+        self.x_earray_shape = (0,) + self.data_shape
+        self.chunkshape = (1,) + self.data_shape
+        self.h5_x = self.h5.createEArray('/','x',tables.FloatAtom(itemsize=4),self.x_earray_shape,chunkshape=self.chunkshape,expectedrows=self.num_files)
+        # NOTE(review): StringAtom(256) presumably truncates paths longer
+        # than 256 characters - confirm.
+        self.h5_filenames = self.h5.createEArray('/','filenames',tables.StringAtom(256),(0,),expectedrows=self.num_files)
+        self.h5_x.append(spec_x)
+        self.h5_filenames.append([filename])
+
+    def extract_features(self,):
+        # The first file was already stored by setup_h5.
+        for filename in self.filenames[1:]:
+            print 'Filename: ',filename
+            self.h5_x.append(self._file_features(filename))
+            self.h5_filenames.append([filename])
+
+    def close_h5(self,):
+        self.h5.flush()
+        self.h5.close()
+        
+if __name__ == '__main__':
+    import sys
+    # The dataset directory may be given as the first command-line argument;
+    # the previously hard-coded location remains the default.
+    dataset_dir = sys.argv[1] if len(sys.argv) > 1 else '/media/paulo/01CFCE0971AA08B0/ilm10k-dataset'
+    test = FeatExtraction(dataset_dir)
+  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/make_lists.py	Sat Jul 11 20:49:17 2015 +0100
@@ -0,0 +1,149 @@
+
+import numpy
+import numpy.random as random
+import os
+import pickle
+import sys
+import utils as U
+#import pdb
+
+
+def read_file(filename):
+    """
+    Loads a file into a list
+
+    Returns one entry per line, each with surrounding whitespace stripped.
+    The file is closed before returning.
+    """
+    with open(filename,'r') as f:
+        return [line.strip() for line in f]
+
+def get_folds(filelist, n_folds):
+    """
+    Split `filelist` into `n_folds` consecutive slices.
+
+    The first n_folds-1 folds each hold len(filelist) // n_folds items; the
+    last fold takes everything that is left, so it absorbs the remainder.
+
+    Raises ZeroDivisionError when n_folds is 0.
+    """
+    # Explicit floor division keeps the slice bounds integral no matter how
+    # `/` is defined for ints.
+    n_per_fold = len(filelist) // n_folds
+    starts = [i * n_per_fold for i in range(n_folds - 1)]
+    folds = [filelist[start:start + n_per_fold] for start in starts]
+    folds.append(filelist[(n_folds - 1) * n_per_fold:])
+    return folds
+
+def generate_mirex_list(train_list, annotations):
+    """
+    Build 'song<TAB>annotation<NEWLINE>' lines for the songs in train_list.
+
+    Songs missing from `annotations` (or mapped to None) are reported on
+    stdout and skipped. Every annotation found must be a plain str,
+    otherwise an AssertionError is raised.
+    """
+    lines = []
+    for song in train_list:
+        label = annotations.get(song)
+        if label is not None:
+            assert type(label) == type('')
+            lines.append('%s\t%s\n' % (song, label))
+        else:
+            print('No annotations for song %s' % song)
+    return lines
+            
+
+def make_file_list(gtzan_path, n_folds=5,):
+    """
+    Generates lists
+
+    Collects the .au/.mp3/.wav files below <gtzan_path>/ilmaudio, shuffles
+    them and writes into <gtzan_path>/lists (created if missing):
+      audio_files.txt   - one audio path per line
+      ground_truth.txt  - path<TAB>annotation lines, from which
+                          generate_ground_truth_pickle derives its outputs
+      train/valid/test  - one set of list files per fold, plus a
+                          '1_of_1' set for quick experiments
+    """
+    audio_path = os.path.join(gtzan_path,'ilmaudio')
+    out_path = os.path.join(gtzan_path,'lists')
+    files_list = []
+    for ext in ['.au', '.mp3', '.wav']:
+        files_list.extend(U.getFiles(audio_path, ext))
+    random.shuffle(files_list)
+
+    if not os.path.exists(out_path):
+        os.makedirs(out_path)
+
+    # U.writeFile closes each file before returning, so the ground-truth
+    # file is completely on disk when it is read back just below.
+    audio_list_path = os.path.join(out_path, 'audio_files.txt')
+    U.writeFile(['%s\n' % f for f in files_list], audio_list_path)
+
+    annotations = get_annotations(files_list)
+
+    ground_truth_path = os.path.join(out_path, 'ground_truth.txt')
+    U.writeFile(generate_mirex_list(files_list, annotations), ground_truth_path)
+    generate_ground_truth_pickle(ground_truth_path)
+
+    folds = get_folds(files_list, n_folds=n_folds)
+
+    ### Single fold for quick experiments
+    create_fold(0, 1, folds, annotations, out_path)
+
+    for n in range(n_folds):
+        create_fold(n, n_folds, folds, annotations, out_path)
+
+
+def create_fold(n, n_folds, folds, annotations, out_path):
+    """
+    Write the train/valid/test list files for fold `n`.
+
+    folds[n] becomes the test list and all other folds are concatenated
+    into the training list. The training file is then split in place by
+    split_list_file: its first 80% of lines stay in the training file, the
+    rest go to the validation file. Files are named
+    '<set>_<n+1>_of_<n_folds>.txt' inside out_path.
+    """
+    suffix = '%i_of_%i.txt' % (n+1, n_folds)
+    train_path = os.path.join(out_path, 'train_' + suffix)
+    valid_path = os.path.join(out_path, 'valid_' + suffix)
+    test_path = os.path.join(out_path, 'test_' + suffix)
+
+    test_list = folds[n]
+    train_list = []
+    for m, fold in enumerate(folds):
+        if m != n:
+            train_list.extend(fold)
+
+    # Each file is closed explicitly: the training file is read back by
+    # split_list_file right away and must be fully written by then.
+    with open(train_path,'w') as f:
+        f.writelines(generate_mirex_list(train_list, annotations))
+    with open(test_path,'w') as f:
+        f.writelines(generate_mirex_list(test_list, annotations))
+    split_list_file(train_path, train_path, valid_path, ratio=0.8)
+    
+def split_list_file(input_file, out_file1, out_file2, ratio=0.8):
+    """
+    Split the lines of input_file into two files.
+
+    The first int(n_lines * ratio) lines go to out_file1, the remaining
+    lines to out_file2. out_file1 may be the same path as input_file: the
+    input is read completely and closed before any output is opened.
+    """
+    with open(input_file,'r') as f:
+        input_list = f.readlines()
+
+    nsplit = int(len(input_list) * ratio)
+
+    with open(out_file1, 'w') as f:
+        f.writelines(input_list[:nsplit])
+    with open(out_file2, 'w') as f:
+        f.writelines(input_list[nsplit:])
+
+
+def get_annotation(filename):
+    """
+    Return the name of the directory that directly contains `filename`,
+    which is used as the file's annotation.
+    """
+    parent_dir = U.parseFile(filename)[0]
+    _, genre = os.path.split(parent_dir)
+    return genre
+
+def get_annotations(files_list):
+    """
+    Map every filename in files_list to the result of get_annotation.
+    """
+    return dict((filename, get_annotation(filename)) for filename in files_list)
+
+def generate_ground_truth_pickle(gt_file):
+    """
+    Derive the tag list and per-file tag vectors from a ground-truth file.
+
+    gt_file holds 'filename<TAB>tag' lines. Two files are written next to it:
+      tags.txt            - the distinct non-empty tags, sorted, one per line
+      ground_truth.pickle - dict mapping filename -> numpy vector of length
+                            n_tags with 1.0 at the index of every tag seen
+                            for that file
+
+    A line whose tag is empty contributes an all-zero vector for its file.
+    Raises ValueError for a line that does not have exactly two
+    tab-separated fields.
+    """
+    gt_path,_ = os.path.split(gt_file)
+    tag_file = os.path.join(gt_path,'tags.txt')
+    gt_pickle = os.path.join(gt_path,'ground_truth.pickle')
+
+    rows = []
+    with open(gt_file,'r') as f:
+        for line in f:
+            # Only the line terminator is removed before splitting: a plain
+            # strip() would also eat the tab in front of an empty tag and
+            # make the two-way unpack fail.
+            filename,tag = line.rstrip('\r\n').split('\t')
+            rows.append((filename.lstrip(), tag.rstrip()))
+
+    tag_list = sorted(set(tag for _, tag in rows if tag != ''))
+    with open(tag_file,'w') as f:
+        f.write('\n'.join(tag_list + ['']))
+
+    tag_dict = dict((tag, i) for i, tag in enumerate(tag_list))
+    n_tags = len(tag_dict)
+
+    mp3_dict = {}
+    for filename,tag in rows:
+        if filename not in mp3_dict:
+            mp3_dict[filename] = numpy.zeros(n_tags)
+        if tag != '':
+            mp3_dict[filename][tag_dict[tag]] = 1.
+
+    # Pickle streams are binary data, so the file is opened in binary mode.
+    with open(gt_pickle,'wb') as f:
+        pickle.dump(mp3_dict, f)
+
+if __name__ == '__main__':
+    # Command line: <gtzan_path> [n_folds]; n_folds falls back to 10.
+    if len(sys.argv) < 2:
+        print('Usage: python %s gtzan_path [n_folds=10]' % sys.argv[0])
+        sys.exit()
+
+    n_folds = int(sys.argv[2]) if len(sys.argv) > 2 else 10
+    gtzan_path = os.path.abspath(sys.argv[1])
+    make_file_list(gtzan_path, n_folds)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/utils.py	Sat Jul 11 20:49:17 2015 +0100
@@ -0,0 +1,36 @@
+import os
+
+def getFiles(root_dir,ext='.mp3',verbose=True) :
+    """
+    Returns a list of files
+
+    Walks root_dir recursively and collects the joined path of every file
+    whose name ends with `ext`. When verbose is set, a start message and
+    the number of matches are printed.
+    """
+    if verbose:
+        print("Populating %s files..." % ext)
+    matches = [os.path.join(folder, name)
+               for folder, _subdirs, names in os.walk(root_dir)
+               for name in names
+               if name.endswith(ext)]
+    if verbose:
+        print("%i files found." % len(matches))
+    return matches
+
+def parseFile(filePath):
+    """
+    Parses the file path and returns (root,fileName,ext)
+
+    root is the directory part, fileName the base name without its
+    extension, and ext the extension as returned by os.path.splitext.
+    """
+    root, base_name = os.path.split(filePath)
+    stem, extension = os.path.splitext(base_name)
+    return (root, stem, extension)
+
+def read_file(filename):
+    """
+    Loads a file into a list
+
+    Returns one entry per line, each with surrounding whitespace stripped.
+    The file is closed before returning.
+    """
+    with open(filename,'r') as f:
+        return [line.strip() for line in f]
+
+def writeFile(dataList,filename):
+    """
+    Writes the strings in dataList to filename, replacing any existing
+    content. No line separators are added.
+    """
+    with open(filename,'w') as out:
+        out.writelines(dataList)
\ No newline at end of file