Mercurial > hg > plosone_underreview
diff scripts/load_features.py @ 4:e50c63cf96be branch-tests
rearranging folders
author | Maria Panteli |
---|---|
date | Mon, 11 Sep 2017 11:51:50 +0100 |
parents | |
children | a35bd818d8e9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/load_features.py Mon Sep 11 11:51:50 2017 +0100 @@ -0,0 +1,287 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 16 01:50:57 2017 + +@author: mariapanteli +""" + +import numpy as np +import pandas as pd +import os +from sklearn.decomposition import NMF +import OPMellin as opm +import MFCC as mfc +import PitchBihist as pbi + + +class FeatureLoader: + def __init__(self, win2sec=8): + self.win2sec = float(win2sec) + self.sr = 44100. + self.win1 = int(round(0.04*self.sr)) + self.hop1 = int(round(self.win1/8.)) + self.framessr = self.sr/float(self.hop1) + self.win2 = int(round(self.win2sec*self.framessr)) + self.hop2 = int(round(0.5*self.framessr)) + self.framessr2 = self.framessr/float(self.hop2) + + + def get_op_mfcc_for_file(self, melspec_file=None, scale=True): + op = [] + mfc = [] + if not os.path.exists(melspec_file): + return op, mfc + print 'extracting onset patterns and mfccs...' + songframes = pd.read_csv(melspec_file, engine="c", header=None) + songframes.iloc[np.where(np.isnan(songframes))] = 0 + songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes + melspec = songframes.get_values().T + op = self.get_op_from_melspec(melspec, K=2) + mfc = self.get_mfcc_from_melspec(melspec) + if scale: + # scale all frames by mean and std of recording + op = (op - np.nanmean(op)) / np.nanstd(op) + mfc = (mfc - np.nanmean(mfc)) / np.nanstd(mfc) + return op, mfc + + + def get_chroma_for_file(self, chroma_file=None, scale=True): + ch = [] + if not os.path.exists(chroma_file): + return ch + print 'extracting chroma...' + songframes = pd.read_csv(chroma_file, engine="c", header=None) + songframes.iloc[np.where(np.isnan(songframes))] = 0 + songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes + chroma = songframes.get_values().T + ch = self.get_ave_chroma(chroma) + if scale: + # scale all frames by mean and std of recording + ch = (ch - np.nanmean(ch)) / np.nanstd(ch) + return ch + + + def get_music_idx_from_bounds(self, bounds, sr=None): + music_idx = [] + if len(bounds) == 0: + # bounds is empty list + return music_idx + nbounds = bounds.shape[0] + if len(np.where(bounds[:,2]=='m')[0])==0: + # no music segments + return music_idx + elif len(np.where(bounds[:,2]=='s')[0])==nbounds: + # all segments are speech + return music_idx + else: + half_win_hop = int(round(0.5 * self.win2 / float(self.hop2))) + music_bounds = np.where(bounds[:, 2] == 'm')[0] + bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr) + duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr) + for music_bound in music_bounds: + lower_bound = np.max([0, bounds_in_frames[music_bound] - half_win_hop]) + upper_bound = lower_bound + duration_in_frames[music_bound] - half_win_hop + music_idx.append(np.arange(lower_bound, upper_bound, dtype=int)) + if len(music_idx)>0: + music_idx = np.sort(np.concatenate(music_idx)) # it should be sorted, but just in case segments overlap -- remove duplicates if segments overlap + return music_idx + + + def get_music_idx_for_file(self, segmenter_file=None): + music_idx = [] + if os.path.exists(segmenter_file) and os.path.getsize(segmenter_file)>0: + print 'loading speech/music segments...' + bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').get_values() + if bounds.shape[1] == 1: # depends on the computer platform + bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values() + music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2) + return music_idx + + + def get_features(self, df, class_label='Country'): + oplist = [] + mflist = [] + chlist = [] + pblist = [] + clabels = [] + aulabels = [] + n_files = len(df) + for i in range(n_files): + if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])): + continue + print 'file ' + str(i) + ' of ' + str(n_files) + music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) + if len(music_idx)==0: + # no music segments -> skip this file + continue + try: + op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i]) + ch = self.get_chroma_for_file(df['Chroma'].iloc[i]) + #pb = self.get_pb_from_melodia(df['Melodia'].iloc[i]) + pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i]) + #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i]) + except: + continue + min_n_frames = np.min([len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames + if min_n_frames==0: + # no features extracted -> skip this file + continue + music_idx = music_idx[music_idx<min_n_frames] + n_frames = len(music_idx) + oplist.append(op.iloc[music_idx, :]) + mflist.append(mfcc.iloc[music_idx, :]) + chlist.append(ch.iloc[music_idx, :]) + pblist.append(pb.iloc[music_idx, :]) + clabels.append(pd.DataFrame(np.repeat(df[class_label].iloc[i], n_frames))) + aulabels.append(pd.DataFrame(np.repeat(df['Audio'].iloc[i], n_frames))) + print len(oplist), len(mflist), len(chlist), len(pblist), len(clabels), len(aulabels) + return pd.concat(oplist), pd.concat(mflist), pd.concat(chlist), pd.concat(pblist), pd.concat(clabels), pd.concat(aulabels) + + + def get_op_from_melspec(self, melspec, K=None): + op = opm.OPMellin(win2sec=self.win2sec) + opmellin = op.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr) + if K is not None: + opmel = self.mean_K_bands(opmellin.T, K) + opmel = pd.DataFrame(opmel) + return opmel + + + def get_mfcc_from_melspec(self, melspec, deltamfcc=True, avelocalframes=True, stdlocalframes=True): + mf = mfc.MFCCs() + mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr) + if deltamfcc: + ff = mfcc + ffdiff = np.diff(ff, axis=1) + ffdelta = np.concatenate((ffdiff, ffdiff[:,-1,None]), axis=1) + frames = np.concatenate([ff,ffdelta], axis=0) + mfcc = frames + if avelocalframes: + mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes) + mfcc = pd.DataFrame(mfcc.T) + return mfcc + + + def get_ave_chroma(self, chroma, avelocalframes=True, stdlocalframes=True, alignchroma=True): + chroma[np.where(np.isnan(chroma))] = 0 + if alignchroma: + maxind = np.argmax(np.sum(chroma, axis=1)) + chroma = np.roll(chroma, -maxind, axis=0) + if avelocalframes: + chroma = self.average_local_frames(chroma, getstd=stdlocalframes) + chroma = pd.DataFrame(chroma.T) + return chroma + + + def average_local_frames(self, frames, getstd=False): + nbins, norigframes = frames.shape + if norigframes<self.win2: + nframes = 1 + else: + nframes = int(1+np.floor((norigframes-self.win2)/float(self.hop2))) + if getstd: + aveframes = np.empty((nbins+nbins, nframes)) + for i in range(nframes): # loop over all 8-sec frames + meanf = np.nanmean(frames[:, (i*self.hop2):min((i*self.hop2+self.win2),norigframes)], axis=1) + stdf = np.nanstd(frames[:, (i*self.hop2):min((i*self.hop2+self.win2),norigframes)], axis=1) + aveframes[:,i] = np.concatenate((meanf,stdf)) + else: + aveframes = np.empty((nbins, nframes)) + for i in range(nframes): # loop over all 8-sec frames + aveframes[:,i] = np.nanmean(frames[:, (i*self.hop2):min((i*self.hop2+self.win2),norigframes)], axis=1) + return aveframes + + + def mean_K_bands(self, songframes, K=40, nmels=40): + [F, P] = songframes.shape + Pproc = int((P/nmels)*K) + procframes = np.zeros([F, Pproc]) + niters = int(nmels/K) + nbins = P/nmels # must be 200 bins + for k in range(K): + for j in range(k*niters, (k+1)*niters): + procframes[:, (k*nbins):((k+1)*nbins)] += songframes[:, (j*nbins):((j+1)*nbins)] + procframes /= float(niters) + return procframes + + + def nmfpitchbihist(self, frames): + nbins, nfr = frames.shape + npc = 2 + nb = int(np.sqrt(nbins)) # assume structure of pitch bihist is nbins*nbins + newframes = np.empty(((nb+nb)*npc, nfr)) + for fr in range(nfr): + pb = np.reshape(frames[:, fr], (nb, nb)) + try: + nmfmodel = NMF(n_components=npc).fit(pb) + W = nmfmodel.transform(pb) + H = nmfmodel.components_.T + newframes[:, fr, None] = np.concatenate((W, H)).flatten()[:, None] + except: + newframes[:, fr, None] = np.zeros(((nb+nb)*npc, 1)) + return newframes + + + def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True): + pb = [] + if not os.path.exists(melodia_file): + return pb + print 'extracting pitch bihist from melodia...' + pb = pbi.PitchBihist(win2sec=self.win2sec) + pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=90.0) + if nmfpb is True: + pbihist = self.nmfpitchbihist(pbihist) + pbihist = pd.DataFrame(pbihist.T) + if scale: + # scale all frames by mean and std of recording + pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) + return pbihist + + + def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True): + base = os.path.basename(melodia_file) + root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/' + print 'load precomputed pitch bihist', root + if self.win2sec == 8: + pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:,1:] + else: + pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T + if nmfpb is True: + pbihist = self.nmfpitchbihist(pbihist) + pbihist = pd.DataFrame(pbihist.T) + print pbihist.shape + if scale: + # scale all frames by mean and std of recording + pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) + return pbihist + + +# def get_pb_chroma_for_file(self, chroma_file=None, scale=True): +# ch = [] +# pb = [] +# if not os.path.exists(chroma_file): +# return ch, pb +# songframes = pd.read_csv(chroma_file, engine="c", header=None) +# songframes.iloc[np.where(np.isnan(songframes))] = 0 +# songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes +# chroma = songframes.get_values().T +# ch = self.get_ave_chroma(chroma) +# pb = self.get_pbihist_from_chroma(chroma) +# if scale: +# # scale all frames by mean and std of recording +# ch = (ch - np.nanmean(ch)) / np.nanstd(ch) +# pb = (pb - np.nanmean(pb)) / np.nanstd(pb) +# return ch, pb + + +# def get_pbihist_from_chroma(self, chroma, alignchroma=True, nmfpb=True): +# pb = pbi.PitchBihist(win2sec=self.win2sec) +# chroma[np.where(np.isnan(chroma))] = 0 +# if alignchroma: +# maxind = np.argmax(np.sum(chroma, axis=1)) +# chroma = np.roll(chroma, -maxind, axis=0) +# pbihist = pb.get_pitchbihist_from_chroma(chroma=chroma, chromasr=self.framessr) +# if nmfpb is True: +# pbihist = self.nmfpitchbihist(pbihist) +# pbihist = pd.DataFrame(pbihist.T) +# return pbihist \ No newline at end of file