annotate scripts/load_features.py @ 6:a35bd818d8e9 branch-tests

notebook to test music segments
author Maria Panteli <m.x.panteli@gmail.com>
date Mon, 11 Sep 2017 14:22:17 +0100
parents e50c63cf96be
children 56cbf155680a 852d4377f6ca
rev   line source
Maria@4 1 # -*- coding: utf-8 -*-
Maria@4 2 """
Maria@4 3 Created on Thu Mar 16 01:50:57 2017
Maria@4 4
Maria@4 5 @author: mariapanteli
Maria@4 6 """
Maria@4 7
Maria@4 8 import numpy as np
Maria@4 9 import pandas as pd
Maria@4 10 import os
Maria@4 11 from sklearn.decomposition import NMF
Maria@4 12 import OPMellin as opm
Maria@4 13 import MFCC as mfc
Maria@4 14 import PitchBihist as pbi
Maria@4 15
Maria@4 16
class FeatureLoader:
    """Load frame-level audio descriptors for a collection of recordings.

    Onset patterns and MFCCs are derived from a mel-spectrogram csv, average
    chroma from a chroma csv, and pitch bihistograms from Melodia output.
    Frames are restricted to the music segments listed in a speech/music
    segmentation file.
    """

    def __init__(self, win2sec=8):
        """Derive window and hop sizes from the texture window length.

        Parameters
        ----------
        win2sec : number
            Length in seconds of the second-level (texture) window.
        """
        self.win2sec = float(win2sec)
        self.sr = 44100.
        # first-level analysis: 40 ms window, hop of one eighth of the window
        self.win1 = int(round(0.04*self.sr))
        # NOTE: win1/8. is exactly 220.5 here; round() resolves that tie to
        # 221 on Python 2 and to 220 on Python 3, which changes every rate
        # derived below.
        self.hop1 = int(round(self.win1/8.))
        self.framessr = self.sr/float(self.hop1)
        # second-level analysis: win2sec window, 0.5 sec hop (in frames)
        self.win2 = int(round(self.win2sec*self.framessr))
        self.hop2 = int(round(0.5*self.framessr))
        self.framessr2 = self.framessr/float(self.hop2)

    @staticmethod
    def _read_frames(csv_file, max_frames=18000):
        """Read a headerless csv of frames as a (columns x frames) array.

        NaNs are replaced by 0 and only the first ``max_frames`` rows are kept
        (the original comment describes 18000 rows as the first 1.5 minutes).
        """
        songframes = pd.read_csv(csv_file, engine="c", header=None)
        # fillna touches only the NaN cells; positional indexing with the
        # (rows, cols) pair from np.where would select their cross-product.
        songframes = songframes.fillna(0)
        songframes = songframes.iloc[:max_frames, :]
        return songframes.values.T

    @staticmethod
    def _standardize(frames):
        """Scale all frames by the mean and std of the whole recording."""
        return (frames - np.nanmean(frames)) / np.nanstd(frames)

    @staticmethod
    def _concat(frame_list):
        """Concatenate a list of DataFrames; an empty list gives an empty DataFrame."""
        if frame_list:
            return pd.concat(frame_list)
        return pd.DataFrame()

    def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
        """Extract onset patterns and MFCCs from a mel-spectrogram csv.

        Parameters
        ----------
        melspec_file : str or None
            Path to the mel-spectrogram csv.
        scale : bool
            If true, standardise each feature by the recording's mean and std.

        Returns
        -------
        (op, mfcc) : pair of DataFrames, or a pair of empty lists when the
            file is missing or no path is given.
        """
        op = []
        mfcc = []
        if melspec_file is None or not os.path.exists(melspec_file):
            return op, mfcc
        print('extracting onset patterns and mfccs...')
        melspec = self._read_frames(melspec_file)
        op = self.get_op_from_melspec(melspec, K=2)
        mfcc = self.get_mfcc_from_melspec(melspec)
        if scale:
            op = self._standardize(op)
            mfcc = self._standardize(mfcc)
        return op, mfcc

    def get_chroma_for_file(self, chroma_file=None, scale=True):
        """Extract averaged chroma from a chroma csv.

        Parameters
        ----------
        chroma_file : str or None
            Path to the chroma csv.
        scale : bool
            If true, standardise by the recording's mean and std.

        Returns
        -------
        DataFrame of chroma descriptors, or an empty list when the file is
        missing or no path is given.
        """
        ch = []
        if chroma_file is None or not os.path.exists(chroma_file):
            return ch
        print('extracting chroma...')
        chroma = self._read_frames(chroma_file)
        ch = self.get_ave_chroma(chroma)
        if scale:
            ch = self._standardize(ch)
        return ch

    def get_music_idx_from_bounds(self, bounds, sr=None):
        """Convert segment boundaries to indices of frames labelled as music.

        Parameters
        ----------
        bounds : array-like
            Either empty, or a 2-D array whose columns are read as
            (start, duration, label); rows with label 'm' are kept.
        sr : float or None
            Frame rate used to convert start/duration to frame counts.
            Defaults to ``self.framessr2`` when omitted.

        Returns
        -------
        Sorted array of unique frame indices, or an empty list when there
        are no music segments.
        """
        music_idx = []
        if len(bounds) == 0:
            # bounds is empty list
            return music_idx
        if sr is None:
            sr = self.framessr2
        music_bounds = np.where(bounds[:, 2] == 'm')[0]
        if len(music_bounds) == 0:
            # no music segments (this also covers the all-speech case)
            return music_idx
        # half a texture window expressed in second-level frames
        half_win_hop = int(round(0.5 * self.win2 / float(self.hop2)))
        bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
        duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
        for music_bound in music_bounds:
            start = bounds_in_frames[music_bound]
            lower_bound = np.max([0, start - half_win_hop])
            upper_bound = start + duration_in_frames[music_bound] - half_win_hop
            music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
        # np.unique sorts and drops the duplicates produced by overlapping segments
        return np.unique(np.concatenate(music_idx))

    def get_music_idx_for_file(self, segmenter_file=None):
        """Load a speech/music segmentation file and return music frame indices.

        Returns an empty list when no path is given or the file is missing
        or empty.
        """
        music_idx = []
        if segmenter_file is None or not os.path.exists(segmenter_file):
            return music_idx
        if os.path.getsize(segmenter_file) == 0:
            return music_idx
        print('loading speech/music segments...')
        bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').values
        if bounds.shape[1] == 1:  # depends on the computer platform
            bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').values
        return self.get_music_idx_from_bounds(bounds, sr=self.framessr2)

    def get_features(self, df, class_label='Country'):
        """Collect features and labels for the music frames of every file in df.

        Parameters
        ----------
        df : DataFrame
            Must provide the columns 'Melspec', 'Chroma', 'Melodia', 'Speech',
            'Audio' and ``class_label``.
        class_label : str
            Column holding the class label of each recording.

        Returns
        -------
        Tuple of six DataFrames: onset patterns, MFCCs, chroma, pitch
        bihistograms, class labels and audio labels, each with one row per
        retained frame. All six are empty when no file could be used.
        """
        oplist, mflist, chlist, pblist = [], [], [], []
        clabels, aulabels = [], []
        n_files = len(df)
        for i in range(n_files):
            paths = [df[col].iloc[i] for col in ('Melspec', 'Chroma', 'Melodia')]
            if not all(os.path.exists(path) for path in paths):
                continue
            print('file ' + str(i) + ' of ' + str(n_files))
            music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
            if len(music_idx) == 0:
                # no music segments -> skip this file
                continue
            try:
                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
                ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
            except Exception as err:
                # best effort: a file whose features cannot be computed is
                # skipped, but say so instead of failing silently
                print('skipping file ' + str(i) + ': ' + str(err))
                continue
            # ideally, features should have the same number of frames
            min_n_frames = min(len(op), len(mfcc), len(ch), len(pb))
            if min_n_frames == 0:
                # no features extracted -> skip this file
                continue
            music_idx = music_idx[music_idx < min_n_frames]
            n_frames = len(music_idx)
            oplist.append(op.iloc[music_idx, :])
            mflist.append(mfcc.iloc[music_idx, :])
            chlist.append(ch.iloc[music_idx, :])
            pblist.append(pb.iloc[music_idx, :])
            clabels.append(pd.DataFrame(np.repeat(df[class_label].iloc[i], n_frames)))
            aulabels.append(pd.DataFrame(np.repeat(df['Audio'].iloc[i], n_frames)))
        collected = (oplist, mflist, chlist, pblist, clabels, aulabels)
        print(' '.join(str(len(item)) for item in collected))
        return tuple(self._concat(item) for item in collected)

    def get_op_from_melspec(self, melspec, K=None):
        """Compute onset patterns from a mel spectrogram.

        When K is given the transposed patterns are averaged into K bands
        with ``mean_K_bands``; when K is None they are returned un-averaged.
        Returns a DataFrame.
        """
        op = opm.OPMellin(win2sec=self.win2sec)
        opmellin = op.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr)
        opmel = opmellin.T
        if K is not None:
            opmel = self.mean_K_bands(opmel, K)
        return pd.DataFrame(opmel)

    def get_mfcc_from_melspec(self, melspec, deltamfcc=True, avelocalframes=True, stdlocalframes=True):
        """Compute MFCCs (optionally with deltas and local mean/std) from a mel spectrogram.

        Returns a DataFrame built from the transposed feature matrix.
        """
        mf = mfc.MFCCs()
        mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr)
        if deltamfcc:
            ffdiff = np.diff(mfcc, axis=1)
            # repeat the last difference so deltas have as many frames as the mfccs
            ffdelta = np.concatenate((ffdiff, ffdiff[:, -1, None]), axis=1)
            mfcc = np.concatenate([mfcc, ffdelta], axis=0)
        if avelocalframes:
            mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes)
        return pd.DataFrame(mfcc.T)

    def get_ave_chroma(self, chroma, avelocalframes=True, stdlocalframes=True, alignchroma=True):
        """Clean, optionally align and locally average a (bins x frames) chroma array.

        NaNs are replaced by 0 on a copy, so the caller's array is left
        untouched. With ``alignchroma`` the bins are rotated so that the bin
        with the largest total comes first. Returns a DataFrame built from
        the transposed result.
        """
        chroma = np.where(np.isnan(chroma), 0, chroma)
        if alignchroma:
            maxind = np.argmax(np.sum(chroma, axis=1))
            chroma = np.roll(chroma, -maxind, axis=0)
        if avelocalframes:
            chroma = self.average_local_frames(chroma, getstd=stdlocalframes)
        return pd.DataFrame(chroma.T)

    def average_local_frames(self, frames, getstd=False):
        """Average frames over sliding windows of ``win2`` frames, hop ``hop2``.

        Parameters
        ----------
        frames : 2-D array, (bins x frames)
        getstd : bool
            If true, the per-window std is stacked below the per-window mean.

        Returns
        -------
        Array with one column per window and ``nbins`` rows (``2 * nbins``
        when ``getstd`` is true). Input shorter than one window yields a
        single column computed over all available frames.
        """
        nbins, norigframes = frames.shape
        if norigframes < self.win2:
            nframes = 1
        else:
            nframes = int(1 + np.floor((norigframes - self.win2) / float(self.hop2)))
        nrows = 2 * nbins if getstd else nbins
        aveframes = np.empty((nrows, nframes))
        for i in range(nframes):  # loop over all texture windows
            start = i * self.hop2
            window = frames[:, start:min(start + self.win2, norigframes)]
            aveframes[:nbins, i] = np.nanmean(window, axis=1)
            if getstd:
                aveframes[nbins:, i] = np.nanstd(window, axis=1)
        return aveframes

    def mean_K_bands(self, songframes, K=40, nmels=40):
        """Average groups of adjacent mel bands so that K bands remain.

        Parameters
        ----------
        songframes : 2-D array, (frames x nmels*nbins)
            Each row holds ``nmels`` consecutive bands of equal width.
        K : int
            Number of output bands.
        nmels : int
            Number of input bands.

        Returns
        -------
        Array of shape (frames, K * nbins) where each output band is the mean
        of ``nmels // K`` consecutive input bands.
        """
        songframes = np.asarray(songframes)
        F, P = songframes.shape
        # floor division keeps these usable as slice bounds on Python 3 too
        nbins = P // nmels
        niters = nmels // K
        used = songframes[:, :K * niters * nbins]
        grouped = used.reshape(F, K, niters, nbins)
        return grouped.sum(axis=2).reshape(F, K * nbins) / float(niters)

    def nmfpitchbihist(self, frames):
        """Reduce each pitch bihistogram frame with a 2-component NMF.

        Parameters
        ----------
        frames : 2-D array, (nb*nb x frames)
            Every column is reshaped to a square (nb x nb) bihistogram.

        Returns
        -------
        Array of shape (4 * nb, frames) holding the flattened NMF factors of
        each frame; a frame whose factorisation fails is filled with zeros.
        """
        nbins, nfr = frames.shape
        npc = 2
        nb = int(np.sqrt(nbins))  # assume structure of pitch bihist is nbins*nbins
        newframes = np.empty(((nb+nb)*npc, nfr))
        for fr in range(nfr):
            pb = np.reshape(frames[:, fr], (nb, nb))
            try:
                nmfmodel = NMF(n_components=npc).fit(pb)
                W = nmfmodel.transform(pb)
                H = nmfmodel.components_.T
                newframes[:, fr] = np.concatenate((W, H)).flatten()
            except Exception:
                # best effort: keep the frame, with zeros in place of the factors
                newframes[:, fr] = 0
        return newframes

    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
        """Extract pitch bihistograms from a Melodia output file.

        Returns a DataFrame, or an empty list when the file is missing or no
        path is given.
        """
        pbihist = []
        if melodia_file is None or not os.path.exists(melodia_file):
            return pbihist
        print('extracting pitch bihist from melodia...')
        extractor = pbi.PitchBihist(win2sec=self.win2sec)
        pbihist = extractor.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
        if nmfpb is True:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        if scale:
            pbihist = self._standardize(pbihist)
        return pbihist

    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, root=None):
        """Load a precomputed pitch bihistogram matching ``melodia_file``.

        Parameters
        ----------
        melodia_file : str
            Only its base name is used, to locate the file under ``root``.
        nmfpb : bool
            If True, reduce the bihistogram with ``nmfpitchbihist``.
        scale : bool
            If true, standardise by the recording's mean and std.
        root : str or None
            Directory holding the precomputed files; defaults to the
            original hard-coded location for the current ``win2sec``.

        Returns
        -------
        DataFrame of pitch bihistogram descriptors.
        """
        base = os.path.basename(melodia_file)
        if root is None:
            root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
        print('load precomputed pitch bihist ' + root)
        if self.win2sec == 8:
            # NOTE(review): this branch yields a DataFrame, but nmfpitchbihist
            # indexes its input as frames[:, fr], which a DataFrame does not
            # support, so nmfpb=True raises here. Confirm the layout of the
            # 8-sec files before converting.
            pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:, 1:]
        else:
            pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
        if nmfpb is True:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        print(pbihist.shape)
        if scale:
            pbihist = self._standardize(pbihist)
        return pbihist
Maria@4 257