Maria@4
|
1 # -*- coding: utf-8 -*-
|
Maria@4
|
2 """
|
Maria@4
|
3 Created on Thu Mar 16 01:50:57 2017
|
Maria@4
|
4
|
Maria@4
|
5 @author: mariapanteli
|
Maria@4
|
6 """
|
Maria@4
|
7
|
Maria@4
|
8 import numpy as np
|
Maria@4
|
9 import pandas as pd
|
Maria@4
|
10 import os
|
Maria@4
|
11 from sklearn.decomposition import NMF
|
Maria@4
|
12 import OPMellin as opm
|
Maria@4
|
13 import MFCC as mfc
|
Maria@4
|
14 import PitchBihist as pbi
|
Maria@4
|
15
|
Maria@4
|
16
|
Maria@4
|
class FeatureLoader(object):
    """Load frame-level audio descriptors from per-recording CSV files.

    The loader reads mel-spectrogram, chroma, Melodia and speech/music
    segmentation files and turns them into pandas DataFrames holding onset
    patterns, MFCCs, averaged chroma and pitch bihistograms, one row per
    texture window.

    Attributes set by the constructor:
        win2sec   -- texture window length in seconds.
        sr        -- audio sampling rate (fixed to 44100 Hz).
        win1/hop1 -- short-time window (0.04 s) and hop (win1 / 8) in samples.
        framessr  -- short-time frames per second (sr / hop1).
        win2/hop2 -- texture window (win2sec) and hop (0.5 s) in frames.
        framessr2 -- texture windows per second (framessr / hop2).
    """

    # Directories holding precomputed pitch bihistograms. Recordings whose
    # file name contains 'SampleAudio' are read from the first directory,
    # everything else from the second.
    _PRECOMP_PB_ROOT_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/'
    _PRECOMP_PB_ROOT_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'

    def __init__(self, win2sec=8):
        """Derive all window/hop sizes from the texture window length."""
        self.win2sec = float(win2sec)
        self.sr = 44100.
        self.win1 = self._round_half_up(0.04 * self.sr)
        # win1 / 8 is exactly 220.5: Python 2's round() gives 221 whereas
        # Python 3's round() gives 220, so the rounding rule is pinned here
        # to keep hop1 (and every rate derived from it) at the original value.
        self.hop1 = self._round_half_up(self.win1 / 8.)
        self.framessr = self.sr / float(self.hop1)
        self.win2 = self._round_half_up(self.win2sec * self.framessr)
        self.hop2 = self._round_half_up(0.5 * self.framessr)
        self.framessr2 = self.framessr / float(self.hop2)

    @staticmethod
    def _round_half_up(value):
        """Round a non-negative number to the nearest int, ties going up."""
        return int(np.floor(value + 0.5))

    @staticmethod
    def _standardize(frames):
        """Scale frames by the global mean and std of the whole recording.

        Works for both DataFrames and arrays; NaNs are ignored when the
        statistics are computed. A zero standard deviation is not guarded
        against and yields inf/NaN, as element-wise division does.
        """
        values = np.asarray(frames, dtype=float)
        return (frames - np.nanmean(values)) / np.nanstd(values)

    def _load_frames(self, csv_file, stop_sec):
        """Read a header-less frame CSV and return it as a (bins, frames) array.

        NaNs are replaced by 0 and only the first ``stop_sec`` seconds
        (counted at ``self.framessr`` rows per second) are kept.
        """
        songframes = pd.read_csv(csv_file, engine="c", header=None)
        # fillna touches only the NaN cells; indexing .iloc with the output
        # of np.where would select the cross-product of rows and columns.
        songframes = songframes.fillna(0)
        n_stop = int(np.ceil(stop_sec * self.framessr))
        return songframes.iloc[:n_stop, :].values.T

    def get_op_mfcc_for_file(self, melspec_file=None, scale=True, stop_sec=30.0):
        """Return (onset patterns, MFCCs) for one mel-spectrogram CSV.

        Two empty lists are returned when the file is missing. With
        ``scale`` each feature is standardised over the whole recording.
        """
        op = []
        mfcc = []
        if melspec_file is None or not os.path.exists(melspec_file):
            return op, mfcc
        print('extracting onset patterns and mfccs...')
        melspec = self._load_frames(melspec_file, stop_sec)
        op = self.get_op_from_melspec(melspec, K=2)
        mfcc = self.get_mfcc_from_melspec(melspec)
        if scale:
            # scale all frames by mean and std of recording
            op = self._standardize(op)
            mfcc = self._standardize(mfcc)
        return op, mfcc

    def get_chroma_for_file(self, chroma_file=None, scale=True, stop_sec=30.0):
        """Return averaged chroma for one chroma CSV ([] if it is missing)."""
        ch = []
        if chroma_file is None or not os.path.exists(chroma_file):
            return ch
        print('extracting chroma...')
        chroma = self._load_frames(chroma_file, stop_sec)
        ch = self.get_ave_chroma(chroma)
        if scale:
            # scale all frames by mean and std of recording
            ch = self._standardize(ch)
        return ch

    def get_music_idx_from_bounds(self, bounds, sr=None):
        """Convert segment boundaries into indices of music-only windows.

        ``bounds`` is a 2-D array with one row per segment: start (seconds),
        duration (seconds) and a label, where 'm' marks music. ``sr`` is the
        number of texture windows per second and defaults to
        ``self.framessr2``.

        Returns a sorted integer array without duplicates, or an empty list
        when ``bounds`` is empty or holds no music segment.
        """
        music_idx = []
        if len(bounds) == 0:
            # bounds is empty list
            return music_idx
        if sr is None:
            sr = self.framessr2
        music_bounds = np.where(bounds[:, 2] == 'm')[0]
        if len(music_bounds) == 0:
            # no music segments
            return music_idx
        win2_frames = int(np.round(self.win2sec * self.framessr2))
        bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
        duration_in_frames = np.ceil(np.array(bounds[:, 1], dtype=float) * sr)
        for music_bound in music_bounds:
            lower_bound = bounds_in_frames[music_bound]
            # A window is indexed by its start, so the last win2_frames
            # starts of a segment are dropped: those windows would run past
            # the end of the segment.
            upper_bound = lower_bound + duration_in_frames[music_bound] - win2_frames
            music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
        # np.unique sorts and also removes duplicates from overlapping segments
        return np.unique(np.concatenate(music_idx))

    def get_music_idx_for_file(self, segmenter_file=None):
        """Return music window indices for a speech/music segmentation file.

        An empty list is returned when the file is missing or empty.
        """
        music_idx = []
        if segmenter_file is None or not os.path.exists(segmenter_file):
            return music_idx
        if os.path.getsize(segmenter_file) == 0:
            return music_idx
        print('loading speech/music segments...')
        bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').values
        if bounds.shape[1] == 1:  # depends on the computer platform
            bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').values
        return self.get_music_idx_from_bounds(bounds, sr=self.framessr2)

    def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False):
        """Extract all features for every recording listed in ``df``.

        ``df`` needs the columns 'Melspec', 'Chroma', 'Melodia', 'Speech',
        'Audio' and ``class_label``. Recordings with a missing feature file,
        without music segments, or whose extraction fails are skipped.

        Returns a 6-tuple of DataFrames: onset patterns, MFCCs, chroma,
        pitch bihistograms, class labels and audio labels, all with one row
        per retained window. Six empty DataFrames are returned when no
        recording yields any features.
        """
        oplist = []
        mflist = []
        chlist = []
        pblist = []
        clabels = []
        aulabels = []
        n_files = len(df)
        n_stop = int(np.ceil(stop_sec * self.framessr2))
        for i in range(n_files):
            melspec_file = df['Melspec'].iloc[i]
            chroma_file = df['Chroma'].iloc[i]
            melodia_file = df['Melodia'].iloc[i]
            if not (os.path.exists(melspec_file) and os.path.exists(chroma_file)
                    and os.path.exists(melodia_file)):
                continue
            print('file ' + str(i) + ' of ' + str(n_files))
            music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
            if len(music_idx) == 0:
                # no music segments -> skip this file
                continue
            try:
                op, mfcc = self.get_op_mfcc_for_file(melspec_file, stop_sec=stop_sec)
                ch = self.get_chroma_for_file(chroma_file, stop_sec=stop_sec)
                pb = self.get_pb_for_file(melodia_file, precomp_melody=precomp_melody,
                                          stop_sec=stop_sec)
            except Exception as err:
                # best effort: one broken recording must not stop the batch,
                # but say so instead of dropping it silently
                print('skipping file ' + str(i) + ': ' + repr(err))
                continue
            print('%d %d %d %d %d' % (n_stop, len(op), len(mfcc), len(ch), len(pb)))
            # ideally, features should have the same number of frames
            min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)])
            if min_n_frames == 0:
                # no features extracted -> skip this file
                continue
            music_idx = music_idx[music_idx < min_n_frames]
            n_frames = len(music_idx)
            oplist.append(op.iloc[music_idx, :])
            mflist.append(mfcc.iloc[music_idx, :])
            chlist.append(ch.iloc[music_idx, :])
            pblist.append(pb.iloc[music_idx, :])
            clabels.append(pd.DataFrame(np.repeat(df[class_label].iloc[i], n_frames)))
            aulabels.append(pd.DataFrame(np.repeat(df['Audio'].iloc[i], n_frames)))
        collected = (oplist, mflist, chlist, pblist, clabels, aulabels)
        print(' '.join(str(len(frames)) for frames in collected))
        if not oplist:
            # pd.concat raises on an empty list
            return tuple(pd.DataFrame() for _ in collected)
        return tuple(pd.concat(frames) for frames in collected)

    def get_op_from_melspec(self, melspec, K=None):
        """Compute onset patterns from a mel spectrogram.

        The result of ``OPMellin.get_opmellin_from_melspec`` is transposed
        and, when ``K`` is given, its mel bands are averaged down to ``K``
        bands with ``mean_K_bands``. Returns a DataFrame.
        """
        op = opm.OPMellin(win2sec=self.win2sec)
        opmellin = op.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr)
        opmel = opmellin.T
        if K is not None:
            opmel = self.mean_K_bands(opmel, K)
        return pd.DataFrame(opmel)

    def get_mfcc_from_melspec(self, melspec, deltamfcc=True, avelocalframes=True, stdlocalframes=True):
        """Compute MFCCs, optionally with deltas and texture-window statistics.

        Returns a DataFrame holding the transposed feature matrix.
        """
        mf = mfc.MFCCs()
        mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr)
        if deltamfcc:
            ffdiff = np.diff(mfcc, axis=1)
            # repeat the last difference so the deltas have as many frames
            # as the coefficients
            ffdelta = np.concatenate((ffdiff, ffdiff[:, -1, None]), axis=1)
            mfcc = np.concatenate([mfcc, ffdelta], axis=0)
        if avelocalframes:
            mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes)
        return pd.DataFrame(mfcc.T)

    def get_ave_chroma(self, chroma, alignchroma=True, avelocalframes=True, stdlocalframes=True):
        """Clean, align and average a (bins, frames) chroma matrix.

        NaNs become 0 (on a copy; the argument is left untouched). With
        ``alignchroma`` the bins are rotated so that the bin with the
        largest total magnitude comes first. Returns a DataFrame with one
        row per texture window.
        """
        chroma = np.where(np.isnan(chroma), 0, chroma)
        if alignchroma:
            maxind = np.argmax(np.sum(chroma, axis=1))  # bin with max magnitude across time
            chroma = np.roll(chroma, -maxind, axis=0)
        if avelocalframes:
            chroma = self.average_local_frames(chroma, getstd=stdlocalframes)
        return pd.DataFrame(chroma.T)

    def average_local_frames(self, frames, getstd=False):
        """Summarise a (bins, frames) matrix over sliding texture windows.

        Windows are ``self.win2`` frames long and ``self.hop2`` frames apart;
        input shorter than one window yields a single window over all
        frames. Returns a (bins, windows) array of NaN-ignoring means, with
        the per-bin standard deviations stacked below when ``getstd`` is set.
        """
        nbins, norigframes = frames.shape
        if norigframes < self.win2:
            nframes = 1
        else:
            nframes = int(1 + np.floor((norigframes - self.win2) / float(self.hop2)))
        nrows = 2 * nbins if getstd else nbins
        aveframes = np.empty((nrows, nframes))
        for i in range(nframes):  # loop over all texture windows
            start = i * self.hop2
            window = frames[:, start:min(start + self.win2, norigframes)]
            aveframes[:nbins, i] = np.nanmean(window, axis=1)
            if getstd:
                aveframes[nbins:, i] = np.nanstd(window, axis=1)
        return aveframes

    def mean_K_bands(self, songframes, K=40, nmels=40):
        """Average groups of adjacent mel bands down to ``K`` bands.

        The columns of ``songframes`` are treated as ``nmels`` consecutive
        bands of equal width; each output band is the mean of
        ``nmels // K`` input bands.
        """
        F, P = songframes.shape
        # floor division keeps the slice bounds integral on Python 3 as well
        nbins = P // nmels
        niters = nmels // K
        procframes = np.zeros([F, nbins * K])
        for k in range(K):
            for j in range(k * niters, (k + 1) * niters):
                procframes[:, (k * nbins):((k + 1) * nbins)] += songframes[:, (j * nbins):((j + 1) * nbins)]
        procframes /= float(niters)
        return procframes

    def nmfpitchbihist(self, frames):
        """Compress each pitch bihistogram frame with a 2-component NMF.

        Every column of ``frames`` is reshaped to a square matrix and
        factorised; the flattened activations and bases replace the frame.
        Frames for which the factorisation fails are set to zeros.
        """
        nbins, nfr = frames.shape
        npc = 2
        nb = int(np.sqrt(nbins))  # assume structure of pitch bihist is nbins*nbins
        newframes = np.zeros(((nb + nb) * npc, nfr))
        for fr in range(nfr):
            pb = np.reshape(frames[:, fr], (nb, nb))
            try:
                nmfmodel = NMF(n_components=npc).fit(pb)
                W = nmfmodel.transform(pb)
                H = nmfmodel.components_.T
                newframes[:, fr] = np.concatenate((W, H)).flatten()
            except Exception:
                # deliberate best effort: a frame that cannot be factorised
                # is represented by zeros
                newframes[:, fr] = 0
        return newframes

    def get_pb_for_file(self, melodia_file, precomp_melody=False, nmfpb=True, scale=True, stop_sec=30.0):
        """Return the pitch bihistogram features for one recording.

        The bihistogram is either loaded precomputed or extracted from the
        Melodia file, optionally NMF-compressed (then returned as a
        DataFrame with one row per frame) and optionally standardised.
        An empty list is returned when no input file was found.
        """
        if precomp_melody:
            pbihist = self.load_precomp_pb_from_melodia(melodia_file=melodia_file, stop_sec=stop_sec)
        else:
            pbihist = self.extract_pb_from_melodia(melodia_file=melodia_file, stop_sec=stop_sec)
        if len(pbihist) == 0:
            # no file was found
            return pbihist
        if nmfpb is True:
            pbihist = self.nmfpitchbihist(pbihist)
            pbihist = pd.DataFrame(pbihist.T)
        if scale:
            # scale all frames by mean and std of recording
            pbihist = self._standardize(pbihist)
        return pbihist

    def extract_pb_from_melodia(self, melodia_file=None, stop_sec=30.0):
        """Compute the pitch bihistogram from a Melodia file ([] if missing)."""
        pbihist = []
        if melodia_file is None or not os.path.exists(melodia_file):
            return pbihist
        print('extracting pitch bihist from melodia...')
        pb = pbi.PitchBihist(win2sec=self.win2sec)
        return pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)

    def load_precomp_pb_from_melodia(self, melodia_file=None, stop_sec=30.0):
        """Load a precomputed pitch bihistogram matching ``melodia_file``.

        The precomputed file name is built from the part of the base name
        after its last underscore. Returns a (bins, frames) array cut to the
        first ``stop_sec`` seconds, or an empty list when the precomputed
        file does not exist.
        """
        pbihist = []
        if melodia_file is None:
            return pbihist
        base = os.path.basename(melodia_file)
        if 'SampleAudio' in base:
            root = self._PRECOMP_PB_ROOT_SM
        else:
            root = self._PRECOMP_PB_ROOT_BL
        base = base.split('_')[-1].split('.csv')[0] + '_vamp_mtg-melodia_melodia_melody.csv'
        print('load precomputed pitch bihist ' + root)
        precomp_file = os.path.join(root, base)
        if not os.path.exists(precomp_file):
            return pbihist
        # ndmin=2 keeps a single-row file two-dimensional
        pbihist = np.loadtxt(precomp_file, delimiter=',', ndmin=2).T
        n_stop = int(np.ceil(stop_sec * self.framessr2))
        # frames run along axis 1 after the transpose, so that is the axis to cut
        return pbihist[:, :n_stop]
|
Maria@35
|
279
|
Maria@35
|
280
|
Maria@35
|
281 # def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
|
Maria@35
|
282 # if not os.path.exists(melodia_file):
|
Maria@35
|
283 # return []
|
Maria@35
|
284 # print 'extracting pitch bihist from melodia...'
|
Maria@35
|
285 # pb = pbi.PitchBihist(win2sec=self.win2sec)
|
Maria@35
|
286 # pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)
|
Maria@35
|
287 # if nmfpb is True:
|
Maria@35
|
288 # pbihist = self.nmfpitchbihist(pbihist)
|
Maria@35
|
289 # pbihist = pd.DataFrame(pbihist.T)
|
Maria@35
|
290 # if scale:
|
Maria@35
|
291 # # scale all frames by mean and std of recording
|
Maria@35
|
292 # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
|
Maria@35
|
293 # return pbihist
|
Maria@35
|
294
|
Maria@35
|
295
|
Maria@35
|
296 # def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
|
Maria@35
|
297 # base = os.path.basename(melodia_file)
|
Maria@35
|
298 # root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
|
Maria@35
|
299 # root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'
|
Maria@35
|
300 # root_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/'
|
Maria@35
|
301 # if 'SampleAudio' in base:
|
Maria@35
|
302 # root = root_SM
|
Maria@35
|
303 # else:
|
Maria@35
|
304 # root = root_BL
|
Maria@35
|
305 # base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
|
Maria@35
|
306 # print 'load precomputed pitch bihist', root
|
Maria@35
|
307 # #if self.win2sec == 8:
|
Maria@35
|
308 # # pbihist = pd.read_csv(os.path.join(root, base))
|
Maria@35
|
309 # #else:
|
Maria@35
|
310 # if 1:
|
Maria@35
|
311 # pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
|
Maria@35
|
312 # if nmfpb is True:
|
Maria@35
|
313 # pbihist = self.nmfpitchbihist(pbihist)
|
Maria@35
|
314 # pbihist = pd.DataFrame(pbihist.T)
|
Maria@35
|
315 # n_stop = np.int(np.ceil(stop_sec * self.framessr2))
|
Maria@35
|
316 # pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :]
|
Maria@35
|
317 # print pbihist.shape
|
Maria@35
|
318 # if scale:
|
Maria@35
|
319 # # scale all frames by mean and std of recording
|
Maria@35
|
320 # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
|
Maria@35
|
321 # return pbihist
|
Maria@4
|
322
|