comparison scripts/load_features.py @ 4:e50c63cf96be branch-tests

rearranging folders
author Maria Panteli
date Mon, 11 Sep 2017 11:51:50 +0100
parents
children a35bd818d8e9
comparison
equal deleted inserted replaced
3:230a0cf17de0 4:e50c63cf96be
1 # -*- coding: utf-8 -*-
2 """
3 Created on Thu Mar 16 01:50:57 2017
4
5 @author: mariapanteli
6 """
7
8 import numpy as np
9 import pandas as pd
10 import os
11 from sklearn.decomposition import NMF
12 import OPMellin as opm
13 import MFCC as mfc
14 import PitchBihist as pbi
15
16
class FeatureLoader:
    """Load and assemble frame-level audio features for a set of recordings.

    Low-level descriptors (mel spectrogram, chroma, melodia pitch output) are
    read from files and summarised over texture windows of ``win2sec``
    seconds. Only the frames that a speech/music segmentation labels as
    music are kept by ``get_features``.
    """

    # Maximum number of low-level frames read per recording (the original
    # code comments describe this as "only first 1.5 minutes").
    MAX_FRAMES = 18000

    def __init__(self, win2sec=8):
        """Derive window and hop sizes from the texture window length.

        Parameters
        ----------
        win2sec : number
            Length of the texture window in seconds.
        """
        self.win2sec = float(win2sec)
        self.sr = 44100.
        # Explicit half-up rounding: the builtin round() rounds 220.5 to 221
        # on Python 2 but to 220 on Python 3, which would silently change
        # every size derived below.
        self.win1 = self._round_half_up(0.04 * self.sr)
        self.hop1 = self._round_half_up(self.win1 / 8.)
        self.framessr = self.sr / float(self.hop1)
        self.win2 = self._round_half_up(self.win2sec * self.framessr)
        self.hop2 = self._round_half_up(0.5 * self.framessr)
        self.framessr2 = self.framessr / float(self.hop2)

    @staticmethod
    def _round_half_up(value):
        """Round a non-negative number to the nearest int, halves going up."""
        return int(np.floor(value + 0.5))

    @staticmethod
    def _file_exists(path):
        """Return True if ``path`` is not None and exists on disk."""
        return path is not None and os.path.exists(path)

    @staticmethod
    def _standardize(frames):
        """Scale all values by the global mean and std of the recording."""
        values = np.asarray(frames, dtype=float)
        return (frames - np.nanmean(values)) / np.nanstd(values)

    def _load_frame_matrix(self, csv_file):
        """Read a header-less csv of frames and return it transposed.

        NaN entries are replaced by 0 and at most ``MAX_FRAMES`` rows are
        kept. The returned array has one column per csv row.
        """
        songframes = pd.read_csv(csv_file, engine="c", header=None)
        # fillna touches only the NaN cells; indexing .iloc with the output
        # of np.where would zero the whole rows-by-columns cross product.
        songframes = songframes.fillna(0)
        songframes = songframes.iloc[0:self.MAX_FRAMES, :]
        return songframes.values.T

    def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
        """Extract onset patterns and MFCCs from a mel spectrogram csv file.

        Parameters
        ----------
        melspec_file : str or None
            Path to the csv file with the mel spectrogram frames.
        scale : bool
            If True, standardise each feature by its global mean and std.

        Returns
        -------
        tuple
            ``(op, mfcc)`` DataFrames, or two empty lists when the file is
            missing.
        """
        if not self._file_exists(melspec_file):
            return [], []
        print('extracting onset patterns and mfccs...')
        melspec = self._load_frame_matrix(melspec_file)
        op = self.get_op_from_melspec(melspec, K=2)
        mfcc = self.get_mfcc_from_melspec(melspec)
        if scale:
            # scale all frames by mean and std of recording
            op = self._standardize(op)
            mfcc = self._standardize(mfcc)
        return op, mfcc

    def get_chroma_for_file(self, chroma_file=None, scale=True):
        """Extract averaged chroma features from a chroma csv file.

        Parameters
        ----------
        chroma_file : str or None
            Path to the csv file with the chroma frames.
        scale : bool
            If True, standardise the features by their global mean and std.

        Returns
        -------
        pandas.DataFrame or list
            The chroma features, or an empty list when the file is missing.
        """
        if not self._file_exists(chroma_file):
            return []
        print('extracting chroma...')
        chroma = self._load_frame_matrix(chroma_file)
        ch = self.get_ave_chroma(chroma)
        if scale:
            # scale all frames by mean and std of recording
            ch = self._standardize(ch)
        return ch

    def get_music_idx_from_bounds(self, bounds, sr=None):
        """Convert speech/music segment bounds to indices of music frames.

        Parameters
        ----------
        bounds : array-like
            One row per segment: column 0 is the segment start, column 1 its
            duration (presumably both in seconds -- confirm against the
            segmenter output) and column 2 the label, ``'m'`` marking music.
        sr : float or None
            Frame rate used to convert times to frame indices. Defaults to
            ``self.framessr2`` when None.

        Returns
        -------
        numpy.ndarray or list
            Sorted, duplicate-free frame indices of music segments, or an
            empty list when there are none.
        """
        if len(bounds) == 0:
            # bounds is empty list
            return []
        bounds = np.asarray(bounds)
        if sr is None:
            sr = self.framessr2
        music_bounds = np.where(bounds[:, 2] == 'm')[0]
        if len(music_bounds) == 0:
            # no music segments (covers the "all segments are speech" case)
            return []
        half_win_hop = self._round_half_up(0.5 * self.win2 / float(self.hop2))
        bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
        duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
        segments = []
        for music_bound in music_bounds:
            lower_bound = max(0, int(bounds_in_frames[music_bound]) - half_win_hop)
            upper_bound = lower_bound + int(duration_in_frames[music_bound]) - half_win_hop
            segments.append(np.arange(lower_bound, upper_bound, dtype=int))
        # np.unique both sorts and removes the duplicates that overlapping
        # segments would otherwise introduce.
        return np.unique(np.concatenate(segments))

    def get_music_idx_for_file(self, segmenter_file=None):
        """Load a speech/music segmentation file and return music frame indices.

        Parameters
        ----------
        segmenter_file : str or None
            Path to the segmentation file (tab- or comma-delimited).

        Returns
        -------
        numpy.ndarray or list
            Music frame indices, or an empty list when the file is missing
            or empty.
        """
        music_idx = []
        if self._file_exists(segmenter_file) and os.path.getsize(segmenter_file) > 0:
            print('loading speech/music segments...')
            bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').values
            if bounds.shape[1] == 1:  # depends on the computer platform
                bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').values
            music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2)
        return music_idx

    def get_features(self, df, class_label='Country'):
        """Collect the music frames of all feature types for every recording.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per recording with the columns ``'Melspec'``,
            ``'Chroma'``, ``'Melodia'``, ``'Speech'``, ``'Audio'`` and
            ``class_label``.
        class_label : str
            Name of the column holding the class of each recording.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(op, mfcc, chroma, pitch bihist, class labels, audio labels)``
            concatenated over all recordings that yielded features.

        Raises
        ------
        ValueError
            Raised by ``pd.concat`` when no recording yielded any features.
        """
        oplist = []
        mflist = []
        chlist = []
        pblist = []
        clabels = []
        aulabels = []
        n_files = len(df)
        for i in range(n_files):
            feature_files = [df[col].iloc[i] for col in ('Melspec', 'Chroma', 'Melodia')]
            if not all(os.path.exists(path) for path in feature_files):
                continue
            print('file ' + str(i) + ' of ' + str(n_files))
            music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
            if len(music_idx) == 0:
                # no music segments -> skip this file
                continue
            try:
                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
                ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
            except Exception as err:
                # best effort: a recording whose features cannot be extracted
                # is skipped, but the reason is reported instead of hidden
                print('skipping file ' + str(i) + ': ' + str(err))
                continue
            # ideally, features should have the same number of frames
            min_n_frames = min(len(op), len(mfcc), len(ch), len(pb))
            if min_n_frames == 0:
                # no features extracted -> skip this file
                continue
            music_idx = music_idx[music_idx < min_n_frames]
            n_frames = len(music_idx)
            oplist.append(op.iloc[music_idx, :])
            mflist.append(mfcc.iloc[music_idx, :])
            chlist.append(ch.iloc[music_idx, :])
            pblist.append(pb.iloc[music_idx, :])
            clabels.append(pd.DataFrame(np.repeat(df[class_label].iloc[i], n_frames)))
            aulabels.append(pd.DataFrame(np.repeat(df['Audio'].iloc[i], n_frames)))
        all_lists = (oplist, mflist, chlist, pblist, clabels, aulabels)
        print(' '.join(str(len(item)) for item in all_lists))
        return tuple(pd.concat(item) for item in all_lists)

    def get_op_from_melspec(self, melspec, K=None):
        """Compute onset patterns from a mel spectrogram.

        Parameters
        ----------
        melspec : numpy.ndarray
            Mel spectrogram passed on to ``OPMellin``.
        K : int or None
            If given, mel bands are averaged into ``K`` groups with
            ``mean_K_bands``; if None the onset patterns are returned as is.

        Returns
        -------
        pandas.DataFrame
            Onset pattern features, one row per frame.
        """
        op = opm.OPMellin(win2sec=self.win2sec)
        opmellin = op.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr)
        opmel = opmellin.T
        if K is not None:
            opmel = self.mean_K_bands(opmel, K)
        return pd.DataFrame(opmel)

    def get_mfcc_from_melspec(self, melspec, deltamfcc=True, avelocalframes=True, stdlocalframes=True):
        """Compute MFCC features from a mel spectrogram.

        Parameters
        ----------
        melspec : numpy.ndarray
            Mel spectrogram passed on to ``MFCCs``.
        deltamfcc : bool
            If True, append frame-to-frame differences of the MFCCs.
        avelocalframes : bool
            If True, summarise frames over texture windows.
        stdlocalframes : bool
            If True, the window summary holds the std as well as the mean.

        Returns
        -------
        pandas.DataFrame
            MFCC features, one row per frame.
        """
        mf = mfc.MFCCs()
        mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr)
        if deltamfcc:
            ffdiff = np.diff(mfcc, axis=1)
            if ffdiff.shape[1] > 0:
                # repeat the last delta so deltas and MFCCs have equal length
                padding = ffdiff[:, -1, None]
            else:
                # fewer than two frames: there is no delta to repeat
                padding = np.zeros((mfcc.shape[0], mfcc.shape[1]))
            ffdelta = np.concatenate((ffdiff, padding), axis=1)
            mfcc = np.concatenate([mfcc, ffdelta], axis=0)
        if avelocalframes:
            mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes)
        return pd.DataFrame(mfcc.T)

    def get_ave_chroma(self, chroma, avelocalframes=True, stdlocalframes=True, alignchroma=True):
        """Align chroma to its strongest bin and summarise over windows.

        Parameters
        ----------
        chroma : numpy.ndarray
            Chroma matrix with one row per bin and one column per frame.
            The input is not modified.
        avelocalframes : bool
            If True, summarise frames over texture windows.
        stdlocalframes : bool
            If True, the window summary holds the std as well as the mean.
        alignchroma : bool
            If True, rotate the bins so that the bin with the largest total
            energy comes first.

        Returns
        -------
        pandas.DataFrame
            Chroma features, one row per frame.
        """
        chroma = np.asarray(chroma)
        # build a new array rather than overwriting NaNs in the caller's data
        chroma = np.where(np.isnan(chroma), 0, chroma)
        if alignchroma:
            maxind = np.argmax(np.sum(chroma, axis=1))
            chroma = np.roll(chroma, -maxind, axis=0)
        if avelocalframes:
            chroma = self.average_local_frames(chroma, getstd=stdlocalframes)
        return pd.DataFrame(chroma.T)

    def average_local_frames(self, frames, getstd=False):
        """Summarise frames over windows of ``win2`` frames, hop ``hop2``.

        Parameters
        ----------
        frames : numpy.ndarray
            Matrix with one row per bin and one column per frame.
        getstd : bool
            If True, the std of each window is stacked below its mean.

        Returns
        -------
        numpy.ndarray
            Matrix with ``nbins`` rows (``2 * nbins`` when ``getstd``) and
            one column per window. Input shorter than one window yields a
            single column computed over all available frames.
        """
        nbins, norigframes = frames.shape
        if norigframes < self.win2:
            nframes = 1
        else:
            nframes = int(1 + np.floor((norigframes - self.win2) / float(self.hop2)))
        nrows = 2 * nbins if getstd else nbins
        aveframes = np.empty((nrows, nframes))
        for i in range(nframes):  # loop over all texture windows
            start = i * self.hop2
            window = frames[:, start:min(start + self.win2, norigframes)]
            meanf = np.nanmean(window, axis=1)
            if getstd:
                aveframes[:, i] = np.concatenate((meanf, np.nanstd(window, axis=1)))
            else:
                aveframes[:, i] = meanf
        return aveframes

    def mean_K_bands(self, songframes, K=40, nmels=40):
        """Average groups of consecutive mel bands into ``K`` bands.

        Parameters
        ----------
        songframes : numpy.ndarray
            Matrix with one row per frame whose columns hold ``nmels``
            consecutive blocks of equal width, one block per mel band.
        K : int
            Number of output bands.
        nmels : int
            Number of input mel bands.

        Returns
        -------
        numpy.ndarray
            Matrix with one row per frame and ``K`` blocks of columns, each
            the mean of ``nmels // K`` consecutive input blocks.
        """
        songframes = np.asarray(songframes)
        F, P = songframes.shape
        # floor division keeps these usable as slice bounds on Python 3
        nbins = P // nmels  # must be 200 bins
        niters = nmels // K
        used = songframes[:, :K * niters * nbins]
        grouped = used.reshape(F, K, niters, nbins)
        procframes = grouped.sum(axis=2) / float(niters)
        return procframes.reshape(F, K * nbins)

    def nmfpitchbihist(self, frames):
        """Reduce each pitch bihistogram frame with a 2-component NMF.

        Parameters
        ----------
        frames : array-like
            Matrix with one column per frame; each column is reshaped to a
            square matrix, so the number of rows must be a perfect square.

        Returns
        -------
        numpy.ndarray
            Matrix with one column per frame holding the flattened NMF
            factors. Frames for which the NMF fails are set to zero.
        """
        # asarray makes positional indexing work for DataFrame input too
        frames = np.asarray(frames)
        nbins, nfr = frames.shape
        npc = 2
        nb = int(np.sqrt(nbins))  # assume structure of pitch bihist is nbins*nbins
        newframes = np.zeros(((nb + nb) * npc, nfr))
        for fr in range(nfr):
            pb = np.reshape(frames[:, fr], (nb, nb))
            try:
                nmfmodel = NMF(n_components=npc).fit(pb)
                W = nmfmodel.transform(pb)
                H = nmfmodel.components_.T
                newframes[:, fr] = np.concatenate((W, H)).flatten()
            except Exception:
                # best effort: a frame that cannot be factorised stays zero
                newframes[:, fr] = 0
        return newframes

    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
        """Extract pitch bihistogram features from a melodia output file.

        Parameters
        ----------
        melodia_file : str or None
            Path to the melodia output file.
        nmfpb : bool
            If True, reduce each frame with ``nmfpitchbihist``.
        scale : bool
            If True, standardise the features by their global mean and std.

        Returns
        -------
        pandas.DataFrame or list
            The pitch bihistogram features, or an empty list when the file
            is missing.
        """
        if not self._file_exists(melodia_file):
            return []
        print('extracting pitch bihist from melodia...')
        extractor = pbi.PitchBihist(win2sec=self.win2sec)
        pbihist = extractor.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
        if nmfpb is True:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        if scale:
            # scale all frames by mean and std of recording
            pbihist = self._standardize(pbihist)
        return pbihist

    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, root=None):
        """Load precomputed pitch bihistogram features for a melodia file.

        Parameters
        ----------
        melodia_file : str
            Path of the melodia file; only its base name is used to locate
            the precomputed file inside ``root``.
        nmfpb : bool
            If True, reduce each frame with ``nmfpitchbihist``.
        scale : bool
            If True, standardise the features by their global mean and std.
        root : str or None
            Directory holding the precomputed files. Defaults to the
            original hard-coded location for the current ``win2sec``.

        Returns
        -------
        pandas.DataFrame
            The pitch bihistogram features.
        """
        if root is None:
            root = '/import/c4dm-05/mariap/Melodia-melody-' + str(int(self.win2sec)) + 'sec/'
        base = os.path.basename(melodia_file)
        print('load precomputed pitch bihist ' + root)
        if self.win2sec == 8:
            # NOTE(review): unlike the branch below this one is not
            # transposed -- confirm the layout of the 8-sec files.
            pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:, 1:]
        else:
            pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
        if nmfpb is True:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        print(pbihist.shape)
        if scale:
            # scale all frames by mean and std of recording
            pbihist = self._standardize(pbihist)
        return pbihist
257
258
259 # def get_pb_chroma_for_file(self, chroma_file=None, scale=True):
260 # ch = []
261 # pb = []
262 # if not os.path.exists(chroma_file):
263 # return ch, pb
264 # songframes = pd.read_csv(chroma_file, engine="c", header=None)
265 # songframes.iloc[np.where(np.isnan(songframes))] = 0
266 # songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes
267 # chroma = songframes.get_values().T
268 # ch = self.get_ave_chroma(chroma)
269 # pb = self.get_pbihist_from_chroma(chroma)
270 # if scale:
271 # # scale all frames by mean and std of recording
272 # ch = (ch - np.nanmean(ch)) / np.nanstd(ch)
273 # pb = (pb - np.nanmean(pb)) / np.nanstd(pb)
274 # return ch, pb
275
276
277 # def get_pbihist_from_chroma(self, chroma, alignchroma=True, nmfpb=True):
278 # pb = pbi.PitchBihist(win2sec=self.win2sec)
279 # chroma[np.where(np.isnan(chroma))] = 0
280 # if alignchroma:
281 # maxind = np.argmax(np.sum(chroma, axis=1))
282 # chroma = np.roll(chroma, -maxind, axis=0)
283 # pbihist = pb.get_pitchbihist_from_chroma(chroma=chroma, chromasr=self.framessr)
284 # if nmfpb is True:
285 # pbihist = self.nmfpitchbihist(pbihist)
286 # pbihist = pd.DataFrame(pbihist.T)
287 # return pbihist