Mercurial > hg > plosone_underreview
comparison scripts/load_features.py @ 4:e50c63cf96be branch-tests
rearranging folders
author | Maria Panteli |
---|---|
date | Mon, 11 Sep 2017 11:51:50 +0100 |
parents | |
children | a35bd818d8e9 |
comparison
equal
deleted
inserted
replaced
3:230a0cf17de0 | 4:e50c63cf96be |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Thu Mar 16 01:50:57 2017 | |
4 | |
5 @author: mariapanteli | |
6 """ | |
7 | |
8 import numpy as np | |
9 import pandas as pd | |
10 import os | |
11 from sklearn.decomposition import NMF | |
12 import OPMellin as opm | |
13 import MFCC as mfc | |
14 import PitchBihist as pbi | |
15 | |
16 | |
class FeatureLoader:
    """Load per-recording feature files and return frame-level features.

    The loader reads mel-spectrogram, chroma, melodia and speech/music
    segmentation files, summarises the low-level frames over a sliding
    window of ``win2sec`` seconds and returns pandas DataFrames with one
    row per summarised frame.

    Attributes set in ``__init__``:
        win2sec   -- length of the summarising window in seconds
        sr        -- audio sampling rate assumed by the loader (44100.)
        win1/hop1 -- low-level window / hop size in samples
        framessr  -- low-level frame rate, ``sr / hop1``
        win2/hop2 -- summarising window / hop size in low-level frames
        framessr2 -- frame rate of the summarised frames, ``framessr / hop2``
    """

    # Maximum number of low-level frames read from a melspec/chroma file
    # (described by the original author as "only first 1.5 minutes").
    MAX_FRAMES = 18000

    def __init__(self, win2sec=8):
        self.win2sec = float(win2sec)
        self.sr = 44100.
        self.win1 = self._round_half_up(0.04 * self.sr)
        # win1 / 8 is exactly 220.5: built-in round() gives 221 on Python 2
        # but 220 on Python 3, so round half up explicitly to keep 221.
        self.hop1 = self._round_half_up(self.win1 / 8.)
        self.framessr = self.sr / float(self.hop1)
        self.win2 = self._round_half_up(self.win2sec * self.framessr)
        self.hop2 = self._round_half_up(0.5 * self.framessr)
        self.framessr2 = self.framessr / float(self.hop2)

    @staticmethod
    def _round_half_up(value):
        """Round a non-negative number to the nearest int, ties going up."""
        return int(np.floor(value + 0.5))

    @staticmethod
    def _file_exists(path):
        """Return True if ``path`` is not None and exists on disk."""
        return path is not None and os.path.exists(path)

    @staticmethod
    def _scale_by_recording(frames):
        """Standardise a DataFrame by the mean and std over all its values.

        NaNs are ignored when computing the statistics.  A recording with
        zero standard deviation yields non-finite values, as before.
        """
        values = frames.values
        return (frames - np.nanmean(values)) / np.nanstd(values)

    def _read_frames(self, csv_file):
        """Read a header-less CSV and return its values transposed.

        NaN cells are replaced by 0 and only the first ``MAX_FRAMES`` rows
        are kept.
        """
        songframes = pd.read_csv(csv_file, engine="c", header=None)
        # fillna touches only the NaN cells; indexing iloc with the tuple
        # returned by np.where would zero the whole rows-by-columns cross
        # product instead.
        songframes = songframes.fillna(0).iloc[:self.MAX_FRAMES, :]
        return songframes.values.T

    def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
        """Extract onset patterns and MFCCs from a mel-spectrogram CSV file.

        Parameters:
            melspec_file -- path to the mel-spectrogram CSV file
            scale        -- if True, standardise each feature by the mean and
                            std of the whole recording

        Returns:
            ``(op, mfcc)`` as two DataFrames, or ``([], [])`` if the file is
            None or does not exist.
        """
        if not self._file_exists(melspec_file):
            return [], []
        print('extracting onset patterns and mfccs...')
        melspec = self._read_frames(melspec_file)
        op = self.get_op_from_melspec(melspec, K=2)
        mfcc = self.get_mfcc_from_melspec(melspec)
        if scale:
            # scale all frames by mean and std of recording
            op = self._scale_by_recording(op)
            mfcc = self._scale_by_recording(mfcc)
        return op, mfcc

    def get_chroma_for_file(self, chroma_file=None, scale=True):
        """Extract averaged chroma features from a chroma CSV file.

        Parameters:
            chroma_file -- path to the chroma CSV file
            scale       -- if True, standardise by the mean and std of the
                           whole recording

        Returns:
            A DataFrame, or ``[]`` if the file is None or does not exist.
        """
        if not self._file_exists(chroma_file):
            return []
        print('extracting chroma...')
        chroma = self._read_frames(chroma_file)
        ch = self.get_ave_chroma(chroma)
        if scale:
            # scale all frames by mean and std of recording
            ch = self._scale_by_recording(ch)
        return ch

    def get_music_idx_from_bounds(self, bounds, sr=None):
        """Convert speech/music segment bounds to indices of music frames.

        Parameters:
            bounds -- 2-D array-like with one row per segment; column 0 is
                      multiplied by ``sr`` to get the segment start, column 1
                      to get its duration, and column 2 is the label
                      (``'m'`` marks a music segment)
            sr     -- frame rate used to convert the bounds to frame indices;
                      defaults to ``self.framessr2``

        Returns:
            A sorted integer array of unique frame indices, or ``[]`` when
            ``bounds`` is empty or has no music segments.
        """
        if len(bounds) == 0:
            # bounds is empty list
            return []
        if sr is None:
            sr = self.framessr2
        bounds = np.asarray(bounds)
        music_bounds = np.where(bounds[:, 2] == 'm')[0]
        if len(music_bounds) == 0:
            # no music segments (this also covers "all segments are speech")
            return []
        half_win_hop = self._round_half_up(0.5 * self.win2 / float(self.hop2))
        bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
        duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
        segments = []
        for music_bound in music_bounds:
            lower_bound = max(0, bounds_in_frames[music_bound] - half_win_hop)
            upper_bound = lower_bound + duration_in_frames[music_bound] - half_win_hop
            segments.append(np.arange(lower_bound, upper_bound, dtype=int))
        # np.unique sorts and drops the duplicates that overlapping
        # segments would otherwise produce.
        return np.unique(np.concatenate(segments))

    def get_music_idx_for_file(self, segmenter_file=None):
        """Load a speech/music segmentation file and return music frame indices.

        The file is read as tab-delimited first and re-read as
        comma-delimited when that yields a single column.

        Returns:
            The result of ``get_music_idx_from_bounds`` at ``self.framessr2``,
            or ``[]`` if the file is None, missing or empty.
        """
        if not self._file_exists(segmenter_file):
            return []
        if os.path.getsize(segmenter_file) == 0:
            return []
        print('loading speech/music segments...')
        bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').values
        if bounds.shape[1] == 1:  # depends on the computer platform
            bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').values
        return self.get_music_idx_from_bounds(bounds, sr=self.framessr2)

    def get_features(self, df, class_label='Country'):
        """Load the features of every recording listed in ``df``.

        Parameters:
            df          -- DataFrame with columns 'Melspec', 'Chroma',
                           'Melodia', 'Speech' and 'Audio' plus the column
                           named by ``class_label``
            class_label -- column of ``df`` used as the class label

        Returns:
            A tuple of six DataFrames: onset patterns, MFCCs, chroma, pitch
            bihistograms, class labels and audio labels, each concatenated
            over all recordings that could be loaded.

        Raises:
            ValueError -- if no recording yielded any features.

        Recordings with missing feature files, no music frames, a failing
        extractor or an empty feature are skipped.
        """
        oplist = []
        mflist = []
        chlist = []
        pblist = []
        clabels = []
        aulabels = []
        n_files = len(df)
        for i in range(n_files):
            required = [df[col].iloc[i] for col in ('Melspec', 'Chroma', 'Melodia')]
            if not all(os.path.exists(path) for path in required):
                continue
            print('file ' + str(i) + ' of ' + str(n_files))
            music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
            if len(music_idx) == 0:
                # no music segments -> skip this file
                continue
            try:
                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
                ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
            except Exception as err:
                # Best effort: one broken recording must not abort the run,
                # but say why it was dropped.  KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                print('skipping file ' + str(i) + ': ' + str(err))
                continue
            # ideally, features should have the same number of frames
            min_n_frames = np.min([len(op), len(mfcc), len(ch), len(pb)])
            if min_n_frames == 0:
                # no features extracted -> skip this file
                continue
            music_idx = music_idx[music_idx < min_n_frames]
            n_frames = len(music_idx)
            oplist.append(op.iloc[music_idx, :])
            mflist.append(mfcc.iloc[music_idx, :])
            chlist.append(ch.iloc[music_idx, :])
            pblist.append(pb.iloc[music_idx, :])
            clabels.append(pd.DataFrame(np.repeat(df[class_label].iloc[i], n_frames)))
            aulabels.append(pd.DataFrame(np.repeat(df['Audio'].iloc[i], n_frames)))
        collected = (oplist, mflist, chlist, pblist, clabels, aulabels)
        print(' '.join(str(len(items)) for items in collected))
        if not oplist:
            raise ValueError('no features could be loaded for any file')
        return tuple(pd.concat(items) for items in collected)

    def get_op_from_melspec(self, melspec, K=None):
        """Compute onset patterns from a mel spectrogram.

        Parameters:
            melspec -- mel spectrogram passed on to ``OPMellin``
            K       -- number of bands to average the onset patterns into;
                       if None the onset patterns are returned unaveraged

        Returns:
            A DataFrame built from the transposed onset patterns.
        """
        extractor = opm.OPMellin(win2sec=self.win2sec)
        opmellin = extractor.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr)
        if K is not None:
            opmel = self.mean_K_bands(opmellin.T, K)
        else:
            # Previously unbound in this case, which raised at the next line.
            opmel = opmellin.T
        return pd.DataFrame(opmel)

    def get_mfcc_from_melspec(self, melspec, deltamfcc=True, avelocalframes=True, stdlocalframes=True):
        """Compute MFCC features from a mel spectrogram.

        Parameters:
            melspec        -- mel spectrogram passed on to ``MFCCs``
            deltamfcc      -- if True, stack the frame-to-frame differences
                              below the MFCCs
            avelocalframes -- if True, summarise with
                              ``average_local_frames``
            stdlocalframes -- passed as ``getstd`` to
                              ``average_local_frames``

        Returns:
            A DataFrame built from the transposed feature matrix.
        """
        mf = mfc.MFCCs()
        # assumes the extractor returns a 2-D array with frames along
        # axis 1 -- TODO confirm against MFCC.py
        mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr)
        if deltamfcc:
            ffdiff = np.diff(mfcc, axis=1)
            if ffdiff.shape[1] > 0:
                # repeat the last difference so the deltas keep the frame count
                padding = ffdiff[:, -1, None]
            else:
                # fewer than two frames: there is no difference to repeat
                padding = np.zeros(mfcc.shape)
            ffdelta = np.concatenate((ffdiff, padding), axis=1)
            mfcc = np.concatenate([mfcc, ffdelta], axis=0)
        if avelocalframes:
            mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes)
        return pd.DataFrame(mfcc.T)

    def get_ave_chroma(self, chroma, avelocalframes=True, stdlocalframes=True, alignchroma=True):
        """Align and summarise a chroma matrix.

        Parameters:
            chroma         -- 2-D array; rows are summed over axis 1 to find
                              the strongest row
            avelocalframes -- if True, summarise with
                              ``average_local_frames``
            stdlocalframes -- passed as ``getstd`` to
                              ``average_local_frames``
            alignchroma    -- if True, roll the rows so that the row with the
                              largest sum comes first

        Returns:
            A DataFrame built from the transposed result.  The input array
            is not modified.
        """
        # Replace NaNs on a copy so the caller's array stays untouched.
        chroma = np.where(np.isnan(chroma), 0, chroma)
        if alignchroma:
            maxind = np.argmax(np.sum(chroma, axis=1))
            chroma = np.roll(chroma, -maxind, axis=0)
        if avelocalframes:
            chroma = self.average_local_frames(chroma, getstd=stdlocalframes)
        return pd.DataFrame(chroma.T)

    def average_local_frames(self, frames, getstd=False):
        """Summarise frames over sliding windows of ``win2`` columns.

        Parameters:
            frames -- 2-D array of shape (nbins, norigframes)
            getstd -- if True, append the per-window standard deviation
                      below the per-window mean

        Returns:
            Array of shape (nbins, nframes), or (2 * nbins, nframes) when
            ``getstd`` is True.  Windows advance by ``hop2`` columns; input
            shorter than ``win2`` yields a single window over all columns.
            NaNs are ignored.
        """
        nbins, norigframes = frames.shape
        if norigframes < self.win2:
            nframes = 1
        else:
            nframes = int(1 + np.floor((norigframes - self.win2) / float(self.hop2)))
        nrows = 2 * nbins if getstd else nbins
        aveframes = np.empty((nrows, nframes))
        for i in range(nframes):  # loop over all summarising windows
            start = i * self.hop2
            window = frames[:, start:min(start + self.win2, norigframes)]
            aveframes[:nbins, i] = np.nanmean(window, axis=1)
            if getstd:
                aveframes[nbins:, i] = np.nanstd(window, axis=1)
        return aveframes

    def mean_K_bands(self, songframes, K=40, nmels=40):
        """Average groups of adjacent mel bands into ``K`` bands.

        Parameters:
            songframes -- 2-D array of shape (F, P) whose columns are
                          ``nmels`` consecutive groups of ``P // nmels`` bins
            K          -- number of output bands
            nmels      -- number of input bands

        Returns:
            Array of shape (F, (P // nmels) * K).  Each output band is the
            mean of ``int(nmels / K)`` consecutive input bands.
        """
        F, P = songframes.shape
        # Floor division keeps the slice bounds integral on Python 3 as well.
        nbins = P // nmels  # must be 200 bins
        niters = int(nmels / K)
        procframes = np.zeros([F, nbins * K])
        for k in range(K):
            target = slice(k * nbins, (k + 1) * nbins)
            for j in range(k * niters, (k + 1) * niters):
                procframes[:, target] += songframes[:, (j * nbins):((j + 1) * nbins)]
        procframes /= float(niters)
        return procframes

    def nmfpitchbihist(self, frames):
        """Reduce each pitch bihistogram frame with a 2-component NMF.

        Parameters:
            frames -- 2-D array-like of shape (nbins, nfr); each column is
                      reshaped to (nb, nb) with ``nb = int(sqrt(nbins))``

        Returns:
            Array of shape (4 * nb, nfr) holding, per frame, the flattened
            concatenation of the NMF activations and transposed components.
            Frames for which the factorisation fails are filled with zeros.
        """
        # Accept DataFrames too: positional [:, fr] indexing needs an ndarray.
        frames = np.asarray(frames)
        nbins, nfr = frames.shape
        npc = 2
        nb = int(np.sqrt(nbins))  # assume structure of pitch bihist is nbins*nbins
        newframes = np.empty(((nb + nb) * npc, nfr))
        for fr in range(nfr):
            pb = np.reshape(frames[:, fr], (nb, nb))
            try:
                nmfmodel = NMF(n_components=npc).fit(pb)
                W = nmfmodel.transform(pb)
                H = nmfmodel.components_.T
                newframes[:, fr] = np.concatenate((W, H)).flatten()
            except Exception:
                # Deliberate best effort: a frame that cannot be factorised
                # becomes all zeros instead of aborting the recording.
                newframes[:, fr] = 0
        return newframes

    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
        """Extract pitch bihistogram features from a melodia file.

        Parameters:
            melodia_file -- path to the melodia file
            nmfpb        -- if true, reduce each frame with
                            ``nmfpitchbihist``
            scale        -- if True, standardise by the mean and std of the
                            whole recording

        Returns:
            A DataFrame, or ``[]`` if the file is None or does not exist.
        """
        if not self._file_exists(melodia_file):
            return []
        print('extracting pitch bihist from melodia...')
        extractor = pbi.PitchBihist(win2sec=self.win2sec)
        pbihist = extractor.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
        if nmfpb:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        if scale:
            # scale all frames by mean and std of recording
            pbihist = self._scale_by_recording(pbihist)
        return pbihist

    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, root=None):
        """Load a precomputed pitch bihistogram for a melodia file.

        The precomputed file has the same base name as ``melodia_file`` and
        is looked up in ``root``.

        Parameters:
            melodia_file -- path whose base name identifies the recording
            nmfpb        -- if true, reduce each frame with
                            ``nmfpitchbihist``
            scale        -- if True, standardise by the mean and std of the
                            whole recording
            root         -- directory holding the precomputed files; defaults
                            to the original author's directory for the
                            current ``win2sec``

        Returns:
            A DataFrame, or ``[]`` if ``melodia_file`` is None or the
            precomputed file does not exist.
        """
        if melodia_file is None:
            return []
        if root is None:
            root = '/import/c4dm-05/mariap/Melodia-melody-' + str(int(self.win2sec)) + 'sec/'
        print('load precomputed pitch bihist ' + root)
        precomputed_file = os.path.join(root, os.path.basename(melodia_file))
        if not os.path.exists(precomputed_file):
            return []
        if self.win2sec == 8:
            # NOTE(review): unlike the branch below this one is not
            # transposed and drops the first row and column -- confirm the
            # layout of the 8-second files.
            pbihist = pd.read_csv(precomputed_file).iloc[1:, 1:]
        else:
            pbihist = np.loadtxt(precomputed_file, delimiter=',').T
        if nmfpb:
            pbihist = self.nmfpitchbihist(pbihist)
        pbihist = pd.DataFrame(pbihist.T)
        print(pbihist.shape)
        if scale:
            # scale all frames by mean and std of recording
            pbihist = self._scale_by_recording(pbihist)
        return pbihist
257 | |
258 | |
259 # def get_pb_chroma_for_file(self, chroma_file=None, scale=True): | |
260 # ch = [] | |
261 # pb = [] | |
262 # if not os.path.exists(chroma_file): | |
263 # return ch, pb | |
264 # songframes = pd.read_csv(chroma_file, engine="c", header=None) | |
265 # songframes.iloc[np.where(np.isnan(songframes))] = 0 | |
266 # songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes | |
267 # chroma = songframes.get_values().T | |
268 # ch = self.get_ave_chroma(chroma) | |
269 # pb = self.get_pbihist_from_chroma(chroma) | |
270 # if scale: | |
271 # # scale all frames by mean and std of recording | |
272 # ch = (ch - np.nanmean(ch)) / np.nanstd(ch) | |
273 # pb = (pb - np.nanmean(pb)) / np.nanstd(pb) | |
274 # return ch, pb | |
275 | |
276 | |
277 # def get_pbihist_from_chroma(self, chroma, alignchroma=True, nmfpb=True): | |
278 # pb = pbi.PitchBihist(win2sec=self.win2sec) | |
279 # chroma[np.where(np.isnan(chroma))] = 0 | |
280 # if alignchroma: | |
281 # maxind = np.argmax(np.sum(chroma, axis=1)) | |
282 # chroma = np.roll(chroma, -maxind, axis=0) | |
283 # pbihist = pb.get_pitchbihist_from_chroma(chroma=chroma, chromasr=self.framessr) | |
284 # if nmfpb is True: | |
285 # pbihist = self.nmfpitchbihist(pbihist) | |
286 # pbihist = pd.DataFrame(pbihist.T) | |
287 # return pbihist |