plosone_underreview: scripts/load_features.py comparison

comparison scripts/load_features.py @ 26:29b5ee381305 branch-tests

Re-ran notebooks and changed feature loading for Melodia

author	mpanteli <m.x.panteli@gmail.com>
date	Wed, 13 Sep 2017 17:33:48 +0100
parents	8dea2b8349c5
children	e4736064d282

comparison

Legend: equal, deleted, inserted, replaced

-:8dea2b8349c5
+:29b5ee381305
 self.win2 = int(round(self.win2sec*self.framessr))
 self.hop2 = int(round(0.5*self.framessr))
 self.framessr2 = self.framessr/float(self.hop2)
-def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
+def get_op_mfcc_for_file(self, melspec_file=None, scale=True, stop_sec=30.0):
 op = []
 mfc = []
 if not os.path.exists(melspec_file):
 return op, mfc
 print 'extracting onset patterns and mfccs...'
 songframes = pd.read_csv(melspec_file, engine="c", header=None)
 songframes.iloc[np.where(np.isnan(songframes))] = 0
-songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
+	n_stop = np.int(np.ceil(stop_sec * self.framessr))
+songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
 melspec = songframes.get_values().T
 op = self.get_op_from_melspec(melspec, K=2)
 mfc = self.get_mfcc_from_melspec(melspec)
 if scale:
 # scale all frames by mean and std of recording
 op = (op - np.nanmean(op)) / np.nanstd(op)
 mfc = (mfc - np.nanmean(mfc)) / np.nanstd(mfc)
 return op, mfc
-def get_chroma_for_file(self, chroma_file=None, scale=True):
+def get_chroma_for_file(self, chroma_file=None, scale=True, stop_sec=30.0):
 ch = []
 if not os.path.exists(chroma_file):
 return ch
 print 'extracting chroma...'
 songframes = pd.read_csv(chroma_file, engine="c", header=None)
 songframes.iloc[np.where(np.isnan(songframes))] = 0
-songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
+n_stop = np.int(np.ceil(stop_sec * self.framessr))
+	songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
 chroma = songframes.get_values().T
 ch = self.get_ave_chroma(chroma)
 if scale:
 # scale all frames by mean and std of recording
 ch = (ch - np.nanmean(ch)) / np.nanstd(ch)
 bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values()
 music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2)
 return music_idx
-def get_features(self, df, class_label='Country'):
+def get_features(self, df, stop_sec=30.0, class_label='Country'):
 oplist = []
 mflist = []
 chlist = []
 pblist = []
 clabels = []
 print 'file ' + str(i) + ' of ' + str(n_files)
 music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
 if len(music_idx)==0:
 # no music segments -> skip this file
 continue
-try:
+if 1:
-op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
+op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec)
-ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
+ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec)
-pb = self.get_pb_from_melodia(df['Melodia'].iloc[i])
+#pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
-#pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
+pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
 #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i])
-except:
+else:
 continue
-min_n_frames = np.min([len(op), len(mfcc), len(ch), len(pb)])  # ideally, features should have the same number of frames
+n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+	    print n_stop, len(op), len(mfcc), len(ch), len(pb)
+	    min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)])  # ideally, features should have the same number of frames
 if min_n_frames==0:
 # no features extracted -> skip this file
 continue
 music_idx = music_idx[music_idx<min_n_frames]
 n_frames = len(music_idx)
 except:
 newframes[:, fr, None] = np.zeros(((nb+nb)*npc, 1))
 return newframes
-def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
 pb = []
 if not os.path.exists(melodia_file):
 return pb
 print 'extracting pitch bihist from melodia...'
 pb = pbi.PitchBihist(win2sec=self.win2sec)
-pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
+pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)
 if nmfpb is True:
 pbihist = self.nmfpitchbihist(pbihist)
 pbihist = pd.DataFrame(pbihist.T)
 if scale:
 # scale all frames by mean and std of recording
 pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
 return pbihist
-def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
 base = os.path.basename(melodia_file)
 root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
 root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'
 root_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/'
 if 'SampleAudio' in base:
 root = root_SM
 else:
 root = root_BL
+	    base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
 print 'load precomputed pitch bihist', root
-if self.win2sec == 8:
+#if self.win2sec == 8:
-pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:,1:]
+#    pbihist = pd.read_csv(os.path.join(root, base))
-else:
+#else:
+	if 1:
 pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
 if nmfpb is True:
 pbihist = self.nmfpitchbihist(pbihist)
 pbihist = pd.DataFrame(pbihist.T)
+	n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+	pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :]
 print pbihist.shape
 if scale:
 # scale all frames by mean and std of recording
 pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
 return pbihist

Mercurial > hg > plosone_underreview

comparison scripts/load_features.py @ 26:29b5ee381305 branch-tests