Mercurial > hg > plosone_underreview
diff scripts/load_features.py @ 26:29b5ee381305 branch-tests
notebooks rerunning, and changes in load features for melodia
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Wed, 13 Sep 2017 17:33:48 +0100 |
parents | 8dea2b8349c5 |
children | e4736064d282 |
line wrap: on
line diff
```diff
--- a/scripts/load_features.py	Wed Sep 13 13:53:09 2017 +0100
+++ b/scripts/load_features.py	Wed Sep 13 17:33:48 2017 +0100
@@ -26,7 +26,7 @@
         self.framessr2 = self.framessr/float(self.hop2)
 
 
-    def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
+    def get_op_mfcc_for_file(self, melspec_file=None, scale=True, stop_sec=30.0):
         op = []
         mfc = []
         if not os.path.exists(melspec_file):
@@ -34,7 +34,8 @@
         print 'extracting onset patterns and mfccs...'
         songframes = pd.read_csv(melspec_file, engine="c", header=None)
         songframes.iloc[np.where(np.isnan(songframes))] = 0
-        songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes
+        n_stop = np.int(np.ceil(stop_sec * self.framessr))
+        songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
         melspec = songframes.get_values().T
         op = self.get_op_from_melspec(melspec, K=2)
         mfc = self.get_mfcc_from_melspec(melspec)
@@ -45,14 +46,15 @@
         return op, mfc
 
 
-    def get_chroma_for_file(self, chroma_file=None, scale=True):
+    def get_chroma_for_file(self, chroma_file=None, scale=True, stop_sec=30.0):
         ch = []
         if not os.path.exists(chroma_file):
             return ch
         print 'extracting chroma...'
         songframes = pd.read_csv(chroma_file, engine="c", header=None)
         songframes.iloc[np.where(np.isnan(songframes))] = 0
-        songframes = songframes.iloc[0:min(len(songframes), 18000), :] # only first 1.5 minutes
+        n_stop = np.int(np.ceil(stop_sec * self.framessr))
+        songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
         chroma = songframes.get_values().T
         ch = self.get_ave_chroma(chroma)
         if scale:
@@ -98,7 +100,7 @@
         return music_idx
 
 
-    def get_features(self, df, class_label='Country'):
+    def get_features(self, df, stop_sec=30.0, class_label='Country'):
         oplist = []
         mflist = []
         chlist = []
@@ -114,15 +116,17 @@
             if len(music_idx)==0:
                 # no music segments -> skip this file
                 continue
-            try:
-                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
-                ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
-                pb = self.get_pb_from_melodia(df['Melodia'].iloc[i])
-                #pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
+            if 1:
+                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec)
+                ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec)
+                #pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
+                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
                 #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i])
-            except:
+            else:
                 continue
-            min_n_frames = np.min([len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames
+            n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+            print n_stop, len(op), len(mfcc), len(ch), len(pb)
+            min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames
             if min_n_frames==0:
                 # no features extracted -> skip this file
                 continue
@@ -222,13 +226,13 @@
         return newframes
 
 
-    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
         pb = []
         if not os.path.exists(melodia_file):
             return pb
         print 'extracting pitch bihist from melodia...'
         pb = pbi.PitchBihist(win2sec=self.win2sec)
-        pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
+        pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)
         if nmfpb is True:
             pbihist = self.nmfpitchbihist(pbihist)
         pbihist = pd.DataFrame(pbihist.T)
@@ -238,7 +242,7 @@
         return pbihist
 
 
-    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
         base = os.path.basename(melodia_file)
         root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
         root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'
@@ -247,14 +251,18 @@
             root = root_SM
         else:
             root = root_BL
+            base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
         print 'load precomputed pitch bihist', root
-        if self.win2sec == 8:
-            pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:,1:]
-        else:
+        #if self.win2sec == 8:
+        # pbihist = pd.read_csv(os.path.join(root, base))
+        #else:
+        if 1:
             pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
         if nmfpb is True:
             pbihist = self.nmfpitchbihist(pbihist)
         pbihist = pd.DataFrame(pbihist.T)
+        n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+        pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :]
         print pbihist.shape
         if scale:
             # scale all frames by mean and std of recording
```