Mercurial > hg > plosone_underreview
comparison scripts/load_features.py @ 43:b1d9ba5f888e branch-tests
debugging tests
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 15 Sep 2017 16:17:55 +0100 |
parents | 3b67cd634b9a |
children | 06e5711f9f62 |
comparison
equal
deleted
inserted
replaced
41:57f53b0d1eaa | 43:b1d9ba5f888e |
---|---|
101 bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values() | 101 bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values() |
102 music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2) | 102 music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2) |
103 return music_idx | 103 return music_idx |
104 | 104 |
105 | 105 |
106 def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): | 106 def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True): |
107 oplist = [] | 107 oplist = [] |
108 mflist = [] | 108 mflist = [] |
109 chlist = [] | 109 chlist = [] |
110 pblist = [] | 110 pblist = [] |
111 clabels = [] | 111 clabels = [] |
114 for i in range(n_files): | 114 for i in range(n_files): |
115 if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])): | 115 if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])): |
116 continue | 116 continue |
117 print 'file ' + str(i) + ' of ' + str(n_files) | 117 print 'file ' + str(i) + ' of ' + str(n_files) |
118 music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) | 118 music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) |
119 if len(music_idx)==0: | 119 #min_dur_sec=8.0 |
120 # no music segments -> skip this file | 120 #min_n_frames = np.int(np.floor(min_dur_sec * self.framessr2)) |
121 continue | 121 if len(music_idx)==0: # or len(music_idx)<min_n_frames: |
122 try: | 122 # no music segments or music segment too short -> skip this file |
123 op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) | 123 continue |
124 ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) | 124 if 1: |
125 pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) | 125 # allow feature extraction of longer segments (2*stop_sec) |
| | 126 # because some of it might be speech segments that are filtered out |
| | 127 stop_sec_feat = 2 * stop_sec |
| | 128 op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec_feat) |
| | 129 ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec_feat) |
| | 130 pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec_feat) |
126 #if precomp_melody: | 131 #if precomp_melody: |
127 # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) | 132 # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) |
128 #else: | 133 #else: |
129 # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) | 134 # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) |
130 except: | 135 else: |
131 continue | 136 continue |
132 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) | 137 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) |
133 print n_stop, len(op), len(mfcc), len(ch), len(pb) | 138 print n_stop, len(op), len(mfcc), len(ch), len(pb) |
134 min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames | 139 max_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames |
135 if min_n_frames==0: | 140 if max_n_frames==0: |
136 # no features extracted -> skip this file | 141 # no features extracted -> skip this file |
137 continue | 142 continue |
138 music_idx = music_idx[music_idx<min_n_frames] | 143 # music segment duration must be <= 30sec |
| | 144 music_idx = music_idx[music_idx<max_n_frames] |
139 n_frames = len(music_idx) | 145 n_frames = len(music_idx) |
140 oplist.append(op.iloc[music_idx, :]) | 146 oplist.append(op.iloc[music_idx, :]) |
141 mflist.append(mfcc.iloc[music_idx, :]) | 147 mflist.append(mfcc.iloc[music_idx, :]) |
142 chlist.append(ch.iloc[music_idx, :]) | 148 chlist.append(ch.iloc[music_idx, :]) |
143 pblist.append(pb.iloc[music_idx, :]) | 149 pblist.append(pb.iloc[music_idx, :]) |
269 if 'SampleAudio' in base: | 275 if 'SampleAudio' in base: |
270 root = root_SM | 276 root = root_SM |
271 else: | 277 else: |
272 root = root_BL | 278 root = root_BL |
273 base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' | 279 base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' |
| | 280 bihist_file = os.path.join(root, base) |
| | 281 if not os.path.exists(bihist_file): |
| | 282 return pbihist |
274 print 'load precomputed pitch bihist', root | 283 print 'load precomputed pitch bihist', root |
275 pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T | 284 pbihist = np.loadtxt(bihist_file, delimiter=',').T |
276 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) | 285 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) |
277 pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] | 286 pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] |
278 return pbihist | 287 return pbihist |
279 | 288 |