# -*- coding: utf-8 -*-
"""
Created on Thu Mar 16 01:50:57 2017

@author: mariapanteli
"""

import numpy as np
import pandas as pd
import os
from sklearn.decomposition import NMF
import OPMellin as opm
import MFCC as mfc
import PitchBihist as pbi


class FeatureLoader:
    """Load frame-level features from precomputed CSV files.

    The constructor derives two frame rates from the audio sampling rate:
    ``framessr`` (first-level frames) and ``framessr2`` (second-level
    frames obtained by hopping over the first-level frames).
    """

    def __init__(self, win2sec=8):
        """Set up window/hop sizes and the resulting frame rates.

        Parameters
        ----------
        win2sec : number
            Length of the second-level window, stored as a float.
            NOTE(review): presumably seconds, given the name -- confirm.
        """
        self.win2sec = float(win2sec)
        self.sr = 44100.
        # first-level window is 0.04 * sr samples, hop is 1/8 of the window
        self.win1 = int(round(0.04*self.sr))
        self.hop1 = int(round(self.win1/8.))
        self.framessr = self.sr/float(self.hop1)
        # second-level window/hop are expressed in first-level frames
        self.win2 = int(round(self.win2sec*self.framessr))
        self.hop2 = int(round(0.5*self.framessr))
        self.framessr2 = self.framessr/float(self.hop2)


    def _read_frames(self, csv_file, stop_sec):
        """Read a headerless CSV of frames, zero NaNs, truncate, transpose.

        Rows are treated as frames: at most ``ceil(stop_sec * framessr)``
        rows are kept, and the result is returned transposed (rows of the
        CSV become columns of the returned array).
        """
        frames = pd.read_csv(csv_file, engine="c", header=None)
        # fillna replaces exactly the NaN cells; indexing .iloc with the
        # (rows, cols) tuple from np.where would zero the whole cross-product
        frames = frames.fillna(0)
        n_stop = int(np.ceil(stop_sec * self.framessr))
        frames = frames.iloc[0:min(len(frames), n_stop), :]
        return frames.values.T


    def get_op_mfcc_for_file(self, melspec_file=None, scale=True, stop_sec=30.0):
        """Extract onset patterns and MFCCs from a mel-spectrogram CSV.

        Parameters
        ----------
        melspec_file : str or None
            Path to the CSV file. If it is None or does not exist, two
            empty lists are returned.
        scale : bool
            If True, each feature is standardised by the mean and standard
            deviation (NaN-ignoring) computed over the whole recording.
        stop_sec : float
            Upper limit on how much of the file is used, converted to a
            row count via ``self.framessr``.

        Returns
        -------
        (op, mfcc)
            Outputs of ``get_op_from_melspec`` and
            ``get_mfcc_from_melspec``, or ``([], [])`` if the file is
            missing.
        """
        op = []
        mfcc = []
        if melspec_file is None or not os.path.exists(melspec_file):
            return op, mfcc
        print('extracting onset patterns and mfccs...')
        melspec = self._read_frames(melspec_file, stop_sec)
        op = self.get_op_from_melspec(melspec, K=2)
        mfcc = self.get_mfcc_from_melspec(melspec)
        if scale:
            # scale all frames by mean and std of recording
            op = (op - np.nanmean(op)) / np.nanstd(op)
            mfcc = (mfcc - np.nanmean(mfcc)) / np.nanstd(mfcc)
        return op, mfcc


    def get_chroma_for_file(self, chroma_file=None, scale=True, stop_sec=30.0):
        """Extract averaged chroma features from a chroma CSV.

        Parameters
        ----------
        chroma_file : str or None
            Path to the CSV file. If it is None or does not exist, an
            empty list is returned.
        scale : bool
            If True, standardise by the recording's NaN-ignoring mean and
            standard deviation.
        stop_sec : float
            Upper limit on how much of the file is used, converted to a
            row count via ``self.framessr``.

        Returns
        -------
        Output of ``get_ave_chroma`` (optionally scaled), or ``[]`` if the
        file is missing.
        """
        ch = []
        if chroma_file is None or not os.path.exists(chroma_file):
            return ch
        print('extracting chroma...')
        chroma = self._read_frames(chroma_file, stop_sec)
        ch = self.get_ave_chroma(chroma)
        if scale:
            # scale all frames by mean and std of recording
            ch = (ch - np.nanmean(ch)) / np.nanstd(ch)
        return ch


    def get_music_idx_from_bounds(self, bounds, sr=None):
        """Convert speech/music segment bounds to indices of music frames.

        Parameters
        ----------
        bounds : 2-D array (or empty list)
            One row per segment. Column 0 and column 1 are multiplied by
            ``sr`` to get the segment start and length in frames; column 2
            is the label, where ``'m'`` marks a music segment.
            NOTE(review): columns 0/1 are presumably start time and
            duration in seconds -- confirm against the segmenter output.
        sr : float or None
            Frame rate used to convert column 0/1 to frames. Defaults to
            ``self.framessr2`` when None.

        Returns
        -------
        list or ndarray
            ``[]`` if ``bounds`` is empty or has no music segment;
            otherwise a sorted integer array of unique frame indices.
        """
        music_idx = []
        if len(bounds) == 0:
            # bounds is empty list
            return music_idx
        if sr is None:
            # multiplying by None would raise; fall back to the
            # second-level frame rate
            sr = self.framessr2
        music_bounds = np.where(bounds[:, 2] == 'm')[0]
        if len(music_bounds) == 0:
            # no music segments (this also covers "all segments are speech")
            return music_idx
        win2_frames = int(np.round(self.win2sec * self.framessr2))
        bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
        duration_in_frames = np.ceil(np.array(bounds[:, 1], dtype=float) * sr)
        for music_bound in music_bounds:
            lower_bound = bounds_in_frames[music_bound]
            # the upper end is pulled back by one second-level window
            # length; a segment shorter than the window gives an empty range
            upper_bound = (bounds_in_frames[music_bound]
                           + duration_in_frames[music_bound] - win2_frames)
            music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
        # sorted and de-duplicated, in case segments overlap
        music_idx = np.unique(np.concatenate(music_idx))
        return music_idx
m@6: Maria@4: Maria@4: def get_music_idx_for_file(self, segmenter_file=None): Maria@4: music_idx = [] Maria@4: if os.path.exists(segmenter_file) and os.path.getsize(segmenter_file)>0: Maria@4: print 'loading speech/music segments...' Maria@4: bounds = pd.read_csv(segmenter_file, header=None, delimiter='\t').get_values() Maria@4: if bounds.shape[1] == 1: # depends on the computer platform Maria@4: bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values() Maria@4: music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2) Maria@4: return music_idx Maria@4: Maria@4: m@42: def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True): Maria@4: oplist = [] Maria@4: mflist = [] Maria@4: chlist = [] Maria@4: pblist = [] Maria@4: clabels = [] Maria@4: aulabels = [] Maria@4: n_files = len(df) Maria@4: for i in range(n_files): Maria@4: if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])): Maria@4: continue Maria@4: print 'file ' + str(i) + ' of ' + str(n_files) Maria@4: music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) m@43: #min_dur_sec=8.0 m@43: #min_n_frames = np.int(np.floor(min_dur_sec * self.framessr2)) m@43: if len(music_idx)==0: # or len(music_idx) skip this file Maria@4: continue m@46: try: m@43: # allow feature extraction of longer segments (2*stop_sec) m@43: # because some of it might be speech segments that are filtered out m@43: stop_sec_feat = 2 * stop_sec m@43: op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec_feat) m@43: ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec_feat) m@43: pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec_feat) Maria@35: #if precomp_melody: Maria@35: # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) Maria@35: #else: Maria@35: # pb = 
self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) m@46: except: Maria@4: continue m@26: n_stop = np.int(np.ceil(stop_sec * self.framessr2)) Maria@35: print n_stop, len(op), len(mfcc), len(ch), len(pb) m@43: max_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames m@43: if max_n_frames==0: Maria@4: # no features extracted -> skip this file Maria@4: continue m@43: # music segment duration must be <= 30sec m@43: music_idx = music_idx[music_idx