Maria@4
|
1 # -*- coding: utf-8 -*-
|
Maria@4
|
2 """
|
Maria@4
|
3 Created on Wed Mar 15 22:52:57 2017
|
Maria@4
|
4
|
Maria@4
|
5 @author: mariapanteli
|
Maria@4
|
6 """
|
Maria@4
|
7
|
Maria@4
|
8 import numpy as np
|
Maria@4
|
9 import pandas as pd
|
Maria@4
|
10 import pickle
|
Maria@4
|
11
|
Maria@4
|
12 import load_features
|
Maria@4
|
13 import util_dataset
|
Maria@4
|
14 import util_filter_dataset
|
Maria@4
|
15
|
Maria@4
|
16
|
Maria@4
|
17 #METADATA_FILE = 'sample_dataset/metadata.csv'
|
Maria@4
|
18 #OUTPUT_FILES = ['sample_dataset/train_data.pickle', 'sample_dataset/val_data.pickle', 'sample_dataset/test_data.pickle']
|
Maria@4
|
# Window size passed to extract_features as `win2sec`; also embedded in the
# output file names below.
WIN_SIZE = 2

# CSV with the dataset metadata, read by the __main__ section.
METADATA_FILE = 'data/metadata_BLSM_language_all.csv'

# Earlier output locations, kept for reference:
#OUTPUT_FILES = ['/import/c4dm-04/mariap/train_data_cf.pickle', '/import/c4dm-04/mariap/val_data_cf.pickle', '/import/c4dm-04/mariap/test_data_cf.pickle']
#OUTPUT_FILES = ['/import/c4dm-04/mariap/train_data_cf_4.pickle', '/import/c4dm-04/mariap/val_data_cf_4.pickle', '/import/c4dm-04/mariap/test_data_cf_4.pickle']

# Pickle destinations, in the order [train, validation, test].
OUTPUT_FILES = [
    '/import/c4dm-04/mariap/train_data_melodia_%s.pickle' % WIN_SIZE,
    '/import/c4dm-04/mariap/val_data_melodia_%s.pickle' % WIN_SIZE,
    '/import/c4dm-04/mariap/test_data_melodia_%s.pickle' % WIN_SIZE,
]
|
Maria@4
|
26
|
Maria@4
|
def extract_features(df, win2sec=8.0):
    """Extract features from melspec and chroma.

    Parameters
    ----------
    df : pd.DataFrame
        Metadata including class label and path to audio, melspec, chroma
    win2sec : float
        The window size for the second frame decomposition of the features

    Returns
    -------
    X : np.array
        The features for every frame x every audio file in the dataset.
        Columns are the rhythm, mel, mfcc and chroma features concatenated
        along axis 1, in that order.
    Y : np.array
        The class labels for every frame in the dataset
    Y_audio : np.array
        The audio labels
    """
    feat_loader = load_features.FeatureLoader(win2sec=win2sec)
    frames_rhy, frames_mfcc, frames_chroma, frames_mel, Y_df, Y_audio_df = feat_loader.get_features(df)

    # Report the shape of each feature matrix. A single pre-formatted string
    # is passed to print so the output is the same on Python 2 and Python 3
    # (the bare `print a, b` statement is a SyntaxError on Python 3).
    print("%s %s %s %s" % (frames_rhy.shape, frames_mel.shape,
                           frames_mfcc.shape, frames_chroma.shape))

    # NOTE: get_features returns (rhy, mfcc, chroma, mel) but the columns are
    # stacked as (rhy, mel, mfcc, chroma); keep this order, since downstream
    # code reading the pickles presumably relies on it.
    X = np.concatenate((frames_rhy, frames_mel, frames_mfcc, frames_chroma), axis=1)

    # `.values` replaces `get_values()`, which was deprecated in pandas 0.25
    # and removed in pandas 1.0.
    Y = Y_df.values
    Y_audio = Y_audio_df.values
    return X, Y, Y_audio
|
Maria@4
|
53
|
Maria@4
|
54
|
Maria@4
|
if __name__ == '__main__':
    # Load the metadata, drop entries with missing data, and keep only the
    # rows selected by util_dataset.subset_labels on the 'Country' column.
    # `.values` replaces `get_values()`, which was removed in pandas 1.0.
    df = pd.read_csv(METADATA_FILE)
    df = util_filter_dataset.remove_missing_data(df)
    subset_idx = util_dataset.subset_labels(df['Country'].values)
    df = df.iloc[subset_idx, :]

    # The split is computed on row positions (X) with the country as label (Y).
    X, Y = np.arange(len(df)), df['Country'].values

    # split in train, val, test set
    train_set, val_set, test_set = util_dataset.get_train_val_test_idx(X, Y)

    # Extract features and write output, one pickle per split. OUTPUT_FILES is
    # ordered [train, val, test]; the first element of each split holds the
    # row positions used to index df.
    for split_set, out_file in zip((train_set, val_set, test_set), OUTPUT_FILES):
        X_split, Y_split, Y_audio_split = extract_features(df.iloc[split_set[0], :], win2sec=WIN_SIZE)
        with open(out_file, 'wb') as f:
            pickle.dump([X_split, Y_split, Y_audio_split], f)
|
Maria@4
|
78
|
Maria@4
|
79 #out_file = '/import/c4dm-04/mariap/test_data_melodia_1_test.pickle'
|
Maria@4
|
80 # pickle.dump([X_test, Y_test, Y_audio_test], f)
|
Maria@4
|
81 #with open(out_file, 'wb') as f:
|