comparison scripts/classification.py @ 62:4425a4918102 branch-tests

fixed indices for feature components
author Maria Panteli <m.x.panteli@gmail.com>
date Thu, 21 Sep 2017 17:35:07 +0100
parents d118b6ca8370
children e83ecc296669
comparison
legend: equal | deleted | inserted | replaced
61:ac3fcd42e7bd 62:4425a4918102
6 """ 6 """
7 import numpy as np 7 import numpy as np
8 import pandas as pd 8 import pandas as pd
9 import pickle 9 import pickle
10 from sklearn import metrics 10 from sklearn import metrics
11 from sklearn.model_selection import train_test_split
11 12
12 import map_and_average 13 import map_and_average
13 import util_feature_learning 14 import util_feature_learning
14 15
15 16
18 19
19 def load_data_from_pickle(filename): 20 def load_data_from_pickle(filename):
20 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) 21 X_list, Y, Yaudio = pickle.load(open(filename,'rb'))
21 X = np.concatenate(X_list, axis=1) 22 X = np.concatenate(X_list, axis=1)
22 return X, Y, Yaudio 23 return X, Y, Yaudio
24
25
26 def feat_inds_from_pickle(filename):
27 X_list, Y, Yaudio = pickle.load(open(filename,'rb'))
28 feat_inds = [len(X_list[0]), len(X_list[1]), len(X_list[2]), len(X_list[3])]
29 feat_labels = ['rhy', 'mel', 'mfc', 'chr']
30 return feat_labels, feat_inds
23 31
24 32
25 def get_train_test_indices(audiolabs): 33 def get_train_test_indices(audiolabs):
26 trainset, valset, testset = map_and_average.load_train_val_test_sets() 34 trainset, valset, testset = map_and_average.load_train_val_test_sets()
27 trainaudiolabels, testaudiolabels = trainset[2], testset[2] 35 trainaudiolabels, testaudiolabels = trainset[2], testset[2]
50 X, Y, Yaudio = load_data_from_pickle(filename) 58 X, Y, Yaudio = load_data_from_pickle(filename)
51 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) 59 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
52 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) 60 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y)
53 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) 61 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test)
54 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) 62 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
55 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=transform_label) 63 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
56 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) 64 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True)
57 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) 65 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
58 return df_results 66 return df_results
59 67
60 68
61 def classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=" "): 69 def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "):
62 n_dim = X_train.shape[1] 70 n_dim = X_train.shape[1]
63 feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) 71 #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim)
72 feat_labels, feat_inds = feat_inds_from_pickle(filename)
64 #df_results = pd.DataFrame() 73 #df_results = pd.DataFrame()
65 # first the classification with all features together 74 # first the classification with all features together
66 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) 75 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
67 # then append for each feature separately 76 # then append for each feature separately
68 for i in range(len(feat_inds)): 77 for i in range(len(feat_inds)):