Mercurial > hg > plosone_underreview
comparison scripts/classification.py @ 62:4425a4918102 branch-tests
fixed indices for feature components
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Thu, 21 Sep 2017 17:35:07 +0100 |
parents | d118b6ca8370 |
children | e83ecc296669 |
comparison
legend: equal, deleted, inserted, replaced
61:ac3fcd42e7bd | 62:4425a4918102 |
---|---|
6 """ | 6 """ |
7 import numpy as np | 7 import numpy as np |
8 import pandas as pd | 8 import pandas as pd |
9 import pickle | 9 import pickle |
10 from sklearn import metrics | 10 from sklearn import metrics |
11 from sklearn.model_selection import train_test_split | |
11 | 12 |
12 import map_and_average | 13 import map_and_average |
13 import util_feature_learning | 14 import util_feature_learning |
14 | 15 |
15 | 16 |
18 | 19 |
19 def load_data_from_pickle(filename): | 20 def load_data_from_pickle(filename): |
20 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) | 21 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) |
21 X = np.concatenate(X_list, axis=1) | 22 X = np.concatenate(X_list, axis=1) |
22 return X, Y, Yaudio | 23 return X, Y, Yaudio |
24 | |
25 | |
26 def feat_inds_from_pickle(filename): | |
27 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) | |
28 feat_inds = [len(X_list[0]), len(X_list[1]), len(X_list[2]), len(X_list[3])] | |
29 feat_labels = ['rhy', 'mel', 'mfc', 'chr'] | |
30 return feat_labels, feat_inds | |
23 | 31 |
24 | 32 |
25 def get_train_test_indices(audiolabs): | 33 def get_train_test_indices(audiolabs): |
26 trainset, valset, testset = map_and_average.load_train_val_test_sets() | 34 trainset, valset, testset = map_and_average.load_train_val_test_sets() |
27 trainaudiolabels, testaudiolabels = trainset[2], testset[2] | 35 trainaudiolabels, testaudiolabels = trainset[2], testset[2] |
50 X, Y, Yaudio = load_data_from_pickle(filename) | 58 X, Y, Yaudio = load_data_from_pickle(filename) |
51 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) | 59 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) |
52 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) | 60 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) |
53 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) | 61 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) |
54 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) | 62 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) |
55 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=transform_label) | 63 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) |
56 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) | 64 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) |
57 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) | 65 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) |
58 return df_results | 66 return df_results |
59 | 67 |
60 | 68 |
61 def classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=" "): | 69 def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "): |
62 n_dim = X_train.shape[1] | 70 n_dim = X_train.shape[1] |
63 feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) | 71 #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) |
72 feat_labels, feat_inds = feat_inds_from_pickle(filename) | |
64 #df_results = pd.DataFrame() | 73 #df_results = pd.DataFrame() |
65 # first the classification with all features together | 74 # first the classification with all features together |
66 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) | 75 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) |
67 # then append for each feature separately | 76 # then append for each feature separately |
68 for i in range(len(feat_inds)): | 77 for i in range(len(feat_inds)): |