Mercurial > hg > plosone_underreview
comparison scripts/classification.py @ 65:9b10b688c2ac branch-tests
results for 30 seconds
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Thu, 21 Sep 2017 20:11:43 +0100 |
parents | e83ecc296669 |
children | 98fc06ba2938 |
comparison
legend: equal | deleted | inserted | replaced
64:e83ecc296669 | 65:9b10b688c2ac |
---|---|
14 import util_feature_learning | 14 import util_feature_learning |
15 | 15 |
16 | 16 |
17 FILENAMES = map_and_average.OUTPUT_FILES | 17 FILENAMES = map_and_average.OUTPUT_FILES |
18 TRANSFORM_LABELS = ['LDA', 'PCA', 'NMF', 'SSNMF', 'NA'] | 18 TRANSFORM_LABELS = ['LDA', 'PCA', 'NMF', 'SSNMF', 'NA'] |
19 RANDOM_STATE = 12345 | |
19 | 20 |
20 def load_data_from_pickle(filename): | 21 def load_data_from_pickle(filename): |
21 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) | 22 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) |
22 X = np.concatenate(X_list, axis=1) | 23 X = np.concatenate(X_list, axis=1) |
23 return X, Y, Yaudio | 24 return X, Y, Yaudio |
24 | 25 |
25 | 26 |
26 def feat_inds_from_pickle(filename): | 27 def feat_inds_from_pickle(filename): |
27 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) | 28 X_list, Y, Yaudio = pickle.load(open(filename,'rb')) |
28 feat_inds = [len(X_list[0]), len(X_list[1]), len(X_list[2]), len(X_list[3])] | 29 len_inds = np.array([X_list[0].shape[1], X_list[1].shape[1], |
30 X_list[2].shape[1], X_list[3].shape[1]]) | |
31 cum_sum = np.concatenate([[0], np.cumsum(len_inds)]) | |
32 feat_inds = [np.arange(cum_sum[i], cum_sum[i+1]) for i in range(len(X_list))] | |
33 #feat_inds = [X_list[0].shape[1], X_list[1].shape[1], X_list[2].shape[1], X_list[3].shape[1]] | |
29 feat_labels = ['rhy', 'mel', 'mfc', 'chr'] | 34 feat_labels = ['rhy', 'mel', 'mfc', 'chr'] |
30 return feat_labels, feat_inds | 35 return feat_labels, feat_inds |
31 | 36 |
32 | 37 |
33 def get_train_test_indices(audiolabs): | 38 def get_train_test_indices(audiolabs): |
55 #traininds, testinds = get_train_test_indices(Yaudio) | 60 #traininds, testinds = get_train_test_indices(Yaudio) |
56 for filename, transform_label in zip(file_list, TRANSFORM_LABELS): | 61 for filename, transform_label in zip(file_list, TRANSFORM_LABELS): |
57 print filename | 62 print filename |
58 X, Y, Yaudio = load_data_from_pickle(filename) | 63 X, Y, Yaudio = load_data_from_pickle(filename) |
59 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) | 64 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) |
60 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) | 65 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y) |
61 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) | 66 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test) |
62 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) | 67 #df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) |
63 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) | 68 #df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) |
64 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) | 69 #df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) |
70 #df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) | |
71 df_result = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) | |
65 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) | 72 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) |
66 return df_results | 73 return df_results |
67 | 74 |
68 | 75 |
69 def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "): | 76 def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "): |
70 n_dim = X_train.shape[1] | 77 n_dim = X_train.shape[1] |
71 #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) | 78 #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) |
72 feat_labels, feat_inds = feat_inds_from_pickle(filename) | 79 feat_labels, feat_inds = feat_inds_from_pickle(filename) |
73 #df_results = pd.DataFrame() | 80 #df_results = pd.DataFrame() |
81 feat_learner = util_feature_learning.Transformer() | |
74 # first the classification with all features together | 82 # first the classification with all features together |
75 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) | 83 df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) |
76 # then append for each feature separately | 84 # then append for each feature separately |
77 for i in range(len(feat_inds)): | 85 for i in range(len(feat_inds)): |
78 df_result = feat_learner.classify(X_train[:, feat_inds[i]], Y_train, | 86 df_result = feat_learner.classify(X_train[:, feat_inds[i]], Y_train, |
79 X_test[:, feat_inds[i]], Y_test) | 87 X_test[:, feat_inds[i]], Y_test, transform_label=transform_label) |
80 df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True) | 88 df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True) |
81 return df_results | 89 return df_results |
82 | 90 |
83 | 91 |
84 def plot_CF(CF, labels=None, figurename=None): | 92 def plot_CF(CF, labels=None, figurename=None): |
110 filename = FILENAMES[feat_learning_i] | 118 filename = FILENAMES[feat_learning_i] |
111 print filename | 119 print filename |
112 X, Y, Yaudio = load_data_from_pickle(filename) | 120 X, Y, Yaudio = load_data_from_pickle(filename) |
113 #traininds, testinds = get_train_test_indices(Yaudio) | 121 #traininds, testinds = get_train_test_indices(Yaudio) |
114 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) | 122 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) |
115 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) | 123 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y) |
116 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) | 124 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test) |
117 if output_data: | 125 if output_data: |
118 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True) | 126 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True) |
119 else: | 127 else: |
120 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False) | 128 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False) |
121 return CF | 129 return CF |