diff notebooks/sensitivity_experiment_server_mapper.py @ 48:08b9327f1935 branch-tests

mapper now writes output
author mpanteli <m.x.panteli@gmail.com>
date Fri, 15 Sep 2017 17:46:45 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notebooks/sensitivity_experiment_server_mapper.py	Fri Sep 15 17:46:45 2017 +0100
@@ -0,0 +1,44 @@
+import numpy as np
+import pandas as pd
+import sys
+sys.path.append('../')  # lets the scripts.* imports below be looked up in the parent directory
+import scripts.load_dataset as load_dataset
+import scripts.map_and_average as mapper
+import scripts.classification as classification  # referenced only by the disabled section below
+import scripts.outliers as outliers  # referenced only by the disabled section below
+# Runs the LDA mapping step for one iteration index n (taken from the command line) and writes its output.
+# (disabled) df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)
+OUTPUT_FILES = load_dataset.OUTPUT_FILES  # base names from which the mapper's input names are derived
+n_iters = 1  # not read by any active code; only the commented-out loop below mentions it
+n = int(sys.argv[1])  # iteration index from the first command-line argument; raises if missing or non-integer
+MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES  # saved before mapper.OUTPUT_FILES is overwritten below
+# The loop over iterations is disabled; "if 1:" is always true, so the body runs exactly once for the given n.
+#for n in range(n_iters):
+if 1:
+    print "iteration %d" % n  # Python 2 print statement
+    # Point the mapper at this iteration's inputs: "<base>_<n>.pickle" for every base name in OUTPUT_FILES.
+    print "mapping..."
+    mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for 
+                                 output_file in OUTPUT_FILES]
+    _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)  # keeps 3 of the 7 returned values
+    mapper.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for 
+                                 output_file in MAPPER_OUTPUT_FILES]  # same "_<n>" suffix applied to the mapper's output names
+    mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)  # empty lists are passed in the positions of the values discarded above
+    # Everything below is disabled: feature concatenation, classification, outlier detection and the CSV export.
+    #X = np.concatenate(ldadata_list, axis=1)
+    
+    ## classification and confusion
+    #print "classifying..."
+    #traininds, testinds = classification.get_train_test_indices(Yaudio)
+    #X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)
+    #accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)
+    #print accuracy
+    
+    ## outliers
+    #print "detecting outliers..."
+    #df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)
+    #outliers.print_most_least_outliers_topN(df_global, N=10)
+    
+    ## write output
+    #print "writing file"
+    #df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)
\ No newline at end of file