annotate notebooks/sensitivity_experiment_server_mapper.py @ 105:edd82eb89b4b branch-tests tip

Merge
author Maria Panteli
date Sun, 15 Oct 2017 13:36:59 +0100
parents 08b9327f1935
children
rev   line source
m@48 1 import numpy as np
m@48 2 import pandas as pd
m@48 3 import sys
m@48 4 sys.path.append('../')
m@48 5 import scripts.load_dataset as load_dataset
m@48 6 import scripts.map_and_average as mapper
m@48 7 import scripts.classification as classification
m@48 8 import scripts.outliers as outliers
m@48 9
m@48 10 #df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)
# Sensitivity experiment, mapping step only: for one iteration index given on
# the command line, run the LDA map-and-average step on that iteration's
# dataset files and write the mapped data to iteration-specific output files.

OUTPUT_FILES = load_dataset.OUTPUT_FILES
n_iters = 1  # only referenced by the disabled in-process loop (see below)

# The iteration index comes from the command line instead of looping over
# range(n_iters) in-process. Fail with a usage message rather than a bare
# IndexError when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s <iteration-index>" % sys.argv[0])
n = int(sys.argv[1])

# Keep the mapper's original output names: mapper.OUTPUT_FILES is overwritten
# below with the iteration-specific names.
MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES


def _with_iteration_suffix(paths, iteration):
    """Return `paths` with '_<iteration>' inserted before '.pickle'.

    Each path is cut at its first '.pickle' occurrence and
    '_<iteration>.pickle' is appended, e.g. 'train.pickle' with iteration 3
    becomes 'train_3.pickle'.
    """
    suffix = '_' + str(iteration) + '.pickle'
    return [path.split('.pickle')[0] + suffix for path in paths]


# Single-argument print(...) produces the same output on Python 2 and 3.
print("iteration %d" % n)

print("mapping...")
# Point the mapper at this iteration's dataset files before mapping.
# NOTE(review): presumably lda_map_and_average_frames reads mapper.INPUT_FILES
# and write_output writes to mapper.OUTPUT_FILES -- confirm in
# scripts/map_and_average.py.
mapper.INPUT_FILES = _with_iteration_suffix(OUTPUT_FILES, n)
_, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)
mapper.OUTPUT_FILES = _with_iteration_suffix(MAPPER_OUTPUT_FILES, n)
# Only the LDA-mapped data and the label arrays are written; the other
# output slots are passed as empty lists.
mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)

# --- Disabled downstream steps, kept for reference -------------------------
#X = np.concatenate(ldadata_list, axis=1)

## classification and confusion
#print "classifying..."
#traininds, testinds = classification.get_train_test_indices(Yaudio)
#X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)
#accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)
#print accuracy

## outliers
#print "detecting outliers..."
#df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)
#outliers.print_most_least_outliers_topN(df_global, N=10)

## write output
#print "writing file"
#df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)