annotate notebooks/sensitivity_experiment.ipynb @ 18:ed109218dd4b branch-tests

rename result scripts and more tests
author Maria Panteli
date Tue, 12 Sep 2017 23:18:19 +0100
parents 2e487b9c0a7b
children 0bba6f63f4fd
rev   line source
m@15 1 {
m@15 2 "cells": [
m@15 3 {
m@15 4 "cell_type": "code",
m@17 5 "execution_count": 2,
m@17 6 "metadata": {},
m@15 7 "outputs": [
m@15 8 {
m@17 9 "name": "stdout",
m@15 10 "output_type": "stream",
m@15 11 "text": [
m@17 12 "The autoreload extension is already loaded. To reload it, use:\n",
m@17 13 "  %reload_ext autoreload\n"
m@15 14 ]
m@15 15 }
m@15 16 ],
m@15 17 "source": [
m@15 18 "import numpy as np\n",
m@15 19 "\n",
m@15 20 "%matplotlib inline\n",
m@15 21 "import matplotlib.pyplot as plt\n",
m@15 22 "\n",
m@15 23 "%load_ext autoreload\n",
m@15 24 "%autoreload 2\n",
m@15 25 "\n",
m@15 26 "import sys\n",
m@15 27 "sys.path.append('../')\n",
m@15 28 "import scripts.load_dataset as load_dataset\n",
m@15 29 "import scripts.map_and_average as mapper\n",
Maria@18 30 "import scripts.classification as classification\n",
Maria@18 31 "import scripts.outliers as outliers"
m@15 32 ]
m@15 33 },
m@15 34 {
m@15 35 "cell_type": "code",
m@17 36 "execution_count": 4,
m@17 37 "metadata": {},
m@15 38 "outputs": [],
m@15 39 "source": [
m@17 40 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n",
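m@15 41 "# number of dataset resampling iterations for the sensitivity experiment\n",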
m@15 41 "n_iters = 10"
m@15 42 ]
m@15 43 },
m@15 44 {
m@15 45 "cell_type": "code",
m@17 46 "execution_count": null,
m@17 47 "metadata": {},
m@17 48 "outputs": [],
m@15 49 "source": [
m@17 50 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)\n",
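m@15 51 "# each pass writes features to per-iteration pickles (presumably with a fresh train/test split per call)\n",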
m@15 51 "for n in range(n_iters):\n",
m@15 52 "    print \"iteration %d\" % n\n",
m@15 53 "    load_dataset.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
m@15 54 "        output_file in OUTPUT_FILES]\n",
m@15 55 "    load_dataset.features_for_train_test_sets(df, write_output=True)"
m@15 56 ]
m@15 57 },
m@15 58 {
m@15 59 "cell_type": "code",
m@15 60 "execution_count": null,
m@15 61 "metadata": {
m@15 62 "collapsed": true
m@15 63 },
m@15 64 "outputs": [],
m@15 65 "source": [
m@15 66 "for n in range(n_iters):\n",
m@15 67 "    print \"iteration %d\" % n\n",
m@15 68 "    \n",
m@15 69 "    print \"mapping...\"\n",
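m@15 70 "    # point the mapper at this iteration's pickles; get back LDA-projected data, labels Y, audio ids Yaudio\n",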
m@15 70 "    mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
m@15 71 "        output_file in OUTPUT_FILES]\n",
m@15 72 "    _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
m@15 73 "    X = np.concatenate(ldadata_list)\n",
m@15 74 "    \n",
m@15 75 "    # classification and confusion\n",
m@15 76 "    print \"classifying...\"\n",
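Maria@18 77 "    # train/test split, then classification accuracy via the confusion-matrix helper\n",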
Maria@18 77 "    traininds, testinds = classification.get_train_test_indices()\n",
Maria@18 78 "    X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n",
Maria@18 79 "    accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
m@15 80 "    print accuracy\n",
m@15 81 "    \n",
m@15 82 "    # outliers\n",
m@15 83 "    print \"detecting outliers...\"\n",
Maria@18 84 "    ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n",
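m@15 85 "    # note: ddf (audio-id metadata) is not used below; outliers are computed from X and Y directly\n",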
m@15 85 "    df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n",
m@15 86 "    outliers.print_most_least_outliers_topN(df_global, N=10)\n",
m@15 87 "    \n",
m@15 88 "    # write output\n",
m@15 89 "    print \"writing file\"\n",
m@15 90 "    df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)"
m@15 91 ]
m@15 92 }
m@15 93 ],
m@15 94 "metadata": {
m@15 95 "kernelspec": {
m@15 96 "display_name": "Python 2",
m@15 97 "language": "python",
m@15 98 "name": "python2"
m@15 99 }
m@15 100 },
m@15 101 "nbformat": 4,
m@17 102 "nbformat_minor": 1
m@15 103 }