annotate notebooks/sensitivity_experiment.ipynb @ 19:0bba6f63f4fd branch-tests

some edits in notebook
author mpanteli <m.x.panteli@gmail.com>
date Wed, 13 Sep 2017 12:09:55 +0100
parents ed109218dd4b
children 206fdeff4641
rev   line source
m@15 1 {
m@15 2 "cells": [
m@15 3 {
m@15 4 "cell_type": "code",
m@19 5 "execution_count": 8,
m@17 6 "metadata": {},
m@15 7 "outputs": [
m@15 8 {
m@17 9 "name": "stdout",
m@15 10 "output_type": "stream",
m@15 11 "text": [
m@17 12 "The autoreload extension is already loaded. To reload it, use:\n",
m@17 13 " %reload_ext autoreload\n"
m@15 14 ]
m@15 15 }
m@15 16 ],
m@15 17 "source": [
m@15 18 "import numpy as np\n",
m@15 19 "\n",
m@15 20 "%matplotlib inline\n",
m@15 21 "import matplotlib.pyplot as plt\n",
m@15 22 "\n",
m@15 23 "%load_ext autoreload\n",
m@15 24 "%autoreload 2\n",
m@15 25 "\n",
m@15 26 "import sys\n",
m@15 27 "sys.path.append('../')\n",
m@15 28 "import scripts.load_dataset as load_dataset\n",
m@15 29 "import scripts.map_and_average as mapper\n",
Maria@18 30 "import scripts.classification as classification\n",
Maria@18 31 "import scripts.outliers as outliers"
m@15 32 ]
m@15 33 },
m@15 34 {
m@15 35 "cell_type": "code",
m@19 36 "execution_count": null,
m@19 37 "metadata": {
m@19 38 "collapsed": true
m@19 39 },
m@15 40 "outputs": [],
m@15 41 "source": [
m@17 42 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n",
m@19 43 "n_iters = 10\n",
m@19 44 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)"
m@15 45 ]
m@15 46 },
m@15 47 {
m@15 48 "cell_type": "code",
m@17 49 "execution_count": null,
m@19 50 "metadata": {
m@19 51 "collapsed": true
m@19 52 },
m@17 53 "outputs": [],
m@15 54 "source": [
m@15 55 "for n in range(n_iters):\n",
m@15 56 " print \"iteration %d\" % n\n",
m@15 57 " load_dataset.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
m@15 58 " output_file in OUTPUT_FILES]\n",
m@15 59 " load_dataset.features_for_train_test_sets(df, write_output=True)"
m@15 60 ]
m@15 61 },
m@15 62 {
m@15 63 "cell_type": "code",
m@15 64 "execution_count": null,
m@15 65 "metadata": {
m@15 66 "collapsed": true
m@15 67 },
m@15 68 "outputs": [],
m@15 69 "source": [
m@15 70 "for n in range(n_iters):\n",
m@15 71 " print \"iteration %d\" % n\n",
m@15 72 " \n",
m@15 73 " print \"mapping...\"\n",
m@15 74 " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
m@15 75 " output_file in OUTPUT_FILES]\n",
m@15 76 " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
m@15 77 " X = np.concatenate(ldadata_list)\n",
m@15 78 " \n",
m@15 79 " # classification and confusion\n",
m@15 80 " print \"classifying...\"\n",
Maria@18 81 " traininds, testinds = classification.get_train_test_indices()\n",
Maria@18 82 " X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n",
Maria@18 83 " accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
m@15 84 " print accuracy\n",
m@15 85 " \n",
m@15 86 " # outliers\n",
m@15 87 " print \"detecting outliers...\"\n",
Maria@18 88 " ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n",
m@15 89 " df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n",
m@15 90 " outliers.print_most_least_outliers_topN(df_global, N=10)\n",
m@15 91 " \n",
m@15 92 " # write output\n",
m@15 93 " print \"writing file\"\n",
m@15 94 " df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)"
m@15 95 ]
m@15 96 }
m@15 97 ],
m@15 98 "metadata": {
m@15 99 "kernelspec": {
m@15 100 "display_name": "Python 2",
m@15 101 "language": "python",
m@15 102 "name": "python2"
m@19 103 },
m@19 104 "language_info": {
m@19 105 "codemirror_mode": {
m@19 106 "name": "ipython",
m@19 107 "version": 2
m@19 108 },
m@19 109 "file_extension": ".py",
m@19 110 "mimetype": "text/x-python",
m@19 111 "name": "python",
m@19 112 "nbconvert_exporter": "python",
m@19 113 "pygments_lexer": "ipython2",
m@19 114 "version": "2.7.12"
m@15 115 }
m@15 116 },
m@15 117 "nbformat": 4,
m@17 118 "nbformat_minor": 1
m@15 119 }