m@15
|
1 {
|
m@15
|
2 "cells": [
|
m@15
|
3 {
|
m@15
|
4 "cell_type": "code",
|
m@19
|
5 "execution_count": 8,
|
m@17
|
6 "metadata": {},
|
m@15
|
7 "outputs": [
|
m@15
|
8 {
|
m@17
|
9 "name": "stdout",
|
m@15
|
10 "output_type": "stream",
|
m@15
|
11 "text": [
|
m@17
|
12 "The autoreload extension is already loaded. To reload it, use:\n",
|
m@17
|
13 " %reload_ext autoreload\n"
|
m@15
|
14 ]
|
m@15
|
15 }
|
m@15
|
16 ],
|
m@15
|
17 "source": [
|
m@15
|
18 "import numpy as np\n",
|
m@15
|
19 "\n",
|
m@15
|
20 "%matplotlib inline\n",
|
m@15
|
21 "import matplotlib.pyplot as plt\n",
|
m@15
|
22 "\n",
|
m@15
|
23 "%load_ext autoreload\n",
|
m@15
|
24 "%autoreload 2\n",
|
m@15
|
25 "\n",
|
m@15
|
26 "import sys\n",
|
m@15
|
27 "sys.path.append('../')\n",
|
m@15
|
28 "import scripts.load_dataset as load_dataset\n",
|
m@15
|
29 "import scripts.map_and_average as mapper\n",
|
Maria@18
|
30 "import scripts.classification\n",
|
Maria@18
|
31 "import scripts.outliers as outliers"
|
m@15
|
32 ]
|
m@15
|
33 },
|
m@15
|
34 {
|
m@15
|
35 "cell_type": "code",
|
m@19
|
36 "execution_count": null,
|
m@19
|
37 "metadata": {
|
m@19
|
38 "collapsed": true
|
m@19
|
39 },
|
m@15
|
40 "outputs": [],
|
m@15
|
41 "source": [
|
m@17
|
42 "OUTPUT_FILES = load_dataset.OUTPUT_FILES  # snapshot of the base file names; the loops below derive per-iteration names from it\n",
|
m@19
|
43 "n_iters = 10  # number of iterations run by each of the loops in the following cells\n",
|
m@19
|
44 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)  # passed to features_for_train_test_sets in the next cell"
|
m@15
|
45 ]
|
m@15
|
46 },
|
m@15
|
47 {
|
m@15
|
48 "cell_type": "code",
|
m@17
|
49 "execution_count": null,
|
m@19
|
50 "metadata": {
|
m@19
|
51 "collapsed": true
|
m@19
|
52 },
|
m@17
|
53 "outputs": [],
|
m@15
|
54 "source": [
|
m@15
|
55 "for n in range(n_iters):  # one features_for_train_test_sets call per iteration, each with its own file names\n",
|
m@15
|
56 " print \"iteration %d\" % n\n",
|
m@15
|
57 " load_dataset.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
|
m@15
|
58 " output_file in OUTPUT_FILES]  # insert '_' + n before the '.pickle' suffix of every base name\n",
|
m@15
|
59 " load_dataset.features_for_train_test_sets(df, write_output=True)  # presumably writes to load_dataset.OUTPUT_FILES - TODO confirm"
|
m@15
|
60 ]
|
m@15
|
61 },
|
m@15
|
62 {
|
m@15
|
63 "cell_type": "code",
|
m@15
|
64 "execution_count": null,
|
m@15
|
65 "metadata": {
|
m@15
|
66 "collapsed": true
|
m@15
|
67 },
|
m@15
|
68 "outputs": [],
|
m@15
|
69 "source": [
|
m@15
|
70 "for n in range(n_iters):\n",
|
m@15
|
71 " print \"iteration %d\" % n\n",
|
m@15
|
72 " \n",
|
m@15
|
73 " print \"mapping...\"\n",
|
m@15
|
74 " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
|
m@15
|
75 " output_file in OUTPUT_FILES]\n",
|
m@15
|
76 " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
|
m@15
|
77 " X = np.concatenate(ldadata_list)\n",
|
m@15
|
78 " \n",
|
m@15
|
79 " # classification and confusion\n",
|
m@15
|
80 " print \"classifying...\"\n",
|
Maria@18
|
81 " traininds, testinds = classification.get_train_test_indices()\n",
|
Maria@18
|
82 " X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n",
|
Maria@18
|
83 " accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
|
m@15
|
84 " print accuracy\n",
|
m@15
|
85 " \n",
|
m@15
|
86 " # outliers\n",
|
m@15
|
87 " print \"detecting outliers...\"\n",
|
Maria@18
|
88 " ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n",
|
m@15
|
89 " df_global, threshold, MD = get_outliers_df(X, Y, chi2thr=0.999)\n",
|
m@15
|
90 " print_most_least_outliers_topN(df_global, N=10)\n",
|
m@15
|
91 " \n",
|
m@15
|
92 " # write output\n",
|
m@15
|
93 " print \"writing file\"\n",
|
m@15
|
94 " df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)"
|
m@15
|
95 ]
|
m@15
|
96 }
|
m@15
|
97 ],
|
m@15
|
98 "metadata": {
|
m@15
|
99 "kernelspec": {
|
m@15
|
100 "display_name": "Python 2",
|
m@15
|
101 "language": "python",
|
m@15
|
102 "name": "python2"
|
m@19
|
103 },
|
m@19
|
104 "language_info": {
|
m@19
|
105 "codemirror_mode": {
|
m@19
|
106 "name": "ipython",
|
m@19
|
107 "version": 2
|
m@19
|
108 },
|
m@19
|
109 "file_extension": ".py",
|
m@19
|
110 "mimetype": "text/x-python",
|
m@19
|
111 "name": "python",
|
m@19
|
112 "nbconvert_exporter": "python",
|
m@19
|
113 "pygments_lexer": "ipython2",
|
m@19
|
114 "version": "2.7.12"
|
m@15
|
115 }
|
m@15
|
116 },
|
m@15
|
117 "nbformat": 4,
|
m@17
|
118 "nbformat_minor": 1
|
m@15
|
119 }
|