{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "import scripts.load_dataset as load_dataset\n",
    "import scripts.map_and_average as mapper\n",
    "import scripts.classification as classification\n",
    "import scripts.outliers as outliers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n",
    "n_iters = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)\n",
    "for n in range(n_iters):\n",
    "    print \"iteration %d\" % n\n",
    "    load_dataset.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
    "                                 output_file in OUTPUT_FILES]\n",
    "    load_dataset.features_for_train_test_sets(df, write_output=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for n in range(n_iters):\n",
    "    print \"iteration %d\" % n\n",
    "    \n",
    "    print \"mapping...\"\n",
    "    mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
    "                          output_file in OUTPUT_FILES]\n",
    "    _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
    "    X = np.concatenate(ldadata_list)\n",
    "    \n",
    "    # classification and confusion\n",
    "    print \"classifying...\"\n",
    "    traininds, testinds = classification.get_train_test_indices()\n",
    "    X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n",
    "    accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
    "    print accuracy\n",
    "    \n",
    "    # outliers\n",
    "    print \"detecting outliers...\"\n",
    "    ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n",
    "    df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n",
    "    outliers.print_most_least_outliers_topN(df_global, N=10)\n",
    "    \n",
    "    # write output\n",
    "    print \"writing file\"\n",
    "    df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}