m@15
|
1 {
|
m@15
|
2 "cells": [
|
m@15
|
3 {
|
m@15
|
4 "cell_type": "code",
|
m@15
|
5 "execution_count": 1,
|
m@15
|
6 "metadata": {
|
m@15
|
7 "collapsed": false
|
m@15
|
8 },
|
m@15
|
9 "outputs": [
|
m@15
|
10 {
|
m@15
|
11 "name": "stderr",
|
m@15
|
12 "output_type": "stream",
|
m@15
|
13 "text": [
|
m@15
|
14 "/Users/mariapanteli/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n",
|
m@15
|
15 " warnings.warn('Could not import scikits.samplerate. '\n"
|
m@15
|
16 ]
|
m@15
|
17 }
|
m@15
|
18 ],
|
m@15
|
19 "source": [
|
m@15
|
20 "import numpy as np\n",
|
m@15
|
21 "\n",
|
m@15
|
22 "%matplotlib inline\n",
|
m@15
|
23 "import matplotlib.pyplot as plt\n",
|
m@15
|
24 "\n",
|
m@15
|
25 "%load_ext autoreload\n",
|
m@15
|
26 "%autoreload 2\n",
|
m@15
|
27 "\n",
|
m@15
|
28 "import sys\n",
|
m@15
|
29 "sys.path.append('../')\n",
|
m@15
|
30 "import scripts.load_dataset as load_dataset\n",
|
m@15
|
31 "import scripts.map_and_average as mapper\n",
|
m@15
|
32 "import scripts.results_classification as results_class\n",
|
m@15
|
33 "import scripts.results as results"
|
m@15
|
34 ]
|
m@15
|
35 },
|
m@15
|
36 {
|
m@15
|
37 "cell_type": "code",
|
m@15
|
38 "execution_count": 2,
|
m@15
|
39 "metadata": {
|
m@15
|
40 "collapsed": true
|
m@15
|
41 },
|
m@15
|
42 "outputs": [],
|
m@15
|
43 "source": [
|
m@15
|
44 "OUTPUT_FILES = ['/import/c4dm-04/mariap/train_data_melodia_'+str(load_dataset.WIN_SIZE)+'.pickle', \n",
|
m@15
|
45 " '/import/c4dm-04/mariap/val_data_melodia_'+str(load_dataset.WIN_SIZE)+'.pickle', \n",
|
m@15
|
46 " '/import/c4dm-04/mariap/test_data_melodia_'+str(load_dataset.WIN_SIZE)+'.pickle']\n",
|
m@15
|
47 "n_iters = 10 # number of passes made by each of the loops below"
|
m@15
|
48 ]
|
m@15
|
49 },
|
m@15
|
50 {
|
m@15
|
51 "cell_type": "code",
|
m@15
|
52 "execution_count": 5,
|
m@15
|
53 "metadata": {
|
m@15
|
54 "collapsed": false
|
m@15
|
55 },
|
m@15
|
56 "outputs": [
|
m@15
|
57 {
|
m@15
|
58 "ename": "IOError",
|
m@15
|
59 "evalue": "File data/metadata_BLSM_language_all.csv does not exist",
|
m@15
|
60 "output_type": "error",
|
m@15
|
61 "traceback": [
|
m@15
|
62 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
m@15
|
63 "\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)",
|
m@15
|
64 "\u001b[0;32m<ipython-input-5-8d1030af886f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_iters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcsv_file\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMETADATA_FILE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m load_dataset.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n\u001b[1;32m 4\u001b[0m output_file in load_dataset.OUTPUT_FILES]\n\u001b[1;32m 5\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures_for_train_test_sets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
65 "\u001b[0;32m/Users/mariapanteli/Documents/QMUL/Code/MyPythonCode/plosone_underreview/scripts/load_dataset.py\u001b[0m in \u001b[0;36msample_dataset\u001b[0;34m(csv_file)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mmetadata\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mselected\u001b[0m \u001b[0msubset\u001b[0m \u001b[0mof\u001b[0m \u001b[0mtracks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 131\u001b[0m \"\"\"\n\u001b[0;32m--> 132\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcsv_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 133\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutil_filter_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremove_missing_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0msubset_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msubset_labels\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Country'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
66 "\u001b[0;32m/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)\u001b[0m\n\u001b[1;32m 463\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 464\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 465\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 466\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
67 "\u001b[0;32m/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 241\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mchunksize\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
68 "\u001b[0;32m/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 555\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 556\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 557\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 558\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 559\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_options_with_defaults\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
69 "\u001b[0;32m/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 692\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 694\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 695\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 696\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
70 "\u001b[0;32m/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1059\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'allow_leading_cols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1060\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1061\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1062\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[0;31m# XXX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
m@15
|
71 "\u001b[0;32mpandas/parser.pyx\u001b[0m in \u001b[0;36mpandas.parser.TextReader.__cinit__ (pandas/parser.c:3163)\u001b[0;34m()\u001b[0m\n",
|
m@15
|
72 "\u001b[0;32mpandas/parser.pyx\u001b[0m in \u001b[0;36mpandas.parser.TextReader._setup_parser_source (pandas/parser.c:5779)\u001b[0;34m()\u001b[0m\n",
|
m@15
|
73 "\u001b[0;31mIOError\u001b[0m: File data/metadata_BLSM_language_all.csv does not exist"
|
m@15
|
74 ]
|
m@15
|
75 }
|
m@15
|
76 ],
|
m@15
|
77 "source": [
|
m@15
|
78 "for n in range(n_iters):\n",
|
m@15
|
79 " print \"iteration %d\" % n\n",
|
m@15
|
80 " df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)\n",
|
m@15
|
81 " suffix = '_'+str(n)+'.pickle' # per-iteration file name ending\n",
|
m@15
|
82 " load_dataset.OUTPUT_FILES = [path.split('.pickle')[0]+suffix for path in OUTPUT_FILES]\n",
|
m@15
|
83 " load_dataset.features_for_train_test_sets(df, write_output=True)"
|
m@15
|
84 ]
|
m@15
|
85 },
|
m@15
|
86 {
|
m@15
|
87 "cell_type": "code",
|
m@15
|
88 "execution_count": null,
|
m@15
|
89 "metadata": {
|
m@15
|
90 "collapsed": true
|
m@15
|
91 },
|
m@15
|
92 "outputs": [],
|
m@15
|
93 "source": [
|
m@15
|
94 "for n in range(n_iters):\n",
|
m@15
|
95 " print \"iteration %d\" % n\n",
|
m@15
|
96 " \n",
|
m@15
|
97 " print \"mapping...\"\n",
|
m@15
|
98 " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
|
m@15
|
99 " output_file in OUTPUT_FILES]\n",
|
m@15
|
100 " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
|
m@15
|
101 " X = np.concatenate(ldadata_list)\n",
|
m@15
|
102 " \n",
|
m@15
|
103 " # classification and confusion\n",
|
m@15
|
104 " print \"classifying...\"\n",
|
m@15
|
105 " traininds, testinds = results_class.get_train_test_indices()\n",
|
m@15
|
106 " X_train, Y_train, X_test, Y_test = results_class.get_train_test_sets(X, Y, traininds, testinds)\n",
|
m@15
|
107 " accuracy, _ = results_class.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
|
m@15
|
108 " print accuracy\n",
|
m@15
|
109 " \n",
|
m@15
|
110 " # outliers (helpers are called through the imported results module)\n",
|
m@15
|
111 " print \"detecting outliers...\"\n",
|
m@15
|
112 " ddf = results.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n",
|
m@15
|
113 " df_global, threshold, MD = results.get_outliers_df(X, Y, chi2thr=0.999)\n",
|
m@15
|
114 " results.print_most_least_outliers_topN(df_global, N=10)\n",
|
m@15
|
115 " \n",
|
m@15
|
116 " # write output: one csv per iteration\n",
|
m@15
|
117 " print \"writing file\"\n",
|
m@15
|
118 " df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)"
|
m@15
|
119 ]
|
m@15
|
120 }
|
m@15
|
121 ],
|
m@15
|
122 "metadata": {
|
m@15
|
123 "kernelspec": {
|
m@15
|
124 "display_name": "Python 2",
|
m@15
|
125 "language": "python",
|
m@15
|
126 "name": "python2"
|
m@15
|
127 },
|
m@15
|
128 "language_info": {
|
m@15
|
129 "codemirror_mode": {
|
m@15
|
130 "name": "ipython",
|
m@15
|
131 "version": 2
|
m@15
|
132 },
|
m@15
|
133 "file_extension": ".py",
|
m@15
|
134 "mimetype": "text/x-python",
|
m@15
|
135 "name": "python",
|
m@15
|
136 "nbconvert_exporter": "python",
|
m@15
|
137 "pygments_lexer": "ipython2",
|
m@15
|
138 "version": "2.7.12"
|
m@15
|
139 }
|
m@15
|
140 },
|
m@15
|
141 "nbformat": 4,
|
m@15
|
142 "nbformat_minor": 0
|
m@15
|
143 }
|