diff notebooks/sensitivity_experiment.ipynb @ 90:e279ccea5f9b branch-tests

results on 30sec
author mpanteli <m.x.panteli@gmail.com>
date Mon, 02 Oct 2017 15:32:23 +0100
parents 4395037087b6
children
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb	Mon Oct 02 12:37:55 2017 +0100
+++ b/notebooks/sensitivity_experiment.ipynb	Mon Oct 02 15:32:23 2017 +0100
@@ -2,22 +2,15 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "ERROR! Session/line number was not unique in database. History logging moved to new session 32\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n",
-      "  warnings.warn('Could not import scikits.samplerate. '\n"
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
      ]
     }
    ],
@@ -42,7 +35,9 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n",
@@ -5255,7 +5250,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -5263,294 +5258,300 @@
      "output_type": "stream",
      "text": [
       "iteration 0\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "95            Chad  0.555556          9           5\n",
-      "86          Gambia  0.525000         40          21\n",
-      "135  French Guiana  0.500000         22          11\n",
-      "44           Benin  0.476190         21          10\n",
-      "15         Liberia  0.468750         32          15\n",
-      "136       Botswana  0.458333         72          33\n",
+      "136       Botswana  0.625000         72          45\n",
+      "59            Chad  0.555556          9           5\n",
+      "42           Benin  0.523810         21          11\n",
+      "31     Ivory Coast  0.500000         12           6\n",
+      "20        Pakistan  0.493151         73          36\n",
+      "63      Mozambique  0.481481         27          13\n",
+      "106          Nepal  0.460526         76          35\n",
+      "17   French Guiana  0.454545         22          10\n",
       "104         Bhutan  0.444444          9           4\n",
-      "68          Brazil  0.437500         80          35\n",
-      "92     Switzerland  0.428571         42          18\n",
-      "78     El Salvador  0.423077         26          11\n",
+      "86          Gambia  0.425000         40          17\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "29                         Tajikistan  0.000000         15           0\n",
-      "32                     Czech Republic  0.000000         33           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "0                              Canada  0.050000         80           4\n",
-      "73                            Nigeria  0.051948         77           4\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "105                             Sudan  0.055556         54           3\n",
+      "                 Country  Outliers  N_Country  N_Outliers\n",
+      "100  Antigua and Barbuda  0.000000         34           0\n",
+      "28            Tajikistan  0.000000         15           0\n",
+      "113              Iceland  0.000000         11           0\n",
+      "119              Denmark  0.000000         13           0\n",
+      "27           South Korea  0.000000          9           0\n",
+      "1              Lithuania  0.000000         38           0\n",
+      "120           Kazakhstan  0.014286         70           1\n",
+      "15           Netherlands  0.018519         54           1\n",
+      "74        Czech Republic  0.030303         33           1\n",
+      "105                Sudan  0.037037         54           2\n",
       "writing file\n",
       "iteration 1\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "95            Chad  0.666667          9           6\n",
-      "17   French Guiana  0.545455         22          12\n",
-      "86          Gambia  0.525000         40          21\n",
-      "44           Benin  0.523810         21          11\n",
-      "6          Bolivia  0.500000         28          14\n",
-      "78     El Salvador  0.500000         26          13\n",
-      "136       Botswana  0.486111         72          35\n",
-      "10       Guatemala  0.465116         43          20\n",
+      "31     Ivory Coast  0.666667         12           8\n",
+      "136       Botswana  0.638889         72          46\n",
+      "95            Chad  0.555556          9           5\n",
+      "20        Pakistan  0.479452         73          35\n",
+      "43           Benin  0.476190         21          10\n",
+      "86          Gambia  0.475000         40          19\n",
+      "78     El Salvador  0.461538         26          12\n",
       "115        Senegal  0.454545         33          15\n",
+      "135  French Guiana  0.454545         22          10\n",
       "104         Bhutan  0.444444          9           4\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "9                        Saudi Arabia  0.000000          8           0\n",
-      "98                         Uzbekistan  0.030303         33           1\n",
-      "15                        Netherlands  0.037037         54           2\n",
-      "57                             Russia  0.037975         79           3\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "105                             Sudan  0.055556         54           3\n",
+      "            Country  Outliers  N_Country  N_Outliers\n",
+      "1         Lithuania  0.000000         38           0\n",
+      "107        Kiribati  0.000000         14           0\n",
+      "119         Denmark  0.000000         13           0\n",
+      "27      South Korea  0.000000          9           0\n",
+      "120      Kazakhstan  0.014286         70           1\n",
+      "105           Sudan  0.018519         54           1\n",
+      "74   Czech Republic  0.030303         33           1\n",
+      "93          Grenada  0.033333         30           1\n",
+      "15      Netherlands  0.037037         54           2\n",
+      "0            Canada  0.037500         80           3\n",
       "writing file\n",
       "iteration 2\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "95            Chad  0.666667          9           6\n",
-      "104         Bhutan  0.555556          9           5\n",
-      "86          Gambia  0.550000         40          22\n",
-      "135  French Guiana  0.545455         22          12\n",
-      "78     El Salvador  0.538462         26          14\n",
-      "43           Benin  0.523810         21          11\n",
-      "6          Bolivia  0.500000         28          14\n",
-      "136       Botswana  0.486111         72          35\n",
-      "64      Mozambique  0.444444         27          12\n",
-      "14         Liberia  0.437500         32          14\n",
+      "61            Chad  0.666667          9           6\n",
+      "136       Botswana  0.625000         72          45\n",
+      "72     Ivory Coast  0.583333         12           7\n",
+      "20        Pakistan  0.534247         73          39\n",
+      "86          Gambia  0.525000         40          21\n",
+      "44           Benin  0.476190         21          10\n",
+      "78     El Salvador  0.461538         26          12\n",
+      "106          Nepal  0.434211         76          33\n",
+      "66          Uganda  0.426471         68          29\n",
+      "135  French Guiana  0.409091         22           9\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "15                        Netherlands  0.018519         54           1\n",
-      "105                             Sudan  0.037037         54           2\n",
-      "0                              Canada  0.050000         80           4\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "94                               Iraq  0.057971         69           4\n",
-      "31                     Czech Republic  0.060606         33           2\n",
+      "            Country  Outliers  N_Country  N_Outliers\n",
+      "1         Lithuania  0.000000         38           0\n",
+      "119         Denmark  0.000000         13           0\n",
+      "31   Czech Republic  0.000000         33           0\n",
+      "30      Afghanistan  0.000000         19           0\n",
+      "27      South Korea  0.000000          9           0\n",
+      "102       Nicaragua  0.000000         17           0\n",
+      "120      Kazakhstan  0.014286         70           1\n",
+      "15      Netherlands  0.018519         54           1\n",
+      "43           Malawi  0.040000         25           1\n",
+      "0            Canada  0.050000         80           4\n",
       "writing file\n",
       "iteration 3\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
+      "detecting outliers...\n",
+      "most outliers \n",
+      "         Country  Outliers  N_Country  N_Outliers\n",
+      "95          Chad  0.666667          9           6\n",
+      "136     Botswana  0.583333         72          42\n",
+      "86        Gambia  0.575000         40          23\n",
+      "63    Mozambique  0.518519         27          14\n",
+      "31   Ivory Coast  0.500000         12           6\n",
+      "42         Benin  0.476190         21          10\n",
+      "106        Nepal  0.473684         76          36\n",
+      "20      Pakistan  0.452055         73          33\n",
+      "64        Uganda  0.426471         68          29\n",
+      "62       Senegal  0.424242         33          14\n",
+      "least outliers \n",
+      "                      Country  Outliers  N_Country  N_Outliers\n",
+      "1                   Lithuania  0.000000         38           0\n",
+      "74             Czech Republic  0.000000         33           0\n",
+      "27                South Korea  0.000000          9           0\n",
+      "119                   Denmark  0.000000         13           0\n",
+      "120                Kazakhstan  0.014286         70           1\n",
+      "105                     Sudan  0.037037         54           2\n",
+      "15                Netherlands  0.037037         54           2\n",
+      "65                    Hungary  0.049180         61           3\n",
+      "0                      Canada  0.050000         80           4\n",
+      "44   United States of America  0.051282         78           4\n",
+      "writing file\n",
+      "iteration 4\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
+      "detecting outliers...\n",
+      "most outliers \n",
+      "         Country  Outliers  N_Country  N_Outliers\n",
+      "43         Benin  0.619048         21          13\n",
+      "136     Botswana  0.597222         72          43\n",
+      "72   Ivory Coast  0.583333         12           7\n",
+      "95          Chad  0.555556          9           5\n",
+      "86        Gambia  0.525000         40          21\n",
+      "64    Mozambique  0.518519         27          14\n",
+      "20      Pakistan  0.506849         73          37\n",
+      "106        Nepal  0.486842         76          37\n",
+      "65        Uganda  0.470588         68          32\n",
+      "63       Senegal  0.454545         33          15\n",
+      "least outliers \n",
+      "            Country  Outliers  N_Country  N_Outliers\n",
+      "120      Kazakhstan  0.000000         70           0\n",
+      "119         Denmark  0.000000         13           0\n",
+      "27      South Korea  0.000000          9           0\n",
+      "1         Lithuania  0.000000         38           0\n",
+      "107        Kiribati  0.000000         14           0\n",
+      "31   Czech Republic  0.030303         33           1\n",
+      "15      Netherlands  0.037037         54           2\n",
+      "0            Canada  0.037500         80           3\n",
+      "50          Finland  0.052632         19           1\n",
+      "30      Afghanistan  0.052632         19           1\n",
+      "writing file\n",
+      "iteration 5\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
       "60            Chad  0.666667          9           6\n",
-      "17   French Guiana  0.590909         22          13\n",
-      "86          Gambia  0.550000         40          22\n",
-      "6          Bolivia  0.535714         28          15\n",
-      "136       Botswana  0.513889         72          37\n",
-      "64      Mozambique  0.481481         27          13\n",
-      "14         Liberia  0.468750         32          15\n",
+      "43           Benin  0.619048         21          13\n",
+      "136       Botswana  0.583333         72          42\n",
+      "72     Ivory Coast  0.583333         12           7\n",
+      "20        Pakistan  0.479452         73          35\n",
+      "86          Gambia  0.475000         40          19\n",
       "78     El Salvador  0.461538         26          12\n",
-      "115        Senegal  0.454545         33          15\n",
-      "108          Malta  0.437500         16           7\n",
+      "106          Nepal  0.460526         76          35\n",
+      "63         Senegal  0.454545         33          15\n",
+      "17   French Guiana  0.409091         22           9\n",
       "least outliers \n",
       "            Country  Outliers  N_Country  N_Outliers\n",
-      "120      Kazakhstan  0.000000         70           0\n",
       "1         Lithuania  0.000000         38           0\n",
-      "30      Afghanistan  0.000000         19           0\n",
+      "27      South Korea  0.000000          9           0\n",
       "119         Denmark  0.000000         13           0\n",
-      "107        Kiribati  0.000000         14           0\n",
+      "9      Saudi Arabia  0.000000          8           0\n",
+      "120      Kazakhstan  0.014286         70           1\n",
       "31   Czech Republic  0.030303         33           1\n",
-      "98       Uzbekistan  0.030303         33           1\n",
       "15      Netherlands  0.037037         54           2\n",
       "105           Sudan  0.037037         54           2\n",
-      "84             Iraq  0.042857         70           3\n",
+      "0            Canada  0.037500         80           3\n",
+      "112          Israel  0.037500         80           3\n",
       "writing file\n",
-      "iteration 4\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "iteration 6\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "117       Zimbabwe  0.583333         12           7\n",
-      "60            Chad  0.555556          9           5\n",
-      "86          Gambia  0.550000         40          22\n",
-      "43           Benin  0.523810         21          11\n",
-      "6          Bolivia  0.500000         28          14\n",
-      "135  French Guiana  0.500000         22          11\n",
-      "136       Botswana  0.472222         72          34\n",
+      "136       Botswana  0.597222         72          43\n",
+      "72     Ivory Coast  0.583333         12           7\n",
+      "106          Nepal  0.500000         76          38\n",
+      "86          Gambia  0.500000         40          20\n",
+      "115        Senegal  0.484848         33          16\n",
+      "14         Liberia  0.468750         32          15\n",
       "78     El Salvador  0.461538         26          12\n",
-      "10       Guatemala  0.441860         43          19\n",
-      "14         Liberia  0.437500         32          14\n",
+      "135  French Guiana  0.454545         22          10\n",
+      "20        Pakistan  0.452055         73          33\n",
+      "95            Chad  0.444444          9           4\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "27                        South Korea  0.000000          9           0\n",
-      "109  Democratic Republic of the Congo  0.026316         38           1\n",
-      "94                               Iraq  0.028571         70           2\n",
-      "31                     Czech Republic  0.030303         33           1\n",
-      "105                             Sudan  0.037037         54           2\n",
-      "85                       Sierra Leone  0.050000         80           4\n",
+      "            Country  Outliers  N_Country  N_Outliers\n",
+      "113         Iceland  0.000000         11           0\n",
+      "1         Lithuania  0.000000         38           0\n",
+      "119         Denmark  0.000000         13           0\n",
+      "31   Czech Republic  0.000000         33           0\n",
+      "27      South Korea  0.000000          9           0\n",
+      "15      Netherlands  0.000000         54           0\n",
+      "120      Kazakhstan  0.014286         70           1\n",
+      "30      Afghanistan  0.052632         19           1\n",
+      "58         Bulgaria  0.054054         37           2\n",
+      "105           Sudan  0.055556         54           3\n",
       "writing file\n",
-      "iteration 5\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "iteration 7\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(6560, 381) (6560,)\n",
+      "detecting outliers...\n",
+      "most outliers \n",
+      "         Country  Outliers  N_Country  N_Outliers\n",
+      "72   Ivory Coast  0.666667         12           8\n",
+      "136     Botswana  0.611111         72          44\n",
+      "86        Gambia  0.575000         40          23\n",
+      "95          Chad  0.555556          9           5\n",
+      "44         Benin  0.523810         21          11\n",
+      "64       Senegal  0.484848         33          16\n",
+      "106        Nepal  0.460526         76          35\n",
+      "20      Pakistan  0.452055         73          33\n",
+      "65    Mozambique  0.444444         27          12\n",
+      "66        Uganda  0.441176         68          30\n",
+      "least outliers \n",
+      "                      Country  Outliers  N_Country  N_Outliers\n",
+      "1                   Lithuania  0.000000         38           0\n",
+      "119                   Denmark  0.000000         13           0\n",
+      "113                   Iceland  0.000000         11           0\n",
+      "27                South Korea  0.000000          9           0\n",
+      "120                Kazakhstan  0.014286         70           1\n",
+      "57                     Russia  0.025316         79           2\n",
+      "46   United States of America  0.025641         78           2\n",
+      "31             Czech Republic  0.030303         33           1\n",
+      "15                Netherlands  0.037037         54           2\n",
+      "0                      Canada  0.050000         80           4\n",
+      "writing file\n",
+      "iteration 8\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "61            Chad  0.666667          9           6\n",
-      "44           Benin  0.619048         21          13\n",
-      "104         Bhutan  0.555556          9           5\n",
-      "18   French Guiana  0.545455         22          12\n",
-      "86          Gambia  0.525000         40          21\n",
-      "136       Botswana  0.500000         72          36\n",
-      "117       Zimbabwe  0.500000         12           6\n",
-      "15         Liberia  0.500000         32          16\n",
-      "64         Senegal  0.484848         33          16\n",
-      "78     El Salvador  0.461538         26          12\n",
+      "136       Botswana  0.625000         72          45\n",
+      "72     Ivory Coast  0.583333         12           7\n",
+      "86          Gambia  0.475000         40          19\n",
+      "106          Nepal  0.460526         76          35\n",
+      "63         Senegal  0.454545         33          15\n",
+      "135  French Guiana  0.454545         22          10\n",
+      "20        Pakistan  0.452055         73          33\n",
+      "60            Chad  0.444444          9           4\n",
+      "64      Mozambique  0.444444         27          12\n",
+      "14         Liberia  0.437500         32          14\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "9                        Saudi Arabia  0.000000          8           0\n",
-      "0                              Canada  0.025000         80           2\n",
-      "57                             Russia  0.050633         79           4\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "51                            Finland  0.052632         19           1\n",
-      "105                             Sudan  0.055556         54           3\n",
+      "                      Country  Outliers  N_Country  N_Outliers\n",
+      "1                   Lithuania  0.000000         38           0\n",
+      "27                South Korea  0.000000          9           0\n",
+      "30                Afghanistan  0.000000         19           0\n",
+      "31             Czech Republic  0.000000         33           0\n",
+      "119                   Denmark  0.000000         13           0\n",
+      "120                Kazakhstan  0.014286         70           1\n",
+      "15                Netherlands  0.037037         54           2\n",
+      "105                     Sudan  0.037037         54           2\n",
+      "45   United States of America  0.051282         78           4\n",
+      "134                  Paraguay  0.055556         18           1\n",
       "writing file\n",
-      "iteration 6\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
+      "iteration 9\n",
+      "../data/lda_data_melodia_8_30sec.pickle\n",
+      "(6560, 381) (6560,)\n",
       "detecting outliers...\n",
       "most outliers \n",
       "           Country  Outliers  N_Country  N_Outliers\n",
-      "60            Chad  0.666667          9           6\n",
+      "31     Ivory Coast  0.666667         12           8\n",
+      "136       Botswana  0.611111         72          44\n",
       "17   French Guiana  0.590909         22          13\n",
-      "117       Zimbabwe  0.583333         12           7\n",
-      "86          Gambia  0.575000         40          23\n",
+      "59            Chad  0.555556          9           5\n",
       "78     El Salvador  0.538462         26          14\n",
-      "43           Benin  0.523810         21          11\n",
-      "115        Senegal  0.515152         33          17\n",
-      "136       Botswana  0.472222         72          34\n",
+      "20        Pakistan  0.493151         73          36\n",
+      "106          Nepal  0.486842         76          37\n",
+      "42           Benin  0.476190         21          10\n",
+      "86          Gambia  0.450000         40          18\n",
       "104         Bhutan  0.444444          9           4\n",
-      "84          Belize  0.441176         34          15\n",
       "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "113                           Iceland  0.000000         11           0\n",
-      "72                        Ivory Coast  0.000000         12           0\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "28                         Tajikistan  0.000000         15           0\n",
-      "105                             Sudan  0.018519         54           1\n",
-      "15                        Netherlands  0.018519         54           1\n",
-      "109  Democratic Republic of the Congo  0.026316         38           1\n",
-      "writing file\n",
-      "iteration 7\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
-      "detecting outliers...\n",
-      "most outliers \n",
-      "           Country  Outliers  N_Country  N_Outliers\n",
-      "95            Chad  0.555556          9           5\n",
-      "86          Gambia  0.525000         40          21\n",
-      "43           Benin  0.523810         21          11\n",
-      "135  French Guiana  0.500000         22          11\n",
-      "63         Senegal  0.484848         33          16\n",
-      "14         Liberia  0.468750         32          15\n",
-      "52       Indonesia  0.437500         80          35\n",
-      "136       Botswana  0.430556         72          31\n",
-      "6          Bolivia  0.428571         28          12\n",
-      "92     Switzerland  0.428571         42          18\n",
-      "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "113                           Iceland  0.000000         11           0\n",
-      "94                               Iraq  0.028571         70           2\n",
-      "98                         Uzbekistan  0.030303         33           1\n",
-      "105                             Sudan  0.037037         54           2\n",
-      "85                       Sierra Leone  0.037500         80           3\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "writing file\n",
-      "iteration 8\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
-      "detecting outliers...\n",
-      "most outliers \n",
-      "           Country  Outliers  N_Country  N_Outliers\n",
-      "61            Chad  0.666667          9           6\n",
-      "78     El Salvador  0.576923         26          15\n",
-      "44           Benin  0.571429         21          12\n",
-      "104         Bhutan  0.555556          9           5\n",
-      "86          Gambia  0.550000         40          22\n",
-      "17   French Guiana  0.545455         22          12\n",
-      "94          Belize  0.470588         34          16\n",
-      "14         Liberia  0.468750         32          15\n",
-      "92     Switzerland  0.452381         42          19\n",
-      "53       Indonesia  0.450000         80          36\n",
-      "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "98                         Uzbekistan  0.030303         33           1\n",
-      "105                             Sudan  0.037037         54           2\n",
-      "15                        Netherlands  0.037037         54           2\n",
-      "85                       Sierra Leone  0.037500         80           3\n",
-      "84                               Iraq  0.042857         70           3\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "writing file\n",
-      "iteration 9\n",
-      "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
-      "(6560, 380) (6560,)\n",
-      "detecting outliers...\n",
-      "most outliers \n",
-      "           Country  Outliers  N_Country  N_Outliers\n",
-      "95            Chad  0.555556          9           5\n",
-      "104         Bhutan  0.555556          9           5\n",
-      "86          Gambia  0.550000         40          22\n",
-      "78     El Salvador  0.538462         26          14\n",
-      "18   French Guiana  0.500000         22          11\n",
-      "115        Senegal  0.484848         33          16\n",
-      "44           Benin  0.476190         21          10\n",
-      "41            Laos  0.470588         17           8\n",
-      "6          Bolivia  0.464286         28          13\n",
-      "65      Mozambique  0.444444         27          12\n",
-      "least outliers \n",
-      "                              Country  Outliers  N_Country  N_Outliers\n",
-      "119                           Denmark  0.000000         13           0\n",
-      "1                           Lithuania  0.000000         38           0\n",
-      "120                        Kazakhstan  0.000000         70           0\n",
-      "107                          Kiribati  0.000000         14           0\n",
-      "32                     Czech Republic  0.000000         33           0\n",
-      "85                       Sierra Leone  0.050000         80           4\n",
-      "0                              Canada  0.050000         80           4\n",
-      "109  Democratic Republic of the Congo  0.052632         38           2\n",
-      "105                             Sudan  0.055556         54           3\n",
-      "16                        Netherlands  0.055556         54           3\n",
+      "                      Country  Outliers  N_Country  N_Outliers\n",
+      "1                   Lithuania  0.000000         38           0\n",
+      "27                South Korea  0.000000          9           0\n",
+      "119                   Denmark  0.000000         13           0\n",
+      "44   United States of America  0.012821         78           1\n",
+      "120                Kazakhstan  0.014286         70           1\n",
+      "74             Czech Republic  0.030303         33           1\n",
+      "18                New Zealand  0.037037         27           1\n",
+      "15                Netherlands  0.037037         54           2\n",
+      "105                     Sudan  0.037037         54           2\n",
+      "0                      Canada  0.050000         80           4\n",
       "writing file\n"
      ]
     }
@@ -5558,10 +5559,11 @@
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "\n",
+    "#results_file = mapper.OUTPUT_FILES[0]\n",
+    "results_file = '../data/lda_data_melodia_8_30sec.pickle'\n",
     "n_iters = 10\n",
     "for n in range(n_iters):\n",
     "    print \"iteration %d\" % n\n",
-    "    results_file = mapper.OUTPUT_FILES[0]\n",
     "    print results_file\n",
     "    X, Y, Yaudio = classification.load_data_from_pickle(results_file)\n",
     "    # get only 80% of the dataset.. to vary the choice of outliers\n",
@@ -5596,7 +5598,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 18,
    "metadata": {
     "collapsed": true
    },
@@ -5613,7 +5615,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -5622,7 +5624,7 @@
        "(137, 10)"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -5640,39 +5642,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "         Country        Country        Country        Country   Country  \\\n",
-      "0           Chad           Chad           Chad           Chad  Zimbabwe   \n",
-      "1         Gambia  French Guiana         Bhutan  French Guiana      Chad   \n",
-      "2  French Guiana         Gambia         Gambia         Gambia    Gambia   \n",
-      "3          Benin          Benin  French Guiana        Bolivia     Benin   \n",
-      "4        Liberia        Bolivia    El Salvador       Botswana   Bolivia   \n",
+      "       Country      Country      Country      Country      Country  \\\n",
+      "0     Botswana  Ivory Coast         Chad         Chad        Benin   \n",
+      "1         Chad     Botswana     Botswana     Botswana     Botswana   \n",
+      "2        Benin         Chad  Ivory Coast       Gambia  Ivory Coast   \n",
+      "3  Ivory Coast     Pakistan     Pakistan   Mozambique         Chad   \n",
+      "4     Pakistan        Benin       Gambia  Ivory Coast       Gambia   \n",
       "\n",
-      "         Country        Country        Country      Country        Country  \n",
-      "0           Chad           Chad           Chad         Chad         Bhutan  \n",
-      "1          Benin  French Guiana         Gambia  El Salvador           Chad  \n",
-      "2         Bhutan       Zimbabwe          Benin        Benin         Gambia  \n",
-      "3  French Guiana         Gambia  French Guiana       Bhutan    El Salvador  \n",
-      "4         Gambia    El Salvador        Senegal       Gambia  French Guiana  \n",
+      "       Country      Country      Country        Country        Country  \n",
+      "0         Chad     Botswana  Ivory Coast       Botswana    Ivory Coast  \n",
+      "1        Benin  Ivory Coast     Botswana    Ivory Coast       Botswana  \n",
+      "2     Botswana       Gambia       Gambia         Gambia  French Guiana  \n",
+      "3  Ivory Coast        Nepal         Chad          Nepal           Chad  \n",
+      "4     Pakistan      Senegal        Benin  French Guiana    El Salvador  \n",
       "   Outliers  Outliers  Outliers  Outliers  Outliers  Outliers  Outliers  \\\n",
-      "0  0.555556  0.666667  0.666667  0.666667  0.583333  0.666667  0.666667   \n",
-      "1  0.525000  0.545455  0.555556  0.590909  0.555556  0.619048  0.590909   \n",
-      "2  0.500000  0.525000  0.550000  0.550000  0.550000  0.555556  0.583333   \n",
-      "3  0.476190  0.523810  0.545455  0.535714  0.523810  0.545455  0.575000   \n",
-      "4  0.468750  0.500000  0.538462  0.513889  0.500000  0.525000  0.538462   \n",
+      "0  0.625000  0.666667  0.666667  0.666667  0.619048  0.666667  0.597222   \n",
+      "1  0.555556  0.638889  0.625000  0.583333  0.597222  0.619048  0.583333   \n",
+      "2  0.523810  0.555556  0.583333  0.575000  0.583333  0.583333  0.500000   \n",
+      "3  0.500000  0.479452  0.534247  0.518519  0.555556  0.583333  0.500000   \n",
+      "4  0.493151  0.476190  0.525000  0.500000  0.525000  0.479452  0.484848   \n",
       "\n",
       "   Outliers  Outliers  Outliers  \n",
-      "0  0.555556  0.666667  0.555556  \n",
-      "1  0.525000  0.576923  0.555556  \n",
-      "2  0.523810  0.571429  0.550000  \n",
-      "3  0.500000  0.555556  0.538462  \n",
-      "4  0.484848  0.550000  0.500000  \n"
+      "0  0.666667  0.625000  0.666667  \n",
+      "1  0.611111  0.583333  0.611111  \n",
+      "2  0.575000  0.475000  0.590909  \n",
+      "3  0.555556  0.460526  0.555556  \n",
+      "4  0.523810  0.454545  0.538462  \n"
      ]
     }
    ],
@@ -5696,11 +5698,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/homes/mp305/anaconda/lib/python2.7/site-packages/scipy/stats/stats.py:250: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.\n",
+      "  \"values. nan values will be ignored.\", RuntimeWarning)\n"
+     ]
+    }
+   ],
    "source": [
     "from scipy.stats import kendalltau\n",
     "r_, p_ = [], []\n",
@@ -5716,14 +5725,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.0493253335359 0.410409379365\n"
+      "0.0554645319767 0.37638195368\n"
      ]
     }
    ],
@@ -5733,14 +5742,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.240026302342 0.351418392739\n"
+      "0.248540800214 0.311313597605\n"
      ]
     }
    ],
@@ -5762,14 +5771,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.237245179063 0.417925582965\n"
+      "0.294545454545 0.449007896087\n"
      ]
     }
    ],
@@ -5784,7 +5793,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 25,
    "metadata": {
     "collapsed": true
    },
@@ -5797,16 +5806,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "{'Chad', 'French Guiana', 'Gambia'}"
+       "{'Botswana', 'Chad', 'Gambia', 'Ivory Coast', 'Pakistan'}"
       ]
      },
-     "execution_count": 76,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -5824,8 +5833,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# majority voting + precision at K (top5?)\n",
@@ -5836,7 +5847,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -5854,43 +5865,43 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>Brazil</td>\n",
-       "      <td>1</td>\n",
+       "      <td>Pakistan</td>\n",
+       "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Liberia</td>\n",
-       "      <td>7</td>\n",
+       "      <td>Bhutan</td>\n",
+       "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Belize</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
        "      <td>Chad</td>\n",
        "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Liberia</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Bhutan</td>\n",
-       "      <td>7</td>\n",
+       "      <td>El Salvador</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "     index   0\n",
-       "0   Brazil   1\n",
-       "1  Liberia   7\n",
-       "2   Belize   2\n",
-       "3     Chad  10\n",
-       "4   Bhutan   7"
+       "         index   0\n",
+       "0     Pakistan  10\n",
+       "1       Bhutan   3\n",
+       "2         Chad  10\n",
+       "3      Liberia   2\n",
+       "4  El Salvador   5"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -5902,7 +5913,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -5919,128 +5930,98 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
+       "      <th>0</th>\n",
+       "      <td>Pakistan</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
        "      <td>Chad</td>\n",
        "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>6</th>\n",
+       "      <th>5</th>\n",
        "      <td>Gambia</td>\n",
        "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>French Guiana</td>\n",
+       "      <th>10</th>\n",
+       "      <td>Ivory Coast</td>\n",
        "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>Benin</td>\n",
+       "      <th>12</th>\n",
+       "      <td>Botswana</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Nepal</td>\n",
        "      <td>9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>El Salvador</td>\n",
-       "      <td>9</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>Botswana</td>\n",
+       "      <th>13</th>\n",
+       "      <td>Benin</td>\n",
        "      <td>8</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Bhutan</td>\n",
+       "      <th>8</th>\n",
+       "      <td>Senegal</td>\n",
        "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Liberia</td>\n",
+       "      <th>9</th>\n",
+       "      <td>French Guiana</td>\n",
        "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>Bolivia</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>Senegal</td>\n",
-       "      <td>6</td>\n",
+       "      <th>4</th>\n",
+       "      <td>El Salvador</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
-       "      <td>Zimbabwe</td>\n",
+       "      <td>Mozambique</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Uganda</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Bhutan</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>Switzerland</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>Mozambique</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Belize</td>\n",
+       "      <th>3</th>\n",
+       "      <td>Liberia</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>Indonesia</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>Guatemala</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Brazil</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>Laos</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>Malta</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "            index   0\n",
-       "3            Chad  10\n",
-       "6          Gambia  10\n",
-       "12  French Guiana  10\n",
-       "18          Benin   9\n",
-       "5     El Salvador   9\n",
-       "17       Botswana   8\n",
-       "4          Bhutan   7\n",
-       "1         Liberia   7\n",
-       "16        Bolivia   6\n",
-       "10        Senegal   6\n",
-       "11       Zimbabwe   3\n",
-       "14    Switzerland   3\n",
-       "15     Mozambique   3\n",
-       "2          Belize   2\n",
-       "7       Indonesia   2\n",
-       "8       Guatemala   2\n",
-       "0          Brazil   1\n",
-       "13           Laos   1\n",
-       "9           Malta   1"
+       "0        Pakistan  10\n",
+       "2            Chad  10\n",
+       "5          Gambia  10\n",
+       "10    Ivory Coast  10\n",
+       "12       Botswana  10\n",
+       "6           Nepal   9\n",
+       "13          Benin   8\n",
+       "8         Senegal   7\n",
+       "9   French Guiana   7\n",
+       "4     El Salvador   5\n",
+       "11     Mozambique   5\n",
+       "7          Uganda   4\n",
+       "1          Bhutan   3\n",
+       "3         Liberia   2"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -6051,14 +6032,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.51 0.0830662386292\n"
+      "0.67 0.0640312423743\n"
      ]
     }
    ],
@@ -6077,16 +6058,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([ 0.6,  0.5,  0.5,  0.5,  0.5,  0.5,  0.5,  0.4,  0.7,  0.4])"
+       "array([ 0.6,  0.7,  0.7,  0.6,  0.6,  0.7,  0.8,  0.6,  0.7,  0.7])"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }