Mercurial > hg > plosone_underreview
diff notebooks/sensitivity_experiment.ipynb @ 90:e279ccea5f9b branch-tests
results on 30sec
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Mon, 02 Oct 2017 15:32:23 +0100 |
parents | 4395037087b6 |
children |
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb Mon Oct 02 12:37:55 2017 +0100 +++ b/notebooks/sensitivity_experiment.ipynb Mon Oct 02 15:32:23 2017 +0100 @@ -2,22 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ERROR! Session/line number was not unique in database. History logging moved to new session 32\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n", - " warnings.warn('Could not import scikits.samplerate. '\n" + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" ] } ], @@ -42,7 +35,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n", @@ -5255,7 +5250,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -5263,294 +5258,300 @@ "output_type": "stream", "text": [ "iteration 0\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "95 Chad 0.555556 9 5\n", - "86 Gambia 0.525000 40 21\n", - "135 French Guiana 0.500000 22 11\n", - "44 Benin 0.476190 21 10\n", - "15 Liberia 0.468750 32 15\n", - "136 Botswana 0.458333 72 33\n", + "136 Botswana 0.625000 72 45\n", + "59 Chad 0.555556 9 5\n", + "42 Benin 0.523810 21 11\n", + "31 Ivory Coast 0.500000 12 6\n", + "20 Pakistan 0.493151 73 36\n", + "63 Mozambique 0.481481 27 13\n", + "106 Nepal 0.460526 76 35\n", + "17 French Guiana 0.454545 22 10\n", "104 Bhutan 0.444444 9 4\n", - "68 Brazil 0.437500 80 35\n", - "92 Switzerland 0.428571 42 18\n", - "78 El Salvador 0.423077 26 11\n", + "86 Gambia 0.425000 40 17\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "1 Lithuania 0.000000 38 0\n", - "29 Tajikistan 0.000000 15 0\n", - "32 Czech Republic 0.000000 33 0\n", - "107 Kiribati 0.000000 14 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "119 Denmark 0.000000 13 0\n", - "0 Canada 0.050000 80 4\n", - "73 Nigeria 0.051948 77 4\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "105 Sudan 0.055556 54 3\n", + " Country Outliers N_Country N_Outliers\n", + "100 Antigua and Barbuda 0.000000 34 0\n", + "28 Tajikistan 0.000000 15 0\n", + "113 Iceland 0.000000 11 0\n", + "119 Denmark 0.000000 13 0\n", + "27 South Korea 0.000000 9 0\n", + "1 Lithuania 0.000000 38 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "15 Netherlands 0.018519 54 1\n", + "74 Czech Republic 0.030303 33 1\n", + "105 Sudan 0.037037 54 2\n", "writing file\n", "iteration 1\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "95 Chad 0.666667 9 6\n", - "17 French Guiana 0.545455 22 12\n", - "86 Gambia 0.525000 40 21\n", - "44 Benin 0.523810 21 11\n", - "6 Bolivia 0.500000 28 14\n", - "78 El Salvador 0.500000 26 13\n", - "136 Botswana 0.486111 72 35\n", - "10 Guatemala 0.465116 43 20\n", + "31 Ivory Coast 0.666667 12 8\n", + "136 Botswana 0.638889 72 46\n", + "95 Chad 0.555556 9 5\n", + "20 Pakistan 0.479452 73 35\n", + "43 Benin 0.476190 21 10\n", + "86 Gambia 0.475000 40 19\n", + "78 El Salvador 0.461538 26 12\n", "115 Senegal 0.454545 33 15\n", + "135 French Guiana 0.454545 22 10\n", "104 Bhutan 0.444444 9 4\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "120 Kazakhstan 0.000000 70 0\n", - "1 Lithuania 0.000000 38 0\n", - "107 Kiribati 0.000000 14 0\n", - "119 Denmark 0.000000 13 0\n", - "9 Saudi Arabia 0.000000 8 0\n", - "98 Uzbekistan 0.030303 33 1\n", - "15 Netherlands 0.037037 54 2\n", - "57 Russia 0.037975 79 3\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "105 Sudan 0.055556 54 3\n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "107 Kiribati 0.000000 14 0\n", + "119 Denmark 0.000000 13 0\n", + "27 South Korea 0.000000 9 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "105 Sudan 0.018519 54 1\n", + "74 Czech Republic 0.030303 33 1\n", + "93 Grenada 0.033333 30 1\n", + "15 Netherlands 0.037037 54 2\n", + "0 Canada 0.037500 80 3\n", "writing file\n", "iteration 2\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "95 Chad 0.666667 9 6\n", - "104 Bhutan 0.555556 9 5\n", - "86 Gambia 0.550000 40 22\n", - "135 French Guiana 0.545455 22 12\n", - "78 El Salvador 0.538462 26 14\n", - "43 Benin 0.523810 21 11\n", - "6 Bolivia 0.500000 28 14\n", - "136 Botswana 0.486111 72 35\n", - "64 Mozambique 0.444444 27 12\n", - "14 Liberia 0.437500 32 14\n", + "61 Chad 0.666667 9 6\n", + "136 Botswana 0.625000 72 45\n", + "72 Ivory Coast 0.583333 12 7\n", + "20 Pakistan 0.534247 73 39\n", + "86 Gambia 0.525000 40 21\n", + "44 Benin 0.476190 21 10\n", + "78 El Salvador 0.461538 26 12\n", + "106 Nepal 0.434211 76 33\n", + "66 Uganda 0.426471 68 29\n", + "135 French Guiana 0.409091 22 9\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "1 Lithuania 0.000000 38 0\n", - "107 Kiribati 0.000000 14 0\n", - "119 Denmark 0.000000 13 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "15 Netherlands 0.018519 54 1\n", - "105 Sudan 0.037037 54 2\n", - "0 Canada 0.050000 80 4\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "94 Iraq 0.057971 69 4\n", - "31 Czech Republic 0.060606 33 2\n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "119 Denmark 0.000000 13 0\n", + "31 Czech Republic 0.000000 33 0\n", + "30 Afghanistan 0.000000 19 0\n", + "27 South Korea 0.000000 9 0\n", + "102 Nicaragua 0.000000 17 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "15 Netherlands 0.018519 54 1\n", + "43 Malawi 0.040000 25 1\n", + "0 Canada 0.050000 80 4\n", "writing file\n", "iteration 3\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", + "detecting outliers...\n", + "most outliers \n", + " Country Outliers N_Country N_Outliers\n", + "95 Chad 0.666667 9 6\n", + "136 Botswana 0.583333 72 42\n", + "86 Gambia 0.575000 40 23\n", + "63 Mozambique 0.518519 27 14\n", + "31 Ivory Coast 0.500000 12 6\n", + "42 Benin 0.476190 21 10\n", + "106 Nepal 0.473684 76 36\n", + "20 Pakistan 0.452055 73 33\n", + "64 Uganda 0.426471 68 29\n", + "62 Senegal 0.424242 33 14\n", + "least outliers \n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "74 Czech Republic 0.000000 33 0\n", + "27 South Korea 0.000000 9 0\n", + "119 Denmark 0.000000 13 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "105 Sudan 0.037037 54 2\n", + "15 Netherlands 0.037037 54 2\n", + "65 Hungary 0.049180 61 3\n", + "0 Canada 0.050000 80 4\n", + "44 United States of America 0.051282 78 4\n", + "writing file\n", + "iteration 4\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", + "detecting outliers...\n", + "most outliers \n", + " Country Outliers N_Country N_Outliers\n", + "43 Benin 0.619048 21 13\n", + "136 Botswana 0.597222 72 43\n", + "72 Ivory Coast 0.583333 12 7\n", + "95 Chad 0.555556 9 5\n", + "86 Gambia 0.525000 40 21\n", + "64 Mozambique 0.518519 27 14\n", + "20 Pakistan 0.506849 73 37\n", + "106 Nepal 0.486842 76 37\n", + "65 Uganda 0.470588 68 32\n", + "63 Senegal 0.454545 33 15\n", + "least outliers \n", + " Country Outliers N_Country N_Outliers\n", + "120 Kazakhstan 0.000000 70 0\n", + "119 Denmark 0.000000 13 0\n", + "27 South Korea 0.000000 9 0\n", + "1 Lithuania 0.000000 38 0\n", + "107 Kiribati 0.000000 14 0\n", + "31 Czech Republic 0.030303 33 1\n", + "15 Netherlands 0.037037 54 2\n", + "0 Canada 0.037500 80 3\n", + "50 Finland 0.052632 19 1\n", + "30 Afghanistan 0.052632 19 1\n", + "writing file\n", + "iteration 5\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", "60 Chad 0.666667 9 6\n", - "17 French Guiana 0.590909 22 13\n", - "86 Gambia 0.550000 40 22\n", - "6 Bolivia 0.535714 28 15\n", - "136 Botswana 0.513889 72 37\n", - "64 Mozambique 0.481481 27 13\n", - "14 Liberia 0.468750 32 15\n", + "43 Benin 0.619048 21 13\n", + "136 Botswana 0.583333 72 42\n", + "72 Ivory Coast 0.583333 12 7\n", + "20 Pakistan 0.479452 73 35\n", + "86 Gambia 0.475000 40 19\n", "78 El Salvador 0.461538 26 12\n", - "115 Senegal 0.454545 33 15\n", - "108 Malta 0.437500 16 7\n", + "106 Nepal 0.460526 76 35\n", + "63 Senegal 0.454545 33 15\n", + "17 French Guiana 0.409091 22 9\n", "least outliers \n", " Country Outliers N_Country N_Outliers\n", - "120 Kazakhstan 0.000000 70 0\n", "1 Lithuania 0.000000 38 0\n", - "30 Afghanistan 0.000000 19 0\n", + "27 South Korea 0.000000 9 0\n", "119 Denmark 0.000000 13 0\n", - "107 Kiribati 0.000000 14 0\n", + "9 Saudi Arabia 0.000000 8 0\n", + "120 Kazakhstan 0.014286 70 1\n", "31 Czech Republic 0.030303 33 1\n", - "98 Uzbekistan 0.030303 33 1\n", "15 Netherlands 0.037037 54 2\n", "105 Sudan 0.037037 54 2\n", - "84 Iraq 0.042857 70 3\n", + "0 Canada 0.037500 80 3\n", + "112 Israel 0.037500 80 3\n", "writing file\n", - "iteration 4\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "iteration 6\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "117 Zimbabwe 0.583333 12 7\n", - "60 Chad 0.555556 9 5\n", - "86 Gambia 0.550000 40 22\n", - "43 Benin 0.523810 21 11\n", - "6 Bolivia 0.500000 28 14\n", - "135 French Guiana 0.500000 22 11\n", - "136 Botswana 0.472222 72 34\n", + "136 Botswana 0.597222 72 43\n", + "72 Ivory Coast 0.583333 12 7\n", + "106 Nepal 0.500000 76 38\n", + "86 Gambia 0.500000 40 20\n", + "115 Senegal 0.484848 33 16\n", + "14 Liberia 0.468750 32 15\n", "78 El Salvador 0.461538 26 12\n", - "10 Guatemala 0.441860 43 19\n", - "14 Liberia 0.437500 32 14\n", + "135 French Guiana 0.454545 22 10\n", + "20 Pakistan 0.452055 73 33\n", + "95 Chad 0.444444 9 4\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "1 Lithuania 0.000000 38 0\n", - "107 Kiribati 0.000000 14 0\n", - "119 Denmark 0.000000 13 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "27 South Korea 0.000000 9 0\n", - "109 Democratic Republic of the Congo 0.026316 38 1\n", - "94 Iraq 0.028571 70 2\n", - "31 Czech Republic 0.030303 33 1\n", - "105 Sudan 0.037037 54 2\n", - "85 Sierra Leone 0.050000 80 4\n", + " Country Outliers N_Country N_Outliers\n", + "113 Iceland 0.000000 11 0\n", + "1 Lithuania 0.000000 38 0\n", + "119 Denmark 0.000000 13 0\n", + "31 Czech Republic 0.000000 33 0\n", + "27 South Korea 0.000000 9 0\n", + "15 Netherlands 0.000000 54 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "30 Afghanistan 0.052632 19 1\n", + "58 Bulgaria 0.054054 37 2\n", + "105 Sudan 0.055556 54 3\n", "writing file\n", - "iteration 5\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "iteration 7\n", + "../data/lda_data_melodia_8_30sec.pickle\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(6560, 381) (6560,)\n", + "detecting outliers...\n", + "most outliers \n", + " Country Outliers N_Country N_Outliers\n", + "72 Ivory Coast 0.666667 12 8\n", + "136 Botswana 0.611111 72 44\n", + "86 Gambia 0.575000 40 23\n", + "95 Chad 0.555556 9 5\n", + "44 Benin 0.523810 21 11\n", + "64 Senegal 0.484848 33 16\n", + "106 Nepal 0.460526 76 35\n", + "20 Pakistan 0.452055 73 33\n", + "65 Mozambique 0.444444 27 12\n", + "66 Uganda 0.441176 68 30\n", + "least outliers \n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "119 Denmark 0.000000 13 0\n", + "113 Iceland 0.000000 11 0\n", + "27 South Korea 0.000000 9 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "57 Russia 0.025316 79 2\n", + "46 United States of America 0.025641 78 2\n", + "31 Czech Republic 0.030303 33 1\n", + "15 Netherlands 0.037037 54 2\n", + "0 Canada 0.050000 80 4\n", + "writing file\n", + "iteration 8\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "61 Chad 0.666667 9 6\n", - "44 Benin 0.619048 21 13\n", - "104 Bhutan 0.555556 9 5\n", - "18 French Guiana 0.545455 22 12\n", - "86 Gambia 0.525000 40 21\n", - "136 Botswana 0.500000 72 36\n", - "117 Zimbabwe 0.500000 12 6\n", - "15 Liberia 0.500000 32 16\n", - "64 Senegal 0.484848 33 16\n", - "78 El Salvador 0.461538 26 12\n", + "136 Botswana 0.625000 72 45\n", + "72 Ivory Coast 0.583333 12 7\n", + "86 Gambia 0.475000 40 19\n", + "106 Nepal 0.460526 76 35\n", + "63 Senegal 0.454545 33 15\n", + "135 French Guiana 0.454545 22 10\n", + "20 Pakistan 0.452055 73 33\n", + "60 Chad 0.444444 9 4\n", + "64 Mozambique 0.444444 27 12\n", + "14 Liberia 0.437500 32 14\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "1 Lithuania 0.000000 38 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "119 Denmark 0.000000 13 0\n", - "107 Kiribati 0.000000 14 0\n", - "9 Saudi Arabia 0.000000 8 0\n", - "0 Canada 0.025000 80 2\n", - "57 Russia 0.050633 79 4\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "51 Finland 0.052632 19 1\n", - "105 Sudan 0.055556 54 3\n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "27 South Korea 0.000000 9 0\n", + "30 Afghanistan 0.000000 19 0\n", + "31 Czech Republic 0.000000 33 0\n", + "119 Denmark 0.000000 13 0\n", + "120 Kazakhstan 0.014286 70 1\n", + "15 Netherlands 0.037037 54 2\n", + "105 Sudan 0.037037 54 2\n", + "45 United States of America 0.051282 78 4\n", + "134 Paraguay 0.055556 18 1\n", "writing file\n", - "iteration 6\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", + "iteration 9\n", + "../data/lda_data_melodia_8_30sec.pickle\n", + "(6560, 381) (6560,)\n", "detecting outliers...\n", "most outliers \n", " Country Outliers N_Country N_Outliers\n", - "60 Chad 0.666667 9 6\n", + "31 Ivory Coast 0.666667 12 8\n", + "136 Botswana 0.611111 72 44\n", "17 French Guiana 0.590909 22 13\n", - "117 Zimbabwe 0.583333 12 7\n", - "86 Gambia 0.575000 40 23\n", + "59 Chad 0.555556 9 5\n", "78 El Salvador 0.538462 26 14\n", - "43 Benin 0.523810 21 11\n", - "115 Senegal 0.515152 33 17\n", - "136 Botswana 0.472222 72 34\n", + "20 Pakistan 0.493151 73 36\n", + "106 Nepal 0.486842 76 37\n", + "42 Benin 0.476190 21 10\n", + "86 Gambia 0.450000 40 18\n", "104 Bhutan 0.444444 9 4\n", - "84 Belize 0.441176 34 15\n", "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "1 Lithuania 0.000000 38 0\n", - "107 Kiribati 0.000000 14 0\n", - "113 Iceland 0.000000 11 0\n", - "72 Ivory Coast 0.000000 12 0\n", - "119 Denmark 0.000000 13 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "28 Tajikistan 0.000000 15 0\n", - "105 Sudan 0.018519 54 1\n", - "15 Netherlands 0.018519 54 1\n", - "109 Democratic Republic of the Congo 0.026316 38 1\n", - "writing file\n", - "iteration 7\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", - "detecting outliers...\n", - "most outliers \n", - " Country Outliers N_Country N_Outliers\n", - "95 Chad 0.555556 9 5\n", - "86 Gambia 0.525000 40 21\n", - "43 Benin 0.523810 21 11\n", - "135 French Guiana 0.500000 22 11\n", - "63 Senegal 0.484848 33 16\n", - "14 Liberia 0.468750 32 15\n", - "52 Indonesia 0.437500 80 35\n", - "136 Botswana 0.430556 72 31\n", - "6 Bolivia 0.428571 28 12\n", - "92 Switzerland 0.428571 42 18\n", - "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "119 Denmark 0.000000 13 0\n", - "1 Lithuania 0.000000 38 0\n", - "107 Kiribati 0.000000 14 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "113 Iceland 0.000000 11 0\n", - "94 Iraq 0.028571 70 2\n", - "98 Uzbekistan 0.030303 33 1\n", - "105 Sudan 0.037037 54 2\n", - "85 Sierra Leone 0.037500 80 3\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "writing file\n", - "iteration 8\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", - "detecting outliers...\n", - "most outliers \n", - " Country Outliers N_Country N_Outliers\n", - "61 Chad 0.666667 9 6\n", - "78 El Salvador 0.576923 26 15\n", - "44 Benin 0.571429 21 12\n", - "104 Bhutan 0.555556 9 5\n", - "86 Gambia 0.550000 40 22\n", - "17 French Guiana 0.545455 22 12\n", - "94 Belize 0.470588 34 16\n", - "14 Liberia 0.468750 32 15\n", - "92 Switzerland 0.452381 42 19\n", - "53 Indonesia 0.450000 80 36\n", - "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "119 Denmark 0.000000 13 0\n", - "1 Lithuania 0.000000 38 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "107 Kiribati 0.000000 14 0\n", - "98 Uzbekistan 0.030303 33 1\n", - "105 Sudan 0.037037 54 2\n", - "15 Netherlands 0.037037 54 2\n", - "85 Sierra Leone 0.037500 80 3\n", - "84 Iraq 0.042857 70 3\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "writing file\n", - "iteration 9\n", - "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", - "(6560, 380) (6560,)\n", - "detecting outliers...\n", - "most outliers \n", - " Country Outliers N_Country N_Outliers\n", - "95 Chad 0.555556 9 5\n", - "104 Bhutan 0.555556 9 5\n", - "86 Gambia 0.550000 40 22\n", - "78 El Salvador 0.538462 26 14\n", - "18 French Guiana 0.500000 22 11\n", - "115 Senegal 0.484848 33 16\n", - "44 Benin 0.476190 21 10\n", - "41 Laos 0.470588 17 8\n", - "6 Bolivia 0.464286 28 13\n", - "65 Mozambique 0.444444 27 12\n", - "least outliers \n", - " Country Outliers N_Country N_Outliers\n", - "119 Denmark 0.000000 13 0\n", - "1 Lithuania 0.000000 38 0\n", - "120 Kazakhstan 0.000000 70 0\n", - "107 Kiribati 0.000000 14 0\n", - "32 Czech Republic 0.000000 33 0\n", - "85 Sierra Leone 0.050000 80 4\n", - "0 Canada 0.050000 80 4\n", - "109 Democratic Republic of the Congo 0.052632 38 2\n", - "105 Sudan 0.055556 54 3\n", - "16 Netherlands 0.055556 54 3\n", + " Country Outliers N_Country N_Outliers\n", + "1 Lithuania 0.000000 38 0\n", + "27 South Korea 0.000000 9 0\n", + "119 Denmark 0.000000 13 0\n", + "44 United States of America 0.012821 78 1\n", + "120 Kazakhstan 0.014286 70 1\n", + "74 Czech Republic 0.030303 33 1\n", + "18 New Zealand 0.037037 27 1\n", + "15 Netherlands 0.037037 54 2\n", + "105 Sudan 0.037037 54 2\n", + "0 Canada 0.050000 80 4\n", "writing file\n" ] } @@ -5558,10 +5559,11 @@ "source": [ "from sklearn.model_selection import train_test_split\n", "\n", + "#results_file = mapper.OUTPUT_FILES[0]\n", + "results_file = '../data/lda_data_melodia_8_30sec.pickle'\n", "n_iters = 10\n", "for n in range(n_iters):\n", " print \"iteration %d\" % n\n", - " results_file = mapper.OUTPUT_FILES[0]\n", " print results_file\n", " X, Y, Yaudio = classification.load_data_from_pickle(results_file)\n", " # get only 80% of the dataset.. to vary the choice of outliers\n", @@ -5596,7 +5598,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": { "collapsed": true }, @@ -5613,7 +5615,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -5622,7 +5624,7 @@ "(137, 10)" ] }, - "execution_count": 8, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -5640,39 +5642,39 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Country Country Country Country Country \\\n", - "0 Chad Chad Chad Chad Zimbabwe \n", - "1 Gambia French Guiana Bhutan French Guiana Chad \n", - "2 French Guiana Gambia Gambia Gambia Gambia \n", - "3 Benin Benin French Guiana Bolivia Benin \n", - "4 Liberia Bolivia El Salvador Botswana Bolivia \n", + " Country Country Country Country Country \\\n", + "0 Botswana Ivory Coast Chad Chad Benin \n", + "1 Chad Botswana Botswana Botswana Botswana \n", + "2 Benin Chad Ivory Coast Gambia Ivory Coast \n", + "3 Ivory Coast Pakistan Pakistan Mozambique Chad \n", + "4 Pakistan Benin Gambia Ivory Coast Gambia \n", "\n", - " Country Country Country Country Country \n", - "0 Chad Chad Chad Chad Bhutan \n", - "1 Benin French Guiana Gambia El Salvador Chad \n", - "2 Bhutan Zimbabwe Benin Benin Gambia \n", - "3 French Guiana Gambia French Guiana Bhutan El Salvador \n", - "4 Gambia El Salvador Senegal Gambia French Guiana \n", + " Country Country Country Country Country \n", + "0 Chad Botswana Ivory Coast Botswana Ivory Coast \n", + "1 Benin Ivory Coast Botswana Ivory Coast Botswana \n", + "2 Botswana Gambia Gambia Gambia French Guiana \n", + "3 Ivory Coast Nepal Chad Nepal Chad \n", + "4 Pakistan Senegal Benin French Guiana El Salvador \n", " Outliers Outliers Outliers Outliers Outliers Outliers Outliers \\\n", - "0 0.555556 0.666667 0.666667 0.666667 0.583333 0.666667 0.666667 \n", - "1 0.525000 0.545455 0.555556 0.590909 0.555556 0.619048 0.590909 \n", - "2 0.500000 0.525000 0.550000 0.550000 0.550000 0.555556 0.583333 \n", - "3 0.476190 0.523810 0.545455 0.535714 0.523810 0.545455 0.575000 \n", - "4 0.468750 0.500000 0.538462 0.513889 0.500000 0.525000 0.538462 \n", + "0 0.625000 0.666667 0.666667 0.666667 0.619048 0.666667 0.597222 \n", + "1 0.555556 0.638889 0.625000 0.583333 0.597222 0.619048 0.583333 \n", + "2 0.523810 0.555556 0.583333 0.575000 0.583333 0.583333 0.500000 \n", + "3 0.500000 0.479452 0.534247 0.518519 0.555556 0.583333 0.500000 \n", + "4 0.493151 0.476190 0.525000 0.500000 0.525000 0.479452 0.484848 \n", "\n", " Outliers Outliers Outliers \n", - "0 0.555556 0.666667 0.555556 \n", - "1 0.525000 0.576923 0.555556 \n", - "2 0.523810 0.571429 0.550000 \n", - "3 0.500000 0.555556 0.538462 \n", - "4 0.484848 0.550000 0.500000 \n" + "0 0.666667 0.625000 0.666667 \n", + "1 0.611111 0.583333 0.611111 \n", + "2 0.575000 0.475000 0.590909 \n", + "3 0.555556 0.460526 0.555556 \n", + "4 0.523810 0.454545 0.538462 \n" ] } ], @@ -5696,11 +5698,18 @@ }, { "cell_type": "code", - "execution_count": 71, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/homes/mp305/anaconda/lib/python2.7/site-packages/scipy/stats/stats.py:250: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.\n", + " \"values. nan values will be ignored.\", RuntimeWarning)\n" + ] + } + ], "source": [ "from scipy.stats import kendalltau\n", "r_, p_ = [], []\n", @@ -5716,14 +5725,14 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.0493253335359 0.410409379365\n" + "0.0554645319767 0.37638195368\n" ] } ], @@ -5733,14 +5742,14 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.240026302342 0.351418392739\n" + "0.248540800214 0.311313597605\n" ] } ], @@ -5762,14 +5771,14 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.237245179063 0.417925582965\n" + "0.294545454545 0.449007896087\n" ] } ], @@ -5784,7 +5793,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 25, "metadata": { "collapsed": true }, @@ -5797,16 +5806,16 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'Chad', 'French Guiana', 'Gambia'}" + "{'Botswana', 'Chad', 'Gambia', 'Ivory Coast', 'Pakistan'}" ] }, - "execution_count": 76, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -5824,8 +5833,10 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "execution_count": 27, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# majority voting + precision at K (top5?)\n", @@ -5836,7 +5847,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -5854,43 +5865,43 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>Brazil</td>\n", - " <td>1</td>\n", + " <td>Pakistan</td>\n", + " <td>10</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>Liberia</td>\n", - " <td>7</td>\n", + " <td>Bhutan</td>\n", + " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>Belize</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", " <td>Chad</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", + " <th>3</th>\n", + " <td>Liberia</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", " <th>4</th>\n", - " <td>Bhutan</td>\n", - " <td>7</td>\n", + " <td>El Salvador</td>\n", + " <td>5</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " index 0\n", - "0 Brazil 1\n", - "1 Liberia 7\n", - "2 Belize 2\n", - "3 Chad 10\n", - "4 Bhutan 7" + " index 0\n", + "0 Pakistan 10\n", + "1 Bhutan 3\n", + "2 Chad 10\n", + "3 Liberia 2\n", + "4 El Salvador 5" ] }, - "execution_count": 11, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -5902,7 +5913,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -5919,128 +5930,98 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>3</th>\n", + " <th>0</th>\n", + " <td>Pakistan</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", " <td>Chad</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", - " <th>6</th>\n", + " <th>5</th>\n", " <td>Gambia</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", - " <th>12</th>\n", - " <td>French Guiana</td>\n", + " <th>10</th>\n", + " <td>Ivory Coast</td>\n", " <td>10</td>\n", " </tr>\n", " <tr>\n", - " <th>18</th>\n", - " <td>Benin</td>\n", + " <th>12</th>\n", + " <td>Botswana</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>Nepal</td>\n", " <td>9</td>\n", " </tr>\n", " <tr>\n", - " <th>5</th>\n", - " <td>El Salvador</td>\n", - " <td>9</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>Botswana</td>\n", + " <th>13</th>\n", + " <td>Benin</td>\n", " <td>8</td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", - " <td>Bhutan</td>\n", + " <th>8</th>\n", + " <td>Senegal</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", - " <th>1</th>\n", - " <td>Liberia</td>\n", + " <th>9</th>\n", + " <td>French Guiana</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", - " <th>16</th>\n", - " <td>Bolivia</td>\n", - " <td>6</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>Senegal</td>\n", - " <td>6</td>\n", + " <th>4</th>\n", + " <td>El Salvador</td>\n", + " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", - " <td>Zimbabwe</td>\n", + " <td>Mozambique</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>Uganda</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Bhutan</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", - " <th>14</th>\n", - " <td>Switzerland</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>Mozambique</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>Belize</td>\n", + " <th>3</th>\n", + " <td>Liberia</td>\n", " <td>2</td>\n", " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>Indonesia</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>Guatemala</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>Brazil</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>Laos</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>Malta</td>\n", - " <td>1</td>\n", - " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " index 0\n", - "3 Chad 10\n", - "6 Gambia 10\n", - "12 French Guiana 10\n", - "18 Benin 9\n", - "5 El Salvador 9\n", - "17 Botswana 8\n", - "4 Bhutan 7\n", - "1 Liberia 7\n", - "16 Bolivia 6\n", - "10 Senegal 6\n", - "11 Zimbabwe 3\n", - "14 Switzerland 3\n", - "15 Mozambique 3\n", - "2 Belize 2\n", - "7 Indonesia 2\n", - "8 Guatemala 2\n", - "0 Brazil 1\n", - "13 Laos 1\n", - "9 Malta 1" + "0 Pakistan 10\n", + "2 Chad 10\n", + "5 Gambia 10\n", + "10 Ivory Coast 10\n", + "12 Botswana 10\n", + "6 Nepal 9\n", + "13 Benin 8\n", + "8 Senegal 7\n", + "9 French Guiana 7\n", + "4 El Salvador 5\n", + "11 Mozambique 5\n", + "7 Uganda 4\n", + "1 Bhutan 3\n", + "3 Liberia 2" ] }, - "execution_count": 12, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -6051,14 +6032,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.51 0.0830662386292\n" + "0.67 0.0640312423743\n" ] } ], @@ -6077,16 +6058,16 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 0.6, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.4, 0.7, 0.4])" + "array([ 0.6, 0.7, 0.7, 0.6, 0.6, 0.7, 0.8, 0.6, 0.7, 0.7])" ] }, - "execution_count": 15, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" }