Mercurial > hg > plosone_underreview
comparison notebooks/sensitivity_experiment.ipynb @ 73:92a5e280946d branch-tests
?
author | Maria Panteli |
---|---|
date | Fri, 22 Sep 2017 20:36:30 +0100 |
parents | 930c35ab894c |
children | 4395037087b6 |
comparison
equal
deleted
inserted
replaced
72:930c35ab894c | 73:92a5e280946d |
---|---|
1 { | 1 { |
2 "cells": [ | 2 "cells": [ |
3 { | 3 { |
4 "cell_type": "code", | 4 "cell_type": "code", |
5 "execution_count": 1, | 5 "execution_count": 1, |
6 "metadata": {}, | 6 "metadata": { |
7 "collapsed": false | |
8 }, | |
7 "outputs": [ | 9 "outputs": [ |
8 { | 10 { |
9 "name": "stderr", | 11 "name": "stderr", |
10 "output_type": "stream", | 12 "output_type": "stream", |
11 "text": [ | 13 "text": [ |
45 ] | 47 ] |
46 }, | 48 }, |
47 { | 49 { |
48 "cell_type": "code", | 50 "cell_type": "code", |
49 "execution_count": 5, | 51 "execution_count": 5, |
50 "metadata": {}, | 52 "metadata": { |
53 "collapsed": false | |
54 }, | |
51 "outputs": [ | 55 "outputs": [ |
52 { | 56 { |
53 "data": { | 57 "data": { |
54 "text/plain": [ | 58 "text/plain": [ |
55 "(8396, 108)" | 59 "(8396, 108)" |
66 ] | 70 ] |
67 }, | 71 }, |
68 { | 72 { |
69 "cell_type": "code", | 73 "cell_type": "code", |
70 "execution_count": 48, | 74 "execution_count": 48, |
71 "metadata": {}, | 75 "metadata": { |
76 "collapsed": false | |
77 }, | |
72 "outputs": [ | 78 "outputs": [ |
73 { | 79 { |
74 "name": "stdout", | 80 "name": "stdout", |
75 "output_type": "stream", | 81 "output_type": "stream", |
76 "text": [ | 82 "text": [ |
276 ] | 282 ] |
277 }, | 283 }, |
278 { | 284 { |
279 "cell_type": "code", | 285 "cell_type": "code", |
280 "execution_count": 52, | 286 "execution_count": 52, |
281 "metadata": {}, | 287 "metadata": { |
288 "collapsed": false | |
289 }, | |
282 "outputs": [ | 290 "outputs": [ |
283 { | 291 { |
284 "name": "stdout", | 292 "name": "stdout", |
285 "output_type": "stream", | 293 "output_type": "stream", |
286 "text": [ | 294 "text": [ |
445 ] | 453 ] |
446 }, | 454 }, |
447 { | 455 { |
448 "cell_type": "code", | 456 "cell_type": "code", |
449 "execution_count": 56, | 457 "execution_count": 56, |
450 "metadata": {}, | 458 "metadata": { |
459 "collapsed": false | |
460 }, | |
451 "outputs": [ | 461 "outputs": [ |
452 { | 462 { |
453 "data": { | 463 "data": { |
454 "text/plain": [ | 464 "text/plain": [ |
455 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n", | 465 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n", |
468 ] | 478 ] |
469 }, | 479 }, |
470 { | 480 { |
471 "cell_type": "code", | 481 "cell_type": "code", |
472 "execution_count": 8, | 482 "execution_count": 8, |
473 "metadata": {}, | 483 "metadata": { |
484 "collapsed": false | |
485 }, | |
474 "outputs": [ | 486 "outputs": [ |
475 { | 487 { |
476 "data": { | 488 "data": { |
477 "text/html": [ | 489 "text/html": [ |
478 "<div>\n", | 490 "<div>\n", |
727 ] | 739 ] |
728 }, | 740 }, |
729 { | 741 { |
730 "cell_type": "code", | 742 "cell_type": "code", |
731 "execution_count": 47, | 743 "execution_count": 47, |
732 "metadata": {}, | 744 "metadata": { |
745 "collapsed": false | |
746 }, | |
733 "outputs": [ | 747 "outputs": [ |
734 { | 748 { |
735 "name": "stdout", | 749 "name": "stdout", |
736 "output_type": "stream", | 750 "output_type": "stream", |
737 "text": [ | 751 "text": [ |
826 ] | 840 ] |
827 }, | 841 }, |
828 { | 842 { |
829 "cell_type": "code", | 843 "cell_type": "code", |
830 "execution_count": 59, | 844 "execution_count": 59, |
831 "metadata": {}, | 845 "metadata": { |
846 "collapsed": false | |
847 }, | |
832 "outputs": [ | 848 "outputs": [ |
833 { | 849 { |
834 "name": "stdout", | 850 "name": "stdout", |
835 "output_type": "stream", | 851 "output_type": "stream", |
836 "text": [ | 852 "text": [ |
4573 ] | 4589 ] |
4574 }, | 4590 }, |
4575 { | 4591 { |
4576 "cell_type": "code", | 4592 "cell_type": "code", |
4577 "execution_count": 21, | 4593 "execution_count": 21, |
4578 "metadata": {}, | 4594 "metadata": { |
4595 "collapsed": false | |
4596 }, | |
4579 "outputs": [ | 4597 "outputs": [ |
4580 { | 4598 { |
4581 "name": "stdout", | 4599 "name": "stdout", |
4582 "output_type": "stream", | 4600 "output_type": "stream", |
4583 "text": [ | 4601 "text": [ |
4789 "13 Germany 0.040000 100 4\n", | 4807 "13 Germany 0.040000 100 4\n", |
4790 "31 Afghanistan 0.041667 24 1\n", | 4808 "31 Afghanistan 0.041667 24 1\n", |
4791 "105 Sudan 0.045455 66 3\n", | 4809 "105 Sudan 0.045455 66 3\n", |
4792 "120 Kazakhstan 0.045455 88 4\n", | 4810 "120 Kazakhstan 0.045455 88 4\n", |
4793 "writing file\n", | 4811 "writing file\n", |
4794 "iteration 7\n" | 4812 "iteration 7\n", |
4795 ] | |
4796 }, | |
4797 { | |
4798 "name": "stdout", | |
4799 "output_type": "stream", | |
4800 "text": [ | |
4801 "classifying...\n", | 4813 "classifying...\n", |
4802 "/import/c4dm-04/mariap/train_data_melodia_8_7.pickle\n", | 4814 "/import/c4dm-04/mariap/train_data_melodia_8_7.pickle\n", |
4803 "0.179777654473\n", | 4815 "0.179777654473\n", |
4804 "detecting outliers...\n", | 4816 "detecting outliers...\n", |
4805 "most outliers \n", | 4817 "most outliers \n", |
4929 ] | 4941 ] |
4930 }, | 4942 }, |
4931 { | 4943 { |
4932 "cell_type": "code", | 4944 "cell_type": "code", |
4933 "execution_count": 52, | 4945 "execution_count": 52, |
4934 "metadata": {}, | 4946 "metadata": { |
4947 "collapsed": false | |
4948 }, | |
4935 "outputs": [ | 4949 "outputs": [ |
4936 { | 4950 { |
4937 "name": "stdout", | 4951 "name": "stdout", |
4938 "output_type": "stream", | 4952 "output_type": "stream", |
4939 "text": [ | 4953 "text": [ |
5131 "13 Germany 0.040000 100 4\n", | 5145 "13 Germany 0.040000 100 4\n", |
5132 "31 Afghanistan 0.041667 24 1\n", | 5146 "31 Afghanistan 0.041667 24 1\n", |
5133 "105 Sudan 0.045455 66 3\n", | 5147 "105 Sudan 0.045455 66 3\n", |
5134 "120 Kazakhstan 0.045455 88 4\n", | 5148 "120 Kazakhstan 0.045455 88 4\n", |
5135 "writing file\n", | 5149 "writing file\n", |
5136 "iteration 7\n" | 5150 "iteration 7\n", |
5137 ] | |
5138 }, | |
5139 { | |
5140 "name": "stdout", | |
5141 "output_type": "stream", | |
5142 "text": [ | |
5143 "(8048, 381) (8048,)\n", | 5151 "(8048, 381) (8048,)\n", |
5144 "detecting outliers...\n", | 5152 "detecting outliers...\n", |
5145 "most outliers \n", | 5153 "most outliers \n", |
5146 " Country Outliers N_Country N_Outliers\n", | 5154 " Country Outliers N_Country N_Outliers\n", |
5147 "136 Botswana 0.636364 88 56\n", | 5155 "136 Botswana 0.636364 88 56\n", |
5261 ] | 5269 ] |
5262 }, | 5270 }, |
5263 { | 5271 { |
5264 "cell_type": "code", | 5272 "cell_type": "code", |
5265 "execution_count": 67, | 5273 "execution_count": 67, |
5266 "metadata": {}, | 5274 "metadata": { |
5275 "collapsed": false | |
5276 }, | |
5267 "outputs": [ | 5277 "outputs": [ |
5268 { | 5278 { |
5269 "name": "stdout", | 5279 "name": "stdout", |
5270 "output_type": "stream", | 5280 "output_type": "stream", |
5271 "text": [ | 5281 "text": [ |
5442 "109 Democratic Republic of the Congo 0.052632 38 2\n", | 5452 "109 Democratic Republic of the Congo 0.052632 38 2\n", |
5443 "51 Finland 0.052632 19 1\n", | 5453 "51 Finland 0.052632 19 1\n", |
5444 "105 Sudan 0.055556 54 3\n", | 5454 "105 Sudan 0.055556 54 3\n", |
5445 "writing file\n", | 5455 "writing file\n", |
5446 "iteration 6\n", | 5456 "iteration 6\n", |
5447 "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n" | 5457 "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n", |
5448 ] | |
5449 }, | |
5450 { | |
5451 "name": "stdout", | |
5452 "output_type": "stream", | |
5453 "text": [ | |
5454 "(6560, 380) (6560,)\n", | 5458 "(6560, 380) (6560,)\n", |
5455 "detecting outliers...\n", | 5459 "detecting outliers...\n", |
5456 "most outliers \n", | 5460 "most outliers \n", |
5457 " Country Outliers N_Country N_Outliers\n", | 5461 " Country Outliers N_Country N_Outliers\n", |
5458 "60 Chad 0.666667 9 6\n", | 5462 "60 Chad 0.666667 9 6\n", |
5625 ] | 5629 ] |
5626 }, | 5630 }, |
5627 { | 5631 { |
5628 "cell_type": "code", | 5632 "cell_type": "code", |
5629 "execution_count": 69, | 5633 "execution_count": 69, |
5630 "metadata": {}, | 5634 "metadata": { |
5635 "collapsed": false | |
5636 }, | |
5631 "outputs": [ | 5637 "outputs": [ |
5632 { | 5638 { |
5633 "data": { | 5639 "data": { |
5634 "text/plain": [ | 5640 "text/plain": [ |
5635 "(137, 10)" | 5641 "(137, 10)" |
5652 ] | 5658 ] |
5653 }, | 5659 }, |
5654 { | 5660 { |
5655 "cell_type": "code", | 5661 "cell_type": "code", |
5656 "execution_count": 70, | 5662 "execution_count": 70, |
5657 "metadata": {}, | 5663 "metadata": { |
5664 "collapsed": false | |
5665 }, | |
5658 "outputs": [ | 5666 "outputs": [ |
5659 { | 5667 { |
5660 "name": "stdout", | 5668 "name": "stdout", |
5661 "output_type": "stream", | 5669 "output_type": "stream", |
5662 "text": [ | 5670 "text": [ |
5707 ] | 5715 ] |
5708 }, | 5716 }, |
5709 { | 5717 { |
5710 "cell_type": "code", | 5718 "cell_type": "code", |
5711 "execution_count": 71, | 5719 "execution_count": 71, |
5712 "metadata": {}, | 5720 "metadata": { |
5721 "collapsed": true | |
5722 }, | |
5713 "outputs": [], | 5723 "outputs": [], |
5714 "source": [ | 5724 "source": [ |
5715 "from scipy.stats import kendalltau\n", | 5725 "from scipy.stats import kendalltau\n", |
5716 "r_, p_ = [], []\n", | 5726 "r_, p_ = [], []\n", |
5717 "ranked_countries_arr = ranked_countries.get_values()\n", | 5727 "ranked_countries_arr = ranked_countries.get_values()\n", |
5725 ] | 5735 ] |
5726 }, | 5736 }, |
5727 { | 5737 { |
5728 "cell_type": "code", | 5738 "cell_type": "code", |
5729 "execution_count": 72, | 5739 "execution_count": 72, |
5730 "metadata": {}, | 5740 "metadata": { |
5741 "collapsed": false | |
5742 }, | |
5731 "outputs": [ | 5743 "outputs": [ |
5732 { | 5744 { |
5733 "name": "stdout", | 5745 "name": "stdout", |
5734 "output_type": "stream", | 5746 "output_type": "stream", |
5735 "text": [ | 5747 "text": [ |
5742 ] | 5754 ] |
5743 }, | 5755 }, |
5744 { | 5756 { |
5745 "cell_type": "code", | 5757 "cell_type": "code", |
5746 "execution_count": 80, | 5758 "execution_count": 80, |
5747 "metadata": {}, | 5759 "metadata": { |
5760 "collapsed": false | |
5761 }, | |
5748 "outputs": [ | 5762 "outputs": [ |
5749 { | 5763 { |
5750 "name": "stdout", | 5764 "name": "stdout", |
5751 "output_type": "stream", | 5765 "output_type": "stream", |
5752 "text": [ | 5766 "text": [ |
5771 ] | 5785 ] |
5772 }, | 5786 }, |
5773 { | 5787 { |
5774 "cell_type": "code", | 5788 "cell_type": "code", |
5775 "execution_count": 81, | 5789 "execution_count": 81, |
5776 "metadata": {}, | 5790 "metadata": { |
5791 "collapsed": false | |
5792 }, | |
5777 "outputs": [ | 5793 "outputs": [ |
5778 { | 5794 { |
5779 "name": "stdout", | 5795 "name": "stdout", |
5780 "output_type": "stream", | 5796 "output_type": "stream", |
5781 "text": [ | 5797 "text": [ |
5806 ] | 5822 ] |
5807 }, | 5823 }, |
5808 { | 5824 { |
5809 "cell_type": "code", | 5825 "cell_type": "code", |
5810 "execution_count": 76, | 5826 "execution_count": 76, |
5811 "metadata": {}, | 5827 "metadata": { |
5828 "collapsed": false | |
5829 }, | |
5812 "outputs": [ | 5830 "outputs": [ |
5813 { | 5831 { |
5814 "data": { | 5832 "data": { |
5815 "text/plain": [ | 5833 "text/plain": [ |
5816 "{'Chad', 'French Guiana', 'Gambia'}" | 5834 "{'Chad', 'French Guiana', 'Gambia'}" |
5826 ] | 5844 ] |
5827 }, | 5845 }, |
5828 { | 5846 { |
5829 "cell_type": "code", | 5847 "cell_type": "code", |
5830 "execution_count": 97, | 5848 "execution_count": 97, |
5831 "metadata": {}, | 5849 "metadata": { |
5850 "collapsed": true | |
5851 }, | |
5832 "outputs": [], | 5852 "outputs": [], |
5833 "source": [ | 5853 "source": [ |
5834 "# majority voting + precision at K (top5?)\n", | 5854 "# majority voting + precision at K (top5?)\n", |
5835 "from collections import Counter\n", | 5855 "from collections import Counter\n", |
5836 "K_vote = 10\n", | 5856 "K_vote = 10\n", |
5838 ] | 5858 ] |
5839 }, | 5859 }, |
5840 { | 5860 { |
5841 "cell_type": "code", | 5861 "cell_type": "code", |
5842 "execution_count": 98, | 5862 "execution_count": 98, |
5843 "metadata": {}, | 5863 "metadata": { |
5864 "collapsed": false | |
5865 }, | |
5844 "outputs": [ | 5866 "outputs": [ |
5845 { | 5867 { |
5846 "data": { | 5868 "data": { |
5847 "text/html": [ | 5869 "text/html": [ |
5848 "<div>\n", | 5870 "<div>\n", |
5904 ] | 5926 ] |
5905 }, | 5927 }, |
5906 { | 5928 { |
5907 "cell_type": "code", | 5929 "cell_type": "code", |
5908 "execution_count": 99, | 5930 "execution_count": 99, |
5909 "metadata": {}, | 5931 "metadata": { |
5932 "collapsed": false | |
5933 }, | |
5910 "outputs": [ | 5934 "outputs": [ |
5911 { | 5935 { |
5912 "data": { | 5936 "data": { |
5913 "text/html": [ | 5937 "text/html": [ |
5914 "<div>\n", | 5938 "<div>\n", |
6053 ] | 6077 ] |
6054 }, | 6078 }, |
6055 { | 6079 { |
6056 "cell_type": "code", | 6080 "cell_type": "code", |
6057 "execution_count": 102, | 6081 "execution_count": 102, |
6058 "metadata": {}, | 6082 "metadata": { |
6083 "collapsed": false | |
6084 }, | |
6059 "outputs": [ | 6085 "outputs": [ |
6060 { | 6086 { |
6061 "data": { | 6087 "data": { |
6062 "text/plain": [ | 6088 "text/plain": [ |
6063 "0.51000000000000001" | 6089 "0.51000000000000001" |
6105 "file_extension": ".py", | 6131 "file_extension": ".py", |
6106 "mimetype": "text/x-python", | 6132 "mimetype": "text/x-python", |
6107 "name": "python", | 6133 "name": "python", |
6108 "nbconvert_exporter": "python", | 6134 "nbconvert_exporter": "python", |
6109 "pygments_lexer": "ipython2", | 6135 "pygments_lexer": "ipython2", |
6110 "version": "2.7.12" | 6136 "version": "2.7.11" |
6111 } | 6137 } |
6112 }, | 6138 }, |
6113 "nbformat": 4, | 6139 "nbformat": 4, |
6114 "nbformat_minor": 1 | 6140 "nbformat_minor": 1 |
6115 } | 6141 } |