comparison notebooks/sensitivity_experiment.ipynb @ 73:92a5e280946d branch-tests

?
author Maria Panteli
date Fri, 22 Sep 2017 20:36:30 +0100
parents 930c35ab894c
children 4395037087b6
comparison
equal deleted inserted replaced
72:930c35ab894c 73:92a5e280946d
1 { 1 {
2 "cells": [ 2 "cells": [
3 { 3 {
4 "cell_type": "code", 4 "cell_type": "code",
5 "execution_count": 1, 5 "execution_count": 1,
6 "metadata": {}, 6 "metadata": {
7 "collapsed": false
8 },
7 "outputs": [ 9 "outputs": [
8 { 10 {
9 "name": "stderr", 11 "name": "stderr",
10 "output_type": "stream", 12 "output_type": "stream",
11 "text": [ 13 "text": [
45 ] 47 ]
46 }, 48 },
47 { 49 {
48 "cell_type": "code", 50 "cell_type": "code",
49 "execution_count": 5, 51 "execution_count": 5,
50 "metadata": {}, 52 "metadata": {
53 "collapsed": false
54 },
51 "outputs": [ 55 "outputs": [
52 { 56 {
53 "data": { 57 "data": {
54 "text/plain": [ 58 "text/plain": [
55 "(8396, 108)" 59 "(8396, 108)"
66 ] 70 ]
67 }, 71 },
68 { 72 {
69 "cell_type": "code", 73 "cell_type": "code",
70 "execution_count": 48, 74 "execution_count": 48,
71 "metadata": {}, 75 "metadata": {
76 "collapsed": false
77 },
72 "outputs": [ 78 "outputs": [
73 { 79 {
74 "name": "stdout", 80 "name": "stdout",
75 "output_type": "stream", 81 "output_type": "stream",
76 "text": [ 82 "text": [
276 ] 282 ]
277 }, 283 },
278 { 284 {
279 "cell_type": "code", 285 "cell_type": "code",
280 "execution_count": 52, 286 "execution_count": 52,
281 "metadata": {}, 287 "metadata": {
288 "collapsed": false
289 },
282 "outputs": [ 290 "outputs": [
283 { 291 {
284 "name": "stdout", 292 "name": "stdout",
285 "output_type": "stream", 293 "output_type": "stream",
286 "text": [ 294 "text": [
445 ] 453 ]
446 }, 454 },
447 { 455 {
448 "cell_type": "code", 456 "cell_type": "code",
449 "execution_count": 56, 457 "execution_count": 56,
450 "metadata": {}, 458 "metadata": {
459 "collapsed": false
460 },
451 "outputs": [ 461 "outputs": [
452 { 462 {
453 "data": { 463 "data": {
454 "text/plain": [ 464 "text/plain": [
455 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n", 465 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n",
468 ] 478 ]
469 }, 479 },
470 { 480 {
471 "cell_type": "code", 481 "cell_type": "code",
472 "execution_count": 8, 482 "execution_count": 8,
473 "metadata": {}, 483 "metadata": {
484 "collapsed": false
485 },
474 "outputs": [ 486 "outputs": [
475 { 487 {
476 "data": { 488 "data": {
477 "text/html": [ 489 "text/html": [
478 "<div>\n", 490 "<div>\n",
727 ] 739 ]
728 }, 740 },
729 { 741 {
730 "cell_type": "code", 742 "cell_type": "code",
731 "execution_count": 47, 743 "execution_count": 47,
732 "metadata": {}, 744 "metadata": {
745 "collapsed": false
746 },
733 "outputs": [ 747 "outputs": [
734 { 748 {
735 "name": "stdout", 749 "name": "stdout",
736 "output_type": "stream", 750 "output_type": "stream",
737 "text": [ 751 "text": [
826 ] 840 ]
827 }, 841 },
828 { 842 {
829 "cell_type": "code", 843 "cell_type": "code",
830 "execution_count": 59, 844 "execution_count": 59,
831 "metadata": {}, 845 "metadata": {
846 "collapsed": false
847 },
832 "outputs": [ 848 "outputs": [
833 { 849 {
834 "name": "stdout", 850 "name": "stdout",
835 "output_type": "stream", 851 "output_type": "stream",
836 "text": [ 852 "text": [
4573 ] 4589 ]
4574 }, 4590 },
4575 { 4591 {
4576 "cell_type": "code", 4592 "cell_type": "code",
4577 "execution_count": 21, 4593 "execution_count": 21,
4578 "metadata": {}, 4594 "metadata": {
4595 "collapsed": false
4596 },
4579 "outputs": [ 4597 "outputs": [
4580 { 4598 {
4581 "name": "stdout", 4599 "name": "stdout",
4582 "output_type": "stream", 4600 "output_type": "stream",
4583 "text": [ 4601 "text": [
4789 "13 Germany 0.040000 100 4\n", 4807 "13 Germany 0.040000 100 4\n",
4790 "31 Afghanistan 0.041667 24 1\n", 4808 "31 Afghanistan 0.041667 24 1\n",
4791 "105 Sudan 0.045455 66 3\n", 4809 "105 Sudan 0.045455 66 3\n",
4792 "120 Kazakhstan 0.045455 88 4\n", 4810 "120 Kazakhstan 0.045455 88 4\n",
4793 "writing file\n", 4811 "writing file\n",
4794 "iteration 7\n" 4812 "iteration 7\n",
4795 ]
4796 },
4797 {
4798 "name": "stdout",
4799 "output_type": "stream",
4800 "text": [
4801 "classifying...\n", 4813 "classifying...\n",
4802 "/import/c4dm-04/mariap/train_data_melodia_8_7.pickle\n", 4814 "/import/c4dm-04/mariap/train_data_melodia_8_7.pickle\n",
4803 "0.179777654473\n", 4815 "0.179777654473\n",
4804 "detecting outliers...\n", 4816 "detecting outliers...\n",
4805 "most outliers \n", 4817 "most outliers \n",
4929 ] 4941 ]
4930 }, 4942 },
4931 { 4943 {
4932 "cell_type": "code", 4944 "cell_type": "code",
4933 "execution_count": 52, 4945 "execution_count": 52,
4934 "metadata": {}, 4946 "metadata": {
4947 "collapsed": false
4948 },
4935 "outputs": [ 4949 "outputs": [
4936 { 4950 {
4937 "name": "stdout", 4951 "name": "stdout",
4938 "output_type": "stream", 4952 "output_type": "stream",
4939 "text": [ 4953 "text": [
5131 "13 Germany 0.040000 100 4\n", 5145 "13 Germany 0.040000 100 4\n",
5132 "31 Afghanistan 0.041667 24 1\n", 5146 "31 Afghanistan 0.041667 24 1\n",
5133 "105 Sudan 0.045455 66 3\n", 5147 "105 Sudan 0.045455 66 3\n",
5134 "120 Kazakhstan 0.045455 88 4\n", 5148 "120 Kazakhstan 0.045455 88 4\n",
5135 "writing file\n", 5149 "writing file\n",
5136 "iteration 7\n" 5150 "iteration 7\n",
5137 ]
5138 },
5139 {
5140 "name": "stdout",
5141 "output_type": "stream",
5142 "text": [
5143 "(8048, 381) (8048,)\n", 5151 "(8048, 381) (8048,)\n",
5144 "detecting outliers...\n", 5152 "detecting outliers...\n",
5145 "most outliers \n", 5153 "most outliers \n",
5146 " Country Outliers N_Country N_Outliers\n", 5154 " Country Outliers N_Country N_Outliers\n",
5147 "136 Botswana 0.636364 88 56\n", 5155 "136 Botswana 0.636364 88 56\n",
5261 ] 5269 ]
5262 }, 5270 },
5263 { 5271 {
5264 "cell_type": "code", 5272 "cell_type": "code",
5265 "execution_count": 67, 5273 "execution_count": 67,
5266 "metadata": {}, 5274 "metadata": {
5275 "collapsed": false
5276 },
5267 "outputs": [ 5277 "outputs": [
5268 { 5278 {
5269 "name": "stdout", 5279 "name": "stdout",
5270 "output_type": "stream", 5280 "output_type": "stream",
5271 "text": [ 5281 "text": [
5442 "109 Democratic Republic of the Congo 0.052632 38 2\n", 5452 "109 Democratic Republic of the Congo 0.052632 38 2\n",
5443 "51 Finland 0.052632 19 1\n", 5453 "51 Finland 0.052632 19 1\n",
5444 "105 Sudan 0.055556 54 3\n", 5454 "105 Sudan 0.055556 54 3\n",
5445 "writing file\n", 5455 "writing file\n",
5446 "iteration 6\n", 5456 "iteration 6\n",
5447 "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n" 5457 "/import/c4dm-04/mariap/lda_data_melodia_8.pickle\n",
5448 ]
5449 },
5450 {
5451 "name": "stdout",
5452 "output_type": "stream",
5453 "text": [
5454 "(6560, 380) (6560,)\n", 5458 "(6560, 380) (6560,)\n",
5455 "detecting outliers...\n", 5459 "detecting outliers...\n",
5456 "most outliers \n", 5460 "most outliers \n",
5457 " Country Outliers N_Country N_Outliers\n", 5461 " Country Outliers N_Country N_Outliers\n",
5458 "60 Chad 0.666667 9 6\n", 5462 "60 Chad 0.666667 9 6\n",
5625 ] 5629 ]
5626 }, 5630 },
5627 { 5631 {
5628 "cell_type": "code", 5632 "cell_type": "code",
5629 "execution_count": 69, 5633 "execution_count": 69,
5630 "metadata": {}, 5634 "metadata": {
5635 "collapsed": false
5636 },
5631 "outputs": [ 5637 "outputs": [
5632 { 5638 {
5633 "data": { 5639 "data": {
5634 "text/plain": [ 5640 "text/plain": [
5635 "(137, 10)" 5641 "(137, 10)"
5652 ] 5658 ]
5653 }, 5659 },
5654 { 5660 {
5655 "cell_type": "code", 5661 "cell_type": "code",
5656 "execution_count": 70, 5662 "execution_count": 70,
5657 "metadata": {}, 5663 "metadata": {
5664 "collapsed": false
5665 },
5658 "outputs": [ 5666 "outputs": [
5659 { 5667 {
5660 "name": "stdout", 5668 "name": "stdout",
5661 "output_type": "stream", 5669 "output_type": "stream",
5662 "text": [ 5670 "text": [
5707 ] 5715 ]
5708 }, 5716 },
5709 { 5717 {
5710 "cell_type": "code", 5718 "cell_type": "code",
5711 "execution_count": 71, 5719 "execution_count": 71,
5712 "metadata": {}, 5720 "metadata": {
5721 "collapsed": true
5722 },
5713 "outputs": [], 5723 "outputs": [],
5714 "source": [ 5724 "source": [
5715 "from scipy.stats import kendalltau\n", 5725 "from scipy.stats import kendalltau\n",
5716 "r_, p_ = [], []\n", 5726 "r_, p_ = [], []\n",
5717 "ranked_countries_arr = ranked_countries.get_values()\n", 5727 "ranked_countries_arr = ranked_countries.get_values()\n",
5725 ] 5735 ]
5726 }, 5736 },
5727 { 5737 {
5728 "cell_type": "code", 5738 "cell_type": "code",
5729 "execution_count": 72, 5739 "execution_count": 72,
5730 "metadata": {}, 5740 "metadata": {
5741 "collapsed": false
5742 },
5731 "outputs": [ 5743 "outputs": [
5732 { 5744 {
5733 "name": "stdout", 5745 "name": "stdout",
5734 "output_type": "stream", 5746 "output_type": "stream",
5735 "text": [ 5747 "text": [
5742 ] 5754 ]
5743 }, 5755 },
5744 { 5756 {
5745 "cell_type": "code", 5757 "cell_type": "code",
5746 "execution_count": 80, 5758 "execution_count": 80,
5747 "metadata": {}, 5759 "metadata": {
5760 "collapsed": false
5761 },
5748 "outputs": [ 5762 "outputs": [
5749 { 5763 {
5750 "name": "stdout", 5764 "name": "stdout",
5751 "output_type": "stream", 5765 "output_type": "stream",
5752 "text": [ 5766 "text": [
5771 ] 5785 ]
5772 }, 5786 },
5773 { 5787 {
5774 "cell_type": "code", 5788 "cell_type": "code",
5775 "execution_count": 81, 5789 "execution_count": 81,
5776 "metadata": {}, 5790 "metadata": {
5791 "collapsed": false
5792 },
5777 "outputs": [ 5793 "outputs": [
5778 { 5794 {
5779 "name": "stdout", 5795 "name": "stdout",
5780 "output_type": "stream", 5796 "output_type": "stream",
5781 "text": [ 5797 "text": [
5806 ] 5822 ]
5807 }, 5823 },
5808 { 5824 {
5809 "cell_type": "code", 5825 "cell_type": "code",
5810 "execution_count": 76, 5826 "execution_count": 76,
5811 "metadata": {}, 5827 "metadata": {
5828 "collapsed": false
5829 },
5812 "outputs": [ 5830 "outputs": [
5813 { 5831 {
5814 "data": { 5832 "data": {
5815 "text/plain": [ 5833 "text/plain": [
5816 "{'Chad', 'French Guiana', 'Gambia'}" 5834 "{'Chad', 'French Guiana', 'Gambia'}"
5826 ] 5844 ]
5827 }, 5845 },
5828 { 5846 {
5829 "cell_type": "code", 5847 "cell_type": "code",
5830 "execution_count": 97, 5848 "execution_count": 97,
5831 "metadata": {}, 5849 "metadata": {
5850 "collapsed": true
5851 },
5832 "outputs": [], 5852 "outputs": [],
5833 "source": [ 5853 "source": [
5834 "# majority voting + precision at K (top5?)\n", 5854 "# majority voting + precision at K (top5?)\n",
5835 "from collections import Counter\n", 5855 "from collections import Counter\n",
5836 "K_vote = 10\n", 5856 "K_vote = 10\n",
5838 ] 5858 ]
5839 }, 5859 },
5840 { 5860 {
5841 "cell_type": "code", 5861 "cell_type": "code",
5842 "execution_count": 98, 5862 "execution_count": 98,
5843 "metadata": {}, 5863 "metadata": {
5864 "collapsed": false
5865 },
5844 "outputs": [ 5866 "outputs": [
5845 { 5867 {
5846 "data": { 5868 "data": {
5847 "text/html": [ 5869 "text/html": [
5848 "<div>\n", 5870 "<div>\n",
5904 ] 5926 ]
5905 }, 5927 },
5906 { 5928 {
5907 "cell_type": "code", 5929 "cell_type": "code",
5908 "execution_count": 99, 5930 "execution_count": 99,
5909 "metadata": {}, 5931 "metadata": {
5932 "collapsed": false
5933 },
5910 "outputs": [ 5934 "outputs": [
5911 { 5935 {
5912 "data": { 5936 "data": {
5913 "text/html": [ 5937 "text/html": [
5914 "<div>\n", 5938 "<div>\n",
6053 ] 6077 ]
6054 }, 6078 },
6055 { 6079 {
6056 "cell_type": "code", 6080 "cell_type": "code",
6057 "execution_count": 102, 6081 "execution_count": 102,
6058 "metadata": {}, 6082 "metadata": {
6083 "collapsed": false
6084 },
6059 "outputs": [ 6085 "outputs": [
6060 { 6086 {
6061 "data": { 6087 "data": {
6062 "text/plain": [ 6088 "text/plain": [
6063 "0.51000000000000001" 6089 "0.51000000000000001"
6105 "file_extension": ".py", 6131 "file_extension": ".py",
6106 "mimetype": "text/x-python", 6132 "mimetype": "text/x-python",
6107 "name": "python", 6133 "name": "python",
6108 "nbconvert_exporter": "python", 6134 "nbconvert_exporter": "python",
6109 "pygments_lexer": "ipython2", 6135 "pygments_lexer": "ipython2",
6110 "version": "2.7.12" 6136 "version": "2.7.11"
6111 } 6137 }
6112 }, 6138 },
6113 "nbformat": 4, 6139 "nbformat": 4,
6114 "nbformat_minor": 1 6140 "nbformat_minor": 1
6115 } 6141 }