Mercurial > hg > plosone_underreview
comparison notebooks/test_hubness.ipynb @ 54:dbcd5b2a4efa branch-tests
additions in notebooks
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Tue, 19 Sep 2017 18:41:14 +0100 |
parents | 90f8a2ea6f6f |
children | 98cd5317e504 b2c38538f127 |
comparison
equal
deleted
inserted
replaced
53:7532363b9dda | 54:dbcd5b2a4efa |
---|---|
1 { | 1 { |
2 "cells": [ | 2 "cells": [ |
3 { | 3 { |
4 "cell_type": "code", | 4 "cell_type": "code", |
5 "execution_count": 1, | 5 "execution_count": 2, |
6 "metadata": { | 6 "metadata": { |
7 "collapsed": true | 7 "collapsed": true |
8 }, | 8 }, |
9 "outputs": [], | 9 "outputs": [], |
10 "source": [ | 10 "source": [ |
25 "import scripts.utils_spatial as utils_spatial" | 25 "import scripts.utils_spatial as utils_spatial" |
26 ] | 26 ] |
27 }, | 27 }, |
28 { | 28 { |
29 "cell_type": "code", | 29 "cell_type": "code", |
30 "execution_count": 5, | 30 "execution_count": 3, |
31 "metadata": {}, | 31 "metadata": { |
32 "collapsed": true | |
33 }, | |
32 "outputs": [], | 34 "outputs": [], |
33 "source": [ | 35 "source": [ |
34 "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", | 36 "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", |
35 "#ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", | 37 "#ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", |
36 "#w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", | 38 "#w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", |
42 ] | 44 ] |
43 }, | 45 }, |
44 { | 46 { |
45 "cell_type": "code", | 47 "cell_type": "code", |
46 "execution_count": 3, | 48 "execution_count": 3, |
49 "metadata": { | |
50 "collapsed": false | |
51 }, | |
52 "outputs": [ | |
53 { | |
54 "data": { | |
55 "text/plain": [ | |
56 "(8200, 380)" | |
57 ] | |
58 }, | |
59 "execution_count": 3, | |
60 "metadata": {}, | |
61 "output_type": "execute_result" | |
62 } | |
63 ], | |
64 "source": [ | |
65 "X.shape" | |
66 ] | |
67 }, | |
68 { | |
69 "cell_type": "markdown", | |
47 "metadata": {}, | 70 "metadata": {}, |
48 "outputs": [ | 71 "source": [ |
49 { | 72 "## distance matrix" |
50 "data": { | |
51 "text/plain": [ | |
52 "(8200, 380)" | |
53 ] | |
54 }, | |
55 "execution_count": 3, | |
56 "metadata": {}, | |
57 "output_type": "execute_result" | |
58 } | |
59 ], | |
60 "source": [ | |
61 "X.shape" | |
62 ] | 73 ] |
63 }, | 74 }, |
64 { | 75 { |
65 "cell_type": "code", | 76 "cell_type": "code", |
66 "execution_count": 4, | 77 "execution_count": 4, |
73 ] | 84 ] |
74 }, | 85 }, |
75 { | 86 { |
76 "cell_type": "code", | 87 "cell_type": "code", |
77 "execution_count": 5, | 88 "execution_count": 5, |
89 "metadata": { | |
90 "collapsed": false | |
91 }, | |
92 "outputs": [ | |
93 { | |
94 "data": { | |
95 "text/plain": [ | |
96 "(8200, 8200)" | |
97 ] | |
98 }, | |
99 "execution_count": 5, | |
100 "metadata": {}, | |
101 "output_type": "execute_result" | |
102 } | |
103 ], | |
104 "source": [ | |
105 "np.savetxt('../data/D_mahal.csv', D)\n", | |
106 "D = np.loadtxt('../data/D_mahal.csv')\n", | |
107 "D.shape" | |
108 ] | |
109 }, | |
110 { | |
111 "cell_type": "code", | |
112 "execution_count": 6, | |
113 "metadata": { | |
114 "collapsed": false | |
115 }, | |
116 "outputs": [ | |
117 { | |
118 "data": { | |
119 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEACAYAAAB78OvLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGWtJREFUeJzt3W2sXdV95/HvDwyUPIwd05F5sCFWa6Q4StrADM50WvUy\nJGCiCniRgKsJeBKrL0pnYPqiE8xIYKtRO54KUaoRRGrzYFCDQdASoqBgB7iaSiNwEiClIYzNqJ7B\nl9pEBpO0o05t8Z8XZ128ubm2r303vvccfz/SkddZe6919jpczu/svfbeJ1WFJEl9OGWuN0CSNDoM\nFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvZhQqSRYleSjJj5K8mGRVksVJtiXZkWRrkkWd9dcn2Znk\npSSXd+ovTvJCW3ZXp/6MJA+0+qeTXNBZtra9xo4kN3Tqlyd5prXZkuS02b8dkqTZmOmeyl3AY1X1\nIeCjwEvALcC2qroQeKI9J8lK4DpgJbAauDtJWj/3AOuqagWwIsnqVr8O2Nfq7wQ2tb4WA7cBl7TH\n7UkWtjabgDtamzdaH5KkOXTUUGkf4r9WVV8BqKqDVfUmcBWwua22Gbimla8G7q+qA1W1C3gZWJXk\nHOD9VbW9rXdvp023r4eBy1r5CmBrVe2vqv3ANuDKFlKXAg9N8/qSpDkykz2V5cCPk3w1ybNJ/jTJ\ne4ElVbW3rbMXWNLK5wK7O+13A+dNUz/R6mn/vgKD0ALeTHLWEfpaDOyvqrem6UuSNEdmEioLgIuA\nu6vqIuAfaIe6JtXgXi8n6n4v3ldGkuapBTNYZzewu6q+254/BKwH9iQ5u6r2tENbr7XlE8CyTvul\nrY+JVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT18Q5JDCBJOkZVlaOvNb2j7qlU\n1R7glSQXtqpPAD8EvgmsbXVrgUda+VFgTZLTkywHVgDbWz8/aWeOBbge+EanzWRfn2Yw8Q+wFbi8\nnX32AeCTwONtz+gp4DPTvP7U7R/Jx+233z7n2+D4HJ/jG73HbM1kTwXgPwB/nuR04H8BnwNOBR5M\nsg7YBVzbPsRfTPIg8CJwELixDm3pjcDXgDMZnE327Vb/ZeC+JDuBfcCa1tfrSX4fmNxL2liDCXuA\nLwBbknwReLb1IUmaQzMKlar6AfAvp1n0icOs/wfAH0xT/33gI9PU/z9aKE2z7KvAV6ep/1tg1RE3\nXBw6m3ugj28iknQ4XlE/pMbGxo5h7RN5HkU/jm18w8fxDbdRH99sZJS/uSapUR7fTAz2VCbfg7in\nIumIklDv5kS9JEkzNdOJeo2I7hyLey2S+uaeykln+OZXJA0PQ0WS1BtDRZLUG0NFktQbQ0WS1BvP\n/hpBU6+il6QTxT2VkeVZXpJOPENFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS\n1BuvqD+J+dsqkvrmnspJzavuJfXLUJEk9cZQkST1xlCRJPXGUJEk9cZQkST1xlCRJPXGUJEk9WZG\noZJkV5K/TvJcku2tbnGSbUl2JNmaZFFn/fVJdiZ5KcnlnfqLk7zQlt3VqT8jyQOt/ukkF3SWrW2v\nsSPJDZ365UmeaW22JDlttm+GJGl2ZrqnUsBYVX2sqi5pdbcA26rqQuCJ9pwkK4HrgJXAauDuHLp0\n+x5gXVWtAFYkWd3q1wH7Wv2dwKbW12LgNuCS9rg9ycLWZhNwR2vzRutDkjSHjuXwV6Y8vwrY3Mqb\ngWta+Wrg/qo6UFW7gJeBVUnOAd5fVdvbevd22nT7ehi4rJWvALZW1f6q2g9sA65sIXUp8NA0ry9J\nmiPHsqfynSTfS/JbrW5JVe1t5b3AklY+F9jdabsbOG+a+olWT/v3FYCqOgi8meSsI/S1GNhfVW9N\n05ckaY7M9IaS/7qq/i7JPwe2JXmpu7CqKsmJuomUN6uSpHlqRqFSVX/X/v1xkr9kML+xN8nZVbWn\nHdp6ra0+ASzrNF/KYA9jopWn1k+2OR94NckCYGFV7UsyAYx12iwDngReBxYlOaXtrSxtffyMDRs2\nvF0eGxtjbGxsutWGXveOw5I0U+Pj44yPj/fWX452y/Mk7wFOraqfJnkvsBXYCHyCweT6piS3AIuq\n6pY2Uf91BsFzHvAd4Bfb3swzwE3AduBbwJ9U1beT3Ah8pKp+O8ka4JqqWtMm6r8HXMRgTuf7wEVV\ntT/Jg8DDVfVAki8Bz1fVl6Zse50st3QfhMrkWI+9fLK8T5KOLAlVddzfUmcSKsuBv2xPFwB/XlV/\n2D7wH2Swh7ELuLZNppPkVuDzwEHg5qp6vNVfDHwNOBN4rKpuavVnAPcBHwP2AWvaJD9JPgfc2l7/\ni1W1ubNdWxjMrzwLfLaqDkzZdkPFUJF0DN71UBlmhsrMyyfL+yTpyGYbKl5RL0nqjaEiSeqNoSJJ\n6s1Mr1PRiOuekuz8iqTj5Z6KmsLrSiXNlqEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEi\nSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSerNgrneAM0/Sd4uV9UcbomkYeOeiqZR7SFJx2ZGoZLk1CTPJflm\ne744ybYkO5JsTbKos+76JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR5IZO/fIkz7Q2\nW5KcNts3QpI0ezPdU7kZeJFDX19vAbZV1YXAE+05SVYC1wErgdXA3Tl0LOUeYF1VrQBWJFnd6tcB\n+1r9ncCm1tdi4Dbgkva4PcnC1mYTcEdr80brQ5I0x44aKkmWAp8C/gyYDIirgM2tvBm4ppWvBu6v\nqgNVtQt4GViV5Bzg/VW1va13b6dNt6+Hgcta+Qpga1Xtr6r9wDbgyhZSlwIPTfP6kqQ5NJM9lTuB\n3wPe6tQtqaq9rbwXWNLK5wK7O+vtBs6bpn6i1dP+fQWgqg4CbyY56wh9LQb2V9Vb0/R1Ukny9kOS\n5oMjnv2V5DeA16rquSRj061TVZXkRM3qHvPrbNiw4e3y2NgYY2NjPW7OfDD5lhgsko7d+Pg44+Pj\nvfV3tFOKfwW4KsmngJ8D/lmS+4C9Sc6uqj3t0NZrbf0JYFmn/VIGexgTrTy1frLN+cCrSRYAC6tq\nX5IJYKzTZhnwJPA6sCjJKW1vZWnrY1rdUJEkvdPUL9sbN26cVX9HPPxVVbdW1bKqWg6sAZ6squuB\nR4G1bbW1wCOt/CiwJsnpSZYDK4DtVbUH+EmSVW1O5HrgG502k319msHEP8BW4PIki5J8APgk8HgN\nLpx4CvjMNK8vSZpDx3rx4+Sxlv8CPJhkHbALuBagql5M8iCDM8UOAjfWoavnbgS+BpwJPFZV3271\nXwbuS7IT2McgvKiq15P8PvDdtt7GNmEP8AVgS5IvAs+2PiRJcyyjfMV0khrx8fHOOZX+y6P8/kn6\nWUmoquOepPWKeklSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlS\nbwwVSVJvDBVJUm8MFUlSb4711vc6yXR/qtg7Fks6GvdUdBTFcfyKs6STlKEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSerNEUMlyc8l\neSbJ80leTPKHrX5xkm1JdiTZmmRRp836JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR\n5IZO/fK2XTuTbElyWl9viCTp+B0xVKrqH4FLq+qXgY8Clyb5VeAWYFtVXQg80Z6TZCVwHbASWA3c\nnUM/yHEPsK6qVgArkqxu9euAfa3+TmBT62sxcBtwSXvcnmRha7MJuKO1eaP1IUmaY0c9/FVV/7cV\nTwdOZfAhfhWwudVvBq5p5auB+6vqQFXtAl4GViU5B3h/VW1v693badPt62Hgsla+AthaVfuraj+w\nDbiyhdSlwEPTvL4kaQ4dNVSSnJLkeWAv8FRV/RBYUlV72yp7gSWtfC6wu9N8N3DeNPUTrZ727ysA\nVXUQeDPJWUfoazGwv6remqYvSdIcOurPCbcP719uh54eT3LplOWV5ET9NOAxv86GDRveLo+NjTE2\nNtbj5kjScBsfH2d8fLy3/mb8G/VV9WaSbwEXA3uTnF1Ve9qhrdfaahPAsk6zpQz2MCZaeWr9ZJvz\ngVeTLAAWVtW+JBPAWKfNMuBJ4HVgUZJTWuAtbX1MqxsqkqR3mvple+PGjbPq72hnf/385JldSc4E\nPgk8BzwKrG2rrQUeaeVHgTVJTk+yHFgBbK+qPcBPkqxqcyLXA9/otJns69MMJv4BtgKXJ1mU5APt\ntR+vqgKeAj4zzetLkuZQBp/Rh1mYfITBRPgp7XFfVf1ROzPrQQZ7GLuAa9tkOkluBT4PHARurqrH\nW/3FwNeAM4HHquqmVn8GcB/wMWAfsKZN8pPkc8CtbXO+WFWbW/1yYAuD+ZVngc9W1YFptr+ONL5h\ndOhkukmT48sJKHdedcTeV0kDSaiqqR80M28/yh8OoxsqJzJIpi+P2vsqaWC2oeIV9ZKk3hgqkqTe\nGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgq\nkqTeGCqSpN4YKpKk3hgqkqTeLJjrDdBw6v6ssb8CKWmSeyo6TsWhnxeWpAFDRZLUG0NFktQbQ0WS\n1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1JujhkqSZUmeSvLDJH+T5KZWvzjJtiQ7kmxN\nsqjTZn2SnUleSnJ5p/7iJC+0ZXd16s9I8kCrfzrJBZ1la9tr7EhyQ6d+eZJnWpstSU7r4w2RJB2/\nmeypHAB+t6o+DHwc+J0kHwJuAbZV1YXAE+05SVYC1wErgdXA3Tl0o6h7gHVVtQJYkWR1q18H7Gv1\ndwKbWl+LgduAS9rj9iQLW5tNwB2tzRutD0nSHDpqqFTVnqp6vpX/HvgRcB5wFbC5rbYZuKaVrwbu\nr6oDVbULeBlYleQc4P1Vtb2td2+nTbevh4HLWvkKYGtV7a+q/cA24MoWUpcCD03z+pKkOXJMcypJ\nPgh8DHgGWFJVe9uivcCSVj4X2N1ptptBCE2tn2j1tH9fAaiqg8CbSc46Ql+Lgf1V9dY0fUmS5siM\nb32f5H0M9iJurqqfTrn1eSU5UbesPabX2bBhw9vlsbExxsbGet4cSRpe4+PjjI+P99bfjEKlTYI/\nDNxXVY+06r1Jzq6qPe3Q1mutfgJY1mm+lMEexkQrT62fbHM+8GqSBcDCqtqXZAIY67RZBjwJvA4s\nSnJK21tZ2vr4Gd1Q0bvD31aRhtfUL9sbN26cVX8zOfsrwJeBF6vqjzuLHgXWtvJa4JFO/ZokpydZ\nDqwAtlfVHuAnSVa1Pq8HvjFNX59mMPEPsBW4PMmiJB8APgk8XoNPrqeAz0zz+jrh/G0VSQM52jfL\nJL8K/Hfgrzn0ybEe2A48yGAPYxdwbZtMJ8mtwOeBgwwOlz3e6i8GvgacCTxWVZOnJ58B3MdgvmYf\nsKZN8pPkc8Ct7XW/WFWbW/1yYAuD+ZVngc9W1YEp216j9s15kMeTY5p/5VF7v6WTTRKqKkdf8zDt\nR/lDYFRCpXt4aWDuw8NQkUbTbEPFK+qHhoeYJM1/hookqTeGiiSpN4aKJKk3hookqTeGiiSpN4aK\nJKk3hookqTeGiiSpNzO+S7E0E95cUjq5uaeinnnlv3QyM1QkSb0xVCRJvTFUJEm9MVQkSb0xVCRJ\nvTFUJEm9MVQkSb0xVCRJvTFUJEm9MVQkSb3x3l9613gfMOnk456K3kXeB0w62RgqkqTeGCqSpN4Y\nKpKk3hgqkqTeGCqSpN4cNVSSfCXJ3iQvdOoWJ9mWZEeSrUkWdZatT7IzyUtJLu/UX5zkhbbsrk79\nGUkeaPVPJ7mgs2xte40dSW7o1C9P8kxrsyXJabN9IyRJszeTPZWvAqun1N0CbKuqC4En2nOSrASu\nA1a2Nnfn0MUK9wDrqmoFsCLJZJ/rgH2t/k5gU+trMXAbcEl73J5kYWuzCbijtXmj9TFSkrz9kKRh\ncdRQqaq/YvDB3XUVsLmVNwPXtPLVwP1VdaCqdgEvA6uSnAO8v6q2t/Xu7bTp9vUwcFkrXwFsrar9\nVbUf2AZc2ULqUuChaV5/xHidh6ThcrxzKkuqam8r7wWWtPK5wO7OeruB86apn2j1tH9fAaiqg8Cb\nSc46Ql+Lgf1V9dY0fWmecs9LOjnMeqK+BvffOFFfp/3aPrTc65JOBsd776+9Sc6uqj3t0NZrrX4C\nWNZZbymDPYyJVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT1Ma0NGza8XR4bG2Ns\nbOxwq0rSSWd8fJzx8fHe+stMbvSX5IPAN6vqI+35f2Uwub4pyS3Aoqq6pU3Uf53BxPp5wHeAX6yq\nSvIMcBOwHfgW8CdV9e0kNwIfqarfTrIGuKaq1rSJ+u8BFwEBvg9cVFX7kzwIPFxVDyT5EvB8VX1p\nmu2uYb2R4eAw0eS2j1Z5WP+bSCeDJFTVcR+nPmqoJLkf+HXg5xnMn9wGfAN4kMEexi7g2jaZTpJb\ngc8DB4Gbq+rxVn8x8DXgTOCxqrqp1Z8B3Ad8DNgHrGmT/CT5HHBr25QvVtXmVr8c2MJgfuVZ4LNV\ndWCabTdU5mF5WP+bSCeDdz1UhpmhMj/Lw/rfRDoZzDZUvKJektQbQ0WS1Bt/+VEnnL8IKY0u91Q0\nB7xmRRpVhookqTeGiiSpN4aKJKk3hookqTee/aU55Zlg0mhxT0VzzDPBpFFiqEiSeuPhr3nEH7CS\nNOwMlXmnexPGk4vzK9Lw8/CX5hHnV6RhZ6hIknpjqEiSeuOciuYl51ek4eSeiuYp51ekYWSoSJJ6\nY6hIknrjnIrmPedXpOHhnoqGgPMr0rAwVCRJvfHw1xzzfl/HxkNh0vzmnsq84OGdmfO9kuYz91Q0\ntNxrkeYfQ0VD7NAdnQ93GNGwkU4sQ0UjovuTAdOHjQEjvfuGek4lyeokLyXZmeQLc709mo8OzcEk\nefsh6d0xtKGS5FTgvwGrgZXAbyb50Nxu1cz08+E23tfmzFPj70Kf8ydgxsfH5+R1TxTHd/Ia2lAB\nLgFerqpdVXUA2AJcPcfbdAxmexbTeE/bMV+Nv8v9z23AjPqHkuM7eQ3znMp5wCud57uBVVNX6h5H\n97CHpnf0Cf8Z9eKcjTTUoTKj/4NPOWWwM7Zr1y4uuOCCd3WDjsRAGxbTT/jPpHys/403btx4/Jt5\nGDMNtiNtq+Go2ciw/gEl+TiwoapWt+frgbeqalNnneEcnCTNoao67m/BwxwqC4D/CVwGvApsB36z\nqn40pxsmSSexoT38VVUHk/x74HHgVODLBookza2h3VORJM0/w3xK8WGN2kWRSZYleSrJD5P8TZKb\nWv3iJNuS7EiyNcmiud7W45Xk1CTPJflmez5KY1uU5KEkP0ryYpJVIza+9e1v84UkX09yxjCPL8lX\nkuxN8kKn7rDjaePf2T5zLp+brZ65w4zvj9rf5w+S/EWShZ1lxzS+kQuVYb4o8ggOAL9bVR8GPg78\nThvTLcC2qroQeKI9H1Y3Ay9y6HSqURrbXcBjVfUh4KPAS4zI+JJ8EPgt4KKq+giDQ9FrGO7xfZXB\n50fXtONJshK4jsFnzWrg7iTz/XN1uvFtBT5cVb8E7ADWw/GNb74P/ngM+UWRP6uq9lTV863898CP\nGFyncxWwua22GbhmbrZwdpIsBT4F/BmD83RhdMa2EPi1qvoKDOYCq+pNRmR8wE8YfOl5Tzt55j0M\nTpwZ2vFV1V8Bb0ypPtx4rgbur6oDVbULeJnBZ9C8Nd34qmpbVb3Vnj4DLG3lYx7fKIbKdBdFnjdH\n29K79s3wYwz+wy+pqr1t0V5gyRxt1mzdCfwe8FanblTGthz4cZKvJnk2yZ8meS8jMr6qeh24A/g/\nDMJkf1VtY0TG13G48ZzL4DNm0ih83nweeKyVj3l8oxgqI3vmQZL3AQ8DN1fVT7vLanDGxdCNPclv\nAK9V1XMc2kt5h2EdW7MAuAi4u6ouAv6BKYeChnl8SX4B+I/ABxl8AL0vyWe76wzz+KYzg/EM7ViT\n/Gfgn6rq60dY7YjjG8VQmQCWdZ4v451JO5SSnMYgUO6rqkda9d4kZ7fl5wCvzdX2zcKvAFcl+Vvg\nfuDfJLmP0RgbDP72dlfVd9vzhxiEzJ4RGd+/AP5HVe2rqoPAXwD/itEZ36TD/T1O/bxZ2uqGTpJ/\nx+Aw9L/tVB/z+EYxVL4HrEjywSSnM5hkenSOt2lWMrinxpeBF6vqjzuLHgXWtvJa4JGpbee7qrq1\nqpZV1XIGE7xPVtX1jMDYYDAfBryS5MJW9Qngh8A3GYHxMTjp4ONJzmx/p59gcMLFqIxv0uH+Hh8F\n1iQ5PclyYAWDC7GHSpLVDA5BX11V/9hZdOzjq6qRewBXMrja/mVg/VxvTw/j+VUG8w3PA8+1x2pg\nMfAdBmdrbAUWzfW2znKcvw482sojMzbgl4DvAj9g8E1+4YiN7z8xCMoXGExinzbM42Owx/wq8E8M\n5mc/d6TxALe2z5qXgCvmevuPY3yfB3YC/7vz+XL38Y7Pix8lSb0ZxcNfkqQ5YqhIknpjqEiSemOo\nSJJ6Y6hIknpjqEiSemOoSJJ6Y6hIknrz/wF0zsvts73EjAAAAABJRU5ErkJggg==\n", | |
120 "text/plain": [ | |
121 "<matplotlib.figure.Figure at 0x7f6dc44adfd0>" | |
122 ] | |
123 }, | |
124 "metadata": {}, | |
125 "output_type": "display_data" | |
126 } | |
127 ], | |
128 "source": [ | |
129 "plt.hist(D.ravel(), bins=100);" | |
130 ] | |
131 }, | |
132 { | |
133 "cell_type": "markdown", | |
78 "metadata": {}, | 134 "metadata": {}, |
79 "outputs": [ | 135 "source": [ |
80 { | 136 "## n-occurrence and stats" |
81 "data": { | |
82 "text/plain": [ | |
83 "(8200, 8200)" | |
84 ] | |
85 }, | |
86 "execution_count": 5, | |
87 "metadata": {}, | |
88 "output_type": "execute_result" | |
89 } | |
90 ], | |
91 "source": [ | |
92 "D.shape" | |
93 ] | |
94 }, | |
95 { | |
96 "cell_type": "code", | |
97 "execution_count": 6, | |
98 "metadata": {}, | |
99 "outputs": [ | |
100 { | |
101 "data": { | |
102 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEACAYAAAB78OvLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGWtJREFUeJzt3W2sXdV95/HvDwyUPIwd05F5sCFWa6Q4StrADM50WvUy\nJGCiCniRgKsJeBKrL0pnYPqiE8xIYKtRO54KUaoRRGrzYFCDQdASoqBgB7iaSiNwEiClIYzNqJ7B\nl9pEBpO0o05t8Z8XZ128ubm2r303vvccfz/SkddZe6919jpczu/svfbeJ1WFJEl9OGWuN0CSNDoM\nFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvZhQqSRYleSjJj5K8mGRVksVJtiXZkWRrkkWd9dcn2Znk\npSSXd+ovTvJCW3ZXp/6MJA+0+qeTXNBZtra9xo4kN3Tqlyd5prXZkuS02b8dkqTZmOmeyl3AY1X1\nIeCjwEvALcC2qroQeKI9J8lK4DpgJbAauDtJWj/3AOuqagWwIsnqVr8O2Nfq7wQ2tb4WA7cBl7TH\n7UkWtjabgDtamzdaH5KkOXTUUGkf4r9WVV8BqKqDVfUmcBWwua22Gbimla8G7q+qA1W1C3gZWJXk\nHOD9VbW9rXdvp023r4eBy1r5CmBrVe2vqv3ANuDKFlKXAg9N8/qSpDkykz2V5cCPk3w1ybNJ/jTJ\ne4ElVbW3rbMXWNLK5wK7O+13A+dNUz/R6mn/vgKD0ALeTHLWEfpaDOyvqrem6UuSNEdmEioLgIuA\nu6vqIuAfaIe6JtXgXi8n6n4v3ldGkuapBTNYZzewu6q+254/BKwH9iQ5u6r2tENbr7XlE8CyTvul\nrY+JVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT18Q5JDCBJOkZVlaOvNb2j7qlU\n1R7glSQXtqpPAD8EvgmsbXVrgUda+VFgTZLTkywHVgDbWz8/aWeOBbge+EanzWRfn2Yw8Q+wFbi8\nnX32AeCTwONtz+gp4DPTvP7U7R/Jx+233z7n2+D4HJ/jG73HbM1kTwXgPwB/nuR04H8BnwNOBR5M\nsg7YBVzbPsRfTPIg8CJwELixDm3pjcDXgDMZnE327Vb/ZeC+JDuBfcCa1tfrSX4fmNxL2liDCXuA\nLwBbknwReLb1IUmaQzMKlar6AfAvp1n0icOs/wfAH0xT/33gI9PU/z9aKE2z7KvAV6ep/1tg1RE3\nXBw6m3ugj28iknQ4XlE/pMbGxo5h7RN5HkU/jm18w8fxDbdRH99sZJS/uSapUR7fTAz2VCbfg7in\nIumIklDv5kS9JEkzNdOJeo2I7hyLey2S+uaeykln+OZXJA0PQ0WS1BtDRZLUG0NFktQbQ0WS1BvP\n/hpBU6+il6QTxT2VkeVZXpJOPENFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS\n1BuvqD+J+dsqkvrmnspJzavuJfXLUJEk9cZQkST1xlCRJPXGUJEk9cZQkST1xlCRJPXGUJEk9WZG\noZJkV5K/TvJcku2tbnGSbUl2JNmaZFFn/fVJdiZ5KcnlnfqLk7zQlt3VqT8jyQOt/ukkF3SWrW2v\nsSPJDZ365UmeaW22JDlttm+GJGl2ZrqnUsBYVX2sqi5pdbcA26rqQuCJ9pwkK4HrgJXAauDuHLp0\n+x5gXVWtAFYkWd3q1wH7Wv2dwKbW12LgNuCS9rg9ycLWZhNwR2vzRutDkjSHjuXwV6Y8vwrY3Mqb\ngWta+Wrg/qo6UFW7gJeBVUnOAd5fVdvbevd22nT7ehi4rJWvALZW1f6q2g9sA65sIXUp8NA0ry9J\nmiPHsqfynSTfS/JbrW5JVe1t5b3AklY+F9jdabsbOG+a+olWT/v3FYCqOgi8meSsI/S1GNhfVW9N\n05ckaY7M9IaS/7qq/i7JPwe2JXmpu7CqKsmJuomUN6uSpHlqRqFSVX/X/v1xkr9kML+xN8nZVbWn\nHdp6ra0+ASzrNF/KYA9jopWn1k+2OR94NckCYGFV7UsyAYx12iwDngReBxYlOaXtrSxtffyMDRs2\nvF0eGxtjbGxsutWGXveOw5I0U+Pj44yPj/fWX452y/Mk7wFOraqfJnkvsBXYCHyCweT6piS3AIuq\n6pY2Uf91BsFzHvAd4Bfb3swzwE3AduBbwJ9U1beT3Ah8pKp+O8ka4JqqWtMm6r8HXMRgTuf7wEVV\ntT/Jg8DDVfVAki8Bz1fVl6Zse50st3QfhMrkWI+9fLK8T5KOLAlVddzfUmcSKsuBv2xPFwB/XlV/\n2D7wH2Swh7ELuLZNppPkVuDzwEHg5qp6vNVfDHwNOBN4rKpuavVnAPcBHwP2AWvaJD9JPgfc2l7/\ni1W1ubNdWxjMrzwLfLaqDkzZdkPFUJF0DN71UBlmhsrMyyfL+yTpyGYbKl5RL0nqjaEiSeqNoSJJ\n6s1Mr1PRiOuekuz8iqTj5Z6KmsLrSiXNlqEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEi\nSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSerNgrneAM0/Sd4uV9UcbomkYeOeiqZR7SFJx2ZGoZLk1CTPJflm\ne744ybYkO5JsTbKos+76JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR5IZO/fIkz7Q2\nW5KcNts3QpI0ezPdU7kZeJFDX19vAbZV1YXAE+05SVYC1wErgdXA3Tl0LOUeYF1VrQBWJFnd6tcB\n+1r9ncCm1tdi4Dbgkva4PcnC1mYTcEdr80brQ5I0x44aKkmWAp8C/gyYDIirgM2tvBm4ppWvBu6v\nqgNVtQt4GViV5Bzg/VW1va13b6dNt6+Hgcta+Qpga1Xtr6r9wDbgyhZSlwIPTfP6kqQ5NJM9lTuB\n3wPe6tQtqaq9rbwXWNLK5wK7O+vtBs6bpn6i1dP+fQWgqg4CbyY56wh9LQb2V9Vb0/R1Ukny9kOS\n5oMjnv2V5DeA16rquSRj061TVZXkRM3qHvPrbNiw4e3y2NgYY2NjPW7OfDD5lhgsko7d+Pg44+Pj\nvfV3tFOKfwW4KsmngJ8D/lmS+4C9Sc6uqj3t0NZrbf0JYFmn/VIGexgTrTy1frLN+cCrSRYAC6tq\nX5IJYKzTZhnwJPA6sCjJKW1vZWnrY1rdUJEkvdPUL9sbN26cVX9HPPxVVbdW1bKqWg6sAZ6squuB\nR4G1bbW1wCOt/CiwJsnpSZYDK4DtVbUH+EmSVW1O5HrgG502k319msHEP8BW4PIki5J8APgk8HgN\nLpx4CvjMNK8vSZpDx3rx4+Sxlv8CPJhkHbALuBagql5M8iCDM8UOAjfWoavnbgS+BpwJPFZV3271\nXwbuS7IT2McgvKiq15P8PvDdtt7GNmEP8AVgS5IvAs+2PiRJcyyjfMV0khrx8fHOOZX+y6P8/kn6\nWUmoquOepPWKeklSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlS\nbwwVSVJvDBVJUm8MFUlSb4711vc6yXR/qtg7Fks6GvdUdBTFcfyKs6STlKEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSerNEUMlyc8l\neSbJ80leTPKHrX5xkm1JdiTZmmRRp836JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR\n5IZO/fK2XTuTbElyWl9viCTp+B0xVKrqH4FLq+qXgY8Clyb5VeAWYFtVXQg80Z6TZCVwHbASWA3c\nnUM/yHEPsK6qVgArkqxu9euAfa3+TmBT62sxcBtwSXvcnmRha7MJuKO1eaP1IUmaY0c9/FVV/7cV\nTwdOZfAhfhWwudVvBq5p5auB+6vqQFXtAl4GViU5B3h/VW1v693badPt62Hgsla+AthaVfuraj+w\nDbiyhdSlwEPTvL4kaQ4dNVSSnJLkeWAv8FRV/RBYUlV72yp7gSWtfC6wu9N8N3DeNPUTrZ727ysA\nVXUQeDPJWUfoazGwv6remqYvSdIcOurPCbcP719uh54eT3LplOWV5ET9NOAxv86GDRveLo+NjTE2\nNtbj5kjScBsfH2d8fLy3/mb8G/VV9WaSbwEXA3uTnF1Ve9qhrdfaahPAsk6zpQz2MCZaeWr9ZJvz\ngVeTLAAWVtW+JBPAWKfNMuBJ4HVgUZJTWuAtbX1MqxsqkqR3mvple+PGjbPq72hnf/385JldSc4E\nPgk8BzwKrG2rrQUeaeVHgTVJTk+yHFgBbK+qPcBPkqxqcyLXA9/otJns69MMJv4BtgKXJ1mU5APt\ntR+vqgKeAj4zzetLkuZQBp/Rh1mYfITBRPgp7XFfVf1ROzPrQQZ7GLuAa9tkOkluBT4PHARurqrH\nW/3FwNeAM4HHquqmVn8GcB/wMWAfsKZN8pPkc8CtbXO+WFWbW/1yYAuD+ZVngc9W1YFptr+ONL5h\ndOhkukmT48sJKHdedcTeV0kDSaiqqR80M28/yh8OoxsqJzJIpi+P2vsqaWC2oeIV9ZKk3hgqkqTe\nGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgq\nkqTeGCqSpN4YKpKk3hgqkqTeLJjrDdBw6v6ssb8CKWmSeyo6TsWhnxeWpAFDRZLUG0NFktQbQ0WS\n1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1JujhkqSZUmeSvLDJH+T5KZWvzjJtiQ7kmxN\nsqjTZn2SnUleSnJ5p/7iJC+0ZXd16s9I8kCrfzrJBZ1la9tr7EhyQ6d+eZJnWpstSU7r4w2RJB2/\nmeypHAB+t6o+DHwc+J0kHwJuAbZV1YXAE+05SVYC1wErgdXA3Tl0o6h7gHVVtQJYkWR1q18H7Gv1\ndwKbWl+LgduAS9rj9iQLW5tNwB2tzRutD0nSHDpqqFTVnqp6vpX/HvgRcB5wFbC5rbYZuKaVrwbu\nr6oDVbULeBlYleQc4P1Vtb2td2+nTbevh4HLWvkKYGtV7a+q/cA24MoWUpcCD03z+pKkOXJMcypJ\nPgh8DHgGWFJVe9uivcCSVj4X2N1ptptBCE2tn2j1tH9fAaiqg8CbSc46Ql+Lgf1V9dY0fUmS5siM\nb32f5H0M9iJurqqfTrn1eSU5UbesPabX2bBhw9vlsbExxsbGet4cSRpe4+PjjI+P99bfjEKlTYI/\nDNxXVY+06r1Jzq6qPe3Q1mutfgJY1mm+lMEexkQrT62fbHM+8GqSBcDCqtqXZAIY67RZBjwJvA4s\nSnJK21tZ2vr4Gd1Q0bvD31aRhtfUL9sbN26cVX8zOfsrwJeBF6vqjzuLHgXWtvJa4JFO/ZokpydZ\nDqwAtlfVHuAnSVa1Pq8HvjFNX59mMPEPsBW4PMmiJB8APgk8XoNPrqeAz0zz+jrh/G0VSQM52jfL\nJL8K/Hfgrzn0ybEe2A48yGAPYxdwbZtMJ8mtwOeBgwwOlz3e6i8GvgacCTxWVZOnJ58B3MdgvmYf\nsKZN8pPkc8Ct7XW/WFWbW/1yYAuD+ZVngc9W1YEp216j9s15kMeTY5p/5VF7v6WTTRKqKkdf8zDt\nR/lDYFRCpXt4aWDuw8NQkUbTbEPFK+qHhoeYJM1/hookqTeGiiSpN4aKJKk3hookqTeGiiSpN4aK\nJKk3hookqTeGiiSpNzO+S7E0E95cUjq5uaeinnnlv3QyM1QkSb0xVCRJvTFUJEm9MVQkSb0xVCRJ\nvTFUJEm9MVQkSb0xVCRJvTFUJEm9MVQkSb3x3l9613gfMOnk456K3kXeB0w62RgqkqTeGCqSpN4Y\nKpKk3hgqkqTeGCqSpN4cNVSSfCXJ3iQvdOoWJ9mWZEeSrUkWdZatT7IzyUtJLu/UX5zkhbbsrk79\nGUkeaPVPJ7mgs2xte40dSW7o1C9P8kxrsyXJabN9IyRJszeTPZWvAqun1N0CbKuqC4En2nOSrASu\nA1a2Nnfn0MUK9wDrqmoFsCLJZJ/rgH2t/k5gU+trMXAbcEl73J5kYWuzCbijtXmj9TFSkrz9kKRh\ncdRQqaq/YvDB3XUVsLmVNwPXtPLVwP1VdaCqdgEvA6uSnAO8v6q2t/Xu7bTp9vUwcFkrXwFsrar9\nVbUf2AZc2ULqUuChaV5/xHidh6ThcrxzKkuqam8r7wWWtPK5wO7OeruB86apn2j1tH9fAaiqg8Cb\nSc46Ql+Lgf1V9dY0fWmecs9LOjnMeqK+BvffOFFfp/3aPrTc65JOBsd776+9Sc6uqj3t0NZrrX4C\nWNZZbymDPYyJVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT1Ma0NGza8XR4bG2Ns\nbOxwq0rSSWd8fJzx8fHe+stMbvSX5IPAN6vqI+35f2Uwub4pyS3Aoqq6pU3Uf53BxPp5wHeAX6yq\nSvIMcBOwHfgW8CdV9e0kNwIfqarfTrIGuKaq1rSJ+u8BFwEBvg9cVFX7kzwIPFxVDyT5EvB8VX1p\nmu2uYb2R4eAw0eS2j1Z5WP+bSCeDJFTVcR+nPmqoJLkf+HXg5xnMn9wGfAN4kMEexi7g2jaZTpJb\ngc8DB4Gbq+rxVn8x8DXgTOCxqrqp1Z8B3Ad8DNgHrGmT/CT5HHBr25QvVtXmVr8c2MJgfuVZ4LNV\ndWCabTdU5mF5WP+bSCeDdz1UhpmhMj/Lw/rfRDoZzDZUvKJektQbQ0WS1Bt/+VEnnL8IKY0u91Q0\nB7xmRRpVhookqTeGiiSpN4aKJKk3hookqTee/aU55Zlg0mhxT0VzzDPBpFFiqEiSeuPhr3nEH7CS\nNOwMlXmnexPGk4vzK9Lw8/CX5hHnV6RhZ6hIknpjqEiSeuOciuYl51ek4eSeiuYp51ekYWSoSJJ6\nY6hIknrjnIrmPedXpOHhnoqGgPMr0rAwVCRJvfHw1xzzfl/HxkNh0vzmnsq84OGdmfO9kuYz91Q0\ntNxrkeYfQ0VD7NAdnQ93GNGwkU4sQ0UjovuTAdOHjQEjvfuGek4lyeokLyXZmeQLc709mo8OzcEk\nefsh6d0xtKGS5FTgvwGrgZXAbyb50Nxu1cz08+E23tfmzFPj70Kf8ydgxsfH5+R1TxTHd/Ia2lAB\nLgFerqpdVXUA2AJcPcfbdAxmexbTeE/bMV+Nv8v9z23AjPqHkuM7eQ3znMp5wCud57uBVVNX6h5H\n97CHpnf0Cf8Z9eKcjTTUoTKj/4NPOWWwM7Zr1y4uuOCCd3WDjsRAGxbTT/jPpHys/403btx4/Jt5\nGDMNtiNtq+Go2ciw/gEl+TiwoapWt+frgbeqalNnneEcnCTNoao67m/BwxwqC4D/CVwGvApsB36z\nqn40pxsmSSexoT38VVUHk/x74HHgVODLBookza2h3VORJM0/w3xK8WGN2kWRSZYleSrJD5P8TZKb\nWv3iJNuS7EiyNcmiud7W45Xk1CTPJflmez5KY1uU5KEkP0ryYpJVIza+9e1v84UkX09yxjCPL8lX\nkuxN8kKn7rDjaePf2T5zLp+brZ65w4zvj9rf5w+S/EWShZ1lxzS+kQuVYb4o8ggOAL9bVR8GPg78\nThvTLcC2qroQeKI9H1Y3Ay9y6HSqURrbXcBjVfUh4KPAS4zI+JJ8EPgt4KKq+giDQ9FrGO7xfZXB\n50fXtONJshK4jsFnzWrg7iTz/XN1uvFtBT5cVb8E7ADWw/GNb74P/ngM+UWRP6uq9lTV863898CP\nGFyncxWwua22GbhmbrZwdpIsBT4F/BmD83RhdMa2EPi1qvoKDOYCq+pNRmR8wE8YfOl5Tzt55j0M\nTpwZ2vFV1V8Bb0ypPtx4rgbur6oDVbULeJnBZ9C8Nd34qmpbVb3Vnj4DLG3lYx7fKIbKdBdFnjdH\n29K79s3wYwz+wy+pqr1t0V5gyRxt1mzdCfwe8FanblTGthz4cZKvJnk2yZ8meS8jMr6qeh24A/g/\nDMJkf1VtY0TG13G48ZzL4DNm0ih83nweeKyVj3l8oxgqI3vmQZL3AQ8DN1fVT7vLanDGxdCNPclv\nAK9V1XMc2kt5h2EdW7MAuAi4u6ouAv6BKYeChnl8SX4B+I/ABxl8AL0vyWe76wzz+KYzg/EM7ViT\n/Gfgn6rq60dY7YjjG8VQmQCWdZ4v451JO5SSnMYgUO6rqkda9d4kZ7fl5wCvzdX2zcKvAFcl+Vvg\nfuDfJLmP0RgbDP72dlfVd9vzhxiEzJ4RGd+/AP5HVe2rqoPAXwD/itEZ36TD/T1O/bxZ2uqGTpJ/\nx+Aw9L/tVB/z+EYxVL4HrEjywSSnM5hkenSOt2lWMrinxpeBF6vqjzuLHgXWtvJa4JGpbee7qrq1\nqpZV1XIGE7xPVtX1jMDYYDAfBryS5MJW9Qngh8A3GYHxMTjp4ONJzmx/p59gcMLFqIxv0uH+Hh8F\n1iQ5PclyYAWDC7GHSpLVDA5BX11V/9hZdOzjq6qRewBXMrja/mVg/VxvTw/j+VUG8w3PA8+1x2pg\nMfAdBmdrbAUWzfW2znKcvw482sojMzbgl4DvAj9g8E1+4YiN7z8xCMoXGExinzbM42Owx/wq8E8M\n5mc/d6TxALe2z5qXgCvmevuPY3yfB3YC/7vz+XL38Y7Pix8lSb0ZxcNfkqQ5YqhIknpjqEiSemOo\nSJJ6Y6hIknpjqEiSemOoSJJ6Y6hIknrz/wF0zsvts73EjAAAAABJRU5ErkJggg==\n", | |
103 "text/plain": [ | |
104 "<matplotlib.figure.Figure at 0x7f6dc44adfd0>" | |
105 ] | |
106 }, | |
107 "metadata": {}, | |
108 "output_type": "display_data" | |
109 } | |
110 ], | |
111 "source": [ | |
112 "plt.hist(D.ravel(), bins=100);" | |
113 ] | 137 ] |
114 }, | 138 }, |
115 { | 139 { |
116 "cell_type": "code", | 140 "cell_type": "code", |
117 "execution_count": 7, | 141 "execution_count": 7, |
130 ] | 154 ] |
131 }, | 155 }, |
132 { | 156 { |
133 "cell_type": "code", | 157 "cell_type": "code", |
134 "execution_count": 8, | 158 "execution_count": 8, |
135 "metadata": {}, | 159 "metadata": { |
160 "collapsed": false | |
161 }, | |
136 "outputs": [ | 162 "outputs": [ |
137 { | 163 { |
138 "name": "stdout", | 164 "name": "stdout", |
139 "output_type": "stream", | 165 "output_type": "stream", |
140 "text": [ | 166 "text": [ |
171 "metadata": {}, | 197 "metadata": {}, |
172 "output_type": "display_data" | 198 "output_type": "display_data" |
173 } | 199 } |
174 ], | 200 ], |
175 "source": [ | 201 "source": [ |
176 "N_k = n_occurrence_from_D(D, k=100)\n", | 202 "# take k mean of country sample\n", |
203 "uniq_countries, uniq_counts = np.unique(Y, return_counts=True)\n", | |
204 "k = np.int(np.round(np.mean(uniq_counts)))\n", | |
205 "print k\n", | |
206 "N_k = n_occurrence_from_D(D, k=k)\n", | |
177 "print skew(N_k)\n", | 207 "print skew(N_k)\n", |
208 "print np.median(N_k)\n", | |
209 "print np.mean(N_k)\n", | |
210 "print np.std(N_k)\n", | |
211 "print len(np.where(N_k>1000))\n", | |
178 "plt.figure()\n", | 212 "plt.figure()\n", |
179 "plt.hist(N_k, bins=100);\n", | 213 "plt.hist(N_k, bins=100);\n", |
180 "plt.figure()\n", | 214 "plt.figure()\n", |
215 "plt.hist(N_k[N_k<1000], bins=100);\n", | |
216 "plt.figure()\n", | |
181 "plt.plot(np.sort(N_k))" | 217 "plt.plot(np.sort(N_k))" |
182 ] | 218 ] |
183 }, | 219 }, |
184 { | 220 { |
185 "cell_type": "code", | 221 "cell_type": "code", |
195 ] | 231 ] |
196 }, | 232 }, |
197 { | 233 { |
198 "cell_type": "code", | 234 "cell_type": "code", |
199 "execution_count": 17, | 235 "execution_count": 17, |
200 "metadata": {}, | 236 "metadata": { |
237 "collapsed": false | |
238 }, | |
201 "outputs": [ | 239 "outputs": [ |
202 { | 240 { |
203 "data": { | 241 "data": { |
204 "text/plain": [ | 242 "text/plain": [ |
205 "array([[4650, 2942, 3520, ..., 3488, 2864, 6361],\n", | 243 "array([[4650, 2942, 3520, ..., 3488, 2864, 6361],\n", |
221 ] | 259 ] |
222 }, | 260 }, |
223 { | 261 { |
224 "cell_type": "code", | 262 "cell_type": "code", |
225 "execution_count": 13, | 263 "execution_count": 13, |
226 "metadata": {}, | 264 "metadata": { |
265 "collapsed": false | |
266 }, | |
227 "outputs": [ | 267 "outputs": [ |
228 { | 268 { |
229 "name": "stdout", | 269 "name": "stdout", |
230 "output_type": "stream", | 270 "output_type": "stream", |
231 "text": [ | 271 "text": [ |
247 ] | 287 ] |
248 }, | 288 }, |
249 { | 289 { |
250 "cell_type": "code", | 290 "cell_type": "code", |
251 "execution_count": 18, | 291 "execution_count": 18, |
252 "metadata": {}, | 292 "metadata": { |
293 "collapsed": false | |
294 }, | |
253 "outputs": [ | 295 "outputs": [ |
254 { | 296 { |
255 "data": { | 297 "data": { |
256 "text/plain": [ | 298 "text/plain": [ |
257 "(7160,)" | 299 "(7160,)" |
273 "## let's get the audio url to listen to tracks identified as large hubs" | 315 "## let's get the audio url to listen to tracks identified as large hubs" |
274 ] | 316 ] |
275 }, | 317 }, |
276 { | 318 { |
277 "cell_type": "code", | 319 "cell_type": "code", |
278 "execution_count": 6, | 320 "execution_count": 5, |
279 "metadata": {}, | 321 "metadata": { |
322 "collapsed": false | |
323 }, | |
280 "outputs": [ | 324 "outputs": [ |
281 { | 325 { |
282 "name": "stderr", | 326 "name": "stderr", |
283 "output_type": "stream", | 327 "output_type": "stream", |
284 "text": [ | 328 "text": [ |
285 "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n", | 329 "/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1159: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n", |
286 " if self.run_code(code, result):\n" | 330 " data = self._reader.read(nrows)\n" |
287 ] | 331 ] |
288 } | 332 } |
289 ], | 333 ], |
290 "source": [ | 334 "source": [ |
291 "ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata_BLSM_language_all.csv')" | 335 "#ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata_BLSM_language_all.csv')\n", |
292 ] | 336 "ddf = outliers.load_metadata(Yaudio, metadata_file='/Users/mariapanteli/Documents/'+\n", |
293 }, | 337 " 'QMUL/Code/MyPythonCode/MergeBL-Smith/data/metadata_BLSM_language_all.csv')" |
294 { | 338 ] |
295 "cell_type": "code", | 339 }, |
296 "execution_count": 8, | 340 { |
297 "metadata": {}, | 341 "cell_type": "code", |
342 "execution_count": 6, | |
343 "metadata": { | |
344 "collapsed": false | |
345 }, | |
298 "outputs": [ | 346 "outputs": [ |
299 { | 347 { |
300 "data": { | 348 "data": { |
301 "text/plain": [ | 349 "text/plain": [ |
302 "(8200, 108)" | 350 "(8200, 108)" |
303 ] | 351 ] |
304 }, | 352 }, |
305 "execution_count": 8, | 353 "execution_count": 6, |
306 "metadata": {}, | 354 "metadata": {}, |
307 "output_type": "execute_result" | 355 "output_type": "execute_result" |
308 } | 356 } |
309 ], | 357 ], |
310 "source": [ | 358 "source": [ |
311 "ddf.shape" | 359 "ddf.shape" |
312 ] | 360 ] |
313 }, | 361 }, |
314 { | 362 { |
315 "cell_type": "code", | 363 "cell_type": "code", |
316 "execution_count": 12, | 364 "execution_count": 7, |
317 "metadata": {}, | 365 "metadata": { |
366 "collapsed": false | |
367 }, | |
318 "outputs": [ | 368 "outputs": [ |
319 { | 369 { |
320 "data": { | 370 "data": { |
321 "text/plain": [ | 371 "text/plain": [ |
322 "True" | 372 "True" |
323 ] | 373 ] |
324 }, | 374 }, |
325 "execution_count": 12, | 375 "execution_count": 7, |
326 "metadata": {}, | 376 "metadata": {}, |
327 "output_type": "execute_result" | 377 "output_type": "execute_result" |
328 } | 378 } |
329 ], | 379 ], |
330 "source": [ | 380 "source": [ |
331 "\"songurls_Album\" in ddf.columns" | 381 "\"songurls_Album\" in ddf.columns" |
332 ] | |
333 }, | |
334 { | |
335 "cell_type": "code", | |
336 "execution_count": 37, | |
337 "metadata": {}, | |
338 "outputs": [ | |
339 { | |
340 "data": { | |
341 "text/plain": [ | |
342 "array([ 'https://sounds.bl.uk/World-and-traditional-music/Decca-West-African-recordings/025A-1CS0043663XX-0100A0.mp3',\n", | |
343 " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025A-C0811X0005XX-2000A0.mp3',\n", | |
344 " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025A-C0485X0085XX-3100A0.mp3',\n", | |
345 " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025A-C0811X0005XX-1300A0.mp3',\n", | |
346 " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025A-C0485X0031XX-0500A0.mp3'], dtype=object)" | |
347 ] | |
348 }, | |
349 "execution_count": 37, | |
350 "metadata": {}, | |
351 "output_type": "execute_result" | |
352 } | |
353 ], | |
354 "source": [ | |
355 "ddf['songurls_Album'].iloc[large_hubs_idx].get_values()" | |
356 ] | 382 ] |
357 }, | 383 }, |
358 { | 384 { |
359 "cell_type": "markdown", | 385 "cell_type": "markdown", |
360 "metadata": {}, | 386 "metadata": {}, |
362 "### first, fix the url for BL tracks (because it was changed recently and the metadata.csv file is not updated) " | 388 "### first, fix the url for BL tracks (because it was changed recently and the metadata.csv file is not updated) " |
363 ] | 389 ] |
364 }, | 390 }, |
365 { | 391 { |
366 "cell_type": "code", | 392 "cell_type": "code", |
367 "execution_count": 41, | 393 "execution_count": 8, |
368 "metadata": {}, | 394 "metadata": { |
395 "collapsed": false | |
396 }, | |
369 "outputs": [ | 397 "outputs": [ |
370 { | 398 { |
371 "name": "stderr", | 399 "name": "stderr", |
372 "output_type": "stream", | 400 "output_type": "stream", |
373 "text": [ | 401 "text": [ |
374 "/homes/mp305/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:115: SettingWithCopyWarning: \n", | 402 "/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:121: SettingWithCopyWarning: \n", |
375 "A value is trying to be set on a copy of a slice from a DataFrame\n", | 403 "A value is trying to be set on a copy of a slice from a DataFrame\n", |
376 "\n", | 404 "\n", |
377 "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | 405 "See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", |
378 " self._setitem_with_indexer(indexer, value)\n" | 406 " self._setitem_with_indexer(indexer, value)\n" |
379 ] | 407 ] |
380 } | 408 } |
381 ], | 409 ], |
382 "source": [ | 410 "source": [ |
387 " ddf['MetaFile'].iloc[bl_ind].split('.')[0])" | 415 " ddf['MetaFile'].iloc[bl_ind].split('.')[0])" |
388 ] | 416 ] |
389 }, | 417 }, |
390 { | 418 { |
391 "cell_type": "code", | 419 "cell_type": "code", |
392 "execution_count": 32, | 420 "execution_count": 10, |
393 "metadata": {}, | 421 "metadata": { |
394 "outputs": [ | 422 "collapsed": false |
395 { | 423 }, |
396 "data": { | 424 "outputs": [ |
397 "text/html": [ | 425 { |
398 "<table border=\"1\" class=\"dataframe\">\n", | 426 "data": { |
399 " <thead>\n", | 427 "text/plain": [ |
400 " <tr style=\"text-align: right;\">\n", | 428 "array([ 'https://sounds.bl.uk/World-and-traditional-music/Decca-West-African-recordings/025M-1CS0043663XX-0100V0',\n", |
401 " <th></th>\n", | 429 " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025M-C0811X0005XX-2000V0',\n", |
402 " <th>songurls_Album</th>\n", | 430 " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025M-C0485X0085XX-3100V0',\n", |
403 " <th>Country</th>\n", | 431 " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025M-C0811X0005XX-1300V0',\n", |
404 " </tr>\n", | 432 " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025M-C0485X0031XX-0500V0'], dtype=object)" |
405 " </thead>\n", | 433 ] |
406 " <tbody>\n", | 434 }, |
407 " <tr>\n", | 435 "execution_count": 10, |
408 " <th>515</th>\n", | 436 "metadata": {}, |
409 " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", | 437 "output_type": "execute_result" |
410 " <td>Nigeria</td>\n", | 438 } |
411 " </tr>\n", | 439 ], |
412 " <tr>\n", | 440 "source": [ |
413 " <th>2549</th>\n", | 441 "large_hubs_idx = np.array([515, 2549, 3486, 5020, 5119])\n", |
414 " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", | 442 "ddf['songurls_Album'].iloc[large_hubs_idx].get_values()" |
415 " <td>Swaziland</td>\n", | |
416 " </tr>\n", | |
417 " <tr>\n", | |
418 " <th>3486</th>\n", | |
419 " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", | |
420 " <td>Kazakhstan</td>\n", | |
421 " </tr>\n", | |
422 " <tr>\n", | |
423 " <th>5020</th>\n", | |
424 " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", | |
425 " <td>Swaziland</td>\n", | |
426 " </tr>\n", | |
427 " <tr>\n", | |
428 " <th>5119</th>\n", | |
429 " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", | |
430 " <td>Pakistan</td>\n", | |
431 " </tr>\n", | |
432 " </tbody>\n", | |
433 "</table>" | |
434 ], | |
435 "text/plain": [ | |
436 "<IPython.core.display.HTML object>" | |
437 ] | |
438 }, | |
439 "execution_count": 32, | |
440 "metadata": {}, | |
441 "output_type": "execute_result" | |
442 } | |
443 ], | |
444 "source": [ | |
445 "from IPython.display import HTML\n", | |
446 "HTML(ddf[['songurls_Album', 'Country']].iloc[large_hubs_idx, :].to_html())" | |
447 ] | 443 ] |
448 }, | 444 }, |
449 { | 445 { |
450 "cell_type": "code", | 446 "cell_type": "code", |
451 "execution_count": null, | 447 "execution_count": null, |