annotate notebooks/test_hubness.ipynb @ 28:bd284065aeb6 branch-tests

small edit
author Maria Panteli <m.x.panteli@gmail.com>
date Wed, 13 Sep 2017 17:34:24 +0100
parents ed109218dd4b
children 6aa08c9c95e9
rev   line source
m@8 1 {
m@8 2 "cells": [
m@8 3 {
m@8 4 "cell_type": "code",
m@28 5 "execution_count": 2,
m@28 6 "metadata": {
m@28 7 "collapsed": true
m@28 8 },
m@11 9 "outputs": [],
m@8 10 "source": [
m@8 11 "import numpy as np\n",
m@8 12 "import pickle\n",
m@8 13 "from scipy.stats import pearsonr\n",
m@8 14 "from scipy.stats import skew\n",
m@8 15 "import sys\n",
m@8 16 "from sklearn.metrics.pairwise import pairwise_distances\n",
m@8 17 "%matplotlib inline\n",
m@8 18 "import matplotlib.pyplot as plt\n",
m@8 19 "\n",
m@8 20 "%load_ext autoreload\n",
m@8 21 "%autoreload 2\n",
m@8 22 "\n",
m@8 23 "sys.path.append('../')\n",
Maria@18 24 "import scripts.outliers as outliers\n",
m@8 25 "import scripts.utils_spatial as utils_spatial"
m@8 26 ]
m@8 27 },
m@8 28 {
m@8 29 "cell_type": "code",
m@28 30 "execution_count": 3,
m@28 31 "metadata": {
m@28 32 "collapsed": false
m@28 33 },
m@8 34 "outputs": [
m@8 35 {
m@8 36 "name": "stdout",
m@8 37 "output_type": "stream",
m@8 38 "text": [
m@28 39 "WARNING: there are 21 disconnected observations\n",
m@28 40 "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n",
m@8 41 "Antigua and Barbuda\n",
m@8 42 "Australia\n",
m@8 43 "Cuba\n",
m@8 44 "Fiji\n",
m@8 45 "French Polynesia\n",
m@8 46 "Grenada\n",
m@8 47 "Iceland\n",
m@8 48 "Jamaica\n",
m@8 49 "Japan\n",
m@8 50 "Kiribati\n",
m@8 51 "Malta\n",
m@8 52 "New Zealand\n",
m@8 53 "Philippines\n",
m@8 54 "Puerto Rico\n",
m@8 55 "Republic of Serbia\n",
m@8 56 "Saint Lucia\n",
m@8 57 "Samoa\n",
m@8 58 "Solomon Islands\n",
m@8 59 "South Korea\n",
m@8 60 "The Bahamas\n",
m@8 61 "Trinidad and Tobago\n"
m@8 62 ]
m@8 63 }
m@8 64 ],
m@8 65 "source": [
# Load the pickled (feature blocks, labels, audio labels) triple.
# The context manager closes the file handle as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only run this on a pickle that was produced by this project.
with open('../data/lda_data_melodia_8.pickle', 'rb') as pickle_file:
    X_list, Y, Yaudio = pickle.load(pickle_file)

ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')
w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)
w_dict = utils_spatial.from_weights_to_dict(w, data_countries)

# X_list holds four feature blocks that are stacked column-wise into one matrix.
# NOTE(review): the names suggest rhythm / melody / MFCC / chroma features -- confirm.
Xrhy, Xmel, Xmfc, Xchr = X_list
X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)

# global outliers
df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)
m@8 75 ]
m@8 76 },
m@8 77 {
m@8 78 "cell_type": "code",
m@12 79 "execution_count": 3,
m@28 80 "metadata": {
m@28 81 "collapsed": false
m@28 82 },
m@8 83 "outputs": [
m@8 84 {
m@8 85 "data": {
m@8 86 "text/plain": [
m@8 87 "(8200, 380)"
m@8 88 ]
m@8 89 },
m@12 90 "execution_count": 3,
m@8 91 "metadata": {},
m@8 92 "output_type": "execute_result"
m@8 93 }
m@8 94 ],
m@8 95 "source": [
m@8 96 "X.shape"
m@8 97 ]
m@8 98 },
m@8 99 {
m@8 100 "cell_type": "code",
m@12 101 "execution_count": 4,
m@28 102 "metadata": {
m@28 103 "collapsed": true
m@28 104 },
m@8 105 "outputs": [],
m@8 106 "source": [
m@8 107 "D = pairwise_distances(X, metric='mahalanobis')"
m@8 108 ]
m@8 109 },
m@8 110 {
m@8 111 "cell_type": "code",
m@12 112 "execution_count": 5,
m@28 113 "metadata": {
m@28 114 "collapsed": false
m@28 115 },
m@12 116 "outputs": [
m@12 117 {
m@12 118 "data": {
m@12 119 "text/plain": [
m@12 120 "(8200, 8200)"
m@12 121 ]
m@12 122 },
m@12 123 "execution_count": 5,
m@12 124 "metadata": {},
m@12 125 "output_type": "execute_result"
m@12 126 }
m@12 127 ],
m@12 128 "source": [
m@12 129 "D.shape"
m@12 130 ]
m@12 131 },
m@12 132 {
m@12 133 "cell_type": "code",
m@11 134 "execution_count": 6,
m@28 135 "metadata": {
m@28 136 "collapsed": false
m@28 137 },
m@11 138 "outputs": [
m@11 139 {
m@11 140 "data": {
m@12 141 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEACAYAAAB78OvLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGWtJREFUeJzt3W2sXdV95/HvDwyUPIwd05F5sCFWa6Q4StrADM50WvUy\nJGCiCniRgKsJeBKrL0pnYPqiE8xIYKtRO54KUaoRRGrzYFCDQdASoqBgB7iaSiNwEiClIYzNqJ7B\nl9pEBpO0o05t8Z8XZ128ubm2r303vvccfz/SkddZe6919jpczu/svfbeJ1WFJEl9OGWuN0CSNDoM\nFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvZhQqSRYleSjJj5K8mGRVksVJtiXZkWRrkkWd9dcn2Znk\npSSXd+ovTvJCW3ZXp/6MJA+0+qeTXNBZtra9xo4kN3Tqlyd5prXZkuS02b8dkqTZmOmeyl3AY1X1\nIeCjwEvALcC2qroQeKI9J8lK4DpgJbAauDtJWj/3AOuqagWwIsnqVr8O2Nfq7wQ2tb4WA7cBl7TH\n7UkWtjabgDtamzdaH5KkOXTUUGkf4r9WVV8BqKqDVfUmcBWwua22Gbimla8G7q+qA1W1C3gZWJXk\nHOD9VbW9rXdvp023r4eBy1r5CmBrVe2vqv3ANuDKFlKXAg9N8/qSpDkykz2V5cCPk3w1ybNJ/jTJ\ne4ElVbW3rbMXWNLK5wK7O+13A+dNUz/R6mn/vgKD0ALeTHLWEfpaDOyvqrem6UuSNEdmEioLgIuA\nu6vqIuAfaIe6JtXgXi8n6n4v3ldGkuapBTNYZzewu6q+254/BKwH9iQ5u6r2tENbr7XlE8CyTvul\nrY+JVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT18Q5JDCBJOkZVlaOvNb2j7qlU\n1R7glSQXtqpPAD8EvgmsbXVrgUda+VFgTZLTkywHVgDbWz8/aWeOBbge+EanzWRfn2Yw8Q+wFbi8\nnX32AeCTwONtz+gp4DPTvP7U7R/Jx+233z7n2+D4HJ/jG73HbM1kTwXgPwB/nuR04H8BnwNOBR5M\nsg7YBVzbPsRfTPIg8CJwELixDm3pjcDXgDMZnE327Vb/ZeC+JDuBfcCa1tfrSX4fmNxL2liDCXuA\nLwBbknwReLb1IUmaQzMKlar6AfAvp1n0icOs/wfAH0xT/33gI9PU/z9aKE2z7KvAV6ep/1tg1RE3\nXBw6m3ugj28iknQ4XlE/pMbGxo5h7RN5HkU/jm18w8fxDbdRH99sZJS/uSapUR7fTAz2VCbfg7in\nIumIklDv5kS9JEkzNdOJeo2I7hyLey2S+uaeykln+OZXJA0PQ0WS1BtDRZLUG0NFktQbQ0WS1BvP\n/hpBU6+il6QTxT2VkeVZXpJOPENFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS\n1BuvqD+J+dsqkvrmnspJzavuJfXLUJEk9cZQkST1xlCRJPXGUJEk9cZQkST1xlCRJPXGUJEk9WZG\noZJkV5K/TvJcku2tbnGSbUl2JNmaZFFn/fVJdiZ5KcnlnfqLk7zQlt3VqT8jyQOt/ukkF3SWrW2v\nsSPJDZ365UmeaW22JDlttm+GJGl2ZrqnUsBYVX2sqi5pdbcA26rqQuCJ9pwkK4HrgJXAauDuHLp0\n+x5gXVWtAFYkWd3q1wH7Wv2dwKbW12LgNuCS9rg9ycLWZhNwR2vzRutDkjSHjuXwV6Y8vwrY3Mqb\ngWta+Wrg/qo6UFW7gJeBVUnOAd5fVdvbevd22nT7ehi4rJWvALZW1f6q2g9sA65sIXUp8NA0ry9J\nmiPHsqfynSTfS/JbrW5JVe1t5b3AklY+F9jdabsbOG+a+olWT/v3FYCqOgi8meSsI/S1GNhfVW9N\n05ckaY7M9IaS/7qq/i7JPwe2JXm
pu7CqKsmJuomUN6uSpHlqRqFSVX/X/v1xkr9kML+xN8nZVbWn\nHdp6ra0+ASzrNF/KYA9jopWn1k+2OR94NckCYGFV7UsyAYx12iwDngReBxYlOaXtrSxtffyMDRs2\nvF0eGxtjbGxsutWGXveOw5I0U+Pj44yPj/fWX452y/Mk7wFOraqfJnkvsBXYCHyCweT6piS3AIuq\n6pY2Uf91BsFzHvAd4Bfb3swzwE3AduBbwJ9U1beT3Ah8pKp+O8ka4JqqWtMm6r8HXMRgTuf7wEVV\ntT/Jg8DDVfVAki8Bz1fVl6Zse50st3QfhMrkWI+9fLK8T5KOLAlVddzfUmcSKsuBv2xPFwB/XlV/\n2D7wH2Swh7ELuLZNppPkVuDzwEHg5qp6vNVfDHwNOBN4rKpuavVnAPcBHwP2AWvaJD9JPgfc2l7/\ni1W1ubNdWxjMrzwLfLaqDkzZdkPFUJF0DN71UBlmhsrMyyfL+yTpyGYbKl5RL0nqjaEiSeqNoSJJ\n6s1Mr1PRiOuekuz8iqTj5Z6KmsLrSiXNlqEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEi\nSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSerNgrneAM0/Sd4uV9UcbomkYeOeiqZR7SFJx2ZGoZLk1CTPJflm\ne744ybYkO5JsTbKos+76JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR5IZO/fIkz7Q2\nW5KcNts3QpI0ezPdU7kZeJFDX19vAbZV1YXAE+05SVYC1wErgdXA3Tl0LOUeYF1VrQBWJFnd6tcB\n+1r9ncCm1tdi4Dbgkva4PcnC1mYTcEdr80brQ5I0x44aKkmWAp8C/gyYDIirgM2tvBm4ppWvBu6v\nqgNVtQt4GViV5Bzg/VW1va13b6dNt6+Hgcta+Qpga1Xtr6r9wDbgyhZSlwIPTfP6kqQ5NJM9lTuB\n3wPe6tQtqaq9rbwXWNLK5wK7O+vtBs6bpn6i1dP+fQWgqg4CbyY56wh9LQb2V9Vb0/R1Ukny9kOS\n5oMjnv2V5DeA16rquSRj061TVZXkRM3qHvPrbNiw4e3y2NgYY2NjPW7OfDD5lhgsko7d+Pg44+Pj\nvfV3tFOKfwW4KsmngJ8D/lmS+4C9Sc6uqj3t0NZrbf0JYFmn/VIGexgTrTy1frLN+cCrSRYAC6tq\nX5IJYKzTZhnwJPA6sCjJKW1vZWnrY1rdUJEkvdPUL9sbN26cVX9HPPxVVbdW1bKqWg6sAZ6squuB\nR4G1bbW1wCOt/CiwJsnpSZYDK4DtVbUH+EmSVW1O5HrgG502k319msHEP8BW4PIki5J8APgk8HgN\nLpx4CvjMNK8vSZpDx3rx4+Sxlv8CPJhkHbALuBagql5M8iCDM8UOAjfWoavnbgS+BpwJPFZV3271\nXwbuS7IT2McgvKiq15P8PvDdtt7GNmEP8AVgS5IvAs+2PiRJcyyjfMV0khrx8fHOOZX+y6P8/kn6\nWUmoquOepPWKeklSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlS\nbwwVSVJvDBVJUm8MFUlSb4711vc6yXR/qtg7Fks6GvdUdBTFcfyKs6STlKEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSerNEUMlyc8l\neSbJ80leTPKHrX5xkm1JdiTZmmRRp836JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR\n5IZO/fK2XTuTbElyWl9viCTp+B0xVKrqH4FLq+qXgY8Clyb5VeAWYFtVXQg80Z6TZCVwHbASWA3c\
nnUM/yHEPsK6qVgArkqxu9euAfa3+TmBT62sxcBtwSXvcnmRha7MJuKO1eaP1IUmaY0c9/FVV/7cV\nTwdOZfAhfhWwudVvBq5p5auB+6vqQFXtAl4GViU5B3h/VW1v693badPt62Hgsla+AthaVfuraj+w\nDbiyhdSlwEPTvL4kaQ4dNVSSnJLkeWAv8FRV/RBYUlV72yp7gSWtfC6wu9N8N3DeNPUTrZ727ysA\nVXUQeDPJWUfoazGwv6remqYvSdIcOurPCbcP719uh54eT3LplOWV5ET9NOAxv86GDRveLo+NjTE2\nNtbj5kjScBsfH2d8fLy3/mb8G/VV9WaSbwEXA3uTnF1Ve9qhrdfaahPAsk6zpQz2MCZaeWr9ZJvz\ngVeTLAAWVtW+JBPAWKfNMuBJ4HVgUZJTWuAtbX1MqxsqkqR3mvple+PGjbPq72hnf/385JldSc4E\nPgk8BzwKrG2rrQUeaeVHgTVJTk+yHFgBbK+qPcBPkqxqcyLXA9/otJns69MMJv4BtgKXJ1mU5APt\ntR+vqgKeAj4zzetLkuZQBp/Rh1mYfITBRPgp7XFfVf1ROzPrQQZ7GLuAa9tkOkluBT4PHARurqrH\nW/3FwNeAM4HHquqmVn8GcB/wMWAfsKZN8pPkc8CtbXO+WFWbW/1yYAuD+ZVngc9W1YFptr+ONL5h\ndOhkukmT48sJKHdedcTeV0kDSaiqqR80M28/yh8OoxsqJzJIpi+P2vsqaWC2oeIV9ZKk3hgqkqTe\nGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgq\nkqTeGCqSpN4YKpKk3hgqkqTeLJjrDdBw6v6ssb8CKWmSeyo6TsWhnxeWpAFDRZLUG0NFktQbQ0WS\n1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1JujhkqSZUmeSvLDJH+T5KZWvzjJtiQ7kmxN\nsqjTZn2SnUleSnJ5p/7iJC+0ZXd16s9I8kCrfzrJBZ1la9tr7EhyQ6d+eZJnWpstSU7r4w2RJB2/\nmeypHAB+t6o+DHwc+J0kHwJuAbZV1YXAE+05SVYC1wErgdXA3Tl0o6h7gHVVtQJYkWR1q18H7Gv1\ndwKbWl+LgduAS9rj9iQLW5tNwB2tzRutD0nSHDpqqFTVnqp6vpX/HvgRcB5wFbC5rbYZuKaVrwbu\nr6oDVbULeBlYleQc4P1Vtb2td2+nTbevh4HLWvkKYGtV7a+q/cA24MoWUpcCD03z+pKkOXJMcypJ\nPgh8DHgGWFJVe9uivcCSVj4X2N1ptptBCE2tn2j1tH9fAaiqg8CbSc46Ql+Lgf1V9dY0fUmS5siM\nb32f5H0M9iJurqqfTrn1eSU5UbesPabX2bBhw9vlsbExxsbGet4cSRpe4+PjjI+P99bfjEKlTYI/\nDNxXVY+06r1Jzq6qPe3Q1mutfgJY1mm+lMEexkQrT62fbHM+8GqSBcDCqtqXZAIY67RZBjwJvA4s\nSnJK21tZ2vr4Gd1Q0bvD31aRhtfUL9sbN26cVX8zOfsrwJeBF6vqjzuLHgXWtvJa4JFO/ZokpydZ\nDqwAtlfVHuAnSVa1Pq8HvjFNX59mMPEPsBW4PMmiJB8APgk8XoNPrqeAz0zz+jrh/G0VSQM52jfL\nJL8K/Hfgrzn0ybEe2A48yGAPYxdwbZtMJ8mtwOeBgwwOlz3e6i8GvgacCTxWVZOnJ58B3MdgvmYf\nsKZN8pPkc8Ct7XW/WFWbW/1yYAuD+ZVngc9W1YEp216j9s15kMeTY5p/5VF7v6WTTRKqKkdf8zDt\nR/lDYFRCpXt4aWDuw8NQkUbTbEPFK+qHhoeYJM1/hookqTeGiiSpN4aKJKk3hookqTeGiiSpN4aK\nJKk3hookqTeGiiSpNzO+S7E0E95cUjq5uaeinnnlv3QyM1QkS
b0xVCRJvTFUJEm9MVQkSb0xVCRJ\nvTFUJEm9MVQkSb0xVCRJvTFUJEm9MVQkSb3x3l9613gfMOnk456K3kXeB0w62RgqkqTeGCqSpN4Y\nKpKk3hgqkqTeGCqSpN4cNVSSfCXJ3iQvdOoWJ9mWZEeSrUkWdZatT7IzyUtJLu/UX5zkhbbsrk79\nGUkeaPVPJ7mgs2xte40dSW7o1C9P8kxrsyXJabN9IyRJszeTPZWvAqun1N0CbKuqC4En2nOSrASu\nA1a2Nnfn0MUK9wDrqmoFsCLJZJ/rgH2t/k5gU+trMXAbcEl73J5kYWuzCbijtXmj9TFSkrz9kKRh\ncdRQqaq/YvDB3XUVsLmVNwPXtPLVwP1VdaCqdgEvA6uSnAO8v6q2t/Xu7bTp9vUwcFkrXwFsrar9\nVbUf2AZc2ULqUuChaV5/xHidh6ThcrxzKkuqam8r7wWWtPK5wO7OeruB86apn2j1tH9fAaiqg8Cb\nSc46Ql+Lgf1V9dY0fWmecs9LOjnMeqK+BvffOFFfp/3aPrTc65JOBsd776+9Sc6uqj3t0NZrrX4C\nWNZZbymDPYyJVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT1Ma0NGza8XR4bG2Ns\nbOxwq0rSSWd8fJzx8fHe+stMbvSX5IPAN6vqI+35f2Uwub4pyS3Aoqq6pU3Uf53BxPp5wHeAX6yq\nSvIMcBOwHfgW8CdV9e0kNwIfqarfTrIGuKaq1rSJ+u8BFwEBvg9cVFX7kzwIPFxVDyT5EvB8VX1p\nmu2uYb2R4eAw0eS2j1Z5WP+bSCeDJFTVcR+nPmqoJLkf+HXg5xnMn9wGfAN4kMEexi7g2jaZTpJb\ngc8DB4Gbq+rxVn8x8DXgTOCxqrqp1Z8B3Ad8DNgHrGmT/CT5HHBr25QvVtXmVr8c2MJgfuVZ4LNV\ndWCabTdU5mF5WP+bSCeDdz1UhpmhMj/Lw/rfRDoZzDZUvKJektQbQ0WS1Bt/+VEnnL8IKY0u91Q0\nB7xmRRpVhookqTeGiiSpN4aKJKk3hookqTee/aU55Zlg0mhxT0VzzDPBpFFiqEiSeuPhr3nEH7CS\nNOwMlXmnexPGk4vzK9Lw8/CX5hHnV6RhZ6hIknpjqEiSeuOciuYl51ek4eSeiuYp51ekYWSoSJJ6\nY6hIknrjnIrmPedXpOHhnoqGgPMr0rAwVCRJvfHw1xzzfl/HxkNh0vzmnsq84OGdmfO9kuYz91Q0\ntNxrkeYfQ0VD7NAdnQ93GNGwkU4sQ0UjovuTAdOHjQEjvfuGek4lyeokLyXZmeQLc709mo8OzcEk\nefsh6d0xtKGS5FTgvwGrgZXAbyb50Nxu1cz08+E23tfmzFPj70Kf8ydgxsfH5+R1TxTHd/Ia2lAB\nLgFerqpdVXUA2AJcPcfbdAxmexbTeE/bMV+Nv8v9z23AjPqHkuM7eQ3znMp5wCud57uBVVNX6h5H\n97CHpnf0Cf8Z9eKcjTTUoTKj/4NPOWWwM7Zr1y4uuOCCd3WDjsRAGxbTT/jPpHys/403btx4/Jt5\nGDMNtiNtq+Go2ciw/gEl+TiwoapWt+frgbeqalNnneEcnCTNoao67m/BwxwqC4D/CVwGvApsB36z\nqn40pxsmSSexoT38VVUHk/x74HHgVODLBookza2h3VORJM0/w3xK8WGN2kWRSZYleSrJD5P8TZKb\nWv3iJNuS7EiyNcmiud7W45Xk1CTPJflmez5KY1uU5KEkP0ryYpJVIza+9e1v84UkX09yxjCPL8lX\nkuxN8kKn7rDjaePf2T5zLp+brZ65w4zvj9rf5w+S/EWShZ1lxzS+kQuVYb4o8ggOAL9bVR8GPg78\nThvTLcC2qroQeKI9H1Y3Ay9y6HSqURrbXcBjVfUh4KPAS4zI+JJ8EPgt4KKq+giDQ9FrGO7xfZXB\n50fXtONJshK4jsFnzWrg7
iTz/XN1uvFtBT5cVb8E7ADWw/GNb74P/ngM+UWRP6uq9lTV863898CP\nGFyncxWwua22GbhmbrZwdpIsBT4F/BmD83RhdMa2EPi1qvoKDOYCq+pNRmR8wE8YfOl5Tzt55j0M\nTpwZ2vFV1V8Bb0ypPtx4rgbur6oDVbULeJnBZ9C8Nd34qmpbVb3Vnj4DLG3lYx7fKIbKdBdFnjdH\n29K79s3wYwz+wy+pqr1t0V5gyRxt1mzdCfwe8FanblTGthz4cZKvJnk2yZ8meS8jMr6qeh24A/g/\nDMJkf1VtY0TG13G48ZzL4DNm0ih83nweeKyVj3l8oxgqI3vmQZL3AQ8DN1fVT7vLanDGxdCNPclv\nAK9V1XMc2kt5h2EdW7MAuAi4u6ouAv6BKYeChnl8SX4B+I/ABxl8AL0vyWe76wzz+KYzg/EM7ViT\n/Gfgn6rq60dY7YjjG8VQmQCWdZ4v451JO5SSnMYgUO6rqkda9d4kZ7fl5wCvzdX2zcKvAFcl+Vvg\nfuDfJLmP0RgbDP72dlfVd9vzhxiEzJ4RGd+/AP5HVe2rqoPAXwD/itEZ36TD/T1O/bxZ2uqGTpJ/\nx+Aw9L/tVB/z+EYxVL4HrEjywSSnM5hkenSOt2lWMrinxpeBF6vqjzuLHgXWtvJa4JGpbee7qrq1\nqpZV1XIGE7xPVtX1jMDYYDAfBryS5MJW9Qngh8A3GYHxMTjp4ONJzmx/p59gcMLFqIxv0uH+Hh8F\n1iQ5PclyYAWDC7GHSpLVDA5BX11V/9hZdOzjq6qRewBXMrja/mVg/VxvTw/j+VUG8w3PA8+1x2pg\nMfAdBmdrbAUWzfW2znKcvw482sojMzbgl4DvAj9g8E1+4YiN7z8xCMoXGExinzbM42Owx/wq8E8M\n5mc/d6TxALe2z5qXgCvmevuPY3yfB3YC/7vz+XL38Y7Pix8lSb0ZxcNfkqQ5YqhIknpjqEiSemOo\nSJJ6Y6hIknpjqEiSemOoSJJ6Y6hIknrz/wF0zsvts73EjAAAAABJRU5ErkJggg==\n",
m@11 142 "text/plain": [
m@12 143 "<matplotlib.figure.Figure at 0x7f3668585f50>"
m@11 144 ]
m@11 145 },
m@11 146 "metadata": {},
m@11 147 "output_type": "display_data"
m@11 148 }
m@11 149 ],
m@8 150 "source": [
# Distribution of all entries of the pairwise Mahalanobis distance matrix D.
# D.ravel() includes the diagonal and both (i, j) and (j, i) entries.
fig, ax = plt.subplots()
ax.hist(D.ravel(), bins=100)
ax.set_xlabel('Mahalanobis distance')
ax.set_ylabel('Number of matrix entries')
ax.set_title('Pairwise distances between recordings');
m@8 152 ]
m@8 153 },
m@8 154 {
m@8 155 "cell_type": "code",
m@12 156 "execution_count": 7,
m@8 157 "metadata": {
m@8 158 "collapsed": true
m@8 159 },
m@8 160 "outputs": [],
m@8 161 "source": [
def n_occurrence_from_D(D, k=10, n_items=None):
    """Count how often each item occurs among the k nearest neighbours of other items.

    Parameters
    ----------
    D : array-like, shape (n_rows, n_cols)
        Distance matrix; row i holds the distances from item i to every item.
        For a square matrix the diagonal is taken to be the item's distance
        to itself.
    k : int, optional (default 10)
        Neighbourhood size. If fewer than k neighbours are available, all of
        them are used.
    n_items : int or None, optional
        Minimum length of the returned vector. Defaults to ``len(D)``.

    Returns
    -------
    N_k : ndarray of int
        ``N_k[j]`` is the number of rows whose k nearest neighbours include
        item j.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative, got %r" % (k,))

    D = np.asarray(D)
    if n_items is None:
        n_items = len(D)
    if D.size == 0:
        # Nothing to rank: every item has zero occurrences.
        return np.zeros(n_items, dtype=np.intp)

    n_rows, n_cols = D.shape

    # A stable sort breaks distance ties by item index, so the result is
    # deterministic. Only the first k + 1 candidates per row are ever needed.
    candidates = np.argsort(D, axis=1, kind='mergesort')[:, :k + 1]

    if n_rows == n_cols:
        # Remove each item's own index explicitly instead of assuming it is
        # sorted first: with duplicated items (zero distance to another item)
        # the item itself is not guaranteed to sit in column 0.
        drop = candidates == np.arange(n_rows)[:, np.newaxis]
        # If the item is not among its own first k + 1 candidates (many exact
        # duplicates), discard the farthest candidate so that k remain.
        self_missing = ~drop.any(axis=1)
        drop[self_missing, -1] = True
        neighbours = candidates[~drop].reshape(n_rows, candidates.shape[1] - 1)
    else:
        # Non-square input has no diagonal to identify "self"; keep the
        # historical behaviour of skipping the closest column.
        neighbours = candidates[:, 1:]

    return np.bincount(neighbours.ravel(), minlength=n_items)
m@8 169 ]
m@8 170 },
m@8 171 {
m@8 172 "cell_type": "code",
m@28 173 "execution_count": 1,
m@28 174 "metadata": {
m@28 175 "collapsed": false
m@28 176 },
m@11 177 "outputs": [
m@11 178 {
m@28 179 "ename": "NameError",
m@28 180 "evalue": "name 'n_occurrence_from_D' is not defined",
m@28 181 "output_type": "error",
m@28 182 "traceback": [
m@28 183 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
m@28 184 "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
m@28 185 "\u001b[0;32m<ipython-input-1-0aacb5dec8fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mN_k\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_occurrence_from_D\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mD\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mskew\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_k\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
m@28 186 "\u001b[0;31mNameError\u001b[0m: name 'n_occurrence_from_D' is not defined"
m@11 187 ]
m@11 188 }
m@11 189 ],
m@8 190 "source": [
# k-occurrence counts for k = 100 and the skewness of their distribution.
N_k = n_occurrence_from_D(D, k=100)
# Single-argument call form prints the same value under Python 2 and Python 3.
print(skew(N_k))

# Histogram of the k-occurrence counts.
fig_hist, ax_hist = plt.subplots()
ax_hist.hist(N_k, bins=100)
ax_hist.set_xlabel('k-occurrence N_k (k=100)')
ax_hist.set_ylabel('Number of items')
ax_hist.set_title('Distribution of k-occurrences')

# The same counts in ascending order.
fig_sorted, ax_sorted = plt.subplots()
ax_sorted.plot(np.sort(N_k))
ax_sorted.set_xlabel('Items ordered by N_k')
ax_sorted.set_ylabel('k-occurrence N_k (k=100)')
ax_sorted.set_title('Sorted k-occurrences');
m@8 197 ]
m@8 198 },
m@8 199 {
m@8 200 "cell_type": "code",
m@12 201 "execution_count": 11,
m@28 202 "metadata": {
m@28 203 "collapsed": true
m@28 204 },
m@8 205 "outputs": [],
m@8 206 "source": [
# Row-wise ordering of item indices by increasing distance.
# This must be computed here: sort_idx is otherwise only a local variable of
# n_occurrence_from_D, so the cell would raise NameError on a fresh kernel.
sort_idx = np.argsort(D, axis=1)
k = 10
# Skip column 0, which is assumed to be the item itself, and keep the next k.
D_k = sort_idx[:, 1:(k+1)]
m@12 210 ]
m@12 211 },
m@12 212 {
m@12 213 "cell_type": "code",
m@12 214 "execution_count": 12,
m@28 215 "metadata": {
m@28 216 "collapsed": false
m@28 217 },
m@12 218 "outputs": [
m@12 219 {
m@12 220 "data": {
m@12 221 "text/plain": [
m@12 222 "array([[4650, 2942, 3520, ..., 1318, 6678, 6056],\n",
m@12 223 " [1933, 6143, 6757, ..., 7269, 4321, 1563],\n",
m@12 224 " [3170, 2549, 4860, ..., 6678, 7414, 6056],\n",
m@12 225 " ..., \n",
m@12 226 " [6016, 2243, 1616, ..., 7627, 2018, 515],\n",
m@12 227 " [7027, 4860, 6346, ..., 997, 3892, 1846],\n",
m@12 228 " [5119, 1563, 4035, ..., 3486, 7617, 3854]])"
m@12 229 ]
m@12 230 },
m@12 231 "execution_count": 12,
m@12 232 "metadata": {},
m@12 233 "output_type": "execute_result"
m@12 234 }
m@12 235 ],
m@12 236 "source": [
m@12 237 "D_k"
m@8 238 ]
m@8 239 },
m@8 240 {
m@8 241 "cell_type": "code",
m@8 242 "execution_count": null,
m@8 243 "metadata": {
m@8 244 "collapsed": true
m@8 245 },
m@8 246 "outputs": [],
m@8 247 "source": []
m@8 248 }
m@8 249 ],
m@8 250 "metadata": {
m@8 251 "kernelspec": {
m@8 252 "display_name": "Python 2",
m@8 253 "language": "python",
m@8 254 "name": "python2"
m@8 255 },
m@8 256 "language_info": {
m@8 257 "codemirror_mode": {
m@8 258 "name": "ipython",
m@8 259 "version": 2
m@8 260 },
m@8 261 "file_extension": ".py",
m@8 262 "mimetype": "text/x-python",
m@8 263 "name": "python",
m@8 264 "nbconvert_exporter": "python",
m@8 265 "pygments_lexer": "ipython2",
m@8 266 "version": "2.7.12"
m@8 267 }
m@8 268 },
m@8 269 "nbformat": 4,
m@11 270 "nbformat_minor": 1
m@8 271 }