m@8: { m@8: "cells": [ m@8: { m@8: "cell_type": "code", m@28: "execution_count": 2, m@28: "metadata": { m@28: "collapsed": true m@28: }, m@11: "outputs": [], m@8: "source": [ m@8: "import numpy as np\n", m@8: "import pickle\n", m@8: "from scipy.stats import pearsonr\n", m@8: "from scipy.stats import skew\n", m@8: "import sys\n", m@8: "from sklearn.metrics.pairwise import pairwise_distances\n", m@8: "%matplotlib inline\n", m@8: "import matplotlib.pyplot as plt\n", m@8: "\n", m@8: "%load_ext autoreload\n", m@8: "%autoreload 2\n", m@8: "\n", m@8: "sys.path.append('../')\n", Maria@18: "import scripts.outliers as outliers\n", m@8: "import scripts.utils_spatial as utils_spatial" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@28: "execution_count": 3, m@28: "metadata": { m@28: "collapsed": false m@28: }, m@8: "outputs": [ m@8: { m@8: "name": "stdout", m@8: "output_type": "stream", m@8: "text": [ m@28: "WARNING: there are 21 disconnected observations\n", m@28: "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n", m@8: "Antigua and Barbuda\n", m@8: "Australia\n", m@8: "Cuba\n", m@8: "Fiji\n", m@8: "French Polynesia\n", m@8: "Grenada\n", m@8: "Iceland\n", m@8: "Jamaica\n", m@8: "Japan\n", m@8: "Kiribati\n", m@8: "Malta\n", m@8: "New Zealand\n", m@8: "Philippines\n", m@8: "Puerto Rico\n", m@8: "Republic of Serbia\n", m@8: "Saint Lucia\n", m@8: "Samoa\n", m@8: "Solomon Islands\n", m@8: "South Korea\n", m@8: "The Bahamas\n", m@8: "Trinidad and Tobago\n" m@8: ] m@8: } m@8: ], m@8: "source": [ m@8: "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", Maria@18: "ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", m@8: "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", m@8: "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", m@8: "Xrhy, Xmel, Xmfc, Xchr = X_list\n", m@8: "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), 
axis=1)\n", m@8: "\n", m@8: "# global outliers\n", Maria@18: "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@12: "execution_count": 3, m@28: "metadata": { m@28: "collapsed": false m@28: }, m@8: "outputs": [ m@8: { m@8: "data": { m@8: "text/plain": [ m@8: "(8200, 380)" m@8: ] m@8: }, m@12: "execution_count": 3, m@8: "metadata": {}, m@8: "output_type": "execute_result" m@8: } m@8: ], m@8: "source": [ m@8: "X.shape" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@12: "execution_count": 4, m@28: "metadata": { m@28: "collapsed": true m@28: }, m@8: "outputs": [], m@8: "source": [ m@8: "D = pairwise_distances(X, metric='mahalanobis')" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@12: "execution_count": 5, m@28: "metadata": { m@28: "collapsed": false m@28: }, m@12: "outputs": [ m@12: { m@12: "data": { m@12: "text/plain": [ m@12: "(8200, 8200)" m@12: ] m@12: }, m@12: "execution_count": 5, m@12: "metadata": {}, m@12: "output_type": "execute_result" m@12: } m@12: ], m@12: "source": [ m@12: "D.shape" m@12: ] m@12: }, m@12: { m@12: "cell_type": "code", m@11: "execution_count": 6, m@28: "metadata": { m@28: "collapsed": false m@28: }, m@11: "outputs": [ m@11: { m@11: "data": { m@12: "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZUAAAEACAYAAAB78OvLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGWtJREFUeJzt3W2sXdV95/HvDwyUPIwd05F5sCFWa6Q4StrADM50WvUy\nJGCiCniRgKsJeBKrL0pnYPqiE8xIYKtRO54KUaoRRGrzYFCDQdASoqBgB7iaSiNwEiClIYzNqJ7B\nl9pEBpO0o05t8Z8XZ128ubm2r303vvccfz/SkddZe6919jpczu/svfbeJ1WFJEl9OGWuN0CSNDoM\nFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvZhQqSRYleSjJj5K8mGRVksVJtiXZkWRrkkWd9dcn2Znk\npSSXd+ovTvJCW3ZXp/6MJA+0+qeTXNBZtra9xo4kN3Tqlyd5prXZkuS02b8dkqTZmOmeyl3AY1X1\nIeCjwEvALcC2qroQeKI9J8lK4DpgJbAauDtJWj/3AOuqagWwIsnqVr8O2Nfq7wQ2tb4WA7cBl7TH\n7UkWtjabgDtamzdaH5KkOXTUUGkf4r9WVV8BqKqDVfUmcBWwua22Gbimla8G7q+qA1W1C3gZWJXk\nHOD9VbW9rXdvp023r4eBy1r5CmBrVe2vqv3ANuDKFlKXAg9N8/qSpDkykz2V5cCPk3w1ybNJ/jTJ\ne4ElVbW3rbMXWNLK5wK7O+13A+dNUz/R6mn/vgKD0ALeTHLWEfpaDOyvqrem6UuSNEdmEioLgIuA\nu6vqIuAfaIe6JtXgXi8n6n4v3ldGkuapBTNYZzewu6q+254/BKwH9iQ5u6r2tENbr7XlE8CyTvul\nrY+JVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT18Q5JDCBJOkZVlaOvNb2j7qlU\n1R7glSQXtqpPAD8EvgmsbXVrgUda+VFgTZLTkywHVgDbWz8/aWeOBbge+EanzWRfn2Yw8Q+wFbi8\nnX32AeCTwONtz+gp4DPTvP7U7R/Jx+233z7n2+D4HJ/jG73HbM1kTwXgPwB/nuR04H8BnwNOBR5M\nsg7YBVzbPsRfTPIg8CJwELixDm3pjcDXgDMZnE327Vb/ZeC+JDuBfcCa1tfrSX4fmNxL2liDCXuA\nLwBbknwReLb1IUmaQzMKlar6AfAvp1n0icOs/wfAH0xT/33gI9PU/z9aKE2z7KvAV6ep/1tg1RE3\nXBw6m3ugj28iknQ4XlE/pMbGxo5h7RN5HkU/jm18w8fxDbdRH99sZJS/uSapUR7fTAz2VCbfg7in\nIumIklDv5kS9JEkzNdOJeo2I7hyLey2S+uaeykln+OZXJA0PQ0WS1BtDRZLUG0NFktQbQ0WS1BvP\n/hpBU6+il6QTxT2VkeVZXpJOPENFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS\n1BuvqD+J+dsqkvrmnspJzavuJfXLUJEk9cZQkST1xlCRJPXGUJEk9cZQkST1xlCRJPXGUJEk9WZG\noZJkV5K/TvJcku2tbnGSbUl2JNmaZFFn/fVJdiZ5KcnlnfqLk7zQlt3VqT8jyQOt/ukkF3SWrW2v\nsSPJDZ365UmeaW22JDlttm+GJGl2ZrqnUsBYVX2sqi5pdbcA26rqQuCJ9pwkK4HrgJXAauDuHLp0\n+x5gXVWtAFYkWd3q1wH7Wv2dwKbW12LgNuCS9rg9ycLWZhNwR2vzRutDkjSHjuXwV6Y8vwrY3Mqb\ngWta+Wrg/qo6UFW7gJeBVUnOAd5fVdvbevd22nT7ehi4rJWvALZW1f6q2g9sA65sIXUp8NA0ry9J\nmiPHsqfynSTfS/JbrW5JVe1t5b3AklY+F9jdabsbOG+a+olWT/v3FYCqOgi8meSsI/S1GNhfVW9N\n05ckaY7M9IaS/7qq/i7JPwe2JXmpu7CqKsmJuomUN6uSpHlqR
qFSVX/X/v1xkr9kML+xN8nZVbWn\nHdp6ra0+ASzrNF/KYA9jopWn1k+2OR94NckCYGFV7UsyAYx12iwDngReBxYlOaXtrSxtffyMDRs2\nvF0eGxtjbGxsutWGXveOw5I0U+Pj44yPj/fWX452y/Mk7wFOraqfJnkvsBXYCHyCweT6piS3AIuq\n6pY2Uf91BsFzHvAd4Bfb3swzwE3AduBbwJ9U1beT3Ah8pKp+O8ka4JqqWtMm6r8HXMRgTuf7wEVV\ntT/Jg8DDVfVAki8Bz1fVl6Zse50st3QfhMrkWI+9fLK8T5KOLAlVddzfUmcSKsuBv2xPFwB/XlV/\n2D7wH2Swh7ELuLZNppPkVuDzwEHg5qp6vNVfDHwNOBN4rKpuavVnAPcBHwP2AWvaJD9JPgfc2l7/\ni1W1ubNdWxjMrzwLfLaqDkzZdkPFUJF0DN71UBlmhsrMyyfL+yTpyGYbKl5RL0nqjaEiSeqNoSJJ\n6s1Mr1PRiOuekuz8iqTj5Z6KmsLrSiXNlqEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEi\nSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSerNgrneAM0/Sd4uV9UcbomkYeOeiqZR7SFJx2ZGoZLk1CTPJflm\ne744ybYkO5JsTbKos+76JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR5IZO/fIkz7Q2\nW5KcNts3QpI0ezPdU7kZeJFDX19vAbZV1YXAE+05SVYC1wErgdXA3Tl0LOUeYF1VrQBWJFnd6tcB\n+1r9ncCm1tdi4Dbgkva4PcnC1mYTcEdr80brQ5I0x44aKkmWAp8C/gyYDIirgM2tvBm4ppWvBu6v\nqgNVtQt4GViV5Bzg/VW1va13b6dNt6+Hgcta+Qpga1Xtr6r9wDbgyhZSlwIPTfP6kqQ5NJM9lTuB\n3wPe6tQtqaq9rbwXWNLK5wK7O+vtBs6bpn6i1dP+fQWgqg4CbyY56wh9LQb2V9Vb0/R1Ukny9kOS\n5oMjnv2V5DeA16rquSRj061TVZXkRM3qHvPrbNiw4e3y2NgYY2NjPW7OfDD5lhgsko7d+Pg44+Pj\nvfV3tFOKfwW4KsmngJ8D/lmS+4C9Sc6uqj3t0NZrbf0JYFmn/VIGexgTrTy1frLN+cCrSRYAC6tq\nX5IJYKzTZhnwJPA6sCjJKW1vZWnrY1rdUJEkvdPUL9sbN26cVX9HPPxVVbdW1bKqWg6sAZ6squuB\nR4G1bbW1wCOt/CiwJsnpSZYDK4DtVbUH+EmSVW1O5HrgG502k319msHEP8BW4PIki5J8APgk8HgN\nLpx4CvjMNK8vSZpDx3rx4+Sxlv8CPJhkHbALuBagql5M8iCDM8UOAjfWoavnbgS+BpwJPFZV3271\nXwbuS7IT2McgvKiq15P8PvDdtt7GNmEP8AVgS5IvAs+2PiRJcyyjfMV0khrx8fHOOZX+y6P8/kn6\nWUmoquOepPWKeklSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlS\nbwwVSVJvDBVJUm8MFUlSb4711vc6yXR/qtg7Fks6GvdUdBTFcfyKs6STlKEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSerNEUMlyc8l\neSbJ80leTPKHrX5xkm1JdiTZmmRRp836JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR\n5IZO/fK2XTuTbElyWl9viCTp+B0xVKrqH4FLq+qXgY8Clyb5VeAWYFtVXQg80Z6TZCVwHbASWA3c\nnUM/yHEPsK6qVgArkqxu9
euAfa3+TmBT62sxcBtwSXvcnmRha7MJuKO1eaP1IUmaY0c9/FVV/7cV\nTwdOZfAhfhWwudVvBq5p5auB+6vqQFXtAl4GViU5B3h/VW1v693badPt62Hgsla+AthaVfuraj+w\nDbiyhdSlwEPTvL4kaQ4dNVSSnJLkeWAv8FRV/RBYUlV72yp7gSWtfC6wu9N8N3DeNPUTrZ727ysA\nVXUQeDPJWUfoazGwv6remqYvSdIcOurPCbcP719uh54eT3LplOWV5ET9NOAxv86GDRveLo+NjTE2\nNtbj5kjScBsfH2d8fLy3/mb8G/VV9WaSbwEXA3uTnF1Ve9qhrdfaahPAsk6zpQz2MCZaeWr9ZJvz\ngVeTLAAWVtW+JBPAWKfNMuBJ4HVgUZJTWuAtbX1MqxsqkqR3mvple+PGjbPq72hnf/385JldSc4E\nPgk8BzwKrG2rrQUeaeVHgTVJTk+yHFgBbK+qPcBPkqxqcyLXA9/otJns69MMJv4BtgKXJ1mU5APt\ntR+vqgKeAj4zzetLkuZQBp/Rh1mYfITBRPgp7XFfVf1ROzPrQQZ7GLuAa9tkOkluBT4PHARurqrH\nW/3FwNeAM4HHquqmVn8GcB/wMWAfsKZN8pPkc8CtbXO+WFWbW/1yYAuD+ZVngc9W1YFptr+ONL5h\ndOhkukmT48sJKHdedcTeV0kDSaiqqR80M28/yh8OoxsqJzJIpi+P2vsqaWC2oeIV9ZKk3hgqkqTe\nGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgq\nkqTeGCqSpN4YKpKk3hgqkqTeLJjrDdBw6v6ssb8CKWmSeyo6TsWhnxeWpAFDRZLUG0NFktQbQ0WS\n1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1JujhkqSZUmeSvLDJH+T5KZWvzjJtiQ7kmxN\nsqjTZn2SnUleSnJ5p/7iJC+0ZXd16s9I8kCrfzrJBZ1la9tr7EhyQ6d+eZJnWpstSU7r4w2RJB2/\nmeypHAB+t6o+DHwc+J0kHwJuAbZV1YXAE+05SVYC1wErgdXA3Tl0o6h7gHVVtQJYkWR1q18H7Gv1\ndwKbWl+LgduAS9rj9iQLW5tNwB2tzRutD0nSHDpqqFTVnqp6vpX/HvgRcB5wFbC5rbYZuKaVrwbu\nr6oDVbULeBlYleQc4P1Vtb2td2+nTbevh4HLWvkKYGtV7a+q/cA24MoWUpcCD03z+pKkOXJMcypJ\nPgh8DHgGWFJVe9uivcCSVj4X2N1ptptBCE2tn2j1tH9fAaiqg8CbSc46Ql+Lgf1V9dY0fUmS5siM\nb32f5H0M9iJurqqfTrn1eSU5UbesPabX2bBhw9vlsbExxsbGet4cSRpe4+PjjI+P99bfjEKlTYI/\nDNxXVY+06r1Jzq6qPe3Q1mutfgJY1mm+lMEexkQrT62fbHM+8GqSBcDCqtqXZAIY67RZBjwJvA4s\nSnJK21tZ2vr4Gd1Q0bvD31aRhtfUL9sbN26cVX8zOfsrwJeBF6vqjzuLHgXWtvJa4JFO/ZokpydZ\nDqwAtlfVHuAnSVa1Pq8HvjFNX59mMPEPsBW4PMmiJB8APgk8XoNPrqeAz0zz+jrh/G0VSQM52jfL\nJL8K/Hfgrzn0ybEe2A48yGAPYxdwbZtMJ8mtwOeBgwwOlz3e6i8GvgacCTxWVZOnJ58B3MdgvmYf\nsKZN8pPkc8Ct7XW/WFWbW/1yYAuD+ZVngc9W1YEp216j9s15kMeTY5p/5VF7v6WTTRKqKkdf8zDt\nR/lDYFRCpXt4aWDuw8NQkUbTbEPFK+qHhoeYJM1/hookqTeGiiSpN4aKJKk3hookqTeGiiSpN4aK\nJKk3hookqTeGiiSpNzO+S7E0E95cUjq5uaeinnnlv3QyM1QkSb0xVCRJvTFUJEm9MVQkSb0
xVCRJ\nvTFUJEm9MVQkSb0xVCRJvTFUJEm9MVQkSb3x3l9613gfMOnk456K3kXeB0w62RgqkqTeGCqSpN4Y\nKpKk3hgqkqTeGCqSpN4cNVSSfCXJ3iQvdOoWJ9mWZEeSrUkWdZatT7IzyUtJLu/UX5zkhbbsrk79\nGUkeaPVPJ7mgs2xte40dSW7o1C9P8kxrsyXJabN9IyRJszeTPZWvAqun1N0CbKuqC4En2nOSrASu\nA1a2Nnfn0MUK9wDrqmoFsCLJZJ/rgH2t/k5gU+trMXAbcEl73J5kYWuzCbijtXmj9TFSkrz9kKRh\ncdRQqaq/YvDB3XUVsLmVNwPXtPLVwP1VdaCqdgEvA6uSnAO8v6q2t/Xu7bTp9vUwcFkrXwFsrar9\nVbUf2AZc2ULqUuChaV5/xHidh6ThcrxzKkuqam8r7wWWtPK5wO7OeruB86apn2j1tH9fAaiqg8Cb\nSc46Ql+Lgf1V9dY0fWmecs9LOjnMeqK+BvffOFFfp/3aPrTc65JOBsd776+9Sc6uqj3t0NZrrX4C\nWNZZbymDPYyJVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT1Ma0NGza8XR4bG2Ns\nbOxwq0rSSWd8fJzx8fHe+stMbvSX5IPAN6vqI+35f2Uwub4pyS3Aoqq6pU3Uf53BxPp5wHeAX6yq\nSvIMcBOwHfgW8CdV9e0kNwIfqarfTrIGuKaq1rSJ+u8BFwEBvg9cVFX7kzwIPFxVDyT5EvB8VX1p\nmu2uYb2R4eAw0eS2j1Z5WP+bSCeDJFTVcR+nPmqoJLkf+HXg5xnMn9wGfAN4kMEexi7g2jaZTpJb\ngc8DB4Gbq+rxVn8x8DXgTOCxqrqp1Z8B3Ad8DNgHrGmT/CT5HHBr25QvVtXmVr8c2MJgfuVZ4LNV\ndWCabTdU5mF5WP+bSCeDdz1UhpmhMj/Lw/rfRDoZzDZUvKJektQbQ0WS1Bt/+VEnnL8IKY0u91Q0\nB7xmRRpVhookqTeGiiSpN4aKJKk3hookqTee/aU55Zlg0mhxT0VzzDPBpFFiqEiSeuPhr3nEH7CS\nNOwMlXmnexPGk4vzK9Lw8/CX5hHnV6RhZ6hIknpjqEiSeuOciuYl51ek4eSeiuYp51ekYWSoSJJ6\nY6hIknrjnIrmPedXpOHhnoqGgPMr0rAwVCRJvfHw1xzzfl/HxkNh0vzmnsq84OGdmfO9kuYz91Q0\ntNxrkeYfQ0VD7NAdnQ93GNGwkU4sQ0UjovuTAdOHjQEjvfuGek4lyeokLyXZmeQLc709mo8OzcEk\nefsh6d0xtKGS5FTgvwGrgZXAbyb50Nxu1cz08+E23tfmzFPj70Kf8ydgxsfH5+R1TxTHd/Ia2lAB\nLgFerqpdVXUA2AJcPcfbdAxmexbTeE/bMV+Nv8v9z23AjPqHkuM7eQ3znMp5wCud57uBVVNX6h5H\n97CHpnf0Cf8Z9eKcjTTUoTKj/4NPOWWwM7Zr1y4uuOCCd3WDjsRAGxbTT/jPpHys/403btx4/Jt5\nGDMNtiNtq+Go2ciw/gEl+TiwoapWt+frgbeqalNnneEcnCTNoao67m/BwxwqC4D/CVwGvApsB36z\nqn40pxsmSSexoT38VVUHk/x74HHgVODLBookza2h3VORJM0/w3xK8WGN2kWRSZYleSrJD5P8TZKb\nWv3iJNuS7EiyNcmiud7W45Xk1CTPJflmez5KY1uU5KEkP0ryYpJVIza+9e1v84UkX09yxjCPL8lX\nkuxN8kKn7rDjaePf2T5zLp+brZ65w4zvj9rf5w+S/EWShZ1lxzS+kQuVYb4o8ggOAL9bVR8GPg78\nThvTLcC2qroQeKI9H1Y3Ay9y6HSqURrbXcBjVfUh4KPAS4zI+JJ8EPgt4KKq+giDQ9FrGO7xfZXB\n50fXtONJshK4jsFnzWrg7iTz/XN1uvFtBT5cVb8E7AD
Ww/GNb74P/ngM+UWRP6uq9lTV863898CP\nGFyncxWwua22GbhmbrZwdpIsBT4F/BmD83RhdMa2EPi1qvoKDOYCq+pNRmR8wE8YfOl5Tzt55j0M\nTpwZ2vFV1V8Bb0ypPtx4rgbur6oDVbULeJnBZ9C8Nd34qmpbVb3Vnj4DLG3lYx7fKIbKdBdFnjdH\n29K79s3wYwz+wy+pqr1t0V5gyRxt1mzdCfwe8FanblTGthz4cZKvJnk2yZ8meS8jMr6qeh24A/g/\nDMJkf1VtY0TG13G48ZzL4DNm0ih83nweeKyVj3l8oxgqI3vmQZL3AQ8DN1fVT7vLanDGxdCNPclv\nAK9V1XMc2kt5h2EdW7MAuAi4u6ouAv6BKYeChnl8SX4B+I/ABxl8AL0vyWe76wzz+KYzg/EM7ViT\n/Gfgn6rq60dY7YjjG8VQmQCWdZ4v451JO5SSnMYgUO6rqkda9d4kZ7fl5wCvzdX2zcKvAFcl+Vvg\nfuDfJLmP0RgbDP72dlfVd9vzhxiEzJ4RGd+/AP5HVe2rqoPAXwD/itEZ36TD/T1O/bxZ2uqGTpJ/\nx+Aw9L/tVB/z+EYxVL4HrEjywSSnM5hkenSOt2lWMrinxpeBF6vqjzuLHgXWtvJa4JGpbee7qrq1\nqpZV1XIGE7xPVtX1jMDYYDAfBryS5MJW9Qngh8A3GYHxMTjp4ONJzmx/p59gcMLFqIxv0uH+Hh8F\n1iQ5PclyYAWDC7GHSpLVDA5BX11V/9hZdOzjq6qRewBXMrja/mVg/VxvTw/j+VUG8w3PA8+1x2pg\nMfAdBmdrbAUWzfW2znKcvw482sojMzbgl4DvAj9g8E1+4YiN7z8xCMoXGExinzbM42Owx/wq8E8M\n5mc/d6TxALe2z5qXgCvmevuPY3yfB3YC/7vz+XL38Y7Pix8lSb0ZxcNfkqQ5YqhIknpjqEiSemOo\nSJJ6Y6hIknpjqEiSemOoSJJ6Y6hIknrz/wF0zsvts73EjAAAAABJRU5ErkJggg==\n", m@11: "text/plain": [ m@12: "" m@11: ] m@11: }, m@11: "metadata": {}, m@11: "output_type": "display_data" m@11: } m@11: ], m@8: "source": [ m@8: "plt.hist(D.ravel(), bins=100);" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@12: "execution_count": 7, m@8: "metadata": { m@8: "collapsed": true m@8: }, m@8: "outputs": [], m@8: "source": [ m@8: "def n_occurrence_from_D(D, k=10, n_items=None):\n", m@8: " if n_items is None:\n", m@8: " n_items = len(D)\n", m@8: " sort_idx = np.argsort(D, axis=1)\n", m@8: " D_k = sort_idx[:, 1:(k+1)] # nearest neighbour is the item itself\n", m@8: " N_k = np.bincount(D_k.astype(int).ravel(), minlength=n_items)\n", m@8: " return N_k" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@28: "execution_count": 1, m@28: "metadata": { m@28: "collapsed": false m@28: }, m@11: "outputs": [ m@11: { m@28: "ename": "NameError", m@28: "evalue": "name 'n_occurrence_from_D' is not defined", m@28: "output_type": "error", m@28: 
# --- k-occurrence (hubness) of the distance matrix --------------------------
# N_k[j] = number of items that have item j among their 100 nearest neighbours.
N_k = n_occurrence_from_D(D, k=100)
# Function-call form of print gives the same output on Python 2 and Python 3.
print(skew(N_k))
plt.figure()
plt.hist(N_k, bins=100);
plt.figure()
plt.plot(np.sort(N_k));

# --- Indices of the 10 nearest neighbours of every item ----------------------
# sort_idx must be computed here: inside n_occurrence_from_D it is only a
# local variable, so with this line commented out the cell raised a NameError
# on a fresh kernel.
sort_idx = np.argsort(D, axis=1)
k = 10
# Column 0 is skipped because the nearest entry is expected to be the item itself.
D_k = sort_idx[:, 1:(k+1)]

D_k