Mercurial > hg > plosone_underreview
view notebooks/test_hubness.ipynb @ 31:03ff14ba9fa2 branch-tests
merged
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Wed, 13 Sep 2017 19:58:10 +0100 |
parents | 6aa08c9c95e9 e4736064d282 |
children | 57f53b0d1eaa |
line wrap: on
line source
{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import pickle\n", "from scipy.stats import pearsonr\n", "from scipy.stats import skew\n", "import sys\n", "from sklearn.metrics.pairwise import pairwise_distances\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "sys.path.append('../')\n", "import scripts.outliers as outliers\n", "import scripts.utils_spatial as utils_spatial" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING: there are 21 disconnected observations\n", "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n", "Antigua and Barbuda\n", "Australia\n", "Cuba\n", "Fiji\n", "French Polynesia\n", "Grenada\n", "Iceland\n", "Jamaica\n", "Japan\n", "Kiribati\n", "Malta\n", "New Zealand\n", "Philippines\n", "Puerto Rico\n", "Republic of Serbia\n", "Saint Lucia\n", "Samoa\n", "Solomon Islands\n", "South Korea\n", "The Bahamas\n", "Trinidad and Tobago\n" ] } ], "source": [ "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", "ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", "Xrhy, Xmel, Xmfc, Xchr = X_list\n", "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)\n", "\n", "# global outliers\n", "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(8200, 380)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "D = pairwise_distances(X, metric='mahalanobis')" ] }, { "cell_type": "code", "execution_count": 5, "collapsed": false }, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(8200, 8200)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D.shape" ] }, { "cell_type": "code", "execution_count": 6, "collapsed": false }, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEACAYAAAB78OvLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGWtJREFUeJzt3W2sXdV95/HvDwyUPIwd05F5sCFWa6Q4StrADM50WvUy\nJGCiCniRgKsJeBKrL0pnYPqiE8xIYKtRO54KUaoRRGrzYFCDQdASoqBgB7iaSiNwEiClIYzNqJ7B\nl9pEBpO0o05t8Z8XZ128ubm2r303vvccfz/SkddZe6919jpczu/svfbeJ1WFJEl9OGWuN0CSNDoM\nFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvZhQqSRYleSjJj5K8mGRVksVJtiXZkWRrkkWd9dcn2Znk\npSSXd+ovTvJCW3ZXp/6MJA+0+qeTXNBZtra9xo4kN3Tqlyd5prXZkuS02b8dkqTZmOmeyl3AY1X1\nIeCjwEvALcC2qroQeKI9J8lK4DpgJbAauDtJWj/3AOuqagWwIsnqVr8O2Nfq7wQ2tb4WA7cBl7TH\n7UkWtjabgDtamzdaH5KkOXTUUGkf4r9WVV8BqKqDVfUmcBWwua22Gbimla8G7q+qA1W1C3gZWJXk\nHOD9VbW9rXdvp023r4eBy1r5CmBrVe2vqv3ANuDKFlKXAg9N8/qSpDkykz2V5cCPk3w1ybNJ/jTJ\ne4ElVbW3rbMXWNLK5wK7O+13A+dNUz/R6mn/vgKD0ALeTHLWEfpaDOyvqrem6UuSNEdmEioLgIuA\nu6vqIuAfaIe6JtXgXi8n6n4v3ldGkuapBTNYZzewu6q+254/BKwH9iQ5u6r2tENbr7XlE8CyTvul\nrY+JVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT18Q5JDCBJOkZVlaOvNb2j7qlU\n1R7glSQXtqpPAD8EvgmsbXVrgUda+VFgTZLTkywHVgDbWz8/aWeOBbge+EanzWRfn2Yw8Q+wFbi8\nnX32AeCTwONtz+gp4DPTvP7U7R/Jx+233z7n2+D4HJ/jG73HbM1kTwXgPwB/nuR04H8BnwNOBR5M\nsg7YBVzbPsRfTPIg8CJwELixDm3pjcDXgDMZnE327Vb/ZeC+JDuBfcCa1tfrSX4fmNxL2liDCXuA\nLwBbknwReLb1IUmaQzMKlar6AfAvp1n0icOs/wfAH0xT/33gI9PU/z9aKE2z7KvAV6ep/1tg1RE3\nXBw6m3ugj28iknQ4XlE/pMbGxo5h7RN5HkU/jm18w8fxDbdRH99sZJS/uSapUR7fTAz2VCbfg7in\nIumIklDv5kS9JEkzNdOJeo2I7hyLey2S+uaeykln+OZXJA0PQ0WS1BtDRZLUG0NFktQbQ0WS1BvP\n/hpBU6+il6QTxT2VkeVZXpJOPENFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS\n1BuvqD+J+dsqkvrmnspJzavuJfXLUJEk9cZQkST1xlCRJPXGUJEk9cZQkST1xlCRJPXGUJEk9WZG\noZJkV5K/TvJcku2tbnGSbUl2JNmaZFFn/fVJdiZ5KcnlnfqLk7zQlt3VqT8jyQOt/ukkF3SWrW2v\nsSPJDZ365UmeaW22JDlttm+GJGl2ZrqnUsBYVX2sqi5pdbcA26rqQuCJ9pwkK4HrgJXAauDuHLp0\n+x5gXVWtAFYkWd3q1wH7Wv2dwKbW12LgNuCS9rg9ycLWZhNwR2vzRutDkjSHjuXwV6Y8vwrY3Mqb\ngWta+Wrg/qo6UFW7gJeBVUnOAd5fVdvbevd22nT7ehi4rJWvALZW1f6q2g9sA65sIXUp8NA0ry9J\nmiPHsqfynSTfS/JbrW5JVe1t5b3AklY+F9jdabsbOG+a+olWT/v3FYCqOgi8meSsI/S1GNhfVW9N\n05ckaY7M9IaS/7qq/i7JPwe2JXmpu7CqKsmJuomUN6uSpHlqRqFSVX/X/v1xkr9kML+xN8nZVbWn\nHdp6ra0+ASzrNF/KYA9jopWn1k+2OR94NckCYGFV7UsyAYx12iwDngReBxYlOaXtrSxtffyMDRs2\nvF0eGxtjbGxsutWGXveOw5I0U+Pj44yPj/fWX452y/Mk7wFOraqfJnkvsBXYCHyCweT6piS3AIuq\n6pY2Uf91BsFzHvAd4Bfb3swzwE3AduBbwJ9U1beT3Ah8pKp+O8ka4JqqWtMm6r8HXMRgTuf7wEVV\ntT/Jg8DDVfVAki8Bz1fVl6Zse50st3QfhMrkWI+9fLK8T5KOLAlVddzfUmcSKsuBv2xPFwB/XlV/\n2D7wH2Swh7ELuLZNppPkVuDzwEHg5qp6vNVfDHwNOBN4rKpuavVnAPcBHwP2AWvaJD9JPgfc2l7/\ni1W1ubNdWxjMrzwLfLaqDkzZdkPFUJF0DN71UBlmhsrMyyfL+yTpyGYbKl5RL0nqjaEiSeqNoSJJ\n6s1Mr1PRiOuekuz8iqTj5Z6KmsLrSiXNlqEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEi\nSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSerNgrneAM0/Sd4uV9UcbomkYeOeiqZR7SFJx2ZGoZLk1CTPJflm\ne744ybYkO5JsTbKos+76JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR5IZO/fIkz7Q2\nW5KcNts3QpI0ezPdU7kZeJFDX19vAbZV1YXAE+05SVYC1wErgdXA3Tl0LOUeYF1VrQBWJFnd6tcB\n+1r9ncCm1tdi4Dbgkva4PcnC1mYTcEdr80brQ5I0x44aKkmWAp8C/gyYDIirgM2tvBm4ppWvBu6v\nqgNVtQt4GViV5Bzg/VW1va13b6dNt6+Hgcta+Qpga1Xtr6r9wDbgyhZSlwIPTfP6kqQ5NJM9lTuB\n3wPe6tQtqaq9rbwXWNLK5wK7O+vtBs6bpn6i1dP+fQWgqg4CbyY56wh9LQb2V9Vb0/R1Ukny9kOS\n5oMjnv2V5DeA16rquSRj061TVZXkRM3qHvPrbNiw4e3y2NgYY2NjPW7OfDD5lhgsko7d+Pg44+Pj\nvfV3tFOKfwW4KsmngJ8D/lmS+4C9Sc6uqj3t0NZrbf0JYFmn/VIGexgTrTy1frLN+cCrSRYAC6tq\nX5IJYKzTZhnwJPA6sCjJKW1vZWnrY1rdUJEkvdPUL9sbN26cVX9HPPxVVbdW1bKqWg6sAZ6squuB\nR4G1bbW1wCOt/CiwJsnpSZYDK4DtVbUH+EmSVW1O5HrgG502k319msHEP8BW4PIki5J8APgk8HgN\nLpx4CvjMNK8vSZpDx3rx4+Sxlv8CPJhkHbALuBagql5M8iCDM8UOAjfWoavnbgS+BpwJPFZV3271\nXwbuS7IT2McgvKiq15P8PvDdtt7GNmEP8AVgS5IvAs+2PiRJcyyjfMV0khrx8fHOOZX+y6P8/kn6\nWUmoquOepPWKeklSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlSbwwVSVJvDBVJUm8MFUlS\nbwwVSVJvDBVJUm8MFUlSb4711vc6yXR/qtg7Fks6GvdUdBTFcfyKs6STlKEiSeqNoSJJ6o2hIknq\njaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSeqNoSJJ6o2hIknqjaEiSerNEUMlyc8l\neSbJ80leTPKHrX5xkm1JdiTZmmRRp836JDuTvJTk8k79xUleaMvu6tSfkeSBVv90kgs6y9a219iR\n5IZO/fK2XTuTbElyWl9viCTp+B0xVKrqH4FLq+qXgY8Clyb5VeAWYFtVXQg80Z6TZCVwHbASWA3c\nnUM/yHEPsK6qVgArkqxu9euAfa3+TmBT62sxcBtwSXvcnmRha7MJuKO1eaP1IUmaY0c9/FVV/7cV\nTwdOZfAhfhWwudVvBq5p5auB+6vqQFXtAl4GViU5B3h/VW1v693badPt62Hgsla+AthaVfuraj+w\nDbiyhdSlwEPTvL4kaQ4dNVSSnJLkeWAv8FRV/RBYUlV72yp7gSWtfC6wu9N8N3DeNPUTrZ727ysA\nVXUQeDPJWUfoazGwv6remqYvSdIcOurPCbcP719uh54eT3LplOWV5ET9NOAxv86GDRveLo+NjTE2\nNtbj5kjScBsfH2d8fLy3/mb8G/VV9WaSbwEXA3uTnF1Ve9qhrdfaahPAsk6zpQz2MCZaeWr9ZJvz\ngVeTLAAWVtW+JBPAWKfNMuBJ4HVgUZJTWuAtbX1MqxsqkqR3mvple+PGjbPq72hnf/385JldSc4E\nPgk8BzwKrG2rrQUeaeVHgTVJTk+yHFgBbK+qPcBPkqxqcyLXA9/otJns69MMJv4BtgKXJ1mU5APt\ntR+vqgKeAj4zzetLkuZQBp/Rh1mYfITBRPgp7XFfVf1ROzPrQQZ7GLuAa9tkOkluBT4PHARurqrH\nW/3FwNeAM4HHquqmVn8GcB/wMWAfsKZN8pPkc8CtbXO+WFWbW/1yYAuD+ZVngc9W1YFptr+ONL5h\ndOhkukmT48sJKHdedcTeV0kDSaiqqR80M28/yh8OoxsqJzJIpi+P2vsqaWC2oeIV9ZKk3hgqkqTe\nGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgqkqTeGCqSpN4YKpKk3hgq\nkqTeGCqSpN4YKpKk3hgqkqTeLJjrDdBw6v6ssb8CKWmSeyo6TsWhnxeWpAFDRZLUG0NFktQbQ0WS\n1BtDRZLUG0NFktQbQ0WS1BtDRZLUG0NFktQbQ0WS1JujhkqSZUmeSvLDJH+T5KZWvzjJtiQ7kmxN\nsqjTZn2SnUleSnJ5p/7iJC+0ZXd16s9I8kCrfzrJBZ1la9tr7EhyQ6d+eZJnWpstSU7r4w2RJB2/\nmeypHAB+t6o+DHwc+J0kHwJuAbZV1YXAE+05SVYC1wErgdXA3Tl0o6h7gHVVtQJYkWR1q18H7Gv1\ndwKbWl+LgduAS9rj9iQLW5tNwB2tzRutD0nSHDpqqFTVnqp6vpX/HvgRcB5wFbC5rbYZuKaVrwbu\nr6oDVbULeBlYleQc4P1Vtb2td2+nTbevh4HLWvkKYGtV7a+q/cA24MoWUpcCD03z+pKkOXJMcypJ\nPgh8DHgGWFJVe9uivcCSVj4X2N1ptptBCE2tn2j1tH9fAaiqg8CbSc46Ql+Lgf1V9dY0fUmS5siM\nb32f5H0M9iJurqqfTrn1eSU5UbesPabX2bBhw9vlsbExxsbGet4cSRpe4+PjjI+P99bfjEKlTYI/\nDNxXVY+06r1Jzq6qPe3Q1mutfgJY1mm+lMEexkQrT62fbHM+8GqSBcDCqtqXZAIY67RZBjwJvA4s\nSnJK21tZ2vr4Gd1Q0bvD31aRhtfUL9sbN26cVX8zOfsrwJeBF6vqjzuLHgXWtvJa4JFO/ZokpydZ\nDqwAtlfVHuAnSVa1Pq8HvjFNX59mMPEPsBW4PMmiJB8APgk8XoNPrqeAz0zz+jrh/G0VSQM52jfL\nJL8K/Hfgrzn0ybEe2A48yGAPYxdwbZtMJ8mtwOeBgwwOlz3e6i8GvgacCTxWVZOnJ58B3MdgvmYf\nsKZN8pPkc8Ct7XW/WFWbW/1yYAuD+ZVngc9W1YEp216j9s15kMeTY5p/5VF7v6WTTRKqKkdf8zDt\nR/lDYFRCpXt4aWDuw8NQkUbTbEPFK+qHhoeYJM1/hookqTeGiiSpN4aKJKk3hookqTeGiiSpN4aK\nJKk3hookqTeGiiSpNzO+S7E0E95cUjq5uaeinnnlv3QyM1QkSb0xVCRJvTFUJEm9MVQkSb0xVCRJ\nvTFUJEm9MVQkSb0xVCRJvTFUJEm9MVQkSb3x3l9613gfMOnk456K3kXeB0w62RgqkqTeGCqSpN4Y\nKpKk3hgqkqTeGCqSpN4cNVSSfCXJ3iQvdOoWJ9mWZEeSrUkWdZatT7IzyUtJLu/UX5zkhbbsrk79\nGUkeaPVPJ7mgs2xte40dSW7o1C9P8kxrsyXJabN9IyRJszeTPZWvAqun1N0CbKuqC4En2nOSrASu\nA1a2Nnfn0MUK9wDrqmoFsCLJZJ/rgH2t/k5gU+trMXAbcEl73J5kYWuzCbijtXmj9TFSkrz9kKRh\ncdRQqaq/YvDB3XUVsLmVNwPXtPLVwP1VdaCqdgEvA6uSnAO8v6q2t/Xu7bTp9vUwcFkrXwFsrar9\nVbUf2AZc2ULqUuChaV5/xHidh6ThcrxzKkuqam8r7wWWtPK5wO7OeruB86apn2j1tH9fAaiqg8Cb\nSc46Ql+Lgf1V9dY0fWmecs9LOjnMeqK+BvffOFFfp/3aPrTc65JOBsd776+9Sc6uqj3t0NZrrX4C\nWNZZbymDPYyJVp5aP9nmfODVJAuAhVW1L8kEMNZpswx4EngdWJTklLa3srT1Ma0NGza8XR4bG2Ns\nbOxwq0rSSWd8fJzx8fHe+stMbvSX5IPAN6vqI+35f2Uwub4pyS3Aoqq6pU3Uf53BxPp5wHeAX6yq\nSvIMcBOwHfgW8CdV9e0kNwIfqarfTrIGuKaq1rSJ+u8BFwEBvg9cVFX7kzwIPFxVDyT5EvB8VX1p\nmu2uYb2R4eAw0eS2j1Z5WP+bSCeDJFTVcR+nPmqoJLkf+HXg5xnMn9wGfAN4kMEexi7g2jaZTpJb\ngc8DB4Gbq+rxVn8x8DXgTOCxqrqp1Z8B3Ad8DNgHrGmT/CT5HHBr25QvVtXmVr8c2MJgfuVZ4LNV\ndWCabTdU5mF5WP+bSCeDdz1UhpmhMj/Lw/rfRDoZzDZUvKJektQbQ0WS1Bt/+VEnnL8IKY0u91Q0\nB7xmRRpVhookqTeGiiSpN4aKJKk3hookqTee/aU55Zlg0mhxT0VzzDPBpFFiqEiSeuPhr3nEH7CS\nNOwMlXmnexPGk4vzK9Lw8/CX5hHnV6RhZ6hIknpjqEiSeuOciuYl51ek4eSeiuYp51ekYWSoSJJ6\nY6hIknrjnIrmPedXpOHhnoqGgPMr0rAwVCRJvfHw1xzzfl/HxkNh0vzmnsq84OGdmfO9kuYz91Q0\ntNxrkeYfQ0VD7NAdnQ93GNGwkU4sQ0UjovuTAdOHjQEjvfuGek4lyeokLyXZmeQLc709mo8OzcEk\nefsh6d0xtKGS5FTgvwGrgZXAbyb50Nxu1cz08+E23tfmzFPj70Kf8ydgxsfH5+R1TxTHd/Ia2lAB\nLgFerqpdVXUA2AJcPcfbdAxmexbTeE/bMV+Nv8v9z23AjPqHkuM7eQ3znMp5wCud57uBVVNX6h5H\n97CHpnf0Cf8Z9eKcjTTUoTKj/4NPOWWwM7Zr1y4uuOCCd3WDjsRAGxbTT/jPpHys/403btx4/Jt5\nGDMNtiNtq+Go2ciw/gEl+TiwoapWt+frgbeqalNnneEcnCTNoao67m/BwxwqC4D/CVwGvApsB36z\nqn40pxsmSSexoT38VVUHk/x74HHgVODLBookza2h3VORJM0/w3xK8WGN2kWRSZYleSrJD5P8TZKb\nWv3iJNuS7EiyNcmiud7W45Xk1CTPJflmez5KY1uU5KEkP0ryYpJVIza+9e1v84UkX09yxjCPL8lX\nkuxN8kKn7rDjaePf2T5zLp+brZ65w4zvj9rf5w+S/EWShZ1lxzS+kQuVYb4o8ggOAL9bVR8GPg78\nThvTLcC2qroQeKI9H1Y3Ay9y6HSqURrbXcBjVfUh4KPAS4zI+JJ8EPgt4KKq+giDQ9FrGO7xfZXB\n50fXtONJshK4jsFnzWrg7iTz/XN1uvFtBT5cVb8E7ADWw/GNb74P/ngM+UWRP6uq9lTV863898CP\nGFyncxWwua22GbhmbrZwdpIsBT4F/BmD83RhdMa2EPi1qvoKDOYCq+pNRmR8wE8YfOl5Tzt55j0M\nTpwZ2vFV1V8Bb0ypPtx4rgbur6oDVbULeJnBZ9C8Nd34qmpbVb3Vnj4DLG3lYx7fKIbKdBdFnjdH\n29K79s3wYwz+wy+pqr1t0V5gyRxt1mzdCfwe8FanblTGthz4cZKvJnk2yZ8meS8jMr6qeh24A/g/\nDMJkf1VtY0TG13G48ZzL4DNm0ih83nweeKyVj3l8oxgqI3vmQZL3AQ8DN1fVT7vLanDGxdCNPclv\nAK9V1XMc2kt5h2EdW7MAuAi4u6ouAv6BKYeChnl8SX4B+I/ABxl8AL0vyWe76wzz+KYzg/EM7ViT\n/Gfgn6rq60dY7YjjG8VQmQCWdZ4v451JO5SSnMYgUO6rqkda9d4kZ7fl5wCvzdX2zcKvAFcl+Vvg\nfuDfJLmP0RgbDP72dlfVd9vzhxiEzJ4RGd+/AP5HVe2rqoPAXwD/itEZ36TD/T1O/bxZ2uqGTpJ/\nx+Aw9L/tVB/z+EYxVL4HrEjywSSnM5hkenSOt2lWMrinxpeBF6vqjzuLHgXWtvJa4JGpbee7qrq1\nqpZV1XIGE7xPVtX1jMDYYDAfBryS5MJW9Qngh8A3GYHxMTjp4ONJzmx/p59gcMLFqIxv0uH+Hh8F\n1iQ5PclyYAWDC7GHSpLVDA5BX11V/9hZdOzjq6qRewBXMrja/mVg/VxvTw/j+VUG8w3PA8+1x2pg\nMfAdBmdrbAUWzfW2znKcvw482sojMzbgl4DvAj9g8E1+4YiN7z8xCMoXGExinzbM42Owx/wq8E8M\n5mc/d6TxALe2z5qXgCvmevuPY3yfB3YC/7vz+XL38Y7Pix8lSb0ZxcNfkqQ5YqhIknpjqEiSemOo\nSJJ6Y6hIknpjqEiSemOoSJJ6Y6hIknrz/wF0zsvts73EjAAAAABJRU5ErkJggg==\n", "text/plain": [ "<matplotlib.figure.Figure at 0x7f6dc44adfd0>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.hist(D.ravel(), bins=100);" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def n_occurrence_from_D(D, k=10, n_items=None):\n", " if n_items is None:\n", " n_items = len(D)\n", " sort_idx = np.argsort(D, axis=1)\n", " D_k = sort_idx[:, 1:(k+1)] # nearest neighbour is the item itself\n", " N_k = np.bincount(D_k.astype(int).ravel(), minlength=n_items)\n", " return N_k" ] }, { "cell_type": "code", "execution_count": 8, "outputs": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-1-0aacb5dec8fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mN_k\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_occurrence_from_D\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mD\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mskew\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_k\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'n_occurrence_from_D' is not defined" "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8.18316065981\n" ] }, { "data": { "text/plain": [ "[<matplotlib.lines.Line2D at 0x7f6d5a4dd550>]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFsBJREFUeJzt3X+QXWd93/H3BwuDbC+oKh3554zVZL22MqQYpYgUU69b\nxxUl2P5DY8szeDSpSodRUpN2hiAx09p/EcNMJhh17E5TwDIDShUCHrk4imXHdyaTtJYMchAWiq0m\nIqyC1hgSnBCqWuNv/7jPHl02QruSrlZX9vs1c8fPec5zzv2eXet+9nnOvbupKiRJAnjd2S5AkjQ6\nDAVJUsdQkCR1DAVJUsdQkCR1DAVJUmfOUEiyKcmzSfYm+UKSNyRZmmRnkueSPJZkyazxzyfZn+Sm\ngf6V7RzPJ7nvTF2QJOnUnTAUklwJfAB4e1W9FTgPWAtsBHZW1VXAE22bJCuA24EVwGrg/iRpp3sA\nWF9V48B4ktVDvxpJ0mmZa6bwEvAycEGSRcAFwF8CNwNb2pgtwK2tfQuwtaperqqDwAFgVZJLgLGq\n2tXGPTRwjCRpRJwwFKrq+8BvAH9BPwz+uqp2AsuqaroNmwaWtfalwNTAKaaAy47Tf6j1S5JGyFzL\nRz8F/CpwJf0X9ouSvH9wTPV/T4a/K0OSXgUWzbH/54A/rqrvAST5EvDzwOEkF1fV4bY09EIbfwi4\nYuD4y+nPEA619mD/oeM9YRIDRpJOUlVl7lFzm+uewn7gnUkWtxvGNwL7gEeAdW3MOuDh1t4OrE1y\nfpLlwDiwq6oOAy8lWdXOc+fAMX9PVY304+677z7rNVindVqnNc48humEM4Wq+pMkDwFPA68AXwP+\nGzAGbEuyHjgI3NbG70uyjX5wHAU21LGKNwAPAouBR6tqx1CvRJJ02uZaPqKqPgF8Ylb39+nPGo43\n/mPAx47T/1XgradQoyRpgfiJ5lMwOTl5tkuYF+scLuscrnOhznOhxmHLsNejTleSGrWaJGmUJaEW\n6EazJOk1xFCQJHUMBUlSx1CQJHUMBUlSZyRD4brr3ssNN7yXqampuQdLkoZmzg+vnQ1/9EcbWLz4\n3/HDH/7wbJciSa8pIzlTgPeyaNFFZ7sISXrNGdFQkCSdDYaCJKljKEiSOoaCJKljKEiSOoaCJKlj\nKEiSOoaCJKljKEiSOnOGQpKJJHsGHj9IcleSpUl2JnkuyWNJlgwcsynJ80n2J7lpoH9lkr1t331n\n6qIkSadmzlCoqj+tqmur6lpgJfB3wJeBjcDOqroKeKJtk2QFcDuwAlgN3J9k5s/EPQCsr6pxYDzJ\n6mFfkCTp1J3s8tGNwIGq+jZwM7Cl9W8Bbm3tW4CtVfVyVR0EDgCrklwCjFXVrjbuoYFjJEkj4GRD\nYS2wtbWXVdV0a08Dy1r7UmDwd15PAZcdp/9Q65ckjYh5h0KS84H3Ab8ze19VFVBDrEuSdBaczN9T\neA/w1ar6btueTnJxVR1uS0MvtP5DwBUDx11Of4ZwqLUH+w8d/6nu4ciRF9m8eTNr1qxhcnLyJMqU\npFe3Xq9Hr9c7I+dO/4f8eQxMfhv4vara0rY/AXyvqj6eZCOwpKo2thvNXwDeQX956HHgp6uqkjwF\n3AXsAr4CfKqqdsx6noJibGyC3bu3MzExMaRLlaRXpyRUVeYeObd5zRSSXEj/JvMHBrrvBbYlWQ8c\nBG4DqKp9SbYB+4CjwIY6ljwbgAeBxcCjswNBknR2zXumsFCcKUjSyRnmTMFPNEuSOoaCJKljKEiS\nOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaC\nJKljKEiSOoaCJKljKEiSOvMKhSRLknwxyTeT7EuyKsnSJDuTPJfksSRLBsZvSvJ8kv1JbhroX5lk\nb9t335m4IEnSqZvvTOE+4NGqugb4WWA/sBHYWVVXAU+0bZKsAG4HVgCrgfuTzPxB6QeA9VU1Down\nWT20K5EknbY5QyHJm4F3V9VnAKrqaFX9ALgZ2NKGbQFube1bgK1V9XJVHQQOAKuSXAKMVdWuNu6h\ngWMkSSNgPjOF5cB3k3w2ydeS/FaSC4FlVTXdxkwDy1r7UmBq4Pgp4LLj9B9q/ZKkEbFonmPeDvxK\nVe1O8knaUtGMqqokNbyy7uHIkRfZvHkza9asYXJycninlqRzXK/Xo9frnZFzp+rEr+VJLgb+V1Ut\nb9vXAZuAfwzcUFWH29LQk1V1dZKNAFV1bxu/A7gb+FYbc03rvwO4vqo+OOv5CoqxsQl2797OxMTE\nMK9Xkl51klBVmXvk3OZcPqqqw8C3k1zVum4EngUeAda1vnXAw629HVib5Pwky4FxYFc7z0vtnUsB\n7hw4RpI0AuazfATw74HPJzkf+D/ALwHnAduSrAcOArcBVNW+JNuAfcBRYEMdm45sAB4EFtN/N9OO\nIV2HJGkI5lw+WmguH0nSyVnQ5SNJ0muHoSBJ6hgKkqSOoSBJ6hgKkqSOoSBJ6hgKkqSOoSBJ6hgK\nkqSOoSBJ6hgKkqSOoSBJ6hgKkqSOoSBJ6hgKkqSOoSBJ6hgKkqSOoSBJ6hgKkqTOvEIhycEkX0+y\nJ8mu1rc0yc4kzyV5LMmSgfGbkjyfZH+Smwb6VybZ2/bdN/zLkSSdjvnOFAqYrKprq+odrW8jsLOq\nrgKeaNskWQHcDqwAVgP3J5n5g9IPAOurahwYT7J6SNchSRqCk1k+yqztm4Etrb0FuLW1bwG2VtXL\nVXUQOACsSnIJMFZVu9q4hwaOkSSNgJOZKTye5OkkH2h9y6pqurWngWWtfSkwNXDsFHDZcfoPtX5J\n0ohYNM9x76qq7yT5R8DOJPsHd1ZVJanhlXUPR468yObNm1mzZg2Tk5PDO7UkneN6vR69Xu+MnDtV\nJ/danuRu4G+BD9C/z3C4LQ09WVVXJ9kIUFX3tvE7gLuBb7Ux17T+O4Drq+qDs85fUIyNTbB793Ym\nJiZO8xIl6dUtCVU1e4n/lMy5fJTkgiRjrX0hcBOwF9gOrGvD1gEPt/Z2YG2S85MsB8aBXVV1GHgp\nyap24/nOgWMkSSNgPstHy4AvtzcQLQI+X1WPJXka2JZkPXAQuA2gqvYl2QbsA44CG+rYdGQD8CCw\nGHi0qnYM8VokSafppJePzjSXjyTp5Czo8pEk6bXDUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLH\nUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVJnXqGQ\n5Lwke5I80raXJtmZ5LkkjyVZMjB2U5Lnk+xPctNA/8oke9u++4Z/KZKk0zXfmcKHgH1Ate2NwM6q\nugp4om2TZAVwO7ACWA3cn2Tmj0k/AKyvqnFgPMnq4VyCJGlY5gyFJJcD/xr478DMC/zNwJbW3gLc\n2tq3AFur6uWqOggcAFYluQQYq6pdbdxDA8dIkkbEfGYKvwl8GHhloG9ZVU239jSwrLUvBaYGxk0B\nlx2n/1DrlySNkEUn2pnkF4EXqmpPksnjjamqSlLH23fq7uHIkRfZvHkza9asYXLyuE8tSa9JvV6P\nXq93Rs6dqp/8ep7kY8CdwFHgjcCbgC8B/xSYrKrDbWnoyaq6OslGgKq6tx2/A7gb+FYbc03rvwO4\nvqo+eJznLCjGxibYvXs7ExMTQ7xcSXr1SUJVZe6Rczvh8lFVfbSqrqiq5cBa4A+q6k5gO7CuDVsH\nPNza24G1Sc5PshwYB3ZV1WHgpSSr2o3nOweOkSSNiBMuHx3HzLTiXmBbkvXAQeA2gKral2Qb/Xcq\nHQU21LGpyAbgQWAx8GhV7Ti90iVJw3bC5aOzweUjSTo5C7Z8JEl6bTEUJEkdQ0GS1DEUJEkdQ0GS\n1DEUJEkdQ0GS1DEUJEkdQ0GS1DEUJEkdQ0GS1DEUJEkdQ0GS1DEUJEkdQ0GS1DEUJEkdQ0GS1DEU\nJEkdQ0GS1DlhKCR5Y5KnkjyTZF+SX2/9S5PsTPJckseSLBk4ZlOS55PsT3LTQP/KJHvbvvvO3CVJ\nkk7VCUOhqv4vcENVvQ34WeCGJNcBG4GdVXUV8ETbJskK4HZgBbAauD/JzB+TfgBYX1XjwHiS1Wfi\ngiRJp27O5aOq+rvWPB84D/gr4GZgS+vfAtza2rcAW6vq5ao6CBwAViW5BBirql1t3EMDx0iSRsSc\noZDkdUmeAaaBJ6vqWWBZVU23IdPAsta+FJgaOHwKuOw4/YdavyRphCyaa0BVvQK8Lcmbgd9PcsOs\n/ZWkhlvWPRw58iKbN29mzZo1TE5ODvf0knQO6/V69Hq9M3LuVM3/9TzJfwJ+BPxbYLKqDreloSer\n6uokGwGq6t42fgdwN/CtNuaa1n8HcH1VffA4z1FQjI1NsHv3diYmJk7zEiXp1S0JVZW5R85trncf\nvWXmnUVJFgO/AOwBtgPr2rB1wMOtvR1Ym+T8JMuBcWBXVR0GXkqyqt14vnPgGEnSiJhr+egSYEuS\n19EPkM9V1RNJ9gDbkqwHDgK3AVTVviTbgH3AUWBDHZuKbAAeBBYDj1bVjmFfjCTp9JzU8tFCcPlI\nkk7Ogi0fSZJeWwwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwF\nSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdeYMhSRXJHkyybNJvpHkrta/NMnOJM8l\neSzJkoFjNiV5Psn+JDcN9K9Msrftu+/MXJIk6VTNZ6bwMvAfqupngHcCv5zkGmAjsLOqrgKeaNsk\nWQHcDqwAVgP3J5n5g9IPAOurahwYT7J6qFcjSTotc4ZCVR2uqmda+2+BbwKXATcDW9qwLcCtrX0L\nsLWqXq6qg8ABYFWSS4CxqtrVxj00cIwkaQSc1D2FJFcC1wJPAcuqarrtmgaWtfalwNTAYVP0Q2R2\n/6HWL0kaEYvmOzDJRcDvAh+qqr85tiIEVVVJanhl3cORIy+yefNm1qxZw+Tk5PBOLUnnuF6vR6/X\nOyPnTtXcr+VJXg/8T+D3quqTrW8/MFlVh9vS0JNVdXWSjQBVdW8btwO4G/hWG3NN678DuL6qPjjr\nuQqKsbEJdu/ezsTExNAuVpJejZJQVZl75Nzm8+6jAJ8G9s0EQrMdWNfa64CHB/rXJjk/yXJgHNhV\nVYeBl5Ksaue8c+AYSdIImM/y0buA9wNfT7Kn9W0C7gW2JVkPHARuA6iqfUm2AfuAo8CGOjYd2QA8\nCCwGHq2qHUO6DknSEMxr+WghuXwkSSdnQZePJEmvHYaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiS\nOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKkzZygk+UyS\n6SR7B/qWJtmZ5LkkjyVZMrBvU5Lnk+xPctNA/8oke9u++4Z/KZKk0zWfmcJngdWz+jYCO6vqKuCJ\ntk2SFcDtwIp2zP1JZv5u6APA+qoaB8aTzD6nJOksmzMUquoPgb+a1X0zsKW1twC3tvYtwNaqermq\nDgIHgFVJLgHGqmpXG/fQwDGSpBFxqvcUllXVdGtPA8ta+1JgamDcFHDZcfoPtX5J0gg57RvNVVVA\nDaEWSdJZtugUj5tOcnFVHW5LQy+0/kPAFQPjLqc/QzjU2oP9h37y6e/hyJEX2bx5M2vWrGFycvIU\ny5SkV59er0ev1zsj507/B/05BiVXAo9U1Vvb9ieA71XVx5NsBJZU1cZ2o/kLwDvoLw89Dvx0VVWS\np4C7gF3AV4BPVdWO4zxXQTE2NsHu3duZmJgYyoVK0qtVEqoqc4+c25wzhSRbgeuBtyT5NvCfgXuB\nbUnWAweB2wCqal+SbcA+4CiwoY6lzgbgQWAx8OjxAkGSdHbNa6awkJwpSNLJGeZMwU80S5I6hoIk\nqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMo\nSJI6hoIkqWMoSJI6hoIkqWMoSJI6Cx4KSVYn2Z/k+SQfWejnlyT9ZAsaCknOA/4LsBpYAdyR5Jqf\nNP7qq68mSfcYFb1e72yXMC/WOVzWOVznQp3nQo3DttAzhXcAB6rqYFW9DPw2cMuJD6n2GB3nyv8o\n1jlc1jlc50Kd50KNw7bQoXAZ8O2B7anWNy+Ds4ZRnEFI0rlu0QI/37x+5H/Tm97Hj340dYLD82Pt\nMxUMVWd2hjK77jP9fJI0lyzkC1GSdwL3VNXqtr0JeKWqPj4wxldGSTpJVTWUn44XOhQWAX8K/Evg\nL4FdwB1V9c0FK0KS9BMt6PJRVR1N8ivA7wPnAZ82ECRpdCzoTEGSNNpG5hPNZ/tDbUk+k2Q6yd6B\nvqVJdiZ5LsljSZYM7NvUat2f5KaB/pVJ9rZ99w25xiuSPJnk2STfSHLXiNb5xiRPJXkmyb4kvz6K\ndQ48x3lJ9iR5ZFTrTHIwyddbnbtGuM4lSb6Y5Jvte79qlOpMMtG+hjOPHyS5a5RqnPW8z7bn+EKS\nNyxInVV11h/0l5IOAFcCrweeAa5Z4BreDVwL7B3o+wTwa639EeDe1l7Ranx9q/kAx2Zdu4B3tPaj\nwOoh1ngx8LbWvoj+/ZlrRq3Ods4L2n8XAf8buG4U62zn/Y/A54Hto/h9b+f8c2DprL5RrHML8G8G\nvvdvHsU623lfB3wHuGLUamzP9WfAG9r2/wDWLUSdQ/0in8YX4OeBHQPbG4GNZ6GOK/nxUNgPLGvt\ni4H9rb0J+MjAuB3AO4FLgG8O9K8F/usZrPdh4MZRrhO4ANgN/Mwo1glcDjwO3AA8Mqrfd/qh8A9n\n9Y1UnfQD4M+O0z9SdQ6c9ybgD0exRmAp/R/6/gH9cH0E+IWFqHNUlo9O60NtZ9Cyqppu7WlgWWtf\nSr/GGTP1zu4/xBm6jiRX0p/ZPDWKdSZ5XZJnWj1PVtWzo1gn8JvAh4FXBvpGsc4CHk/ydJIPjGid\ny4HvJvlskq8l+a0kF45gnTPWAltbe6RqrKrvA78B/AX9d2r+dVXtXIg6RyUURv5ud/VjdiTqTHIR\n8LvAh6rqbwb3jUqdVfVKVb2N/k/i/zzJDbP2n/U6k/wi8EJV7aH/ici/ZxTqbN5VVdcC7wF+Ocm7\nB3eOSJ2LgLcD91fV24Ef0p/1d0akTpKcD7wP+J3Z+0ahxiQ/Bfwq/dWLS4GLkrx/cMyZqnNUQuEQ\n/XW9GVfw4+l2tkwnuRggySXAC61/dr2X06/3UGsP9h8aZkFJXk8/ED5XVQ+Pap0zquoHwFeAlSNY\n5z8Dbk7y5/R/YvwXST43gnVSVd9p//0u8GX6v0ds1OqcAqaqanfb/iL9kDg8YnVCP1y/2r6eMHpf\ny58D/riqvldVR4Ev0V9mP+Nfy1EJhaeB8SRXtgS/Hdh+lmuCfg3rWnsd/TX8mf61Sc5PshwYB3ZV\n1WHgpfaOiwB3Dhxz2to5Pw3sq6pPjnCdb5l5V0SSxfTXQveMWp1V9dGquqKqltNfSviDqrpz1OpM\nckGSsda+kP5a+N5Rq7Od/9tJrmpdNwLP0l8PH5k6mzs4tnQ0U8so1bgfeGeSxe38NwL7WIiv5bBv\n3pzGjZX30L+xcgDYdBaefyv9tbv/R//+xi/Rv9nzOPAc8BiwZGD8R1ut+4F/NdC/kv4/2APAp4Zc\n43X0176fof8iu4f+ryEftTrfCnyt1fl14MOtf6TqnFXz9Rx799FI1Ul/rf6Z9vjGzL+PUauznf+f\n0H9jwZ/Q/+n2zaNWJ3Ah8CIwNtA3UjW28/8a/VDdS/9dXa9fiDr98JokqTMqy0eSpBFgKEiSOoaC\nJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOv8fBtDhFf0jZeoAAAAASUVORK5CYII=\n", "text/plain": [ "<matplotlib.figure.Figure at 0x7f6dc44ad2d0>" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHBRJREFUeJzt3X2QXNV55/HvT28gJAVZC4xeXSh45KDYLFhehNdmadZE\nVhJHUK4tJFeZqLKKE5cS49gpY8m1tR6qUg52VRLLtRFb6xgjuYyysmMoYWNZA6bLSexlDAgjGGRJ\nNgLNmBlhMBIvehv07B/3THM9jDRv3X17en6fqq4+ffrce5/bI/XT55z7oojAzMwMYFLRAZiZWeNw\nUjAzswonBTMzq3BSMDOzCicFMzOrcFIwM7OKIZOCpI2SnpS0R9Jdks6RNEdSu6R9knZJmj2g/X5J\neyWtyNUvS+vYL2lTrXbIzMxG76xJQdLFwEeBd0XEO4HJwBpgA9AeEUuAB9JrJC0FVgNLgZXAZklK\nq7sdWBcRrUCrpJVV3xszMxuToXoKR4FTwHmSpgDnAb8EVgFbUpstwA2pfD2wLSJORcRB4ACwXNI8\nYFZEdKR2W3PLmJlZgzhrUoiIF4G/A54lSwYvRUQ70BIRvalZL9CSyvOBrtwquoAFg9R3p3ozM2sg\nQw0fXQL8FXAx2Rf7TEkfybeJ7DoZvlaGmVkTmDLE++8GfhQRLwBI+jbwHqBH0tyI6ElDQ4dT+25g\nUW75hWQ9hO5Uztd3D7ZBSU4wZmYjFBEautXQhppT2AtcJWl6mjC+DugE7gXWpjZrgXtSeQewRtI0\nSYuBVqAjInqAo5KWp/XclFvmTSKioR6f+9znCo/BMTVPTI0al2MavzFV01l7ChHxU0lbgYeB08Cj\nwP8BZgHbJa0DDgI3pvadkraTJY4+YH28EfF64E5gOnBfROys6p6YmdmYDTV8RER8EfjigOoXyXoN\ng7X/PPD5QeofAd45ihjNzKxOfEbzMJRKpaJDeBPHNDyNGBM0ZlyOaXgaMaZqUrXHo8ZKUjRaTGZm\njUwSUaeJZjMzm0CcFMzMrMJJwczMKpwUzMyswknBzMwqnBTMzKzCScHMzCqcFMzMquSii6Cvr+go\nxsYnr5mZVUEETJoEp0+DqnIa2fD55DUzswbTnwzqnRCqzUnBzKwKXn8dJk8uOoqxc1IwM6sCJwUz\nM6twUjAzswonBTMzq3BSMDOzCicFMzOrmDBJQdLbJe3OPY5IulnSHEntkvZJ2iVpdm6ZjZL2S9or\naUWufpmkPem9TbXaKTOzeuvrgylD3vW+8Q2ZFCLiZxFxRURcASwDXgPuBjYA7RGxBHggvUbSUmA1\nsBRYCWyWKqdz3A6si4hWoFXSymrvkJlZEfr6YOrUoqMYu5EOH10HHIiIQ8AqYEuq3wLckMrXA9si\n4lREHAQOAMslzQNmRURHarc1t4yZ2bh26tQE6SkMsAbYlsotEdGbyr1ASyrPB7pyy3QBCwap7071\nZmbj3oTrKUiaBvwR8M2B76Ur2PkqdmY2YTVLT2Eku/D7wCMR8Xx63StpbkT0pKGhw6m+G1iUW24h\nWQ+hO5Xz9d2Dbaitra1SLpVKlEqlEYRpZlZ/9ewplMtlyuVyTdY97EtnS/pn4HsRsSW9/iLwQkR8\nQdIGYHZEbEgTzXcBV5IND90PvC0iQtJDwM1AB/Bd4MsRsXPAdnzpbDMbdx56CD7+cejoGLpttVXz\n0tnD6ilImkE2yfzRXPVtwHZJ64CDwI0AEdEpaTvQCfQB63Pf8uuBO4HpwH0DE4KZ2XjVLHMKw0oK\nEfEqcMGAuhfJEsVg7T8PfH6Q+keAd448TDOzxtYscwo+o9nMrAomzMlrZmY2tGYZPnJSMDOrAg8f\nmZlZhYePzMyswsNHZmZW0dc3QS6dbWZmQ/PwkZmZVbz+upOCmZklHj4yM7MK9xTMzKzCcwpmZlbh\n4SMzM6vw8JGZmVV4+MjMzCo8fGRmZhUePjIzswoPH5mZWYWHj8zMrGJCDR9Jmi3pW5KektQpabmk\nOZLaJe2TtEvS7Fz7jZL2S9oraUWufpmkPem9TbXYITOzIky0m+xsAu6LiEuBy4C9wAagPSKWAA+k\n10haCqwGlgIrgc2SlNZzO7AuIlqBVkkrq7YnZmYFevVVmDGj6CjGbsikIOl84OqIuAMgIvoi4giw\nCtiSmm0Bbkjl64FtEXEqIg4CB4DlkuYBsyKiI7XbmlvGzGxce+UVmDmz6CjGbjg9hcXA85K+JulR\nSV+RNANoiYje1KYXaEnl+UBXbvkuYMEg9d2p3sxs3GuWnsJwRsCmAO8C/jIifiLpS6Shon4REZKi\nWkG1tbVVyqVSiVKpVK1Vm5nVxGuvwfTp9dlWuVymXC7XZN3DSQpdQFdE/CS9/hawEeiRNDcietLQ\n0OH0fjewKLf8wrSO7lTO13cPtsF8UjAzGw+OHatfUhj4Y/nWW2+t2rqHHD6KiB7gkKQlqeo64Eng\nXmBtqlsL3JPKO4A1kqZJWgy0Ah1pPUfTkUsCbsotY2Y2rtUzKdTScA+g+jjwDUnTgJ8DfwJMBrZL\nWgccBG4EiIhOSduBTqAPWB8R/UNL64E7gelkRzPtrNJ+mJkVqlmSgt74vm4MkqLRYjIzG8oll8DO\nndDaWv9tSyIiNHTLofmMZjOzKmiWnoKTgplZFTgpmJlZxfHjzZEUPKdgZjZGETBpUnZRvEkF/NT2\nnIKZWQPpTwZFJIRqa4JdMDMrVl8fTJ1adBTV4aRgZjZGzXLZbHBSMDMbs1On3FMwM7PEScHMzCr6\n+jx8ZGZmiXsKZmZW4aRgZmYVHj4yM7MK9xTMzKzCScHMzCo8fGRmZhXuKZiZWYWTgpmZVUy44SNJ\nByU9Lmm3pI5UN0dSu6R9knZJmp1rv1HSfkl7Ja3I1S+TtCe9t6n6u2NmVn8TsacQQCkiroiIK1Pd\nBqA9IpYAD6TXSFoKrAaWAiuBzZL6b/5wO7AuIlqBVkkrq7QfZmaFmYhJAWDgXX1WAVtSeQtwQypf\nD2yLiFMRcRA4ACyXNA+YFREdqd3W3DJmZuPWhBs+Iusp3C/pYUkfTXUtEdGbyr1ASyrPB7pyy3YB\nCwap7071ZmbjWjP1FIab294bEc9JuhBol7Q3/2ZEhKSq3Vi5ra2tUi6VSpRKpWqt2sys6uqdFMrl\nMuVyuSbrHlZSiIjn0vPzku4GrgR6Jc2NiJ40NHQ4Ne8GFuUWX0jWQ+hO5Xx992DbyycFM7NGV+/b\ncQ78sXzrrbdWbd1DDh9JOk/SrFSeAawA9gA7gLWp2VrgnlTeAayRNE3SYqAV6IiIHuCopOVp4vmm\n3DJmZuNWM92Oczi70QLcnQ4gmgJ8IyJ2SXoY2C5pHXAQuBEgIjolbQc6gT5gfUT0Dy2tB+4EpgP3\nRcTOKu6LmVkhmmlOQW98XzcGSdFoMZmZnc2mTfCLX2TPRZBERAw8QnRUfEazmdkYNdPwkZOCmdkY\nNdPwkZOCmdkY1fvoo1pyUjAzGyMPH5mZWYWHj8zMrMJJwczMKibiBfHMzOwM3FMwM7MKJwUzM6vw\n8JGZmVW4p2BmZhVOCmZmVuHhIzMzq3BPwczMKpwUzMyswsNHZmZW4Z6CmZlVOCmYmVnFhBs+kjRZ\n0m5J96bXcyS1S9onaZek2bm2GyXtl7RX0opc/TJJe9J7Bd3J1Mys+iZiT+ETQCcQ6fUGoD0ilgAP\npNdIWgqsBpYCK4HNkvpvJn07sC4iWoFWSSurswtmZsV6+WWYObPoKKpjyKQgaSHwB8A/Af1f8KuA\nLam8Bbghla8HtkXEqYg4CBwAlkuaB8yKiI7UbmtuGTOzce2552DevKKjqI7h9BT+Afg0cDpX1xIR\nvancC7Sk8nygK9euC1gwSH13qjczG9defx1efRV+67eKjqQ6zjo1IumDwOGI2C2pNFibiAhJMdh7\no9XW1lYpl0olSqVBN21mVrgTJ+Dcc6EyUF4H5XKZcrlck3Ur4szf55I+D9wE9AHnAr8FfBv4T0Ap\nInrS0NCDEfE7kjYARMRtafmdwOeAZ1KbS1P9h4FrIuJjg2wzzhaTmVkjefFFeNvbsueiSCIiqpKW\nzjp8FBGfjYhFEbEYWAP8ICJuAnYAa1OztcA9qbwDWCNpmqTFQCvQERE9wFFJy9PE8025ZczMxq0T\nJ+Ccc4qOonpGemRt/0/424DtktYBB4EbASKiU9J2siOV+oD1uZ/964E7genAfRGxc2yhm5kV7/jx\nbPioWZx1+KgIHj4ys/Fk71644YbsuSh1Gz4yM7Oza7aegpOCmdkYHD/eXHMKTgpmZmPQf0hqs3BS\nMDMbA/cUzMyswj0FMzOrOHbMScHMzJKjR5vnukfgpGBmNibNdNlscFIwMxuTkyc9fGRmZsnJkzBt\nWtFRVI+TgpnZGDgpmJlZhZOCmZlVOCmYmVnFyZMwdWrRUVSPk4KZ2Ri4p2BmZhVHj/o8BTMzS3p7\nYe7coqOoHicFM7MxeP55uOiioqOoHicFM7Mx2L8fLryw6Ciq56xJQdK5kh6S9JikTkl/m+rnSGqX\ntE/SLkmzc8tslLRf0l5JK3L1yyTtSe9tqt0umZnVx/Hj0NcHF1xQdCTVc9akEBHHgWsj4nLgMuBa\nSe8DNgDtEbEEeCC9RtJSYDWwFFgJbJbUfzPp24F1EdEKtEpaWYsdMjOrlyNHsl7CpCYacxlyVyLi\ntVScBkwGfg2sArak+i3ADal8PbAtIk5FxEHgALBc0jxgVkR0pHZbc8uYmY1LR47A7NlDtxtPhkwK\nkiZJegzoBR6MiCeBlojoTU16gZZUng905RbvAhYMUt+d6s3Mxq2XXoLzzy86iuqaMlSDiDgNXC7p\nfOD7kq4d8H5IimoG1dbWVimXSiVKpVI1V29mVhVHjhSTFMrlMuVyuSbrHjIp9IuII5K+CywDeiXN\njYieNDR0ODXrBhblFltI1kPoTuV8ffeZtpVPCmZmjaqonsLAH8u33npr1dY91NFHF/QfWSRpOvB7\nwG5gB7A2NVsL3JPKO4A1kqZJWgy0Ah0R0QMclbQ8TTzflFvGzGxcasY5haF6CvOALZImkSWQr0fE\nA5J2A9slrQMOAjcCRESnpO1AJ9AHrI+I/qGl9cCdwHTgvojYWe2dMTOrp8OHYc6coqOoLr3xnd0Y\nJEWjxWRmNphPfhIWLYJPfarYOCQRERq65dCa6OhaM7P6+tWvmq+n4KRgZjZKzz4Lb31r0VFUl5OC\nmdko9fQ01xVSwUnBzGzUmvHoIycFM7NRKurktVpyUjAzG4WTJ+HUKTjvvKIjqS4nBTOzUXjppWzo\nSFU5ELRxOCmYmY1CdzcsaMLLejopmJmNwjPPOCmYmVly+DDMm1d0FNXnpGBmNgrHjsGMGUVHUX1O\nCmZmo3D8OJx7btFRVJ+TgpnZKBw7BtOnFx1F9TkpmJmNwquvNt85CuCkYGY2Ks14NjM4KZiZjUoz\nXvcInBTMzEalqPsz15qTgpnZKLinYGZmFRO2pyBpkaQHJT0p6QlJN6f6OZLaJe2TtEvS7NwyGyXt\nl7RX0opc/TJJe9J7m2qzS2ZmtTeRJ5pPAZ+MiN8FrgL+QtKlwAagPSKWAA+k10haCqwGlgIrgc1S\n5TqCtwPrIqIVaJW0sqp7Y2ZWJxN2+CgieiLisVR+BXgKWACsArakZluAG1L5emBbRJyKiIPAAWC5\npHnArIjoSO225pYxMxs3Tp7MHhP+PAVJFwNXAA8BLRHRm97qBVpSeT7QlVusiyyJDKzvTvVmZuNK\nfy+h2e6lADBluA0lzQT+BfhERLys3KcRESEpqhVUW1tbpVwqlSiVStVatZnZmBU9yVwulymXyzVZ\ntyKG/i6XNBX4DvC9iPhSqtsLlCKiJw0NPRgRvyNpA0BE3Jba7QQ+BzyT2lya6j8MXBMRHxuwrRhO\nTGZmRXn4YfizP4NHHy06kowkIqIq/ZbhHH0k4KtAZ39CSHYAa1N5LXBPrn6NpGmSFgOtQEdE9ABH\nJS1P67wpt4yZ2bjRfyvOZjSc4aP3Ah8BHpe0O9VtBG4DtktaBxwEbgSIiE5J24FOoA9Yn/vpvx64\nE5gO3BcRO6u0H2ZmddOsh6PCMIeP6snDR2bW6L7yFfjxj+GOO4qOJFPX4SMzM/tNTz4J8+cXHUVt\nOCmYmY3Qr37VnPdnBicFM7MRO3AA3vGOoqOoDScFM7MReukluOiioqOoDScFM7MReuUVmDWr6Chq\nw0nBzGyEXnkFZs4sOoracFIwMxuBiCwpzJhRdCS14aRgZjYCx4/DlCkwdWrRkdSGk4KZ2QgcPgwX\nXlh0FLXjpGBmNgLPPAMLFxYdRe04KZiZjcDjj8NllxUdRe04KZiZjcDBg/Dbv110FLXjpGBmNgKH\nDsFb31p0FLXjpGBmNgLPPguLFhUdRe04KZiZjUCz9xR8PwUzs2E6dSo7ae2117JzFRqF76dgZlaA\n7m6YO7exEkK1OSmYmQ3ToUPNPZ8Aw0gKku6Q1CtpT65ujqR2Sfsk7ZI0O/feRkn7Je2VtCJXv0zS\nnvTepurviplZbT3xBFxySdFR1NZwegpfA1YOqNsAtEfEEuCB9BpJS4HVwNK0zGZJ/eNctwPrIqIV\naJU0cJ1mZg3thz+E97+/6Chqa8ikEBH/Cvx6QPUqYEsqbwFuSOXrgW0RcSoiDgIHgOWS5gGzIqIj\ntduaW8bMbFx46qnmveNav9HOKbRERG8q9wItqTwf6Mq16wIWDFLfnerNzMaN7m5Y0OTfXGOeaE7H\nj/oYUjNraidOwJEjzXsbzn6jPbCqV9LciOhJQ0OHU303kJ+bX0jWQ+hO5Xx995lW3tbWVimXSiVK\npdIowzQzq45f/hLmzYNJDXDMZrlcplwu12Tdwzp5TdLFwL0R8c70+ovACxHxBUkbgNkRsSFNNN8F\nXEk2PHQ/8LaICEkPATcDHcB3gS9HxM5BtuWT18ys4fzbv8Ett8CPflR0JG9WzZPXhuwpSNoGXANc\nIOkQ8D+B24DtktYBB4EbASKiU9J2oBPoA9bnvuHXA3cC04H7BksIZmaN6tCh5p9PAF/mwsxsWG65\nJbs38+bNRUfyZr7MhZlZne3bBxNhetNJwcxsGH76U7j88qKjqD0nBTOzIXR1wdGjzX+JC3BSMDMb\n0iOPwHveA5MnFx1J7TkpmJkN4Wc/a/6ro/ZzUjAzO4sI2LoVbryx6Ejqw0nBzOwsnnoqm0+45pqi\nI6kPJwUzs7PYsQNWrWqMy1vUwwTZTTOz0elPChOFz2g2MzuDQ4fg0kvhhRfgnHOKjubMfEazmVkd\nfO97sHJlYyeEanNSMDM7g699Df74j4uOor6cFMzMBtHRAfv3wwc+UHQk9eWkYGY2wLFj8Nd/DX/z\nNxNr6Ag80Wxm9iYf+xgcPgzf/Ob4uLRFXW+yY2Y2kfz938P3vw8//vH4SAjV5qRgZpZs2wZf+lKW\nEObOLTqaYjgpmJkBd90Fn/wk7Nw5MW67eSaeUzCzCe/uu+HP/xzuvx8uu6zoaEZuXJ+8JmmlpL2S\n9kv6TL23b2bW74kn4EMfynoI3/nO+EwI1VbXpCBpMvC/gJXAUuDDki6tZwyjUS6Xiw7hTRzT8DRi\nTNCYcU2UmF57LesZrF6d3XP5qquyK6FeeWVxMTWSevcUrgQORMTBiDgF/DNwfZ1jGLFG/EfgmIan\nEWOCxoyrGWM6dSq7a9r27XDLLfCHf5hNIP/jP2Z3Unv66ax++vT6xdTo6j3RvAA4lHvdBSyvcwxm\n1sAisi/z48ezX/U9PdDXB6++CidOZI9jx7J7HLz8Mhw5kj2/9FJW7unJ7qn83HPZY8kSePvb4Yor\n4E//FO68Ey68sOi9bFz1TgrDmkH+4AeHsaJhzkWPZM76TG3374eHHipm22dq9/Ofw7//ezHbPpOn\nn4Yf/rC66xxpu4Ftn3kGfvCDYrZ9Ns8+C7t2FbPtM7Xr6souAJd34gScPp0tU41H//ZPnMi+6F9/\nPXucPv1GOQKmTIFzz83abNmSvZ4xI6ubNi17Pv98mDUre545E97yFmhpgauvzm6dOW9edhTRtGnD\n/wytzkcfSboKaIuIlen1RuB0RHwh18aHHpmZjVC1jj6qd1KYAvwMeD/wS6AD+HBEPFW3IMzM7Izq\nOnwUEX2S/hL4PjAZ+KoTgplZ42i4k9fMzKw4DXPp7Hqe1CbpDkm9kvbk6uZIape0T9IuSbNz721M\nce2VtCJXv0zSnvTepjHGtEjSg5KelPSEpJuLjkvSuZIekvSYpE5Jf1t0TLn1TZa0W9K9DRTTQUmP\np7g6GiEuSbMlfUvSU+lvuLzgf1NvT59P/+OIpJsb4HPamP7v7ZF0l6RzGiCmT6R1PSHpE6mu9jFF\nROEPsqGkA8DFwFTgMeDSGm7vauAKYE+u7ovALan8GeC2VF6a4pma4jvAGz2sDuDKVL4PWDmGmOYC\nl6fyTLK5l0sbIK7z0vMU4P8B7ys6prSOTwHfAHY0wt8vreNpYM6AuqL/fluA/577G55fdEy52CYB\nzwGLiowprfcXwDnp9f8F1hYc0zuAPcC5ZN+P7cAl9YhpTH/Uaj2A9wA7c683ABtqvM2L+c2ksBdo\nSeW5wN5U3gh8JtduJ3AVMA94Kle/BvjfVYzvHuC6RokLOA/4CfC7RccELATuB64F7m2Uvx9ZUvgP\nA+oKi4ssAfxikPrCP6u0nhXAvxYdEzCH7EfYW8gS573A7xUc038D/in3+n8At9QjpkYZPhrspLZ6\nX6ewJSJ6U7kXaEnl+Smefv2xDazvpkoxS7qYrCfzUNFxSZok6bG07Qcj4smiYwL+Afg0cDpXV3RM\nkJ2Hc7+khyV9tAHiWgw8L+lrkh6V9BVJMwqOKW8NsC2VC4spIl4E/g54luyoyJcior3ImIAngKvT\ncNF5wB+Q/RiqeUyNkhQaarY7spRaSEySZgL/AnwiIl4uOq6IOB0Rl5P9g/wvkq4tMiZJHwQOR8Ru\nYNDjsgv8+703Iq4Afh/4C0lXFxzXFOBdwOaIeBfwKlkvvMiYAJA0Dfgj4JsD3yvg39QlwF+RjR7M\nB2ZK+kiRMUXEXuALwC7ge2RDQ6/XI6ZGSQrdZOOK/Rbxm9mtHnolzQWQNA84fIbYFqbYulM5X989\nlgAkTSVLCF+PiHsaJS6AiDgCfBdYVnBM/xlYJelpsl+Z/1XS1wuOCYCIeC49Pw/cTXatryLj6gK6\nIuIn6fW3yJJET9GfFVnifCR9VlDs5/Ru4EcR8UJE9AHfJhvSLvRziog7IuLdEXEN8GtgH3X4nBol\nKTwMtEq6OP2CWA3sqHMMO8gml0jP9+Tq10iaJmkx0Ap0REQPcDQdzSHgptwyI5bW8VWgMyK+1Ahx\nSbqg/+gGSdPJxll3FxlTRHw2IhZFxGKy4YcfRMRNRcYEIOk8SbNSeQbZePmeIuNK6zokaUmqug54\nkmzMvLDPKvkwbwwd9W+7qJj2AldJmp7WdR3QScGfk6SL0vNbgQ8Bd1GPz2k0kyC1eJD9cvgZ2az5\nxhpvaxvZ2OFJsrmMPyGbbLqfLBvvAmbn2n82xbUX+ECufhnZf/wDwJfHGNP7yMbIHyP74t1Ndonx\nwuIC3gk8mmJ6HPh0qi/0s8qt8xreOPqo6L/f4vQ5PUY2HryxQeL6j2QHCPyU7Bfw+Q0Q0wzgV8Cs\nXF3RMd1CljD3kB2xNbUBYvphiukx4Np6fU4+ec3MzCoaZfjIzMwagJOCmZlVOCmYmVmFk4KZmVU4\nKZiZWYWTgpmZVTgpmJlZhZOCmZlV/H+LfARR37IDCwAAAABJRU5ErkJggg==\n", "text/plain": [ "<matplotlib.figure.Figure at 0x7f6d5a0d9390>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "N_k = n_occurrence_from_D(D, k=100)\n", "print skew(N_k)\n", "plt.figure()\n", "plt.hist(N_k, bins=100);\n", "plt.figure()\n", "plt.plot(np.sort(N_k))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "sort_idx = np.argsort(D, axis=1)\n", "k = 100\n", "D_k = sort_idx[:, 1:(k+1)]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[4650, 2942, 3520, ..., 3488, 2864, 6361],\n", " [1933, 6143, 6757, ..., 2346, 3441, 6857],\n", " [3170, 2549, 4860, ..., 2260, 2978, 7433],\n", " ..., \n", " [6016, 2243, 1616, ..., 3486, 3441, 4554],\n", " [7027, 4860, 6346, ..., 7312, 6972, 5532],\n", " [5119, 1563, 4035, ..., 6253, 7433, 3232]])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D_k" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 515 2549 3486 5020 5119]\n", "['Nigeria' 'Swaziland' 'Kazakhstan' 'Swaziland' 'Pakistan']\n", "['D:/_Audio/Decca-West-African-recordings/025A-1CS0043663XX-0100A0.mp3'\n", " 'D:/_Audio/Rycroft/025A-C0811X0005XX-2000A0.mp3'\n", " 'D:/_Audio/Colin-Huehns-Pakistan/025A-C0485X0085XX-3100A0.mp3'\n", " 'D:/_Audio/Rycroft/025A-C0811X0005XX-1300A0.mp3'\n", " 'D:/_Audio/Colin-Huehns-Pakistan/025A-C0485X0031XX-0500A0.mp3']\n" ] } ], "source": [ "large_hubs_idx = np.where(N_k>7000)[0]\n", "print large_hubs_idx\n", "print Y[large_hubs_idx]\n", "print Yaudio[large_hubs_idx]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7160,)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.where(D_k==515)[0].shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }