Mercurial > hg > plosone_underreview
view notebooks/test_hubness.ipynb @ 9:c4841876a8ff branch-tests
adding notebooks and trying to explain classifier coefficients
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Mon, 11 Sep 2017 19:06:40 +0100 |
parents | 0f3eba42b425 |
children | 8e897e82af51 |
line wrap: on
line source
{ "cells": [ { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "import numpy as np\n", "import pickle\n", "from scipy.stats import pearsonr\n", "from scipy.stats import skew\n", "import sys\n", "from sklearn.metrics.pairwise import pairwise_distances\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "sys.path.append('../')\n", "import scripts.results as results\n", "import scripts.utils_spatial as utils_spatial" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING: there are 21 disconnected observations\n", "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n", "Antigua and Barbuda\n", "Australia\n", "Cuba\n", "Fiji\n", "French Polynesia\n", "Grenada\n", "Iceland\n", "Jamaica\n", "Japan\n", "Kiribati\n", "Malta\n", "New Zealand\n", "Philippines\n", "Puerto Rico\n", "Republic of Serbia\n", "Saint Lucia\n", "Samoa\n", "Solomon Islands\n", "South Korea\n", "The Bahamas\n", "Trinidad and Tobago\n" ] } ], "source": [ "# Load the pre-computed feature matrices and labels.\n", "# NOTE: unpickling can execute arbitrary code; only load trusted files.\n", "# The with-block closes the file handle as soon as the data is read.\n", "with open('../data/lda_data_melodia_8.pickle','rb') as f:\n", "    X_list, Y, Yaudio = pickle.load(f)\n", "ddf = results.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", "# Stack the four feature blocks column-wise into one matrix.\n", "Xrhy, Xmel, Xmfc, Xchr = X_list\n", "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)\n", "\n", "# global outliers\n", "df_global, threshold, MD = results.get_outliers_df(X, Y, chi2thr=0.999)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { 
"text/plain": [ "(8200, 380)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "D = pairwise_distances(X, metric='mahalanobis')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFMBJREFUeJzt3X+s3fV93/HnC1wIaQmelcr8cghSjRZP2Yi84Wjp2pvR\nMbdqgUoRONIo2qxqirsl6rRqJn8Usz/SEGljVBP8UzqM11KssRCqIIJDuVr3B7ipYHHjUIwUV9jg\nS2dSQhRlxeK9P873fv3l+vrec6+v7/d7730+pKv7PZ/z/R6/z8fn3Nf5fL4/TqoKSZIALui7AEnS\ncBgKkqSWoSBJahkKkqSWoSBJahkKkqTWWKGQ5GiSbyd5McnBpm1DkgNJXknyTJL1nfXvSnIkyctJ\nbuq0b01yqLnv/k77xUkea9qfT3LNUj5JSdJ4xh0pFDBRVZ+oqhuatt3Agaq6Dni2uU2SLcDtwBZg\nO/BAkjTbPAjsrKrNwOYk25v2ncDJpv0+4N5zfF6SpEVYyPRRZty+GdjbLO8Fbm2WbwEerap3q+oo\n8CqwLckVwKVVdbBZ75HONt3Hehy4cQF1SZKWyEJGCt9M8q0kv960bayqqWZ5CtjYLF8JHOtsewy4\napb24007ze/XAKrqFPB2kg0LeSKSpHO3bsz1PlVVbyT5aeBAkpe7d1ZVJfF6GZK0wo0VClX1RvP7\nr5N8FbgBmEpyeVWdaKaG3mxWPw5s6mx+NaMRwvFmeWb79DYfAV5Psg64rKre6tZg6EjS4lTVzOn/\ns5p3+ijJB5Nc2iz/JHATcAh4ErizWe1O4Ilm+UlgR5KLklwLbAYOVtUJ4AdJtjU7nu8AvtbZZvqx\nPsNox/VsT2xQP3fffXfvNVjT6qrLmqxpqX8WapyRwkbgq80BROuAP6iqZ5J8C9ifZCdwFLit+cN9\nOMl+4DBwCthVpyvbBTwMXAI8VVVPN+0PAfuSHAFOAjsW/EwkSeds3lCoqu8B18/S/hbwC2fZ5kvA\nl2Zp/3Pg47O0/z+aUJEk9cczms/BxMRE3yWcwZrGN8S6rGk81nT+ZDFzTn1IUiulVkkaiiTUUu5o\nliStHYaCJKllKEiSWuOe0SxpQE5fY3LE/W1aKo4UpBWrmh9p6ThSkFYBRw5aKoaCtELM/MP/ft0Q\nyPvWNSC0EE4fSSvKuFNGTi1pcQwFSVLL6SNplXMqSQvhSEFa9ZxK0vgMBUlSy+kjaQ1xKknzMRSk\nAZv7MNTFmA6CpX5crRZOH0mD5z4BLR9DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU8uQ1\naUCW/mQ1aWEMBWlw3v+FOdJycvpIktRypCCtUV4cT7NxpCCtWV5TSWcyFCRJLUNBktQyFCRJLUNB\nktQyFCRJLUNBktQyFCRJrbFCIcmFSV5M8sfN7Q1JDiR5JckzSdZ31r0ryZEkLye5qdO+Ncmh5r77\nO+0XJ3msaX8+yTVL+QSloUvS/kh9G3ek8AXgMKfPdNkNHKiq64Bnm9sk2QLcDmwBtgMP5PQr/UFg\nZ1VtBjYn2d607wRON
u33Afee21OSVqJ+TyTrBpPhtLbNGwpJrgZ+Cfg9Tl+d62Zgb7O8F7i1Wb4F\neLSq3q2qo8CrwLYkVwCXVtXBZr1HOtt0H+tx4MZFPxtJi1T0HUwahnFGCvcBvwW812nbWFVTzfIU\nsLFZvhI41lnvGHDVLO3Hm3aa368BVNUp4O0kGxbwHCRJS2TOC+Il+WXgzap6McnEbOtUVSVZlo8X\ne/bsaZcnJiaYmJi1JElasyYnJ5mcnFz09pnr6ohJvgTcAZwCPgB8CPifwD8CJqrqRDM19FxV/d0k\nuwGq6svN9k8DdwN/1azzsab9s8DPVdXnmnX2VNXzSdYBb1TVT89SS3klR61Gozn86dd2d3nm7XGW\nF7PNmdv7Xls9klBVY+8omnP6qKq+WFWbqupaYAfwJ1V1B/AkcGez2p3AE83yk8COJBcluRbYDBys\nqhPAD5Jsa3Y83wF8rbPN9GN9htGOa0lSDxb6fQrTHx++DOxPshM4CtwGUFWHk+xndKTSKWBX5+P9\nLuBh4BLgqap6uml/CNiX5AhwklH4SJJ6MOf00ZA4faTVyukjnU9LOn0kSVpbDAVJUstQkCS1DAVJ\nUstQkCS1DAVJUstQkCS1DAVJUmuhZzRLWgJD/86Cbn2eyLa2OFKQejPk7y8Ycm06nwwFSVLLUJAk\ntQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLy1xImpOXvFhbHClImoeXvFhLDAVJUstQ\nkCS1DAVJUssdzdIyGfp3KEjgSEFaZu601bAZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKk\nlqEgSWoZCpKk1pyhkOQDSV5I8lKSw0l+p2nfkORAkleSPJNkfWebu5IcSfJykps67VuTHGruu7/T\nfnGSx5r255Nccz6eqCRpfnOGQlX9GPh0VV0P/H3g00l+FtgNHKiq64Bnm9sk2QLcDmwBtgMP5PQF\nXx4EdlbVZmBzku1N+07gZNN+H3DvUj5BSUsnSfuj1Wne6aOq+lGzeBFwIfB94GZgb9O+F7i1Wb4F\neLSq3q2qo8CrwLYkVwCXVtXBZr1HOtt0H+tx4MZFPxtJ55nXblrt5g2FJBckeQmYAp6rqu8AG6tq\nqlllCtjYLF8JHOtsfgy4apb24007ze/XAKrqFPB2kg2LezqSpHMx76Wzq+o94PoklwHfSPLpGfdX\nEj86SNIqMPb3KVTV20m+DmwFppJcXlUnmqmhN5vVjgObOptdzWiEcLxZntk+vc1HgNeTrAMuq6q3\nZqthz5497fLExAQTExPjli9Ja8Lk5CSTk5OL3j5VZ/+Qn+TDwKmq+psklwDfAO4B/jmjncP3JtkN\nrK+q3c2O5j8EbmA0LfRN4Gea0cQLwOeBg8DXgd+tqqeT7AI+XlWfS7IDuLWqdsxSS81VqzREZ+6Q\nnX4NZ4zlcdc7120Wt73vx5UhCVU19pEB840UrgD2JrmA0f6HfVX1bJIXgf1JdgJHgdsAqupwkv3A\nYeAUsKvzl3wX8DBwCfBUVT3dtD8E7EtyBDgJnBEI0srW/aMqDducI4UhcaSglWg0Ujj/n9odKehs\nFjpS8IxmSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktcY+o1mSumaemOchqquDoSBpkWae26DVwOkj\nSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktTwkVVpCZ36pjrSyGArSkvP4fa1cTh9JklqGgiSpZShI\nklqGgiSpZShIklqGgiSp5SGpkpZE9xwNv1th5XKkIGmJFO8/R0MrkaEgSWoZCpKklqEgSWoZCpKk\nlqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWp5QTzpHHUvBCetdPOOFJJsSvJcku8k+Ysk\nn2/aNyQ5kOSVJM8kWd/Z5q4kR5K8nOSmTvvWJIea++7vtF+c5LGm/fkk1yz1E5XOLy8
Gp9VhnOmj\nd4HfrKq/B3wS+I0kHwN2Aweq6jrg2eY2SbYAtwNbgO3AAzn9UepBYGdVbQY2J9netO8ETjbt9wH3\nLsmzk9SLJO2PVpZ5Q6GqTlTVS83yD4HvAlcBNwN7m9X2Arc2y7cAj1bVu1V1FHgV2JbkCuDSqjrY\nrPdIZ5vuYz0O3HguT0pS3xw5rVQL2tGc5KPAJ4AXgI1VNdXcNQVsbJavBI51NjvGKERmth9v2ml+\nvwZQVaeAt5NsWEhtkqRzN3YoJPkpRp/iv1BV73Tvq9HXLPmxQJJWuLGOPkryE4wCYV9VPdE0TyW5\nvKpONFNDbzbtx4FNnc2vZjRCON4sz2yf3uYjwOtJ1gGXVdVbM+vYs2dPuzwxMcHExMQ45UvSmjE5\nOcnk5OSit89836Xa7CTey2hH8G922r/StN2bZDewvqp2Nzua/xC4gdG00DeBn6mqSvIC8HngIPB1\n4Her6ukku4CPV9XnkuwAbq2qHTPqKL/3VUM0eotMvza7yzNvL3R5ubY5v/+m79t+JaGqxt7jP04o\n/Czwv4Bvc/p/+i5Gf9j3M/qEfxS4rar+ptnmi8C/Ak4xmm76RtO+FXgYuAR4qqqmD2+9GNjHaH/F\nSWBHs5O6W4ehoEEyFOZez/dtv5Y8FIbCUNBQGQpzr+f7tl8LDQUvcyFJahkKkqSWoSBJahkKkqSW\nV0mVFsFr+oyv21fudB4+RwrSonki/3jsp5XEUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAk\ntQwFSVLLUJAktQwFSVLLax9JWjZeB2n4HClIWkZeB2noHClIY/LKqFoLHClIC+InXa1uhoIkqWUo\nSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqeXJa5J6MfNkQC97MQyGgqSedEPAs8WHwukjSVLLUJAk\ntZw+kubgRfC01jhSkOblRfC0dhgKkqSWoSBJahkKkqTWvKGQ5PeTTCU51GnbkORAkleSPJNkfee+\nu5IcSfJykps67VuTHGruu7/TfnGSx5r255Ncs5RPUJI0vnFGCv8N2D6jbTdwoKquA55tbpNkC3A7\nsKXZ5oGcPnzjQWBnVW0GNieZfsydwMmm/T7g3nN4PpJWqCTtj/ozbyhU1Z8C35/RfDOwt1neC9za\nLN8CPFpV71bVUeBVYFuSK4BLq+pgs94jnW26j/U4cOMinoekFc+jvIZgsfsUNlbVVLM8BWxslq8E\njnXWOwZcNUv78aad5vdrAFV1Cng7yYZF1iVJOgfnfPJaVVWSZYn3PXv2tMsTExNMTEwsxz8rSSvG\n5OQkk5OTi95+saEwleTyqjrRTA292bQfBzZ11rua0QjheLM8s316m48ArydZB1xWVW/N9o92Q0GS\ndKaZH5jvueeeBW2/2OmjJ4E7m+U7gSc67TuSXJTkWmAzcLCqTgA/SLKt2fF8B/C1WR7rM4x2XEuS\nejDvSCHJo8DPAx9O8hrw28CXgf1JdgJHgdsAqupwkv3AYeAUsKtOXyR9F/AwcAnwVFU93bQ/BOxL\ncgQ4CexYmqcmLZxHvmity0r5YosktVJq1co1CoWZ1/mvRS73sf1q+TdP831/bpJQVWN/2vGMZkkD\n5OGpfTEUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1Drnax9JK50nrA1b9//HcxbOP0cKEuBx8UPm\n/81yMhQkSS1DQZLUMhQkSS1DQZLU8ugjSSuGRyKdf44UJK0gHol0vjlS0JrkuQnS7BwpaA3zU6c0\nk6EgSWo5fSRpRZo5BeiO56VhKEhaoWZ+x7OWgtNHkqSWoSBJajl9pDXDw1Cl+RkKWmOm56ENiNXG\ns52XhtNHklYJzztZCoaCJKllKEiSWu5T0KrljuW1y/0Li2coaJXzBKe1yQMKFsvpI0lSy5GCpFXN\nqaSFMRS0qrgfQWdyKmkhDAWtQv4R0Oy8sur8DAW
teI4OND4PPJjPigqFw4cPA7B+/XquvPLKnqvR\nsDg60MK5v+FMgzn6KMn2JC8nOZLkP8y2zic/+Rm2br2R3bvvXu7yNDBJ2h9p8bw0xkyDCIUkFwL/\nFdgObAE+m+RjM9d7553D/PjH/5FTp5a7wtlNTk72XcIZVnNNZwbBub6hJ8+9qDVhsu8ClsW5ftAY\n4ntvMQYRCsANwKtVdbSq3gX+CLil55rmNcQXweqvaSk/2U0u0eOsdpN9F7BMTr+2FhMQQ3zvLcZQ\nQuEq4LXO7WNNm9aw7hvTaSItr9kDYi28Joeyo3msj34f+tCv8Ld/+1dceOG2812PzrPx31AeLaK+\ndQ9ieP/rcebr+J577jlz6xW2AztDKDjJJ4E9VbW9uX0X8F5V3dtZp/9CJWkFqqqxP1ENJRTWAX8J\n3Ai8DhwEPltV3+21MElaYwYxfVRVp5L8G+AbwIXAQwaCJC2/QYwUJEnDMJSjj85qnJPa+pDkaJJv\nJ3kxycGeavj9JFNJDnXaNiQ5kOSVJM8kWT+AmvYkOdb01YtJti9zTZuSPJfkO0n+Isnnm/be+mqO\nmnrrqyQfSPJCkpeSHE7yO01736+ps9XV6+uqqeHC5t/+4+Z2r311lpoW1E+DHik0J7X9JfALwHHg\nzxjIvoYk3wO2VtVbPdbwT4AfAo9U1cebtq8A/7eqvtKE6N+pqt0913Q38E5V/eflqmNGTZcDl1fV\nS0l+Cvhz4FbgX9JTX81R023021cfrKofNfv5/jfw74Gb6fE1NUddN9JjXzV1/TtgK3BpVd3c9/vv\nLDUt6P039JHC0E9q6/UYyar6U+D7M5pvBvY2y3sZ/aHpuybosa+q6kRVvdQs/xD4LqPzYHrrqzlq\ngn776kfN4kWM9u99n55fU3PUBT32VZKrgV8Cfq9TR699dZaawgL6aeihMOST2gr4ZpJvJfn1vovp\n2FhVU83yFLCxz2I6/m2S/5PkoT6G1NOSfBT4BPACA+mrTk3PN0299VWSC5K8xKg/nquq7zCAfjpL\nXdDv6+o+4LeA9zptfffVbDUVC+inoYfCcOe24FNV9QngF4HfaKZNBqVGc4ND6MMHgWuB64E3gP/U\nRxHNNM3jwBeq6p3ufX31VVPT/2hq+iE991VVvVdV1wNXAz+X5NMz7u+ln2apa4Ie+yrJLwNvVtWL\nnOVT+HL31Rw1Laifhh4Kx4FNndubGI0WeldVbzS//xr4KqOpriGYauarSXIF8GbP9VBVb1aD0bB2\n2fsqyU8wCoR9VfVE09xrX3Vq+u/TNQ2hr5o63ga+zmhuejCvqU5d/7DnvvrHwM3NvsVHgX+aZB/9\n9tVsNT2y0H4aeih8C9ic5KNJLgJuB57suSaSfDDJpc3yTwI3AYfm3mrZPAnc2SzfCTwxx7rLonlz\nTPtVlrmvkgR4CDhcVf+lc1dvfXW2mvrsqyQfnp5aSHIJ8M+AF+n5NXW2uqb/+DaWta+q6otVtamq\nrgV2AH9SVXfQY1+dpaZfW+hrahAnr53NgE9q2wh8dfS+Zh3wB1X1zHIXkeRR4OeBDyd5Dfht4MvA\n/iQ7gaOMjmbps6a7gYkk1zMaSn8P+NfLWRPwKeBfAN9O8mLTdhf99tVsNX2R0WXj++qrK4C9SS5g\n9IFxX1U929TX22tqjroe6fl11TU9TdTr+6+je6GmryT5B4zZT4M+JFWStLyGPn0kSVpGhoIkqWUo\nSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqfX/AXYOauaNcBQ7AAAAAElFTkSuQmCC\n", "text/plain": [ "<matplotlib.figure.Figure at 0x1174100d0>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.hist(D.ravel(), bins=100);" ] }, { 
"cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def n_occurrence_from_D(D, k=10, n_items=None):\n", "    \"\"\"Count how often each item is among the k nearest neighbours of the others.\n", "\n", "    D is a square matrix of pairwise distances (row i holds the distances\n", "    from item i). Returns an integer array N_k with at least n_items entries\n", "    (n_items defaults to len(D)), where N_k[j] is the number of rows that\n", "    list item j among their k nearest neighbours. Raises ValueError if k < 0.\n", "    \"\"\"\n", "    if k < 0:\n", "        raise ValueError('k must be non-negative')\n", "    D = np.asarray(D)\n", "    if n_items is None:\n", "        n_items = len(D)\n", "    if D.size == 0:\n", "        return np.zeros(n_items, dtype=int)\n", "    sort_idx = np.argsort(D, axis=1)\n", "    # keep k + 1 candidates per row so that k remain once the item itself is removed\n", "    candidates = sort_idx[:, :(k + 1)]\n", "    # Remove the item by index, not by position: with duplicate points (ties at\n", "    # distance 0) the item is not guaranteed to be sorted first in its own row.\n", "    drop = candidates == np.arange(len(D))[:, None]\n", "    # Item not among its own candidates: the first k candidates are its neighbours.\n", "    drop[~drop.any(axis=1), -1] = True\n", "    # exactly one entry is dropped per row, so the result is rectangular\n", "    D_k = candidates[~drop].reshape(len(D), candidates.shape[1] - 1)\n", "    N_k = np.bincount(D_k.ravel(), minlength=n_items)\n", "    return N_k" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.71175956476\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEACAYAAAC08h1NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFDVJREFUeJzt3VGMHdd93/Hvj6KomJItmkhBkZQCsYlYiUXb2Ilop7WR\nTauySpCQepIYIAZhq3kREjsomogU0EpPrkwgiFMUemhiGxvDYsI6rkAVjkxK1kVTpBDtWEoUUSzJ\ntmy0crmyJduym6Jd7v77cIfl1Wa1u3f37nLF8/0AC505c2bumSPub+aembs3VYUkqR3rrnQHJEmr\ny+CXpMYY/JLUGINfkhpj8EtSYwx+SWrMgsGf5FCSl5K8mOTxJNcl2ZzkRJIzSY4n2TSr/dkkp5Ps\nWdnuS5KGlfme409yK/BV4I6q+j9J/gD4MvC3gW9X1eEkDwLvraqDSXYBjwN3AtuBp4GdVTWzsoch\nSVqsha743wSmgI1J1gMbgW8Ce4Hxrs04cE9X3gccqaqpqjoPnAN2j7rTkqSlmzf4q+oN4DeBv6Qf\n+N+tqhPAlqqa7JpNAlu68jZgYmAXE/Sv/CVJa8S8wZ/kR4FfA26lH+o3JPmlwTbVnyua7+8++Dch\nJGkNWb/A+p8E/qSqXgdI8iXgp4ALSW6qqgtJtgKvde1fBW4Z2P7mru4tkngykKQlqKosdx8LzfGf\nBj6Y5F1JAtwFnAKeBA50bQ4AT3TlY8D+JBuS7ABuA06+Tef9qeLhhx++4n1YKz+OhWPhWMz/Myrz\nXvFX1Z8l+T3g68AM8A3g3wLvBo4muR84D9zbtT+V5Cj9k8NF4IEaZW8lScu20FQPVXUYODyr+g36\nV/9ztf8k8Mnld02StBL85O4VNjY2dqW7sGY4Fpc5Fpc5FqM37we4VuxFE2eAJGlISahVuLkrSbrK\nGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjFvyzzCvl\n+us3A3Dddet5443XFmgtSRqVK/bXOeF1YIp1625menpq1fsgSe80o/rrnFfsih82Awa+JK025/gl\nqTEGvyQ1ZsHgT/K3kjw/8PO9JB9PsjnJiSRnkhxPsmlgm0NJziY5nWTPyh6CJGkYQ93cTbIOeBXY\nDfwq8O2qOpzkQeC9VXUwyS7gceBOYDvwNLCzqmYG9lNQ9G/ubvTmriQtwpX66sW7gHNV9QqwFxjv\n6seBe7ryPuBIVU1V1XngHP0ThSRpDRg2+PcDR7rylqqa7MqTwJauvA2YGNhmgv6VvyRpDVh08CfZ\nAPwC8O9mr6v+fNF8c0ar/2EBSdKchnmO/2eBP62qb3XLk0luqqoLSbYClz5++ypwy8B2N
3d1szwC\nTDMzM02v12NsbGzIrkvS1a3X69Hr9Ua+30Xf3E3y+8AfVdV4t3wYeL2qPpXkILBp1s3d3Vy+uftj\nNfBC3tyVpOGN6ubuooI/yfXA/wB2VNX3u7rNwFHgR4DzwL1V9d1u3UPAx4CLwCeq6iuz9mfwS9KQ\nVjX4R83gl6ThXanHOSVJ73AGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8k\nNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxiwq+JNsSvLFJC8nOZXk\nA0k2JzmR5EyS40k2DbQ/lORsktNJ9qxc9yVJw1rsFf9vA1+uqjuAvwucBg4CJ6pqJ/BMt0ySXcB9\nwC7gbuCxJL6zkKQ1YsFATnIj8OGq+ixAVV2squ8Be4Hxrtk4cE9X3gccqaqpqjoPnAN2j7rjkqSl\nWcyV+A7gW0k+l+QbSX4nyfXAlqqa7NpMAlu68jZgYmD7CWD7yHosSVqW9Yts837gV6rqa0k+TTet\nc0lVVZKaZx9zrHsEmGZmZpper8fY2Nhi+yxJTej1evR6vZHvN1Xz5TUkuQn4z1W1o1v+EHAI+JvA\nz1TVhSRbgWer6vYkBwGq6tGu/VPAw1X13MA+q38umGLduo1MT0+N/MAk6WqThKrKcvez4FRPVV0A\nXkmys6u6C3gJeBI40NUdAJ7oyseA/Uk2JNkB3AacXG5HJUmjsZipHoBfBb6QZAPwX4GPAtcAR5Pc\nD5wH7gWoqlNJjgKngIvAA7XQ2wpJ0qpZcKpnRV7UqR5JGtqqTfVIkq4uBr8kNcbgl6TGGPyS1BiD\nX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfgl\nqTEGvyQ1xuCXpMYsKviTnE/y50meT3Kyq9uc5ESSM0mOJ9k00P5QkrNJTifZs1KdlyQNb7FX/AWM\nVdX7qmp3V3cQOFFVO4FnumWS7ALuA3YBdwOPJfGdhSStEcME8uxvdt8LjHflceCerrwPOFJVU1V1\nHjgH7EaStCYMc8X/dJKvJ/nlrm5LVU125UlgS1feBkwMbDsBbF92TyVJI7F+ke3+QVX9zyR/AziR\n5PTgyqqqJDXP9nOsewSYZmZmml6vx9jY2CK7Iklt6PV69Hq9ke83VfPl9RwbJA8DPwB+mf68/4Uk\nW4Fnq+r2JAcBqurRrv1TwMNV9dzAPqp/Lphi3bqNTE9PjehwJOnqlYSqmj3tPrQFp3qSbEzy7q58\nPbAHeBE4Bhzomh0AnujKx4D9STYk2QHcBpxcbkclSaOxmKmeLcC/T3Kp/Req6niSrwNHk9wPnAfu\nBaiqU0mOAqeAi8ADNezbCknSihl6qmckL+pUjyQNbdWmeiRJVxeDX5IaY/BLUmMMfklqjMEvSY0x\n+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINf\nkhpj8EtSYxYV/EmuSfJ8kie75c1JTiQ5k+R4kk0DbQ8lOZvkdJI9K9VxSdLSLPaK/xPAKfrfkA5w\nEDhRVTuBZ7plkuwC7gN2AXcDjyXxXYUkrSELhnKSm4GfA34XuPTt7nuB8a48DtzTlfcBR6pqqqrO\nA+eA3aPssCRpeRZzNf5bwK8DMwN1W6pqsitPAlu68jZgYqDdBLB9uZ2UJI3O+vlWJvl54LWqej7J\n2FxtqqqS1FzrLjWZu/oRYJqZmWl6vR5jY3PuXpKa1ev16PV6I99vqt4+s5N8EvgIcBH4IeA9wJeA\nO4GxqrqQZCvwbFXdnuQgQFU92m3/FPBwVT03a7/VPx9MsW7dRqanp0Z+YJJ0tUlCVWXhlvObd6qn\nqh6qqluqagewH/hqVX0EOAYc6JodAJ7oyseA/Uk2J
NkB3AacXG4nJUmjM+9UzxwuvT14FDia5H7g\nPHAvQFWdSnKU/hNAF4EHar63FJKkVTfvVM+KvahTPZI0tFWZ6pEkXX0MfklqjMEvSY0x+CWpMQa/\nJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtS\nYwx+SWqMwS9JjZk3+JP8UJLnkryQ5FSSf9XVb05yIsmZJMeTbBrY5lCSs0lOJ9mz0gcgSRrOgl+2\nnmRjVf1VkvXAfwL+ObAX+HZVHU7yIPDeqjqYZBfwOHAnsB14GthZVTOz9umXrUvSkFbty9ar6q+6\n4gbgGuA79IN/vKsfB+7pyvuAI1U1VVXngXPA7uV2UpI0OgsGf5J1SV4AJoFnq+olYEtVTXZNJoEt\nXXkbMDGw+QT9K39J0hqxfqEG3TTNjye5EfhKkp+Ztb76Uzdvv4u5qx8BppmZmabX6zE2NrbYPktS\nE3q9Hr1eb+T7XXCO/y2Nk38B/G/gnwJjVXUhyVb67wRuT3IQoKoe7do/BTxcVc/N2o9z/JI0pFWZ\n40/yw5ee2EnyLuAfA88Dx4ADXbMDwBNd+RiwP8mGJDuA24CTy+2kJGl0Fprq2QqMJ1lH/yTx+ap6\nJsnzwNEk9wPngXsBqupUkqPAKeAi8EAN85ZCkrTihprqGdmLOtUjSUNbtcc5JUlXF4Nfkhpj8EtS\nYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG\n4Jekxhj8ktQYg1+SGmPwS1JjFgz+JLckeTbJS0n+IsnHu/rNSU4kOZPkeJJNA9scSnI2yekke1by\nACRJw1nwy9aT3ATcVFUvJLkB+FPgHuCjwLer6nCSB4H3VtXBJLuAx4E7ge3A08DOqpoZ2Kdfti5J\nQ1q1L1uvqgtV9UJX/gHwMv1A3wuMd83G6Z8MAPYBR6pqqqrOA+eA3cvtqCRpNIaa409yK/A+4Dlg\nS1VNdqsmgS1deRswMbDZBP0ThSRpDVi/2IbdNM8fAp+oqu8nl99tVFX1p2/e1hzrHgGmmZmZptfr\nMTY2ttiuSFITer0evV5v5PtdcI4fIMm1wH8A/qiqPt3VnQbGqupCkq3As1V1e5KDAFX1aNfuKeDh\nqnpuYH/O8UvSkFZtjj/9S/vPAKcuhX7nGHCgKx8Anhio359kQ5IdwG3AyeV2VJI0Got5qudDwH8E\n/pzLUzaH6If5UeBHgPPAvVX13W6bh4CPARfpTw19ZdY+veKXpCGN6op/UVM9o2bwS9LwVm2qR5J0\ndTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQY\ng1+SGmPwS1JjDH5JaozBL0mNMfglqTGL+bL1zyaZTPLiQN3mJCeSnElyPMmmgXWHkpxNcjrJnpXq\nuCRpaRZzxf854O5ZdQeBE1W1E3imWybJLuA+YFe3zWNJfFchSWvIgqFcVX8MfGdW9V5gvCuPA/d0\n5X3AkaqaqqrzwDlg92i6KkkahaVejW+pqsmuPAls6crbgImBdhPA9iW+hiRpBSx7GqaqCqj5miz3\nNSRJo7N+idtNJrmpqi4k2Qq81tW/Ctwy0O7mrm4OjwDTzMxM0+v1GBsbW2JXJOnq1Ov16PV6I99v\n+hfsCzRKbgWerKq/0y0fBl6vqk8lOQhsqqqD3c3dx+nP628HngZ+rGa9SJLqvxGYYt26jUxPT43y\nmCTpqpSEqspy97PgFX+SI8BPAz+c5BXgXwKPAkeT3A+cB+4FqKpTSY4Cp4CLwAOzQ1+SdGUt6op/\n5C/qFb8kDW1UV/w+Yy9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWp\nMQa/JDXG4Jekx
hj8ktQYg1+SGmPwS1JjlvrViyMzM3OR5K1/XtrvbpEk/lo2jsoVD/6+waBfmQOV\npHem0eejUz2S1BiDX5IasyLBn+TuJKeTnE3y4BK2//8/kqTRGnnwJ7kG+DfA3cAu4BeT3DHcXoq3\nzmvN+TpXxQmi1+td6S6sGY7FZY7FZY7F6K3EFf9u4FxVna+qKeD3gX1L3dnsgH9r0F8+QbxTTwT+\no77MsbjMsbjMsRi9lXiqZzvwysDyBPCBpe9u9h3tGii/fbu3C//BR0WX8hipj55KWkmrceG6EsG/\nqCR8z3t+AZjhzTdXoAdv6cZbTxZ/fVDnW7eYfc9toX3NdcJYjf/hyz1RLfbEN8yxDNunpfZhMSf9\nufr9duvWwkl/Jfs0irFYyu/BWrTU4xjm39msPV7aw2K6N7SMeuCTfBB4pKru7pYPATNV9amBNu+M\n/9uStMZU1bLPBisR/OuB/wL8I+CbwEngF6vq5ZG+kCRpSUY+1VNVF5P8CvAV4BrgM4a+JK0dI7/i\nlyStbav+yd3lfrjrnSTJLUmeTfJSkr9I8vGufnOSE0nOJDmeZNPANoe6sTmdZM+V6/3KSHJNkueT\nPNktNzkWSTYl+WKSl5OcSvKBhsfiUPc78mKSx5Nc18pYJPlskskkLw7UDX3sSX6iG7+zSX57wReu\nqlX7oT/1cw64FbgWeAG4YzX7sMrHexPw4135Bvr3Pu4ADgO/0dU/CDzalXd1Y3JtN0bngHVX+jhG\nPCb/DPgCcKxbbnIsgHHgY115PXBji2PRHc9/A67rlv8AONDKWAAfBt4HvDhQN8yxX5q1OQns7spf\nBu6e73VX+4p/pB/uWuuq6kJVvdCVfwC8TP9zDnvp/+LT/feerrwPOFJVU1V1nv7/2N2r2ukVlORm\n4OeA3+Xyc2rNjUWSG4EPV9VnoX9frKq+R4NjAbwJTAEbuwdDNtJ/KKSJsaiqPwa+M6t6mGP/QJKt\nwLur6mTX7vcGtpnTagf/XB/u2r7KfbgiktxK/8z+HLClqia7VZPAlq68jf6YXHK1jc9vAb8OzAzU\ntTgWO4BvJflckm8k+Z0k19PgWFTVG8BvAn9JP/C/W1UnaHAsBgx77LPrX2WBMVnt4G/yTnKSG4A/\nBD5RVd8fXFf992bzjctVMWZJfh54raqe520+ldLKWNCf2nk/8FhVvR/4X8DBwQatjEWSHwV+jf7U\nxTbghiS/NNimlbGYyyKOfUlWO/hfBW4ZWL6Ft56prjpJrqUf+p+vqie66skkN3XrtwKvdfWzx+fm\nru5q8PeBvUn+O3AE+IdJPk+bYzEBTFTV17rlL9I/EVxocCx+EviTqnq9qi4CXwJ+ijbH4pJhficm\nuvqbZ9XPOyarHfxfB25LcmuSDcB9wLFV7sOqSf9z2Z8BTlXVpwdWHaN/A4vuv08M1O9PsiHJDuA2\n+jdt3vGq6qGquqWqdgD7ga9W1UdocywuAK8k2dlV3QW8BDxJY2MBnAY+mORd3e/LXcAp2hyLS4b6\nnej+Pb3ZPRkW4CMD28ztCtzF/ln6T7ecAw5d6bvqK3ysH6I/n/0C8Hz3czewGXgaOAMcBzYNbPNQ\nNzangX9ypY9hhcblp7n8VE+TYwH8PeBrwJ/Rv8q9seGx+A36J74X6d/MvLaVsaD/7vebwP+lf//z\no0s5duAnuvE7B/zrhV7XD3BJUmP86kVJaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtS\nY/4f5A0BHDAleMkAAAAASUVORK5CYII=\n", "text/plain": [ "<matplotlib.figure.Figure at 0x1e926a590>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "N_k = n_occurrence_from_D(D, k=100)\n", "print 
skew(N_k)\n", "plt.hist(N_k, bins=100);" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "N_k" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }