annotate notebooks/test_hubness.ipynb @ 8:0f3eba42b425 branch-tests

added notebooks and utils
author Maria Panteli <m.x.panteli@gmail.com>
date Mon, 11 Sep 2017 18:23:14 +0100
parents
children 8e897e82af51
rev   line source
m@8 1 {
m@8 2 "cells": [
m@8 3 {
m@8 4 "cell_type": "code",
m@8 5 "execution_count": 13,
m@8 6 "metadata": {
m@8 7 "collapsed": false
m@8 8 },
m@8 9 "outputs": [
m@8 10 {
m@8 11 "name": "stdout",
m@8 12 "output_type": "stream",
m@8 13 "text": [
m@8 14 "The autoreload extension is already loaded. To reload it, use:\n",
m@8 15 " %reload_ext autoreload\n"
m@8 16 ]
m@8 17 }
m@8 18 ],
m@8 19 "source": [
m@8 20 "import numpy as np\n",
m@8 21 "import pickle\n",
m@8 22 "from scipy.stats import pearsonr\n",
m@8 23 "from scipy.stats import skew\n",
m@8 24 "import sys\n",
m@8 25 "from sklearn.metrics.pairwise import pairwise_distances\n",
m@8 26 "%matplotlib inline\n",
m@8 27 "import matplotlib.pyplot as plt\n",
m@8 28 "\n",
m@8 29 "%load_ext autoreload\n",
m@8 30 "%autoreload 2\n",
m@8 31 "\n",
m@8 32 "sys.path.append('../')\n",
m@8 33 "import scripts.results as results\n",
m@8 34 "import scripts.utils_spatial as utils_spatial"
m@8 35 ]
m@8 36 },
m@8 37 {
m@8 38 "cell_type": "code",
m@8 39 "execution_count": 2,
m@8 40 "metadata": {
m@8 41 "collapsed": false
m@8 42 },
m@8 43 "outputs": [
m@8 44 {
m@8 45 "name": "stdout",
m@8 46 "output_type": "stream",
m@8 47 "text": [
m@8 48 "WARNING: there are 21 disconnected observations\n",
m@8 49 "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n",
m@8 50 "Antigua and Barbuda\n",
m@8 51 "Australia\n",
m@8 52 "Cuba\n",
m@8 53 "Fiji\n",
m@8 54 "French Polynesia\n",
m@8 55 "Grenada\n",
m@8 56 "Iceland\n",
m@8 57 "Jamaica\n",
m@8 58 "Japan\n",
m@8 59 "Kiribati\n",
m@8 60 "Malta\n",
m@8 61 "New Zealand\n",
m@8 62 "Philippines\n",
m@8 63 "Puerto Rico\n",
m@8 64 "Republic of Serbia\n",
m@8 65 "Saint Lucia\n",
m@8 66 "Samoa\n",
m@8 67 "Solomon Islands\n",
m@8 68 "South Korea\n",
m@8 69 "The Bahamas\n",
m@8 70 "Trinidad and Tobago\n"
m@8 71 ]
m@8 72 }
m@8 73 ],
m@8 74 "source": [
m@8 75 "with open('../data/lda_data_melodia_8.pickle', 'rb') as f:  # close the handle deterministically\n",
m@8 76 "    X_list, Y, Yaudio = pickle.load(f)\n",
m@8 77 "ddf = results.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n",
m@8 78 "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n",
m@8 79 "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n",
m@8 80 "Xrhy, Xmel, Xmfc, Xchr = X_list\n",
m@8 81 "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)  # join the four feature blocks column-wise\n",
m@8 82 "# global outliers\n",
m@8 83 "df_global, threshold, MD = results.get_outliers_df(X, Y, chi2thr=0.999)"
m@8 84 ]
m@8 85 },
m@8 86 {
m@8 87 "cell_type": "code",
m@8 88 "execution_count": 3,
m@8 89 "metadata": {
m@8 90 "collapsed": false
m@8 91 },
m@8 92 "outputs": [
m@8 93 {
m@8 94 "data": {
m@8 95 "text/plain": [
m@8 96 "(8200, 380)"
m@8 97 ]
m@8 98 },
m@8 99 "execution_count": 3,
m@8 100 "metadata": {},
m@8 101 "output_type": "execute_result"
m@8 102 }
m@8 103 ],
m@8 104 "source": [
m@8 105 "X.shape"
m@8 106 ]
m@8 107 },
m@8 108 {
m@8 109 "cell_type": "code",
m@8 110 "execution_count": null,
m@8 111 "metadata": {
m@8 112 "collapsed": false
m@8 113 },
m@8 114 "outputs": [],
m@8 115 "source": [
m@8 116 "D = pairwise_distances(X, metric='mahalanobis')"
m@8 117 ]
m@8 118 },
m@8 119 {
m@8 120 "cell_type": "code",
m@8 121 "execution_count": 6,
m@8 122 "metadata": {
m@8 123 "collapsed": false
m@8 124 },
m@8 125 "outputs": [
m@8 126 {
m@8 127 "data": {
m@8 128 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFMBJREFUeJzt3X+s3fV93/HnC1wIaQmelcr8cghSjRZP2Yi84Wjp2pvR\nMbdqgUoRONIo2qxqirsl6rRqJn8Usz/SEGljVBP8UzqM11KssRCqIIJDuVr3B7ipYHHjUIwUV9jg\nS2dSQhRlxeK9P873fv3l+vrec6+v7/d7730+pKv7PZ/z/R6/z8fn3Nf5fL4/TqoKSZIALui7AEnS\ncBgKkqSWoSBJahkKkqSWoSBJahkKkqTWWKGQ5GiSbyd5McnBpm1DkgNJXknyTJL1nfXvSnIkyctJ\nbuq0b01yqLnv/k77xUkea9qfT3LNUj5JSdJ4xh0pFDBRVZ+oqhuatt3Agaq6Dni2uU2SLcDtwBZg\nO/BAkjTbPAjsrKrNwOYk25v2ncDJpv0+4N5zfF6SpEVYyPRRZty+GdjbLO8Fbm2WbwEerap3q+oo\n8CqwLckVwKVVdbBZ75HONt3Hehy4cQF1SZKWyEJGCt9M8q0kv960bayqqWZ5CtjYLF8JHOtsewy4\napb24007ze/XAKrqFPB2kg0LeSKSpHO3bsz1PlVVbyT5aeBAkpe7d1ZVJfF6GZK0wo0VClX1RvP7\nr5N8FbgBmEpyeVWdaKaG3mxWPw5s6mx+NaMRwvFmeWb79DYfAV5Psg64rKre6tZg6EjS4lTVzOn/\ns5p3+ijJB5Nc2iz/JHATcAh4ErizWe1O4Ilm+UlgR5KLklwLbAYOVtUJ4AdJtjU7nu8AvtbZZvqx\nPsNox/VsT2xQP3fffXfvNVjT6qrLmqxpqX8WapyRwkbgq80BROuAP6iqZ5J8C9ifZCdwFLit+cN9\nOMl+4DBwCthVpyvbBTwMXAI8VVVPN+0PAfuSHAFOAjsW/EwkSeds3lCoqu8B18/S/hbwC2fZ5kvA\nl2Zp/3Pg47O0/z+aUJEk9cczms/BxMRE3yWcwZrGN8S6rGk81nT+ZDFzTn1IUiulVkkaiiTUUu5o\nliStHYaCJKllKEiSWuOe0SxpQE5fY3LE/W1aKo4UpBWrmh9p6ThSkFYBRw5aKoaCtELM/MP/ft0Q\nyPvWNSC0EE4fSSvKuFNGTi1pcQwFSVLL6SNplXMqSQvhSEFa9ZxK0vgMBUlSy+kjaQ1xKknzMRSk\nAZv7MNTFmA6CpX5crRZOH0mD5z4BLR9DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU8uQ1\naUCW/mQ1aWEMBWlw3v+FOdJycvpIktRypCCtUV4cT7NxpCCtWV5TSWcyFCRJLUNBktQyFCRJLUNB\nktQyFCRJLUNBktQyFCRJrbFCIcmFSV5M8sfN7Q1JDiR5JckzSdZ31r0ryZEkLye5qdO+Ncmh5r77\nO+0XJ3msaX8+yTVL+QSloUvS/kh9G3ek8AXgMKfPdNkNHKiq64Bnm9sk2QLcDmwBtgMP5PQr/UFg\nZ1VtBjYn2d607wRONu33Afee21OSVqJ+TyTrBpPhtLbNGwpJrgZ+Cfg9Tl+d62Zgb7O8F7i1Wb4F\neLSq3q2qo8CrwLYkVwCXVtXBZr1HOtt0H+tx4MZFPxtJi1T0HUwahnFGCvcBvwW812nbWFVTzfIU\nsLFZvhI41lnvGHDVLO3Hm3aa368BVNUp4O0kGxbwHCRJS2TOC+Il+WXgzap6McnEbOtUVSVZlo8X\ne/bsaZcnJiaYmJi1JElasyYnJ5mcnFz09pnr6ohJvgTcAZwCPgB8CPifwD8CJqrqRDM19FxV/d0k\nuwGq6svN9k8DdwN/1azzsab9s8DPVdXnmnX2VNXzSdYBb1TVT89SS3klR61Gozn86dd2d3nm7XGW\nF7PNmdv7Xls9klBVY+8omnP6qKq+
WFWbqupaYAfwJ1V1B/AkcGez2p3AE83yk8COJBcluRbYDBys\nqhPAD5Jsa3Y83wF8rbPN9GN9htGOa0lSDxb6fQrTHx++DOxPshM4CtwGUFWHk+xndKTSKWBX5+P9\nLuBh4BLgqap6uml/CNiX5AhwklH4SJJ6MOf00ZA4faTVyukjnU9LOn0kSVpbDAVJUstQkCS1DAVJ\nUstQkCS1DAVJUstQkCS1DAVJUmuhZzRLWgJD/86Cbn2eyLa2OFKQejPk7y8Ycm06nwwFSVLLUJAk\ntQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLy1xImpOXvFhbHClImoeXvFhLDAVJUstQ\nkCS1DAVJUssdzdIyGfp3KEjgSEFaZu601bAZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKk\nlqEgSWoZCpKk1pyhkOQDSV5I8lKSw0l+p2nfkORAkleSPJNkfWebu5IcSfJykps67VuTHGruu7/T\nfnGSx5r255Nccz6eqCRpfnOGQlX9GPh0VV0P/H3g00l+FtgNHKiq64Bnm9sk2QLcDmwBtgMP5PQF\nXx4EdlbVZmBzku1N+07gZNN+H3DvUj5BSUsnSfuj1Wne6aOq+lGzeBFwIfB94GZgb9O+F7i1Wb4F\neLSq3q2qo8CrwLYkVwCXVtXBZr1HOtt0H+tx4MZFPxtJ55nXblrt5g2FJBckeQmYAp6rqu8AG6tq\nqlllCtjYLF8JHOtsfgy4apb24007ze/XAKrqFPB2kg2LezqSpHMx76Wzq+o94PoklwHfSPLpGfdX\nEj86SNIqMPb3KVTV20m+DmwFppJcXlUnmqmhN5vVjgObOptdzWiEcLxZntk+vc1HgNeTrAMuq6q3\nZqthz5497fLExAQTExPjli9Ja8Lk5CSTk5OL3j5VZ/+Qn+TDwKmq+psklwDfAO4B/jmjncP3JtkN\nrK+q3c2O5j8EbmA0LfRN4Gea0cQLwOeBg8DXgd+tqqeT7AI+XlWfS7IDuLWqdsxSS81VqzREZ+6Q\nnX4NZ4zlcdc7120Wt73vx5UhCVU19pEB840UrgD2JrmA0f6HfVX1bJIXgf1JdgJHgdsAqupwkv3A\nYeAUsKvzl3wX8DBwCfBUVT3dtD8E7EtyBDgJnBEI0srW/aMqDducI4UhcaSglWg0Ujj/n9odKehs\nFjpS8IxmSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktcY+o1mSumaemOchqquDoSBpkWae26DVwOkj\nSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktTwkVVpCZ36pjrSyGArSkvP4fa1cTh9JklqGgiSpZShI\nklqGgiSpZShIklqGgiSp5SGpkpZE9xwNv1th5XKkIGmJFO8/R0MrkaEgSWoZCpKklqEgSWoZCpKk\nlqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWp5QTzpHHUvBCetdPOOFJJsSvJcku8k+Ysk\nn2/aNyQ5kOSVJM8kWd/Z5q4kR5K8nOSmTvvWJIea++7vtF+c5LGm/fkk1yz1E5XOLy8Gp9VhnOmj\nd4HfrKq/B3wS+I0kHwN2Aweq6jrg2eY2SbYAtwNbgO3AAzn9UepBYGdVbQY2J9netO8ETjbt9wH3\nLsmzk9SLJO2PVpZ5Q6GqTlTVS83yD4HvAlcBNwN7m9X2Arc2y7cAj1bVu1V1FHgV2JbkCuDSqjrY\nrPdIZ5vuYz0O3HguT0pS3xw5rVQL2tGc5KPAJ4AXgI1VNdXcNQVsbJavBI51NjvGKERmth9v2ml+\nvwZQVaeAt5NsWEhtkqRzN3YoJPkpRp/iv1BV73Tvq9HXLPmxQJJWuLGOPkryE4wCYV9VPdE0TyW5\nvKpONFNDbzbtx4FNnc2vZjRCON4sz2yf3uYjwOtJ1gGXVdVbM+vYs2dPuzwxMcHExMQ45UvSmjE5\n
Ocnk5OSit89836Xa7CTey2hH8G922r/StN2bZDewvqp2Nzua/xC4gdG00DeBn6mqSvIC8HngIPB1\n4Her6ukku4CPV9XnkuwAbq2qHTPqKL/3VUM0eotMvza7yzNvL3R5ubY5v/+m79t+JaGqxt7jP04o\n/Czwv4Bvc/p/+i5Gf9j3M/qEfxS4rar+ptnmi8C/Ak4xmm76RtO+FXgYuAR4qqqmD2+9GNjHaH/F\nSWBHs5O6W4ehoEEyFOZez/dtv5Y8FIbCUNBQGQpzr+f7tl8LDQUvcyFJahkKkqSWoSBJahkKkqSW\nV0mVFsFr+oyv21fudB4+RwrSonki/3jsp5XEUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAk\ntQwFSVLLUJAktQwFSVLLax9JWjZeB2n4HClIWkZeB2noHClIY/LKqFoLHClIC+InXa1uhoIkqWUo\nSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqeXJa5J6MfNkQC97MQyGgqSedEPAs8WHwukjSVLLUJAk\ntZw+kubgRfC01jhSkOblRfC0dhgKkqSWoSBJahkKkqTWvKGQ5PeTTCU51GnbkORAkleSPJNkfee+\nu5IcSfJykps67VuTHGruu7/TfnGSx5r255Ncs5RPUJI0vnFGCv8N2D6jbTdwoKquA55tbpNkC3A7\nsKXZ5oGcPnzjQWBnVW0GNieZfsydwMmm/T7g3nN4PpJWqCTtj/ozbyhU1Z8C35/RfDOwt1neC9za\nLN8CPFpV71bVUeBVYFuSK4BLq+pgs94jnW26j/U4cOMinoekFc+jvIZgsfsUNlbVVLM8BWxslq8E\njnXWOwZcNUv78aad5vdrAFV1Cng7yYZF1iVJOgfnfPJaVVWSZYn3PXv2tMsTExNMTEwsxz8rSSvG\n5OQkk5OTi95+saEwleTyqjrRTA292bQfBzZ11rua0QjheLM8s316m48ArydZB1xWVW/N9o92Q0GS\ndKaZH5jvueeeBW2/2OmjJ4E7m+U7gSc67TuSXJTkWmAzcLCqTgA/SLKt2fF8B/C1WR7rM4x2XEuS\nejDvSCHJo8DPAx9O8hrw28CXgf1JdgJHgdsAqupwkv3AYeAUsKtOXyR9F/AwcAnwVFU93bQ/BOxL\ncgQ4CexYmqcmLZxHvmity0r5YosktVJq1co1CoWZ1/mvRS73sf1q+TdP831/bpJQVWN/2vGMZkkD\n5OGpfTEUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1Drnax9JK50nrA1b9//HcxbOP0cKEuBx8UPm\n/81yMhQkSS1DQZLUMhQkSS1DQZLU8ugjSSuGRyKdf44UJK0gHol0vjlS0JrkuQnS7BwpaA3zU6c0\nk6EgSWo5fSRpRZo5BeiO56VhKEhaoWZ+x7OWgtNHkqSWoSBJajl9pDXDw1Cl+RkKWmOm56ENiNXG\ns52XhtNHklYJzztZCoaCJKllKEiSWu5T0KrljuW1y/0Li2coaJXzBKe1yQMKFsvpI0lSy5GCpFXN\nqaSFMRS0qrgfQWdyKmkhDAWtQv4R0Oy8sur8DAWteI4OND4PPJjPigqFw4cPA7B+/XquvPLKnqvR\nsDg60MK5v+FMgzn6KMn2JC8nOZLkP8y2zic/+Rm2br2R3bvvXu7yNDBJ2h9p8bw0xkyDCIUkFwL/\nFdgObAE+m+RjM9d7553D/PjH/5FTp5a7wtlNTk72XcIZVnNNZwbBub6hJ8+9qDVhsu8ClsW5ftAY\n4ntvMQYRCsANwKtVdbSq3gX+CLil55rmNcQXweqvaSk/2U0u0eOsdpN9F7BMTr+2FhMQQ3zvLcZQ\nQuEq4LXO7WNNm9aw7hvTaSItr9kDYi28Joeyo3msj34f+tCv8Ld/+1dceOG2812PzrPx31AeLaK+\ndQ9ieP/rcebr+J577jlz6xW2AztDKDjJJ4E9VbW9uX0X8F5V3d
tZp/9CJWkFqqqxP1ENJRTWAX8J\n3Ai8DhwEPltV3+21MElaYwYxfVRVp5L8G+AbwIXAQwaCJC2/QYwUJEnDMJSjj85qnJPa+pDkaJJv\nJ3kxycGeavj9JFNJDnXaNiQ5kOSVJM8kWT+AmvYkOdb01YtJti9zTZuSPJfkO0n+Isnnm/be+mqO\nmnrrqyQfSPJCkpeSHE7yO01736+ps9XV6+uqqeHC5t/+4+Z2r311lpoW1E+DHik0J7X9JfALwHHg\nzxjIvoYk3wO2VtVbPdbwT4AfAo9U1cebtq8A/7eqvtKE6N+pqt0913Q38E5V/eflqmNGTZcDl1fV\nS0l+Cvhz4FbgX9JTX81R023021cfrKofNfv5/jfw74Gb6fE1NUddN9JjXzV1/TtgK3BpVd3c9/vv\nLDUt6P039JHC0E9q6/UYyar6U+D7M5pvBvY2y3sZ/aHpuybosa+q6kRVvdQs/xD4LqPzYHrrqzlq\ngn776kfN4kWM9u99n55fU3PUBT32VZKrgV8Cfq9TR699dZaawgL6aeihMOST2gr4ZpJvJfn1vovp\n2FhVU83yFLCxz2I6/m2S/5PkoT6G1NOSfBT4BPACA+mrTk3PN0299VWSC5K8xKg/nquq7zCAfjpL\nXdDv6+o+4LeA9zptfffVbDUVC+inoYfCcOe24FNV9QngF4HfaKZNBqVGc4ND6MMHgWuB64E3gP/U\nRxHNNM3jwBeq6p3ufX31VVPT/2hq+iE991VVvVdV1wNXAz+X5NMz7u+ln2apa4Ie+yrJLwNvVtWL\nnOVT+HL31Rw1Laifhh4Kx4FNndubGI0WeldVbzS//xr4KqOpriGYauarSXIF8GbP9VBVb1aD0bB2\n2fsqyU8wCoR9VfVE09xrX3Vq+u/TNQ2hr5o63ga+zmhuejCvqU5d/7DnvvrHwM3NvsVHgX+aZB/9\n9tVsNT2y0H4aeih8C9ic5KNJLgJuB57suSaSfDDJpc3yTwI3AYfm3mrZPAnc2SzfCTwxx7rLonlz\nTPtVlrmvkgR4CDhcVf+lc1dvfXW2mvrsqyQfnp5aSHIJ8M+AF+n5NXW2uqb/+DaWta+q6otVtamq\nrgV2AH9SVXfQY1+dpaZfW+hrahAnr53NgE9q2wh8dfS+Zh3wB1X1zHIXkeRR4OeBDyd5Dfht4MvA\n/iQ7gaOMjmbps6a7gYkk1zMaSn8P+NfLWRPwKeBfAN9O8mLTdhf99tVsNX2R0WXj++qrK4C9SS5g\n9IFxX1U929TX22tqjroe6fl11TU9TdTr+6+je6GmryT5B4zZT4M+JFWStLyGPn0kSVpGhoIkqWUo\nSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqfX/AXYOauaNcBQ7AAAAAElFTkSuQmCC\n",
m@8 129 "text/plain": [
m@8 130 "<matplotlib.figure.Figure at 0x1174100d0>"
m@8 131 ]
m@8 132 },
m@8 133 "metadata": {},
m@8 134 "output_type": "display_data"
m@8 135 }
m@8 136 ],
m@8 137 "source": [
m@8 138 "plt.hist(D.ravel(), bins=100);"
m@8 139 ]
m@8 140 },
m@8 141 {
m@8 142 "cell_type": "code",
m@8 143 "execution_count": 16,
m@8 144 "metadata": {
m@8 145 "collapsed": true
m@8 146 },
m@8 147 "outputs": [],
m@8 148 "source": [
m@8 149 "def n_occurrence_from_D(D, k=10, n_items=None):\n",
m@8 150 "    \"\"\"Return N_k: how often each item index appears among the k nearest neighbours of the rows of D.\"\"\"\n",
m@8 151 "    n_items = len(D) if n_items is None else n_items\n",
m@8 152 "    sort_idx = np.argsort(D, axis=1)\n",
m@8 153 "    D_k = sort_idx[:, 1:(k+1)]  # drop column 0, assumed to be the item itself (zero self-distance)\n",
m@8 154 "    N_k = np.bincount(D_k.astype(int).ravel(), minlength=n_items)\n",
m@8 155 "    return N_k"
m@8 156 ]
m@8 157 },
m@8 158 {
m@8 159 "cell_type": "code",
m@8 160 "execution_count": 18,
m@8 161 "metadata": {
m@8 162 "collapsed": false
m@8 163 },
m@8 164 "outputs": [
m@8 165 {
m@8 166 "name": "stdout",
m@8 167 "output_type": "stream",
m@8 168 "text": [
m@8 169 "2.71175956476\n"
m@8 170 ]
m@8 171 },
m@8 172 {
m@8 173 "data": {
m@8 174 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEACAYAAAC08h1NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFDVJREFUeJzt3VGMHdd93/Hvj6KomJItmkhBkZQCsYlYiUXb2Ilop7WR\nTauySpCQepIYIAZhq3kREjsomogU0EpPrkwgiFMUemhiGxvDYsI6rkAVjkxK1kVTpBDtWEoUUSzJ\ntmy0crmyJduym6Jd7v77cIfl1Wa1u3f37nLF8/0AC505c2bumSPub+aembs3VYUkqR3rrnQHJEmr\ny+CXpMYY/JLUGINfkhpj8EtSYwx+SWrMgsGf5FCSl5K8mOTxJNcl2ZzkRJIzSY4n2TSr/dkkp5Ps\nWdnuS5KGlfme409yK/BV4I6q+j9J/gD4MvC3gW9X1eEkDwLvraqDSXYBjwN3AtuBp4GdVTWzsoch\nSVqsha743wSmgI1J1gMbgW8Ce4Hxrs04cE9X3gccqaqpqjoPnAN2j7rTkqSlmzf4q+oN4DeBv6Qf\n+N+tqhPAlqqa7JpNAlu68jZgYmAXE/Sv/CVJa8S8wZ/kR4FfA26lH+o3JPmlwTbVnyua7+8++Dch\nJGkNWb/A+p8E/qSqXgdI8iXgp4ALSW6qqgtJtgKvde1fBW4Z2P7mru4tkngykKQlqKosdx8LzfGf\nBj6Y5F1JAtwFnAKeBA50bQ4AT3TlY8D+JBuS7ABuA06+Tef9qeLhhx++4n1YKz+OhWPhWMz/Myrz\nXvFX1Z8l+T3g68AM8A3g3wLvBo4muR84D9zbtT+V5Cj9k8NF4IEaZW8lScu20FQPVXUYODyr+g36\nV/9ztf8k8Mnld02StBL85O4VNjY2dqW7sGY4Fpc5Fpc5FqM37we4VuxFE2eAJGlISahVuLkrSbrK\nGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjFvyzzCvl\n+us3A3Dddet5443XFmgtSRqVK/bXOeF1YIp1625menpq1fsgSe80o/rrnFfsih82Awa+JK025/gl\nqTEGvyQ1ZsHgT/K3kjw/8PO9JB9PsjnJiSRnkhxPsmlgm0NJziY5nWTPyh6CJGkYQ93cTbIOeBXY\nDfwq8O2qOpzkQeC9VXUwyS7gceBOYDvwNLCzqmYG9lNQ9G/ubvTmriQtwpX66sW7gHNV9QqwFxjv\n6seBe7ryPuBIVU1V1XngHP0ThSRpDRg2+PcDR7rylqqa7MqTwJauvA2YGNhmgv6VvyRpDVh08CfZ\nAPwC8O9mr6v+fNF8c0ar/2EBSdKchnmO/2eBP62qb3XLk0luqqoLSbYClz5++ypwy8B2N3d1szwC\nTDMzM02v12NsbGzIrkvS1a3X69Hr9Ua+30Xf3E3y+8AfVdV4t3wYeL2qPpXkILBp1s3d3Vy+uftj\nNfBC3tyVpOGN6ubuooI/yfXA/wB2VNX3u7rNwFHgR4DzwL1V9d1u3UPAx4CLwCeq6iuz9mfwS9KQ\nVjX4R83gl6ThXanHOSVJ73AGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8k\nNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxiwq+JNsSvLFJC8nOZXk\nA0k2JzmR5EyS40k2DbQ/lORsktNJ9qxc9yVJw1rsFf9vA1+uqjuAvwucBg4CJ6pqJ/BMt0ySXcB9\nwC7gbuCxJL6zkKQ1YsFATnIj8OGq+ixAVV2squ8Be4Hxrtk4cE9X3gccqaqpqjoPnAN2j7rjkqSl\nWcyV+A7gW0k+l+QbSX4nyfXAlqqa7NpMAlu68jZgYmD7CWD7yHosSVqW9Yts837gV6rqa0k+TTet\nc0lVVZKaZx9zrHsEmGZmZpper8fY
2Nhi+yxJTej1evR6vZHvN1Xz5TUkuQn4z1W1o1v+EHAI+JvA\nz1TVhSRbgWer6vYkBwGq6tGu/VPAw1X13MA+q38umGLduo1MT0+N/MAk6WqThKrKcvez4FRPVV0A\nXkmys6u6C3gJeBI40NUdAJ7oyseA/Uk2JNkB3AacXG5HJUmjsZipHoBfBb6QZAPwX4GPAtcAR5Pc\nD5wH7gWoqlNJjgKngIvAA7XQ2wpJ0qpZcKpnRV7UqR5JGtqqTfVIkq4uBr8kNcbgl6TGGPyS1BiD\nX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfgl\nqTEGvyQ1xuCXpMYsKviTnE/y50meT3Kyq9uc5ESSM0mOJ9k00P5QkrNJTifZs1KdlyQNb7FX/AWM\nVdX7qmp3V3cQOFFVO4FnumWS7ALuA3YBdwOPJfGdhSStEcME8uxvdt8LjHflceCerrwPOFJVU1V1\nHjgH7EaStCYMc8X/dJKvJ/nlrm5LVU125UlgS1feBkwMbDsBbF92TyVJI7F+ke3+QVX9zyR/AziR\n5PTgyqqqJDXP9nOsewSYZmZmml6vx9jY2CK7Iklt6PV69Hq9ke83VfPl9RwbJA8DPwB+mf68/4Uk\nW4Fnq+r2JAcBqurRrv1TwMNV9dzAPqp/Lphi3bqNTE9PjehwJOnqlYSqmj3tPrQFp3qSbEzy7q58\nPbAHeBE4Bhzomh0AnujKx4D9STYk2QHcBpxcbkclSaOxmKmeLcC/T3Kp/Req6niSrwNHk9wPnAfu\nBaiqU0mOAqeAi8ADNezbCknSihl6qmckL+pUjyQNbdWmeiRJVxeDX5IaY/BLUmMMfklqjMEvSY0x\n+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINf\nkhpj8EtSYxYV/EmuSfJ8kie75c1JTiQ5k+R4kk0DbQ8lOZvkdJI9K9VxSdLSLPaK/xPAKfrfkA5w\nEDhRVTuBZ7plkuwC7gN2AXcDjyXxXYUkrSELhnKSm4GfA34XuPTt7nuB8a48DtzTlfcBR6pqqqrO\nA+eA3aPssCRpeRZzNf5bwK8DMwN1W6pqsitPAlu68jZgYqDdBLB9uZ2UJI3O+vlWJvl54LWqej7J\n2FxtqqqS1FzrLjWZu/oRYJqZmWl6vR5jY3PuXpKa1ev16PV6I99vqt4+s5N8EvgIcBH4IeA9wJeA\nO4GxqrqQZCvwbFXdnuQgQFU92m3/FPBwVT03a7/VPx9MsW7dRqanp0Z+YJJ0tUlCVWXhlvObd6qn\nqh6qqluqagewH/hqVX0EOAYc6JodAJ7oyseA/Uk2JNkB3AacXG4nJUmjM+9UzxwuvT14FDia5H7g\nPHAvQFWdSnKU/hNAF4EHar63FJKkVTfvVM+KvahTPZI0tFWZ6pEkXX0MfklqjMEvSY0x+CWpMQa/\nJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtS\nYwx+SWqMwS9JjZk3+JP8UJLnkryQ5FSSf9XVb05yIsmZJMeTbBrY5lCSs0lOJ9mz0gcgSRrOgl+2\nnmRjVf1VkvXAfwL+ObAX+HZVHU7yIPDeqjqYZBfwOHAnsB14GthZVTOz9umXrUvSkFbty9ar6q+6\n4gbgGuA79IN/vKsfB+7pyvuAI1U1VVXngXPA7uV2UpI0OgsGf5J1SV4AJoFnq+olYEtVTXZNJoEt\nXXkbMDGw+QT9K39J0hqxfqEG3TTNjye5EfhKkp+Ztb76Uzdvv4u5qx8BppmZmabX6zE2NrbYPktS\nE3q9Hr1eb+T7XXCO/y2Nk38B/G/gnwJjVXUhyVb67wRuT3IQoKoe7do/BTxcVc/N2o9z/JI0pFWZ\n
40/yw5ee2EnyLuAfA88Dx4ADXbMDwBNd+RiwP8mGJDuA24CTy+2kJGl0Fprq2QqMJ1lH/yTx+ap6\nJsnzwNEk9wPngXsBqupUkqPAKeAi8EAN85ZCkrTihprqGdmLOtUjSUNbtcc5JUlXF4Nfkhpj8EtS\nYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG\n4Jekxhj8ktQYg1+SGmPwS1JjFgz+JLckeTbJS0n+IsnHu/rNSU4kOZPkeJJNA9scSnI2yekke1by\nACRJw1nwy9aT3ATcVFUvJLkB+FPgHuCjwLer6nCSB4H3VtXBJLuAx4E7ge3A08DOqpoZ2Kdfti5J\nQ1q1L1uvqgtV9UJX/gHwMv1A3wuMd83G6Z8MAPYBR6pqqqrOA+eA3cvtqCRpNIaa409yK/A+4Dlg\nS1VNdqsmgS1deRswMbDZBP0ThSRpDVi/2IbdNM8fAp+oqu8nl99tVFX1p2/e1hzrHgGmmZmZptfr\nMTY2ttiuSFITer0evV5v5PtdcI4fIMm1wH8A/qiqPt3VnQbGqupCkq3As1V1e5KDAFX1aNfuKeDh\nqnpuYH/O8UvSkFZtjj/9S/vPAKcuhX7nGHCgKx8Anhio359kQ5IdwG3AyeV2VJI0Got5qudDwH8E\n/pzLUzaH6If5UeBHgPPAvVX13W6bh4CPARfpTw19ZdY+veKXpCGN6op/UVM9o2bwS9LwVm2qR5J0\ndTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQY\ng1+SGmPwS1JjDH5JaozBL0mNMfglqTGL+bL1zyaZTPLiQN3mJCeSnElyPMmmgXWHkpxNcjrJnpXq\nuCRpaRZzxf854O5ZdQeBE1W1E3imWybJLuA+YFe3zWNJfFchSWvIgqFcVX8MfGdW9V5gvCuPA/d0\n5X3AkaqaqqrzwDlg92i6KkkahaVejW+pqsmuPAls6crbgImBdhPA9iW+hiRpBSx7GqaqCqj5miz3\nNSRJo7N+idtNJrmpqi4k2Qq81tW/Ctwy0O7mrm4OjwDTzMxM0+v1GBsbW2JXJOnq1Ov16PV6I99v\n+hfsCzRKbgWerKq/0y0fBl6vqk8lOQhsqqqD3c3dx+nP628HngZ+rGa9SJLqvxGYYt26jUxPT43y\nmCTpqpSEqspy97PgFX+SI8BPAz+c5BXgXwKPAkeT3A+cB+4FqKpTSY4Cp4CLwAOzQ1+SdGUt6op/\n5C/qFb8kDW1UV/w+Yy9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWp\nMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjlvrViyMzM3OR5K1/XtrvbpEk/lo2jsoVD/6+waBfmQOV\npHem0eejUz2S1BiDX5IasyLBn+TuJKeTnE3y4BK2//8/kqTRGnnwJ7kG+DfA3cAu4BeT3DHcXoq3\nzmvN+TpXxQmi1+td6S6sGY7FZY7FZY7F6K3EFf9u4FxVna+qKeD3gX1L3dnsgH9r0F8+QbxTTwT+\no77MsbjMsbjMsRi9lXiqZzvwysDyBPCBpe9u9h3tGii/fbu3C//BR0WX8hipj55KWkmrceG6EsG/\nqCR8z3t+AZjhzTdXoAdv6cZbTxZ/fVDnW7eYfc9toX3NdcJYjf/hyz1RLfbEN8yxDNunpfZhMSf9\nufr9duvWwkl/Jfs0irFYyu/BWrTU4xjm39msPV7aw2K6N7SMeuCTfBB4pKru7pYPATNV9amBNu+M\n/9uStMZU1bLPBisR/OuB/wL8I+CbwEngF6vq5ZG+kCRpSUY+1VNVF5P8CvAV4BrgM4a+JK0dI7/i\nlyStbav+yd3lfrjrnSTJLUmeTfJSkr9I8vGufnOSE0nOJDmeZN
PANoe6sTmdZM+V6/3KSHJNkueT\nPNktNzkWSTYl+WKSl5OcSvKBhsfiUPc78mKSx5Nc18pYJPlskskkLw7UDX3sSX6iG7+zSX57wReu\nqlX7oT/1cw64FbgWeAG4YzX7sMrHexPw4135Bvr3Pu4ADgO/0dU/CDzalXd1Y3JtN0bngHVX+jhG\nPCb/DPgCcKxbbnIsgHHgY115PXBji2PRHc9/A67rlv8AONDKWAAfBt4HvDhQN8yxX5q1OQns7spf\nBu6e73VX+4p/pB/uWuuq6kJVvdCVfwC8TP9zDnvp/+LT/feerrwPOFJVU1V1nv7/2N2r2ukVlORm\n4OeA3+Xyc2rNjUWSG4EPV9VnoX9frKq+R4NjAbwJTAEbuwdDNtJ/KKSJsaiqPwa+M6t6mGP/QJKt\nwLur6mTX7vcGtpnTagf/XB/u2r7KfbgiktxK/8z+HLClqia7VZPAlq68jf6YXHK1jc9vAb8OzAzU\ntTgWO4BvJflckm8k+Z0k19PgWFTVG8BvAn9JP/C/W1UnaHAsBgx77LPrX2WBMVnt4G/yTnKSG4A/\nBD5RVd8fXFf992bzjctVMWZJfh54raqe520+ldLKWNCf2nk/8FhVvR/4X8DBwQatjEWSHwV+jf7U\nxTbghiS/NNimlbGYyyKOfUlWO/hfBW4ZWL6Ft56prjpJrqUf+p+vqie66skkN3XrtwKvdfWzx+fm\nru5q8PeBvUn+O3AE+IdJPk+bYzEBTFTV17rlL9I/EVxocCx+EviTqnq9qi4CXwJ+ijbH4pJhficm\nuvqbZ9XPOyarHfxfB25LcmuSDcB9wLFV7sOqSf9z2Z8BTlXVpwdWHaN/A4vuv08M1O9PsiHJDuA2\n+jdt3vGq6qGquqWqdgD7ga9W1UdocywuAK8k2dlV3QW8BDxJY2MBnAY+mORd3e/LXcAp2hyLS4b6\nnej+Pb3ZPRkW4CMD28ztCtzF/ln6T7ecAw5d6bvqK3ysH6I/n/0C8Hz3czewGXgaOAMcBzYNbPNQ\nNzangX9ypY9hhcblp7n8VE+TYwH8PeBrwJ/Rv8q9seGx+A36J74X6d/MvLaVsaD/7vebwP+lf//z\no0s5duAnuvE7B/zrhV7XD3BJUmP86kVJaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtS\nY/4f5A0BHDAleMkAAAAASUVORK5CYII=\n",
m@8 175 "text/plain": [
m@8 176 "<matplotlib.figure.Figure at 0x1e926a590>"
m@8 177 ]
m@8 178 },
m@8 179 "metadata": {},
m@8 180 "output_type": "display_data"
m@8 181 }
m@8 182 ],
m@8 183 "source": [
m@8 184 "N_k = n_occurrence_from_D(D, k=100)\n",
m@8 185 "print(skew(N_k))  # skewness of the k-occurrence distribution\n",
m@8 186 "plt.hist(N_k, bins=100);"
m@8 187 ]
m@8 188 },
m@8 189 {
m@8 190 "cell_type": "code",
m@8 191 "execution_count": 9,
m@8 192 "metadata": {
m@8 193 "collapsed": true
m@8 194 },
m@8 195 "outputs": [],
m@8 196 "source": [
m@8 197 "N_k"
m@8 198 ]
m@8 199 },
m@8 200 {
m@8 201 "cell_type": "code",
m@8 202 "execution_count": null,
m@8 203 "metadata": {
m@8 204 "collapsed": true
m@8 205 },
m@8 206 "outputs": [],
m@8 207 "source": []
m@8 208 }
m@8 209 ],
m@8 210 "metadata": {
m@8 211 "kernelspec": {
m@8 212 "display_name": "Python 2",
m@8 213 "language": "python",
m@8 214 "name": "python2"
m@8 215 },
m@8 216 "language_info": {
m@8 217 "codemirror_mode": {
m@8 218 "name": "ipython",
m@8 219 "version": 2
m@8 220 },
m@8 221 "file_extension": ".py",
m@8 222 "mimetype": "text/x-python",
m@8 223 "name": "python",
m@8 224 "nbconvert_exporter": "python",
m@8 225 "pygments_lexer": "ipython2",
m@8 226 "version": "2.7.12"
m@8 227 }
m@8 228 },
m@8 229 "nbformat": 4,
m@8 230 "nbformat_minor": 0
m@8 231 }