annotate notebooks/correlation_n_outliers.ipynb @ 105:edd82eb89b4b branch-tests tip

Merge
author Maria Panteli
date Sun, 15 Oct 2017 13:36:59 +0100
parents 69521f86d931
children
rev   line source
m@94 1 {
m@94 2 "cells": [
m@94 3 {
m@94 4 "cell_type": "code",
m@94 5 "execution_count": 1,
m@94 6 "metadata": {
m@94 7 "collapsed": true
m@94 8 },
m@94 9 "outputs": [],
m@94 10 "source": [
m@94 11 "import numpy as np\n",
m@94 12 "import pickle\n",
m@94 13 "from scipy.stats import pearsonr\n",
m@94 14 "import sys\n",
m@94 15 "\n",
m@94 16 "%matplotlib inline\n",
m@94 17 "import matplotlib.pyplot as plt\n",
m@94 18 "\n",
m@94 19 "%load_ext autoreload\n",
m@94 20 "%autoreload 2\n",
m@94 21 "\n",
m@94 22 "sys.path.append('../')\n",
m@94 23 "import scripts.outliers as outliers"
m@94 24 ]
m@94 25 },
m@94 26 {
m@94 27 "cell_type": "code",
m@94 28 "execution_count": 5,
m@94 29 "metadata": {},
m@94 30 "outputs": [
m@94 31 {
m@94 32 "name": "stdout",
m@94 33 "output_type": "stream",
m@94 34 "text": [
m@94 35 "Antigua and Barbuda\n",
m@94 36 "Australia\n",
m@94 37 "Cuba\n",
m@94 38 "Fiji\n",
m@94 39 "French Polynesia\n",
m@94 40 "Grenada\n",
m@94 41 "Iceland\n",
m@94 42 "Jamaica\n",
m@94 43 "Japan\n",
m@94 44 "Kiribati\n",
m@94 45 "Malta\n",
m@94 46 "New Zealand\n",
m@94 47 "Philippines\n",
m@94 48 "Puerto Rico\n",
m@94 49 "Republic of Serbia\n",
m@94 50 "Saint Lucia\n",
m@94 51 "Samoa\n",
m@94 52 "Solomon Islands\n",
m@94 53 "South Korea\n",
m@94 54 "The Bahamas\n",
m@94 55 "Trinidad and Tobago\n"
m@94 56 ]
m@94 57 }
m@94 58 ],
m@94 59 "source": [
m@94 60 "DATA_FILE = '../data/lda_data_8.pickle'\n",
m@94 61 "METADATA_FILE = '../data/metadata.csv'\n",
m@94 62 "dataset, ddf, w_dict = outliers.load_data(DATA_FILE, METADATA_FILE)  # ddf and w_dict are not used in the cells visible here\n",
m@94 63 "X_list, Y, Yaudio = dataset  # dataset unpacks into exactly three parts\n",
m@94 64 "X = np.concatenate(X_list, axis=1)  # join the arrays in X_list along axis 1 (presumably feature blocks side by side - TODO confirm)"
m@94 65 ]
m@94 66 },
m@94 67 {
m@94 68 "cell_type": "code",
m@94 69 "execution_count": 6,
m@94 70 "metadata": {},
m@94 71 "outputs": [
m@94 72 {
m@94 73 "data": {
m@94 74 "text/html": [
m@94 75 "<div>\n",
m@94 76 "<table border=\"1\" class=\"dataframe\">\n",
m@94 77 " <thead>\n",
m@94 78 " <tr style=\"text-align: right;\">\n",
m@94 79 " <th></th>\n",
m@94 80 " <th>Country</th>\n",
m@94 81 " <th>Outliers</th>\n",
m@94 82 " <th>N_Country</th>\n",
m@94 83 " <th>N_Outliers</th>\n",
m@94 84 " </tr>\n",
m@94 85 " </thead>\n",
m@94 86 " <tbody>\n",
m@94 87 " <tr>\n",
m@94 88 " <th>0</th>\n",
m@94 89 " <td>Canada</td>\n",
m@94 90 " <td>0.050000</td>\n",
m@94 91 " <td>100</td>\n",
m@94 92 " <td>5</td>\n",
m@94 93 " </tr>\n",
m@94 94 " <tr>\n",
m@94 95 " <th>1</th>\n",
m@94 96 " <td>Lithuania</td>\n",
m@94 97 " <td>0.000000</td>\n",
m@94 98 " <td>47</td>\n",
m@94 99 " <td>0</td>\n",
m@94 100 " </tr>\n",
m@94 101 " <tr>\n",
m@94 102 " <th>2</th>\n",
m@94 103 " <td>Cambodia</td>\n",
m@94 104 " <td>0.210526</td>\n",
m@94 105 " <td>19</td>\n",
m@94 106 " <td>4</td>\n",
m@94 107 " </tr>\n",
m@94 108 " <tr>\n",
m@94 109 " <th>3</th>\n",
m@94 110 " <td>Ethiopia</td>\n",
m@94 111 " <td>0.257143</td>\n",
m@94 112 " <td>35</td>\n",
m@94 113 " <td>9</td>\n",
m@94 114 " </tr>\n",
m@94 115 " <tr>\n",
m@94 116 " <th>4</th>\n",
m@94 117 " <td>Swaziland</td>\n",
m@94 118 " <td>0.163265</td>\n",
m@94 119 " <td>98</td>\n",
m@94 120 " <td>16</td>\n",
m@94 121 " </tr>\n",
m@94 122 " </tbody>\n",
m@94 123 "</table>\n",
m@94 124 "</div>"
m@94 125 ],
m@94 126 "text/plain": [
m@94 127 " Country Outliers N_Country N_Outliers\n",
m@94 128 "0 Canada 0.050000 100 5\n",
m@94 129 "1 Lithuania 0.000000 47 0\n",
m@94 130 "2 Cambodia 0.210526 19 4\n",
m@94 131 "3 Ethiopia 0.257143 35 9\n",
m@94 132 "4 Swaziland 0.163265 98 16"
m@94 133 ]
m@94 134 },
m@94 135 "execution_count": 6,
m@94 136 "metadata": {},
m@94 137 "output_type": "execute_result"
m@94 138 }
m@94 139 ],
m@94 140 "source": [
m@94 141 "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)  # NOTE(review): chi2thr is presumably the chi-square quantile used as the outlier cut-off - confirm in scripts/outliers.py\n",
m@94 142 "df_global.head()"
m@94 143 ]
m@94 144 },
m@94 145 {
m@94 146 "cell_type": "markdown",
m@94 147 "metadata": {},
m@94 148 "source": [
m@94 149 "## Pearson correlation between the proportion of outliers and the number of samples per country"
m@94 150 ]
m@94 151 },
m@94 152 {
m@94 153 "cell_type": "code",
m@94 154 "execution_count": 10,
m@94 155 "metadata": {},
m@94 156 "outputs": [
m@94 157 {
m@94 158 "name": "stdout",
m@94 159 "output_type": "stream",
m@94 160 "text": [
m@94 161 "correlation -0.0102335874359\n",
m@94 162 "p-value 0.905523601988\n"
m@94 163 ]
m@94 164 },
m@94 165 {
m@94 166 "data": {
m@94 167 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEPCAYAAABY9lNGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH25JREFUeJzt3X2UXHWd5/H3N4SebRQMnXASNDAwYVjEg07hymQ3zknp\n2N04D4Ek5/jArNuLzjKOCkdthwSDh3an40EcGEdn3LOMo+S4whxmWJg4Mt1pXHuW7CLsxIBRyEZ0\nUNAlPAQcxm2XePjuH3Urqaq+XXXr1n2s+rzOqZN6uHXv9/6qc7/393B/19wdERGRVsvyDkBERIpJ\nCUJEREIpQYiISCglCBERCaUEISIioZQgREQkVGoJwsy+YGaHzexAw3ufMrNHzOwhM/uvZvaKhs+u\nMbPvmtlBMxtLKy4REYkmzRrEF4GLW97bA7zG3V8HHAKuATCz84G3A+cH3/mcmal2IyKSo9QOwu5+\nL/Bcy3tz7v5S8PJ+YG3w/BLgNnc/6u6PAY8CF6UVm4iIdJbnWfq7gbuD568Enmj47AngVZlHJCIi\nx+SSIMxsB/Ciu9/aZjHNASIikqPlWW/QzP498BvArze8/SPgjIbXa4P3Wr+rpCEiEoO7W7ffybQG\nYWYXA38AXOLuP2v4aDfwDjMbMrOzgV8GHghbh7sX/nHdddflHoPiVJyKUzHWH3GlVoMws9uAjcAq\nM3scuI7aqKUhYM7MAO5z9/e5+8NmdjvwMPBz4H3ey16JiEjPUksQ7v7OkLe/0Gb5TwCfSCseERHp\njq41SEG1Ws07hEgUZ7IUZ7LKEGcZYuyFlaklx8zU8iQi0iUzw4veSS0iIuWhBCEiIqGUIEREJJQS\nhIiIhFKCEBGRUEoQIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQ\nIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQIiISSglCRERCKUGI\niEgoJQgREQmlBCEiIqFSSxBm9gUzO2xmBxreGzGzOTM7ZGZ7zGxFw2fXmNl3zeygmY2lFZeIiEST\nZg3ii8DFLe9tB+bc/Vzga8FrzOx84O3A+cF3Pmdmqt2IiOQotYOwu98LPNfy9iZgV/B8F3Bp8PwS\n4DZ3P+rujwGPAhelFVuZzM7OMja2lbGxrczOzsZapvXzTq+jbrfRzp07WbnyHFauPIedO3dG2kav\ncXYbYxJll8Q2kihvkUy4e2oP4CzgQMPr5xqeW/018Fngdxo++zywNWR9PkhmZmZ8eHi1wy0Ot/jw\n8GqfmZnpapnWz4eGTvOhoRVLvh4eXu3T09Mdt9toenra4ZRjy8Mpvnz5y9puo9N+DA2t8KGh09rE\n3fx5pxjjlG+U8u92G2Fxd1veIt0Kjp3dH8PjfCnyytskiOD1EV86QWwJWV/iBVdko6NbgoOGB49b\nfHR0S1fLhH0O60NezzhscVjvJ598RsftNhoZWReyjbUO0w7rgudrm7ZRqWxYYj/qy6ztEPf6rmKM\nU75Ryr/7bSyOO6z8ut2OSDtxE8TylCsorQ6b2Rp3f9LMTgeeCt7/EXBGw3Jrg/cWmZqaOva8Wq1S\nrVbTibQknnnm2bbvhX0O/9zy+nlgAvgkAC+88KEEIvspcAPwmeD1VdS6mf4EgIce+hCzs7OMj483\nfOcAsC2I48cJxCAymObn55mfn+99RXGyStQHi2sQNwDbgufbgeuD5+cDDwJDwNnA9wALWV8aybWw\njjdPTDqs92XLVvr09HTTMpXKBodVDc07q5rOzsM+Nzu5qenG7NSWM9hJX7bs1GPLxGliOv566ZpL\npbKxaV+XLVvZ8J2ZpriTbmKamZnxSmVD034uW3aqVyobjq0nShPU6OgWHx3dsuS21cQkRUDRmpiA\n26idBr4IPA5cDowA9wCHgD3Aiob
lP0qtc/ogML7EOlMqvuKanp5ue7CuNWFMBs0yteeLm0maP69U\nNjYd2CqVjYsO5pXKBq9UNvjIyDqvVDZ2PGBNTEw4nBokgcngeaemrZGmhLc4jkkfGVl3LM7GA/L0\n9HRX8TVqPmhPutmpbjYSxN1cxq1JoP66UtmwZIJa6jtLvQ77jkiSCpcg0ngMYoLo1A6eREdr2DLd\nntUujnMypFZxUlNNpl5TiXrG3s0+dVem0fozmrcb/p0kOrZFkqYE0aeidJR2OvuM2hTSuMxS211q\nXWHLL1t2stc6m9d5rcN6MkgM9U7x8ITXKdZuOo/D1hc3QTR/LzyGJDq221FNQ+JQguhTeZ2Rhh3o\nKpWNS8YSFue6dReENDON9HwAjXoQXqrsovQLhJVx83ab+0jq30kzQah2InEpQfSxPM4aww5GtQ7v\n9s1drW3trdcyTExMdNUBHjW2zgf04/HW+11q/RcbluwX6LTdoaEVx/pzum0miyPt2kmaVPPJlxKE\nJC5qs1M361jqvV5jC9NtLSip7aZ1MCxrglDNJ39KEJK6sv1Hj1MLKrKylX9dWRNbP4mbIDQhnkQ2\nPj7OnXfuYnR0N6Oju7nzzl0tF7oVS1i8q1atzjus2MpW/lJ+Vksu5WBmXqZ4pXhmZ2fZvHmChYXa\nVePDw9t0oE2Zyjx/Zoa7W9ffK9MBVwlCkjA7O8uNN94MwOTkFTpQZUBlni8lCMldlINA3geKMsQo\nkrS4CSL3juduHqiTurCSmJpcMYqkA41ikjwlMTW5Yhxcuk4iXXEThEYxiaRAd4iLrt6JPTe3ibm5\nTWzePFHKMuvL3zxOVsnrgWoQhVWG5pusYsx7P8umH2ptRf/NUROT5C3Pq4yjSirGdsv0wwEvS/1Q\nXkXfh7gJIus7ykkfGx8f7zjiJ8oyaUoixtZx/Xv3Tmhcfw8mJ69g794JFhZqr4eHtzE5uSvfoKQm\nTlbJ60EJaxB5nzFL8nq9R4csVvb/J0X/zVENonh0pjmY6lNiHL+WQr95J3nXLHvVr7+5LpRL0djY\nVubmNgETwTu1eXT27Lkjz7C6MsgXjS2175o6Qsom7oVyqkHIkga5BtRu3/v1bFGklWoQKSr7mWY/\n1IDiGuR9l/6jGkQB6UxTREotTs92Xg9KOIopC2mNACn6yIw0Dcq+l330kESDLpQbTGkfyAb5ANLv\n+z4oSVDiJwj1QZSc2solLv3tDI64fRCarE9EREKpk7rkNE2BxKW/HelETUx9YJAvZpPe6G9nMOiW\noyIiEkp9ECIikqhcEoSZXWNm3zGzA2Z2q5n9gpmNmNmcmR0ysz1mtiKP2EREpCbzBGFmZwH/AbjQ\n3S8ATgDeAWwH5tz9XOBrwWsREclJHjWIfwKOAieZ2XLgJODHwCagPoRiF3BpDrGJiEgg8wTh7keA\nG4EfUksMz7v7HLDa3Q8Hix0GVmcdm4iIHJf5dRBmtg74IHAW8BPgr8zs3zYu4+5uZqHDlaampo49\nr1arVKvVtEIVESml+fl55ufne15P5sNczeztwKi7/27w+l3AeuDNwJvc/UkzOx34uruf1/JdDXMV\nEelSmYa5HgTWm9mwmRnwFuBh4CscnxRmArgrh9hERCSQy4VyZnY1tSTwEvBN4HeBk4HbgTOBx4C3\nufvzLd9TDUJEpEu6kloATZ0gIospQUjpb3EqIulQghDN7y8iocrUSS0iIiWg+0H0Ec3vLyJJUhNT\nn1EntYi0Uh+EiIiEUh+EiIgkSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIOSY2dlZxsa2Mja2ldnZ\n2bzDKRyVjwwaDXMVQPM4daLykTLTdRDSE83j1J7KR8pM10H0GTVniEjeNBdTAbU2Z+zdO5F6c4bm\ncWpP5SODSE1MOQubOymv5gzN49SeykfKKm4Tk2oQOVqqppCX8fFxHfTaUPnIoFGCyNGNN94cJIda\
nTWFhofaemjNEpAiUIApofHycO+/c1dCcoeGUIpI99UHkSGPrRSQLug6ipNTxKSJpU4IQyYESvJSB\nEoRIxtREKGWhBCGSMU2/IWWhqTak75Vh+pFnnjlc+BhFolINQkqhiM05rTENDX0QOJEXX/xUYWIU\nATUxSZ8ranNOYyf1M888y/79l1O0GEVK1cRkZivM7K/N7BEze9jMftXMRsxszswOmdkeM1uRR2wi\n3RgfH2fPnjvYs+cOVq1amXc4IonKqw/iT4C73f3VwGuBg8B2YM7dzwW+FrwWAWpDSIeHtwG7gF3B\n9CNX5B1WkzLEKNKNzJuYzOwVwH53/6WW9w8CG939sJmtAebd/byWZdTENMDKcM1BGWKUwZN4H4SZ\nXbfEdxzA3f9jtxsL1vsrwH8GHgZeB+wDPgg84e6nBssYcKT+uuG7ShAFpoOjSDGlMd33TwmSQYOX\nAe8BVgGxEkSwzQuBD7j7/zKzT9PSnOTubmahmWBqaurY82q1SrVajRmGJCmPmxyJSLj5+Xnm5+d7\nXk+kJiYzOwW4ilpyuB240d2firXBWvPRfe5+dvD6jcA1wC8Bb3L3J83sdODramIqj7RHGal2IhJf\nKqOYzGylmU0DDwEnAhe6+7a4yQHA3Z8EHjezc4O33gJ8B/gKx48uE8Bdcbch/aVeO5mb28Tc3CY2\nb57QRWgiGViyicnM/gjYDNwMvNbdX0hwu1cCXzazIeB7wOXACcDtZvYe4DHgbQluT1KW5k2Olrqx\nkmoRIulq1wfxYeBF4Frg2lq/8THu7qfE3ai7PwS8IeSjt8Rdp+RLNzkS6T+6kloKr4jTbIiUiaba\nkL6mTmqR+JQgREQkVKnmYhIZBGWYnlykHdUgRFKgfhMpEtUgRNrI+my+eWhuLVHU+1Dyjk0kqnbD\nXEX6QpGnASlybCKqQUjfaz6bX8PCwtlcdtn7Uz1bjzr1dzc1DZGsKUHIAJmldiB+L0eOfCzVKTvq\nFw6Oju5mdHS3agVSSuqklkX67ZqD4804ZwPvpUi3BFVntmRBndSSiH6cGK9+Nj8y8nTeoSyimoYU\nmWoQ0iTtabuz1lgb2rjxQnbu/GzT2fqOHVfy93//TaA/akv9oN9qsEWQxg2DREpt8QihekLYDcDG\njVc2JQyNIMqfRnUVjLuX5lELV9I0MzPjw8OrHW5xuMWHh1f7zMxM3mHFMjq6JdgPDx63+Ojolsif\nS/b0m6QjOHZ2fcxVDUKaaNpuEalTgpBFxsfH+yIpdLqJUZo3OZJ49JsUizqpEzTInWtJ7Hsa5ddp\nnYP8mxWVfpPkxe2kzr1foZsHBe6D6Ke2+24lse+DXH4iaSNmH0TuB/2ugi1wgihz59rMzIyPjm7x\n0dEtsQ7KSex7mctPpOjiJgj1QQw4DSsUkSXFySp5PShwDaKsTSRJnLmriUmk2FANIl+DPDw0iX0f\n5PITKSqNYhpwmixOpP/FHcWkBCEaVijS55QgpLSSTlBR1qekKINE10FIKSXdOR1lfeoQl0GDroMY\nXL1ex5CnpK9/iLI+XXMhgyZugtAoppLTdQwikpo4WSWJB3ACsB/4SvB6BJgDDgF7gBUh30k8s+Yl\nqbP+tM6Gs6qVqIkpf2WugUo0lK2JCfgw8GVgd/D6BuDq4Pk24PqQ7yRdbrlI8gCVRoJojW/ZslO9\nUtmQ2sEj6QNUlPUN4kExbJ+VLAdDqRIEsBa4B3hTQw3iILA6eL4GOBjyvcQLLg9JHtTT+A8eFh+s\n18GjxJb6O1F/zGCImyCWpdd41dYfA38AvNTw3mp3Pxw8PwyszjyqEsrupvevZGHhk8eGhmZpdnaW\nsbGtjI1tZXZ2NvPt94Mbb7w56KeaACZy+y2lXDLvpDaz3wKecvf9ZlYNW8bd3cxCL3iYmpo69rxa\nrVKthq6i0JK+KUrSN/hpja/W4rcLeDKxbUSlTvh06QY9/Wl+f
p75+fneVxSn2tHLA/gE8Djwj8D/\nAX4KfIlaE9OaYJnT6eMmJvfit4HPzMx4pbLRly1b6TCZW/u0mkCS0a4psuh/i9I7YjYx5XoltZlt\nBD7i7r9tZjcAz7r7J81sO7VRTNtblvc84x1EeV9xPDa2lbm5TdSaRgBqzWl79tyRaRz9IO/fUvJT\nyqk2ggQx6e6bzGwEuB04E3gMeJu7P9+yvBJEBN0eCIp84NBkgiK9K2WC6JYSRGfdHlDLcAAucgIT\nKQMlCAG6b5JRE45I/4ubIPIa5ioiIgWnBNFnJievYHi4Pix1VzBs8YrElu8nZb++ouzxSwnEGfqU\n14M+Guaapm6HLQ7iMMeyTzFR9vglW5RxmGu31AehDtuklL3vJa/49fdXTuqDGAD1EUdzc5uYm9vE\n5s0TalqQzLT7+1NzV5+KU+3I68GANzHpquLklL2JJo/4l/r7K3tZDgJ0wyCR6OqTHB5vLinWtR+d\nFCn+5okAYWGh9l6ZylPCKUGUiCZWS1bSkxxmLev4l/r706ywfSxOtSOvBwPexOTe+4ijON/PcpRT\n3G0N4kisPOimQ+VEmW4YFPehBNGbOP+Rs/zPH3dbOkDlTwm62JQgpKM4ndxZdozH3ZY670Xai5sg\nNMxVRERCqZN6gMTp5M6yYzzuttR5L5IOXUk9YOJcCZvl1bNxt6UrfEWWpum+RSLoJZEMWhIatP3t\nZ3ETRO4dz908UCe19KCX0U6DNlJq0Pa336FRTDKIuhle2ctop0EbKTVo+9vv4iYIjWKS0tLkhRJG\nEwcmKE5WyeuBahDSoNuzXDUxRVfW/S1r3GlDk/WJtNfLBHdFmhwvC2XdX00cmCyNYpLSqjcx1Q4I\ntesf7ryzHAcySUfZbwSVFt0wqI8k0YY6CO2w9bPc0dHdjI7uVnKQgb7HehpUgyiYJM6KdWYtg0zX\nbyymC+X6RBJV5OZ1zAJTjIw8za23/pn+s4gMIDUxSYhZaknivRw58jENAxWRrihBFEwSbajH1zEF\n1Ed01Jqc+vXuX4PQ5yKSNSWIgkmi47W+jpGRp1OKslh0wZxISuJcPNHLAzgD+DrwHeDbwFXB+yPA\nHHAI2AOsCPluQpeNlEMStxdN86KhotxFTNNCiLRHiS6UOwp8yN0fNLOXA/vMbA64HJhz9xvMbBuw\nPXgMpNaRSHv3TnRdm0jzYqck4hORgouTVZJ8AHcBbwEOAquD99YAB0OWTTSrFlnRz4qLFF83NaWi\n1HpEskQZJ+szs7OACnA/teRwOPjoMLA6p7AKa9++hxJtWy9bx+7OnTtZufIcVq48h507dx57P2q/\nzVJ9FWUrB5HMxMkqSTyAlwP7gEuD18+1fH4k5DuJZtUiaz0rhlUOk4n1I/TaP5H1pGjT09MOpzSU\nxyk+PT3d1TrCaj2VygZN7iZ9jxL1QWBmJwJ3AF9y97uCtw+b2Rp3f9LMTgeeCvvu1NTUsefVapVq\ntZpytPmonxVfdtn7OXLkNOC/AOMsLFyQyORjvU5qlvVkbjfd9EXgMxy/gBBuuukP2bFjR0/r/cEP\nntTkbtJ35ufnmZ+f73k9mScIMzPgL4CH3f3TDR/tpva/tP6/9a6QrzcliH43Pj7O61//uuCq6OId\nsMbHx0t1IJ2cvIK9eydYWKi9Hh7exi/+4nkcOZJvXCJJaz15/vjHPx5vRXGqHb08gDcCLwEPAvuD\nx8XUhrneg4a5NmltyhkaWuGVysaeO1nznDc/Tkdxt01MS22j9X3dP0AGAbrlaP+qH9QqlQ0+NHRa\nYgezPEb09HJAnp6e9pGRdT4ysq5jcuhmGxrZJP1OCaIA0j7Q5DG0NOl9Wmofoh78e9mGyKCKmyB0\nR7mE9OOFY1nt0/e/f5C5uXuodULDtddeBdBzB7SI9ChOVsnrQYFrEFmctWbdXr54nyZ9ZGRdT7WJ\nsH04+eQzF5XdyMi62HGXp
V9BTVvlVbbfDjUx5SurZo0s/zCb92kmuBaj94Nu6z6MjKxLNEGEbaNo\nypLEZLEy/nZKEDkr26igqOs9vk/rU0uASVwEVzbqJymvMv52cROE+iASkvWFY3Vp9hM07tO+fU+n\ndr1Ava/hppv+EIAPf/hq9T+IFEGcrJLXgwLXIPKSZdNWlBpS0Zt2iqKMzRRSU8bfDtUgJE1RakhL\n1WYA3US+xfj4ODt2XNlQa7pS5VISebUW5CJOVsnrgWoQixTpbEaT4UVXpN9N+h9lnO673+QxbXQS\ntyhNU/NkeM33xS7qNNtZxNU8WWK0+4UXtbyKTGXWozhZJa8HBa5B6IwwvAwqlY2hfSRFLa+s4uq2\n76io5VVkKrPj0DDXfJVp6FuaHclRJ8MrankVrdM/67j6icrsuLgJQp3UAybt6TPCpgAP69Dr1JzS\n7waqo1PKK05WyetBgWsQZanOFuWsqqjlpbj6h8rsONTElL8yXANQlAThXtzyUlz9Q2VWEzdBWO27\n5WBmXqZ4i6i1iWl4eFvhRj6JSLLMDHe3rr9XpgOuEkQyZmdndeGayABRghARkVBxE4QulBMRkVBK\nECIiEkoJQkREQilBiIhIKCUIEREJpQQhIiKhlCBERCSUEoSIiIRSghARkVBKECIiEqpQCcLMLjaz\ng2b2XTPblnc8IiKDrDAJwsxOAP4UuBg4H3inmb0636jimZ+fzzuESBRnshRnssoQZxli7EVhEgRw\nEfCouz/m7keBvwQuyTmmWMryR6M4k6U4k1WGOMsQYy+KlCBeBTze8PqJ4D0REclBkRKE5vEWESmQ\nwtwPwszWA1PufnHw+hrgJXf/ZMMyxQhWRKRkSn3DIDNbDvxv4NeBHwMPAO9090dyDUxEZEAtzzuA\nOnf/uZl9AJgFTgD+QslBRCQ/halBiIhIsRSpk3oRMxsxszkzO2Rme8xsxRLLfcHMDpvZgYzj63hh\nn5l9Jvj8ITOrZBlfQwxt4zSz88zsPjP7mZlN5hFjEEenOH8nKMdvmdn/MLPXFjTOS4I495vZPjN7\nc9FibFjuDWb2czPbkmV8DdvvVJZVM/tJUJb7zezaIsYZLFMNYvy2mc1nHGI9hk7l+ZGGsjwQ/Pah\nx1UA3L2wD+AG4Org+Tbg+iWW+zWgAhzIMLYTgEeBs4ATgQeBV7cs8xvA3cHzXwW+kUMZRonzNOBf\nAdPAZE6/dZQ4/zXwiuD5xQUuz5c1PL+A2vU9hYqxYbn/BvwtsLWgZVkFdufxN9llnCuA7wBrg9er\nihhny/K/BdzTbp2FrkEAm4BdwfNdwKVhC7n7vcBzWQUViHJh37H43f1+YIWZrc42zM5xuvvT7v4P\nwNGMY2sUJc773P0nwcv7gbUZxwjR4vxpw8uXA89kGB9Ev+j0SuCvgaezDK5B1Di7Hn2TsChxXgbc\n4e5PALh71r85dH+x8WXAbe1WWPQEsdrdDwfPDwNZH1zbiXJhX9gyWR/UynIBYrdxvge4O9WIwkWK\n08wuNbNHgL8DrsootrqOMZrZq6gdPP5T8FYenZFRytKBfxM02d1tZudnFt1xUeL8ZWDEzL5uZv9g\nZu/KLLrjIv8fMrOTgHHgjnYrzH0Uk5nNAWtCPtrR+MLdvWDXQUSNpfXsJ+t9KFKZtRM5TjN7E/Bu\nYEN64SwpUpzufhdwl5n9GvAl4F+mGlXL5iMs82lge/D/ysjnLD1KnN8EznD3/2tmbwXuAs5NN6xF\nosR5InAhtWH6JwH3mdk33P27qUbWrJv/678N7HX359stlHuCcPfRpT4LOp7XuPuTZnY68FSGoXXy\nI+CMhtdnUMvY7ZZZG7yXpShxFkGkOIOO6T8HLnb3rJsVocvydPd7zWy5ma1092dTj64mSoyvB/6y\nlhtYBbzVzI66++5sQgQixOnuLzQ8/zsz+5yZjbj7kYxihGjl+TjwjLsvAAtm9t+B1wFZJoh
u/jbf\nQYfmJaAUndTbgufbWaKTOvj8LLLtpF4OfC/Y7hCdO6nXk0+nasc4G5adIr9O6ijleSa1Trj1ecTY\nRZzrOD6E/ELge0WLsWX5LwJbClqWqxvK8iLgsYLGeR5wD7WO4pOAA8D5RYszWO4VwLPAcMd1Zl3Y\nXe7wSFDoh4A9wIrg/VcCX21Y7jZqV1//P2qZ/PKM4nsrtau/HwWuCd77PeD3Gpb50+Dzh4ALcyrH\ntnFSa+J7HPgJtc7+HwIvL2Ccnw/+sPcHjwcKWp5XA98OYrwXeEPRYmxZNpcEEbEs3x+U5YPA/ySn\nk4OI/9c/Qm0k0wHgqgLHOQHcGmV9ulBORERCFX0Uk4iI5EQJQkREQilBiIhIKCUIEREJpQQhIiKh\nlCBERCSUEoQMPDNba2Z/E0wr/6iZfdrMTuzwnY+2vP7n4N9XmtlfpRmvSFZ0HYQMtGAeovuBP3P3\nXWa2DLgZOOLuV7f53gvufvJSr7vY/nJ3/3mc2EXSphqEDLo3AwvuXp+W/SXgQ8C7zez3zeyz9QXN\n7G/NbKOZXQ8MBzdd+VLjyszsrPqNq8zsBDP7lJk9EMxGekXwftXM7jWzvwG+bWYnmdlXzezB4CYu\nb8to30Xayn2yPpGcvQbY1/iGu79gZj9k8f8Pr33s283s/e7e6Q6B7wGed/eLzOwXgL1mtif4rAK8\nxt1/YGZbgR+5+28CmNkpve6USBJUg5BBl2Yb6xjw78xsP/ANanOLnRN89oC7/yB4/i1g1MyuN7M3\nuvs/pRiTSGRKEDLoHqY29fUxwRn8mcDzNP8f+Rcx1v8Bd68Ej3Xufk/w/rG7znntngEVapO8TZvZ\nx2JsRyRxShAy0Nz9a8BJ9TuAmdkJwI3UZjj9PvArVnMGtemm646aWacm2lngffXlzOzc4E5eTYJ7\nnfzM3b8M/BG1KcJFcqc+CBHYDHwuOHNfBnwV+Ki7HzWzf6RWy3iE5r6Km4Fvmdk+d38XzU1V9eef\npzY3/zeD0VJPBdvyluUvAD5lZi8BLwK/n/D+icSiYa4iIhJKTUwiIhJKCUJEREIpQYiISCglCBER\nCaUEISIioZQgREQklBKEiIiEUoIQEZFQ/x8EKwOPDQIAnAAAAABJRU5ErkJggg==\n",
m@94 168 "text/plain": [
m@94 169 "<matplotlib.figure.Figure at 0x7efedacd46d0>"
m@94 170 ]
m@94 171 },
m@94 172 "metadata": {},
m@94 173 "output_type": "display_data"
m@94 174 }
m@94 175 ],
m@94 176 "source": [
m@94 177 "corr, pval = pearsonr(df_global['Outliers'], df_global['N_Country'])  # Pearson r and its p-value\n",
m@94 178 "print('correlation %s' % corr)  # single %-formatted argument: same text on Python 2, and valid on Python 3\n",
m@94 179 "print('p-value %s' % pval)\n",
m@94 180 "\n",
m@94 181 "plt.scatter(df_global['Outliers'], df_global['N_Country'])  # x: outlier proportion, y: samples per country\n",
m@94 182 "plt.xlabel('Outliers');\n",
m@94 183 "plt.ylabel('N');"
m@94 184 ]
m@94 185 },
m@94 186 {
m@94 187 "cell_type": "code",
m@94 188 "execution_count": null,
m@94 189 "metadata": {
m@94 190 "collapsed": true
m@94 191 },
m@94 192 "outputs": [],
m@94 193 "source": []
m@94 194 }
m@94 195 ],
m@94 196 "metadata": {
m@94 197 "kernelspec": {
m@94 198 "display_name": "Python 2",
m@94 199 "language": "python",
m@94 200 "name": "python2"
m@94 201 },
m@94 202 "language_info": {
m@94 203 "codemirror_mode": {
m@94 204 "name": "ipython",
m@94 205 "version": 2
m@94 206 },
m@94 207 "file_extension": ".py",
m@94 208 "mimetype": "text/x-python",
m@94 209 "name": "python",
m@94 210 "nbconvert_exporter": "python",
m@94 211 "pygments_lexer": "ipython2",
m@94 212 "version": "2.7.12"
m@94 213 }
m@94 214 },
m@94 215 "nbformat": 4,
m@94 216 "nbformat_minor": 2
m@94 217 }