view notebooks/correlation_n_outliers.ipynb @ 105:edd82eb89b4b branch-tests tip

Merge
author Maria Panteli
date Sun, 15 Oct 2017 13:36:59 +0100
parents 69521f86d931
children
line wrap: on
line source
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pickle\n",
    "from scipy.stats import pearsonr\n",
    "import sys\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "sys.path.append('../')\n",
    "import scripts.outliers as outliers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Antigua and Barbuda\n",
      "Australia\n",
      "Cuba\n",
      "Fiji\n",
      "French Polynesia\n",
      "Grenada\n",
      "Iceland\n",
      "Jamaica\n",
      "Japan\n",
      "Kiribati\n",
      "Malta\n",
      "New Zealand\n",
      "Philippines\n",
      "Puerto Rico\n",
      "Republic of Serbia\n",
      "Saint Lucia\n",
      "Samoa\n",
      "Solomon Islands\n",
      "South Korea\n",
      "The Bahamas\n",
      "Trinidad and Tobago\n"
     ]
    }
   ],
   "source": [
    "DATA_FILE = '../data/lda_data_8.pickle'\n",
    "METADATA_FILE = '../data/metadata.csv'\n",
    "dataset, ddf, w_dict = outliers.load_data(DATA_FILE, METADATA_FILE)\n",
    "X_list, Y, Yaudio = dataset\n",
    "X = np.concatenate(X_list, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country</th>\n",
       "      <th>Outliers</th>\n",
       "      <th>N_Country</th>\n",
       "      <th>N_Outliers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Canada</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>100</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Lithuania</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>47</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Cambodia</td>\n",
       "      <td>0.210526</td>\n",
       "      <td>19</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Ethiopia</td>\n",
       "      <td>0.257143</td>\n",
       "      <td>35</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Swaziland</td>\n",
       "      <td>0.163265</td>\n",
       "      <td>98</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Country  Outliers  N_Country  N_Outliers\n",
       "0     Canada  0.050000        100           5\n",
       "1  Lithuania  0.000000         47           0\n",
       "2   Cambodia  0.210526         19           4\n",
       "3   Ethiopia  0.257143         35           9\n",
       "4  Swaziland  0.163265         98          16"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n",
    "df_global.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pearson correlation between percentage of outliers and number of samples per country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "correlation -0.0102335874359\n",
      "p-value 0.905523601988\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEPCAYAAABY9lNGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH25JREFUeJzt3X2UXHWd5/H3N4SebRQMnXASNDAwYVjEg07hymQ3zknp\n2N04D4Ek5/jArNuLzjKOCkdthwSDh3an40EcGEdn3LOMo+S4whxmWJg4Mt1pXHuW7CLsxIBRyEZ0\nUNAlPAQcxm2XePjuH3Urqaq+XXXr1n2s+rzOqZN6uHXv9/6qc7/393B/19wdERGRVsvyDkBERIpJ\nCUJEREIpQYiISCglCBERCaUEISIioZQgREQkVGoJwsy+YGaHzexAw3ufMrNHzOwhM/uvZvaKhs+u\nMbPvmtlBMxtLKy4REYkmzRrEF4GLW97bA7zG3V8HHAKuATCz84G3A+cH3/mcmal2IyKSo9QOwu5+\nL/Bcy3tz7v5S8PJ+YG3w/BLgNnc/6u6PAY8CF6UVm4iIdJbnWfq7gbuD568Enmj47AngVZlHJCIi\nx+SSIMxsB/Ciu9/aZjHNASIikqPlWW/QzP498BvArze8/SPgjIbXa4P3Wr+rpCEiEoO7W7ffybQG\nYWYXA38AXOLuP2v4aDfwDjMbMrOzgV8GHghbh7sX/nHdddflHoPiVJyKUzHWH3GlVoMws9uAjcAq\nM3scuI7aqKUhYM7MAO5z9/e5+8NmdjvwMPBz4H3ey16JiEjPUksQ7v7OkLe/0Gb5TwCfSCseERHp\njq41SEG1Ws07hEgUZ7IUZ7LKEGcZYuyFlaklx8zU8iQi0iUzw4veSS0iIuWhBCEiIqGUIEREJJQS\nhIiIhFKCEBGRUEoQIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQ\nIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQIiISSglCRERCKUGI\niEgoJQgREQmlBCEiIqFSSxBm9gUzO2xmBxreGzGzOTM7ZGZ7zGxFw2fXmNl3zeygmY2lFZeIiEST\nZg3ii8DFLe9tB+bc/Vzga8FrzOx84O3A+cF3Pmdmqt2IiOQotYOwu98LPNfy9iZgV/B8F3Bp8PwS\n4DZ3P+rujwGPAhelFVuZzM7OMja2lbGxrczOzsZapvXzTq+jbrfRzp07WbnyHFauPIedO3dG2kav\ncXYbYxJll8Q2kihvkUy4e2oP4CzgQMPr5xqeW/018Fngdxo++zywNWR9PkhmZmZ8eHi1wy0Ot/jw\n8GqfmZnpapnWz4eGTvOhoRVLvh4eXu3T09Mdt9toenra4ZRjy8Mpvnz5y9puo9N+DA2t8KGh09rE\n3fx5pxjjlG+U8u92G2Fxd1veIt0Kjp3dH8PjfCnyytskiOD1EV86QWwJWV/iBVdko6NbgoOGB49b\nfHR0S1fLhH0O60NezzhscVjvJ598RsftNhoZWReyjbUO0w7rgudrm7ZRqWxYYj/qy6ztEPf6rmKM\nU75Ryr/7bSyOO6z8ut2OSDtxE8TylCsorQ6b2Rp3f9LMTgeeCt7/EXBGw3Jrg/cWmZqaOva8Wq1S\nrVbTibQknnnm2bbvhX0O/9zy+nlgAvgkAC+88KEEIvspcAPwmeD1VdS6mf4EgIce+hCzs7OMj483\nfOcAsC2I48cJxCAymObn55mfn+99RXGyStQHi2sQNwDbgufbgeuD5+cDDwJDwNnA9wALWV8aybWw\njjdPTDqs92XLVvr09HTTMpXKBodVDc07q5rOzsM+Nzu5qenG7NSWM9hJX7bs1GPLxGliOv566ZpL\npbKxaV+XLVvZ8J2ZpriTbmKamZnxSmVD034uW3aqVyobjq0nShPU6OgWHx3dsuS21cQkRUDRmpiA\n26idBr4IPA5cDowA9wCHgD3AioblP0qtc/ogML7EOlMqvuKanp5ue7CuNWFMBs0yteeLm0maP69U\nNjYd2CqVjYsO5pXKBq9UNvjIyDqvVDZ2PGBNTEw4nBokgcngeaemrZGmhLc4jkkfGVl3LM7GA/L0\n9HRX8TVqPmhPutmpbjYSxN1cxq1JoP66UtmwZIJa6jtLvQ77jkiSCpcg0ngMYoLo1A6eREdr2DLd\nntUujnMypFZxUlNNpl5TiXrG3s0+dVem0fozmrcb/p0kOrZFkqYE0aeidJR2OvuM2hTSuMxS211q\nXWHLL1t2stc6m9d5rcN6MkgM9U7x8ITXKdZuOo/D1hc3QTR/LzyGJDq221FNQ+JQguhTeZ2Rhh3o\nKpWNS8YSFue6dReENDON9HwAjXoQXqrsovQLhJVx83ab+0jq30kzQah2InEpQfSxPM4aww5GtQ7v\n9s1drW3trdcyTExMdNUBHjW2zgf04/HW+11q/RcbluwX6LTdoaEVx/pzum0miyPt2kmaVPPJlxKE\nJC5qs1M361jqvV5jC9NtLSip7aZ1MCxrglDNJ39KEJK6sv1Hj1MLKrKylX9dWRNbP4mbIDQhnkQ2\nPj7OnXfuYnR0N6Oju7nzzl0tF7oVS1i8q1atzjus2MpW/lJ+Vksu5WBmXqZ4pXhmZ2fZvHmChYXa\nVePDw9t0oE2Zyjx/Zoa7W9ffK9MBVwlCkjA7O8uNN94MwOTkFTpQZUBlni8lCMldlINA3geKMsQo\nkrS4CSL3juduHqiTurCSmJpcMYqkA41ikjwlMTW5Yhxcuk4iXXEThEYxiaRAd4iLrt6JPTe3ibm5\nTWzePFHKMuvL3zxOVsnrgWoQhVWG5pusYsx7P8umH2ptRf/NUROT5C3Pq4yjSirGdsv0wwEvS/1Q\nXkXfh7gJIus7ykkfGx8f7zjiJ8oyaUoixtZx/Xv3Tmhcfw8mJ69g794JFhZqr4eHtzE5uSvfoKQm\nTlbJ60EJaxB5nzFL8nq9R4csVvb/J0X/zVENonh0pjmY6lNiHL+WQr95J3nXLHvVr7+5LpRL0djY\nVubmNgETwTu1eXT27Lkjz7C6MsgXjS2175o6Qsom7oVyqkHIkga5BtRu3/v1bFGklWoQKSr7mWY/\n1IDiGuR9l/6jGkQB6UxTREotTs92Xg9KOIopC2mNACn6yIw0Dcq+l330kESDLpQbTGkfyAb5ANLv\n+z4oSVDiJwj1QZSc2solLv3tDI64fRCarE9EREKpk7rkNE2BxKW/HelETUx9YJAvZpPe6G9nMOiW\noyIiEkp9ECIikqhcEoSZXWNm3zGzA2Z2q5n9gpmNmNmcmR0ysz1mtiKP2EREpCbzBGFmZwH/AbjQ\n3S8ATgDeAWwH5tz9XOBrwWsREclJHjWIfwKOAieZ2XLgJODHwCagPoRiF3BpDrGJiEgg8wTh7keA\nG4EfUksMz7v7HLDa3Q8Hix0GVmcdm4iIHJf5dRBmtg74IHAW8BPgr8zs3zYu4+5uZqHDlaampo49\nr1arVKvVtEIVESml+fl55ufne15P5sNczeztwKi7/27w+l3AeuDNwJvc/UkzOx34uruf1/JdDXMV\nEelSmYa5HgTWm9mwmRnwFuBh4CscnxRmArgrh9hERCSQy4VyZnY1tSTwEvBN4HeBk4HbgTOBx4C3\nufvzLd9TDUJEpEu6kloATZ0gIospQUjpb3EqIulQghDN7y8iocrUSS0iIiWg+0H0Ec3vLyJJUhNT\nn1EntYi0Uh+EiIiEUh+EiIgkSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIOSY2dlZxsa2Mja2ldnZ\n2bzDKRyVjwwaDXMVQPM4daLykTLTdRDSE83j1J7KR8pM10H0GTVniEjeNBdTAbU2Z+zdO5F6c4bm\ncWpP5SODSE1MOQubOymv5gzN49SeykfKKm4Tk2oQOVqqppCX8fFxHfTaUPnIoFGCyNGNN94cJIda\nTWFhofaemjNEpAiUIApofHycO+/c1dCcoeGUIpI99UHkSGPrRSQLug6ipNTxKSJpU4IQyYESvJSB\nEoRIxtREKGWhBCGSMU2/IWWhqTak75Vh+pFnnjlc+BhFolINQkqhiM05rTENDX0QOJEXX/xUYWIU\nATUxSZ8ranNOYyf1M888y/79l1O0GEVK1cRkZivM7K/N7BEze9jMftXMRsxszswOmdkeM1uRR2wi\n3RgfH2fPnjvYs+cOVq1amXc4IonKqw/iT4C73f3VwGuBg8B2YM7dzwW+FrwWAWpDSIeHtwG7gF3B\n9CNX5B1WkzLEKNKNzJuYzOwVwH53/6WW9w8CG939sJmtAebd/byWZdTENMDKcM1BGWKUwZN4H4SZ\nXbfEdxzA3f9jtxsL1vsrwH8GHgZeB+wDPgg84e6nBssYcKT+uuG7ShAFpoOjSDGlMd33TwmSQYOX\nAe8BVgGxEkSwzQuBD7j7/zKzT9PSnOTubmahmWBqaurY82q1SrVajRmGJCmPmxyJSLj5+Xnm5+d7\nXk+kJiYzOwW4ilpyuB240d2firXBWvPRfe5+dvD6jcA1wC8Bb3L3J83sdODramIqj7RHGal2IhJf\nKqOYzGylmU0DDwEnAhe6+7a4yQHA3Z8EHjezc4O33gJ8B/gKx48uE8Bdcbch/aVeO5mb28Tc3CY2\nb57QRWgiGViyicnM/gjYDNwMvNbdX0hwu1cCXzazIeB7wOXACcDtZvYe4DHgbQluT1KW5k2Olrqx\nkmoRIulq1wfxYeBF4Frg2lq/8THu7qfE3ai7PwS8IeSjt8Rdp+RLNzkS6T+6kloKr4jTbIiUiaba\nkL6mTmqR+JQgREQkVKnmYhIZBGWYnlykHdUgRFKgfhMpEtUgRNrI+my+eWhuLVHU+1Dyjk0kqnbD\nXEX6QpGnASlybCKqQUjfaz6bX8PCwtlcdtn7Uz1bjzr1dzc1DZGsKUHIAJmldiB+L0eOfCzVKTvq\nFw6Oju5mdHS3agVSSuqklkX67ZqD4804ZwPvpUi3BFVntmRBndSSiH6cGK9+Nj8y8nTeoSyimoYU\nmWoQ0iTtabuz1lgb2rjxQnbu/GzT2fqOHVfy93//TaA/akv9oN9qsEWQxg2DREpt8QihekLYDcDG\njVc2JQyNIMqfRnUVjLuX5lELV9I0MzPjw8OrHW5xuMWHh1f7zMxM3mHFMjq6JdgPDx63+Ojolsif\nS/b0m6QjOHZ2fcxVDUKaaNpuEalTgpBFxsfH+yIpdLqJUZo3OZJ49JsUizqpEzTInWtJ7Hsa5ddp\nnYP8mxWVfpPkxe2kzr1foZsHBe6D6Ke2+24lse+DXH4iaSNmH0TuB/2ugi1wgihz59rMzIyPjm7x\n0dEtsQ7KSex7mctPpOjiJgj1QQw4DSsUkSXFySp5PShwDaKsTSRJnLmriUmk2FANIl+DPDw0iX0f\n5PITKSqNYhpwmixOpP/FHcWkBCEaVijS55QgpLSSTlBR1qekKINE10FIKSXdOR1lfeoQl0GDroMY\nXL1ex5CnpK9/iLI+XXMhgyZugtAoppLTdQwikpo4WSWJB3ACsB/4SvB6BJgDDgF7gBUh30k8s+Yl\nqbP+tM6Gs6qVqIkpf2WugUo0lK2JCfgw8GVgd/D6BuDq4Pk24PqQ7yRdbrlI8gCVRoJojW/ZslO9\nUtmQ2sEj6QNUlPUN4kExbJ+VLAdDqRIEsBa4B3hTQw3iILA6eL4GOBjyvcQLLg9JHtTT+A8eFh+s\n18GjxJb6O1F/zGCImyCWpdd41dYfA38AvNTw3mp3Pxw8PwyszjyqEsrupvevZGHhk8eGhmZpdnaW\nsbGtjI1tZXZ2NvPt94Mbb7w56KeaACZy+y2lXDLvpDaz3wKecvf9ZlYNW8bd3cxCL3iYmpo69rxa\nrVKthq6i0JK+KUrSN/hpja/W4rcLeDKxbUSlTvh06QY9/Wl+fp75+fneVxSn2tHLA/gE8Djwj8D/\nAX4KfIlaE9OaYJnT6eMmJvfit4HPzMx4pbLRly1b6TCZW/u0mkCS0a4psuh/i9I7YjYx5XoltZlt\nBD7i7r9tZjcAz7r7J81sO7VRTNtblvc84x1EeV9xPDa2lbm5TdSaRgBqzWl79tyRaRz9IO/fUvJT\nyqk2ggQx6e6bzGwEuB04E3gMeJu7P9+yvBJEBN0eCIp84NBkgiK9K2WC6JYSRGfdHlDLcAAucgIT\nKQMlCAG6b5JRE45I/4ubIPIa5ioiIgWnBNFnJievYHi4Pix1VzBs8YrElu8nZb++ouzxSwnEGfqU\n14M+Guaapm6HLQ7iMMeyTzFR9vglW5RxmGu31AehDtuklL3vJa/49fdXTuqDGAD1EUdzc5uYm9vE\n5s0TalqQzLT7+1NzV5+KU+3I68GANzHpquLklL2JJo/4l/r7K3tZDgJ0wyCR6OqTHB5vLinWtR+d\nFCn+5okAYWGh9l6ZylPCKUGUiCZWS1bSkxxmLev4l/r706ywfSxOtSOvBwPexOTe+4ijON/PcpRT\n3G0N4kisPOimQ+VEmW4YFPehBNGbOP+Rs/zPH3dbOkDlTwm62JQgpKM4ndxZdozH3ZY670Xai5sg\nNMxVRERCqZN6gMTp5M6yYzzuttR5L5IOXUk9YOJcCZvl1bNxt6UrfEWWpum+RSLoJZEMWhIatP3t\nZ3ETRO4dz908UCe19KCX0U6DNlJq0Pa336FRTDKIuhle2ctop0EbKTVo+9vv4iYIjWKS0tLkhRJG\nEwcmKE5WyeuBahDSoNuzXDUxRVfW/S1r3GlDk/WJtNfLBHdFmhwvC2XdX00cmCyNYpLSqjcx1Q4I\ntesf7ryzHAcySUfZbwSVFt0wqI8k0YY6CO2w9bPc0dHdjI7uVnKQgb7HehpUgyiYJM6KdWYtg0zX\nbyymC+X6RBJV5OZ1zAJTjIw8za23/pn+s4gMIDUxSYhZaknivRw58jENAxWRrihBFEwSbajH1zEF\n1Ed01Jqc+vXuX4PQ5yKSNSWIgkmi47W+jpGRp1OKslh0wZxISuJcPNHLAzgD+DrwHeDbwFXB+yPA\nHHAI2AOsCPluQpeNlEMStxdN86KhotxFTNNCiLRHiS6UOwp8yN0fNLOXA/vMbA64HJhz9xvMbBuw\nPXgMpNaRSHv3TnRdm0jzYqck4hORgouTVZJ8AHcBbwEOAquD99YAB0OWTTSrFlnRz4qLFF83NaWi\n1HpEskQZJ+szs7OACnA/teRwOPjoMLA6p7AKa9++hxJtWy9bx+7OnTtZufIcVq48h507dx57P2q/\nzVJ9FWUrB5HMxMkqSTyAlwP7gEuD18+1fH4k5DuJZtUiaz0rhlUOk4n1I/TaP5H1pGjT09MOpzSU\nxyk+PT3d1TrCaj2VygZN7iZ9jxL1QWBmJwJ3AF9y97uCtw+b2Rp3f9LMTgeeCvvu1NTUsefVapVq\ntZpytPmonxVfdtn7OXLkNOC/AOMsLFyQyORjvU5qlvVkbjfd9EXgMxy/gBBuuukP2bFjR0/r/cEP\nntTkbtJ35ufnmZ+f73k9mScIMzPgL4CH3f3TDR/tpva/tP6/9a6QrzcliH43Pj7O61//uuCq6OId\nsMbHx0t1IJ2cvIK9eydYWKi9Hh7exi/+4nkcOZJvXCJJaz15/vjHPx5vRXGqHb08gDcCLwEPAvuD\nx8XUhrneg4a5NmltyhkaWuGVysaeO1nznDc/Tkdxt01MS22j9X3dP0AGAbrlaP+qH9QqlQ0+NHRa\nYgezPEb09HJAnp6e9pGRdT4ysq5jcuhmGxrZJP1OCaIA0j7Q5DG0NOl9Wmofoh78e9mGyKCKmyB0\nR7mE9OOFY1nt0/e/f5C5uXuodULDtddeBdBzB7SI9ChOVsnrQYFrEFmctWbdXr54nyZ9ZGRdT7WJ\nsH04+eQzF5XdyMi62HGXpV9BTVvlVbbfDjUx5SurZo0s/zCb92kmuBaj94Nu6z6MjKxLNEGEbaNo\nypLEZLEy/nZKEDkr26igqOs9vk/rU0uASVwEVzbqJymvMv52cROE+iASkvWFY3Vp9hM07tO+fU+n\ndr1Ava/hppv+EIAPf/hq9T+IFEGcrJLXgwLXIPKSZdNWlBpS0Zt2iqKMzRRSU8bfDtUgJE1RakhL\n1WYA3US+xfj4ODt2XNlQa7pS5VISebUW5CJOVsnrgWoQixTpbEaT4UVXpN9N+h9lnO673+QxbXQS\ntyhNU/NkeM33xS7qNNtZxNU8WWK0+4UXtbyKTGXWozhZJa8HBa5B6IwwvAwqlY2hfSRFLa+s4uq2\n76io5VVkKrPj0DDXfJVp6FuaHclRJ8MrankVrdM/67j6icrsuLgJQp3UAybt6TPCpgAP69Dr1JzS\n7waqo1PKK05WyetBgWsQZanOFuWsqqjlpbj6h8rsONTElL8yXANQlAThXtzyUlz9Q2VWEzdBWO27\n5WBmXqZ4i6i1iWl4eFvhRj6JSLLMDHe3rr9XpgOuEkQyZmdndeGayABRghARkVBxE4QulBMRkVBK\nECIiEkoJQkREQilBiIhIKCUIEREJpQQhIiKhlCBERCSUEoSIiIRSghARkVBKECIiEqpQCcLMLjaz\ng2b2XTPblnc8IiKDrDAJwsxOAP4UuBg4H3inmb0636jimZ+fzzuESBRnshRnssoQZxli7EVhEgRw\nEfCouz/m7keBvwQuyTmmWMryR6M4k6U4k1WGOMsQYy+KlCBeBTze8PqJ4D0REclBkRKE5vEWESmQ\nwtwPwszWA1PufnHw+hrgJXf/ZMMyxQhWRKRkSn3DIDNbDvxv4NeBHwMPAO9090dyDUxEZEAtzzuA\nOnf/uZl9AJgFTgD+QslBRCQ/halBiIhIsRSpk3oRMxsxszkzO2Rme8xsxRLLfcHMDpvZgYzj63hh\nn5l9Jvj8ITOrZBlfQwxt4zSz88zsPjP7mZlN5hFjEEenOH8nKMdvmdn/MLPXFjTOS4I495vZPjN7\nc9FibFjuDWb2czPbkmV8DdvvVJZVM/tJUJb7zezaIsYZLFMNYvy2mc1nHGI9hk7l+ZGGsjwQ/Pah\nx1UA3L2wD+AG4Org+Tbg+iWW+zWgAhzIMLYTgEeBs4ATgQeBV7cs8xvA3cHzXwW+kUMZRonzNOBf\nAdPAZE6/dZQ4/zXwiuD5xQUuz5c1PL+A2vU9hYqxYbn/BvwtsLWgZVkFdufxN9llnCuA7wBrg9er\nihhny/K/BdzTbp2FrkEAm4BdwfNdwKVhC7n7vcBzWQUViHJh37H43f1+YIWZrc42zM5xuvvT7v4P\nwNGMY2sUJc773P0nwcv7gbUZxwjR4vxpw8uXA89kGB9Ev+j0SuCvgaezDK5B1Di7Hn2TsChxXgbc\n4e5PALh71r85dH+x8WXAbe1WWPQEsdrdDwfPDwNZH1zbiXJhX9gyWR/UynIBYrdxvge4O9WIwkWK\n08wuNbNHgL8DrsootrqOMZrZq6gdPP5T8FYenZFRytKBfxM02d1tZudnFt1xUeL8ZWDEzL5uZv9g\nZu/KLLrjIv8fMrOTgHHgjnYrzH0Uk5nNAWtCPtrR+MLdvWDXQUSNpfXsJ+t9KFKZtRM5TjN7E/Bu\nYEN64SwpUpzufhdwl5n9GvAl4F+mGlXL5iMs82lge/D/ysjnLD1KnN8EznD3/2tmbwXuAs5NN6xF\nosR5InAhtWH6JwH3mdk33P27qUbWrJv/678N7HX359stlHuCcPfRpT4LOp7XuPuTZnY68FSGoXXy\nI+CMhtdnUMvY7ZZZG7yXpShxFkGkOIOO6T8HLnb3rJsVocvydPd7zWy5ma1092dTj64mSoyvB/6y\nlhtYBbzVzI66++5sQgQixOnuLzQ8/zsz+5yZjbj7kYxihGjl+TjwjLsvAAtm9t+B1wFZJohu/jbf\nQYfmJaAUndTbgufbWaKTOvj8LLLtpF4OfC/Y7hCdO6nXk0+nasc4G5adIr9O6ijleSa1Trj1ecTY\nRZzrOD6E/ELge0WLsWX5LwJbClqWqxvK8iLgsYLGeR5wD7WO4pOAA8D5RYszWO4VwLPAcMd1Zl3Y\nXe7wSFDoh4A9wIrg/VcCX21Y7jZqV1//P2qZ/PKM4nsrtau/HwWuCd77PeD3Gpb50+Dzh4ALcyrH\ntnFSa+J7HPgJtc7+HwIvL2Ccnw/+sPcHjwcKWp5XA98OYrwXeEPRYmxZNpcEEbEs3x+U5YPA/ySn\nk4OI/9c/Qm0k0wHgqgLHOQHcGmV9ulBORERCFX0Uk4iI5EQJQkREQilBiIhIKCUIEREJpQQhIiKh\nlCBERCSUEoQMPDNba2Z/E0wr/6iZfdrMTuzwnY+2vP7n4N9XmtlfpRmvSFZ0HYQMtGAeovuBP3P3\nXWa2DLgZOOLuV7f53gvufvJSr7vY/nJ3/3mc2EXSphqEDLo3AwvuXp+W/SXgQ8C7zez3zeyz9QXN\n7G/NbKOZXQ8MBzdd+VLjyszsrPqNq8zsBDP7lJk9EMxGekXwftXM7jWzvwG+bWYnmdlXzezB4CYu\nb8to30Xayn2yPpGcvQbY1/iGu79gZj9k8f8Pr33s283s/e7e6Q6B7wGed/eLzOwXgL1mtif4rAK8\nxt1/YGZbgR+5+28CmNkpve6USBJUg5BBl2Yb6xjw78xsP/ANanOLnRN89oC7/yB4/i1g1MyuN7M3\nuvs/pRiTSGRKEDLoHqY29fUxwRn8mcDzNP8f+Rcx1v8Bd68Ej3Xufk/w/rG7znntngEVapO8TZvZ\nx2JsRyRxShAy0Nz9a8BJ9TuAmdkJwI3UZjj9PvArVnMGtemm646aWacm2lngffXlzOzc4E5eTYJ7\nnfzM3b8M/BG1KcJFcqc+CBHYDHwuOHNfBnwV+Ki7HzWzf6RWy3iE5r6Km4Fvmdk+d38XzU1V9eef\npzY3/zeD0VJPBdvyluUvAD5lZi8BLwK/n/D+icSiYa4iIhJKTUwiIhJKCUJEREIpQYiISCglCBER\nCaUEISIioZQgREQklBKEiIiEUoIQEZFQ/x8EKwOPDQIAnAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7efedacd46d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "corr, pval = pearsonr(df_global['Outliers'], df_global['N_Country'])\n",
    "print 'correlation', corr\n",
    "print 'p-value', pval\n",
    "\n",
    "plt.scatter(df_global['Outliers'], df_global['N_Country'])\n",
    "plt.xlabel('Outliers');\n",
    "plt.ylabel('N');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}