m@94
|
1 {
|
m@94
|
2 "cells": [
|
m@94
|
3 {
|
m@94
|
4 "cell_type": "code",
|
m@94
|
5 "execution_count": 1,
|
m@94
|
6 "metadata": {
|
m@94
|
7 "collapsed": true
|
m@94
|
8 },
|
m@94
|
9 "outputs": [],
|
m@94
|
10 "source": [
|
m@94
|
11 "import numpy as np\n",
|
m@94
|
12 "import pickle\n",
|
m@94
|
13 "from scipy.stats import pearsonr\n",
|
m@94
|
14 "import sys\n",
|
m@94
|
15 "\n",
|
m@94
|
16 "%matplotlib inline\n",
|
m@94
|
17 "import matplotlib.pyplot as plt\n",
|
m@94
|
18 "\n",
|
m@94
|
19 "%load_ext autoreload\n",
|
m@94
|
20 "%autoreload 2\n",
|
m@94
|
21 "\n",
|
m@94
|
22 "sys.path.append('../')\n",
|
m@94
|
23 "import scripts.outliers as outliers"
|
m@94
|
24 ]
|
m@94
|
25 },
|
m@94
|
26 {
|
m@94
|
27 "cell_type": "code",
|
m@94
|
28 "execution_count": 5,
|
m@94
|
29 "metadata": {},
|
m@94
|
30 "outputs": [
|
m@94
|
31 {
|
m@94
|
32 "name": "stdout",
|
m@94
|
33 "output_type": "stream",
|
m@94
|
34 "text": [
|
m@94
|
35 "Antigua and Barbuda\n",
|
m@94
|
36 "Australia\n",
|
m@94
|
37 "Cuba\n",
|
m@94
|
38 "Fiji\n",
|
m@94
|
39 "French Polynesia\n",
|
m@94
|
40 "Grenada\n",
|
m@94
|
41 "Iceland\n",
|
m@94
|
42 "Jamaica\n",
|
m@94
|
43 "Japan\n",
|
m@94
|
44 "Kiribati\n",
|
m@94
|
45 "Malta\n",
|
m@94
|
46 "New Zealand\n",
|
m@94
|
47 "Philippines\n",
|
m@94
|
48 "Puerto Rico\n",
|
m@94
|
49 "Republic of Serbia\n",
|
m@94
|
50 "Saint Lucia\n",
|
m@94
|
51 "Samoa\n",
|
m@94
|
52 "Solomon Islands\n",
|
m@94
|
53 "South Korea\n",
|
m@94
|
54 "The Bahamas\n",
|
m@94
|
55 "Trinidad and Tobago\n"
|
m@94
|
56 ]
|
m@94
|
57 }
|
m@94
|
58 ],
|
m@94
|
59 "source": [
|
m@94
|
60 "DATA_FILE = '../data/lda_data_8.pickle'\n",
|
m@94
|
61 "METADATA_FILE = '../data/metadata.csv'\n",
|
m@94
|
62 "dataset, ddf, w_dict = outliers.load_data(DATA_FILE, METADATA_FILE)\n",
|
m@94
|
63 "X_list, Y, Yaudio = dataset\n",
|
m@94
|
64 "X = np.concatenate(X_list, axis=1)"
|
m@94
|
65 ]
|
m@94
|
66 },
|
m@94
|
67 {
|
m@94
|
68 "cell_type": "code",
|
m@94
|
69 "execution_count": 6,
|
m@94
|
70 "metadata": {},
|
m@94
|
71 "outputs": [
|
m@94
|
72 {
|
m@94
|
73 "data": {
|
m@94
|
74 "text/html": [
|
m@94
|
75 "<div>\n",
|
m@94
|
76 "<table border=\"1\" class=\"dataframe\">\n",
|
m@94
|
77 " <thead>\n",
|
m@94
|
78 " <tr style=\"text-align: right;\">\n",
|
m@94
|
79 " <th></th>\n",
|
m@94
|
80 " <th>Country</th>\n",
|
m@94
|
81 " <th>Outliers</th>\n",
|
m@94
|
82 " <th>N_Country</th>\n",
|
m@94
|
83 " <th>N_Outliers</th>\n",
|
m@94
|
84 " </tr>\n",
|
m@94
|
85 " </thead>\n",
|
m@94
|
86 " <tbody>\n",
|
m@94
|
87 " <tr>\n",
|
m@94
|
88 " <th>0</th>\n",
|
m@94
|
89 " <td>Canada</td>\n",
|
m@94
|
90 " <td>0.050000</td>\n",
|
m@94
|
91 " <td>100</td>\n",
|
m@94
|
92 " <td>5</td>\n",
|
m@94
|
93 " </tr>\n",
|
m@94
|
94 " <tr>\n",
|
m@94
|
95 " <th>1</th>\n",
|
m@94
|
96 " <td>Lithuania</td>\n",
|
m@94
|
97 " <td>0.000000</td>\n",
|
m@94
|
98 " <td>47</td>\n",
|
m@94
|
99 " <td>0</td>\n",
|
m@94
|
100 " </tr>\n",
|
m@94
|
101 " <tr>\n",
|
m@94
|
102 " <th>2</th>\n",
|
m@94
|
103 " <td>Cambodia</td>\n",
|
m@94
|
104 " <td>0.210526</td>\n",
|
m@94
|
105 " <td>19</td>\n",
|
m@94
|
106 " <td>4</td>\n",
|
m@94
|
107 " </tr>\n",
|
m@94
|
108 " <tr>\n",
|
m@94
|
109 " <th>3</th>\n",
|
m@94
|
110 " <td>Ethiopia</td>\n",
|
m@94
|
111 " <td>0.257143</td>\n",
|
m@94
|
112 " <td>35</td>\n",
|
m@94
|
113 " <td>9</td>\n",
|
m@94
|
114 " </tr>\n",
|
m@94
|
115 " <tr>\n",
|
m@94
|
116 " <th>4</th>\n",
|
m@94
|
117 " <td>Swaziland</td>\n",
|
m@94
|
118 " <td>0.163265</td>\n",
|
m@94
|
119 " <td>98</td>\n",
|
m@94
|
120 " <td>16</td>\n",
|
m@94
|
121 " </tr>\n",
|
m@94
|
122 " </tbody>\n",
|
m@94
|
123 "</table>\n",
|
m@94
|
124 "</div>"
|
m@94
|
125 ],
|
m@94
|
126 "text/plain": [
|
m@94
|
127 " Country Outliers N_Country N_Outliers\n",
|
m@94
|
128 "0 Canada 0.050000 100 5\n",
|
m@94
|
129 "1 Lithuania 0.000000 47 0\n",
|
m@94
|
130 "2 Cambodia 0.210526 19 4\n",
|
m@94
|
131 "3 Ethiopia 0.257143 35 9\n",
|
m@94
|
132 "4 Swaziland 0.163265 98 16"
|
m@94
|
133 ]
|
m@94
|
134 },
|
m@94
|
135 "execution_count": 6,
|
m@94
|
136 "metadata": {},
|
m@94
|
137 "output_type": "execute_result"
|
m@94
|
138 }
|
m@94
|
139 ],
|
m@94
|
140 "source": [
|
m@94
|
141 "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n",
|
m@94
|
142 "df_global.head()"
|
m@94
|
143 ]
|
m@94
|
144 },
|
m@94
|
145 {
|
m@94
|
146 "cell_type": "markdown",
|
m@94
|
147 "metadata": {},
|
m@94
|
148 "source": [
|
m@94
|
149 "## Pearson correlation between the fraction of outliers and the number of samples per country"
|
m@94
|
150 ]
|
m@94
|
151 },
|
m@94
|
152 {
|
m@94
|
153 "cell_type": "code",
|
m@94
|
154 "execution_count": 10,
|
m@94
|
155 "metadata": {},
|
m@94
|
156 "outputs": [
|
m@94
|
157 {
|
m@94
|
158 "name": "stdout",
|
m@94
|
159 "output_type": "stream",
|
m@94
|
160 "text": [
|
m@94
|
161 "correlation -0.0102335874359\n",
|
m@94
|
162 "p-value 0.905523601988\n"
|
m@94
|
163 ]
|
m@94
|
164 },
|
m@94
|
165 {
|
m@94
|
166 "data": {
|
m@94
|
167 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEPCAYAAABY9lNGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH25JREFUeJzt3X2UXHWd5/H3N4SebRQMnXASNDAwYVjEg07hymQ3zknp\n2N04D4Ek5/jArNuLzjKOCkdthwSDh3an40EcGEdn3LOMo+S4whxmWJg4Mt1pXHuW7CLsxIBRyEZ0\nUNAlPAQcxm2XePjuH3Urqaq+XXXr1n2s+rzOqZN6uHXv9/6qc7/393B/19wdERGRVsvyDkBERIpJ\nCUJEREIpQYiISCglCBERCaUEISIioZQgREQkVGoJwsy+YGaHzexAw3ufMrNHzOwhM/uvZvaKhs+u\nMbPvmtlBMxtLKy4REYkmzRrEF4GLW97bA7zG3V8HHAKuATCz84G3A+cH3/mcmal2IyKSo9QOwu5+\nL/Bcy3tz7v5S8PJ+YG3w/BLgNnc/6u6PAY8CF6UVm4iIdJbnWfq7gbuD568Enmj47AngVZlHJCIi\nx+SSIMxsB/Ciu9/aZjHNASIikqPlWW/QzP498BvArze8/SPgjIbXa4P3Wr+rpCEiEoO7W7ffybQG\nYWYXA38AXOLuP2v4aDfwDjMbMrOzgV8GHghbh7sX/nHdddflHoPiVJyKUzHWH3GlVoMws9uAjcAq\nM3scuI7aqKUhYM7MAO5z9/e5+8NmdjvwMPBz4H3ey16JiEjPUksQ7v7OkLe/0Gb5TwCfSCseERHp\njq41SEG1Ws07hEgUZ7IUZ7LKEGcZYuyFlaklx8zU8iQi0iUzw4veSS0iIuWhBCEiIqGUIEREJJQS\nhIiIhFKCEBGRUEoQIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQ\nIiISSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIEREJJQShIiIhFKCEBGRUEoQIiISSglCRERCKUGI\niEgoJQgREQmlBCEiIqFSSxBm9gUzO2xmBxreGzGzOTM7ZGZ7zGxFw2fXmNl3zeygmY2lFZeIiEST\nZg3ii8DFLe9tB+bc/Vzga8FrzOx84O3A+cF3Pmdmqt2IiOQotYOwu98LPNfy9iZgV/B8F3Bp8PwS\n4DZ3P+rujwGPAhelFVuZzM7OMja2lbGxrczOzsZapvXzTq+jbrfRzp07WbnyHFauPIedO3dG2kav\ncXYbYxJll8Q2kihvkUy4e2oP4CzgQMPr5xqeW/018Fngdxo++zywNWR9PkhmZmZ8eHi1wy0Ot/jw\n8GqfmZnpapnWz4eGTvOhoRVLvh4eXu3T09Mdt9toenra4ZRjy8Mpvnz5y9puo9N+DA2t8KGh09rE\n3fx5pxjjlG+U8u92G2Fxd1veIt0Kjp3dH8PjfCnyytskiOD1EV86QWwJWV/iBVdko6NbgoOGB49b\nfHR0S1fLhH0O60NezzhscVjvJ598RsftNhoZWReyjbUO0w7rgudrm7ZRqWxYYj/qy6ztEPf6rmKM\nU75Ryr/7bSyOO6z8ut2OSDtxE8TylCsorQ6b2Rp3f9LMTgeeCt7/EXBGw3Jrg/cWmZqaOva8Wq1S\nrVbTibQknnnm2bbvhX0O/9zy+nlgAvgkAC+88KEEIvspcAPwmeD1VdS6mf4EgIce+hCzs7OMj483\nfOcAsC2I48cJxCAymObn55mfn+99RXGyStQHi2sQNwDbgufbgeuD5+cDDwJDwNnA9wALWV8aybWw\njjdPTDqs92XLVvr09HTTMpXKBodVDc07q5rOzsM+Nzu5qenG7NSWM9hJX7bs1GPLxGliOv566ZpL\npbKxaV+XLVvZ8J2ZpriTbmKamZnxSmVD034uW3aqVyobjq0nShPU6OgWHx3dsuS21cQkRUDRmpiA\n26idBr4IPA5cDowA9wCHgD3AioblP0qt
c/ogML7EOlMqvuKanp5ue7CuNWFMBs0yteeLm0maP69U\nNjYd2CqVjYsO5pXKBq9UNvjIyDqvVDZ2PGBNTEw4nBokgcngeaemrZGmhLc4jkkfGVl3LM7GA/L0\n9HRX8TVqPmhPutmpbjYSxN1cxq1JoP66UtmwZIJa6jtLvQ77jkiSCpcg0ngMYoLo1A6eREdr2DLd\nntUujnMypFZxUlNNpl5TiXrG3s0+dVem0fozmrcb/p0kOrZFkqYE0aeidJR2OvuM2hTSuMxS211q\nXWHLL1t2stc6m9d5rcN6MkgM9U7x8ITXKdZuOo/D1hc3QTR/LzyGJDq221FNQ+JQguhTeZ2Rhh3o\nKpWNS8YSFue6dReENDON9HwAjXoQXqrsovQLhJVx83ab+0jq30kzQah2InEpQfSxPM4aww5GtQ7v\n9s1drW3trdcyTExMdNUBHjW2zgf04/HW+11q/RcbluwX6LTdoaEVx/pzum0miyPt2kmaVPPJlxKE\nJC5qs1M361jqvV5jC9NtLSip7aZ1MCxrglDNJ39KEJK6sv1Hj1MLKrKylX9dWRNbP4mbIDQhnkQ2\nPj7OnXfuYnR0N6Oju7nzzl0tF7oVS1i8q1atzjus2MpW/lJ+Vksu5WBmXqZ4pXhmZ2fZvHmChYXa\nVePDw9t0oE2Zyjx/Zoa7W9ffK9MBVwlCkjA7O8uNN94MwOTkFTpQZUBlni8lCMldlINA3geKMsQo\nkrS4CSL3juduHqiTurCSmJpcMYqkA41ikjwlMTW5Yhxcuk4iXXEThEYxiaRAd4iLrt6JPTe3ibm5\nTWzePFHKMuvL3zxOVsnrgWoQhVWG5pusYsx7P8umH2ptRf/NUROT5C3Pq4yjSirGdsv0wwEvS/1Q\nXkXfh7gJIus7ykkfGx8f7zjiJ8oyaUoixtZx/Xv3Tmhcfw8mJ69g794JFhZqr4eHtzE5uSvfoKQm\nTlbJ60EJaxB5nzFL8nq9R4csVvb/J0X/zVENonh0pjmY6lNiHL+WQr95J3nXLHvVr7+5LpRL0djY\nVubmNgETwTu1eXT27Lkjz7C6MsgXjS2175o6Qsom7oVyqkHIkga5BtRu3/v1bFGklWoQKSr7mWY/\n1IDiGuR9l/6jGkQB6UxTREotTs92Xg9KOIopC2mNACn6yIw0Dcq+l330kESDLpQbTGkfyAb5ANLv\n+z4oSVDiJwj1QZSc2solLv3tDI64fRCarE9EREKpk7rkNE2BxKW/HelETUx9YJAvZpPe6G9nMOiW\noyIiEkp9ECIikqhcEoSZXWNm3zGzA2Z2q5n9gpmNmNmcmR0ysz1mtiKP2EREpCbzBGFmZwH/AbjQ\n3S8ATgDeAWwH5tz9XOBrwWsREclJHjWIfwKOAieZ2XLgJODHwCagPoRiF3BpDrGJiEgg8wTh7keA\nG4EfUksMz7v7HLDa3Q8Hix0GVmcdm4iIHJf5dRBmtg74IHAW8BPgr8zs3zYu4+5uZqHDlaampo49\nr1arVKvVtEIVESml+fl55ufne15P5sNczeztwKi7/27w+l3AeuDNwJvc/UkzOx34uruf1/JdDXMV\nEelSmYa5HgTWm9mwmRnwFuBh4CscnxRmArgrh9hERCSQy4VyZnY1tSTwEvBN4HeBk4HbgTOBx4C3\nufvzLd9TDUJEpEu6kloATZ0gIospQUjpb3EqIulQghDN7y8iocrUSS0iIiWg+0H0Ec3vLyJJUhNT\nn1EntYi0Uh+EiIiEUh+EiIgkSglCRERCKUGIiEgoJQgREQmlBCEiIqGUIOSY2dlZxsa2Mja2ldnZ\n2bzDKRyVjwwaDXMVQPM4daLykTLTdRDSE83j1J7KR8pM10H0GTVniEjeNBdTAbU2Z+zdO5F6c4bm\ncWpP5SODSE1MOQubOymv5gzN49SeykfKKm4Tk2oQOVqqppCX8fFxHfTaUPnIoFGCyNGNN94cJIda\nTWFh
ofaemjNEpAiUIApofHycO+/c1dCcoeGUIpI99UHkSGPrRSQLug6ipNTxKSJpU4IQyYESvJSB\nEoRIxtREKGWhBCGSMU2/IWWhqTak75Vh+pFnnjlc+BhFolINQkqhiM05rTENDX0QOJEXX/xUYWIU\nATUxSZ8ranNOYyf1M888y/79l1O0GEVK1cRkZivM7K/N7BEze9jMftXMRsxszswOmdkeM1uRR2wi\n3RgfH2fPnjvYs+cOVq1amXc4IonKqw/iT4C73f3VwGuBg8B2YM7dzwW+FrwWAWpDSIeHtwG7gF3B\n9CNX5B1WkzLEKNKNzJuYzOwVwH53/6WW9w8CG939sJmtAebd/byWZdTENMDKcM1BGWKUwZN4H4SZ\nXbfEdxzA3f9jtxsL1vsrwH8GHgZeB+wDPgg84e6nBssYcKT+uuG7ShAFpoOjSDGlMd33TwmSQYOX\nAe8BVgGxEkSwzQuBD7j7/zKzT9PSnOTubmahmWBqaurY82q1SrVajRmGJCmPmxyJSLj5+Xnm5+d7\nXk+kJiYzOwW4ilpyuB240d2firXBWvPRfe5+dvD6jcA1wC8Bb3L3J83sdODramIqj7RHGal2IhJf\nKqOYzGylmU0DDwEnAhe6+7a4yQHA3Z8EHjezc4O33gJ8B/gKx48uE8Bdcbch/aVeO5mb28Tc3CY2\nb57QRWgiGViyicnM/gjYDNwMvNbdX0hwu1cCXzazIeB7wOXACcDtZvYe4DHgbQluT1KW5k2Olrqx\nkmoRIulq1wfxYeBF4Frg2lq/8THu7qfE3ai7PwS8IeSjt8Rdp+RLNzkS6T+6kloKr4jTbIiUiaba\nkL6mTmqR+JQgREQkVKnmYhIZBGWYnlykHdUgRFKgfhMpEtUgRNrI+my+eWhuLVHU+1Dyjk0kqnbD\nXEX6QpGnASlybCKqQUjfaz6bX8PCwtlcdtn7Uz1bjzr1dzc1DZGsKUHIAJmldiB+L0eOfCzVKTvq\nFw6Oju5mdHS3agVSSuqklkX67ZqD4804ZwPvpUi3BFVntmRBndSSiH6cGK9+Nj8y8nTeoSyimoYU\nmWoQ0iTtabuz1lgb2rjxQnbu/GzT2fqOHVfy93//TaA/akv9oN9qsEWQxg2DREpt8QihekLYDcDG\njVc2JQyNIMqfRnUVjLuX5lELV9I0MzPjw8OrHW5xuMWHh1f7zMxM3mHFMjq6JdgPDx63+Ojolsif\nS/b0m6QjOHZ2fcxVDUKaaNpuEalTgpBFxsfH+yIpdLqJUZo3OZJ49JsUizqpEzTInWtJ7Hsa5ddp\nnYP8mxWVfpPkxe2kzr1foZsHBe6D6Ke2+24lse+DXH4iaSNmH0TuB/2ugi1wgihz59rMzIyPjm7x\n0dEtsQ7KSex7mctPpOjiJgj1QQw4DSsUkSXFySp5PShwDaKsTSRJnLmriUmk2FANIl+DPDw0iX0f\n5PITKSqNYhpwmixOpP/FHcWkBCEaVijS55QgpLSSTlBR1qekKINE10FIKSXdOR1lfeoQl0GDroMY\nXL1ex5CnpK9/iLI+XXMhgyZugtAoppLTdQwikpo4WSWJB3ACsB/4SvB6BJgDDgF7gBUh30k8s+Yl\nqbP+tM6Gs6qVqIkpf2WugUo0lK2JCfgw8GVgd/D6BuDq4Pk24PqQ7yRdbrlI8gCVRoJojW/ZslO9\nUtmQ2sEj6QNUlPUN4kExbJ+VLAdDqRIEsBa4B3hTQw3iILA6eL4GOBjyvcQLLg9JHtTT+A8eFh+s\n18GjxJb6O1F/zGCImyCWpdd41dYfA38AvNTw3mp3Pxw8PwyszjyqEsrupvevZGHhk8eGhmZpdnaW\nsbGtjI1tZXZ2NvPt94Mbb7w56KeaACZy+y2lXDLvpDaz3wKecvf9ZlYNW8bd3cxCL3iYmpo69rxa\nrVKthq6i0JK+KUrSN/hpja/W4rcLeDKxbUSlTvh06QY9/Wl+fp75+f
neVxSn2tHLA/gE8Djwj8D/\nAX4KfIlaE9OaYJnT6eMmJvfit4HPzMx4pbLRly1b6TCZW/u0mkCS0a4psuh/i9I7YjYx5XoltZlt\nBD7i7r9tZjcAz7r7J81sO7VRTNtblvc84x1EeV9xPDa2lbm5TdSaRgBqzWl79tyRaRz9IO/fUvJT\nyqk2ggQx6e6bzGwEuB04E3gMeJu7P9+yvBJEBN0eCIp84NBkgiK9K2WC6JYSRGfdHlDLcAAucgIT\nKQMlCAG6b5JRE45I/4ubIPIa5ioiIgWnBNFnJievYHi4Pix1VzBs8YrElu8nZb++ouzxSwnEGfqU\n14M+Guaapm6HLQ7iMMeyTzFR9vglW5RxmGu31AehDtuklL3vJa/49fdXTuqDGAD1EUdzc5uYm9vE\n5s0TalqQzLT7+1NzV5+KU+3I68GANzHpquLklL2JJo/4l/r7K3tZDgJ0wyCR6OqTHB5vLinWtR+d\nFCn+5okAYWGh9l6ZylPCKUGUiCZWS1bSkxxmLev4l/r706ywfSxOtSOvBwPexOTe+4ijON/PcpRT\n3G0N4kisPOimQ+VEmW4YFPehBNGbOP+Rs/zPH3dbOkDlTwm62JQgpKM4ndxZdozH3ZY670Xai5sg\nNMxVRERCqZN6gMTp5M6yYzzuttR5L5IOXUk9YOJcCZvl1bNxt6UrfEWWpum+RSLoJZEMWhIatP3t\nZ3ETRO4dz908UCe19KCX0U6DNlJq0Pa336FRTDKIuhle2ctop0EbKTVo+9vv4iYIjWKS0tLkhRJG\nEwcmKE5WyeuBahDSoNuzXDUxRVfW/S1r3GlDk/WJtNfLBHdFmhwvC2XdX00cmCyNYpLSqjcx1Q4I\ntesf7ryzHAcySUfZbwSVFt0wqI8k0YY6CO2w9bPc0dHdjI7uVnKQgb7HehpUgyiYJM6KdWYtg0zX\nbyymC+X6RBJV5OZ1zAJTjIw8za23/pn+s4gMIDUxSYhZaknivRw58jENAxWRrihBFEwSbajH1zEF\n1Ed01Jqc+vXuX4PQ5yKSNSWIgkmi47W+jpGRp1OKslh0wZxISuJcPNHLAzgD+DrwHeDbwFXB+yPA\nHHAI2AOsCPluQpeNlEMStxdN86KhotxFTNNCiLRHiS6UOwp8yN0fNLOXA/vMbA64HJhz9xvMbBuw\nPXgMpNaRSHv3TnRdm0jzYqck4hORgouTVZJ8AHcBbwEOAquD99YAB0OWTTSrFlnRz4qLFF83NaWi\n1HpEskQZJ+szs7OACnA/teRwOPjoMLA6p7AKa9++hxJtWy9bx+7OnTtZufIcVq48h507dx57P2q/\nzVJ9FWUrB5HMxMkqSTyAlwP7gEuD18+1fH4k5DuJZtUiaz0rhlUOk4n1I/TaP5H1pGjT09MOpzSU\nxyk+PT3d1TrCaj2VygZN7iZ9jxL1QWBmJwJ3AF9y97uCtw+b2Rp3f9LMTgeeCvvu1NTUsefVapVq\ntZpytPmonxVfdtn7OXLkNOC/AOMsLFyQyORjvU5qlvVkbjfd9EXgMxy/gBBuuukP2bFjR0/r/cEP\nntTkbtJ35ufnmZ+f73k9mScIMzPgL4CH3f3TDR/tpva/tP6/9a6QrzcliH43Pj7O61//uuCq6OId\nsMbHx0t1IJ2cvIK9eydYWKi9Hh7exi/+4nkcOZJvXCJJaz15/vjHPx5vRXGqHb08gDcCLwEPAvuD\nx8XUhrneg4a5NmltyhkaWuGVysaeO1nznDc/Tkdxt01MS22j9X3dP0AGAbrlaP+qH9QqlQ0+NHRa\nYgezPEb09HJAnp6e9pGRdT4ysq5jcuhmGxrZJP1OCaIA0j7Q5DG0NOl9Wmofoh78e9mGyKCKmyB0\nR7mE9OOFY1nt0/e/f5C5uXuodULDtddeBdBzB7SI9ChOVsnrQYFrEFmctWbdXr54nyZ9ZGRdT7WJ\nsH04+eQzF5XdyMi62HGXpV9BTV
vlVbbfDjUx5SurZo0s/zCb92kmuBaj94Nu6z6MjKxLNEGEbaNo\nypLEZLEy/nZKEDkr26igqOs9vk/rU0uASVwEVzbqJymvMv52cROE+iASkvWFY3Vp9hM07tO+fU+n\ndr1Ava/hppv+EIAPf/hq9T+IFEGcrJLXgwLXIPKSZdNWlBpS0Zt2iqKMzRRSU8bfDtUgJE1RakhL\n1WYA3US+xfj4ODt2XNlQa7pS5VISebUW5CJOVsnrgWoQixTpbEaT4UVXpN9N+h9lnO673+QxbXQS\ntyhNU/NkeM33xS7qNNtZxNU8WWK0+4UXtbyKTGXWozhZJa8HBa5B6IwwvAwqlY2hfSRFLa+s4uq2\n76io5VVkKrPj0DDXfJVp6FuaHclRJ8MrankVrdM/67j6icrsuLgJQp3UAybt6TPCpgAP69Dr1JzS\n7waqo1PKK05WyetBgWsQZanOFuWsqqjlpbj6h8rsONTElL8yXANQlAThXtzyUlz9Q2VWEzdBWO27\n5WBmXqZ4i6i1iWl4eFvhRj6JSLLMDHe3rr9XpgOuEkQyZmdndeGayABRghARkVBxE4QulBMRkVBK\nECIiEkoJQkREQilBiIhIKCUIEREJpQQhIiKhlCBERCSUEoSIiIRSghARkVBKECIiEqpQCcLMLjaz\ng2b2XTPblnc8IiKDrDAJwsxOAP4UuBg4H3inmb0636jimZ+fzzuESBRnshRnssoQZxli7EVhEgRw\nEfCouz/m7keBvwQuyTmmWMryR6M4k6U4k1WGOMsQYy+KlCBeBTze8PqJ4D0REclBkRKE5vEWESmQ\nwtwPwszWA1PufnHw+hrgJXf/ZMMyxQhWRKRkSn3DIDNbDvxv4NeBHwMPAO9090dyDUxEZEAtzzuA\nOnf/uZl9AJgFTgD+QslBRCQ/halBiIhIsRSpk3oRMxsxszkzO2Rme8xsxRLLfcHMDpvZgYzj63hh\nn5l9Jvj8ITOrZBlfQwxt4zSz88zsPjP7mZlN5hFjEEenOH8nKMdvmdn/MLPXFjTOS4I495vZPjN7\nc9FibFjuDWb2czPbkmV8DdvvVJZVM/tJUJb7zezaIsYZLFMNYvy2mc1nHGI9hk7l+ZGGsjwQ/Pah\nx1UA3L2wD+AG4Org+Tbg+iWW+zWgAhzIMLYTgEeBs4ATgQeBV7cs8xvA3cHzXwW+kUMZRonzNOBf\nAdPAZE6/dZQ4/zXwiuD5xQUuz5c1PL+A2vU9hYqxYbn/BvwtsLWgZVkFdufxN9llnCuA7wBrg9er\nihhny/K/BdzTbp2FrkEAm4BdwfNdwKVhC7n7vcBzWQUViHJh37H43f1+YIWZrc42zM5xuvvT7v4P\nwNGMY2sUJc773P0nwcv7gbUZxwjR4vxpw8uXA89kGB9Ev+j0SuCvgaezDK5B1Di7Hn2TsChxXgbc\n4e5PALh71r85dH+x8WXAbe1WWPQEsdrdDwfPDwNZH1zbiXJhX9gyWR/UynIBYrdxvge4O9WIwkWK\n08wuNbNHgL8DrsootrqOMZrZq6gdPP5T8FYenZFRytKBfxM02d1tZudnFt1xUeL8ZWDEzL5uZv9g\nZu/KLLrjIv8fMrOTgHHgjnYrzH0Uk5nNAWtCPtrR+MLdvWDXQUSNpfXsJ+t9KFKZtRM5TjN7E/Bu\nYEN64SwpUpzufhdwl5n9GvAl4F+mGlXL5iMs82lge/D/ysjnLD1KnN8EznD3/2tmbwXuAs5NN6xF\nosR5InAhtWH6JwH3mdk33P27qUbWrJv/678N7HX359stlHuCcPfRpT4LOp7XuPuTZnY68FSGoXXy\nI+CMhtdnUMvY7ZZZG7yXpShxFkGkOIOO6T8HLnb3rJsVocvydPd7zWy5ma1092dTj64mSoyvB/6y\nlhtYBbzVzI66++5sQgQixOnuLzQ8/zsz+5yZjbj7kYxihGjl+TjwjLsvAAtm9t+B1wFZJohu/jbf
\nQYfmJaAUndTbgufbWaKTOvj8LLLtpF4OfC/Y7hCdO6nXk0+nasc4G5adIr9O6ijleSa1Trj1ecTY\nRZzrOD6E/ELge0WLsWX5LwJbClqWqxvK8iLgsYLGeR5wD7WO4pOAA8D5RYszWO4VwLPAcMd1Zl3Y\nXe7wSFDoh4A9wIrg/VcCX21Y7jZqV1//P2qZ/PKM4nsrtau/HwWuCd77PeD3Gpb50+Dzh4ALcyrH\ntnFSa+J7HPgJtc7+HwIvL2Ccnw/+sPcHjwcKWp5XA98OYrwXeEPRYmxZNpcEEbEs3x+U5YPA/ySn\nk4OI/9c/Qm0k0wHgqgLHOQHcGmV9ulBORERCFX0Uk4iI5EQJQkREQilBiIhIKCUIEREJpQQhIiKh\nlCBERCSUEoQMPDNba2Z/E0wr/6iZfdrMTuzwnY+2vP7n4N9XmtlfpRmvSFZ0HYQMtGAeovuBP3P3\nXWa2DLgZOOLuV7f53gvufvJSr7vY/nJ3/3mc2EXSphqEDLo3AwvuXp+W/SXgQ8C7zez3zeyz9QXN\n7G/NbKOZXQ8MBzdd+VLjyszsrPqNq8zsBDP7lJk9EMxGekXwftXM7jWzvwG+bWYnmdlXzezB4CYu\nb8to30Xayn2yPpGcvQbY1/iGu79gZj9k8f8Pr33s283s/e7e6Q6B7wGed/eLzOwXgL1mtif4rAK8\nxt1/YGZbgR+5+28CmNkpve6USBJUg5BBl2Yb6xjw78xsP/ANanOLnRN89oC7/yB4/i1g1MyuN7M3\nuvs/pRiTSGRKEDLoHqY29fUxwRn8mcDzNP8f+Rcx1v8Bd68Ej3Xufk/w/rG7znntngEVapO8TZvZ\nx2JsRyRxShAy0Nz9a8BJ9TuAmdkJwI3UZjj9PvArVnMGtemm646aWacm2lngffXlzOzc4E5eTYJ7\nnfzM3b8M/BG1KcJFcqc+CBHYDHwuOHNfBnwV+Ki7HzWzf6RWy3iE5r6Km4Fvmdk+d38XzU1V9eef\npzY3/zeD0VJPBdvyluUvAD5lZi8BLwK/n/D+icSiYa4iIhJKTUwiIhJKCUJEREIpQYiISCglCBER\nCaUEISIioZQgREQklBKEiIiEUoIQEZFQ/x8EKwOPDQIAnAAAAABJRU5ErkJggg==\n",
|
m@94
|
168 "text/plain": [
|
m@94
|
169 "<matplotlib.figure.Figure at 0x7efedacd46d0>"
|
m@94
|
170 ]
|
m@94
|
171 },
|
m@94
|
172 "metadata": {},
|
m@94
|
173 "output_type": "display_data"
|
m@94
|
174 }
|
m@94
|
175 ],
|
m@94
|
176 "source": [
|
m@94
|
177 "corr, pval = pearsonr(df_global['Outliers'], df_global['N_Country'])\n",
|
m@94
|
178 "print 'correlation', corr\n",
|
m@94
|
179 "print 'p-value', pval\n",
|
m@94
|
180 "\n",
|
m@94
|
181 "plt.scatter(df_global['Outliers'], df_global['N_Country'])\n",
|
m@94
|
182 "plt.xlabel('Outliers');\n",
|
m@94
|
183 "plt.ylabel('N');"
|
m@94
|
184 ]
|
m@94
|
185 },
|
m@94
|
186 {
|
m@94
|
187 "cell_type": "code",
|
m@94
|
188 "execution_count": null,
|
m@94
|
189 "metadata": {
|
m@94
|
190 "collapsed": true
|
m@94
|
191 },
|
m@94
|
192 "outputs": [],
|
m@94
|
193 "source": []
|
m@94
|
194 }
|
m@94
|
195 ],
|
m@94
|
196 "metadata": {
|
m@94
|
197 "kernelspec": {
|
m@94
|
198 "display_name": "Python 2",
|
m@94
|
199 "language": "python",
|
m@94
|
200 "name": "python2"
|
m@94
|
201 },
|
m@94
|
202 "language_info": {
|
m@94
|
203 "codemirror_mode": {
|
m@94
|
204 "name": "ipython",
|
m@94
|
205 "version": 2
|
m@94
|
206 },
|
m@94
|
207 "file_extension": ".py",
|
m@94
|
208 "mimetype": "text/x-python",
|
m@94
|
209 "name": "python",
|
m@94
|
210 "nbconvert_exporter": "python",
|
m@94
|
211 "pygments_lexer": "ipython2",
|
m@94
|
212 "version": "2.7.12"
|
m@94
|
213 }
|
m@94
|
214 },
|
m@94
|
215 "nbformat": 4,
|
m@94
|
216 "nbformat_minor": 2
|
m@94
|
217 }
|