changeset 63:b0e194bfb71d branch-tests

minor edits on server
author mpanteli <m.x.panteli@gmail.com>
date Thu, 21 Sep 2017 17:35:57 +0100
parents ac3fcd42e7bd
children e83ecc296669
files notebooks/sensitivity_experiment.ipynb notebooks/test_hubness.ipynb notebooks/test_music_segments.ipynb scripts/classification.py
diffstat 4 files changed, 225 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb	Thu Sep 21 15:25:20 2017 +0100
+++ b/notebooks/sensitivity_experiment.ipynb	Thu Sep 21 17:35:57 2017 +0100
@@ -2,18 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n",
-      "  warnings.warn('Could not import scikits.samplerate. '\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
@@ -34,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
--- a/notebooks/test_hubness.ipynb	Thu Sep 21 15:25:20 2017 +0100
+++ b/notebooks/test_hubness.ipynb	Thu Sep 21 17:35:57 2017 +0100
@@ -22,12 +22,13 @@
     "\n",
     "sys.path.append('../')\n",
     "import scripts.outliers as outliers\n",
-    "import scripts.utils_spatial as utils_spatial"
+    "import scripts.utils_spatial as utils_spatial\n",
+    "import scripts.utils as utils"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {
     "collapsed": true
    },
@@ -45,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -54,7 +55,7 @@
        "(8200, 380)"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -132,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 9,
    "metadata": {
     "collapsed": true
    },
@@ -229,46 +230,6 @@
    ]
   },
   {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## what if we use a different distance metric?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1.20652171267\n"
-     ]
-    },
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAEACAYAAABS29YJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFadJREFUeJzt3W2MXNd93/HvT5IJWXFdVlBBUw8oCVSCzcIAhcZEUOdh\ngiSqFLSS/MaSgwREqwYGlNqGi6QmDbRcJ0CsBLDrF4X9xnLAuA0bIoEFKoFlUaomcF6YgltSkrVm\nJQJmYbrSKm2VVIIqmKr+fTF3yeFqd2d2HnZ35n4/wGDPnvsw5+wl/3Pmf8+9N1WFJGm+XbXVDZAk\nTZ/BXpJawGAvSS1gsJekFjDYS1ILGOwlqQWGCvZJrk5yOsmjze/XJzmZ5IUkjyfZ2bfu4SQvJjmb\n5I5pNVySNLxhR/afAhaB5Un5h4CTVXUb8GTzO0n2AfcB+4A7gS8n8duDJG2xgYE4yc3ALwNfBdJU\n3w0cbcpHgXub8j3Asaq6WFXngXPAgUk2WJK0ccOMuv8d8FvA2311u6pqqSkvAbua8o3Ahb71LgA3\njdtISdJ41g32Sf4J8EpVnebyqP4K1bvfwnr3XPB+DJK0xa4ZsPwfAXcn+WXgWuC9Sb4OLCV5X1W9\nnGQ38Eqz/o+AW/q2v7mpu0ISPwAkaQRVterAe5B1R/ZV9dmquqWq9gL3A/+5qn4NOAEcbFY7CDzS\nlE8A9yfZkWQvcCvw9Br7ntvXkSNHtrwN9s/+ta1vbejfOAaN7N8Ro5ufDwHHkzwAnAc+2gTwxSTH\n6c3ceQt4sMZtoSRpbEMH+6r6C+AvmvL/Bn5xjfV+F/jdibROkjQRzoGfgk6ns9VNmCr7N7vmuW8w\n//0bR7Yiy5LE7I4kbVASahonaCVJ88FgL0ktsNHZOK2RXPlNybSTpFnmyH5dgy4OlqTZYLCXpBYw\n2EtSCxjsJakFDPaS1AIGe0lqAYO9JLWA8+w3yPn3kmaRI/uROP9e0mwx2EtSCxjsJakFDPaS1AIG\ne0lqAYO9JLXAusE+ybVJTiU5k2Qxyeeb+oUkF5Kcbl539W1zOMmLSc4muWPaHZAkDTbwsYRJrquq\nN5JcA/wl8JvALwCvVdUXV6y7D/gj4EPATcATwG1V9faK9bb9Ywl78+mX25hL8+nXqpekaZvqYwmr\n6o2muAO4Gnh1+X1XWf0e4FhVXayq88A54MAoDZMkTc7AYJ/kqiRngCXgqap6vln0iSTPJHk4yc6m\n7kbgQt/mF+iN8CVJW2iYkf3bVbUfuBn42SQd4CvAXmA/8BLwhfV2MYF2SpLGMPS9carqb5L8OfCT\nVdVdrk/yVeDR5tcfAbf0bXZzU/cOCwsLl8qdTodOpzNsUySpFbrdLt1udyL7WvcEbZIbgLeq6q+T\nvBv4FvA54PmqerlZ59PAh6rqV/pO0B7g8gnav7/ybKwnaCVp48Y5QTtoZL8bOJrkKnopn69X1ZNJ\n/jDJfnpR7wfAxwGqajHJcWAReAt4cNtHdUlqgYFTL6fypo7sJWnDpjr1UpI0+wz2ktQCPqlqinyq\nlaTtwpH91PlUK0lbz5H9BDiCl7TdObKfGEfwkrYvg70ktYDBXpJawGAvSS3gCdpN1H8i15O4kjaT\nI/tN5UlcSVvDYC9JLWCwl6QWMNhLUgsY7CWpBQz2ktQCrZp66T1sJLVVC0f2Tn+U1D4tDPaS1D7r\nBvsk1yY5leRMksUkn2/qr09yMskLSR5PsrNvm8NJXkxyNskd0+6AJGmwgQ8cT3JdVb2R5BrgL4Hf\nBO4G/mdV/X6SzwB/p6oOJdkH/BHwIeAm4Angtqp6e8U+t+SB4xt5WPhGHjg+7Lo+qFzSOKb6wPGq\neqMp7gCuBl6lF+yPNvVHgXub8j3Asaq6WFXngXPAgVEaJkmanIHBPslVSc4AS8BTVfU8sKuqlppV\nloBdTflG4ELf5hfojfAlSVto4NTLJgWzP8nfBr6V5OdXLK8k6+UkVl22sLBwqdzpdOh0OsO0d+44\nHVTSWrrdLt1udyL7Gpizv2Ll5N8A/xf4F0Cnql5OspveiP/9SQ4BVNVDzfqPAUeq6tSK/WybnP1a\nwXazcvYbOY8gqd2mlrNPcsPyTJsk7wZ+CTgNnAAONqsdBB5pyieA+5PsSLIXuBV4epSGba7tNfc+\nyRUvSRrXoDTObuBokqvofTB8vaqeTHIaOJ7kAeA88FGAqlpMchxYBN4CHtySIfxc6P9GIEnj2VAa\nZ2Jvuu3SOMOnVzYjjWNqR9Jqpjr1UpI0+wz2ktQCBntJagGDvSS1gMFeklrAYC9JLWCwl6QWMNhL\nUgsY7CWpBQz2ktQCBntJagGDvSS1gMFeklrAYC9JLWCwl6QWMNhLUgsY7CWpBQY9llDbyFoPR5ek\nQRzZz5zt9XB0SbNhYLBPckuSp5I8n+R7ST7Z1C8kuZDkdPO6q2+bw0leTHI2yR3T7MAgSS69NrK+\nJM2TYdI4F4FPV9WZJO8B/kuSk/SGl1+sqi/2r5xkH3AfsA+4CXgiyW1V9faE274B/Q/9nuS6kjQb\nBo7sq+rlqjrTlF8Hvk8viMPqEfEe4FhVXayq88A54MBkmrv9+C1A0izYUM4+yR7gduA7TdUnkjyT\n5OEkO5u6G4ELfZtd4PKHwxwyfy5p+xt6Nk6TwvkT4FNV9XqSrwC/3Sz+HeALwANrbP6OiLiwsHCp\n3Ol06HQ6wzZFklqh2+3S7XYnsq8MM30vybuAPwO+WVVfWmX5HuDRqvpgkkMAVfVQs+wx4EhVnepb\nvzZr2mAvzdKfh79crqp1l69cd+39bWTd9ddZrU3rrSupPZJQVSPljoeZjRPgYWCxP9An2d232keA\n55ryCeD+JDuS7AVuBZ4epXGSpMkYJo3zYeBXgWeTnG7qPgt8LMl+ekPNHwAfB6iqxSTHgUXgLeDB\nTRvGS5JWNVQaZ+JvahrHNI6kDRsnjePtEqbA6ZiSthtvlzAV3tJA0vZisJekFjDYS1ILGOwlqQUM\n9pLUAgZ7SWoBg70ktYDBXpJawGAvSS1gsJekFmh1sPe2BpLaotXB3lsaSGqLlgd7SWoHg70ktYDB\nXpJawGAvSS3gw0u2yLRmAvXv1ydZSVpmsB/S5INz/+MKJ235UYmS1DMwjZPkliRPJXk+yfeSfLKp\nvz7JySQvJHk8yc6+bQ4neTHJ2SR3TLMDm8enT0maXcPk7C8Cn66qfwD8FPAbST4AHAJOVtVtwJPN\n7yTZB9wH7APuBL6cxHMDkrSFBgbhqnq5qs405deB7wM3AXcDR5vVjgL3NuV7gGNVdbGqzgPngAMT\nbrckaQM2NOJOsge4HTgF7KqqpWbRErCrKd8IXOjb7AK9DwdJ0hYZ+gRtkvcAfwp8qqpeWzHro5Ks\nl9B+x7KFhYVL5U6nQ6fTGbYpktQK3W6Xbrc7kX1lmOl5Sd4F/Bnwzar6UlN3FuhU1ctJdgNPVdX7\nkxwCqKqHmvUeA45U1am+/dVmTQvsfSj1z3xZWR60fJjyJPbRK1fVO9q8/Ldaq371/q6+XNLsSkJV\njTTVbpjZOAEeBhaXA33jBHCwKR8EHumrvz/JjiR7gVuBp0dpnCRpMoZJ43wY+FXg2SSnm7rDwEPA\n8SQPAOeBjwJU1WKS48Ai8Bbw4KYN4yVJqxoqjTPxNzWNs2bZNI6ktUw1jSNJmn0Ge0lqAe+NMwd8\nvKKkQRzZzw3v3SNpbXMzsl85uvXkpCRdNmcje0e3krSaOQv2kqTVGOwlqQUM9pLUAnNzgnaeObVS\n0rgc2c8ETzxLGo/BXpJawGAvSS1gsJekFjDYS1ILGOwlqQUM9pLUAgZ7SWoBg70ktcDAYJ/ka0mW\nkjzXV7eQ5EKS083rrr5lh5O8mORskjum1fB55dWykqZhmJH9HwB3rqgr4ItVdXvz+iZAkn3AfcC+\nZpsvJ/Hbw4ZM/krZJFe8JLXPwEBcVd8GXl1l0WpR4x7gWFVdrKrzwDngwFgt1IR4ywWpzcYZdX8i\nyTNJHk6ys6m7EbjQt84F4KYx3mNkjmIl6bJR73r5FeC3m/LvAF8AHlhj3VWHkwsLC5fKnU6HTqcz\nYlPWsvy2BnxJs6nb7dLtdieyrwzzrNYke4BHq+qD6y1Lcgigqh5qlj0GHKmqUyu2qUk/I7Y3iu8P\n8MOWR91u0vsYZX/93rmP5b/xyr+Nz+eVZlMSqmqkEexIaZwku/t+/QiwPFPnBHB/kh1J9gK3Ak+P\n8h4ahnl4ScMZmMZJcgz4OeCGJD8EjgCdJPvpRZofAB8HqKrFJMeBReAt4MGJD+ElSRs2VBpn4m9q\nGmdT2mQaR5ovm57GkSTNFoO9JLWAwV6SWsBgL0ktYLCXpBYY9QpazYGVt5Nwlo40vxzZt54XZklt\nYLCXpBYw2EtSCxjsJakFDPaS1AIGe0lqAYO9JLWAwV6SWsBgL0kt4BW0c2zUB657Za00fxzZz7Vx\nro71ylppnhjsJakFDPaS1AIDg32SryVZSvJcX931SU4meSHJ40l29i07nOTFJGeT3DGthkuShjfM\nyP4PgDtX1B0CTlbVbcCTze8k2QfcB+xrtvlykol+e0hyxUsb599Oap+Bgbiqvg28uqL6buBoUz4K\n3NuU7wGOVdXFqjoPnAMOTKapV7QKTx6Ow7+f1Dajjrp3VdVSU14CdjXlG4ELfetdAG4a8T0kSRMy\n9jz7qqok6w0TV122sLBwqdzpdOh0OuM2RZLmSrfbpdvtTmRfGeaCmSR7gEer6oPN72eBTlW9nGQ3\n8FRVvT/JIYCqeqhZ7zHgSFWdWrG/GvVCnV6ueXnbXLrgZ2X98OVRt5v0Pra+TWv9Ldc7Vl6AJW2e\nJFTVSCfcRk3jnAAONuWDwCN99fcn2ZFkL3Ar8PSI76GZ4TkAabsbmMZJcgz4OeCGJD8E/i3wEHA8\nyQPAeeCjAFW1mOQ4sAi8BTw48hBekjQxQ6VxJv6mpnG2aZv6bSSNM9y6ksazFWkczSXTMdK8mvm7\nXnpx0ObwRKw02+ZgZO9odPP4t5Zm1RwEe0nSIAZ7SWoBg70ktYDBXpJawGCvgZzxJM2+mZ96qc2w\nfJHWYP0fDE7PlLYPR/aaMKdnStuRwV6SWsBgL0ktYM5eU7daHt/bL0iby5G9Nslqwdz8vrRZDPaS\n1AIGe0lqAYO9JLWAwV6SWmCs2ThJzgP/B/h/wMWqOpDkeuCPgb9H83zaqvrrMdupGeRtFqTtY9yR\nfQGdqrq9qg40dYeAk1V1G/Bk87taydk20nYxiTTOyuHb3cDRpnwUuHcC7yFJGsMkRvZPJPlukl9v\n6nZV1VJTXgJ2jfkeQC8lYFpAkkYz7hW0H66ql5L8XeBkkrP9C6uqkkzwe/zwd1+UJF02VrCvqpea\nn3+V5BvAAWApyfuq6uUku4FXVtt2YWHhUrnT6dDpdMZpiiTNnW63S7fbnci+Muo9SZJcB1xdVa8l\n+QngceBzwC8C/6uqfi/JIWBnVR1asW1t9H17KZzlkf3ytpMob5d9zFabrrzHzfD7WGs7740jDZaE\nqhopvTHOyH4X8I0mj34N8B+r6vEk3wWOJ3mAZurlGO8hSZqAkUf2Y72pI/s5aFO/yY3svRumtLZx\nRvZeQasRTXMOvfPzpUnb1vezd6qlJE3GDIzsHeVJ0rhmINhLksa1rdM4mj+m5qSt4chem8y0nLQV\nDPaS1AIGe0lqAXP2mgmDLrbyYixpfQZ7bWtXBvHLV9yufqJ3rSt819+vHwxqA9M42ubWOqFbA5YP\nu2+pHRzZa1uY1pRM0ztSjyN7bRPea0eapm0xsnf0JUnTtS2Cfc+gk2/S8Pw3JF1pGwX7fsPPqpBW\nt/F/Q8N8w/RbqGaVOXvpCsPk9z0HoNmzTUf20nSMkt4ZZhvn7Wu727Jgv7i4uFVvrVYbNUW4+nbv\nvOjL1KO2p6kE+yR3Al8Crga+WlW/t3Kd/ft/kmuv3cPFi69OownS2Ib7FrD2h8da+X3z/toKE8/Z\nJ7ka+PfAncA+4GNJPrByveuu+xCvvbbIm28uTLoJ20B3qxugsXSbn5PIza93BfDmB/lut7vp77mZ\n5r1/45jGCdoDwLmqOl9VF4H/BNwzhffZxrpb3QBtQLJyum93q5oCXG7PynaNU79s3oPhvPdvHNMI\n9jcBP+z7/UJTJ21Tkx1lv/PDY7j6K5dv9BvBoHsI9Xzuc59b9UNA828aOfuh/te8+eZzvPe9/5Qf\n//i/8+abU2iFtGXWyuMPU//Ok7xrBeWNBOu17h46zL6qaqQPhvXOUUzifIbXRWxMJt35JD8FLFTV\nnc3vh4G3+0/SJmnvX1ySxlBVI30lm0awvwb4b8AvAP8DeBr4WFV9f6JvJEka2sTTOFX1VpJ/CXyL\n3tTLhw30krS1Jj6ylyRtP5t+b5wkdyY5m+TFJJ/Z7PeftCTnkzyb5HSSp5u665OcTPJCkseT7Nzq\ndg4rydeSLCV5rq9uzf4kOdwcy7NJ7tiaVg9vjf4tJLnQHMPTSe7qWzZr/bslyVNJnk/yvSSfbOpn\n/hiu07e5OH5Jrk1yKsmZJItJPt/UT+bYVdWmveildc4Be4B3AWeAD2xmG6bQpx8A16+o+33gXzfl\nzwAPbXU7N9CfnwFuB54b1B96F82daY7lnubYXrXVfRihf0eAf7XKurPYv/cB+5vye+idP/vAPBzD\ndfo2T8fvuubnNcB3gJ+e1LHb7JH9vF5wtfLs+N3A0aZ8FLh3c5szuqr6NrDyHhZr9ece4FhVXayq\n8/T+sR3YjHaOao3+weo3tZnF/r1cVWea8uvA9+ld5zLzx3CdvsH8HL83muIOeoPjV5nQsdvsYD+P\nF1wV8ESS7yb59aZuV1UtNeUlYNfWNG1i1urPjfSO4bJZPp6fSPJMkof7vibPdP+S7KH3LeYUc3YM\n+/r2naZqLo5fkquSnKF3jJ6qqueZ0LHb7GA/j2eDP1xVtwN3Ab+R5Gf6F1bv+9bc9HuI/sxiX78C\n7AX2Ay8BX1hn3ZnoX5L3AH8KfKqqXutfNuvHsOnbn9Dr2+vM0fGrqreraj9wM/CzSX5+xfKRj91m\nB/sfAbf0/X4LV34yzZyqeqn5+VfAN+h9jVpK8j6AJLuBV7auhROxVn9WHs+bm7qZUlWvVAP4Kpe/\nCs9k/5K8i16g/3pVPdJUz8Ux7Ovbf1ju27wdP4Cq+hvgz4F/yISO3WYH++8CtybZk2QHcB9wYpPb\nMDFJrkvyt5ryTwB3AM/R69PBZrWDwCOr72FmrNWfE8D9SXYk2QvcSu8iupnS/Ada9hF6xxBmsH/p\n3R/gYWCxqr7Ut2jmj+FafZuX45fkhuUUVJJ3A78EnGZSx24LzjbfRe8s+jng8Fad9Z5QX/bSOxt+\nBvjecn+A64EngBeAx4GdW93WDfTpGL0rn39M7/zKP1uvP8Bnm2N5FvjHW93+Efr3z4E/BJ4Fnmn+\nI+2a4f79NPB282/ydPO6cx6O4Rp9u2tejh/wQeC/Nv17Fvitpn4ix86LqiSpBXzguCS1gMFeklrA\nYC9JLWCwl6QWMNhLUgsY7CWpBQz2ktQCBntJaoH/Dzx1pB+a1db3AAAAAElFTkSuQmCC\n",
-      "text/plain": [
-       "<matplotlib.figure.Figure at 0x7f7616b7dd90>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "D_cos = pairwise_distances(X, metric='cosine')\n",
-    "uniq_countries, uniq_counts = np.unique(Y, return_counts=True)\n",
-    "k = np.int(np.round(np.mean(uniq_counts)))\n",
-    "N_k = n_occurrence_from_D(D_cos, k=k)\n",
-    "print skew(N_k)\n",
-    "plt.figure()\n",
-    "plt.hist(N_k, bins=100);"
-   ]
-  },
-  {
    "cell_type": "code",
    "execution_count": 16,
    "metadata": {
@@ -534,7 +495,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -544,6 +505,13 @@
       "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n",
       "  if self.run_code(code, result):\n"
      ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
     }
    ],
    "source": [
@@ -601,17 +569,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:121: SettingWithCopyWarning: \n",
+      "/homes/mp305/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:115: SettingWithCopyWarning: \n",
       "A value is trying to be set on a copy of a slice from a DataFrame\n",
       "\n",
-      "See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
+      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
       "  self._setitem_with_indexer(indexer, value)\n"
      ]
     }
@@ -650,6 +618,128 @@
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## what if we use a different distance metric?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.20652171267\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXsAAAEACAYAAABS29YJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFadJREFUeJzt3W2MXNd93/HvT5IJWXFdVlBBUw8oCVSCzcIAhcZEUOdh\ngiSqFLSS/MaSgwREqwYGlNqGi6QmDbRcJ0CsBLDrF4X9xnLAuA0bIoEFKoFlUaomcF6YgltSkrVm\nJQJmYbrSKm2VVIIqmKr+fTF3yeFqd2d2HnZ35n4/wGDPnvsw5+wl/3Pmf8+9N1WFJGm+XbXVDZAk\nTZ/BXpJawGAvSS1gsJekFjDYS1ILGOwlqQWGCvZJrk5yOsmjze/XJzmZ5IUkjyfZ2bfu4SQvJjmb\n5I5pNVySNLxhR/afAhaB5Un5h4CTVXUb8GTzO0n2AfcB+4A7gS8n8duDJG2xgYE4yc3ALwNfBdJU\n3w0cbcpHgXub8j3Asaq6WFXngXPAgUk2WJK0ccOMuv8d8FvA2311u6pqqSkvAbua8o3Ahb71LgA3\njdtISdJ41g32Sf4J8EpVnebyqP4K1bvfwnr3XPB+DJK0xa4ZsPwfAXcn+WXgWuC9Sb4OLCV5X1W9\nnGQ38Eqz/o+AW/q2v7mpu0ISPwAkaQRVterAe5B1R/ZV9dmquqWq9gL3A/+5qn4NOAEcbFY7CDzS\nlE8A9yfZkWQvcCvw9Br7ntvXkSNHtrwN9s/+ta1vbejfOAaN7N8Ro5ufDwHHkzwAnAc+2gTwxSTH\n6c3ceQt4sMZtoSRpbEMH+6r6C+AvmvL/Bn5xjfV+F/jdibROkjQRzoGfgk6ns9VNmCr7N7vmuW8w\n//0bR7Yiy5LE7I4kbVASahonaCVJ88FgL0ktsNHZOK2RXPlNybSTpFnmyH5dgy4OlqTZYLCXpBYw\n2EtSCxjsJakFDPaS1AIGe0lqAYO9JLWA8+w3yPn3kmaRI/uROP9e0mwx2EtSCxjsJakFDPaS1AIG\ne0lqAYO9JLXAusE+ybVJTiU5k2Qxyeeb+oUkF5Kcbl539W1zOMmLSc4muWPaHZAkDTbwsYRJrquq\nN5JcA/wl8JvALwCvVdUXV6y7D/gj4EPATcATwG1V9faK9bb9Ywl78+mX25hL8+nXqpekaZvqYwmr\n6o2muAO4Gnh1+X1XWf0e4FhVXayq88A54MAoDZMkTc7AYJ/kqiRngCXgqap6vln0iSTPJHk4yc6m\n7kbgQt/mF+iN8CVJW2iYkf3bVbUfuBn42SQd4CvAXmA/8BLwhfV2MYF2SpLGMPS9carqb5L8OfCT\nVdVdrk/yVeDR5tcfAbf0bXZzU/cOCwsLl8qdTodOpzNsUySpFbrdLt1udyL7WvcEbZIbgLeq6q+T\nvBv4FvA54PmqerlZ59PAh6rqV/pO0B7g8gnav7/ybKwnaCVp48Y5QTtoZL8bOJrkKnopn69X1ZNJ\n/jDJfnpR7wfAxwGqajHJcWAReAt4cNtHdUlqgYFTL6fypo7sJWnDpjr1UpI0+wz2ktQCPqlqinyq\nlaTtwpH91PlUK0lbz5H9BDiCl7TdObKfGEfwkrYvg70ktYDBXpJawGAvSS3gCdpN1H8i15O4kjaT\nI/tN5UlcSVvDYC9JLWCwl6QWMNhLUgsY7CWpBQz2ktQCrZp66T1sJLVVC0f2Tn+U1D4tDPaS1D7r\nBvsk1yY5leRMksUkn2/qr09yMskLSR5PsrNvm8NJXkxyNskd0+6AJGmwgQ8cT3JdVb2R5BrgL4Hf\nBO4G/mdV/X6SzwB/p6oOJdkH/BHwIeAm4Angtqp6e8U+t+SB4xt5WPhGHjg+7Lo+qFzSOKb6wPGq\neqMp7gCuBl6lF+yPNvVHgXub8j3Asaq6WFXngXPAgVEaJkmanIHBPslVSc4AS8BTVfU8sKuqlppV\nloBdTflG4ELf5hfojfAlSVto4NTLJgWzP8nfBr6V5OdXLK8k6+UkVl22sLBwqdzpdOh0OsO0d+44\nHVTSWrrdLt1udyL7Gpizv2Ll5N8A/xf4F0Cnql5OspveiP/9SQ4BVNVDzfqPAUeq6tSK/WybnP1a\nwXazcvYbOY8gqd2mlrNPcsPyTJsk7wZ+CTgNnAAONqsdBB5pyieA+5PsSLIXuBV4epSGba7tNfc+\nyRUvSRrXoDTObuBokqvofTB8vaqeTHIaOJ7kAeA88FGAqlpMchxYBN4CHtySIfxc6P9GIEnj2VAa\nZ2Jvuu3SOMOnVzYjjWNqR9Jqpjr1UpI0+wz2ktQCBntJagGDvSS1gMFeklrAYC9JLWCwl6QWMNhL\nUgsY7CWpBQz2ktQCBntJagGDvSS1gMFeklrAYC9JLWCwl6QWMNhLUgsY7CWpBQY9llDbyFoPR5ek\nQRzZz5zt9XB0SbNhYLBPckuSp5I8n+R7ST7Z1C8kuZDkdPO6q2+bw0leTHI2yR3T7MAgSS69NrK+\nJM2TYdI4F4FPV9WZJO8B/kuSk/SGl1+sqi/2r5xkH3AfsA+4CXgiyW1V9faE274B/Q/9nuS6kjQb\nBo7sq+rlqjrTlF8Hvk8viMPqEfEe4FhVXayq88A54MBkmrv9+C1A0izYUM4+yR7gduA7TdUnkjyT\n5OEkO5u6G4ELfZtd4PKHwxwyfy5p+xt6Nk6TwvkT4FNV9XqSrwC/3Sz+HeALwANrbP6OiLiwsHCp\n3Ol06HQ6wzZFklqh2+3S7XYnsq8MM30vybuAPwO+WVVfWmX5HuDRqvpgkkMAVfVQs+wx4EhVnepb\nvzZr2mAvzdKfh79crqp1l69cd+39bWTd9ddZrU3rrSupPZJQVSPljoeZjRPgYWCxP9An2d232keA\n55ryCeD+JDuS7AVuBZ4epXGSpMkYJo3zYeBXgWeTnG7qPgt8LMl+ekPNHwAfB6iqxSTHgUXgLeDB\nTRvGS5JWNVQaZ+JvahrHNI6kDRsnjePtEqbA6ZiSthtvlzAV3tJA0vZisJekFjDYS1ILGOwlqQUM\n9pLUAgZ7SWoBg70ktYDBXpJawGAvSS1gsJekFmh1sPe2BpLaotXB3lsaSGqLlgd7SWoHg70ktYDB\nXpJawGAvSS3gw0u2yLRmAvXv1ydZSVpmsB/S5INz/+MKJ235UYmS1DMwjZPkliRPJXk+yfeSfLKp\nvz7JySQvJHk8yc6+bQ4neTHJ2SR3TLMDm8enT0maXcPk7C8Cn66qfwD8FPAbST4AHAJOVtVtwJPN\n7yTZB9wH7APuBL6cxHMDkrSFBgbhqnq5qs405deB7wM3AXcDR5vVjgL3NuV7gGNVdbGqzgPngAMT\nbrckaQM2NOJOsge4HTgF7KqqpWbRErCrKd8IXOjb7AK9DwdJ0hYZ+gRtkvcAfwp8qqpeWzHro5Ks\nl9B+x7KFhYVL5U6nQ6fTGbYpktQK3W6Xbrc7kX1lmOl5Sd4F/Bnwzar6UlN3FuhU1ctJdgNPVdX7\nkxwCqKqHmvUeA45U1am+/dVmTQvsfSj1z3xZWR60fJjyJPbRK1fVO9q8/Ldaq371/q6+XNLsSkJV\njTTVbpjZOAEeBhaXA33jBHCwKR8EHumrvz/JjiR7gVuBp0dpnCRpMoZJ43wY+FXg2SSnm7rDwEPA\n8SQPAOeBjwJU1WKS48Ai8Bbw4KYN4yVJqxoqjTPxNzWNs2bZNI6ktUw1jSNJmn0Ge0lqAe+NMwd8\nvKKkQRzZzw3v3SNpbXMzsl85uvXkpCRdNmcje0e3krSaOQv2kqTVGOwlqQUM9pLUAnNzgnaeObVS\n0rgc2c8ETzxLGo/BXpJawGAvSS1gsJekFjDYS1ILGOwlqQUM9pLUAgZ7SWoBg70ktcDAYJ/ka0mW\nkjzXV7eQ5EKS083rrr5lh5O8mORskjum1fB55dWykqZhmJH9HwB3rqgr4ItVdXvz+iZAkn3AfcC+\nZpsvJ/Hbw4ZM/krZJFe8JLXPwEBcVd8GXl1l0WpR4x7gWFVdrKrzwDngwFgt1IR4ywWpzcYZdX8i\nyTNJHk6ys6m7EbjQt84F4KYx3mNkjmIl6bJR73r5FeC3m/LvAF8AHlhj3VWHkwsLC5fKnU6HTqcz\nYlPWsvy2BnxJs6nb7dLtdieyrwzzrNYke4BHq+qD6y1Lcgigqh5qlj0GHKmqUyu2qUk/I7Y3iu8P\n8MOWR91u0vsYZX/93rmP5b/xyr+Nz+eVZlMSqmqkEexIaZwku/t+/QiwPFPnBHB/kh1J9gK3Ak+P\n8h4ahnl4ScMZmMZJcgz4OeCGJD8EjgCdJPvpRZofAB8HqKrFJMeBReAt4MGJD+ElSRs2VBpn4m9q\nGmdT2mQaR5ovm57GkSTNFoO9JLWAwV6SWsBgL0ktYLCXpBYY9QpazYGVt5Nwlo40vxzZt54XZklt\nYLCXpBYw2EtSCxjsJakFDPaS1AIGe0lqAYO9JLWAwV6SWsBgL0kt4BW0c2zUB657Za00fxzZz7Vx\nro71ylppnhjsJakFDPaS1AIDg32SryVZSvJcX931SU4meSHJ40l29i07nOTFJGeT3DGthkuShjfM\nyP4PgDtX1B0CTlbVbcCTze8k2QfcB+xrtvlykol+e0hyxUsb599Oap+Bgbiqvg28uqL6buBoUz4K\n3NuU7wGOVdXFqjoPnAMOTKapV7QKTx6Ow7+f1Dajjrp3VdVSU14CdjXlG4ELfetdAG4a8T0kSRMy\n9jz7qqok6w0TV122sLBwqdzpdOh0OuM2RZLmSrfbpdvtTmRfGeaCmSR7gEer6oPN72eBTlW9nGQ3\n8FRVvT/JIYCqeqhZ7zHgSFWdWrG/GvVCnV6ueXnbXLrgZ2X98OVRt5v0Pra+TWv9Ldc7Vl6AJW2e\nJFTVSCfcRk3jnAAONuWDwCN99fcn2ZFkL3Ar8PSI76GZ4TkAabsbmMZJcgz4OeCGJD8E/i3wEHA8\nyQPAeeCjAFW1mOQ4sAi8BTw48hBekjQxQ6VxJv6mpnG2aZv6bSSNM9y6ksazFWkczSXTMdK8mvm7\nXnpx0ObwRKw02+ZgZO9odPP4t5Zm1RwEe0nSIAZ7SWoBg70ktYDBXpJawGCvgZzxJM2+mZ96qc2w\nfJHWYP0fDE7PlLYPR/aaMKdnStuRwV6SWsBgL0ktYM5eU7daHt/bL0iby5G9Nslqwdz8vrRZDPaS\n1AIGe0lqAYO9JLWAwV6SWmCs2ThJzgP/B/h/wMWqOpDkeuCPgb9H83zaqvrrMdupGeRtFqTtY9yR\nfQGdqrq9qg40dYeAk1V1G/Bk87taydk20nYxiTTOyuHb3cDRpnwUuHcC7yFJGsMkRvZPJPlukl9v\n6nZV1VJTXgJ2jfkeQC8lYFpAkkYz7hW0H66ql5L8XeBkkrP9C6uqkkzwe/zwd1+UJF02VrCvqpea\nn3+V5BvAAWApyfuq6uUku4FXVtt2YWHhUrnT6dDpdMZpiiTNnW63S7fbnci+Muo9SZJcB1xdVa8l\n+QngceBzwC8C/6uqfi/JIWBnVR1asW1t9H17KZzlkf3ytpMob5d9zFabrrzHzfD7WGs7740jDZaE\nqhopvTHOyH4X8I0mj34N8B+r6vEk3wWOJ3mAZurlGO8hSZqAkUf2Y72pI/s5aFO/yY3svRumtLZx\nRvZeQasRTXMOvfPzpUnb1vezd6qlJE3GDIzsHeVJ0rhmINhLksa1rdM4mj+m5qSt4chem8y0nLQV\nDPaS1AIGe0lqAXP2mgmDLrbyYixpfQZ7bWtXBvHLV9yufqJ3rSt819+vHwxqA9M42ubWOqFbA5YP\nu2+pHRzZa1uY1pRM0ztSjyN7bRPea0eapm0xsnf0JUnTtS2Cfc+gk2/S8Pw3JF1pGwX7fsPPqpBW\nt/F/Q8N8w/RbqGaVOXvpCsPk9z0HoNmzTUf20nSMkt4ZZhvn7Wu727Jgv7i4uFVvrVYbNUW4+nbv\nvOjL1KO2p6kE+yR3Al8Crga+WlW/t3Kd/ft/kmuv3cPFi69OownS2Ib7FrD2h8da+X3z/toKE8/Z\nJ7ka+PfAncA+4GNJPrByveuu+xCvvbbIm28uTLoJ20B3qxugsXSbn5PIza93BfDmB/lut7vp77mZ\n5r1/45jGCdoDwLmqOl9VF4H/BNwzhffZxrpb3QBtQLJyum93q5oCXG7PynaNU79s3oPhvPdvHNMI\n9jcBP+z7/UJTJ21Tkx1lv/PDY7j6K5dv9BvBoHsI9Xzuc59b9UNA828aOfuh/te8+eZzvPe9/5Qf\n//i/8+abU2iFtGXWyuMPU//Ok7xrBeWNBOu17h46zL6qaqQPhvXOUUzifIbXRWxMJt35JD8FLFTV\nnc3vh4G3+0/SJmnvX1ySxlBVI30lm0awvwb4b8AvAP8DeBr4WFV9f6JvJEka2sTTOFX1VpJ/CXyL\n3tTLhw30krS1Jj6ylyRtP5t+b5wkdyY5m+TFJJ/Z7PeftCTnkzyb5HSSp5u665OcTPJCkseT7Nzq\ndg4rydeSLCV5rq9uzf4kOdwcy7NJ7tiaVg9vjf4tJLnQHMPTSe7qWzZr/bslyVNJnk/yvSSfbOpn\n/hiu07e5OH5Jrk1yKsmZJItJPt/UT+bYVdWmveildc4Be4B3AWeAD2xmG6bQpx8A16+o+33gXzfl\nzwAPbXU7N9CfnwFuB54b1B96F82daY7lnubYXrXVfRihf0eAf7XKurPYv/cB+5vye+idP/vAPBzD\ndfo2T8fvuubnNcB3gJ+e1LHb7JH9vF5wtfLs+N3A0aZ8FLh3c5szuqr6NrDyHhZr9ece4FhVXayq\n8/T+sR3YjHaOao3+weo3tZnF/r1cVWea8uvA9+ld5zLzx3CdvsH8HL83muIOeoPjV5nQsdvsYD+P\nF1wV8ESS7yb59aZuV1UtNeUlYNfWNG1i1urPjfSO4bJZPp6fSPJMkof7vibPdP+S7KH3LeYUc3YM\n+/r2naZqLo5fkquSnKF3jJ6qqueZ0LHb7GA/j2eDP1xVtwN3Ab+R5Gf6F1bv+9bc9HuI/sxiX78C\n7AX2Ay8BX1hn3ZnoX5L3AH8KfKqqXutfNuvHsOnbn9Dr2+vM0fGrqreraj9wM/CzSX5+xfKRj91m\nB/sfAbf0/X4LV34yzZyqeqn5+VfAN+h9jVpK8j6AJLuBV7auhROxVn9WHs+bm7qZUlWvVAP4Kpe/\nCs9k/5K8i16g/3pVPdJUz8Ux7Ovbf1ju27wdP4Cq+hvgz4F/yISO3WYH++8CtybZk2QHcB9wYpPb\nMDFJrkvyt5ryTwB3AM/R69PBZrWDwCOr72FmrNWfE8D9SXYk2QvcSu8iupnS/Ada9hF6xxBmsH/p\n3R/gYWCxqr7Ut2jmj+FafZuX45fkhuUUVJJ3A78EnGZSx24LzjbfRe8s+jng8Fad9Z5QX/bSOxt+\nBvjecn+A64EngBeAx4GdW93WDfTpGL0rn39M7/zKP1uvP8Bnm2N5FvjHW93+Efr3z4E/BJ4Fnmn+\nI+2a4f79NPB282/ydPO6cx6O4Rp9u2tejh/wQeC/Nv17Fvitpn4ix86LqiSpBXzguCS1gMFeklrA\nYC9JLWCwl6QWMNhLUgsY7CWpBQz2ktQCBntJaoH/Dzx1pB+a1db3AAAAAElFTkSuQmCC\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x7f7616b7dd90>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "D_cos = pairwise_distances(X, metric='cosine')\n",
+    "uniq_countries, uniq_counts = np.unique(Y, return_counts=True)\n",
+    "k = np.int(np.round(np.mean(uniq_counts)))\n",
+    "N_k = n_occurrence_from_D(D_cos, k=k)\n",
+    "print skew(N_k)\n",
+    "plt.figure()\n",
+    "plt.hist(N_k, bins=100);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## are the extreme hubs same as before?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[232 233 238 246 256] [ 'https://sounds.bl.uk/World-and-traditional-music/Dinka-songs-from-South-Sudan/025M-C1580X0004XX-0001V0'\n",
+      " 'https://sounds.bl.uk/World-and-traditional-music/Dinka-songs-from-South-Sudan/025M-C1580X0109XX-0001V0'\n",
+      " 'https://sounds.bl.uk/World-and-traditional-music/Terry-Yarnell-Collection/025M-C1005X0002XX-1200V0'\n",
+      " 'https://sounds.bl.uk/World-and-traditional-music/Roy-Palmer-collection/025M-C1023X0006XX-0700V0'\n",
+      " 'https://sounds.bl.uk/World-and-traditional-music/Terry-Yarnell-Collection/025M-C1005X0004XX-1900V0']\n"
+     ]
+    }
+   ],
+   "source": [
+    "sort_idx = np.argsort(N_k)\n",
+    "print N_k[sort_idx[-5:]], ddf['songurls_Album'].iloc[sort_idx[-5:]].get_values()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "## pairwise Mahalanobis seems to have extreme values, how about Mahalanobis from the whole set?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFYVJREFUeJzt3W+wnOV93vHvZUAYDDUlnhECNJWSiBi5JLFpgNZ2WDtY\nlTsJ4hUSnVDVoZ5JlcRu2nEs2TPh+A2B9I9DJoM7jgMWHlsdxXEZmFAVmbAtnsbINmDLCBXUIpuj\nRLLTuBB3xkEMv77Y+0jL4egI7dlzzu7R9zOzo3vv59/vAWmvve9n99lUFZIkvWGxC5AkjQYDQZIE\nGAiSpMZAkCQBBoIkqTEQJEnASQIhyd1JjiTZO63/N5I8neTbSe7o69+W5Nkk+5Os6+u/MsnetuzO\n4Z+GJGmuTjZCuAdY39+R5D3A9cBPV9XfB/5d618LbATWtm3uSpK22aeAW6pqDbAmyav2KUlafLMG\nQlU9CvxgWve/BH6nqo62db7f+jcAO6rqaFUdBA4AVydZAZxfVXvaevcCNwypfknSkAxyDWEN8PNJ\nvpqkm+QftP6Lgcm+9SaBS2boP9T6JUkj5MwBt/m7VXVNkp8DdgI/PtyyJEkLbZBAmAS+BFBVX0vy\nSpK30Hvnv7JvvUvbuodau7//0Ew7TuKNlSRpAFWVk681u0GmjO4D3guQ5DJgWVX9FXA/sCnJsiSr\n6U0t7amqw8CLSa5uF5lvbvuYUVWN7ePWW29d9BpOx9qtf/Ef1r+4j2GZdYSQZAdwLfBjSZ4Hfhu4\nG7i7fRT1JeCftRfyfUl2AvuAl4EtdbzSLcBngXOAB6tq19DOQJI0FLMGQlXddIJFN59g/duA22bo\n/wZwxSlXJ0laMH5TeYg6nc5ilzCwca4drH+xWf/SkGHOP81VkhqleiRpHCShFumisiRpCTIQJEmA\ngSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTG\nQJAkASf5Cc1Rlxz/PQh/WEeS5mbWEUKSu5McSbJ3hmX/JskrSS7s69uW5Nkk+5Os6+u/MsnetuzO\n4Z6CQSBJw3CyKaN7gPXTO5OsBN4HfKevby2wEVjbtrkrx9/Cfwq4parWAGuSvGafkqTFNWsgVNWj\nwA9mWPQfgN+a1rcB2FFVR6vqIHAAuDrJCuD8qtrT1rsXuGFOVUuShu6ULyon2QBMVtW3pi26GJjs\nez4JXDJD/6HWP5Akxx6SpOE5pYvKSc4FPkZvuuhY91Arel2mrhsYCpI0LKf6KaOfAFYB32zv0C8F\nvpHkanrv/Ff2rXspvZHBodbu7z90ogNMTEwca3c6HTqdzimWKElLW7fbpdvtDn2/OdnHNZOsAh6o\nqitmWPYccGVV/XW7qPwF4Cp6U0JfBn6yqirJY8CHgD3AnwK/X1W7ZthfvY56ePUIoYD4sVNJp60k\nVNWcp0xO9rHTHcD/AC5L8nySD0xb5dircFXtA3YC+4D/Amzpe3XfAnwGeBY4MFMYSJIW10lHCAvJ\nEYIknboFGSFIkk4fBoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2B\nIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJzayBkOTuJEeS7O3r+7dJnk7yzSRfSvLm\nvmXbkjybZH+SdX39VybZ25bdOT+nIkmai5ONEO4B1k/rewh4W1X9DPAMsA0gyVpgI7C2bXNXkqkf\nff4UcEtVrQHWJJm+T0nSIps1EKrqUeAH0/p2V9Ur7eljwKWtvQHYUVVHq+ogcAC4OskK4Pyq2tPW\nuxe4YUj1S5KGZK7XEH4FeLC1LwYm+5ZNApfM0H+o9UuSRsiZg26Y5OPAS1X1hSHWw8TExLF2p9Oh\n0+kMc/eSNPa63S7dbnfo+01Vzb5Csgp4oKqu6Ov758AHgV+oqh+1vq0AVXV7e74LuBX4DvBIVV3e\n+m8Crq2qX53hWPU66gGm1plqh5NtJ0lLVRKqKidfc3anPGXULgh/BNgwFQbN/cCmJMuSrAbWAHuq\n6jDwYpKr20Xmm4H75lq4JGm4Zp0ySrIDuBZ4S5Ln6b3j3wYsA3a3DxH9eVVtqap9SXYC+4CXgS19\nb/e3AJ8FzgEerKpd83EykqTBnXTKaCE5ZSRJp27RpowkSUuTgSBJAgwESVJjIEiSAANBktQYCJIk\nwECQJDUGgiQJmMPN7UbN8Z9ewC+pSdIAltAIoTj+DWZJ0qlaQoEgSZoLA0GSBBgIkqTGQJAkAQaC\nJKkxECRJgIEgSWoMBEkSYCBIkppZAyHJ3UmOJNnb13dhkt1JnknyUJIL+pZtS/Jskv1J1vX1X5lk\nb1t25/yciiRpLk42QrgHWD+tbyuwu6ouAx5uz0myFtgIrG3b3JXjNxj6FHBLVa0B1iSZvk9J0iKb\nNRCq6lHgB9O6rwe2t/Z24IbW3gDsqKqjVXUQOABcnWQFcH5V7Wnr3du3jSRpRAxyDWF5VR1p7SPA\n8ta+GJjsW28SuGSG/kOtX5I0QuZ0++uqqiRDvcXoxMTEsXan06HT6Qxz95I09rrdLt1ud+j7zcl+\nOyDJKuCBqrqiPd8PdKrqcJsOeqSq3ppkK0BV3d7W2wXcCnynrXN5678JuLaqfnWGY9XrqIfjt7me\nar+6z99DkHQ6SUJV5eRrzm6QKaP7gc2tvRm4r69/U5JlSVYDa4A9VXUYeDHJ1e0i881920iSRsSs\nU0ZJdgDXAm9J8jzw28DtwM4ktwAHgRsBqmpfkp3APuBlYEvf2/0twGeBc4AHq2rX8E9FkjQXJ50y\nWkhOGUnSqVvMKSNJ0hJkIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS\n1BgIkiRgjj+Qs1CO/zSzJGm+jNEIoTh+R1NJ0rCNUSBIkuaTgSBJAgwESVJjIEiSAANBktQMHAhJ\ntiV5KsneJF9IcnaSC5PsTvJMkoeSXDBt/WeT7E+ybjjlS5KGZaBASLIK+CDwjqq6AjgD2ARsBXZX\n1WXAw+05SdYCG4G1wHrgriSOTiRphAz6ovwicBQ4N8mZwLnAXwDXA9vbOtuBG1p7A7Cjqo5W1UHg\nAHDVoEVLkoZvoECoqr8G/j3wXXpB8H+rajewvKqOtNWOAMtb+2Jgsm8Xk8AlA1X8OiQ59pAkvT4D\n3boiyU8A/wpYBbwA/HGSX+5fp6oqyWxfLZ5x2cTExLF2p9Oh0+kMUOHUrg0ESUtPt9ul2+0Ofb+p\nOvXbQSTZCLyvqv5Fe34zcA3wXuA9VXU4yQrgkap6a5KtAFV1e1t/F3BrVT02bb81Uz29d/r9L/LT\n2ydePsj5SdI4SUJVzfkd8KDXEPYD1yQ5J71X6+uAfcADwOa2zmbgvta+H9iUZFmS1cAaYM/gZUuS\nhm2gKaOq+maSe4GvA68AjwOfBs4Hdia5BTgI3NjW35dkJ73QeBnYMuNQQJK0aAaaMpovThlJ0qlb\n7CkjSdISYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQ\nJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1AwcCEkuSPLFJE8n2Zfk6iQXJtmd5JkkDyW5oG/9\nbUmeTbI/ybrhlC9JGpa5jBDuBB6sqsuBnwb2A1uB3VV1GfBwe06StcBGYC2wHrgriaMTSRohA70o\nJ3kz8O6quhugql6uqheA64HtbbXtwA2tvQHYUVVHq+ogcAC4ai6FS5KGa9B36auB7ye5J8njSf4w\nyZuA5VV1pK1zBFje2hcDk33bTwKXDHhsSdI8OHMO270D+PWq+lqS36NND02pqkpSs+xjxmUTExPH\n2p1Oh06nM2CJkrQ0dbtdut3u0Pebqtles0+wUXIR8OdVtbo9fxewDfhx4D1VdTjJCuCRqnprkq0A\nVXV7W38XcGtVPTZtvzVTPUk4nh8ztU+8fJDzk6RxkoSqylz3M9CUUVUdBp5Pclnrug54CngA2Nz6\nNgP3tfb9wKYky5KsBtYAewauWpI0dINOGQH8BvD5JMuA/wV8ADgD2JnkFuAgcCNAVe1LshPYB7wM\nbJlxKCBJWjQDTRnNF6eMJOnUDWvKaC4jhLHQC5Mew0GSTuw0+HJYcYIPNEmS+pwGgSBJej0MBEkS\nYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSp\nMRAkSYCBIElq5hQISc5I8kSSB9rzC5PsTvJMkoeSXNC37rYkzybZn2TdXAuXJA3XXEcIHwb2cfw3\nKrcCu6vqMuDh9pwka4GNwFpgPXBXEkcnkjRCBn5RTnIp8E+AzwBTv2R/PbC9tbcDN7T2BmBHVR2t\nqoPAAeCqQY8tSRq+ubxL/yTwEeCVvr7lVXWktY8Ay1v7YmCyb71J4JI5HFuSNGRnDrJRkl8EvldV\nTyTpzLROVVWSmmnZ1CozdU5MTBxrdzodOp0Zdy9Jp61ut0u32x36flM122v2CTZKbgNuBl4G3gj8\nHeBLwM8Bnao6nGQF8EhVvTXJVoCqur1tvwu4taoem7bfmqmeJBzPj5naJ1veaw9yrpI06pJQVTn5\nmrMbaMqoqj5WVSurajWwCfizqroZuB/Y3FbbDNzX2vcDm5IsS7IaWAPsmVvpkqRhGmjKaAZTb71v\nB3YmuQU4CNwIUFX7kuyk94mkl4EtMw4FJEmLZqApo/ky31NG/UbpvCVpLhZ1ymh8FSe4li1Jp71h\nTRkN1XPPPcd3v/vdxS5Dkk4rIxkIn/70Z/jkJz/PG9/49/jRj55e7HIk6bQwklNGVfC3f/tBXnjh\nv3HWWdctdjmSdFoYyUCQJC08A0GSBBgIkqTGQJAkAQaCJKkxECRJwIh+D2Eh9G6H0eNtLCTptB4h\neBsLSep3GgeCJKmfgSBJAgwESVJjIEiSAANBktQYCJIkYMBASLIyySNJnkry7SQfav0XJtmd5Jkk\nDyW5oG+bbUmeTbI/ybphnYAkaTgGHSEcBX6zqt4GXAP8WpLLga3A7qq6DHi4PSfJWmAjsBZYD9yV\nxNGJJI2QgV6Uq+pwVT3Z2j8EngYuAa4HtrfVtgM3tPYGYEdVHa2qg8AB4Ko51C1JGrI5v0tPsgp4\nO/AYsLyqjrRFR4DlrX0xMNm32SS9ABkJSY49JOl0NadASHIe8CfAh6vqb/qXVe8GQbPdG2KE7hvh\nbSwkaeCb2yU5i14YfK6q7mvdR5JcVFWHk6wAvtf6DwEr+za/tPW9xsTEBF/5yn8HzgLeOWh5krRk\ndbtdut3u0PebQe70md7cynbg/1TVb/b1/27ruyPJVuCCqtraLip/gd51g0uALwM/WdMOnqSqiq1b\nP84dd5wLfJzzzvun/PCHOzj+Dj4ztGfqG6ztnU8ljZskVNWc57wHHSG8E/hl4FtJnmh924DbgZ1J\nbgEOAjcCVNW+JDuBfcDLwJbpYSBJWlwDBUJVfYUTX3+47gTb3AbcNsjxJEnzz+8CSJIAA0GS1BgI\nkiTAQJAkNQaCJAmYwxfTlqr+21f4yVhJpxNHCK/hbSwknZ4MBEkSYCBIkhqvIcxiptthe11B0lLl\nCGFW1fenQSBpaTMQJEmAgSBJagwESRLgReVT5hfXJC1VjhBOmReYJS1NjhDmwNGCpKXEEcKcOFqQ\ntHQ4QhgSRwuSxt2CjhCSrE+yP8mzST66kMeef44WJI23BQuEJGcAfwCsB9YCNyW5fKGOvzC6QG+0\nMNNjykx9i63b7S52CXNi/YvL+peGhRwhXAUcqKqDVXUU+E/AhgU8/gLotj/7RwvH268OgVePJk4U\nHgtl3P9BWP/isv6lYSED4RLg+b7nk63vNPLaaaXXhsSrw2OURhGSlraFvKj8uifYEzj77M9x9tlf\n5aWXHp/PmkbA1H+W6S/8x/sXKhQ+8YlPHD/6DBfGB71wPr1+L7pLoykL9Y8zyTXARFWtb8+3Aa9U\n1R196/hKIUkDqKo5v3NcyEA4E/ifwC8AfwHsAW6qqqcXpABJ0qwWbMqoql5O8uvAfwXOAP7IMJCk\n0bFgIwRJ0mgbiVtXjOoX1pLcneRIkr19fRcm2Z3kmSQPJbmgb9m2dg77k6zr678yyd627M4Fqn1l\nkkeSPJXk20k+NGb1vzHJY0meTLIvye+MU/19xz4jyRNJHhi3+pMcTPKtVv+eMaz/giRfTPJ0+zt0\n9bjUn+Sn2n/3qccLST407/VX1aI+6E0fHQBWAWcBTwKXL3ZdrbZ3A28H9vb1/S7wW639UeD21l7b\naj+rncsBjo/A9gBXtfaDwPoFqP0i4Gdb+zx6128uH5f627HObX+eCXwVeNc41d+O96+BzwP3j9Pf\nn3as54ALp/WNU/3bgV/p+zv05nGqv+883gD8JbByvutfsJOa5WT/IbCr7/lWYOti19VXzypeHQj7\ngeWtfRGwv7W3AR/tW28XcA2wAni6r38T8B8X4TzuA64bx/qBc4GvAW8bp/qBS4EvA+8BHhi3vz/0\nAuHHpvWNRf30Xvz/9wz9Y1H/tJrXAY8uRP2jMGU0bl9YW15VR1r7CLC8tS+mV/uUqfOY3n+IBT6/\nJKvojXQeY4zqT/KGJE+2Oh+pqqcYo/qBTwIfAV7p6xun+gv4cpKvJ/lg6xuX+lcD309yT5LHk/xh\nkjcxPvX32wTsaO15rX8UAmFsr2pXL3JHuv4k5wF/Any4qv6mf9mo119Vr1TVz9J7p/3zSd4zbfnI\n1p/kF4HvVdUTvPZbh8Bo19+8s6reDrwf+LUk7+5fOOL1nwm8A7irqt4B/D96sw/HjHj9ACRZBvwS\n8MfTl81H/aMQCIfozY1NWcmrE23UHElyEUCSFcD3Wv/087iU3nkcau3+/kMLUCdJzqIXBp+rqvta\n99jUP6WqXgD+FLiS8an/HwHXJ3mO3ru79yb5HONTP1X1l+3P7wP/md79yMal/klgsqq+1p5/kV5A\nHB6T+qe8H/hG+38A8/zffxQC4evAmiSrWhpuBO5f5Jpmcz+wubU305ubn+rflGRZktXAGmBPVR0G\nXmyfcAhwc98286Yd64+AfVX1e2NY/1umPkGR5BzgfcAT41J/VX2sqlZW1Wp6Q/4/q6qbx6X+JOcm\nOb+130RvHnvvuNTfjvt8ksta13XAU8AD41B/n5s4Pl00Vef81b+QF0dmuWjyfnqfgjkAbFvsevrq\n2kHvW9Uv0bvO8QHgQnoXCp8BHgIu6Fv/Y+0c9gP/uK//Snr/mA4Av79Atb+L3tz1k/ReSJ+gd+vx\ncan/CuDxVv+3gI+0/rGof9q5XMvxTxmNRf305uCfbI9vT/27HJf623F/ht6HEb4JfIneheZxqv9N\nwF8B5/f1zWv9fjFNkgSMxpSRJGkEGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAPj/Uofs\nWLDE1f0AAAAASUVORK5CYII=\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x7fa475850810>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "threshold, y_pred, MD = utils.get_outliers_Mahal(X, chi2thr=0.999)\n",
+    "plt.figure()\n",
+    "plt.hist(MD, bins=100);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEACAYAAABcXmojAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE3pJREFUeJzt3W2MXuWd3/HvDxwUCMQOSmUeJZBiFFyxIksXr/NQJm1K\nndUWeMWDtAht3ZUibzfRrpTGXqnFfcOSlZqUqIIXdbKYNPHKyjYIGszasIy6rRSmyZrg4LjgNI6w\nGw/RNspDN3Yh/Pvivnz5zmTwjMf3zNyG70ca+bqvc51z/8+Fmd9c59xnnKpCkiSAc5a7AEnS+DAU\nJEmdoSBJ6gwFSVJnKEiSOkNBktTNKxSSrErylSTfSbI/ybokFyfZk+TFJLuTrBoavyXJS0kOJLl5\nqP+GJPvatgcW44QkSQs335XCA8ATVXUt8GvAAWAzsKeqrgGebq9Jsha4A1gLbAAeTJJ2nIeAjVW1\nBliTZMPIzkSSdMbmDIUkK4EPVdUXAKrqtar6MXALsL0N2w7c1tq3Ajuq6tWqOgQcBNYluRS4qKqm\n2rhHhvaRJI2B+awUrgZ+mOTPkvxNkv+Y5B3A6qqabmOmgdWtfRlweGj/w8Dls/Qfaf2SpDExn1BY\nAfw68GBV/Trwf2mXik6owe/K8PdlSNJZbsU8xhwGDlfV/2ivvwJsAY4muaSqjrZLQ6+07UeAK4f2\nv6Id40hrD/cfmflmSQwXSTpNVZW5R81tzpVCVR0FXk5yTev6CPAC8DhwT+u7B3i0tR8D7kxyXpKr\ngTXAVDvOT9onlwLcPbTPzPcc669777132WuwTuu0Tms88TVK81kpAPwB8KUk5wHfBX4XOBfYmWQj\ncAi4vX1D359kJ7AfeA3YVCer3gQ8DJzP4NNMT47oPCRJIzCvUKiqbwG/Mcumj7zB+PuA+2bp/yZw\n3ekUKElaOj7RvAATExPLXcK8WOdoWedonQ11ng01jlpGfT3qTCWpcatJksZZEmqpbjRLkt46DAVJ\nUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIk\nqTMUJEmdoSBJ6gwFSVJnKEiSurEMhQsueBcXXvgupqamlrsUSXpLGctQ+PnP/xfnnvsefvGLXyx3\nKZL0ljKWoQDvIlmx3EVI0lvOmIaCJGk5GAqSpM5QkCR1hoIkqZtXKCQ5lOT5JHuTTLW+i5PsSfJi\nkt1JVg2N35LkpSQHktw81H9Dkn1t2wOjPx1J0pmY70qhgImqel9V3dj6NgN7quoa4On2miRrgTuA\ntcAG4MEkafs8BGysqjXAmiQbRnQekqQROJ3LR5nx+hZge2tvB25r7VuBHVX1alUdAg4C65JcClxU\nVSeeSHtkaB9J0hg4nZXCU0m+keT3Wt/qqppu7WlgdWtfBhwe2vcwcPks/UdavyRpTMz3CbEPVNUP\nkvw9YE+SA8Mbq6qS1OjLkyQtpXmFQlX9oP35wyRfBW4EppNcUlVH26WhV9rwI8CVQ7tfwWCFcKS1\nh/uPzP6OWzl27GW2bdvG8ePHmZiYmP8ZSdKb3OTkJJOTk4ty7FSd+gf8JBcA51bVT5O8A9gN/Fvg\nI8DfVtWnk2wGVlXV5naj+csMguNy4CngPW018SzwcWAK+Brwuap6csb7FRQrV65n167PsH79+tGe\nsSS9ySShqmbe912Q+awUVgNfbR8gWgF8qap2J/kGsDPJRuAQcDtAVe1PshPYD7wGbKqTybMJeBg4\nH3hiZiBIkpbXnKFQVd8Drp+l//8wWC3Mts99wH2z9H8TuO70y5QkLQWfaJYkdYaCJKkzFCRJnaEg\nSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQ\nJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSN69Q\nSHJukr1JHm+vL06yJ8mLSXYnWTU0dkuSl5IcSHLzUP8NSfa1bQ+M/lQkSWdqviuFTwD7gWqvNwN7\nquoa4On2miRrgTuAtcAG4MEkafs8BGysqjXAmiQbRnMKkqRRmTMUklwB/BawDTjxDf4WYHtrbwdu\na+1bgR1V9WpVHQIOAuuSXApcVFVTbdwjQ/tIksbEfFYKnwU+Cbw+1Le6qqZbexpY3dqXAYeHxh0G\nLp+l/0jrlySNkRWn2pjkt4FXqmpvkonZxlRVJanZti3cVo4de5lt27Zx/PhxJiZmfWtJekuanJxk\ncnJyUY6dqjf+fp7kPuBu4DXg7cA7gf8M/AYwUVVH26WhZ6rqvUk2A1TV/W3/J4F7ge+3Mde2/ruA\nm6rqY7O8Z0GxcuV6du36DOvXrx/h6UrSm08Sqipzj5zbKS8fVdUfV9WVVXU1cCfwV1V1N/AYcE8b\ndg/waGs/BtyZ5LwkVwNrgKmqOgr8JMm6duP57qF9JElj4pSXj2ZxYllxP7AzyUbgEHA7QFXtT7KT\nwSeVXgM21cmlyCbgYeB84ImqevLMSpckjdopLx8tBy8fSdLpWbLLR5KktxZDQZLUGQqSpM5QkCR1\nhoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6\nQ0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1pwyFJG9P8myS\n55LsT/Inrf/iJHuSvJhkd5JVQ/tsSfJSkgNJbh7qvyHJvrbtgcU7JUnSQp0yFKrqGPDhqroe+DXg\nw0k+CGwG9lTVNcDT7TVJ1gJ3AGuBDcCDSdIO9xCwsarWAGuSbFiME5IkLdycl4+q6u9a8zzgXOBH\nwC3A9ta/HbittW8FdlTVq1V1CDgIrEtyKXBRVU21cY8M7SNJGhNzhkKSc5I8B0wDz1TVC8Dqqppu\nQ6aB1a19GXB4aPfDwOWz9B9p/ZKkMbJirgFV9TpwfZKVwF8m+fCM7ZWkRlvWVo4de5lt27Zx/Phx\nJiYmRnt4STqLTU5OMjk5uSjHTtX8v58n+dfAz4F/AUxU1dF2aeiZqnpvks0AVXV/G/8kcC/w/Tbm\n2tZ/F3BTVX1slvcoKFauXM+uXZ9h/fr1Z3iKkvTmloSqytwj5zbXp4/efeKTRUnOB/4JsBd4DLin\nDbsHeLS1HwPuTHJekquBNcBUVR0FfpJkXbvxfPfQPpKkMTHX5aNLge1JzmEQIF+sqqeT7AV2JtkI\nHAJuB6iq/Ul2AvuB14BNdXIpsgl4GDgfeKKqnhz1yUiSzsxpXT5aCl4+kqTTs2SXjyRJby2GgiSp\nMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLU\nGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnq\n5gyFJFcmeSbJC0m+neTjrf/iJHuSvJhkd5JVQ/tsSfJSkgNJbh7qvyHJvrbtgcU5JUnSQs1npfAq\n8IdV9feB3wR+P8m1wGZgT1VdAzzdXpNkLXAHsBbYADyYJO1YDwEbq2oNsCbJhpGejSTpjMwZClV1\ntKqea+2fAd8BLgduAba3YduB21r7VmBHVb1aVYeAg8C6JJcCF1XVVBv3yNA+kqQxcFr3FJJcBbwP\neBZYXVXTbdM0sLq1LwMOD+12mEGIzOw/0volSWNixXwHJrkQ+AvgE1X105NXhKCqKkmNrqytHDv2\nMtu2beP48eNMTEyM7tCSdJabnJxkcnJyUY6dqrm/lyd5G/BfgF1V9e9b3wFgoqqOtktDz1TVe5Ns\nBqiq+9u4J4F7ge+3Mde2/ruAm6rqYzPeq6BYuXI9u3Z9hvXr14/sZCXpzSgJVZW5R85tPp8+CvB5\nYP+JQGgeA+5p7XuAR4f670xyXpKrgTXAVFUdBX6SZF075t1D+0iSxsB8Lh99APgd4Pkke1vfFuB+\nYGeSjcAh4HaAqtqfZCewH3gN2FQnlyObgIeB84EnqurJEZ2HJGkE5gyFqvpvvPGK4iNvsM99wH2z\n9H8TuO50CpQkLR2faJYkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpG7evxBvObz//e//\npdfz+T1NkqSFOwtWCtW+JEmL7SwIBUnSUjEUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWG\ngiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJ3ZyhkOQLSaaT7BvquzjJ\nniQvJtmdZNXQti1JXkpyIMnNQ/03JNnXtj0w+lORJJ2p+awU/gzYMKNvM7Cnqq4Bnm6vSbIWuANY\n2/Z5MEnaPg8BG6tqDbAmycxjSpKW2ZyhUFV/DfxoRvctwPbW3g7c1tq3Ajuq6tWqOgQcBNYluRS4\nqKqm2rhHhvaRJI2Jhd5TWF1V0609Daxu7cuAw0PjDgOXz9J/pPVLksbIGd9orqoCagS1SJKW2YoF\n7jed5JKqOtouDb3S+o8AVw6Nu4LBCuFIaw/3H3njw2/l2LGXW3sSmFhgmZL05jM5Ocnk5OSiHDuD\nH/TnGJRcBTxeVde1138K/G1VfTrJZmBVVW1uN5q/DNzI4PLQU8B7qqqSPAt8HJgCvgZ8rqqenOW9\nCoqVK9fz4x9/nZOLkDCfWiXprSYJVZW5R85tzpVCkh3ATcC7k7wM/BvgfmBnko3AIeB2gKran2Qn\nsB94DdhUJ7+TbwIeBs4HnpgtECRJy2teK4Wl5EpBkk7PKFcKPtEsSeoMBUlSZyhIkjpDQZLUGQqS\npM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQt9N9TWBYn/7ln/OV4krQIzrKVgv/ImyQt\nprMsFCRJi8lQkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKk7qz6NRfD/JUXkjR6Z/FK\nwV95IUmjdhaHgiRp1AwFSVJnKEiSuiUPhSQbkhxI8lKST43omL9041mStDBLGgpJzgX+A7ABWAvc\nleTaMz/y0t5wnpycXNL3WyjrHC3rHK2zoc6zocZRW+qVwo3Awao6VFWvAn8O3Dqqg59YMSz2quFs\n+YtinaNlnaN1NtR5NtQ4aksdCpcDLw+9Ptz6RuTkx1SXKiAk6c1kqR9em9d1nne+859x7NiBEb3V\n4gTD1q1bR35MSVpuWcqngZP8JrC1qja011uA16vq00NjfCJNkk5TVY3kp9+lDoUVwP8E/jHwv4Ep\n4K6q+s6SFSFJekNLevmoql5L8i+BvwTOBT5vIEjS+FjSlYIkabyNzRPNi/FQ2xnWcyjJ80n2Jplq\nfRcn2ZPkxSS7k6waGr+l1X4gyc2LVNMXkkwn2TfUd9o1Jbkhyb627YElqnNrksNtPvcm+egY1Hll\nkmeSvJDk20k+3vrHak5PUedYzWmStyd5NslzSfYn+ZPWPzbzeYoax2ouh97j3FbP4+314s9lVS37\nF4NLSQeBq4C3Ac8B1y5zTd8DLp7R96fAv2rtTwH3t/baVvPb2jkcBM5ZhJo+BLwP2LfAmk6sDKeA\nG1v7CWDDEtR5L/BHs4xdzjovAa5v7QsZ3O+6dtzm9BR1juOcXtD+XAF8HfjgGM7nbDWO3Vy24/4R\n8CXgsfZ60edyXFYKi/pQ2xmYeTf/FmB7a28HbmvtW4EdVfVqVR1i8B/kxlEXU1V/DfzoDGpal+RS\n4KKqmmrjHhnaZzHrhF+dz+Wu82hVPdfaPwO+w+C5mbGa01PUCeM3p3/Xmucx+GHvR4zffM5WI4zZ\nXCa5AvgtYNtQbYs+l+MSCov8UNuCFPBUkm8k+b3Wt7qqplt7Gljd2pcxqPmEpaz/dGua2X+Epav1\nD5J8K8nnh5a9Y1FnkqsYrG6eZYzndKjOr7eusZrTJOckeY7BvD1TVS8wZvP5BjXCmM0l8Fngk8Dr\nQ32LPpfjEgrjeLf7A1X1PuCjwO8n+dDwxhqsxU5V95Kf0zxqWk4PAVcD1wM/AP7d8pZzUpILgb8A\nPlFVPx3eNk5z2ur8CoM6f8YYzmlVvV5V1wNXAP8wyYdnbF/2+ZylxgnGbC6T/DbwSlXtZfYVzKLN\n5biEwhHgyqHXV/LL6bbkquoH7c8fAl9lcDloOsklAG1Z9kobPrP+K1rfUjidmg63/itm9C96rVX1\nSjUMlsMnLq8ta51J3sYgEL5YVY+27rGb06E6/9OJOsd1TlttPwa+BtzAGM7njBr/wRjO5fuBW5J8\nD9gB/KMkX2Qp5nLUN0YW8sXghs93GdwgOY9lvtEMXMDgOhzAO4D/DtzM4CbPp1r/Zn71Js95DH7a\n+C7tJs8i1HYVv3qj+bRqYnCJZB2Dn0AW6wbZzDovHWr/IfDl5a6zHfcR4LMz+sdqTk9R51jNKfBu\nYFVrnw/8VwYPqo7NfJ6ixkvGaS5n1HwT8PhS/d0c+QmcwYl/lMGnKg4CW5a5lqvbBD8HfPtEPcDF\nwFPAi8DuE3+52rY/brUfAP7pItW1g8GT4P+PwT2Y311ITQx+etvXtn1uCer85wy+qT0PfAt4lMG1\n0eWu84MMrtc+B+xtXxvGbU7foM6PjtucAtcBf9PqfB745EL/v1msOk9R41jN5Yyab+Lkp48WfS59\neE2S1I3LPQVJ0hgwFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1/x9NWsspl9ltywAAAABJ\nRU5ErkJggg==\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x7fa475850e90>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "threshold, y_pred, MD = utils.get_outliers_Mahal(X[:, :50], chi2thr=0.999)\n",
+    "plt.figure()\n",
+    "plt.hist(MD, bins=100);"
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
--- a/notebooks/test_music_segments.ipynb	Thu Sep 21 15:25:20 2017 +0100
+++ b/notebooks/test_music_segments.ipynb	Thu Sep 21 17:35:57 2017 +0100
@@ -27,7 +27,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 25,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# verify on the 30-second segments\n",
+    "filenames = ['/import/c4dm-04/mariap/train_data_melodia_8_30sec.pickle',\n",
+    "             '/import/c4dm-04/mariap/val_data_melodia_8_30sec.pickle', \n",
+    "             '/import/c4dm-04/mariap/test_data_melodia_8_30sec.pickle']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
    "metadata": {
     "collapsed": true
    },
@@ -45,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 27,
    "metadata": {
     "collapsed": true
    },
@@ -146,7 +160,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -181,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -189,10 +203,10 @@
      "output_type": "stream",
      "text": [
       "n tracks: 2068\n",
-      "mean 134.338008\n",
-      "median 163.000000\n",
-      "std 44.855790\n",
-      "mean duration 67.169004\n"
+      "mean 42.459381\n",
+      "median 44.000000\n",
+      "std 6.567739\n",
+      "mean duration 21.229691\n"
      ]
     }
    ],
@@ -209,6 +223,66 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8089"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "6147+1942"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8099"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "6119+1980"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8200"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "6132+2068"
+   ]
+  },
+  {
+   "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true
--- a/scripts/classification.py	Thu Sep 21 15:25:20 2017 +0100
+++ b/scripts/classification.py	Thu Sep 21 17:35:57 2017 +0100
@@ -8,6 +8,7 @@
 import pandas as pd
 import pickle
 from sklearn import metrics
+from sklearn.model_selection import train_test_split
 
 import map_and_average
 import util_feature_learning