m@8: { m@8: "cells": [ m@8: { m@8: "cell_type": "code", m@10: "execution_count": 20, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@8: "outputs": [ m@8: { m@8: "name": "stdout", m@8: "output_type": "stream", m@8: "text": [ m@8: "The autoreload extension is already loaded. To reload it, use:\n", m@8: " %reload_ext autoreload\n" m@8: ] m@8: } m@8: ], m@8: "source": [ m@8: "import numpy as np\n", m@8: "import pickle\n", m@8: "from scipy.stats import pearsonr\n", m@8: "from scipy.stats import skew\n", m@8: "import sys\n", m@8: "from sklearn.metrics.pairwise import pairwise_distances\n", m@8: "%matplotlib inline\n", m@8: "import matplotlib.pyplot as plt\n", m@8: "\n", m@8: "%load_ext autoreload\n", m@8: "%autoreload 2\n", m@8: "\n", m@8: "sys.path.append('../')\n", m@8: "import scripts.results as results\n", m@8: "import scripts.utils_spatial as utils_spatial" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@10: "execution_count": 21, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@8: "outputs": [ m@8: { m@8: "name": "stdout", m@8: "output_type": "stream", m@8: "text": [ m@8: "WARNING: there are 21 disconnected observations\n", m@8: "Island ids: [3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121]\n", m@8: "Antigua and Barbuda\n", m@8: "Australia\n", m@8: "Cuba\n", m@8: "Fiji\n", m@8: "French Polynesia\n", m@8: "Grenada\n", m@8: "Iceland\n", m@8: "Jamaica\n", m@8: "Japan\n", m@8: "Kiribati\n", m@8: "Malta\n", m@8: "New Zealand\n", m@8: "Philippines\n", m@8: "Puerto Rico\n", m@8: "Republic of Serbia\n", m@8: "Saint Lucia\n", m@8: "Samoa\n", m@8: "Solomon Islands\n", m@8: "South Korea\n", m@8: "The Bahamas\n", m@8: "Trinidad and Tobago\n" m@8: ] m@8: } m@8: ], m@8: "source": [ m@8: "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", m@8: "ddf = results.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", m@8: "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", m@8: "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", m@8: "Xrhy, Xmel, Xmfc, Xchr = X_list\n", m@8: "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)\n", m@8: "\n", m@8: "# global outliers\n", m@8: "df_global, threshold, MD = results.get_outliers_df(X, Y, chi2thr=0.999)" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@10: "execution_count": null, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@8: "outputs": [ m@8: { m@8: "data": { m@8: "text/plain": [ m@8: "(8200, 380)" m@8: ] m@8: }, m@10: "execution_count": 22, m@8: "metadata": {}, m@8: "output_type": "execute_result" m@8: } m@8: ], m@8: "source": [ m@8: "X.shape" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@8: "execution_count": null, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@8: "outputs": [], m@8: "source": [ m@8: "D = pairwise_distances(X, metric='mahalanobis')" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@10: "execution_count": null, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@10: "outputs": [], m@8: "source": [ m@8: "plt.hist(D.ravel(), bins=100);" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@10: "execution_count": null, m@8: "metadata": { m@8: "collapsed": true m@8: }, m@8: "outputs": [], m@8: "source": [ m@8: "def n_occurrence_from_D(D, k=10, n_items=None):\n", m@8: " if n_items is None:\n", m@8: " n_items = len(D)\n", m@8: " sort_idx = np.argsort(D, axis=1)\n", m@8: " D_k = sort_idx[:, 1:(k+1)] # nearest neighbour is the item itself\n", m@8: " N_k = np.bincount(D_k.astype(int).ravel(), minlength=n_items)\n", m@8: " return N_k" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@10: "execution_count": null, m@8: "metadata": { m@8: "collapsed": false m@8: }, m@10: "outputs": [], m@8: "source": [ m@8: "N_k = n_occurrence_from_D(D, k=100)\n", m@8: "print skew(N_k)\n", m@8: "plt.hist(N_k, bins=100);" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@8: "execution_count": 9, m@8: "metadata": { m@8: "collapsed": true m@8: }, m@8: "outputs": [], m@8: "source": [ m@8: "N_k" m@8: ] m@8: }, m@8: { m@8: "cell_type": "code", m@8: "execution_count": null, m@8: "metadata": { m@8: "collapsed": true m@8: }, m@8: "outputs": [], m@8: "source": [] m@8: } m@8: ], m@8: "metadata": { m@8: "kernelspec": { m@8: "display_name": "Python 2", m@8: "language": "python", m@8: "name": "python2" m@8: }, m@8: "language_info": { m@8: "codemirror_mode": { m@8: "name": "ipython", m@8: "version": 2 m@8: }, m@8: "file_extension": ".py", m@8: "mimetype": "text/x-python", m@8: "name": "python", m@8: "nbconvert_exporter": "python", m@8: "pygments_lexer": "ipython2", m@8: "version": "2.7.12" m@8: } m@8: }, m@8: "nbformat": 4, m@8: "nbformat_minor": 0 m@8: }