changeset 83:a300b571a0b0 branch-tests

merged
author mpanteli <m.x.panteli@gmail.com>
date Tue, 26 Sep 2017 21:18:54 +0100
parents 4395037087b6 (current diff) c3ab85ed1fa7 (diff)
children 8404ea114e45
files
diffstat 3 files changed, 26 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/notebooks/results_30_seconds_and_figures.ipynb	Tue Sep 26 21:18:26 2017 +0100
+++ b/notebooks/results_30_seconds_and_figures.ipynb	Tue Sep 26 21:18:54 2017 +0100
@@ -2,11 +2,20 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 23,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
--- a/scripts/interactive_plot.py	Tue Sep 26 21:18:26 2017 +0100
+++ b/scripts/interactive_plot.py	Tue Sep 26 21:18:54 2017 +0100
@@ -114,8 +114,11 @@
     data_x, data_y = get_random_point_in_country_poly(df['Country'].get_values())    
 
     #threshold, y_pred, MD = get_outliers(features, chi2thr=0.995)
-    alpha_color = MD/np.max(MD) # if outlier vary transparency wrt MD
-    alpha_color[y_pred==False] = 0.2  # if not outlier just color white
+    #alpha_color = MD/np.max(MD) # if outlier vary transparency wrt MD
+    #alpha_color[y_pred==False] = 0.2  # if not outlier just color white
+    # 0.5 offset keeps alpha above zero: min alpha = 0.5/(max(MD)-min(MD)+0.5), max alpha = 1
+    alpha_color = (MD-np.min(MD)+0.5)/(np.max(MD)-np.min(MD)+0.5)
+    alpha_color[y_pred==False] = 0.4
     
     circle_color = np.repeat('grey', repeats=len(y_pred))
     circle_color[y_pred] = 'red'
@@ -136,17 +139,21 @@
     p = figure(tools=TOOLS, plot_width=1200, title="Click on the red dots to listen to some music outliers")
     outlier_ind = np.argmax(MD)
     nonoutlier_ind = np.argmin(MD)
-    rleg1 = p.circle(data_x[outlier_ind], data_y[outlier_ind], fill_color='red', alpha=alpha_color[outlier_ind], size=4, line_color=None, 
-                        selection_color="firebrick", nonselection_color='white', legend="outliers")
-    rleg2 = p.circle(data_x[nonoutlier_ind], data_y[nonoutlier_ind], fill_color='grey', alpha=alpha_color[nonoutlier_ind], size=4, line_color=None, 
-                        selection_color="firebrick", nonselection_color='white', legend="non-outliers")   
+    rleg1 = p.circle(data_x[outlier_ind], data_y[outlier_ind], fill_color='red', alpha=alpha_color[outlier_ind], size=10,
+                     line_color=None, selection_color="firebrick", nonselection_color='white', legend="outliers")
+    rleg2 = p.circle(data_x[nonoutlier_ind], data_y[nonoutlier_ind], fill_color='grey', alpha=alpha_color[nonoutlier_ind],
+                     size=10, line_color=None, selection_color="firebrick", nonselection_color='white', legend="non-outliers")  
     r1 = p.patches(pp_x, pp_y, fill_color='white', line_width=0.4, line_color='grey')
-    r2 = p.circle_cross('x','y',fill_color='color', alpha='alpha', size=4, line_color=None, 
+    r2 = p.circle_cross('x','y',fill_color='color', alpha='alpha', size=10, line_color=None, 
                         selection_color="firebrick", nonselection_color='color', source=source) 
     
     p = add_bokeh_interactivity(p, r2, hover_outlier=True)
     p = beautify_bokeh_background(p)
     
+    msg = """Scatter points represent recordings from each country randomly drawn within the boundaries of the country. Red points represent recordings detected as outliers and grey points as non-outliers. Hover over your mouse to get additional information of the Country, Language, Culture, Genre, and whether the recording was detected as outlier (True/False) and its Mahalanobis distance (MD). Click on each point to be redirected to the Smithsonian Folkways or British Library website to listen to the audio."""
+    caption = Title(text=msg, align='left', text_font_size='10pt')
+    p.add_layout(caption, 'below')
+    
     if out_file is not None:
         output_file(out_file)
         save(p)
--- a/scripts_R/PlotOutliersCountry.R	Tue Sep 26 21:18:26 2017 +0100
+++ b/scripts_R/PlotOutliersCountry.R	Tue Sep 26 21:18:54 2017 +0100
@@ -16,7 +16,7 @@
 distMahal = as.dist(apply(data, 1, function(i) mahalanobis(data, i, cov = cov(data),tol=1e-18)))
 hc=hclust(distMahal, method="average")
 mypal = c("#000000", "#9B0000", "#9B0000", "#9B0000", "#9B0000")
-clus5 = cutree(hc, 5)
+clus5 = cutree(hc, 4)
 pdf('../data/results/hierarchical_cluster.pdf')
 par(mar=c(1,1,1,1))
 plot(as.phylo(hc),type="fan",tip.color=mypal[clus5], cex=.5, label.offset=.5)