m@77
|
1 # -*- coding: utf-8 -*-
|
m@77
|
2 """
|
m@77
|
3 Created on Fri Jun 24 13:06:12 2016
|
m@77
|
4
|
m@77
|
5 @author: mariapanteli
|
m@77
|
6 """
|
m@77
|
7
|
m@77
|
8 import numpy as np
|
m@77
|
9 import matplotlib.pyplot as plt
|
m@102
|
10 from bokeh.models import HoverTool, TapTool, CustomJS, Title, Label#, OpenURL, Button
|
m@102
|
11 #from bokeh.io import show, vform
|
m@77
|
12 from bokeh.plotting import figure, show, save, output_file, ColumnDataSource
|
m@77
|
13 from mpl_toolkits.basemap import Basemap
|
m@77
|
14 from shapely.geometry import Point, Polygon
|
m@77
|
15 import random
|
m@77
|
16 from bokeh.models.widgets import Panel, Tabs
|
m@77
|
17 import os
|
m@77
|
18
|
m@77
|
19
|
m@77
|
# Base path (no file extension) of the country-boundary shapefile, resolved
# relative to the directory that contains this module.
SHAPEFILE = os.path.join(
    os.path.dirname(__file__),
    'util_data',
    'shapefiles',
    'ne_110m_admin_0_countries',
)
|
m@77
|
21
|
m@77
|
22
|
m@77
|
def get_random_point_in_polygon(poly):
    '''Return a random point that lies inside `poly`.

    Candidate points are drawn uniformly from the bounding box of `poly`
    and discarded until `poly.contains` accepts one (rejection sampling).

    Parameters
    ----------
    poly : object exposing `bounds` as (minx, miny, maxx, maxy) and a
        `contains(point)` method; callers in this file pass a shapely Polygon.

    Returns
    -------
    Point
        The first sampled point for which `poly.contains` is true.
    '''
    x_lo, y_lo, x_hi, y_hi = poly.bounds
    candidate = Point(random.uniform(x_lo, x_hi), random.uniform(y_lo, y_hi))
    # NOTE(review): there is no retry limit, so this presumably never returns
    # for a polygon that contains none of its bounding box -- confirm inputs.
    while not poly.contains(candidate):
        candidate = Point(random.uniform(x_lo, x_hi), random.uniform(y_lo, y_hi))
    return candidate
|
m@77
|
29
|
m@77
|
30
|
m@77
|
def get_random_point_in_country_poly(countries_data):
    '''Sample one random coordinate inside the polygon of each country.

    Parameters
    ----------
    countries_data : iterable of country names, compared for equality with
        the names returned by `get_countries_lonlat_poly(SHAPEFILE)`.

    Returns
    -------
    (data_x, data_y) : two lists with one entry per input country, holding
        the x and y coordinates of the sampled point. Countries without a
        matching polygon get `np.nan` in both lists.
    '''
    pp_x, _, coords_poly, countries_poly = get_countries_lonlat_poly(SHAPEFILE)
    data_x, data_y = [], []
    for country in countries_data:
        matches = np.where(countries_poly == country)[0]
        if len(matches) == 0:
            # Unknown country: keep the output aligned with the input.
            data_x.append(np.nan)
            data_y.append(np.nan)
            continue
        # When a country owns several polygons, use the one with the most
        # vertices; with a single match argmax simply selects that polygon.
        vertex_counts = [len(pp_x[idx]) for idx in matches]
        outline = coords_poly[matches[np.argmax(vertex_counts)]]
        sampled = get_random_point_in_polygon(Polygon(outline))
        data_x.append(sampled.x)
        data_y.append(sampled.y)
    return data_x, data_y
|
m@77
|
52
|
m@77
|
53
|
m@77
|
def get_countries_lonlat_poly(shapefile):
    '''Load country polygons and their names from a shapefile via Basemap.

    Parameters
    ----------
    shapefile : path passed straight to `Basemap.readshapefile`.

    Returns
    -------
    pp_x : list of lists, the first coordinate of every vertex per polygon.
    pp_y : list of lists, the second coordinate of every vertex per polygon.
    coords_poly : the polygon vertex sequences as stored by Basemap
        (`units` attribute).
    countries_poly : numpy string array with the 'admin' attribute of each
        polygon, in the same order as the polygons.
    '''
    world = Basemap()
    world.readshapefile(shapefile, 'units', color='#444444', linewidth=.2)
    pp_x = [[vertex[0] for vertex in outline] for outline in world.units]
    pp_y = [[vertex[1] for vertex in outline] for outline in world.units]
    admin_names = [record['admin'] for record in world.units_info]
    countries_poly = np.array(admin_names, dtype=str)
    # Manual correction: relabel polygon 102 as French Guiana.
    # NOTE(review): the index is presumably tied to the record order of the
    # bundled shapefile; an earlier note gives
    # (-52.55642473001839, 2.504705308437053) for French Guiana -- confirm.
    countries_poly[102] = 'French Guiana'
    return pp_x, pp_y, world.units, countries_poly
|
m@77
|
69
|
m@77
|
70
|
m@77
|
def add_bokeh_interactivity(p, r, hover_outlier=False):
    '''Add hover tooltips and a click-to-open-URL action for a renderer.

    Parameters
    ----------
    p : bokeh figure the tools are added to.
    r : glyph renderer the hover and tap tools are restricted to. Its data
        source is expected to provide the columns referenced below: 'name',
        'info' and 'url', plus 'outlierMD' and 'collection' when
        `hover_outlier` is true.
    hover_outlier : bool, if true the tooltip also shows the outlier
        description and the collection name.

    Returns
    -------
    The same figure `p`, with a HoverTool and a TapTool added.
    '''
    # On tap, open the 'url' entry of the first selected point in a new
    # window. `url` is declared with `var` so the callback does not create an
    # implicit global (which is an error when the code runs in strict mode).
    callback = CustomJS(args=dict(r=r), code="""
        var inds = cb_obj.get('selected')['1d'].indices;
        var d1 = cb_obj.get('data');
        var url = d1['url'][inds[0]];
        if (url){
            window.open(url);}""")
    hover_tooltips = """
        <div>
            <div><span style="font-size: 17px; font-weight: bold;">@name</span></div>
            <div><span style="font-size: 12px;">@info</span></div>
        </div>"""
    hover_tooltips_outlier = """
        <div>
            <div><span style="font-size: 17px; font-weight: bold;">@name</span></div>
            <div><span style="font-size: 12px;">@info</span></div>
            <div><span style="font-size: 10px; color: #500;">@outlierMD</span></div>
            <div><span style="font-size: 12px;">@collection</span></div>
        </div>"""
    tooltips = hover_tooltips_outlier if hover_outlier else hover_tooltips
    p.add_tools(HoverTool(renderers=[r], tooltips=tooltips))
    p.add_tools(TapTool(renderers=[r], callback=callback))
    return p
|
m@77
|
98
|
m@77
|
99
|
m@77
|
def beautify_bokeh_background(p):
    '''Strip the decorative background elements from a figure.

    Clears the outline, the grid lines, the axis lines and the major/minor
    tick lines, and sets the major tick label font size to '0pt'.

    Parameters
    ----------
    p : figure-like object exposing `outline_line_color`, `grid` and `axis`.

    Returns
    -------
    The same object `p`, modified in place.
    '''
    p.outline_line_color = None
    p.grid.grid_line_color = None
    # Axis properties are applied in a fixed order, one assignment each.
    axis_overrides = (
        ('axis_line_color', None),
        ('major_label_text_font_size', '0pt'),
        ('major_tick_line_color', None),
        ('minor_tick_line_color', None),
    )
    for prop_name, prop_value in axis_overrides:
        setattr(p.axis, prop_name, prop_value)
    return p
|
m@77
|
110
|
m@77
|
111
|
m@77
|
def plot_outliers_world_figure(MD, y_pred, df, out_file=None):
    '''Plot one point per recording on a world map, marking outliers in red.

    `MD`, `y_pred` and the rows of `df` are assumed to be in the same order.

    Parameters
    ----------
    MD : numpy array with one numeric score per recording; it sets the point
        transparency and is shown (truncated to int) in the tooltip.
        NOTE(review): presumably a Mahalanobis distance -- confirm with caller.
    y_pred : boolean numpy array, true where the recording is an outlier.
    df : pandas DataFrame with the columns 'Country', 'Collection',
        'Culture', 'Language', 'Genre' and 'Url'.
    out_file : optional path; when given the figure is saved there as HTML.

    Returns
    -------
    The bokeh figure.
    '''
    pp_x, pp_y, _, _ = get_countries_lonlat_poly(SHAPEFILE)
    # `.values` replaces `Series.get_values()`, which newer pandas removed.
    countries = df['Country'].values
    data_x, data_y = get_random_point_in_country_poly(countries)

    # Scale MD into (0, 1]; the 0.5 offset keeps the smallest MD from mapping
    # to a fully transparent point. Non-outliers get a fixed alpha instead.
    md_min = np.min(MD)
    alpha_color = (MD - md_min + 0.5) / (np.max(MD) - md_min + 0.5)
    alpha_color[y_pred == False] = 0.3  # noqa: E712 (elementwise comparison)

    circle_color = np.repeat('grey', repeats=len(y_pred))
    circle_color[y_pred] = 'red'

    # Every recording not labelled 'British Library' is shown as
    # 'Smithsonian Folkways'.
    bl_inds = np.where(df['Collection'] == 'British Library')[0]
    collection = np.repeat('Smithsonian Folkways', len(df))
    collection[bl_inds] = 'British Library'

    outlier_info = [
        ('outlier, MD=' if is_outlier else 'non-outlier, MD=') + str(int(score))
        for score, is_outlier in zip(MD, y_pred)
    ]

    source = ColumnDataSource(data=dict(
        x=data_x,
        y=data_y,
        name=countries,
        color=circle_color,
        alpha=alpha_color,
        # list(...) so the column is a real sequence on Python 3, where zip
        # returns a one-shot iterator.
        info=list(zip(df['Culture'].values,
                      df['Language'].values,
                      df['Genre'].values)),
        outlierMD=outlier_info,
        collection=collection,
        url=df['Url'].values,
    ))

    TOOLS = "wheel_zoom,box_zoom,pan,reset,save"

    p = figure(tools=TOOLS, plot_width=1200, title="Outlier recordings per country (click on each point to listen to the audio). More info at: github.com/mpanteli/music-outliers/tree/master/demo/README.md.")

    # Two single-point glyphs whose only purpose is to create the legend
    # entries; they are drawn before the country patches.
    outlier_ind = np.argmax(MD)
    nonoutlier_ind = np.argmin(MD)
    p.circle(data_x[outlier_ind], data_y[outlier_ind], fill_color='red',
             alpha=alpha_color[outlier_ind], size=6, line_color=None,
             selection_color="firebrick", nonselection_color='white',
             legend="outliers")
    p.circle(data_x[nonoutlier_ind], data_y[nonoutlier_ind], fill_color='grey',
             alpha=alpha_color[nonoutlier_ind], size=6, line_color=None,
             selection_color="firebrick", nonselection_color='white',
             legend="non-outliers")

    # Country outlines, then the interactive recording markers on top.
    p.patches(pp_x, pp_y, fill_color='white', line_width=0.4, line_color='grey')
    markers = p.circle_cross('x', 'y', fill_color='color', alpha='alpha',
                             size=6, line_color=None,
                             selection_color="firebrick",
                             nonselection_color='color', source=source)

    p = add_bokeh_interactivity(p, markers, hover_outlier=True)
    p = beautify_bokeh_background(p)

    if out_file is not None:
        output_file(out_file)
        save(p)
    return p
|
m@77
|
202
|
m@77
|
203
|
m@77
|
def plot_tabs(tab_all, tabs_feat, out_file="temp.html"):
    '''Combine the overall figure and four per-feature figures into tabs.

    Parameters
    ----------
    tab_all : figure shown in the "All" tab.
    tabs_feat : indexable with at least four figures, shown in this order as
        "Rhythm", "Melody", "Timbre" and "Harmony".
    out_file : path of the HTML file the tabbed layout is written to.

    The layout is saved to `out_file` and then shown.
    '''
    feature_titles = ("Rhythm", "Melody", "Timbre", "Harmony")
    panels = [Panel(child=tab_all, title="All")]
    # Index explicitly so that a too-short `tabs_feat` still raises IndexError.
    for position, feature_title in enumerate(feature_titles):
        panels.append(Panel(child=tabs_feat[position], title=feature_title))
    tabs = Tabs(tabs=panels)
    output_file(out_file)
    save(tabs)
    show(tabs)
|