annotate scripts/utils_spatial.py @ 8:0f3eba42b425 branch-tests

added notebooks and utils
author Maria Panteli <m.x.panteli@gmail.com>
date Mon, 11 Sep 2017 18:23:14 +0100
parents e50c63cf96be
children 98718fdd8326
rev   line source
Maria@4 1 # -*- coding: utf-8 -*-
Maria@4 2 """
Maria@4 3 Created on Wed May 17 11:35:51 2017
Maria@4 4
Maria@4 5 @author: mariapanteli
Maria@4 6 """
Maria@4 7 import numpy as np
Maria@4 8 import json
Maria@4 9 import pysal # before shapely in util_plots
Maria@4 10 import fiona
m@8 11 import os
Maria@4 12 import sys
Maria@4 13 sys.path.append('../misc')
Maria@4 14 import matplotlib.pyplot as plt
Maria@4 15
Maria@4 16
m@8 17 DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data')
m@8 18 JSON_DB = os.path.join(DATA_DIR, 'countries.json')
m@8 19 SHAPEFILE = os.path.join(DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp')
m@8 20
m@8 21
m@8 22 def neighbors_from_json_file(data_countries, json_DB=JSON_DB):
Maria@4 23 neighbors = {}
Maria@4 24 with open(json_DB) as json_file:
Maria@4 25 countries_dict = json.load(json_file)
Maria@4 26 country_names = []
Maria@4 27 country_iso = []
Maria@4 28 country_borders_iso = []
Maria@4 29 for country_info in countries_dict:
Maria@4 30 country_names.append(country_info['name']['common'])
Maria@4 31 country_iso.append(country_info['cca3'])
Maria@4 32 country_borders_iso.append(country_info['borders'])
Maria@4 33 # temporary fixes of country names to match json data
Maria@4 34 country_names[country_names.index('United States')] = 'United States of America'
Maria@4 35 country_names[country_names.index('Tanzania')] = 'United Republic of Tanzania'
Maria@4 36 country_names[country_names.index('DR Congo')] = 'Democratic Republic of the Congo'
Maria@4 37 country_names[country_names.index('Czechia')] = 'Czech Republic'
Maria@4 38 for i, country in enumerate(data_countries):
Maria@4 39 neighbors[i] = {}
Maria@4 40 if country in country_names:
Maria@4 41 if len(country_borders_iso[country_names.index(country)])>0:
Maria@4 42 # if country has neighbors according to json file
Maria@4 43 neighbors_iso = country_borders_iso[country_names.index(country)]
Maria@4 44 neighbors_names = [country_names[country_iso.index(nn)] for nn in neighbors_iso]
Maria@4 45 for neighbor in neighbors_names:
Maria@4 46 if neighbor in data_countries:
Maria@4 47 neighbor_idx = np.where(data_countries==neighbor)[0][0]
Maria@4 48 neighbors[i][neighbor_idx] = 1.0
Maria@4 49 w = pysal.weights.W(neighbors, id_order=range(len(data_countries)))
Maria@4 50 return w
Maria@4 51
Maria@4 52
Maria@4 53 def get_countries_from_shapefile(shapefile):
Maria@4 54 shp = fiona.open(shapefile, 'r')
Maria@4 55 countries = []
Maria@4 56 if shp[0]["properties"].has_key("ADMIN"):
Maria@4 57 country_keyword = "ADMIN"
Maria@4 58 elif shp[0]["properties"].has_key("NAME"):
Maria@4 59 country_keyword = "NAME"
Maria@4 60 else:
Maria@4 61 country_keyword = "admin"
Maria@4 62 for line in shp:
Maria@4 63 countries.append(line["properties"][country_keyword])
Maria@4 64 shp.close()
Maria@4 65 return countries
Maria@4 66
Maria@4 67
Maria@4 68 def replace_empty_neighbours_with_KNN(data_countries, w):
m@8 69 shapefile = SHAPEFILE
Maria@4 70 no_neighbors_idx = w.islands
Maria@4 71 knn = 10
Maria@4 72 wknn = pysal.knnW_from_shapefile(shapefile, knn)
Maria@4 73 knn_countries = get_countries_from_shapefile(shapefile)
Maria@4 74 neighbors = w.neighbors
Maria@4 75 for nn_idx in no_neighbors_idx:
Maria@4 76 country = data_countries[nn_idx]
Maria@4 77 print country
Maria@4 78 if country not in knn_countries:
Maria@4 79 continue
Maria@4 80 knn_country_idx = knn_countries.index(country)
Maria@4 81 knn_country_neighbors = [knn_countries[nn] for nn in wknn.neighbors[knn_country_idx]]
Maria@4 82 for knn_nn in knn_country_neighbors:
Maria@4 83 if len(neighbors[nn_idx])>2:
Maria@4 84 continue
Maria@4 85 data_country_idx = np.where(data_countries==knn_nn)[0]
Maria@4 86 if len(data_country_idx)>0:
Maria@4 87 neighbors[nn_idx][data_country_idx[0]] = 1.0
Maria@4 88 w = pysal.weights.W(neighbors, id_order=range(len(data_countries)))
Maria@4 89 return w
Maria@4 90
Maria@4 91
Maria@4 92 def get_neighbors_for_countries_in_dataset(Y):
Maria@4 93 # neighbors
Maria@4 94 data_countries = np.unique(Y)
Maria@4 95 w = neighbors_from_json_file(data_countries)
Maria@4 96 w = replace_empty_neighbours_with_KNN(data_countries, w)
Maria@4 97 return w, data_countries
Maria@4 98
Maria@4 99
Maria@4 100 def from_weights_to_dict(w, data_countries):
Maria@4 101 w_dict = {}
Maria@4 102 for i in w.neighbors:
Maria@4 103 w_dict[data_countries[i]] = [data_countries[nn] for nn in w.neighbors[i]]
Maria@4 104 return w_dict
Maria@4 105
Maria@4 106
Maria@4 107 def get_LH_HL_idx(lm, p_vals):
Maria@4 108 sig_idx = np.where(p_vals<0.05)[0]
Maria@4 109 LH_idx = sig_idx[np.where(lm.q[sig_idx]==2)[0]]
Maria@4 110 HL_idx = sig_idx[np.where(lm.q[sig_idx]==4)[0]]
Maria@4 111 return LH_idx, HL_idx
Maria@4 112
Maria@4 113
Maria@4 114 def print_Moran_outliers(y, w, data_countries):
Maria@4 115 lm = pysal.Moran_Local(y, w)
Maria@4 116 p_vals = lm.p_z_sim
Maria@4 117 LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals)
Maria@4 118 print 'LH', zip(data_countries[LH_idx], p_vals[LH_idx]) # LH
Maria@4 119 print 'HL', zip(data_countries[HL_idx], p_vals[HL_idx]) # HL
Maria@4 120
Maria@4 121
Maria@4 122 def plot_Moran_scatterplot(y, w, data_countries, out_file=None):
Maria@4 123 lm = pysal.Moran_Local(y, w)
Maria@4 124 p_vals = lm.p_z_sim
Maria@4 125 LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals)
Maria@4 126
Maria@4 127 ylt = pysal.lag_spatial(lm.w, lm.y)
Maria@4 128 yt = lm.y
Maria@4 129 yt = (yt - np.mean(yt))/np.std(yt)
Maria@4 130 ylt = (ylt - np.mean(ylt))/np.std(ylt)
Maria@4 131 colors = plt.cm.spectral(np.linspace(0,1,5))
Maria@4 132 quad = np.zeros(yt.shape, dtype=int)
Maria@4 133 quad[np.bitwise_and(ylt > 0, yt > 0)]=1 # HH
Maria@4 134 quad[np.bitwise_and(ylt > 0, yt < 0)]=2 # LH
Maria@4 135 quad[np.bitwise_and(ylt < 0, yt < 0)]=3 # LL
Maria@4 136 quad[np.bitwise_and(ylt < 0, yt > 0)]=4 # HL
Maria@4 137 marker_color = colors[quad]
Maria@4 138 marker_size = 40*np.ones(yt.shape, dtype=int)
Maria@4 139 marker_size[LH_idx] = 140
Maria@4 140 marker_size[HL_idx] = 140
Maria@4 141
Maria@4 142 plt.figure()
Maria@4 143 plt.scatter(yt, ylt, c=marker_color, s=marker_size, alpha=0.7)
Maria@4 144 plt.xlabel('Value')
Maria@4 145 plt.ylabel('Spatially Lagged Value')
Maria@4 146 plt.axvline(c='black', ls='--')
Maria@4 147 plt.axhline(c='black', ls='--')
Maria@4 148 plt.ylim(min(ylt)-0.5, max(ylt)+0.5)
Maria@4 149 plt.xlim(min(yt)-0.5, max(yt)+1.5)
Maria@4 150 for i in np.concatenate((LH_idx, HL_idx)):
Maria@4 151 plt.annotate(data_countries[i], (yt[i], ylt[i]), xytext=(yt[i]*1.1, ylt[i]*1.1),textcoords='data',arrowprops=dict(arrowstyle="->",connectionstyle="arc3"))
Maria@4 152 extreme_points = np.concatenate(([np.argmin(ylt)], [np.argmax(ylt)], np.where(yt>np.mean(yt)+2.8*np.std(yt))[0], np.where(ylt>np.mean(yt)+2.8*np.std(ylt))[0]))
Maria@4 153 extreme_points = np.array(list(set(extreme_points) - set(np.concatenate((LH_idx, HL_idx)))))
Maria@4 154 for i in extreme_points:
Maria@4 155 plt.annotate(data_countries[i], (yt[i]+0.1, ylt[i]))
Maria@4 156 if out_file is not None:
Maria@4 157 plt.savefig(out_file)