Mercurial > hg > plosone_underreview
diff scripts/utils_spatial.py @ 4:e50c63cf96be branch-tests
rearranging folders
author | Maria Panteli |
---|---|
date | Mon, 11 Sep 2017 11:51:50 +0100 |
parents | |
children | 0f3eba42b425 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/utils_spatial.py Mon Sep 11 11:51:50 2017 +0100 @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed May 17 11:35:51 2017 + +@author: mariapanteli +""" +import numpy as np +import json +import pysal # before shapely in util_plots +import fiona +import sys +sys.path.append('../misc') +import matplotlib.pyplot as plt + + +def neighbors_from_json_file(data_countries, json_DB='../MergeBL-Smith/data/countries.json'): + neighbors = {} + with open(json_DB) as json_file: + countries_dict = json.load(json_file) + country_names = [] + country_iso = [] + country_borders_iso = [] + for country_info in countries_dict: + country_names.append(country_info['name']['common']) + country_iso.append(country_info['cca3']) + country_borders_iso.append(country_info['borders']) + # temporary fixes of country names to match json data + country_names[country_names.index('United States')] = 'United States of America' + country_names[country_names.index('Tanzania')] = 'United Republic of Tanzania' + country_names[country_names.index('DR Congo')] = 'Democratic Republic of the Congo' + country_names[country_names.index('Czechia')] = 'Czech Republic' + for i, country in enumerate(data_countries): + neighbors[i] = {} + if country in country_names: + if len(country_borders_iso[country_names.index(country)])>0: + # if country has neighbors according to json file + neighbors_iso = country_borders_iso[country_names.index(country)] + neighbors_names = [country_names[country_iso.index(nn)] for nn in neighbors_iso] + for neighbor in neighbors_names: + if neighbor in data_countries: + neighbor_idx = np.where(data_countries==neighbor)[0][0] + neighbors[i][neighbor_idx] = 1.0 + w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) + return w + + +def get_countries_from_shapefile(shapefile): + shp = fiona.open(shapefile, 'r') + countries = [] + if shp[0]["properties"].has_key("ADMIN"): + country_keyword = "ADMIN" + elif shp[0]["properties"].has_key("NAME"): + country_keyword = "NAME" + else: + country_keyword = "admin" + for line in shp: + countries.append(line["properties"][country_keyword]) + shp.close() + return countries + + +def replace_empty_neighbours_with_KNN(data_countries, w): + shapefile = "../MergeBL-Smith/shapefiles/ne_10m_admin_0_countries.shp" + no_neighbors_idx = w.islands + knn = 10 + wknn = pysal.knnW_from_shapefile(shapefile, knn) + knn_countries = get_countries_from_shapefile(shapefile) + neighbors = w.neighbors + for nn_idx in no_neighbors_idx: + country = data_countries[nn_idx] + print country + if country not in knn_countries: + continue + knn_country_idx = knn_countries.index(country) + knn_country_neighbors = [knn_countries[nn] for nn in wknn.neighbors[knn_country_idx]] + for knn_nn in knn_country_neighbors: + if len(neighbors[nn_idx])>2: + continue + data_country_idx = np.where(data_countries==knn_nn)[0] + if len(data_country_idx)>0: + neighbors[nn_idx][data_country_idx[0]] = 1.0 + w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) + return w + + +def get_neighbors_for_countries_in_dataset(Y): + # neighbors + data_countries = np.unique(Y) + w = neighbors_from_json_file(data_countries) + w = replace_empty_neighbours_with_KNN(data_countries, w) + return w, data_countries + + +def from_weights_to_dict(w, data_countries): + w_dict = {} + for i in w.neighbors: + w_dict[data_countries[i]] = [data_countries[nn] for nn in w.neighbors[i]] + return w_dict + + +def get_LH_HL_idx(lm, p_vals): + sig_idx = np.where(p_vals<0.05)[0] + LH_idx = sig_idx[np.where(lm.q[sig_idx]==2)[0]] + HL_idx = sig_idx[np.where(lm.q[sig_idx]==4)[0]] + return LH_idx, HL_idx + + +def print_Moran_outliers(y, w, data_countries): + lm = pysal.Moran_Local(y, w) + p_vals = lm.p_z_sim + LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals) + print 'LH', zip(data_countries[LH_idx], p_vals[LH_idx]) # LH + print 'HL', zip(data_countries[HL_idx], p_vals[HL_idx]) # HL + + +def plot_Moran_scatterplot(y, w, data_countries, out_file=None): + lm = pysal.Moran_Local(y, w) + p_vals = lm.p_z_sim + LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals) + + ylt = pysal.lag_spatial(lm.w, lm.y) + yt = lm.y + yt = (yt - np.mean(yt))/np.std(yt) + ylt = (ylt - np.mean(ylt))/np.std(ylt) + colors = plt.cm.spectral(np.linspace(0,1,5)) + quad = np.zeros(yt.shape, dtype=int) + quad[np.bitwise_and(ylt > 0, yt > 0)]=1 # HH + quad[np.bitwise_and(ylt > 0, yt < 0)]=2 # LH + quad[np.bitwise_and(ylt < 0, yt < 0)]=3 # LL + quad[np.bitwise_and(ylt < 0, yt > 0)]=4 # HL + marker_color = colors[quad] + marker_size = 40*np.ones(yt.shape, dtype=int) + marker_size[LH_idx] = 140 + marker_size[HL_idx] = 140 + + plt.figure() + plt.scatter(yt, ylt, c=marker_color, s=marker_size, alpha=0.7) + plt.xlabel('Value') + plt.ylabel('Spatially Lagged Value') + plt.axvline(c='black', ls='--') + plt.axhline(c='black', ls='--') + plt.ylim(min(ylt)-0.5, max(ylt)+0.5) + plt.xlim(min(yt)-0.5, max(yt)+1.5) + for i in np.concatenate((LH_idx, HL_idx)): + plt.annotate(data_countries[i], (yt[i], ylt[i]), xytext=(yt[i]*1.1, ylt[i]*1.1),textcoords='data',arrowprops=dict(arrowstyle="->",connectionstyle="arc3")) + extreme_points = np.concatenate(([np.argmin(ylt)], [np.argmax(ylt)], np.where(yt>np.mean(yt)+2.8*np.std(yt))[0], np.where(ylt>np.mean(yt)+2.8*np.std(ylt))[0])) + extreme_points = np.array(list(set(extreme_points) - set(np.concatenate((LH_idx, HL_idx))))) + for i in extreme_points: + plt.annotate(data_countries[i], (yt[i]+0.1, ylt[i])) + if out_file is not None: + plt.savefig(out_file)