Maria@4: # -*- coding: utf-8 -*- Maria@4: """ Maria@4: Created on Wed May 17 11:35:51 2017 Maria@4: Maria@4: @author: mariapanteli Maria@4: """ Maria@4: import numpy as np Maria@4: import json Maria@4: import pysal # before shapely in util_plots Maria@4: import fiona m@8: import os Maria@4: import matplotlib.pyplot as plt Maria@4: Maria@4: m@8: DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data') m@8: JSON_DB = os.path.join(DATA_DIR, 'countries.json') m@8: SHAPEFILE = os.path.join(DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp') m@8: m@8: m@8: def neighbors_from_json_file(data_countries, json_DB=JSON_DB): Maria@4: neighbors = {} Maria@4: with open(json_DB) as json_file: Maria@4: countries_dict = json.load(json_file) Maria@4: country_names = [] Maria@4: country_iso = [] Maria@4: country_borders_iso = [] Maria@4: for country_info in countries_dict: Maria@4: country_names.append(country_info['name']['common']) Maria@4: country_iso.append(country_info['cca3']) Maria@4: country_borders_iso.append(country_info['borders']) Maria@4: # temporary fixes of country names to match json data Maria@4: country_names[country_names.index('United States')] = 'United States of America' Maria@4: country_names[country_names.index('Tanzania')] = 'United Republic of Tanzania' Maria@4: country_names[country_names.index('DR Congo')] = 'Democratic Republic of the Congo' Maria@4: country_names[country_names.index('Czechia')] = 'Czech Republic' Maria@4: for i, country in enumerate(data_countries): Maria@4: neighbors[i] = {} Maria@4: if country in country_names: Maria@4: if len(country_borders_iso[country_names.index(country)])>0: Maria@4: # if country has neighbors according to json file Maria@4: neighbors_iso = country_borders_iso[country_names.index(country)] Maria@4: neighbors_names = [country_names[country_iso.index(nn)] for nn in neighbors_iso] Maria@4: for neighbor in neighbors_names: Maria@4: if neighbor in data_countries: Maria@4: neighbor_idx = np.where(data_countries==neighbor)[0][0] Maria@4: neighbors[i][neighbor_idx] = 1.0 Maria@4: w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) Maria@4: return w Maria@4: Maria@4: Maria@4: def get_countries_from_shapefile(shapefile): Maria@4: shp = fiona.open(shapefile, 'r') Maria@4: countries = [] Maria@4: if shp[0]["properties"].has_key("ADMIN"): Maria@4: country_keyword = "ADMIN" Maria@4: elif shp[0]["properties"].has_key("NAME"): Maria@4: country_keyword = "NAME" Maria@4: else: Maria@4: country_keyword = "admin" Maria@4: for line in shp: Maria@4: countries.append(line["properties"][country_keyword]) Maria@4: shp.close() Maria@4: return countries Maria@4: Maria@4: Maria@4: def replace_empty_neighbours_with_KNN(data_countries, w): m@8: shapefile = SHAPEFILE Maria@4: no_neighbors_idx = w.islands Maria@4: knn = 10 Maria@4: wknn = pysal.knnW_from_shapefile(shapefile, knn) Maria@4: knn_countries = get_countries_from_shapefile(shapefile) Maria@4: neighbors = w.neighbors Maria@4: for nn_idx in no_neighbors_idx: Maria@4: country = data_countries[nn_idx] Maria@4: print country Maria@4: if country not in knn_countries: Maria@4: continue Maria@4: knn_country_idx = knn_countries.index(country) Maria@4: knn_country_neighbors = [knn_countries[nn] for nn in wknn.neighbors[knn_country_idx]] Maria@4: for knn_nn in knn_country_neighbors: Maria@4: if len(neighbors[nn_idx])>2: Maria@4: continue Maria@4: data_country_idx = np.where(data_countries==knn_nn)[0] Maria@4: if len(data_country_idx)>0: Maria@4: neighbors[nn_idx][data_country_idx[0]] = 1.0 Maria@4: w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) Maria@4: return w Maria@4: Maria@4: Maria@4: def get_neighbors_for_countries_in_dataset(Y): Maria@4: # neighbors Maria@4: data_countries = np.unique(Y) Maria@4: w = neighbors_from_json_file(data_countries) Maria@4: w = replace_empty_neighbours_with_KNN(data_countries, w) Maria@4: return w, data_countries Maria@4: Maria@4: Maria@4: def from_weights_to_dict(w, data_countries): Maria@4: w_dict = {} Maria@4: for i in w.neighbors: Maria@4: w_dict[data_countries[i]] = [data_countries[nn] for nn in w.neighbors[i]] Maria@4: return w_dict Maria@4: Maria@4: Maria@4: def get_LH_HL_idx(lm, p_vals): Maria@4: sig_idx = np.where(p_vals<0.05)[0] Maria@4: LH_idx = sig_idx[np.where(lm.q[sig_idx]==2)[0]] Maria@4: HL_idx = sig_idx[np.where(lm.q[sig_idx]==4)[0]] Maria@4: return LH_idx, HL_idx Maria@4: Maria@4: Maria@4: def print_Moran_outliers(y, w, data_countries): Maria@4: lm = pysal.Moran_Local(y, w) Maria@4: p_vals = lm.p_z_sim Maria@4: LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals) Maria@4: print 'LH', zip(data_countries[LH_idx], p_vals[LH_idx]) # LH Maria@4: print 'HL', zip(data_countries[HL_idx], p_vals[HL_idx]) # HL Maria@4: Maria@4: Maria@4: def plot_Moran_scatterplot(y, w, data_countries, out_file=None): Maria@4: lm = pysal.Moran_Local(y, w) Maria@4: p_vals = lm.p_z_sim Maria@4: LH_idx, HL_idx = get_LH_HL_idx(lm, p_vals) Maria@4: Maria@4: ylt = pysal.lag_spatial(lm.w, lm.y) Maria@4: yt = lm.y Maria@4: yt = (yt - np.mean(yt))/np.std(yt) Maria@4: ylt = (ylt - np.mean(ylt))/np.std(ylt) Maria@4: colors = plt.cm.spectral(np.linspace(0,1,5)) Maria@4: quad = np.zeros(yt.shape, dtype=int) Maria@4: quad[np.bitwise_and(ylt > 0, yt > 0)]=1 # HH Maria@4: quad[np.bitwise_and(ylt > 0, yt < 0)]=2 # LH Maria@4: quad[np.bitwise_and(ylt < 0, yt < 0)]=3 # LL Maria@4: quad[np.bitwise_and(ylt < 0, yt > 0)]=4 # HL Maria@4: marker_color = colors[quad] Maria@4: marker_size = 40*np.ones(yt.shape, dtype=int) Maria@4: marker_size[LH_idx] = 140 Maria@4: marker_size[HL_idx] = 140 Maria@4: Maria@4: plt.figure() Maria@4: plt.scatter(yt, ylt, c=marker_color, s=marker_size, alpha=0.7) Maria@4: plt.xlabel('Value') Maria@4: plt.ylabel('Spatially Lagged Value') Maria@4: plt.axvline(c='black', ls='--') Maria@4: plt.axhline(c='black', ls='--') Maria@4: plt.ylim(min(ylt)-0.5, max(ylt)+0.5) Maria@4: plt.xlim(min(yt)-0.5, max(yt)+1.5) Maria@4: for i in np.concatenate((LH_idx, HL_idx)): Maria@4: plt.annotate(data_countries[i], (yt[i], ylt[i]), xytext=(yt[i]*1.1, ylt[i]*1.1),textcoords='data',arrowprops=dict(arrowstyle="->",connectionstyle="arc3")) Maria@4: extreme_points = np.concatenate(([np.argmin(ylt)], [np.argmax(ylt)], np.where(yt>np.mean(yt)+2.8*np.std(yt))[0], np.where(ylt>np.mean(yt)+2.8*np.std(ylt))[0])) Maria@4: extreme_points = np.array(list(set(extreme_points) - set(np.concatenate((LH_idx, HL_idx))))) Maria@4: for i in extreme_points: Maria@4: plt.annotate(data_countries[i], (yt[i]+0.1, ylt[i])) Maria@4: if out_file is not None: Maria@4: plt.savefig(out_file)