Mercurial > hg > plosone_underreview
view scripts/utils_spatial.py @ 99:192259977b50 branch-tests
metadata and delete Moran
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Tue, 03 Oct 2017 16:42:44 +0100 |
parents | 027945e93211 |
children |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Wed May 17 11:35:51 2017 @author: mariapanteli """ import numpy as np import pandas as pd import json import pysal # before shapely in util_plots import fiona import os import matplotlib.pyplot as plt DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data') JSON_DB = os.path.join(DATA_DIR, 'countries.json') SHAPEFILE = os.path.join(DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp') def neighbors_from_json_file(data_countries, json_DB=JSON_DB): neighbors = {} with open(json_DB) as json_file: countries_dict = json.load(json_file) country_names = [] country_iso = [] country_borders_iso = [] for country_info in countries_dict: country_names.append(country_info['name']['common']) country_iso.append(country_info['cca3']) country_borders_iso.append(country_info['borders']) # temporary fixes of country names to match json data country_names[country_names.index('United States')] = 'United States of America' country_names[country_names.index('Tanzania')] = 'United Republic of Tanzania' country_names[country_names.index('DR Congo')] = 'Democratic Republic of the Congo' country_names[country_names.index('Czechia')] = 'Czech Republic' for i, country in enumerate(data_countries): neighbors[i] = {} if country in country_names: if len(country_borders_iso[country_names.index(country)])>0: # if country has neighbors according to json file neighbors_iso = country_borders_iso[country_names.index(country)] neighbors_names = [country_names[country_iso.index(nn)] for nn in neighbors_iso] for neighbor in neighbors_names: if neighbor in data_countries: neighbor_idx = np.where(data_countries==neighbor)[0][0] neighbors[i][neighbor_idx] = 1.0 w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) return w def get_countries_from_shapefile(shapefile): shp = fiona.open(shapefile, 'r') countries = [] if shp[0]["properties"].has_key("ADMIN"): country_keyword = "ADMIN" elif shp[0]["properties"].has_key("NAME"): country_keyword = "NAME" else: country_keyword = "admin" for line in shp: countries.append(line["properties"][country_keyword]) shp.close() return countries def replace_empty_neighbours_with_KNN(data_countries, w): shapefile = SHAPEFILE no_neighbors_idx = w.islands knn = 10 wknn = pysal.knnW_from_shapefile(shapefile, knn) knn_countries = get_countries_from_shapefile(shapefile) neighbors = w.neighbors for nn_idx in no_neighbors_idx: country = data_countries[nn_idx] print country if country not in knn_countries: continue knn_country_idx = knn_countries.index(country) knn_country_neighbors = [knn_countries[nn] for nn in wknn.neighbors[knn_country_idx]] for knn_nn in knn_country_neighbors: if len(neighbors[nn_idx])>2: continue data_country_idx = np.where(data_countries==knn_nn)[0] if len(data_country_idx)>0: neighbors[nn_idx][data_country_idx[0]] = 1.0 w = pysal.weights.W(neighbors, id_order=range(len(data_countries))) return w def get_neighbors_for_countries_in_dataset(Y): # neighbors data_countries = np.unique(Y) w = neighbors_from_json_file(data_countries) w = replace_empty_neighbours_with_KNN(data_countries, w) return w, data_countries def from_weights_to_dict(w, data_countries): w_dict = {} for i in w.neighbors: w_dict[data_countries[i]] = [data_countries[nn] for nn in w.neighbors[i]] return w_dict def get_regions_from_shapefile(shapefile): shp = fiona.open(shapefile, 'r') countries = [] regions = [] if shp[0]["properties"].has_key("ADMIN"): country_keyword = "ADMIN" region_keyword = "REGION_WB" elif shp[0]["properties"].has_key("NAME"): country_keyword = "NAME" region_keyword = "REGION_WB" else: country_keyword = "admin" region_keyword = "region_wb" for line in shp: #countries.append(line["properties"]["admin"]) countries.append(line["properties"][country_keyword]) regions.append(line["properties"][region_keyword]) shp.close() return countries, regions def append_regions(df): countries, regions = get_regions_from_shapefile(SHAPEFILE) if 'French Guiana' not in countries: countries.append('French Guiana') regions.append('Latin America & Caribbean') df_regions = pd.DataFrame({'Country': countries, 'Region': regions}) df_append = pd.merge(df, df_regions, how='left', on='Country') return df_append