annotate scripts/utils_spatial.py @ 105:edd82eb89b4b branch-tests tip

Merge
author Maria Panteli
date Sun, 15 Oct 2017 13:36:59 +0100
parents 192259977b50
children
rev   line source
Maria@4 1 # -*- coding: utf-8 -*-
Maria@4 2 """
Maria@4 3 Created on Wed May 17 11:35:51 2017
Maria@4 4
Maria@4 5 @author: mariapanteli
Maria@4 6 """
Maria@4 7 import numpy as np
m@84 8 import pandas as pd
Maria@4 9 import json
Maria@4 10 import pysal # before shapely in util_plots
Maria@4 11 import fiona
m@8 12 import os
Maria@4 13 import matplotlib.pyplot as plt
Maria@4 14
Maria@4 15
# Reference data ships in a 'util_data' directory that sits next to this module.
DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data')

# JSON list of country records (common name, cca3 code, bordering cca3 codes).
JSON_DB = os.path.join(DATA_DIR, 'countries.json')

# Country-level shapefile used for nearest-neighbour and region lookups.
SHAPEFILE = os.path.join(
    DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp')
m@8 19
m@8 20
def neighbors_from_json_file(data_countries, json_DB=JSON_DB):
    """Build a spatial weights object from the borders listed in a JSON file.

    Parameters
    ----------
    data_countries : sequence of str
        Country names. The position of a name in this sequence is used as
        its node id in the returned weights.
    json_DB : str
        Path to a JSON file containing a list of country records. Each
        record must provide ``['name']['common']``, ``['cca3']`` and
        ``['borders']`` (a list of cca3 codes).

    Returns
    -------
    pysal.weights.W
        Weights in which every country is linked (weight 1.0) to each of
        its bordering countries that also appears in ``data_countries``.
        Countries that are missing from the JSON file, or that have no
        border partner in ``data_countries``, get an empty neighbor set.
    """
    with open(json_DB) as json_file:
        countries_dict = json.load(json_file)

    # Temporary fixes of country names so the JSON names match the dataset.
    # Applied via a lookup so that a JSON file lacking one of these entries
    # no longer raises ValueError.
    renames = {
        'United States': 'United States of America',
        'Tanzania': 'United Republic of Tanzania',
        'DR Congo': 'Democratic Republic of the Congo',
        'Czechia': 'Czech Republic',
    }

    # Dictionary lookups replace the repeated list.index() scans.
    # setdefault keeps the first occurrence, as list.index() did.
    name_by_iso = {}
    borders_by_name = {}
    for country_info in countries_dict:
        name = country_info['name']['common']
        name = renames.get(name, name)
        name_by_iso.setdefault(country_info['cca3'], name)
        borders_by_name.setdefault(name, country_info['borders'])

    # First position of every dataset country (works for lists and arrays).
    data_index = {}
    for i, country in enumerate(data_countries):
        data_index.setdefault(country, i)

    neighbors = {}
    for i, country in enumerate(data_countries):
        linked = {}
        for iso in borders_by_name.get(country, ()):
            neighbor = name_by_iso.get(iso)
            if neighbor is None:
                # Border code with no matching record in the JSON file.
                continue
            neighbor_idx = data_index.get(neighbor)
            if neighbor_idx is not None:
                linked[neighbor_idx] = 1.0
        neighbors[i] = linked

    # list(...) so that id_order is a real list on Python 3 as well.
    id_order = list(range(len(data_countries)))
    return pysal.weights.W(neighbors, id_order=id_order)
Maria@4 50
Maria@4 51
def get_countries_from_shapefile(shapefile):
    """Return the country name of every record in a shapefile.

    The property holding the name is chosen from the first record:
    ``"ADMIN"`` if present, else ``"NAME"`` if present, else ``"admin"``.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile, opened read-only with fiona.

    Returns
    -------
    list
        One country name per record, in the order the records are read.
    """
    # The context manager closes the collection even if a lookup fails.
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        # `in` replaces dict.has_key(), which does not exist on Python 3.
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
        else:
            country_keyword = "admin"
        return [line["properties"][country_keyword] for line in shp]
Maria@4 65
Maria@4 66
def replace_empty_neighbours_with_KNN(data_countries, w):
    """Give countries without neighbors some nearest-neighbor links.

    For every island of ``w`` (a node with no neighbors) the 10 nearest
    neighbors computed from ``SHAPEFILE`` are scanned in order, and those
    that appear in ``data_countries`` are added with weight 1.0 until the
    country has three neighbors. Islands whose name is not found in the
    shapefile are left unchanged. Each island name is printed as it is
    processed.

    Parameters
    ----------
    data_countries : array-like of str
        Country names; position ``i`` corresponds to node id ``i`` of ``w``.
    w : pysal.weights.W
        Weights to complete. ``w.neighbors`` is updated in place.
        NOTE(review): assumes ``w.neighbors[i]`` is a dict mapping
        neighbor id -> weight, as built by ``neighbors_from_json_file``.

    Returns
    -------
    pysal.weights.W
        A new weights object built from the updated neighbors.
    """
    shapefile = SHAPEFILE
    knn = 10
    wknn = pysal.knnW_from_shapefile(shapefile, knn)
    knn_countries = get_countries_from_shapefile(shapefile)
    # Array form so the element-wise comparison below also works for lists.
    data_array = np.asarray(data_countries)
    neighbors = w.neighbors
    for nn_idx in w.islands:
        country = data_countries[nn_idx]
        # Call form prints the same text on Python 2 and parses on Python 3.
        print(country)
        if country not in knn_countries:
            continue
        knn_country_idx = knn_countries.index(country)
        for knn_id in wknn.neighbors[knn_country_idx]:
            if len(neighbors[nn_idx]) > 2:
                # Cap reached; the count never shrinks, so stop scanning.
                break
            knn_nn = knn_countries[knn_id]
            data_country_idx = np.where(data_array == knn_nn)[0]
            if len(data_country_idx) > 0:
                neighbors[nn_idx][data_country_idx[0]] = 1.0
    # list(...) so that id_order is a real list on Python 3 as well.
    id_order = list(range(len(data_countries)))
    return pysal.weights.W(neighbors, id_order=id_order)
Maria@4 89
Maria@4 90
def get_neighbors_for_countries_in_dataset(Y):
    """Compute spatial weights for the distinct countries found in ``Y``.

    The distinct values of ``Y`` (via ``np.unique``) are first linked
    using the border information from the JSON file; countries left
    without neighbors are then completed from nearest neighbors.

    Returns
    -------
    tuple
        ``(w, data_countries)``: the weights object and the array of
        distinct country labels it is indexed by.
    """
    data_countries = np.unique(Y)
    border_weights = neighbors_from_json_file(data_countries)
    completed_weights = replace_empty_neighbours_with_KNN(
        data_countries, border_weights)
    return completed_weights, data_countries
Maria@4 97
Maria@4 98
def from_weights_to_dict(w, data_countries):
    """Translate the neighbor ids of ``w`` into country names.

    Parameters
    ----------
    w : object with a ``neighbors`` mapping
        Maps each node id to an iterable of neighbor ids.
    data_countries : sequence
        Country label for every node id (indexed by id).

    Returns
    -------
    dict
        Country label -> list of the labels of its neighbors.
    """
    return {
        data_countries[node]: [data_countries[other] for other in linked]
        for node, linked in w.neighbors.items()
    }
Maria@4 104
Maria@4 105
def get_regions_from_shapefile(shapefile):
    """Return the country name and region of every record in a shapefile.

    The property names are chosen from the first record: ``"ADMIN"`` or,
    failing that, ``"NAME"`` for the country (both paired with
    ``"REGION_WB"`` for the region); otherwise the lower-case
    ``"admin"`` / ``"region_wb"`` pair is used.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile, opened read-only with fiona.

    Returns
    -------
    tuple of (list, list)
        ``(countries, regions)``, aligned by position and in the order the
        records are read.
    """
    countries = []
    regions = []
    # The context manager closes the collection even if a lookup fails.
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        # `in` replaces dict.has_key(), which does not exist on Python 3.
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
            region_keyword = "REGION_WB"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
            region_keyword = "REGION_WB"
        else:
            country_keyword = "admin"
            region_keyword = "region_wb"
        for line in shp:
            properties = line["properties"]
            countries.append(properties[country_keyword])
            regions.append(properties[region_keyword])
    return countries, regions
m@77 125
m@77 126
def append_regions(df):
    """Return ``df`` with a 'Region' column looked up from the shapefile.

    Country/region pairs are read from ``SHAPEFILE``. 'French Guiana' is
    added with region 'Latin America & Caribbean' when the shapefile does
    not list it. The pairs are joined onto ``df`` with a left merge on the
    'Country' column, so every row of ``df`` is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Country' column.

    Returns
    -------
    pandas.DataFrame
        The merged frame.
    """
    names, region_labels = get_regions_from_shapefile(SHAPEFILE)
    if 'French Guiana' not in names:
        names.append('French Guiana')
        region_labels.append('Latin America & Caribbean')
    lookup = pd.DataFrame({'Country': names, 'Region': region_labels})
    return pd.merge(df, lookup, how='left', on='Country')
m@77 135