# -*- coding: utf-8 -*-
"""
Created on Wed May 17 11:35:51 2017

@author: mariapanteli

Helpers for building spatial neighbour weights between the countries that
appear in a dataset, using a JSON file of land borders and falling back to
k-nearest neighbours from a shapefile for countries without any border.
"""
import numpy as np
import pandas as pd
import json
import pysal # before shapely in util_plots
import fiona
import os
import matplotlib.pyplot as plt


DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data')
JSON_DB = os.path.join(DATA_DIR, 'countries.json')
SHAPEFILE = os.path.join(DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp')


def _first_index_by_name(names):
    """Map each name to the position of its first occurrence in `names`."""
    index_by_name = {}
    for idx, name in enumerate(names):
        index_by_name.setdefault(name, idx)
    return index_by_name


def neighbors_from_json_file(data_countries, json_DB=JSON_DB):
    """Build a weights object from the borders listed in a JSON file.

    Each entry of the JSON file is read for its common name
    (``['name']['common']``), its ISO code (``['cca3']``) and the list of
    ISO codes it borders (``['borders']``).

    Parameters
    ----------
    data_countries : sequence of str
        Country names of the dataset. Position ``i`` in this sequence is the
        id of the country in the returned weights. A plain list works as
        well as a numpy array.
    json_DB : str
        Path to the JSON file with the country information.

    Returns
    -------
    pysal.weights.W
        Weights with one (possibly empty) entry per country in
        `data_countries`; bordering countries that are also in
        `data_countries` get weight 1.0.
    """
    with open(json_DB) as json_file:
        countries_dict = json.load(json_file)

    # temporary fixes of country names to match json data
    # (a name missing from the JSON file is simply not renamed)
    renamed = {
        'United States': 'United States of America',
        'Tanzania': 'United Republic of Tanzania',
        'DR Congo': 'Democratic Republic of the Congo',
        'Czechia': 'Czech Republic',
    }

    borders_by_name = {}
    name_by_iso = {}
    for country_info in countries_dict:
        name = country_info['name']['common']
        name = renamed.get(name, name)
        # setdefault keeps the first entry if a name or code is repeated
        borders_by_name.setdefault(name, country_info.get('borders') or [])
        name_by_iso.setdefault(country_info['cca3'], name)

    index_by_country = _first_index_by_name(data_countries)

    neighbors = {}
    for i, country in enumerate(data_countries):
        neighbors[i] = {}
        # countries unknown to the JSON file keep an empty neighbour set
        for iso in borders_by_name.get(country, ()):
            neighbor = name_by_iso.get(iso)
            if neighbor is None:
                # border code without a matching entry in the JSON file
                continue
            neighbor_idx = index_by_country.get(neighbor)
            if neighbor_idx is not None:
                neighbors[i][neighbor_idx] = 1.0
    w = pysal.weights.W(neighbors, id_order=list(range(len(data_countries))))
    return w


def get_countries_from_shapefile(shapefile):
    """Return the country name of every record in `shapefile`.

    The property holding the name is "ADMIN" if the first record has it,
    else "NAME" if the first record has it, else "admin".

    Parameters
    ----------
    shapefile : str
        Path to a shapefile readable by fiona.

    Returns
    -------
    list
        Country names in record order.
    """
    # the context manager closes the collection even if reading fails
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
        else:
            country_keyword = "admin"
        return [line["properties"][country_keyword] for line in shp]


def replace_empty_neighbours_with_KNN(data_countries, w, shapefile=SHAPEFILE,
                                      knn=10, max_neighbors=3):
    """Give countries without neighbours their nearest countries instead.

    For every island of `w` (a country with no neighbours) the `knn` nearest
    countries are taken from `shapefile`; those that also appear in
    `data_countries` are added as neighbours with weight 1.0 until the
    country has `max_neighbors` neighbours. Islands whose name is not found
    in the shapefile are left without neighbours.

    Note that ``w.neighbors`` is updated in place and each island's name is
    printed.

    Parameters
    ----------
    data_countries : sequence of str
        Country names; position ``i`` is the id used in `w`.
    w : pysal.weights.W
        Weights whose neighbour entries support item assignment
        (as built by `neighbors_from_json_file`).
    shapefile : str
        Path to the shapefile used for the nearest-neighbour search.
    knn : int
        Number of nearest neighbours requested from the shapefile.
    max_neighbors : int
        Stop adding neighbours to an island once it has this many.

    Returns
    -------
    pysal.weights.W
        New weights built from the updated neighbours.
    """
    wknn = pysal.knnW_from_shapefile(shapefile, knn)
    knn_countries = get_countries_from_shapefile(shapefile)
    knn_index_by_country = _first_index_by_name(knn_countries)
    data_index_by_country = _first_index_by_name(data_countries)

    neighbors = w.neighbors
    for nn_idx in w.islands:
        country = data_countries[nn_idx]
        print(country)
        if country not in knn_index_by_country:
            continue
        for knn_id in wknn.neighbors[knn_index_by_country[country]]:
            if len(neighbors[nn_idx]) >= max_neighbors:
                # the count never shrinks, so no later candidate can be added
                break
            data_country_idx = data_index_by_country.get(knn_countries[knn_id])
            if data_country_idx is not None:
                neighbors[nn_idx][data_country_idx] = 1.0
    w = pysal.weights.W(neighbors, id_order=list(range(len(data_countries))))
    return w


def get_neighbors_for_countries_in_dataset(Y):
    """Build neighbour weights for the unique countries in `Y`.

    Parameters
    ----------
    Y : array-like of str
        Country label of every item in the dataset.

    Returns
    -------
    w : pysal.weights.W
        Border-based weights, with nearest neighbours filled in for
        countries that have no border neighbour in the dataset.
    data_countries : numpy.ndarray
        Sorted unique country names; position ``i`` is the id used in `w`.
    """
    # neighbors
    data_countries = np.unique(Y)
    w = neighbors_from_json_file(data_countries)
    w = replace_empty_neighbours_with_KNN(data_countries, w)
    return w, data_countries


def from_weights_to_dict(w, data_countries):
    """Translate the integer ids of a weights object into country names.

    Parameters
    ----------
    w : object with a ``neighbors`` mapping
        Maps each country id to an iterable of neighbouring country ids.
    data_countries : sequence of str
        Country names; position ``i`` is the name of country id ``i``.

    Returns
    -------
    dict
        Maps each country name to the list of its neighbours' names.
    """
    return {
        data_countries[i]: [data_countries[nn] for nn in nn_ids]
        for i, nn_ids in w.neighbors.items()
    }


def get_regions_from_shapefile(shapefile):
    """Return the country name and region of every record in `shapefile`.

    The property names are "ADMIN"/"REGION_WB" if the first record has
    "ADMIN", else "NAME"/"REGION_WB" if it has "NAME", else
    "admin"/"region_wb".

    Parameters
    ----------
    shapefile : str
        Path to a shapefile readable by fiona.

    Returns
    -------
    countries : list
        Country names in record order.
    regions : list
        Region of each country, aligned with `countries`.
    """
    countries = []
    regions = []
    # the context manager closes the collection even if reading fails
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
            region_keyword = "REGION_WB"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
            region_keyword = "REGION_WB"
        else:
            country_keyword = "admin"
            region_keyword = "region_wb"
        for line in shp:
            countries.append(line["properties"][country_keyword])
            regions.append(line["properties"][region_keyword])
    return countries, regions


def append_regions(df):
    """Add a 'Region' column to `df` by matching its 'Country' column.

    Regions are read from the shapefile at ``SHAPEFILE``. 'French Guiana' is
    added with region 'Latin America & Caribbean' when the shapefile has no
    record with that name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Country' column.

    Returns
    -------
    pandas.DataFrame
        Left merge of `df` with the country/region table; countries without
        a match get a missing 'Region'.
    """
    countries, regions = get_regions_from_shapefile(SHAPEFILE)
    if 'French Guiana' not in countries:
        countries.append('French Guiana')
        regions.append('Latin America & Caribbean')
    df_regions = pd.DataFrame({'Country': countries, 'Region': regions})
    df_append = pd.merge(df, df_regions, how='left', on='Country')
    return df_append