Maria@4
|
1 # -*- coding: utf-8 -*-
|
Maria@4
|
2 """
|
Maria@4
|
3 Created on Wed May 17 11:35:51 2017
|
Maria@4
|
4
|
Maria@4
|
5 @author: mariapanteli
|
Maria@4
|
6 """
|
Maria@4
|
7 import numpy as np
|
m@84
|
8 import pandas as pd
|
Maria@4
|
9 import json
|
Maria@4
|
10 import pysal # before shapely in util_plots
|
Maria@4
|
11 import fiona
|
m@8
|
12 import os
|
Maria@4
|
13 import matplotlib.pyplot as plt
|
Maria@4
|
14
|
Maria@4
|
15
|
m@8
|
# Directory with the bundled data files, resolved relative to this module.
DATA_DIR = os.path.join(os.path.dirname(__file__), 'util_data')
# JSON file of per-country records; default input of neighbors_from_json_file.
JSON_DB = os.path.join(DATA_DIR, 'countries.json')
# Country shapefile read by replace_empty_neighbours_with_KNN and
# append_regions (file name suggests Natural Earth 10m admin-0 -- TODO confirm).
SHAPEFILE = os.path.join(DATA_DIR, 'shapefiles', 'ne_10m_admin_0_countries.shp')
|
m@8
|
19
|
m@8
|
20
|
m@8
|
def neighbors_from_json_file(data_countries, json_DB=JSON_DB):
    """Build a spatial weights object from the land borders listed in a JSON file.

    Each entry of the JSON file is expected to provide ``['name']['common']``
    (country name), ``['cca3']`` (ISO code) and ``['borders']`` (list of ISO
    codes of bordering countries). Two countries of ``data_countries`` are
    neighbours when the JSON file lists one in the borders of the other.

    Parameters
    ----------
    data_countries : sequence of str
        Country names; the position of a name in this sequence is the id of
        that country in the returned weights. A list or a NumPy array works.
    json_DB : str
        Path to the JSON file with the country records.

    Returns
    -------
    pysal.weights.W
        Weights with ids ``0 .. len(data_countries) - 1``. Countries that are
        missing from the JSON file, or that have no bordering country in
        ``data_countries``, get an empty neighbour set.
    """
    with open(json_DB) as json_file:
        countries_info = json.load(json_file)

    # Temporary fixes of country names so the JSON names match the data names.
    # A rename whose source name is absent from the JSON is simply a no-op.
    renames = {
        'United States': 'United States of America',
        'Tanzania': 'United Republic of Tanzania',
        'DR Congo': 'Democratic Republic of the Congo',
        'Czechia': 'Czech Republic',
    }

    # Lookup tables replace the repeated linear list.index() scans.
    # setdefault keeps the first record when a name or ISO code is repeated.
    borders_by_name = {}
    name_by_iso = {}
    for country_info in countries_info:
        common_name = country_info['name']['common']
        name = renames.get(common_name, common_name)
        borders_by_name.setdefault(name, country_info.get('borders') or [])
        name_by_iso.setdefault(country_info['cca3'], name)

    # First position of every country in the data (works for lists and arrays).
    index_by_country = {}
    for idx, country in enumerate(data_countries):
        index_by_country.setdefault(country, idx)

    neighbors = {}
    for i, country in enumerate(data_countries):
        neighbors[i] = {}
        for iso in borders_by_name.get(country, ()):
            # Skip border codes that the JSON file does not describe.
            if iso not in name_by_iso:
                continue
            neighbor_idx = index_by_country.get(name_by_iso[iso])
            if neighbor_idx is not None:
                neighbors[i][neighbor_idx] = 1.0

    # id_order must be a real list (range() is lazy on Python 3).
    w = pysal.weights.W(neighbors, id_order=list(range(len(data_countries))))
    return w
|
Maria@4
|
50
|
Maria@4
|
51
|
Maria@4
|
def get_countries_from_shapefile(shapefile):
    """Return the country name of every record in a shapefile.

    The name is read from the ``ADMIN`` property when the first record has
    it, otherwise from ``NAME`` when present, otherwise from ``admin``.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile to read.

    Returns
    -------
    list
        One country name per record, in file order.
    """
    # The context manager closes the collection even if reading fails.
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
        else:
            country_keyword = "admin"
        return [record["properties"][country_keyword] for record in shp]
|
Maria@4
|
65
|
Maria@4
|
66
|
Maria@4
|
def replace_empty_neighbours_with_KNN(data_countries, w, knn=10,
                                      max_neighbors=3, shapefile=None):
    """Give countries without neighbours their nearest countries from a shapefile.

    For every island of ``w`` (a country with an empty neighbour set), the
    ``knn`` nearest neighbours are looked up in K-nearest-neighbour weights
    built from the shapefile. Those that also occur in ``data_countries`` are
    added as neighbours, in the order given by the KNN weights, until the
    country has ``max_neighbors`` neighbours.

    Parameters
    ----------
    data_countries : sequence of str
        Country names; positions are the ids used by ``w``.
    w : pysal.weights.W
        Weights whose islands should be filled in. NOTE: the neighbour sets
        held by ``w`` are updated in place before the new weights are built.
    knn : int, optional
        Number of nearest neighbours requested from the shapefile.
    max_neighbors : int, optional
        Stop adding neighbours to an island once it has this many.
    shapefile : str, optional
        Path of the country shapefile; defaults to the module-level SHAPEFILE.

    Returns
    -------
    pysal.weights.W
        New weights with ids ``0 .. len(data_countries) - 1``.
    """
    neighbors = w.neighbors
    no_neighbors_idx = w.islands

    # Only load the shapefile when there is actually something to fill in.
    if len(no_neighbors_idx) > 0:
        if shapefile is None:
            shapefile = SHAPEFILE
        wknn = pysal.knnW_from_shapefile(shapefile, knn)
        knn_countries = get_countries_from_shapefile(shapefile)

        # First position of each name; replaces repeated linear searches and
        # works whether data_countries is a list or a NumPy array.
        knn_index_by_country = {}
        for idx, name in enumerate(knn_countries):
            knn_index_by_country.setdefault(name, idx)
        data_index_by_country = {}
        for idx, name in enumerate(data_countries):
            data_index_by_country.setdefault(name, idx)

        for nn_idx in no_neighbors_idx:
            country = data_countries[nn_idx]
            # Progress output; the call form is valid on Python 2 and 3.
            print(country)
            if country not in knn_index_by_country:
                continue
            for knn_nn_idx in wknn.neighbors[knn_index_by_country[country]]:
                if len(neighbors[nn_idx]) >= max_neighbors:
                    break
                data_country_idx = data_index_by_country.get(
                    knn_countries[knn_nn_idx])
                if data_country_idx is not None:
                    neighbors[nn_idx][data_country_idx] = 1.0

    # id_order must be a real list (range() is lazy on Python 3).
    w = pysal.weights.W(neighbors, id_order=list(range(len(data_countries))))
    return w
|
Maria@4
|
89
|
Maria@4
|
90
|
Maria@4
|
def get_neighbors_for_countries_in_dataset(Y):
    """Compute neighbour weights for the distinct countries found in ``Y``.

    The distinct values of ``Y`` are obtained with ``np.unique``. Neighbours
    are first taken from the JSON border data, then countries left without
    neighbours are filled in by ``replace_empty_neighbours_with_KNN``.

    Returns
    -------
    tuple
        ``(w, data_countries)``: the weights object and the array of distinct
        countries whose positions are the ids used by ``w``.
    """
    unique_countries = np.unique(Y)
    border_weights = neighbors_from_json_file(unique_countries)
    filled_weights = replace_empty_neighbours_with_KNN(unique_countries,
                                                       border_weights)
    return filled_weights, unique_countries
|
Maria@4
|
97
|
Maria@4
|
98
|
Maria@4
|
def from_weights_to_dict(w, data_countries):
    """Translate the integer neighbour ids of ``w`` into country names.

    Parameters
    ----------
    w : object
        Any object with a ``neighbors`` mapping from an id to an iterable of
        neighbour ids.
    data_countries : sequence
        Country names indexed by the ids used in ``w.neighbors``.

    Returns
    -------
    dict
        Maps each country name to the list of names of its neighbours.
    """
    adjacency = w.neighbors
    return {
        data_countries[idx]: [data_countries[other] for other in adjacency[idx]]
        for idx in adjacency
    }
|
Maria@4
|
104
|
Maria@4
|
105
|
m@77
|
def get_regions_from_shapefile(shapefile):
    """Return the country name and region of every record in a shapefile.

    The property names are chosen from the first record: ``ADMIN`` or, failing
    that, ``NAME`` for the country (both paired with ``REGION_WB`` for the
    region); otherwise the lowercase ``admin`` / ``region_wb`` pair is used.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile to read.

    Returns
    -------
    tuple of (list, list)
        ``(countries, regions)``, aligned by position and in file order.
    """
    countries = []
    regions = []
    # The context manager closes the collection even if reading fails.
    with fiona.open(shapefile, 'r') as shp:
        first_properties = shp[0]["properties"]
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if "ADMIN" in first_properties:
            country_keyword = "ADMIN"
            region_keyword = "REGION_WB"
        elif "NAME" in first_properties:
            country_keyword = "NAME"
            region_keyword = "REGION_WB"
        else:
            country_keyword = "admin"
            region_keyword = "region_wb"
        for record in shp:
            properties = record["properties"]
            countries.append(properties[country_keyword])
            regions.append(properties[region_keyword])
    return countries, regions
|
m@77
|
125
|
m@77
|
126
|
m@77
|
def append_regions(df):
    """Left-join a ``Region`` column onto ``df`` using its ``Country`` column.

    Country/region pairs are read from the module-level SHAPEFILE. When
    'French Guiana' is not among the shapefile countries it is added with the
    region 'Latin America & Caribbean'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Country`` column.

    Returns
    -------
    pandas.DataFrame
        The result of the left merge; rows whose country has no match in the
        shapefile data get a missing ``Region``.
    """
    country_names, region_names = get_regions_from_shapefile(SHAPEFILE)
    french_guiana_present = 'French Guiana' in country_names
    if not french_guiana_present:
        country_names.extend(['French Guiana'])
        region_names.extend(['Latin America & Caribbean'])
    region_lookup = pd.DataFrame({'Country': country_names,
                                  'Region': region_names})
    return df.merge(region_lookup, how='left', on='Country')
|
m@77
|
135
|