DaveM@13
|
1 #!/usr/bin/env python
|
DaveM@13
|
2
|
DaveM@13
|
3 import csv
|
DaveM@13
|
4 import time
|
DaveM@13
|
5 import unicodedata
|
DaveM@13
|
6 from geopy.geocoders import Nominatim
|
DaveM@13
|
7 from geopy.exc import GeocoderTimedOut
|
DaveM@13
|
8 import random
|
DaveM@13
|
9 import pdb
|
DaveM@13
|
10
|
DaveM@19
|
# Module-level fallback values.
# DEFAULT_TIME is not referenced by any function visible in this file.
DEFAULT_TIME = None
# Query that parseBirthTown() retries with once the supplied birthplace text
# has been exhausted; None means "no fallback".
DEFAULT_LOCATION = None
# Historical misspelling of DEFAULT_LOCATION, kept as an alias because
# existing code refers to the constant by this name.
DEAULT_LOCATION = DEFAULT_LOCATION
|
DaveM@13
|
14
|
DaveM@13
|
def regulateData(dataDict):
    """Add parsed birth date/time/place fields to one survey response.

    The response is keyed by the literal survey question texts and must also
    contain an 'ID' entry (presumably a csv.DictReader row -- confirm against
    the caller).  The dict is modified in place and also returned.

    Keys written: 'DOB', 'TOB', 'pDOB', 'pTOB', 'COB', 'pCOB' (the 'p'
    prefix marks the partner's values).
    """
    print("Parse %s" % (str(dataDict['ID'])))

    # Question texts exactly as they appear as keys in the raw response.
    ownDayQ = "Which day (numeric) have you been born?"
    ownMonthQ = "Which month have you been born?"
    ownYearQ = "Year Of Birth"
    ownTimeQ = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    ownPlaceQ = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    partnerDateQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    partnerTimeQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    partnerPlaceQ = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    # (output key, parser, question keys whose answers are passed to it).
    # Order matters: dates and times are parsed before the two geocoding
    # calls, exactly as the fields are listed here.
    # For offline runs the two place lookups can be swapped for random
    # coordinates instead of calling parseBirthTown.
    steps = (
        ('DOB', parseDOB, (ownDayQ, ownMonthQ, ownYearQ)),
        ('TOB', parseTOB, (ownTimeQ,)),
        ('pDOB', parsePartnerDOB, (partnerDateQ,)),
        ('pTOB', parseTOB, (partnerTimeQ,)),
        ('COB', parseBirthTown, (ownPlaceQ,)),
        ('pCOB', parseBirthTown, (partnerPlaceQ,)),
    )
    for outKey, parser, questions in steps:
        answers = [dataDict[q] for q in questions]
        dataDict[outKey] = parser(*answers)
    return dataDict
|
DaveM@13
|
37
|
DaveM@13
|
38
|
DaveM@13
|
def parseBirthTown(s):
    """Geocode a free-text birthplace with the Nominatim geocoder.

    The whole string is tried first.  Each time the geocoder finds nothing,
    the leading space-separated word is dropped and the remainder is tried.
    When only one word is left and it is not found either, the module-level
    DEAULT_LOCATION is tried once as a last resort.

    If the geocoder raises, the request timeout is increased by one second,
    the function sleeps five seconds and retries the same query; it gives up
    once the timeout reaches 60.

    Returns a (latitude, longitude, raw) tuple for the first match, or None
    when nothing could be resolved (including empty or None input).
    """
    # Work on text: byte strings are decoded as Latin-1 (every byte value is
    # valid there, and pure-ASCII input decodes to the same characters).
    if isinstance(s, bytes):
        s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    triedDefault = False
    while s and timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best-effort retry with a longer timeout; KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d"%(s,timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)

        # No match: drop the first word and try what is left.
        remainder = s.split(' ', 1)
        if len(remainder) == 2:
            s = remainder[1]
        elif not triedDefault:
            # Words exhausted: fall back to the default exactly once so an
            # unresolvable default cannot loop forever.
            s = DEAULT_LOCATION
            triedDefault = True
        else:
            return None
    return None
|
DaveM@13
|
68
|
DaveM@13
|
def _splitCompactDOB(dob):
    """Split a separator-free date string into [day, month, year] substrings.

    Returns None when the length matches no supported layout.  May raise
    ValueError when a 7-character string does not start with digits.
    """
    n = len(dob)
    if n == 8:  # ddmmyyyy
        return [dob[:2], dob[2:4], dob[4:]]
    if n == 7:
        if dob[1] == '1' and dob[2] in '012':  # dmmyyyy with month 10-12
            return [dob[0], dob[1:3], dob[3:]]
        if int(dob[:2]) > 31:  # first pair cannot be a day -> dmmyyyy
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyyyy
    if n == 6:
        if dob[2:4] == '19':  # '19' is no month, so it starts the year: dmyyyy
            return [dob[0], dob[1], dob[2:]]
        return [dob[:2], dob[2:4], dob[4:]]  # ddmmyy
    if n == 5:
        if dob[1] == '1' and dob[2] in '012':  # dmmyy with month 10-12
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyy
    if n == 4:  # dmyy
        return [dob[0], dob[1], dob[2:]]
    return None


def parsePartnerDOB(dob):
    """Parse a free-text date of birth into a (day, month, year) int tuple.

    Accepted inputs: three fields separated by '/', '-' or single spaces, or
    a run of 4 to 8 characters with no separators (layouts listed in
    _splitCompactDOB).  Non-digit characters inside a field are ignored.
    Two-digit years are read as 19yy.  If the month is above 12 and the day
    is not, the two are swapped (month-first input).

    Returns None when the input is None, too short, of an unsupported
    layout, contains a field with no digits, or yields a date outside
    1 <= day <= 31, 1 <= month <= 12, 1900 <= year <= 2017.
    """
    if dob is None:
        return None
    dob = dob.strip()
    # Normalise '-' or ' ' separators to '/', only when exactly two of them
    # are present so that other text is left alone.
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    parts = dob.split('/')

    try:
        if len(parts) != 3:
            # Not three '/'-separated fields: treat as an unseparated string.
            compact = dob.replace('/', '').strip()
            if len(compact) < 4:
                return None
            parts = _splitCompactDOB(compact)
            if parts is None:
                print('no / partnerDOB issue')
                return None
        # int('') raises ValueError when a field holds no digits at all.
        d, m, y = [int(''.join(ch for ch in part if ch.isdigit()))
                   for part in parts]
    except ValueError:
        return None

    if y < 100:
        y += 1900
    if m > 12 and d <= 12:
        # Day and month were given month-first.
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
|
DaveM@13
|
139
|
DaveM@13
|
def monthStringToNum(s):
    """Convert an English month name to its number (1-12).

    Only the first three characters of the stripped, lower-cased input are
    looked at, so 'January', 'jan' and ' JAN ' all give 1.

    Raises ValueError('Not a month') when those three characters are not a
    month abbreviation (for example when the input is a number).
    """
    monthNumbers = {'jan':1,'feb':2,'mar':3,
                    'apr':4,'may':5,'jun':6,'jul':7,'aug':8,
                    'sep':9,'oct':10,'nov':11,'dec':12}
    key = s.strip()[:3].lower()
    try:
        return monthNumbers[key]
    except KeyError:
        # Only a failed lookup means "not a month"; other errors propagate.
        raise ValueError('Not a month')
|
DaveM@13
|
151
|
DaveM@13
|
def checkMonthDay(d,m):
    """Repair and validate an integer day/month pair.

    Repairs applied before validation:
      * a day above 31 is cut to its first two digits; if that is still
        above 31, its second digit is used instead;
      * if the month is above 12 and the day is at most 12, the two are
        swapped (month-first input).

    Returns (valid, day, month) with the possibly repaired values.  valid is
    True when 1 <= month <= 12 and the day fits the month: up to 29 for
    February (no year is available, so 29 is always allowed), up to 30 for
    April, June, September and November, up to 31 otherwise.
    """
    if d > 31: # take first two digits of day
        d = int(str(d)[:2])
        if d > 31:
            d = int(str(d)[1])
    if m > 12 and d <= 12: # Day and month wrong way round - American
        d, m = m, d

    if m == 2:
        maxDay = 29
    elif m in (4, 6, 9, 11):
        maxDay = 30
    else:
        maxDay = 31
    # Lower bounds matter too: day 0 or month 0 is never a valid date.
    valid = (1 <= m <= 12) and (1 <= d <= maxDay)
    return (valid, d, m)
|
DaveM@13
|
175
|
DaveM@13
|
def parseDOB(d,m,y):
    """Build a (day, month, year) int tuple from three separate text answers.

    d and y are strings; every non-digit character in them is ignored.
    m is a string holding either a month name (resolved by
    monthStringToNum) or a month number.  Two-digit years are read as 19yy.

    The day/month pair is passed through checkMonthDay, which may repair it;
    if it is still reported invalid a diagnostic is printed, but the values
    are returned regardless.

    Raises ValueError when d or y contains no digits, or when m is neither a
    month name nor an integer.
    """
    # ''.join(...) keeps the digits as a string on both Python 2 and 3.
    d = int(''.join(ch for ch in d if ch.isdigit()))
    y = int(''.join(ch for ch in y if ch.isdigit()))
    m = m.strip()
    try:
        m = monthStringToNum(m)
    except ValueError:
        # Not a month name: expect a plain number.
        m = int(m)
    if y < 100:
        y += 1900
    (r, d, m) = checkMonthDay(d, m)
    if not r:
        print('error with day month')
        print(str((r, d, m)))
    return (d, m, y)
|
DaveM@13
|
191
|
DaveM@13
|
def parseTOB(T):
    """Parse a free-text time of birth into an (hour, minute) int tuple.

    Accepted forms (case-insensitive, surrounding spaces ignored):
      * 'HH:MM' or 'HH.MM' (anything after a second ':' is ignored);
      * four characters 'HHMM';
      * a bare hour from 0 to 24;
      * a number of 100 or more read as hour*100 + minute ('930' -> 9:30);
      * any of the above with 'am'/'pm' (dots allowed, e.g. 'p.m.'), in which
        case the hour is converted to the 24-hour clock.

    Returns None when the input is None, cannot be read as numbers, or is
    out of range.  Valid results have 0 <= minute <= 59 and 0 <= hour <= 23;
    (24, 0) is also returned for input such as '2400'.
    """
    if T is None:
        return None

    timeFlag = None  # 0 = am, 1 = pm, None = no marker found
    # Dots are removed only for marker detection so 'a.m.' matches 'am'.
    lowered = T.replace('.','').lower().strip()
    if 'am' in lowered:
        timeFlag = 0
        T = lowered.replace('am','')
    if 'pm' in lowered:
        timeFlag = 1
        T = lowered.replace('pm','')
    T = T.strip()
    if T.count('.') == 1:
        # No marker was found, so the dot is the hour/minute separator.
        T = T.replace('.',':')

    try:
        if ':' in T:
            fields = T.split(':')
            H = int(fields[0])
            M = int(fields[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H = value
                M = 0
            elif value >= 100:
                # divmod keeps the hour an int on Python 3 as well.
                H, M = divmod(value, 100)
            else:
                # 25..99 is neither an hour nor an HMM value.
                return None
        if timeFlag == 0:
            H = H % 12
        elif timeFlag == 1:
            H = H % 12 + 12
    except ValueError:
        return None

    if H < 0 or M < 0 or M > 59 or H > 24:
        return None
    if H == 24 and M != 0:
        # 24 is only meaningful as the end-of-day notation 24:00.
        return None
    return (H, M)
|
DaveM@19
|
233
|
DaveM@13
|
234
|
DaveM@13
|
235
|