Mercurial > hg > horiscopes
comparison V5/dParse.py @ 23:11d4e438045e
make version 5
author | DaveM |
---|---|
date | Mon, 09 Apr 2018 15:07:21 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
22:a5b8e2b91d8f | 23:11d4e438045e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import csv | |
4 import time | |
5 import unicodedata | |
6 from geopy.geocoders import Nominatim | |
7 from geopy.exc import GeocoderTimedOut | |
8 import random | |
9 import pdb | |
10 | |
# Placeholder default for a missing time of birth; not referenced by any
# function visible in this module.
DEFAULT_TIME = None
# DEFAULT_TIME_M = None
# Value parseBirthTown falls back to when a place name cannot be resolved.
# NOTE(review): the name is misspelled ("DEAULT"); renaming it requires
# updating parseBirthTown at the same time.
DEAULT_LOCATION = None
14 | |
def regulateData(dataDict):
    """Parse the birth date, time and place answers of one survey row.

    The raw answers are looked up in ``dataDict`` under the full question
    text and the parsed results are stored back under the short keys
    ``DOB``, ``TOB``, ``pDOB``, ``pTOB``, ``COB`` and ``pCOB``.

    The dictionary is modified in place and is also returned.
    """
    print("Parse %s" % str(dataDict['ID']))

    # Question texts exactly as they are used as keys of the row.
    ownDayQ = "Which day (numeric) have you been born?"
    ownMonthQ = "Which month have you been born?"
    ownYearQ = "Year Of Birth"
    ownTimeQ = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    ownPlaceQ = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    partnerDateQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    partnerTimeQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    partnerPlaceQ = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    # Dates and times first, for the respondent and then the partner.
    day = dataDict[ownDayQ]
    month = dataDict[ownMonthQ]
    year = dataDict[ownYearQ]
    dataDict['DOB'] = parseDOB(day, month, year)
    dataDict['TOB'] = parseTOB(dataDict[ownTimeQ])
    dataDict['pDOB'] = parsePartnerDOB(dataDict[partnerDateQ])
    dataDict['pTOB'] = parseTOB(dataDict[partnerTimeQ])

    # Places last: each one goes through the geocoder.
    for shortKey, question in (('COB', ownPlaceQ), ('pCOB', partnerPlaceQ)):
        dataDict[shortKey] = parseBirthTown(dataDict[question])
    return dataDict
37 | |
38 | |
def parseBirthTown(s):
    """Geocode a free-text place of birth with Nominatim.

    The text is looked up as given; when the geocoder finds nothing, the
    leading word is dropped and the remainder is tried again, until a match
    is found or no words are left.

    Geocoder errors (typically timeouts) are retried after a 5 second pause
    with the timeout raised by one second, until the timeout reaches 60.

    Returns ``(latitude, longitude, raw)`` for the first match, otherwise
    ``DEAULT_LOCATION``.
    """
    if not s:
        return DEAULT_LOCATION
    # Byte strings are decoded as latin-1 (a superset of ASCII, so plain
    # ASCII input is unaffected); text that is already unicode is used as is.
    if isinstance(s, bytes):
        s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    while s and timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best effort: any geocoder failure is retried with a longer
            # timeout.  KeyboardInterrupt/SystemExit are no longer swallowed.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d"%(s,timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue
        if location is not None:
            return (location.latitude, location.longitude, location.raw)
        # No match: drop the first word and retry with what is left.
        remainder = s.split(' ', 1)
        if len(remainder) != 2:
            # Nothing left to strip; stop instead of retrying a hopeless
            # query until the timeout budget runs out.
            break
        s = remainder[1]
    return DEAULT_LOCATION
68 | |
def _splitCompactDOB(dob):
    """Split a separator-free date string into [day, month, year] strings.

    Returns None when the length matches none of the supported layouts.
    May raise ValueError when a 7-character input does not start with digits.
    """
    monthTens = ('0', '1', '2')
    n = len(dob)
    if n == 8:                                    # ddmmyyyy
        cuts = (2, 4)
    elif n == 7:
        if dob[1] == '1' and dob[2] in monthTens:  # dmmyyyy, month 10-12
            cuts = (1, 3)
        elif int(dob[:2]) > 31:                   # dmmyyyy: "dd" impossible
            cuts = (1, 3)
        else:                                     # ddmyyyy
            cuts = (2, 3)
    elif n == 6:
        if dob[2:4] in ('19', '20'):              # dmyyyy: no month 19 or 20
            cuts = (1, 2)
        else:                                     # ddmmyy
            cuts = (2, 4)
    elif n == 5:
        if dob[1] == '1' and dob[2] in monthTens:  # dmmyy
            cuts = (1, 3)
        else:                                     # ddmyy
            cuts = (2, 3)
    elif n == 4:                                  # dmyy
        cuts = (1, 2)
    else:
        return None
    first, second = cuts
    return [dob[:first], dob[first:second], dob[second:]]


def parsePartnerDOB(dob):
    """Parse a free-text partner date of birth into ``(day, month, year)``.

    Accepts day/month/year separated by ``/``, ``-`` or spaces, as well as
    4 to 8 character strings without separators.  Two-digit years are taken
    to be 19xx, and day and month are swapped when the month is above 12 and
    the day could be a month.

    Returns a tuple of ints, or None when the input cannot be parsed or the
    date is out of range (day 1-31, month 1-12, year 1900-2017).
    """
    if dob is None:
        return None
    dob = dob.strip()
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    parts = dob.split('/')
    if len(parts) != 3:
        # No usable separators: guess the layout from the length.
        compact = dob.replace('/', '').strip()
        if len(compact) < 4:
            return None
        try:
            parts = _splitCompactDOB(compact)
        except ValueError:
            parts = None
        if parts is None:
            print('no / partnerDOB issue')
            return None
    try:
        # Keep only the digits of each field before converting.
        d, m, y = [int(''.join(ch for ch in part if ch.isdigit()))
                   for part in parts]
    except ValueError:
        # A field contained no digits at all.
        return None
    if y < 100:
        y = y + 1900
    if m > 12 and d <= 12:
        # Month and day given the other way round.
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
139 | |
def monthStringToNum(s):
    """Convert a month name to its number, 1 (January) to 12 (December).

    Only the first three characters of the stripped, lower-cased input are
    compared, so both "Sep" and "September" give 9.

    Raises ValueError('Not a month') when those characters match no month.
    """
    abbreviations = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                     'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
    prefix = s.strip()[:3].lower()
    if prefix in abbreviations:
        return abbreviations.index(prefix) + 1
    raise ValueError('Not a month')
151 | |
def checkMonthDay(d,m):
    """Sanity-check a day/month pair, repairing two common input mistakes.

    * A day above 31 is cut to its first two digits and, if that is still
      above 31, to the second of those digits.
    * When the month is above 12 and the day could be a month (12 or less),
      the two are swapped.

    Returns ``(valid, day, month)`` with the possibly repaired values.
    ``valid`` is True only for month 1-12 and a day between 1 and the
    length of that month (February is allowed 29 days; the year is not
    known here).
    """
    if d > 31:  # take first two digits of day
        d = int(str(d)[:2])
        if d > 31:
            d = int(str(d)[1])
    if m > 12 and d <= 12:  # Day and month wrong way round - American
        d, m = m, d
    if m < 1 or m > 12 or d < 1:
        return (False, d, m)
    if m == 2:
        lastDay = 29
    elif m in (4, 6, 9, 11):
        lastDay = 30
    else:
        lastDay = 31
    return (d <= lastDay, d, m)
175 | |
def parseDOB(d,m,y):
    """Build a ``(day, month, year)`` tuple of ints from three text answers.

    ``d`` and ``y`` are reduced to their digits before conversion.  ``m`` may
    be a month name (resolved with monthStringToNum) or a number.  Two-digit
    years are taken to be 19xx.  Day and month are then passed through
    checkMonthDay, which may repair them; when it still reports the pair as
    invalid a message is printed, but the values are returned regardless.

    Raises ValueError when the day or year holds no digits, or the month is
    neither a month name nor a number.
    """
    # ''.join(...) instead of filter(): filter() only returns a string on
    # Python 2, so int(filter(...)) fails on Python 3.
    d = int(''.join(ch for ch in d if ch.isdigit()))
    y = int(''.join(ch for ch in y if ch.isdigit()))
    try:
        m = monthStringToNum(m.strip())
    except ValueError:
        m = int(m.strip())
    if y < 100:
        y = y + 1900
    (r,d,m) = checkMonthDay(d,m)
    if not r:
        print('error with day month')
        print(str((r,d,m)))
    return (d,m,y)
191 | |
def parseTOB(T):
    """Parse a free-text time of birth into an ``(hour, minute)`` tuple.

    Understood forms: ``HHMM``, ``HMM``, ``H``/``HH`` (full hour), ``HH:MM``
    and ``HH.MM``, each optionally combined with am/pm (dots as in "a.m."
    are ignored).  With am/pm the hour is converted to the 24-hour clock.

    Returns a tuple of ints, or None when the text cannot be read as a time
    or the result is out of range.  (24, 0) is still accepted for midnight.
    """
    if T is None:
        return None
    isPm = None
    cleaned = T.replace('.','').lower().strip()
    if 'am' in cleaned:
        isPm = False
        T = cleaned.replace('am','')
    if 'pm' in cleaned:
        isPm = True
        T = cleaned.replace('pm','')
    T = T.strip()
    if T.count('.') == 1:
        # "10.30" style: treat the dot as the hour/minute separator.
        T = T.replace('.',':')
    try:
        if ':' in T:
            fields = T.split(':')
            H = int(fields[0])
            M = int(fields[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H, M = value, 0
            elif value >= 100:
                # HMM (or longer); divmod keeps the hour an int on Python 3.
                H, M = divmod(value, 100)
            else:
                return None
    except ValueError:
        return None
    if H < 0 or M < 0:
        return None
    if isPm is not None:
        H = H % 12
        if isPm:
            H = H + 12
    if H > 24 or M > 59 or (H == 24 and M != 0):
        return None
    return (H, M)
233 | |
234 | |
235 |