comparison V3/dParse.py @ 8:11dee9cbaacc

creation of V3
author DaveM
date Thu, 21 Dec 2017 11:47:51 +0000
parents
children
comparison
equal deleted inserted replaced
7:f3a7cfc52104 8:11dee9cbaacc
1 #!/usr/bin/env python
2
3 import csv
4 import time
5 import unicodedata
6 from geopy.geocoders import Nominatim
7 from geopy.exc import GeocoderTimedOut
8 import random
9 import pdb
10
# Fallback birth time (hour, minute) returned by parseTOB when the answer
# cannot be parsed.
DEFAULT_TIME_H = 12
DEFAULT_TIME_M = 0
# Place name geocoded by parseBirthTown once no part of the supplied birth
# place resolves.
DEFAULT_LOCATION = 'USA'
# Original, misspelled name; kept as an alias because existing code uses it.
DEAULT_LOCATION = DEFAULT_LOCATION
14
def parseCSV(filename):
    """Read a survey CSV export and return one normalised dict per data row.

    Every row produced by ``csv.DictReader`` is passed through
    ``regulateData``; the resulting dicts are returned in file order.

    Args:
        filename: path of the CSV file to read.

    Returns:
        list of dicts, one per data row.
    """
    # The csv module wants a binary file on Python 2 (where ``str is bytes``)
    # and a text file opened with newline='' on Python 3.
    if str is bytes:
        handle = open(filename, 'rb')
    else:
        # Latin-1 mirrors how parseBirthTown interprets non-ASCII byte
        # strings, and it can decode any byte sequence.
        # NOTE(review): confirm the export really is Latin-1 and not UTF-8.
        handle = open(filename, newline='', encoding='latin-1')
    # Close the file once every row has been processed instead of leaking it.
    with handle:
        return [regulateData(row) for row in csv.DictReader(handle)]
21
def regulateData(dataDict):
    """Add parsed birth fields to one survey row and return that same row.

    The row is modified in place. Six keys are written, in this order:
    ``DOB``, ``TOB``, ``pDOB``, ``pTOB``, ``COB`` and ``pCOB`` (the ``p``
    prefix marks the partner's values). Each one is produced by handing the
    raw answer(s) found under the survey's question text to the matching
    parser.

    Args:
        dataDict: one row of the survey export; must contain ``ID`` and every
            question column looked up below.

    Returns:
        ``dataDict`` itself, with the six keys added.
    """
    print("Parse %s" % (str(dataDict['ID'])))

    # The respondent's own date of birth is spread over three columns.
    own_day_q = "Which day (numeric) have you been born?"
    own_month_q = "Which month have you been born?"
    own_year_q = "Year Of Birth"
    dataDict['DOB'] = parseDOB(dataDict[own_day_q],
                               dataDict[own_month_q],
                               dataDict[own_year_q])

    # Every other field comes from a single column: (output key, parser,
    # column header). The headers are copied verbatim from the export.
    single_column_fields = (
        ('TOB', parseTOB,
         "At what exact time were you born? Please use the format HHMM (for example, 2204)."),
        ('pDOB', parsePartnerDOB,
         "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."),
        ('pTOB', parseTOB,
         "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."),
        ('COB', parseBirthTown,
         "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."),
        ('pCOB', parseBirthTown,
         "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."),
    )
    for out_key, parser, question in single_column_fields:
        dataDict[out_key] = parser(dataDict[question])
    return dataDict
44
45
def parseBirthTown(s):
    """Geocode a free-text birth place with Nominatim.

    When a query has no match, its first space-separated word is dropped and
    the remainder is tried again. Once a single word is left, the module's
    default location is tried once.

    When the geocoder raises, the timeout is increased by one second, the
    function sleeps for five seconds and retries the same query with a new
    geolocator, until the timeout reaches 60.

    Args:
        s: place description, e.g. ``'San Francisco, USA'``.

    Returns:
        ``(latitude, longitude, raw)`` for the first query that resolves, or
        ``None`` when even the default location has no match or the retry
        budget is exhausted.
    """
    # Byte strings (Python 2 csv rows) are decoded as Latin-1, which also
    # covers plain ASCII; text strings are used as they are.
    if isinstance(s, bytes):
        s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    triedDefault = False
    while timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Not a bare ``except`` so that Ctrl-C / SystemExit still abort.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)

        remainder = s.split(' ', 1)
        if len(remainder) == 2:
            # Drop the leading word and try the rest of the description.
            s = remainder[1]
        elif not triedDefault:
            s = DEAULT_LOCATION
            triedDefault = True
        else:
            # The default location did not resolve either; querying it again
            # would loop forever.
            return None
    return None
75
def parsePartnerDOB(dob):
    """Parse a free-text partner date of birth into ``(day, month, year)``.

    Accepted shapes are three fields separated by ``/``, ``-`` or single
    spaces, or an unseparated run of 4 to 8 characters (see
    ``_splitUnseparatedDOB``). Non-digit characters inside a field are
    ignored. Two-digit years are taken to be 19xx. If the month is above 12
    and the day is at most 12, the two are swapped (month-first input).

    Args:
        dob: the raw answer string.

    Returns:
        ``(d, m, y)`` as ints, or ``None`` when the text cannot be split into
        three numeric fields or the date is out of range (day 1-31, month
        1-12, year 1900-2017).
    """
    dob = dob.strip()
    # Treat '-' or ' ' as the separator when exactly two of them are present.
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    parts = dob.split('/')

    try:
        if len(parts) != 3:
            parts = _splitUnseparatedDOB(dob.replace('/', '').strip())
            if parts is None:
                return None
        # ''.join(...) keeps only the digits of each field; int('') raises
        # ValueError for a field without any digit.
        d, m, y = [int(''.join(ch for ch in part if ch.isdigit()))
                   for part in parts]
    except ValueError:
        return None

    if y < 100:
        y = y + 1900
    if m > 12 and d <= 12:
        # Day and month were given month-first.
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        # The original message concatenated str and int, raised TypeError and
        # so returned None without printing; the None result is kept.
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)


def _splitUnseparatedDOB(dob):
    """Split a date written without separators into [day, month, year] strings.

    Returns ``None`` when the length is not one of the supported layouts.
    Raises ``ValueError`` if a 7-character value does not start with digits.
    """
    size = len(dob)
    if size == 8:  # ddmmyyyy
        return [dob[:2], dob[2:4], dob[4:]]
    if size == 7:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyyyy, month 10-12
            return [dob[0], dob[1:3], dob[3:]]
        if int(dob[:2]) > 31:  # dmmyyyy: the first two digits cannot be a day
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyyyy
    if size == 6:  # ddmmyy
        # NOTE(review): the original also tested ``dob[3:4] != '19'``, which is
        # always true for a one-character slice. It may have been meant to
        # detect dmyyyy; that layout is still read as ddmmyy here.
        return [dob[:2], dob[2:4], dob[4:]]
    if size == 5:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyy, month 10-12
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyy
    if size == 4:  # dmyy
        return [dob[0], dob[1], dob[2:]]
    if size > 4:
        # Longer than any supported layout.
        print('no / partnerDOB issue')
    return None
146
def monthStringToNum(s):
    """Convert an English month name or abbreviation to its number (1-12).

    Only the first three characters of the stripped, lower-cased input are
    compared, so ``'Jan'``, ``'january'`` and ``' JANUARY '`` all give 1.

    Args:
        s: month text.

    Returns:
        int between 1 and 12.

    Raises:
        ValueError: if the first three characters are not a month prefix.
    """
    months = {'jan': 1, 'feb': 2, 'mar': 3,
              'apr': 4, 'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
              'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}
    prefix = s.strip()[:3].lower()
    try:
        return months[prefix]
    except KeyError:
        # Only a failed lookup means "not a month"; anything else propagates.
        raise ValueError('Not a month')
158
def checkMonthDay(d, m):
    """Repair an obviously wrong day/month pair and report whether it is valid.

    Repairs, in order:
      * a day above 31 is cut to its first two digits and, if that is still
        above 31, replaced by its second digit;
      * if the month is above 12 and the day is at most 12, the two are
        swapped (month-first input).

    The pair is valid when the month is 1-12 and the day is between 1 and 29
    for February, 30 for April/June/September/November, 31 otherwise. Leap
    years are not considered.

    Args:
        d: day as an int.
        m: month as an int.

    Returns:
        ``(valid, d, m)`` with the possibly repaired day and month.
    """
    if d > 31:
        # Take the first two digits of the day.
        d = int(str(d)[:2])
        if d > 31:
            # NOTE(review): keeps the *second* digit, as the original did;
            # confirm that the first digit was not intended.
            d = int(str(d)[1])
    if m > 12 and d <= 12:
        # Day and month the wrong way round (American order). ``<=`` so that
        # day 12 is swapped too, matching parsePartnerDOB.
        d, m = m, d
    if d < 1 or m < 1 or m > 12:
        # Zero or negative values used to be reported as valid.
        return (False, d, m)
    if m == 2:
        lastDay = 29
    elif m in (4, 6, 9, 11):
        lastDay = 30
    else:
        lastDay = 31
    return (d <= lastDay, d, m)
182
def parseDOB(d, m, y):
    """Parse the respondent's day, month and year answers into ints.

    Non-digit characters in the day and year are ignored. The month may be a
    name (handled by ``monthStringToNum``) or a number. Two-digit years are
    taken to be 19xx. The day/month pair is passed through ``checkMonthDay``;
    if it is reported invalid a message is printed, but the values are still
    returned.

    Args:
        d: day answer as a string.
        m: month answer as a string (name or number).
        y: year answer as a string.

    Returns:
        ``(day, month, year)`` as ints.

    Raises:
        ValueError: if the day or year holds no digit, or the month is
            neither a month name nor an integer.
    """
    # ''.join(...) instead of int(filter(...)): filter() returns an iterator
    # on Python 3, which int() rejects.
    d = int(''.join(ch for ch in d if ch.isdigit()))
    y = int(''.join(ch for ch in y if ch.isdigit()))
    try:
        m = monthStringToNum(m.strip())
    except ValueError:
        m = int(m.strip())
    if y < 100:
        y = y + 1900
    (valid, d, m) = checkMonthDay(d, m)
    if not valid:
        print('error with day month')
        print((valid, d, m))
    return (d, m, y)
198
def parseTOB(T):
    """Parse a free-text time of birth into an ``(hour, minute)`` tuple.

    Understood forms: ``HH:MM`` (or ``HH.MM``), four characters ``HHMM``, a
    bare hour up to 24, and three or more digits read as ``H..HMM``. An
    ``am``/``pm`` marker (with or without dots) converts the hour to the
    24-hour clock. Anything else yields the module's default time.

    Args:
        T: the raw answer string.

    Returns:
        ``(H, M)`` as ints. Values are not range-checked.
    """
    meridiem = None  # 0 for am, 1 for pm
    undotted = T.replace('.', '').lower().strip()
    if 'am' in undotted:
        meridiem = 0
        T = undotted.replace('am', '')
    if 'pm' in undotted:
        meridiem = 1
        T = undotted.replace('pm', '')
    T = T.strip()
    # A single remaining '.' is taken to be the hour/minute separator.
    if T.count('.') == 1:
        T = T.replace('.', ':')
    try:
        if ':' in T:
            fields = T.split(':')
            H = int(fields[0])
            M = int(fields[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H = value
                M = 0
            elif value >= 100:
                # Floor division keeps the hour an int on Python 3 as well.
                H = value // 100
                M = value % 100
            else:
                # 25-99 is neither an hour nor HMM. The original left H and M
                # unbound here and crashed; use the default time instead.
                raise ValueError('unrecognised time of birth')
        if meridiem is not None:
            if meridiem == 0:
                H = H % 12
            else:
                H = H % 12 + 12
    except ValueError:
        H = DEFAULT_TIME_H
        M = DEFAULT_TIME_M
    return (H, M)
237
def makePayload(dataDict):
    """Build the form payload dict for one parsed survey row.

    Fields that arrive as strings (stringified tuples such as
    ``'(29, 3, 1981)'``) are first converted back to sequences, and the
    converted values are stored back into ``dataDict``. The respondent's
    values go under the ``muz_narozeni_*`` keys and the partner's under the
    ``zena_narozeni_*`` keys.

    Args:
        dataDict: row holding ``DOB``, ``TOB``, ``COB``, ``pDOB``, ``pTOB``
            and ``pCOB``. Each is either a sequence or its string form; the
            place values start with latitude and longitude in decimal degrees.

    Returns:
        dict mapping form field names to values.

    Raises:
        TypeError: if one of the six fields is ``None`` (for example an
            unparseable partner date of birth).
    """
    for key in ('COB', 'pCOB'):
        if isinstance(dataDict[key], str):
            pieces = dataDict[key].split(',')
            # pieces[0] still carries the opening '(' of the tuple text.
            dataDict[key] = (float(pieces[0][1:]), float(pieces[1]))
    for key in ('DOB', 'pDOB', 'TOB', 'pTOB'):
        if isinstance(dataDict[key], str):
            dataDict[key] = _tupleStringItems(dataDict[key])

    print(dataDict['pDOB'])

    R = {'send_calculation': '1'}  # Req
    R.update(_personFields('muz', dataDict['DOB'], dataDict['TOB'], dataDict['COB']))
    R.update(_personFields('zena', dataDict['pDOB'], dataDict['pTOB'], dataDict['pCOB']))
    R['switch_interpretations'] = '0'
    R['house_system'] = 'placidus'
    R['uhel_orbis'] = '#tabs_redraw'
    return R


def _tupleStringItems(text):
    """Split a stringified tuple such as '(29, 3, 1981)' into stripped items."""
    return [item.strip() for item in text[1:-1].split(',')]


def _coordinateFields(value):
    """Return (degrees, minutes, direction) strings for one signed coordinate.

    ``direction`` is '1' for negative values and '0' otherwise.
    """
    # Fixed-point formatting always yields 'int.frac', even for ints and for
    # floats whose str() would use scientific notation.
    whole, fraction = ('%.10f' % abs(value)).split('.')
    minutes = str(int(float('0.' + fraction) * 60))
    return whole, minutes, '1' if value < 0 else '0'


def _personFields(prefix, dob, tob, cob):
    """Return the '<prefix>_narozeni_*' form fields for one person."""
    latDeg, latMin, latDir = _coordinateFields(cob[0])
    lonDeg, lonMin, lonDir = _coordinateFields(cob[1])
    values = (
        ('den', dob[0]),
        ('mesic', dob[1]),
        ('rok', dob[2]),
        ('hodina', tob[0]),
        ('minuta', tob[1]),
        ('city', ''),
        ('mesto_hidden', 'Manually+place%3A+%C2%B0%27N%2C+%C2%B0%27E'),  # auto
        ('stat_hidden', 'XX'),
        ('podstat_kratky_hidden', ''),
        ('podstat_hidden', ''),
        ('podstat2_kratky_hidden', ''),
        ('podstat3_kratky_hidden', ''),
        ('input_hidden', ''),
        ('sirka_stupne', latDeg),
        ('sirka_minuty', latMin),
        ('sirka_smer', latDir),  # 0: N, 1: S
        ('delka_stupne', lonDeg),
        ('delka_minuty', lonMin),
        ('delka_smer', lonDir),  # 0: E, 1: W
        ('timezone_form', 'auto'),
        ('timezone_dst_form', 'auto'),
    )
    return dict((prefix + '_narozeni_' + name, value) for name, value in values)
307
308
309
310