annotate V4/dParse.py @ 13:b253748dbb11

developing V4 - Class based structure, with self calculation of all aspects
author DaveM
date Sun, 04 Mar 2018 14:51:43 +0000
parents
children 50a95089414d
rev   line source
DaveM@13 1 #!/usr/bin/env python
DaveM@13 2
DaveM@13 3 import csv
DaveM@13 4 import time
DaveM@13 5 import unicodedata
DaveM@13 6 from geopy.geocoders import Nominatim
DaveM@13 7 from geopy.exc import GeocoderTimedOut
DaveM@13 8 import random
DaveM@13 9 import pdb
DaveM@13 10
# Birth time (hour, minute) substituted when a time answer cannot be parsed.
DEFAULT_TIME_H = 12
DEFAULT_TIME_M = 0
# Last-resort geocoding query used once a birthplace string is exhausted.
DEFAULT_LOCATION = 'USA'
# Misspelled original name, kept as an alias because existing code uses it.
DEAULT_LOCATION = DEFAULT_LOCATION
DaveM@13 14
DaveM@13 15 # def parseCSV(filename):
DaveM@13 16 # stream = csv.DictReader(open(filename,'rb'))
DaveM@13 17 # dictList = []
DaveM@13 18 # people = []
DaveM@13 19 # for line in stream:
DaveM@13 20 # people.append(syn.person(line))
DaveM@13 21 # return people
DaveM@13 22 # # dictList.append(regulateData(line))
DaveM@13 23 # # return dictList
DaveM@13 24
def regulateData(dataDict):
    """Parse the raw answers of one survey row, in place.

    Reads the respondent's and partner's birth date, time and place from
    the survey-question columns and stores the parsed values under the
    keys 'DOB', 'TOB', 'pDOB', 'pTOB', 'COB' and 'pCOB'.

    Parameters:
        dataDict: mapping holding an 'ID' entry plus one entry per survey
            question used below.

    Returns:
        The same dataDict object, with the six parsed keys added.
    """
    print("Parse %s" % (str(dataDict['ID'])))

    # Column headers are dictionary keys and must match the survey verbatim.
    ownDayKey = "Which day (numeric) have you been born?"
    ownMonthKey = "Which month have you been born?"
    ownYearKey = "Year Of Birth"
    ownTimeKey = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    ownPlaceKey = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    partnerDateKey = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    partnerTimeKey = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    partnerPlaceKey = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    dataDict['DOB'] = parseDOB(
        dataDict[ownDayKey], dataDict[ownMonthKey], dataDict[ownYearKey])

    # Remaining fields each take one answer and one parser; the order is
    # kept because the parsers print and the place lookups hit the network.
    singleAnswerFields = (
        ('TOB', parseTOB, ownTimeKey),
        ('pDOB', parsePartnerDOB, partnerDateKey),
        ('pTOB', parseTOB, partnerTimeKey),
        ('COB', parseBirthTown, ownPlaceKey),
        ('pCOB', parseBirthTown, partnerPlaceKey),
    )
    for target, parser, question in singleAnswerFields:
        dataDict[target] = parser(dataDict[question])
    return dataDict
DaveM@13 47
DaveM@13 48
def parseBirthTown(s):
    """Geocode a free-text birthplace with Nominatim.

    The whole string is tried first. Whenever the geocoder finds nothing,
    the leading space-separated word is dropped and the remainder is
    retried; once only one word is left the query becomes DEAULT_LOCATION.
    A geocoder error causes a 5 second pause and a retry with a timeout
    one second longer.

    Parameters:
        s: birthplace text. Byte strings are decoded as ASCII, falling
            back to Latin-1; text strings are used unchanged.

    Returns:
        (latitude, longitude, raw) for the first match, or None when even
        DEAULT_LOCATION yields no result or the timeout reaches 60 seconds.
    """
    if isinstance(s, bytes):
        try:
            s = s.decode('ascii')
        except UnicodeDecodeError:
            s = s.decode('latin-1')

    timeoutTime = 2
    # NOTE(review): recent geopy releases are documented to require an
    # explicit user_agent for Nominatim - confirm against the installed one.
    geolocator = Nominatim(timeout=timeoutTime)
    while timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best effort: any geocoder failure is retried with a longer
            # timeout, but Ctrl-C / SystemExit are no longer swallowed.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)
        if s == DEAULT_LOCATION:
            # Even the fallback found nothing; retrying the same query
            # would loop forever.
            return None
        remainder = s.split(' ', 1)
        s = remainder[1] if len(remainder) == 2 else DEAULT_LOCATION
    return None
DaveM@13 77 # places = geograpy.get_place_context(text=s)
DaveM@13 78
def _digitsToInt(text):
    """Return the decimal digits found in *text* as an int (ValueError if none)."""
    return int(''.join(ch for ch in text if ch.isdigit()))


def _splitCompactDOB(dob):
    """Cut a separator-free date string into [day, month, year] substrings.

    Returns None when the length (not 4..8) matches no supported layout.
    """
    size = len(dob)
    if size == 8:  # ddmmyyyy
        return [dob[:2], dob[2:4], dob[4:]]
    if size == 7:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyyyy
            return [dob[0], dob[1:3], dob[3:]]
        if int(dob[:2]) > 31:  # dmmyyyy: first two digits cannot be a day
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyyyy
    if size == 6:  # always read as ddmmyy
        return [dob[:2], dob[2:4], dob[4:]]
    if size == 5:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyy
            return [dob[0], dob[1:3], dob[3:]]
        return [dob[:2], dob[2], dob[3:]]  # ddmyy
    if size == 4:  # dmyy
        return [dob[0], dob[1], dob[2:]]
    return None


def parsePartnerDOB(dob):
    """Parse a free-form partner date-of-birth answer.

    Accepts day/month/year separated by '/', '-' or single spaces, or a
    run of 4 to 8 characters without separators. Two-digit years are
    taken as 19xx, and day and month are swapped when the month exceeds
    12 while the day does not.

    Parameters:
        dob: the raw answer string.

    Returns:
        (day, month, year) as ints, or None when the answer cannot be
        split into three numeric parts or the result falls outside day
        1-31, month 1-12, year 1900-2017.
    """
    dob = dob.strip()
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    parts = dob.split('/')

    try:
        if len(parts) != 3:
            compact = dob.replace('/', '').strip()
            if len(compact) < 4:
                return None
            parts = _splitCompactDOB(compact)
            if parts is None:
                print('no / partnerDOB issue')
                return None
        d, m, y = [_digitsToInt(part) for part in parts]
    except ValueError:
        # A component held no digits at all (e.g. a month name).
        return None

    if y < 100:
        y += 1900
    if m > 12 and d <= 12:
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        # The old str+int message raised TypeError, which is what made
        # invalid dates return None; keep that result, drop the debugger.
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
DaveM@13 149
def monthStringToNum(s):
    """Convert a month name to its number, 1 (January) to 12 (December).

    Only the first three characters of the stripped, lower-cased input
    are compared, so 'Sept', 'september' and ' SEP ' all give 9.

    Raises:
        ValueError: with message 'Not a month' when there is no match.
    """
    abbreviations = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                     'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
    numberByAbbreviation = dict(
        (name, number) for number, name in enumerate(abbreviations, 1))
    key = s.strip()[:3].lower()
    if key not in numberByAbbreviation:
        raise ValueError('Not a month')
    return numberByAbbreviation[key]
DaveM@13 161
def checkMonthDay(d, m):
    """Repair and validate a day/month pair.

    A day above 31 is cut to its first two digits and, if still above 31,
    to its second digit. When the month is above 12 and the day could be
    a month (1-12), the two are swapped (month-first input).

    Parameters:
        d: day of month as an int.
        m: month as an int.

    Returns:
        (valid, day, month) with the possibly repaired values. February
        allows up to 29 days because no year is available here.
    """
    if d > 31:  # keep the first two digits of the day
        d = int(str(d)[:2])
    if d > 31:  # still too large: keep the second digit only
        d = int(str(d)[1])
    # '<=' so that December (12) counts as a month, matching the swap
    # rule used by parsePartnerDOB.
    if m > 12 and d <= 12:
        d, m = m, d
    if d < 1 or m < 1 or m > 12:
        return (False, d, m)
    if m == 2:
        lastDay = 29
    elif m in (4, 6, 9, 11):
        lastDay = 30
    else:
        lastDay = 31
    return (d <= lastDay, d, m)
DaveM@13 185
def parseDOB(d, m, y):
    """Parse separately answered day, month and year of birth.

    Parameters:
        d: day answer; every non-digit character is ignored.
        m: month answer, either a month name or an integer string.
        y: year answer; every non-digit character is ignored. A value
            below 100 is taken as 19xx.

    Returns:
        (day, month, year) as ints after checkMonthDay has repaired the
        day/month pair. An invalid pair is only reported on stdout; the
        tuple is returned regardless.

    Raises:
        ValueError: if the day or year contains no digit, or the month is
            neither a month name nor an integer.
    """
    def digitsToInt(text):
        # str.join works on Python 2 and 3, unlike int(filter(...)).
        return int(''.join(ch for ch in text if ch.isdigit()))

    d = digitsToInt(d)
    y = digitsToInt(y)
    try:
        m = monthStringToNum(m.strip())
    except ValueError:
        m = int(m.strip())
    if y < 100:
        y += 1900
    (valid, d, m) = checkMonthDay(d, m)
    if not valid:
        print('error with day month')
        print((valid, d, m))
    return (d, m, y)
DaveM@13 201
def parseTOB(T):
    """Parse a free-form time-of-birth answer.

    Understood layouts: 'HH:MM', 'HH.MM', four characters 'HHMM', a bare
    hour up to 24, or a number of 100 and above read as H*100 + MM. An
    'am'/'pm' marker (dots ignored, any case) converts the hour to the
    24-hour clock.

    Parameters:
        T: the raw answer string.

    Returns:
        (hour, minute) as ints. Anything unparseable, including a bare
        number from 25 to 99, gives (DEFAULT_TIME_H, DEFAULT_TIME_M).
        Parsed values are not range-checked.
    """
    meridiem = None  # 0 = am, 1 = pm
    lowered = T.replace('.', '').lower().strip()
    if 'am' in lowered:
        meridiem = 0
        T = lowered.replace('am', '')
    if 'pm' in lowered:
        meridiem = 1
        T = lowered.replace('pm', '')
    T = T.strip()
    if T.count('.') == 1:
        T = T.replace('.', ':')

    try:
        if ':' in T:
            fields = T.split(':')
            H = int(fields[0])
            M = int(fields[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H = value
                M = 0
            elif value >= 100:
                # Floor division keeps the hour an int on Python 3 too.
                H = value // 100
                M = value % 100
            else:
                # 25..99 is neither an hour nor HMM; previously H and M
                # were left unbound here.
                raise ValueError('ambiguous time: %s' % T)
        if meridiem == 0:
            H = H % 12
        elif meridiem == 1:
            H = H % 12 + 12
    except ValueError:
        H = DEFAULT_TIME_H
        M = DEFAULT_TIME_M
    return (H, M)
DaveM@13 240
def _coordsFromText(text):
    """Parse the first two numbers of a '(lat, lon, ...)' string into floats."""
    fields = text.split(',')
    return (float(fields[0].strip(' ()')), float(fields[1].strip(' ()')))


def _fieldsFromText(text):
    """Split a '(a, b, c)' string into its whitespace-stripped parts."""
    return [field.strip() for field in text[1:-1].split(',')]


def _degreesMinutes(value):
    """Return (whole degrees, whole arc-minutes) of abs(value) as strings."""
    # Fixed-point formatting avoids the exponent notation str() uses for
    # tiny fractions and also accepts integer coordinates.
    whole, _, fraction = ('%.10f' % abs(value)).partition('.')
    return whole, str(int(float('0.' + fraction) * 60))


def _birthFields(prefix, dob, tob, cob):
    """Build the form fields of one person; every key starts with *prefix*."""
    latDegrees, latMinutes = _degreesMinutes(cob[0])
    lonDegrees, lonMinutes = _degreesMinutes(cob[1])
    return {
        prefix + 'den': dob[0],
        prefix + 'mesic': dob[1],
        prefix + 'rok': dob[2],
        prefix + 'hodina': tob[0],
        prefix + 'minuta': tob[1],
        prefix + 'city': '',
        prefix + 'mesto_hidden': 'Manually+place%3A+%C2%B0%27N%2C+%C2%B0%27E',
        prefix + 'stat_hidden': 'XX',
        prefix + 'podstat_kratky_hidden': '',
        prefix + 'podstat_hidden': '',
        prefix + 'podstat2_kratky_hidden': '',
        prefix + 'podstat3_kratky_hidden': '',
        prefix + 'input_hidden': '',
        prefix + 'sirka_stupne': latDegrees,
        prefix + 'sirka_minuty': latMinutes,
        prefix + 'sirka_smer': '1' if cob[0] < 0 else '0',  # 0 = N, 1 = S
        prefix + 'delka_stupne': lonDegrees,
        prefix + 'delka_minuty': lonMinutes,
        prefix + 'delka_smer': '1' if cob[1] < 0 else '0',  # 0 = E, 1 = W
        prefix + 'timezone_form': 'auto',
        prefix + 'timezone_dst_form': 'auto',
    }


def makePayload(dataDict):
    """Build the form payload for one respondent/partner pair.

    Values of 'COB', 'pCOB', 'DOB', 'pDOB', 'TOB' and 'pTOB' that are
    still strings (tuple text such as '(29, 3, 1981)') are parsed first
    and written back into dataDict.

    Parameters:
        dataDict: mapping with those six keys. DOB/pDOB need three items,
            TOB/pTOB two, COB/pCOB at least latitude and longitude.

    Returns:
        A dict of form fields: the 'muz_narozeni_*' keys are filled from
        DOB/TOB/COB, the 'zena_narozeni_*' keys from pDOB/pTOB/pCOB.
        Coordinates are sent as whole degrees, whole minutes and a
        direction flag.
    """
    for key in ('COB', 'pCOB'):
        if isinstance(dataDict[key], str):
            dataDict[key] = _coordsFromText(dataDict[key])
    for key in ('DOB', 'pDOB', 'TOB', 'pTOB'):
        if isinstance(dataDict[key], str):
            dataDict[key] = _fieldsFromText(dataDict[key])

    print(dataDict['pDOB'])

    R = {'send_calculation': '1',  # Req
         'switch_interpretations': '0',
         'house_system': 'placidus',
         'uhel_orbis': '#tabs_redraw'}
    R.update(_birthFields('muz_narozeni_', dataDict['DOB'],
                          dataDict['TOB'], dataDict['COB']))
    R.update(_birthFields('zena_narozeni_', dataDict['pDOB'],
                          dataDict['pTOB'], dataDict['pCOB']))
    return R
DaveM@13 310
DaveM@13 311
DaveM@13 312
DaveM@13 313