annotate V3/dParse.py @ 8:11dee9cbaacc

creation of V3
author DaveM
date Thu, 21 Dec 2017 11:47:51 +0000
parents
children
rev   line source
DaveM@8 1 #!/usr/bin/env python
DaveM@8 2
DaveM@8 3 import csv
DaveM@8 4 import time
DaveM@8 5 import unicodedata
DaveM@8 6 from geopy.geocoders import Nominatim
DaveM@8 7 from geopy.exc import GeocoderTimedOut
DaveM@8 8 import random
DaveM@8 9 import pdb
DaveM@8 10
# Fallback birth time (hour, minute) that parseTOB returns when a time string
# cannot be parsed.
DEFAULT_TIME_H = 12
DEFAULT_TIME_M = 0
# Fallback place name that parseBirthTown hands to the geocoder once every
# shortened form of the user's answer has failed to resolve.
DEFAULT_LOCATION = 'USA'
# Original (misspelled) name, kept so existing references keep working.
DEAULT_LOCATION = DEFAULT_LOCATION
DaveM@8 14
def parseCSV(filename):
    """Read a survey CSV export and return one normalised dict per data row.

    Each row is read with csv.DictReader (so it is keyed by the header line)
    and passed through regulateData before being collected.

    filename -- path of the CSV file to read.

    Returns a list with regulateData's result for every row, in file order.
    """
    # The file is opened in 'rb' mode exactly as before (the mode the
    # Python 2 csv module expects); the with-block now guarantees the handle
    # is closed even if a row fails to parse.
    with open(filename, 'rb') as handle:
        return [regulateData(row) for row in csv.DictReader(handle)]
DaveM@8 21
def regulateData(dataDict):
    """Add parsed birth date, time and place entries to one survey row.

    dataDict -- a row keyed by the survey's column headers; it must contain
                an 'ID' entry plus the question columns named below.

    The row is modified in place: the keys 'DOB', 'TOB', 'pDOB', 'pTOB',
    'COB' and 'pCOB' are set from the respondent's and the partner's answers.
    The same dict is returned.
    """
    print("Parse %s" % (str(dataDict['ID'])))

    # Column headers, reproduced exactly as they appear in the export.
    own_day_q = "Which day (numeric) have you been born?"
    own_month_q = "Which month have you been born?"
    own_year_q = "Year Of Birth"
    own_time_q = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    partner_dob_q = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    partner_time_q = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    own_place_q = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    partner_place_q = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    # The respondent's date of birth is spread over three columns.
    dataDict['DOB'] = parseDOB(dataDict[own_day_q],
                               dataDict[own_month_q],
                               dataDict[own_year_q])

    # Every remaining field comes from a single column; the order below is
    # the order in which the parsers run (the two place lookups come last).
    single_column_fields = (
        ('TOB', parseTOB, own_time_q),
        ('pDOB', parsePartnerDOB, partner_dob_q),
        ('pTOB', parseTOB, partner_time_q),
        ('COB', parseBirthTown, own_place_q),
        ('pCOB', parseBirthTown, partner_place_q),
    )
    for target_key, parser, column in single_column_fields:
        dataDict[target_key] = parser(dataDict[column])

    return dataDict
DaveM@8 44
DaveM@8 45
def parseBirthTown(s):
    """Geocode a free-text place of birth with Nominatim.

    The whole string is tried first.  While the geocoder reports no match,
    the leading space-separated word is dropped and the remainder is tried;
    once no further word can be dropped, DEAULT_LOCATION is tried.

    A failing geocode call is retried after a 5 second pause with the
    geocoder timeout raised by one second, until the timeout reaches 60.

    s -- the place name, as text or as a byte string.

    Returns (latitude, longitude, raw) for the first match, or None when the
    fallback location has no match either or the retries are exhausted.
    """
    # Work on text: pure-ASCII bytes decode as ASCII, anything else is read
    # as Latin-1 (the same fallback the previous code used).
    if isinstance(s, bytes):
        try:
            s = s.decode('ascii')
        except UnicodeDecodeError:
            s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    while timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best-effort retry on any geocoder failure, but no longer a bare
            # except, so Ctrl-C / SystemExit still stop the program.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)

        if s == DEAULT_LOCATION:
            # Even the fallback has no match; retrying the same query would
            # loop forever, so give up.
            return None

        # Drop the first word and retry; fall back once nothing is left.
        remainder = s.split(' ', 1)
        s = remainder[1] if len(remainder) == 2 else DEAULT_LOCATION

    return None
DaveM@8 75
def _splitCompactDOB(dob):
    """Cut a separator-less date string into [day, month, year] text pieces.

    The layout is chosen from the string length (4 to 8 characters).
    Returns None for any other length.  Raises ValueError when a 7-character
    string does not start with two digits.
    """
    n = len(dob)
    if n == 8:                                            # ddmmyyyy
        cuts = (2, 4)
    elif n == 7:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):   # dmmyyyy, month 10-12
            cuts = (1, 3)
        elif int(dob[:2]) > 31:                           # dmmyyyy
            cuts = (1, 3)
        else:                                             # ddmyyyy
            cuts = (2, 3)
    elif n == 6:                                          # ddmmyy
        # NOTE(review): the previous guard here was `dob[3:4] != '19'`, which
        # compares one character with two and is therefore always true.  It
        # presumably meant to recognise dmyyyy input - confirm before adding
        # such a branch.  Behaviour is unchanged: always ddmmyy.
        cuts = (2, 4)
    elif n == 5:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):   # dmmyy, month 10-12
            cuts = (1, 3)
        else:                                             # ddmyy
            cuts = (2, 3)
    elif n == 4:                                          # dmyy
        cuts = (1, 2)
    else:
        return None
    first, second = cuts
    return [dob[:first], dob[first:second], dob[second:]]


def parsePartnerDOB(dob):
    """Parse the partner's free-text date of birth.

    Accepts day/month/year separated by '/', '-' or single spaces, and
    separator-less digit strings of 4 to 8 characters.  Two-digit years are
    taken as 19xx.  If the month is above 12 and the day is not, the two are
    swapped (month-first input).

    dob -- the answer text.

    Returns (day, month, year) as ints, or None when the text cannot be
    split into three numbers or the result is outside day 1-31,
    month 1-12, year 1900-2017.
    """
    dob = dob.strip()
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')

    pieces = dob.split('/')
    if len(pieces) != 3:
        # No usable separators: fall back to fixed-position slicing.
        dob = dob.replace('/', '').strip()
        if len(dob) < 4:
            return None
        try:
            pieces = _splitCompactDOB(dob)
        except ValueError:
            pieces = None
        if pieces is None:
            # Previously this path went on to index an empty list and
            # crashed with IndexError.
            print('no / partnerDOB issue')
            return None

    try:
        # Keep only the digits of each piece; a piece without digits makes
        # int('') raise ValueError.
        d, m, y = [int(''.join(ch for ch in piece if ch.isdigit()))
                   for piece in pieces]
    except ValueError:
        return None

    if y < 100:
        y = y + 1900
    if m > 12 and d <= 12:
        d, m = m, d

    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        # The old message concatenated str and int, raised TypeError and was
        # silently turned into None; report properly and return None directly.
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
DaveM@8 146
def monthStringToNum(s):
    """Convert an English month name or abbreviation to its number (1-12).

    Only the first three characters of the stripped, lower-cased text are
    compared, so 'Jan', 'january' and ' JANUARY ' all give 1.

    s -- the month text.

    Raises ValueError('Not a month') when the prefix is not a month
    abbreviation.
    """
    months = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
              'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}
    key = s.strip()[:3].lower()
    # Explicit membership test instead of a bare except around the lookup.
    if key not in months:
        raise ValueError('Not a month')
    return months[key]
DaveM@8 158
def checkMonthDay(d, m):
    """Repair and validate a numeric day/month pair.

    d -- day as an int.
    m -- month as an int.

    A day above 31 is cut to its first two digits and, if that is still
    above 31, replaced by the second of those digits.  If the month is above
    12 while the day is at most 12, the two are swapped (month-first input).

    Returns (valid, day, month): the possibly repaired values, and whether
    the day is within 1..29 for February, 1..30 for April, June, September
    and November, and 1..31 for the other months.  No leap-year check is
    made.
    """
    if d > 31:
        d = int(str(d)[:2])
        if d > 31:
            d = int(str(d)[1])

    # `<=` so that a day of exactly 12 is swapped too, matching the rule
    # parsePartnerDOB applies (the old `d < 12` left e.g. 12/25 unswapped).
    if m > 12 and d <= 12:
        d, m = m, d

    # Zero or negative values used to slip through as valid.
    if d < 1 or m < 1 or m > 12:
        return (False, d, m)

    if m == 2:
        last_day = 29
    elif m in (4, 6, 9, 11):
        last_day = 30
    else:
        last_day = 31
    return (d <= last_day, d, m)
DaveM@8 182
def parseDOB(d, m, y):
    """Parse the respondent's date of birth from three separate answers.

    d -- day text; every non-digit character is ignored.
    m -- month text; either a month name/abbreviation understood by
         monthStringToNum or a plain number.
    y -- year text; every non-digit character is ignored, and a value below
         100 is taken as 19xx.

    The day/month pair is passed through checkMonthDay; if it reports the
    pair as invalid a message is printed, but the values are still returned.

    Returns (day, month, year) as ints.
    Raises ValueError when the day or year contains no digit, or the month
    is neither a month name nor an integer.
    """
    # ''.join(...) instead of int(filter(...)): on Python 3 filter() returns
    # an iterator, which int() rejects.
    d = int(''.join(ch for ch in d if ch.isdigit()))
    y = int(''.join(ch for ch in y if ch.isdigit()))
    try:
        m = monthStringToNum(m.strip())
    except ValueError:
        m = int(m.strip())
    if y < 100:
        y = y + 1900
    (ok, d, m) = checkMonthDay(d, m)
    if not ok:
        print('error with day month')
        print((ok, d, m))
    return (d, m, y)
DaveM@8 198
def parseTOB(T):
    """Parse a free-text time of birth into (hour, minute).

    Understood forms: 'HH:MM', 'HH.MM', four characters 'HHMM', a bare hour
    up to 24, and a number of 100 or more read as hours*100 + minutes
    (e.g. '930').  An 'am' / 'pm' marker (any case, dots ignored) converts
    the hour to the 24-hour clock.

    T -- the answer text.

    Returns (hour, minute) as ints.  Anything that cannot be parsed gives
    (DEFAULT_TIME_H, DEFAULT_TIME_M).  Hour and minute are not range-checked.
    """
    timeFlag = None                      # None: no marker, 0: am, 1: pm
    T_ = T.replace('.', '').lower().strip()
    if 'am' in T_:
        timeFlag = 0
        T = T_.replace('am', '')
    if 'pm' in T_:
        timeFlag = 1
        T = T_.replace('pm', '')
    T = T.strip()
    if T.count('.') == 1:
        T = T.replace('.', ':')
    try:
        if ':' in T:
            parts = T.split(':')
            H = int(parts[0])
            M = int(parts[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H, M = value, 0
            elif value >= 100:
                # divmod keeps the hour an int on Python 3; `>=` so that
                # '100' is read as 1:00.
                H, M = divmod(value, 100)
            else:
                # 25..99 is neither an hour nor HMM; this used to leave H and
                # M unassigned and crash at the return statement.
                raise ValueError('unrecognised time: %s' % T)
        if timeFlag is not None:
            if timeFlag == 0:
                H = H % 12
            else:
                H = H % 12 + 12
    except ValueError:
        H = DEFAULT_TIME_H
        M = DEFAULT_TIME_M
    return (H, M)
DaveM@8 237
def _tupleTextItems(text):
    """Split the text form of a tuple, e.g. '(29, 3, 1981)', into items.

    Surrounding blanks and parentheses are removed from every item.
    """
    return [item.strip(' ()') for item in text.split(',')]


def _coordFields(value):
    """Turn one signed decimal coordinate into form-field strings.

    Returns (degrees, minutes, direction): whole degrees and whole minutes of
    the absolute value, and direction '1' for a negative value, '0' otherwise.
    """
    # Fixed-point formatting always yields 'whole.fraction', also for ints
    # and for small floats whose str() would use exponent notation.
    whole, fraction = ('%.10f' % abs(float(value))).split('.')
    minutes = str(int(float('0.' + fraction) * 60))
    direction = '1' if value < 0 else '0'
    return (whole, minutes, direction)


def makePayload(dataDict):
    """Build the form-field dict for one respondent/partner pair.

    dataDict -- a row holding 'DOB', 'TOB', 'COB', 'pDOB', 'pTOB' and 'pCOB'.
                Each value is either a sequence (as produced by the parsers
                in this module) or the text form of such a tuple; text values
                are converted and written back into dataDict.

    The respondent's data fills the 'muz_...' fields and the partner's data
    the 'zena_...' fields.  Coordinates are sent as whole degrees, whole
    minutes and a direction flag (0 = N / E, 1 = S / W).

    Returns the dict of form fields.
    Raises TypeError or IndexError when one of the six values is None or has
    too few items.
    """
    # Text coordinates: only the first two items (latitude, longitude) are
    # used; stripping the parentheses also makes a plain '(lat, lon)' work.
    for key in ('COB', 'pCOB'):
        if isinstance(dataDict[key], str):
            items = dataDict[key].split(',')
            dataDict[key] = (float(items[0].strip(' ()')),
                             float(items[1].strip(' ()')))
    # Text dates and times become lists of trimmed strings.
    for key in ('DOB', 'pDOB', 'TOB', 'pTOB'):
        if isinstance(dataDict[key], str):
            dataDict[key] = _tupleTextItems(dataDict[key])

    print(dataDict['pDOB'])

    lat = _coordFields(dataDict['COB'][0])
    lon = _coordFields(dataDict['COB'][1])
    pLat = _coordFields(dataDict['pCOB'][0])
    pLon = _coordFields(dataDict['pCOB'][1])

    R = {'send_calculation': '1',  # Req
         'muz_narozeni_den': dataDict['DOB'][0],
         'muz_narozeni_mesic': dataDict['DOB'][1],
         'muz_narozeni_rok': dataDict['DOB'][2],
         'muz_narozeni_hodina': dataDict['TOB'][0],
         'muz_narozeni_minuta': dataDict['TOB'][1],
         'muz_narozeni_city': '',
         'muz_narozeni_mesto_hidden': 'Manually+place%3A+%C2%B0%27N%2C+%C2%B0%27E',  # auto
         'muz_narozeni_stat_hidden': 'XX',
         'muz_narozeni_podstat_kratky_hidden': '',
         'muz_narozeni_podstat_hidden': '',
         'muz_narozeni_podstat2_kratky_hidden': '',
         'muz_narozeni_podstat3_kratky_hidden': '',
         'muz_narozeni_input_hidden': '',
         'muz_narozeni_sirka_stupne': lat[0],
         'muz_narozeni_sirka_minuty': lat[1],
         'muz_narozeni_sirka_smer': lat[2],    # 0 = N, 1 = S
         'muz_narozeni_delka_stupne': lon[0],
         'muz_narozeni_delka_minuty': lon[1],
         'muz_narozeni_delka_smer': lon[2],    # 0 = E, 1 = W
         'muz_narozeni_timezone_form': 'auto',
         'muz_narozeni_timezone_dst_form': 'auto',
         'zena_narozeni_den': dataDict['pDOB'][0],
         'zena_narozeni_mesic': dataDict['pDOB'][1],
         'zena_narozeni_rok': dataDict['pDOB'][2],
         'zena_narozeni_hodina': dataDict['pTOB'][0],
         'zena_narozeni_minuta': dataDict['pTOB'][1],
         'zena_narozeni_city': '',
         'zena_narozeni_mesto_hidden': 'Manually+place%3A+%C2%B0%27N%2C+%C2%B0%27E',
         'zena_narozeni_stat_hidden': 'XX',
         'zena_narozeni_podstat_kratky_hidden': '',
         'zena_narozeni_podstat_hidden': '',
         'zena_narozeni_podstat2_kratky_hidden': '',
         'zena_narozeni_podstat3_kratky_hidden': '',
         'zena_narozeni_input_hidden': '',
         'zena_narozeni_sirka_stupne': pLat[0],
         'zena_narozeni_sirka_minuty': pLat[1],
         'zena_narozeni_sirka_smer': pLat[2],
         'zena_narozeni_delka_stupne': pLon[0],
         'zena_narozeni_delka_minuty': pLon[1],
         'zena_narozeni_delka_smer': pLon[2],
         'zena_narozeni_timezone_form': 'auto',
         'zena_narozeni_timezone_dst_form': 'auto',
         'switch_interpretations': '0',
         'house_system': 'placidus',
         'uhel_orbis': '#tabs_redraw'}
    return R
DaveM@8 307
DaveM@8 308
DaveM@8 309
DaveM@8 310