annotate V4/dParse.py @ 31:926b008ccb0c tip

resolving vertex and fixing description results - I believe everything works
author DaveM
date Sat, 19 May 2018 14:50:41 +0100
parents 0264a7888d54
children
rev   line source
DaveM@13 1 #!/usr/bin/env python
DaveM@13 2
DaveM@13 3 import csv
DaveM@13 4 import time
DaveM@13 5 import unicodedata
DaveM@13 6 from geopy.geocoders import Nominatim
DaveM@13 7 from geopy.exc import GeocoderTimedOut
DaveM@13 8 import random
DaveM@13 9 import pdb
DaveM@13 10
# Module-level fallbacks; both are currently unset (None).
# DEFAULT_TIME is not referenced by the parsers in this part of the file.
DEFAULT_TIME = None

# Fallback place query used by parseBirthTown() when an answer cannot be
# geocoded.  None means "no fallback".
DEFAULT_LOCATION = None
# Original (misspelled) name, kept as an alias because existing code refers
# to it.
DEAULT_LOCATION = DEFAULT_LOCATION
DaveM@13 14
def regulateData(dataDict):
    """Parse the raw birth answers of one survey row into structured values.

    ``dataDict`` is indexed by the questionnaire's question texts (plus an
    ``'ID'`` entry that is only used for the progress message).  The parsed
    results are stored back into the same dict under the keys ``'DOB'``,
    ``'TOB'``, ``'pDOB'``, ``'pTOB'``, ``'COB'`` and ``'pCOB'``; the dict is
    modified in place and also returned.

    Raises ``KeyError`` if ``'ID'`` or any of the question columns is missing.
    """
    print("Parse %s" % str(dataDict['ID']))

    # Question texts double as column keys, so they must match the survey
    # export exactly (including its wording quirks).
    dayQuestion = "Which day (numeric) have you been born?"
    monthQuestion = "Which month have you been born?"
    yearQuestion = "Year Of Birth"
    timeQuestion = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    partnerDateQuestion = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    partnerTimeQuestion = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    placeQuestion = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    partnerPlaceQuestion = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    # The respondent's own date of birth arrives as three separate answers.
    dataDict['DOB'] = parseDOB(
        dataDict[dayQuestion],
        dataDict[monthQuestion],
        dataDict[yearQuestion],
    )

    # Every remaining field is "one answer in, one parsed value out";
    # they are processed in this fixed order.
    singleAnswerSteps = (
        ('TOB', parseTOB, timeQuestion),
        ('pDOB', parsePartnerDOB, partnerDateQuestion),
        ('pTOB', parseTOB, partnerTimeQuestion),
        ('COB', parseBirthTown, placeQuestion),
        ('pCOB', parseBirthTown, partnerPlaceQuestion),
    )
    for outputKey, parser, question in singleAnswerSteps:
        dataDict[outputKey] = parser(dataDict[question])

    return dataDict
DaveM@13 37
DaveM@13 38
def parseBirthTown(s):
    """Geocode a free-text birthplace with geopy's Nominatim geocoder.

    The whole answer is tried first.  Whenever the geocoder finds nothing,
    the text up to and including the first space is dropped and the rest is
    tried again.  Once nothing is left, the module-level ``DEAULT_LOCATION``
    is tried (at most once) if it is set.

    If the geocoder raises, the request timeout is raised by one second, the
    function sleeps five seconds and retries the same query; it gives up once
    the timeout would reach 60 seconds.

    Parameters:
        s: the place answer.  Byte strings are decoded as Latin-1; text
           strings are used as they are.  ``None`` or an empty string goes
           straight to the default location.

    Returns:
        ``(latitude, longitude, raw)`` taken from the geocoder result, or
        ``None`` when nothing could be resolved.
    """
    if isinstance(s, bytes):
        # Latin-1 maps every byte value, so this decode cannot fail (the
        # original ASCII-then-Latin-1 attempt amounted to the same thing).
        s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    usedDefault = False

    while timeoutTime < 60:
        if not s:
            # Nothing left to try: fall back to the default exactly once,
            # so an unresolvable default cannot loop forever.
            if usedDefault or not DEAULT_LOCATION:
                return None
            s = DEAULT_LOCATION
            usedDefault = True

        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best-effort retry with a longer timeout.  Catching Exception
            # (rather than a bare except) lets Ctrl-C / SystemExit through.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)

        # No match: drop the leading word and try the remainder
        # (empty string when there was no space left).
        s = s.partition(' ')[2]

    return None
DaveM@13 68
def _splitCompactDOB(digits):
    """Cut a separator-less date string into [day, month, year] strings.

    Returns None when the length matches none of the supported layouts.
    May raise ValueError if the leading characters are not numeric.
    """
    size = len(digits)
    # A '1' followed by 0/1/2 after the first character reads as month 10-12
    # following a single-digit day.
    monthAfterOneDigitDay = digits[1:2] == '1' and digits[2:3] in ('0', '1', '2')

    if size == 8:  # ddmmyyyy
        dayEnd, monthEnd = 2, 4
    elif size == 7:
        if monthAfterOneDigitDay or int(digits[:2]) > 31:  # dmmyyyy
            dayEnd, monthEnd = 1, 3
        else:  # ddmyyyy
            dayEnd, monthEnd = 2, 3
    elif size == 6:  # ddmmyy
        # NOTE(review): the original guarded this branch with
        # `dob[3:4] != '19'`, a one-character slice compared with a
        # two-character string, i.e. always true.  Presumably a d/m/yyyy
        # layout was meant to be excluded here; the effective behaviour
        # (always ddmmyy) is kept until the intended rule is confirmed.
        dayEnd, monthEnd = 2, 4
    elif size == 5:
        if monthAfterOneDigitDay:  # dmmyy
            dayEnd, monthEnd = 1, 3
        else:  # ddmyy
            dayEnd, monthEnd = 2, 3
    elif size == 4:  # dmyy
        dayEnd, monthEnd = 1, 2
    else:
        return None
    return [digits[:dayEnd], digits[dayEnd:monthEnd], digits[monthEnd:]]


def parsePartnerDOB(dob):
    """Parse a free-text partner date of birth into ``(day, month, year)``.

    Accepted inputs are three fields separated by ``/``, ``-``, ``.`` or
    spaces, or a run of 4 to 8 characters without separators (layouts dmyy,
    dmmyy/ddmyy, ddmmyy, dmmyyyy/ddmyyyy, ddmmyyyy).  Non-digit characters
    inside a field are ignored.  Years below 100 are taken as 19xx, and when
    the month is above 12 while the day is at most 12 the two are swapped
    (month-first input).

    Returns:
        A tuple of three ints, or ``None`` when the input is missing, cannot
        be split into three numeric fields, or fails the range check
        (day 1-31, month 1-12, year 1900-2017).
    """
    if not dob:
        return None
    dob = dob.strip()

    # Normalise alternative separators, but only when there are exactly two
    # of them, i.e. when they plausibly delimit three fields.
    for separator in ('-', ' ', '.'):
        if dob.count(separator) == 2:
            dob = dob.replace(separator, '/')

    fields = dob.split('/')
    try:
        if len(fields) != 3:
            compact = dob.replace('/', '').strip()
            if len(compact) < 4:
                return None
            fields = _splitCompactDOB(compact)
            if fields is None:
                print('no / partnerDOB issue')
                return None
        # int('') raises ValueError when a field holds no digit at all.
        d, m, y = [int(''.join(ch for ch in field if ch.isdigit()))
                   for field in fields]
    except ValueError:
        return None

    if y < 100:
        y = y + 1900
    if m > 12 and d <= 12:
        d, m = m, d

    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        # The original built this message by adding ints to a str; the
        # resulting TypeError was swallowed and None returned.  The outcome
        # (None) is kept, the message is now actually printed.
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
DaveM@13 139
def monthStringToNum(s):
    """Convert an English month name to its number (1-12).

    Only the first three characters of the stripped, lower-cased input are
    looked at, so 'January', 'jan' and ' JAN ' all give 1.

    Raises:
        ValueError: if those three characters are not a month abbreviation.
    """
    monthNumbers = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }
    abbreviation = s.strip()[:3].lower()
    try:
        return monthNumbers[abbreviation]
    except KeyError:
        # Only a failed lookup means "not a month"; anything else should
        # surface as the real error instead of being relabelled.
        raise ValueError('Not a month')
DaveM@13 151
def checkMonthDay(d, m):
    """Repair and validate a day/month pair.

    Repairs applied, in order:
      * a day above 31 is cut to its first two digits; if that is still
        above 31, its second digit is used instead;
      * if the month is above 12 and the day is at most 12, the two are
        swapped (month-first input).

    Parameters:
        d: day as int.
        m: month as int.

    Returns:
        ``(ok, day, month)`` with the possibly repaired values.  ``ok`` is
        True when the month is 1-12 and the day lies between 1 and that
        month's length.  February is allowed 29 days because no year is
        available here to test for leap years.
    """
    if d > 31:  # take first two digits of day
        d = int(str(d)[:2])
        if d > 31:
            d = int(str(d)[1])

    # Day and month wrong way round - American.  A day of exactly 12 is a
    # valid month as well, hence <= (the original used <, which left e.g.
    # 12/25 unswapped).
    if m > 12 and d <= 12:
        d, m = m, d

    # Zero or negative values were previously reported as valid.
    if m < 1 or m > 12 or d < 1:
        return (False, d, m)

    if m == 2:
        longestDay = 29
    elif m in (4, 6, 9, 11):
        longestDay = 30
    else:
        longestDay = 31
    return (d <= longestDay, d, m)
DaveM@13 175
def parseDOB(d, m, y):
    """Parse separately answered day, month and year into ``(day, month, year)``.

    Parameters:
        d: day answer as text; non-digit characters are ignored.
        m: month answer as text, either an English month name (matched on
           its first three letters) or a number.
        y: year answer as text; non-digit characters are ignored.  A value
           below 100 is taken as 19xx.

    The day/month pair is passed through ``checkMonthDay``, which may repair
    it (e.g. swap month-first input).  If it still fails validation a
    message is printed, but the values are returned anyway.

    Returns:
        A tuple of three ints, or ``None`` when a field is missing or
        contains nothing that can be read as a number.
    """
    if d is None or m is None or y is None:
        return None

    try:
        # Joining the digit characters works on Python 2 and 3 alike
        # (int(filter(...)) only worked on Python 2); int('') raises
        # ValueError when the answer holds no digit at all.
        d = int(''.join(ch for ch in d if ch.isdigit()))
        y = int(''.join(ch for ch in y if ch.isdigit()))
    except ValueError:
        print('error with day or year')
        return None

    monthText = m.strip()
    try:
        m = monthStringToNum(monthText)
    except ValueError:
        # Not a month name - fall back to a numeric month.
        try:
            m = int(monthText)
        except ValueError:
            print('error with month')
            return None

    if y < 100:
        y = y + 1900

    (r, d, m) = checkMonthDay(d, m)
    if not r:
        print('error with day month')
        # Double parentheses: print the tuple itself on Python 2 and 3.
        print((r, d, m))
    return (d, m, y)
DaveM@13 191
def parseTOB(T):
    """Parse a free-text time of birth into ``(hour, minute)``.

    Understood forms: ``HH:MM`` (a single ``.`` is accepted in place of the
    colon), four characters ``HHMM``, a bare hour up to 24, or a number of
    100 and above read as hours * 100 + minutes (``930`` -> 9:30).  An
    ``am``/``pm`` marker, with or without dots, switches to 12-hour
    interpretation; when a marker is present all dots are removed before
    parsing.

    Returns:
        A tuple of two ints, or ``None`` when the input is missing, is not
        numeric, or is not a valid time of day.  ``(24, 0)`` is still
        accepted, as before; any other time with hour 24 is rejected.
    """
    if not T:
        return None

    timeFlag = None  # None: 24-hour input, 0: am, 1: pm
    undotted = T.replace('.', '').lower().strip()
    if 'am' in undotted:
        timeFlag = 0
        T = undotted.replace('am', '')
    if 'pm' in undotted:
        timeFlag = 1
        T = undotted.replace('pm', '')
    T = T.strip()
    if T.count('.') == 1:
        T = T.replace('.', ':')

    try:
        if ':' in T:
            pieces = T.split(':')
            H = int(pieces[0])
            M = int(pieces[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            number = int(T)
            if number <= 24:
                H, M = number, 0
            elif number >= 100:
                # >= rather than >: "100" is 01:00.  divmod keeps the hour
                # an int on Python 3, where "/" would produce a float.
                H, M = divmod(number, 100)
            else:
                return None
    except ValueError:
        return None

    # Reject negatives before the modulo below can fold them into range.
    if H < 0 or M < 0:
        return None

    if timeFlag == 0:
        H = H % 12
    elif timeFlag == 1:
        H = H % 12 + 12

    # Minutes run 0-59 (the original let 60 through).
    if M > 59 or H > 24 or (H == 24 and M != 0):
        return None
    return (H, M)
DaveM@19 233
DaveM@13 234
DaveM@13 235