annotate V5/dParse.py @ 23:11d4e438045e

make version 5
author DaveM
date Mon, 09 Apr 2018 15:07:21 +0100
parents
children
rev   line source
DaveM@23 1 #!/usr/bin/env python
DaveM@23 2
DaveM@23 3 import csv
DaveM@23 4 import time
DaveM@23 5 import unicodedata
DaveM@23 6 from geopy.geocoders import Nominatim
DaveM@23 7 from geopy.exc import GeocoderTimedOut
DaveM@23 8 import random
DaveM@23 9 import pdb
DaveM@23 10
# Module-level fallback values. Both are None, i.e. "no fallback configured".
# NOTE(review): DEFAULT_TIME is not referenced in the visible part of this
# module - confirm whether it is still needed.
DEFAULT_TIME = None
# Place name handed to the geocoder once a birth-place string has been
# exhausted without a match.
DEFAULT_LOCATION = None
# Historical misspelling, kept as an alias because existing code refers to it.
DEAULT_LOCATION = DEFAULT_LOCATION
DaveM@23 14
def regulateData(dataDict):
    """Parse the raw birth-data answers of one survey row, in place.

    ``dataDict`` must contain an ``'ID'`` entry plus the eight survey
    question keys listed below.  The parsed values are stored under the
    short keys ``'DOB'``, ``'TOB'``, ``'pDOB'``, ``'pTOB'``, ``'COB'`` and
    ``'pCOB'``, and the same (mutated) dictionary is returned.
    """
    print("Parse %s" % str(dataDict['ID']))

    # Survey question texts double as the keys of the incoming row.
    askOwnDay = "Which day (numeric) have you been born?"
    askOwnMonth = "Which month have you been born?"
    askOwnYear = "Year Of Birth"
    askOwnTime = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    askPartnerDate = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    askPartnerTime = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    askOwnPlace = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    askPartnerPlace = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    # The respondent's own date of birth arrives as three separate answers.
    ownDay = dataDict[askOwnDay]
    ownMonth = dataDict[askOwnMonth]
    ownYear = dataDict[askOwnYear]
    dataDict['DOB'] = parseDOB(ownDay, ownMonth, ownYear)

    # Every remaining field is a single answer run through a single parser.
    # The order matters: the parsers print diagnostics and the place parser
    # talks to the geocoding service.
    singleAnswerFields = (
        ('TOB', parseTOB, askOwnTime),
        ('pDOB', parsePartnerDOB, askPartnerDate),
        ('pTOB', parseTOB, askPartnerTime),
        ('COB', parseBirthTown, askOwnPlace),
        ('pCOB', parseBirthTown, askPartnerPlace),
    )
    for targetKey, parser, question in singleAnswerFields:
        dataDict[targetKey] = parser(dataDict[question])
    return dataDict
DaveM@23 37
DaveM@23 38
def parseBirthTown(s):
    """Geocode a free-text birth place.

    The text is sent to the Nominatim geocoder.  When nothing is found, the
    first space-separated word is dropped and the remainder is tried again;
    once a single word is left, ``DEAULT_LOCATION`` is tried exactly once.

    Returns ``(latitude, longitude, raw)`` taken from the geocoder result,
    or ``None`` when the input is empty, no candidate could be resolved, or
    the timeout counter reached 60 after repeated geocoder errors.
    """
    if not s:
        # Nothing to look up (None or empty answer).
        return None
    if isinstance(s, bytes):
        # Byte strings are decoded as latin-1, which accepts every byte
        # value (and is identical to ASCII for plain ASCII input).
        s = s.decode('latin-1')

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    usedDefault = False
    # Stop when there is nothing left to try (s is empty/None) or when the
    # geocoder kept failing until the timeout counter hit its cap.
    while s and timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Best effort: any geocoder failure is retried with a larger
            # timeout after a pause.  KeyboardInterrupt/SystemExit are not
            # caught, so the script can still be stopped.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue

        if location is not None:
            return (location.latitude, location.longitude, location.raw)

        remainder = s.split(' ', 1)
        if len(remainder) == 2:
            # Drop the leading word and retry with the rest.
            s = remainder[1]
        elif not usedDefault:
            s = DEAULT_LOCATION
            usedDefault = True
        else:
            # The fallback location did not resolve either; give up rather
            # than cycling through it forever.
            return None
    return None
DaveM@23 68
def _splitCompactDOB(digits):
    """Split a date written without separators into [day, month, year] strings.

    Returns None when the length of ``digits`` matches no supported layout.
    May raise ValueError if a 7-character value does not start with digits.
    """
    n = len(digits)
    # A '1' followed by 0, 1 or 2 right after the first character is read as
    # a two-digit month (10-12) following a one-digit day.
    monthIsTwoDigits = n >= 3 and digits[1] == '1' and digits[2] in ('0', '1', '2')
    if n == 8:  # ddmmyyyy
        cuts = (2, 4)
    elif n == 7:
        if monthIsTwoDigits or int(digits[:2]) > 31:  # dmmyyyy
            cuts = (1, 3)
        else:  # ddmyyyy
            cuts = (2, 3)
    elif n == 6:
        if digits[2:4] == '19':  # dmyyyy - a four-digit 19xx year
            cuts = (1, 2)
        else:  # ddmmyy
            cuts = (2, 4)
    elif n == 5:
        cuts = (1, 3) if monthIsTwoDigits else (2, 3)  # dmmyy / ddmyy
    elif n == 4:  # dmyy
        cuts = (1, 2)
    else:
        return None
    first, second = cuts
    return [digits[:first], digits[first:second], digits[second:]]


def parsePartnerDOB(dob):
    """Parse a free-text date of birth into a ``(day, month, year)`` tuple.

    Accepted input is day, month and year separated by ``/``, ``-`` or
    single spaces, or a run of 4 to 8 characters without separators (see
    ``_splitCompactDOB`` for the layouts).  Non-digit characters inside each
    part are ignored.  Two-digit years are placed in the 1900s, and day and
    month are swapped when the month is above 12 and the day is at most 12.

    Returns ``None`` when the text cannot be split into three numeric parts
    or when the result is out of range (day 1-31, month 1-12,
    year 1900-2017).
    """
    dob = dob.strip()
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    parts = dob.split('/')
    try:
        if len(parts) != 3:
            # Not three slash-separated parts: treat it as one compact value.
            compact = dob.replace('/', '').strip()
            if len(compact) < 4:
                return None
            parts = _splitCompactDOB(compact)
            if parts is None:
                print('no / partnerDOB issue')
                return None
        d, m, y = [int(''.join(ch for ch in part if ch.isdigit()))
                   for part in parts]
    except ValueError:
        # A part contained no digits at all.
        return None

    if y < 100:
        y += 1900
    if m > 12 and d <= 12:
        # Month and day were entered the other way round.
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)
DaveM@23 139
def monthStringToNum(s):
    """Return the month number (1-12) for an English month name.

    Only the first three characters of the stripped, lower-cased input are
    looked at, so both 'Jan' and 'January' give 1.

    Raises ValueError('Not a month') when those characters name no month.
    """
    abbreviations = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                     'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
    numberByAbbreviation = dict(
        (name, number) for number, name in enumerate(abbreviations, 1))
    key = s.strip()[:3].lower()
    if key not in numberByAbbreviation:
        raise ValueError('Not a month')
    return numberByAbbreviation[key]
DaveM@23 151
def checkMonthDay(d, m):
    """Repair and validate a day/month pair.

    Repairs applied before validation:
      * a day above 31 is cut to its first two digits, and if that is still
        above 31, to the second of those digits;
      * when the month is above 12 and the day is at most 12, the two values
        are swapped (month-first input).

    Returns ``(valid, day, month)`` with the repaired values.  ``valid`` is
    True only for a month of 1-12 and a day from 1 up to 29 for February,
    30 for April, June, September and November, and 31 otherwise.
    """
    if d > 31:
        d = int(str(d)[:2])
        if d > 31:
            d = int(str(d)[1])
    if m > 12 and d <= 12:
        # Day and month are the wrong way round (American order).
        d, m = m, d
    shorterMonths = {2: 29, 4: 30, 6: 30, 9: 30, 11: 30}
    valid = 1 <= m <= 12 and 1 <= d <= shorterMonths.get(m, 31)
    return (valid, d, m)
DaveM@23 175
def parseDOB(d, m, y):
    """Parse separately entered day, month and year answers.

    ``d`` and ``y`` are strings whose non-digit characters are ignored.
    ``m`` is a string holding either a month name (handled by
    ``monthStringToNum``) or a month number.  Two-digit years are placed in
    the 1900s.  Day and month are passed through ``checkMonthDay``; when it
    reports them invalid, a diagnostic is printed and the values are still
    returned.

    Returns ``(day, month, year)``, or ``None`` when the day or year holds
    no digits or the month is neither a month name nor a number.
    """
    try:
        d = int(''.join(ch for ch in d if ch.isdigit()))
        y = int(''.join(ch for ch in y if ch.isdigit()))
    except ValueError:
        # Blank or digit-free day/year answer.
        return None
    monthText = m.strip()
    try:
        m = monthStringToNum(monthText)
    except ValueError:
        # Not a month name; fall back to a numeric month.
        try:
            m = int(monthText)
        except ValueError:
            return None
    if y < 100:
        y += 1900
    (r, d, m) = checkMonthDay(d, m)
    if not r:
        print('error with day month')
        print((r, d, m))
    return (d, m, y)
DaveM@23 191
def parseTOB(T):
    """Parse a free-text time of birth into an ``(hour, minute)`` tuple.

    Recognised forms:
      * ``HH:MM`` or ``HH.MM``;
      * exactly four characters, read as ``HHMM``;
      * a bare number up to 24, read as a full hour;
      * a bare number of 100 or more, read as hours * 100 + minutes.
    An ``am`` / ``pm`` marker (dots ignored, so ``a.m.`` works) maps the
    hour onto the 24-hour clock.

    Returns ``None`` when the text is missing or not numeric, is a bare
    number between 25 and 99, is negative, or is out of range.  An hour of
    24 is accepted, as in the original rule; minutes must be 0-59.
    """
    if T is None:
        return None
    meridiem = None  # 0 = am, 1 = pm, None = no marker present
    withoutDots = T.replace('.', '').lower().strip()
    if 'am' in withoutDots:
        meridiem = 0
        T = withoutDots.replace('am', '')
    if 'pm' in withoutDots:
        meridiem = 1
        T = withoutDots.replace('pm', '')
    T = T.strip()
    if T.count('.') == 1:
        # A single dot is an hour/minute separator.
        T = T.replace('.', ':')
    try:
        if ':' in T:
            fields = T.split(':')
            H = int(fields[0])
            M = int(fields[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H, M = value, 0
            elif value >= 100:
                H, M = divmod(value, 100)
            else:
                return None
    except ValueError:
        return None
    if H < 0 or M < 0:
        # Reject negatives before the modulo below can wrap them into range.
        return None
    if meridiem == 0:
        H = H % 12
    elif meridiem == 1:
        H = H % 12 + 12
    if H > 24 or M > 59:
        return None
    return (H, M)
DaveM@23 233
DaveM@23 234
DaveM@23 235