Mercurial > hg > horiscopes
diff V5/dParse.py @ 23:11d4e438045e
make version 5
author | DaveM |
---|---|
date | Mon, 09 Apr 2018 15:07:21 +0100 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# Provenance (diff header of the changeset view this file was recovered from):
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/V5/dParse.py Mon Apr 09 15:07:21 2018 +0100
#   @@ -0,0 +1,235 @@
"""Parsers that normalise free-text questionnaire answers.

The module turns the raw answer strings of one response row (dates of birth,
times of birth and places of birth for a respondent and their partner) into
tuples of numbers.  The code is written to run unchanged on Python 2 and
Python 3.
"""

import csv
import time
import unicodedata
import random
import pdb

try:
    from geopy.geocoders import Nominatim
    from geopy.exc import GeocoderTimedOut
except ImportError:
    # geopy is only needed by parseBirthTown(); keeping the import optional
    # lets the pure date/time parsers be used (and tested) without it.
    Nominatim = None
    GeocoderTimedOut = None

DEFAULT_TIME = None
# NOTE: the misspelling (DEAULT) is kept so existing references keep working.
DEAULT_LOCATION = None

# Accepted range for a birth year in parsePartnerDOB().
_MIN_YEAR = 1900
_MAX_YEAR = 2017

_MONTHS = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}


def _digits(text):
    """Return only the digit characters of *text*, in order."""
    return ''.join(ch for ch in text if ch.isdigit())


def _to_int(text):
    """Return the digits of *text* as an int; ValueError if there are none."""
    return int(_digits(text))


def _month_to_int(text):
    """Return a month number from digits or, failing that, a month name."""
    digits = _digits(text)
    if digits:
        return int(digits)
    return monthStringToNum(text)


def regulateData(dataDict):
    """Add parsed birth data to one response row and return the row.

    *dataDict* must contain an ``'ID'`` entry and the question strings used
    below as keys (presumably a row produced by ``csv.DictReader`` -- confirm
    against the caller).  The keys ``'DOB'``, ``'TOB'``, ``'pDOB'``,
    ``'pTOB'``, ``'COB'`` and ``'pCOB'`` are written in place.

    Raises whatever parseDOB() / parseBirthTown() raise (ValueError for an
    unparseable respondent day or year, KeyError for a missing question).
    """
    print("Parse %s" % (str(dataDict['ID'])))
    # The question texts are dictionary keys: they must match the input
    # headers exactly, including their original wording.
    p_DOBQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
    p_TOBQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
    DOB_DQ = "Which day (numeric) have you been born?"
    DOB_MQ = "Which month have you been born?"
    DOB_YQ = "Year Of Birth"
    TOB_Q = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
    COB = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
    p_COB = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

    dataDict['DOB'] = parseDOB(dataDict[DOB_DQ], dataDict[DOB_MQ], dataDict[DOB_YQ])
    dataDict['TOB'] = parseTOB(dataDict[TOB_Q])
    dataDict['pDOB'] = parsePartnerDOB(dataDict[p_DOBQ])
    dataDict['pTOB'] = parseTOB(dataDict[p_TOBQ])
    dataDict['COB'] = parseBirthTown(dataDict[COB])
    dataDict['pCOB'] = parseBirthTown(dataDict[p_COB])
    return dataDict


def parseBirthTown(s):
    """Geocode a free-text place name.

    Returns ``(latitude, longitude, raw)`` from the first successful
    Nominatim lookup.  When a lookup finds nothing, the first
    space-separated word is dropped and the remainder is tried again.
    Returns ``DEAULT_LOCATION`` when the input is empty, when no words are
    left to drop, or when the retry budget is exhausted.

    A failing lookup (any ``Exception`` from the geocoder) is retried after a
    5 second pause with the timeout raised by one second, until the timeout
    reaches 60 seconds.

    Raises ImportError if geopy is not installed.
    """
    if not s:
        return DEAULT_LOCATION
    if isinstance(s, bytes):
        # Byte strings (Python 2 ``str``) are decoded as latin-1, as before;
        # latin-1 maps every byte, so this cannot fail.
        s = s.decode('latin-1')
    if Nominatim is None:
        raise ImportError("geopy is required for parseBirthTown()")

    timeoutTime = 2
    geolocator = Nominatim(timeout=timeoutTime)
    while timeoutTime < 60:
        try:
            location = geolocator.geocode(s)
        except Exception:
            # Deliberately broad: any geocoder failure is retried, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            timeoutTime += 1
            print("Error: geocode failed on input %s, incrementing timeout time to %d" % (s, timeoutTime))
            time.sleep(5)
            geolocator = Nominatim(timeout=timeoutTime)
            continue
        if location is not None:
            return (location.latitude, location.longitude, location.raw)
        remainder = s.split(' ', 1)
        if len(remainder) != 2:
            # Nothing left to strip: give up at once instead of looping on
            # an unusable query until the retry budget runs out.
            return DEAULT_LOCATION
        s = remainder[1]
    return DEAULT_LOCATION


def _split_compact_dob(dob):
    """Split a separator-less date into [day, month, year] strings.

    Returns None when the length matches no known layout.  May raise
    ValueError when the text is not numeric.
    """
    size = len(dob)
    if size == 8:  # ddmmyyyy
        cuts = (2, 4)
    elif size == 7:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyyyy
            cuts = (1, 3)
        elif int(dob[:2]) > 31:  # dmmyyyy
            cuts = (1, 3)
        else:  # ddmyyyy
            cuts = (2, 3)
    elif size == 6:
        # The original guard compared dob[3:4] (one character) with '19' and
        # so never fired.  A '19' in the year position with non-zero single
        # digits before it is read as d/m/yyyy; everything else as ddmmyy.
        if dob[2:4] == '19' and dob[0] != '0' and dob[1] != '0':  # dmyyyy
            cuts = (1, 2)
        else:  # ddmmyy
            cuts = (2, 4)
    elif size == 5:
        if dob[1] == '1' and dob[2] in ('0', '1', '2'):  # dmmyy
            cuts = (1, 3)
        else:  # ddmyy
            cuts = (2, 3)
    elif size == 4:  # dmyy
        cuts = (1, 2)
    else:
        return None
    first, second = cuts
    return [dob[:first], dob[first:second], dob[second:]]


def parsePartnerDOB(dob):
    """Parse a free-text date of birth into ``(day, month, year)``.

    Accepts ``/``, ``-`` or single spaces as separators, or a run of 4 to 8
    digits without separators.  Two-digit years are taken as 19xx, and day
    and month are swapped when only the swapped order is plausible
    (month > 12 and day <= 12).  A month without digits is looked up as a
    month name.

    Returns None when the text cannot be parsed or the result is outside
    day 1-31, month 1-12, year 1900-2017.
    """
    if dob is None:
        return None
    dob = dob.strip()
    if dob.count('-') == 2:
        dob = dob.replace('-', '/')
    if dob.count(' ') == 2:
        dob = dob.replace(' ', '/')
    dob_ = dob.split('/')
    try:
        if len(dob_) != 3:
            # No usable separators: fall back to fixed-width layouts.
            dob = dob.replace('/', '').strip()
            dob_ = _split_compact_dob(dob)
            if dob_ is None:
                if len(dob) >= 4:
                    print('no / partnerDOB issue')
                return None
        d = _to_int(dob_[0])
        m = _month_to_int(dob_[1])
        y = _to_int(dob_[2])
    except ValueError:
        return None
    if y < 100:
        y = y + 1900
    if m > 12 and d <= 12:
        d, m = m, d
    if d > 31 or d < 1 or m > 12 or m < 1 or y > _MAX_YEAR or y < _MIN_YEAR:
        print('error with DOB %d/%d/%d' % (d, m, y))
        return None
    return (d, m, y)


def monthStringToNum(s):
    """Return the month number (1-12) for an English month name.

    Only the first three letters are compared, case-insensitively.
    Raises ValueError when they do not name a month.
    """
    key = s.strip()[:3].lower()
    try:
        return _MONTHS[key]
    except KeyError:
        raise ValueError('Not a month')


def checkMonthDay(d, m):
    """Sanity-check a day/month pair and return ``(ok, day, month)``.

    A day above 31 is first cut down to its leading two digits (and, if
    still above 31, to the second of those digits).  Day and month are
    swapped when the month is above 12 and the day could be a month.
    ``ok`` is True when 1 <= month <= 12 and the day fits that month
    (February allows 29; leap years are not checked).
    """
    if d > 31:  # take first two digits of day
        d = int(str(d)[:2])
        if d > 31:
            # NOTE(review): keeps the *second* digit, as the original did;
            # confirm that this is the intended fallback.
            d = int(str(d)[1])
    if m > 12 and d <= 12:  # Day and month wrong way round - American
        d, m = m, d
    if m == 2:
        limit = 29
    elif m in (4, 6, 9, 11):
        limit = 30
    elif 1 <= m <= 12:
        limit = 31
    else:
        return (False, d, m)
    return (1 <= d <= limit, d, m)


def parseDOB(d, m, y):
    """Parse separate day, month and year answers into ``(day, month, year)``.

    Non-digit characters in the day and year are ignored; the month may be
    a number or an English month name.  Two-digit years are taken as 19xx.
    An implausible day/month pair is reported on stdout but still returned.

    Raises ValueError when the day or year holds no digits or the month is
    neither a month name nor an integer.
    """
    d = _to_int(d)
    y = _to_int(y)
    m = m.strip()
    try:
        m = monthStringToNum(m)
    except ValueError:
        m = int(m)
    if y < 100:
        y = y + 1900
    (r, d, m) = checkMonthDay(d, m)
    if not r:
        print('error with day month')
        print((r, d, m))
    return (d, m, y)


def parseTOB(T):
    """Parse a free-text time of birth into ``(hour, minute)``.

    Understands ``HHMM``, ``HMM``, a bare hour, ``H:MM`` / ``H.MM`` and an
    optional am/pm marker (dots in ``a.m.`` / ``p.m.`` are ignored).

    Returns None when the text cannot be parsed or is not a valid time.
    ``24:00`` is still returned as ``(24, 0)`` for backward compatibility.
    """
    if T is None:
        return None
    timeFlag = None  # 0 = am, 1 = pm, None = 24-hour input
    T_ = T.replace('.', '').lower().strip()
    if 'am' in T_:
        timeFlag = 0
        T = T_.replace('am', '')
    if 'pm' in T_:
        timeFlag = 1
        T = T_.replace('pm', '')
    T = T.strip()
    if T.count('.') == 1:
        T = T.replace('.', ':')
    try:
        if ':' in T:
            pieces = T.split(':')
            H = int(pieces[0])
            M = int(pieces[1])
        elif len(T) == 4:
            H = int(T[:2])
            M = int(T[2:])
        else:
            value = int(T)
            if value <= 24:
                H = value
                M = 0
            elif value >= 100:  # HMM, e.g. 930 or 100
                H, M = divmod(value, 100)
            else:
                return None
    except ValueError:
        return None
    if H < 0 or M < 0:
        return None
    if timeFlag == 0:
        H = H % 12
    elif timeFlag == 1:
        H = H % 12 + 12
    if H > 24 or M > 59 or (H == 24 and M != 0):
        return None
    return (H, M)