Mercurial > hg > horiscopes
changeset 4:99115e36316b
Developing and testing date-of-birth (DOB) and time-of-birth (TOB) gathering, and implementing geolocation of birthplaces to latitude/longitude coordinates
author | DaveM |
---|---|
date | Mon, 11 Dec 2017 11:29:38 +0000 |
parents | c2898c2a3cc6 |
children | 73cf5cabef86 |
files | V2/dParse.py V2/runme.py timesheet.xlsx |
diffstat | 3 files changed, 193 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/V2/dParse.py Sun Dec 10 17:25:53 2017 +0000 +++ b/V2/dParse.py Mon Dec 11 11:29:38 2017 +0000 @@ -1,11 +1,127 @@ +#!/usr/bin/env python + +import csv +import time +import unicodedata +from geopy.geocoders import Nominatim +from geopy.exc import GeocoderTimedOut +import pdb + +DEFAULT_TIME_H = 12 +DEFAULT_TIME_M = 00 +DEAULT_LOCATION = 'USA' + +def parseCSV(filename): + stream = csv.DictReader(open(filename,'rb')) + dictList = [] + for line in stream: + dictList.append(regulateData(line)) + return dictList + +def regulateData(dataDict): + print("Parse %s"%(str(dataDict['ID']))) + p_DOBQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)." + p_TOBQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)." + DOB_DQ = "Which day (numeric) have you been born?" + DOB_MQ = "Which month have you been born?" + DOB_YQ = "Year Of Birth" + TOB_Q = "At what exact time were you born? Please use the format HHMM (for example, 2204)." + COB = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)." + p_COB = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)." 
+ dataDict['DOB'] = parseDOB(dataDict[DOB_DQ],dataDict[DOB_MQ],dataDict[DOB_YQ]) + # print (dataDict[DOB_DQ],dataDict[DOB_MQ],dataDict[DOB_YQ]) + # print dataDict['DOB'] + dataDict['TOB'] = parseTOB(dataDict[TOB_Q]) + dataDict['pDOB'] = parsePartnerDOB(dataDict[p_DOBQ]) + dataDict['pTOB'] = parseTOB(dataDict[p_TOBQ]) + # dataDict['COB'] = parseBirthTown(dataDict[COB]) + # dataDict['pCOB'] = parseBirthTown(dataDict[p_COB]) + return dataDict + + +def parseBirthTown(s): + try: + s = s.encode('ascii') + except UnicodeDecodeError: + # pdb.set_trace() + s = s.decode('latin-1') + # s = unicodedata.normalize('NFKD',s.decode('utf-8')).encode('ascii','ignore') + timeoutTime = 2 + geolocator = Nominatim(timeout=timeoutTime) + while s is not [] and timeoutTime < 60: + try: + location = geolocator.geocode(s) + if location is not None: + # print(location.raw) + # print (location.latitude, location.longitude) + return (location.latitude, location.longitude, location.raw) + else: + s = s.split(' ',1) + if len(s) == 2: + s = s[1] + # print s + else: + s = DEAULT_LOCATION + except GeocoderTimedOut as e: + timeoutTime += 1 + print("Error: geocode failed on input %s with message %s, incrementing timeout time to %d"%(s, e.msg,timeoutTime)) + time.sleep(5) + geolocator = Nominatim(timeout=timeoutTime) + + + + # places = geograpy.get_place_context(text=s) + def parsePartnerDOB(dob): + dob = dob.strip() + if(dob.count('-') == 2): + dob = dob.replace('-','/') + if(dob.count(' ') == 2): + dob = dob.replace(' ','/') dob_ = dob.split('/') if(len(dob_) != 3): + dob = dob.replace('/','').strip() + dob_ = [] + # print dob + if len(dob) == 8: # ddmmyyyy + dob_.append(dob[:2]) + dob_.append(dob[2:3]) + dob_.append(dob[4:]) + elif len(dob) == 7 and dob[1] == '1' and (dob[2] == '0' or dob[2] == '1' or dob[2] == '2'): # dmmyyyy + dob_.append(dob[0]) + dob_.append(dob[1:2]) + dob_.append(dob[4:]) + elif len(dob) == 7: # ddmyyyy + dob_.append(dob[0:1]) + dob_.append(dob[2]) + dob_.append(dob[4:]) + 
elif len(dob) == 6 and dob[3:4] != '19': # ddmmyy + dob_.append(dob[:2]) + dob_.append(dob[2:3]) + dob_.append(dob[2:]) + elif len(dob) == 5 and dob[1] == '1' and (dob[2] == '0' or dob[2] == '1' or dob[2] == '2'): # dmmyy + dob_.append(dob[0]) + dob_.append(dob[1:2]) + dob_.append(dob[2:]) + elif len(dob) == 5: # ddmyy + dob_.append(dob[:2]) + dob_.append(dob[2]) + dob_.append(dob[2:]) + elif len(dob) == 4: # dmyy + dob_.append(dob[0]) + dob_.append(dob[1]) + dob_.append(dob[2:]) + else: + if(len(dob) < 4): + return None + print dob + # print filter(lambda x: x.isdigit(),dob) + print 'no / partnerDOB issue' # deal with no /'s try: - d = int(dob_[0]) - m = int(dob_[1]) - y = int(dob_[2]) + d = int(filter(lambda x: x.isdigit(),dob_[0])) + m = int(filter(lambda x: x.isdigit(),dob_[1])) + y = int(filter(lambda x: x.isdigit(),dob_[2])) if y < 100: y = y + 1900 if(d > 31 or m > 12 or y > 2017 or y < 1900): @@ -15,76 +131,95 @@ return (d,m,y) def monthStringToNum(s): - m = {'jan':1,'feb':2, - 'mar':3,'apr':4,'may':5, - 'jun':6,'jul':7,'aug':8, - 'sep':9,'oct':10,'nov':11, - 'dec':12} - s_ = string.strip()[:3].lower() + # print 'inMonthStringToNum' + m = {'jan':1,'feb':2,'mar':3, + 'apr':4,'may':5,'jun':6,'jul':7,'aug':8, + 'sep':9,'oct':10,'nov':11,'dec':12} + s_ = s.strip()[:3].lower() try: - out = m[s] + out = m[s_] return out except: raise ValueError('Not a month') +def checkMonthDay(d,m): + if d > 31: # take first two digits of day + d = int(str(d)[:2]) + if d > 31: + d = int(str(d)[1]) + if m > 12 and d < 12: # Day and month wrong way round - American + temp = m + m = d + d = temp + if(m == 2): + if d <= 29: + return (True,d,m) + else: + return (False,d,m) + elif m in [4,6,9,11]: + if d <= 30: + return (True,d,m) + else: + return (False,d,m) + elif m <= 12 and d <= 31: + return (True,d,m) + else: + return (False,d,m) + def parseDOB(d,m,y): - d = int(d.strip()) - y = int(y.strip()) + d = int(filter(lambda x: x.isdigit(),d)) + y = int(filter(lambda x: x.isdigit(),y)) 
try: + # print m m = monthStringToNum(m.strip()) except ValueError: m = int(m.strip()) if(y < 100): y = y + 1900 + (r,d,m) = checkMonthDay(d,m) + if not r: + print 'error with day month' + print (r,d,m) return (d,m,y) def parseTOB(T): - timeFlat = None + # pdb.set_trace() + timeFlag = None + T_ = T.replace('.','').lower().strip() + if 'am' in T_: + timeFlag = 0 + T = T_.replace('am','') + if 'pm' in T_: + timeFlag = 1 + T = T_.replace('pm','') + T = T.strip() + if T.count('.') == 1: + T = T.replace('.',':') try: - T = T.lower().strip() - if 'am' in T: - timeFlag = 0 - T.replace('am','') - if 'pm' in T: - timeFlag = 1 - T.replace('pm','') - t.strip() if ':' in T: - T.split(':') - H = int(T[0]) - M = int(T[1]) - elif '.' in T: - T.split('.') - H = int(T[0]) - M = int(T[1]) + T_ = T.split(':') + # pdb.set_trace() + H = int(T_[0]) + M = int(T_[1]) else: - int(T) - if T < 24 : - H = T + if len(T) == 4: + H = int(T[:2]) + M = int(T[2:]) + elif int(T) <= 24 : + H = int(T) M = 0 - elif T > 100: - H = T/100 - M = T%100 + elif int(T) > 100: + H = int(T)/100 + M = int(T)%100 if timeFlag is not None: if timeFlag == 0: H = H%12 else: H = H%12 + 12 except ValueError: - H = 12 - M = 00 + H = DEFAULT_TIME_H + M = DEFAULT_TIME_M return (H,M) -def regulateData(dataDict): - p_DOBQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)." - p_TOBQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)." - DOB_DQ = "Which day (numeric) have you been born?" - DOB_MQ = "Which month have you been born?" - DOB_YQ = "Year Of Birth" - TOB_Q = "At what exact time were you born? Please use the format HHMM (for example, 2204)." - dataDict['DOB'] = parseDOB(dataDict[DOB_DQ],dataDict[DOB_MQ],dataDict[DOB_YQ]) - dataDict['TOB'] = parseTOB(dataDict[TOB_Q]) - dataDict['pDOB'] = parsePartnerDOB(dataDict[p_DOBQ]) - dataDict['pTOB'] = parseTOB(dataDict[p_TOBQ]) - return dataDict \ No newline at end of file +
--- a/V2/runme.py Sun Dec 10 17:25:53 2017 +0000 +++ b/V2/runme.py Mon Dec 11 11:29:38 2017 +0000 @@ -56,15 +56,15 @@ time.sleep(5) return r -def parseCSV(filename): - stream = csv.DictReader(open(filename,'rb')) - dictList = [] - for line in stream: - dictList.append(dp.regulateData(line)) +# def parseCSV(filename): +# stream = csv.DictReader(open(filename,'rb')) +# dictList = [] +# for line in stream: +# dictList.append(regulateData(line)) - # dictList = headerParse(dictList) - # dictList = validateData(dictList) - return dictList +# # dictList = headerParse(dictList) +# # dictList = validateData(dictList) +# return dictList def printToFile(filename,data): keys = data[0].keys() @@ -74,11 +74,11 @@ dict_writer.writerows(data) def testMain(): - people = parseCSV('individuals.csv') + people = dp.parseCSV('individuals.csv') def _main(): # people = dict() - people = parseCSV('individuals.csv') + people = dp.parseCSV('individuals.csv') horiscopeList = [] # people = [1,2,3,4,5] for person in people: