changeset 4:99115e36316b

Developing and testing DOB (date of birth) and TOB (time of birth) gathering, and implementing geolocation of birthplaces to latitude/longitude coordinates
author DaveM
date Mon, 11 Dec 2017 11:29:38 +0000
parents c2898c2a3cc6
children 73cf5cabef86
files V2/dParse.py V2/runme.py timesheet.xlsx
diffstat 3 files changed, 193 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/V2/dParse.py	Sun Dec 10 17:25:53 2017 +0000
+++ b/V2/dParse.py	Mon Dec 11 11:29:38 2017 +0000
@@ -1,11 +1,127 @@
+#!/usr/bin/env python
+
+import csv
+import time
+import unicodedata
+from geopy.geocoders import Nominatim
+from geopy.exc import GeocoderTimedOut
+import pdb
+
+DEFAULT_TIME_H = 12
+DEFAULT_TIME_M = 00
+DEAULT_LOCATION = 'USA'
+
+def parseCSV(filename):
+	"""Return a list with one regulateData()-processed dict per data row of the CSV file filename."""
+	# 'with' closes the handle even if a row fails to parse (the bare open() leaked it);
+	# 'rb' is the mode the Python 2 csv module expects.
+	with open(filename,'rb') as csvFile:
+		return [regulateData(line) for line in csv.DictReader(csvFile)]
+
+def regulateData(dataDict):
+	print("Parse %s"%(str(dataDict['ID'])))
+	p_DOBQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
+	p_TOBQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
+	DOB_DQ = "Which day (numeric) have you been born?"
+	DOB_MQ = "Which month have you been born?"
+	DOB_YQ = "Year Of Birth"
+	TOB_Q = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
+	COB = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
+	p_COB = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."
+	dataDict['DOB'] = parseDOB(dataDict[DOB_DQ],dataDict[DOB_MQ],dataDict[DOB_YQ])
+	# DOB is the (day, month, year) tuple from parseDOB; TOB and pTOB below are (hour, minute) tuples from parseTOB
+	# pDOB comes from parsePartnerDOB, which can return None for answers too short to hold a date
+	dataDict['TOB'] = parseTOB(dataDict[TOB_Q])
+	dataDict['pDOB'] = parsePartnerDOB(dataDict[p_DOBQ])
+	dataDict['pTOB'] = parseTOB(dataDict[p_TOBQ])
+	# dataDict['COB'] = parseBirthTown(dataDict[COB])
+	# dataDict['pCOB'] = parseBirthTown(dataDict[p_COB])
+	return dataDict
+
+
+def parseBirthTown(s):
+	"""Geocode birthplace string s via Nominatim.
+
+	Returns (latitude, longitude, raw) for the first match, trying s, then s with
+	leading words dropped one at a time, then DEAULT_LOCATION. Returns None when
+	nothing matches or the timeout budget (timeoutTime reaching 60) is used up.
+	"""
+	try:
+		s = s.encode('ascii')
+	except UnicodeDecodeError:
+		# Python 2 byte string with non-ASCII bytes: treat it as latin-1 text
+		s = s.decode('latin-1')
+	timeoutTime = 2
+	geolocator = Nominatim(timeout=timeoutTime)
+	while timeoutTime < 60:
+		try:
+			location = geolocator.geocode(s)
+		except GeocoderTimedOut as e:
+			timeoutTime += 1
+			# geopy exceptions have no .msg attribute; format the exception itself
+			print("Error: geocode failed on input %s with message %s, incrementing timeout time to %d"%(s, e,timeoutTime))
+			time.sleep(5)
+			geolocator = Nominatim(timeout=timeoutTime)
+			continue
+		if location is not None:
+			return (location.latitude, location.longitude, location.raw)
+		if s == DEAULT_LOCATION:
+			# even the fallback is unknown: stop instead of re-querying it forever
+			return None
+		parts = s.split(' ',1)
+		s = parts[1] if len(parts) == 2 else DEAULT_LOCATION
+	return None
+
 def parsePartnerDOB(dob):
+	dob = dob.strip()
+	if(dob.count('-') == 2):
+		dob = dob.replace('-','/')
+	if(dob.count(' ') == 2):
+		dob = dob.replace(' ','/')
 	dob_ = dob.split('/')
 	if(len(dob_) != 3):
+		dob = dob.replace('/','').strip()
+		dob_ = []
+		# print dob
+		if len(dob) == 8: # ddmmyyyy
+			dob_.append(dob[:2])
+			dob_.append(dob[2:3])
+			dob_.append(dob[4:])
+		elif len(dob) == 7 and dob[1] == '1' and (dob[2] == '0' or dob[2] == '1' or dob[2] == '2'): # dmmyyyy
+			dob_.append(dob[0])
+			dob_.append(dob[1:2])
+			dob_.append(dob[4:])
+		elif len(dob) == 7: # ddmyyyy
+			dob_.append(dob[0:1])
+			dob_.append(dob[2])
+			dob_.append(dob[4:])
+		elif len(dob) == 6 and dob[3:4] != '19': # ddmmyy
+			dob_.append(dob[:2])
+			dob_.append(dob[2:3])
+			dob_.append(dob[2:])
+		elif len(dob) == 5 and dob[1] == '1' and (dob[2] == '0' or dob[2] == '1' or dob[2] == '2'): # dmmyy
+			dob_.append(dob[0])
+			dob_.append(dob[1:2])
+			dob_.append(dob[2:])
+		elif len(dob) == 5: # ddmyy
+			dob_.append(dob[:2])
+			dob_.append(dob[2])
+			dob_.append(dob[2:])
+		elif len(dob) == 4: # dmyy
+			dob_.append(dob[0])
+			dob_.append(dob[1])
+			dob_.append(dob[2:])
+		else:
+			if(len(dob) < 4):
+				return None
+			print dob
+			# print filter(lambda x: x.isdigit(),dob)
+			print 'no / partnerDOB issue'
 		# deal with no /'s
 	try:
-		d = int(dob_[0])
-		m = int(dob_[1])
-		y = int(dob_[2])
+		d = int(filter(lambda x: x.isdigit(),dob_[0]))
+		m = int(filter(lambda x: x.isdigit(),dob_[1]))
+		y = int(filter(lambda x: x.isdigit(),dob_[2]))
 		if y < 100:
 			y = y + 1900
 		if(d > 31 or m > 12 or y > 2017 or y < 1900):
@@ -15,76 +131,95 @@
 	return (d,m,y)
 
 def monthStringToNum(s):
-	m = {'jan':1,'feb':2,
-	'mar':3,'apr':4,'may':5,
-	'jun':6,'jul':7,'aug':8,
-	'sep':9,'oct':10,'nov':11,
-	'dec':12}
-	s_ = string.strip()[:3].lower()
+	# Month number (1-12) for a name such as 'Mar' or ' march ': only the first three letters count, case-insensitively; any other string raises ValueError('Not a month')
+	m = {'jan':1,'feb':2,'mar':3,
+	'apr':4,'may':5,'jun':6,'jul':7,'aug':8,
+	'sep':9,'oct':10,'nov':11,'dec':12}
+	s_ = s.strip()[:3].lower()
 	try:
-		out = m[s]
+		out = m[s_]
 		return out
 	except:
 		raise ValueError('Not a month')
 
+def checkMonthDay(d,m):
+	"""Sanity-check day d and month m (ints) and return (ok, d, m).
+
+	The returned d and m include any repair made here: an over-long day is
+	truncated and a day/month pair entered in American order is swapped.
+	"""
+	if d > 31: # take first two digits of day
+		d = int(str(d)[:2])
+		if d > 31:
+			d = int(str(d)[1])
+	# Day and month the wrong way round (American MM/DD); 12 is itself a valid month
+	if m > 12 and d <= 12:
+		d, m = m, d
+	if d < 1 or m < 1 or m > 12:
+		return (False,d,m)
+	if m == 2:
+		# leap years are not checked, so 29 February is always accepted
+		maxDay = 29
+	elif m in [4,6,9,11]:
+		maxDay = 30
+	else:
+		maxDay = 31
+	return (d <= maxDay,d,m)
+
 def parseDOB(d,m,y):
-	d = int(d.strip())
-	y = int(y.strip())
+	d = int(filter(lambda x: x.isdigit(),d))
+	y = int(filter(lambda x: x.isdigit(),y))
 	try:
+		# m may be a month name; monthStringToNum raises ValueError for anything else, and then m is read as a number
 		m = monthStringToNum(m.strip())
 	except ValueError:
 		m = int(m.strip())
 	if(y < 100):
 		y = y + 1900
+	(r,d,m) = checkMonthDay(d,m)
+	if not r:
+		print 'error with day month'
+		print (r,d,m)
 	return (d,m,y)
 
 def parseTOB(T):
-	timeFlat = None
+	"""Parse free-text time T into a 24-hour (hour, minute) tuple.
+	Accepts H:M, H.M, HHMM or a bare hour, each with an optional am/pm suffix.
+	Text that cannot be parsed gives (DEFAULT_TIME_H, DEFAULT_TIME_M)."""
+	timeFlag = None
+	T_ = T.replace('.','').lower().strip()
+	for flag, suffix in enumerate(('am','pm')): # 0 = am, 1 = pm
+		if suffix in T_:
+			timeFlag = flag
+			T = T_.replace(suffix,'')
+	T = T.strip()
+	if T.count('.') == 1:
+		T = T.replace('.',':')
 	try:
-		T = T.lower().strip()
-		if 'am' in T:
-			timeFlag = 0
-			T.replace('am','')
-		if 'pm' in T:
-			timeFlag = 1
-			T.replace('pm','')
-		t.strip()
 		if ':' in T:
-			T.split(':')
-			H = int(T[0])
-			M = int(T[1])
-		elif '.' in T:
-			T.split('.')
-			H = int(T[0])
-			M = int(T[1]) 
+			T_ = T.split(':')
+			H, M = int(T_[0]), int(T_[1])
 		else:
-			int(T)
-			if T < 24 :
-				H = T
+			if len(T) == 4:
+				H, M = int(T[:2]), int(T[2:])
+			elif int(T) <= 24 :
+				H = int(T)
 				M = 0
-			elif T > 100:
-				H = T/100
-				M = T%100
+			elif int(T) >= 100:
+				H = int(T)//100
+				M = int(T)%100
+			else:
+				# 25-99 fits no supported form; without this H and M stayed unassigned
+				raise ValueError('Not a time')
 		if timeFlag is not None:
 			if timeFlag == 0:
 				H = H%12
 			else:
 				H = H%12 + 12
 	except ValueError:
-		H = 12
-		M = 00
+		H = DEFAULT_TIME_H
+		M = DEFAULT_TIME_M
 	return (H,M)
 
-def regulateData(dataDict):
-	p_DOBQ = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
-	p_TOBQ = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
-	DOB_DQ = "Which day (numeric) have you been born?"
-	DOB_MQ = "Which month have you been born?"
-	DOB_YQ = "Year Of Birth"
-	TOB_Q = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
-	dataDict['DOB'] = parseDOB(dataDict[DOB_DQ],dataDict[DOB_MQ],dataDict[DOB_YQ])
-	dataDict['TOB'] = parseTOB(dataDict[TOB_Q])
-	dataDict['pDOB'] = parsePartnerDOB(dataDict[p_DOBQ])
-	dataDict['pTOB'] = parseTOB(dataDict[p_TOBQ])
 
-	return dataDict
\ No newline at end of file
+
--- a/V2/runme.py	Sun Dec 10 17:25:53 2017 +0000
+++ b/V2/runme.py	Mon Dec 11 11:29:38 2017 +0000
@@ -56,15 +56,15 @@
 	time.sleep(5)
 	return r
 
-def parseCSV(filename):
-	stream = csv.DictReader(open(filename,'rb'))
-	dictList = []
-	for line in stream:
-		dictList.append(dp.regulateData(line))
+# def parseCSV(filename):
+# 	stream = csv.DictReader(open(filename,'rb'))
+# 	dictList = []
+# 	for line in stream:
+# 		dictList.append(regulateData(line))
 
-	# dictList = headerParse(dictList)
-	# dictList = validateData(dictList)
-	return dictList
+# 	# dictList = headerParse(dictList)
+# 	# dictList = validateData(dictList)
+# 	return dictList
 
 def printToFile(filename,data):
 	keys = data[0].keys()
@@ -74,11 +74,11 @@
 		dict_writer.writerows(data)
 
 def testMain():
-	people = parseCSV('individuals.csv')
+	people = dp.parseCSV('individuals.csv')
 
 def _main():
 	# people = dict()
-	people = parseCSV('individuals.csv')
+	people = dp.parseCSV('individuals.csv')
 	horiscopeList = []
 	# people = [1,2,3,4,5]
 	for person in people:
Binary file timesheet.xlsx has changed