view V4/dParse.py @ 13:b253748dbb11

developing V4 - Class-based structure, with self-calculation of all aspects
author DaveM
date Sun, 04 Mar 2018 14:51:43 +0000
parents
children 50a95089414d
line wrap: on
line source
#!/usr/bin/env python

import csv
import time
import unicodedata
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import random
import pdb

# Fallback time of birth (hour, minute) returned by parseTOB when the
# supplied time string cannot be converted to integers.
DEFAULT_TIME_H = 12
DEFAULT_TIME_M = 00
# Fallback geocoding query used by parseBirthTown once a place string has no
# further words to drop. NOTE(review): the name is misspelled ("DEAULT");
# it is referenced under this spelling, so rename both sides together.
DEAULT_LOCATION = 'USA'

# def parseCSV(filename):
# 	stream = csv.DictReader(open(filename,'rb'))
# 	dictList = []
# 	people = []
# 	for line in stream:
# 		people.append(syn.person(line))
# 	return people
# 		# dictList.append(regulateData(line))
# 	# return dictList

def regulateData(dataDict):
	"""Parse the raw survey answers of one row and store the results on it.

	Prints the row's 'ID', then adds the keys 'DOB', 'TOB', 'pDOB', 'pTOB',
	'COB' and 'pCOB' (in that order) to ``dataDict`` by running the matching
	parser on the survey column that holds the raw answer. The dictionary is
	modified in place and also returned.
	"""
	print("Parse %s"%(str(dataDict['ID'])))

	# Column headers exactly as they appear in the survey export.
	ownDayCol = "Which day (numeric) have you been born?"
	ownMonthCol = "Which month have you been born?"
	ownYearCol = "Year Of Birth"
	ownTimeCol = "At what exact time were you born? Please use the format HHMM (for example, 2204)."
	ownPlaceCol = "What is your place of birth? Please specify city and country (for example, San Francisco, USA)."
	partnerDobCol = "What is your partner's date of birth? Please use the format DD/MM/YYYY (for example, 29/03/1981)."
	partnerTimeCol = "At what exact time were your partner born? Please use the format HHMM (for example, 2204)."
	partnerPlaceCol = "What is your partner's place of birth? Please specify city and country (for example, San Francisco, USA)."

	# The respondent's date of birth is spread over three columns.
	dataDict['DOB'] = parseDOB(
		dataDict[ownDayCol],
		dataDict[ownMonthCol],
		dataDict[ownYearCol])

	# Every remaining field is derived from a single column; the order
	# matters because each parser may print or perform a network lookup.
	singleColumnSteps = (
		('TOB', parseTOB, ownTimeCol),
		('pDOB', parsePartnerDOB, partnerDobCol),
		('pTOB', parseTOB, partnerTimeCol),
		('COB', parseBirthTown, ownPlaceCol),
		('pCOB', parseBirthTown, partnerPlaceCol),
	)
	for target, parser, column in singleColumnSteps:
		dataDict[target] = parser(dataDict[column])
	return dataDict


def parseBirthTown(s):
	"""Geocode a free-text place of birth with Nominatim.

	The query is retried with its leading word removed each time the
	geocoder finds nothing, and finally with DEAULT_LOCATION. When a lookup
	raises, the timeout is increased by one second, the function sleeps for
	five seconds and the same query is tried again, until the timeout
	reaches 60 seconds.

	Parameters:
		s: place description as text or bytes; None or blank input is
			looked up as DEAULT_LOCATION.

	Returns:
		(latitude, longitude, raw) for the first match, or None when the
		retries are exhausted or even DEAULT_LOCATION cannot be resolved.
	"""
	if isinstance(s, bytes):
		# Byte strings are decoded once, up front: UTF-8 first (strict, so
		# other encodings are rejected), then latin-1, which accepts any byte.
		try:
			s = s.decode('utf-8')
		except UnicodeDecodeError:
			s = s.decode('latin-1')
	if s is None or not s.strip():
		s = DEAULT_LOCATION

	timeoutTime = 2
	geolocator = Nominatim(timeout=timeoutTime)
	while timeoutTime < 60:
		try:
			location = geolocator.geocode(s)
		except Exception:
			# Exception (not a bare except) keeps the best-effort retry but
			# lets KeyboardInterrupt/SystemExit stop the long retry loop.
			timeoutTime += 1
			print("Error: geocode failed on input %s, incrementing timeout time to %d"%(s,timeoutTime))
			time.sleep(5)
			geolocator = Nominatim(timeout=timeoutTime)
			continue
		if location is not None:
			return (location.latitude, location.longitude, location.raw)
		if s == DEAULT_LOCATION:
			# Nothing left to fall back to; retrying would loop forever.
			return None
		# Drop the leading word and retry; fall back to the default location
		# once only a single word is left.
		remainder = s.split(' ',1)
		if len(remainder) == 2:
			s = remainder[1]
		else:
			s = DEAULT_LOCATION
	return None

def _splitCompactDOB(dob):
	"""Split a separator-free date string into [day, month, year] strings.

	Supported layouts, chosen by length: ddmmyyyy (8), dmmyyyy or ddmyyyy (7),
	ddmmyy (6), dmmyy or ddmyy (5) and dmyy (4). Returns None for any other
	length. Raises ValueError when a 7-character string does not start with
	two digits.
	"""
	size = len(dob)
	# A '1' followed by 0/1/2 in positions 1-2 is read as month 10-12.
	monthIsTenToTwelve = size > 2 and dob[1] == '1' and dob[2] in ('0', '1', '2')
	if size == 8: # ddmmyyyy
		return [dob[:2], dob[2:4], dob[4:]]
	if size == 7:
		# dmmyyyy when the month is clearly 10-12 or the first two characters
		# cannot be a day; ddmyyyy otherwise.
		if monthIsTenToTwelve or int(dob[:2]) > 31:
			return [dob[0], dob[1:3], dob[3:]]
		return [dob[:2], dob[2], dob[3:]]
	if size == 6: # ddmmyy
		# NOTE(review): the previous guard here (dob[3:4] != '19') compared a
		# one-character slice with a two-character string and was always
		# true. A dmyyyy reading may have been intended for strings with
		# '19' in the middle, but that is ambiguous, so ddmmyy is kept.
		return [dob[:2], dob[2:4], dob[4:]]
	if size == 5:
		if monthIsTenToTwelve: # dmmyy
			return [dob[0], dob[1:3], dob[3:]]
		return [dob[:2], dob[2], dob[3:]] # ddmyy
	if size == 4: # dmyy
		return [dob[0], dob[1], dob[2:]]
	return None

def parsePartnerDOB(dob):
	"""Parse a free-text partner date of birth into a (day, month, year) tuple.

	Accepts day, month and year separated by '/', '-' or single spaces, or a
	run of 4-8 characters without separators (see _splitCompactDOB). Non-digit
	characters inside each field are ignored. Two-digit years are placed in
	the 1900s, and day and month are swapped when the month is above 12 and
	the day is at most 12.

	Returns:
		(day, month, year) as integers, or None when the text cannot be
		parsed or the date is outside day 1-31, month 1-12, year 1900-2017.
	"""
	dob = dob.strip()
	if dob.count('-') == 2:
		dob = dob.replace('-','/')
	if dob.count(' ') == 2:
		dob = dob.replace(' ','/')
	parts = dob.split('/')
	try:
		if len(parts) != 3:
			# No usable separators: fall back to fixed-width layouts.
			compact = dob.replace('/','').strip()
			if len(compact) < 4:
				return None
			parts = _splitCompactDOB(compact)
			if parts is None:
				print('no / partnerDOB issue')
				return None
		d, m, y = [int(''.join(ch for ch in part if ch.isdigit())) for part in parts]
	except ValueError:
		# A field without any digit (or a malformed compact string).
		return None
	if y < 100:
		y = y + 1900
	if m > 12 and d <= 12:
		# Month/day written the American way round.
		d, m = m, d
	if d > 31 or d < 1 or m > 12 or m < 1 or y > 2017 or y < 1900:
		# Out-of-range dates have always ended up as None: the old message
		# concatenated str and int, and the resulting TypeError was caught.
		print('error with DOB %d/%d/%d' % (d, m, y))
		return None
	return (d,m,y)

def monthStringToNum(s):
	"""Convert a month name to its number (1-12).

	Only the first three characters of the stripped, lower-cased input are
	compared, so 'Jan', 'january' and ' JANUARY ' all give 1.

	Raises:
		ValueError: if those three characters are not a month abbreviation.
	"""
	abbreviations = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
		'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
	prefix = s.strip()[:3].lower()
	if prefix in abbreviations:
		# Tuple position is zero-based, month numbers start at one.
		return abbreviations.index(prefix) + 1
	raise ValueError('Not a month')

def checkMonthDay(d,m):
	"""Repair and validate a day/month pair.

	Parameters:
		d: day as an integer.
		m: month as an integer.

	Returns:
		(valid, day, month), where day and month are the possibly repaired
		values and valid is True when the month is 1-12 and the day is
		between 1 and the longest that month can be (29 for February).
	"""
	if d > 31: # take first two digits of day
		d = int(str(d)[:2])
		if d > 31:
			# Still too large: keep only the second digit, as before.
			d = int(str(d)[1])
	# Day and month the wrong way round (American order). The day bound is
	# inclusive so that e.g. 12/25 is recognised as 25 December.
	if m > 12 and d <= 12:
		d, m = m, d
	if d < 1 or m < 1 or m > 12:
		return (False,d,m)
	if m == 2:
		longest = 29
	elif m in (4,6,9,11):
		longest = 30
	else:
		longest = 31
	return (d <= longest,d,m)

def parseDOB(d,m,y):
	"""Build a (day, month, year) tuple from three separate text answers.

	Parameters:
		d: day text; every non-digit character is ignored.
		m: month text, either a month name (handled by monthStringToNum)
			or a number.
		y: year text; every non-digit character is ignored, and values
			below 100 are placed in the 1900s.

	The day/month pair is passed through checkMonthDay, whose repaired
	values are returned; a rejected pair is reported on stdout but still
	returned.
	"""
	day = int(''.join(ch for ch in d if ch.isdigit()))
	year = int(''.join(ch for ch in y if ch.isdigit()))
	monthText = m.strip()
	try:
		month = monthStringToNum(monthText)
	except ValueError:
		# Not a month name, so it has to be a plain number.
		month = int(monthText)
	if year < 100:
		year += 1900
	(ok,day,month) = checkMonthDay(day,month)
	if not ok:
		print('error with day month')
		print((ok,day,month))
	return (day,month,year)

def parseTOB(T):
	"""Parse a free-text time of birth into an (hour, minute) tuple.

	Understands 'HH:MM', 'HH.MM', four-digit 'HHMM', a bare hour up to 24,
	and three-or-more-digit numbers read as hour*100 + minute, each
	optionally combined with 'am'/'pm' (dots as in 'a.m.' are ignored).

	Returns:
		(hour, minute) as integers. Input that cannot be interpreted gives
		(DEFAULT_TIME_H, DEFAULT_TIME_M). The values are not range-checked.
	"""
	meridiem = None # 0 for am, 1 for pm
	cleaned = T.replace('.','').lower().strip()
	if 'am' in cleaned:
		meridiem = 0
		T = cleaned.replace('am','')
	if 'pm' in cleaned:
		meridiem = 1
		T = cleaned.replace('pm','')
	T = T.strip()
	if T.count('.') == 1:
		T = T.replace('.',':')
	try:
		if ':' in T:
			fields = T.split(':')
			H = int(fields[0])
			M = int(fields[1])
		elif len(T) == 4:
			H = int(T[:2])
			M = int(T[2:])
		else:
			number = int(T)
			if number <= 24:
				H = number
				M = 0
			elif number >= 100:
				# Floor division keeps the hour an integer on Python 3 too.
				H = number // 100
				M = number % 100
			else:
				# 25..99 is neither an hour nor HMM; previously this left
				# H and M unassigned and crashed at the return.
				raise ValueError('cannot interpret time %r' % T)
		if meridiem is not None:
			if meridiem == 0:
				H = H%12
			else:
				H = H%12 + 12
	except ValueError:
		H = DEFAULT_TIME_H
		M = DEFAULT_TIME_M
	return (H,M)

def _parseTupleText(text):
	"""Split the text form of a tuple or list, e.g. "(29, 3, 1981)".

	Returns the comma-separated fields as strings with the enclosing
	brackets and surrounding whitespace removed.
	"""
	return [field.strip() for field in text.strip().strip('()[]').split(',')]

def _degreesMinutes(value):
	"""Return (degrees, minutes) of a coordinate's magnitude as strings.

	Both parts come from one fixed-point rendering of the value, so they stay
	consistent and never contain exponent notation.
	"""
	whole, fraction = ('%.10f' % abs(float(value))).split('.')
	return whole, str(int(float('0.' + fraction) * 60))

def _personFields(prefix, dob, tob, cob):
	"""Build the form fields of one person.

	Parameters:
		prefix: field-name prefix, 'muz' or 'zena'.
		dob: (day, month, year).
		tob: (hour, minute).
		cob: coordinates; index 0 is sent with an N/S direction and index 1
			with an E/W direction.
	"""
	latDegrees, latMinutes = _degreesMinutes(cob[0])
	lonDegrees, lonMinutes = _degreesMinutes(cob[1])
	base = prefix + '_narozeni_'
	return {
		base + 'den': dob[0],
		base + 'mesic': dob[1],
		base + 'rok': dob[2],
		base + 'hodina': tob[0],
		base + 'minuta': tob[1],
		base + 'city': '',
		base + 'mesto_hidden': 'Manually+place%3A+%C2%B0%27N%2C+%C2%B0%27E', #auto
		base + 'stat_hidden': 'XX',
		base + 'podstat_kratky_hidden': '',
		base + 'podstat_hidden': '',
		base + 'podstat2_kratky_hidden': '',
		base + 'podstat3_kratky_hidden': '',
		base + 'input_hidden': '',
		base + 'sirka_stupne': latDegrees,
		base + 'sirka_minuty': latMinutes,
		base + 'sirka_smer': '1' if cob[0] < 0 else '0', # N direction (0: N, 1: S)
		base + 'delka_stupne': lonDegrees,
		base + 'delka_minuty': lonMinutes,
		base + 'delka_smer': '1' if cob[1] < 0 else '0', # E direction (0: E, 1: W)
		base + 'timezone_form': 'auto',
		base + 'timezone_dst_form': 'auto',
	}

def makePayload(dataDict):
	"""Build the form payload for a pair of birth records.

	``dataDict`` must hold 'DOB', 'TOB', 'COB' for the first person and
	'pDOB', 'pTOB', 'pCOB' for the partner, each either as a sequence or as
	the text form of one (e.g. "(29, 3, 1981)"). Text values are converted in
	place: coordinates to a (float, float) tuple, the others to lists of
	stripped strings. A value of None (a failed parse) is not handled and
	raises TypeError. The partner's date of birth is printed.

	Returns:
		dict mapping form field names to values; the first person's fields
		use the 'muz_' prefix and the partner's the 'zena_' prefix.
	"""
	for key in ('COB', 'pCOB'):
		if isinstance(dataDict[key], str):
			fields = _parseTupleText(dataDict[key])
			dataDict[key] = (float(fields[0]), float(fields[1]))
	for key in ('DOB', 'pDOB', 'TOB', 'pTOB'):
		if isinstance(dataDict[key], str):
			dataDict[key] = _parseTupleText(dataDict[key])

	print(dataDict['pDOB'])

	R = {'send_calculation':'1'} #Req
	R.update(_personFields('muz', dataDict['DOB'], dataDict['TOB'], dataDict['COB']))
	R.update(_personFields('zena', dataDict['pDOB'], dataDict['pTOB'], dataDict['pCOB']))
	R['switch_interpretations'] = '0'
	R['house_system'] = 'placidus'
	R['uhel_orbis'] = '#tabs_redraw'
	return R