view V2/runme.py @ 30:15c43f44a806

update timesheet
author DaveM
date Sun, 13 May 2018 17:40:12 +0100
parents 3d5ca8e78f8f
children
line wrap: on
line source
#!/usr/bin/env python
import dParse as dp
import requests
import re
import time
import csv
import random
import pdb
import os
import pickle
from HTMLParser import HTMLParser
# from lxml import html
from bs4 import BeautifulSoup

def parsePage(resp):
	"""Parse the result tables out of a fetched compatibility page.

	Every <div class="tc"> in the response is inspected. For each one that
	holds more than two <td> cells, the first child of the <strong> element
	in the first and third cells (UTF-8 encoded) forms the key, and three
	values pulled from the middle cell by regex form the value.

	Args:
		resp: response object exposing the raw page in ``resp.content``.

	Returns:
		dict mapping ``(first_cell_text, third_cell_text)`` to a 3-tuple of
		regex groups from the middle cell: the text between ``/>`` and
		``<br/>``, the digits before the degree sign and the digits after it.
		Empty when nothing on the page matched.
	"""
	# \xc2\xb0 is the UTF-8 byte pair of the degree sign.
	# NOTE(review): this matches the encoded bytes that str(tag) yields under
	# Python 2; under Python 3 str(tag) is text and would not match - confirm
	# before porting.
	cellPattern = re.compile(r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")
	person = dict()
	soup = BeautifulSoup(resp.content, 'lxml')
	for cell in soup.find_all('div', attrs={'class':'tc'}):
		tableCell = cell.find_all('td')
		if len(tableCell) <= 2:
			continue
		firstStrong = tableCell[0].strong
		thirdStrong = tableCell[2].strong
		# Cells without a populated <strong> are not result rows; skip them
		# instead of crashing on None.
		if firstStrong is None or thirdStrong is None:
			continue
		if not firstStrong.contents or not thirdStrong.contents:
			continue
		A = cellPattern.search(str(tableCell[1]))
		if A is None:
			# Layout differs from what the pattern expects; leave the cell out
			# so the caller's "empty result" retry logic can still work.
			continue
		C = firstStrong.contents[0].encode('utf-8')
		D = thirdStrong.contents[0].encode('utf-8')
		person[(C,D)] = (A.group(1),A.group(2),A.group(3))
	return person


def setURL(p):
	"""
	Build the request target for one person record.

	The factors that influence the result are encoded as query parameters:
	date of birth, birth time, country of birth and city (and state) of birth.
	The parameters themselves are produced by dParse.makePayload.

	Returns a (url, payload) tuple.
	"""
	# For some reason the man's fields have to be posted first, then the woman's.
	# In the site's query string the man's keys are prefixed muz_narozeni_ and
	# the woman's zena_narozeni_, e.g. muz_narozeni_den / _mesic / _rok for
	# day / month / year.
	endpoint = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
	return (endpoint, dp.makePayload(p))

def requestURL(url,payload,delay=5,timeout=60):
	"""Issue a GET request and then pause before returning.

	Args:
		url: address to request.
		payload: mapping sent as the query string (``params=``).
		delay: seconds to sleep after the request completes; presumably a
			courtesy throttle between consecutive calls. Defaults to the
			previously hard-coded 5.
		timeout: seconds to wait for the server before requests raises a
			timeout error. Without it a stalled connection would block the
			whole run indefinitely.

	Returns:
		The requests response object.
	"""
	r = requests.get(url, params=payload, timeout=timeout)
	time.sleep(delay)
	return r

def makeURLPayload(url,payload):
	"""Return ``url`` with ``payload`` appended as an encoded query string.

	Keys and values are percent-encoded (spaces become ``+``), so values such
	as ``London, United Kingdom`` produce a usable URL. When ``payload`` is
	empty the URL is returned unchanged.

	Args:
		url: base address without a query string.
		payload: mapping of query parameter names to values.

	Returns:
		The combined URL as a string.
	"""
	# Local import keeps the function usable on both Python 2 and Python 3.
	try:
		from urllib import urlencode
	except ImportError:
		from urllib.parse import urlencode
	if not payload:
		return url
	return url + '?' + urlencode(payload)

def printToFile(filename,data,removeAdds=False):
	"""Write a list of dict rows to ``filename`` as CSV.

	Args:
		filename: path of the CSV file to create or overwrite.
		data: sequence of dicts, one per output row. The dicts are not
			modified.
		removeAdds: when true, the columns DOB, TOB, pDOB, pTOB, COB, pCOB
			and horiscope are left out of the output.

	The header is the first row's keys followed by any keys that only appear
	in later rows; rows lacking a column get an empty cell. An empty ``data``
	produces an empty file.
	"""
	if removeAdds:
		dropped = ('DOB','TOB','pDOB','pTOB','COB','pCOB','horiscope')
	else:
		dropped = ()
	# Filter into copies so the caller's records keep all of their fields.
	rows = [
		{key: value for key, value in row.items() if key not in dropped}
		for row in data
	]
	# Collect the union of keys in first-seen order; DictWriter rejects rows
	# that carry a key missing from the header.
	keys = []
	seen = set()
	for row in rows:
		for key in row:
			if key not in seen:
				seen.add(key)
				keys.append(key)
	with open(filename,'w') as stream:
		if not rows:
			return
		dict_writer = csv.DictWriter(stream, keys)
		dict_writer.writeheader()
		dict_writer.writerows(rows)

def loadPick(filename):
	"""Unpickle and return the object stored in ``filename``."""
	# pickle executes code embedded in the file; only load files this script wrote.
	with open(filename, 'rb') as pickled:
		return pickle.load(pickled)

def savePick(filename,data):
	"""Pickle ``data`` into ``filename``, replacing any existing file."""
	with open(filename, 'wb') as target:
		target.write(pickle.dumps(data))

def tempPF(fName,data):
	"""Write the string ``data`` to the file ``fName``, overwriting it.

	The ``with`` block guarantees the handle is closed even when the write
	raises.
	"""
	with open(fName,'w') as f__:
		f__.write(data)

def testMain():
	"""Parse 'individuals.csv' through dParse and discard the result."""
	dp.parseCSV('individuals.csv')

def _fetchHoroscope(person):
	"""Request and parse the page for one person, retrying once if nothing parsed."""
	url,payload = setURL(person)
	horiscope = parsePage(requestURL(url,payload))
	if not horiscope: # an empty dict means the page did not contain the tables
		print('attempt failed,  try again')
		url,payload = setURL(person)
		horiscope = parsePage(requestURL(url,payload))
		if not horiscope:
			print('attempt two failed')
	return horiscope

def _main():
	"""Fetch the compatibility data for every person and write the results.

	People are read from the pickle cache 'outData.pick' when it exists,
	otherwise parsed from 'individuals.csv' and cached. Records whose 'pDOB'
	is missing are skipped. For the rest, the parsed page is stored under
	'horiscope' and its entries are also copied onto the record itself.

	Outputs: the updated cache, '2outData.pick' with the processed records,
	and the CSV 'final_outData.csv'.
	"""
	pickFile = 'outData.pick'
	# The original referenced an undefined name here; the CSV name mirrors
	# the pickle file name.
	outFile = 'outData.csv'

	people = None
	if os.path.exists(pickFile):
		print('read in ' + pickFile)
		people = loadPick(pickFile)
		if isinstance(people, dict):
			# Earlier revisions overwrote the cache with a single person
			# record; such a file cannot be iterated as a list of people.
			people = None
	if people is None:
		print('reParse file')
		people = dp.parseCSV('individuals.csv')
		savePick(pickFile,people)

	horiscopeList = []
	for person in people:
		if person['pDOB'] is None or person['pDOB'] == '':
			print('SKIPPING person '+ person['ID'] + ' pDOB is None')
			continue
		print('parsing person '+ person['ID'])
		person['horiscope'] = _fetchHoroscope(person)
		# Flatten the parsed entries onto the record so each becomes a column.
		person.update(person['horiscope'])
		horiscopeList.append(person)
		# Checkpoint the whole collection, not just the current record, so the
		# cache stays loadable by the next run.
		savePick(pickFile,people)
	print(horiscopeList)
	savePick(pickFile,people)
	savePick('2'+pickFile,horiscopeList)
	# removeAdds=False keeps every column in the CSV.
	printToFile('final_'+outFile,horiscopeList,False)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
	_main()