diff runme.py @ 0:479b128cc52c

make python files original
author DaveM
date Fri, 08 Dec 2017 10:20:44 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runme.py	Fri Dec 08 10:20:44 2017 +0000
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+import requests
+import re
+import time
+import csv
+import random
+from HTMLParser import HTMLParser
+# from lxml import html
+from bs4 import BeautifulSoup
+
+def parsePage(resp):
+	soup = BeautifulSoup(resp.content, 'lxml')
+	tcCell = soup.find_all('div', attrs={'class':'tc'})
+	for cell in tcCell:
+		person = dict()
+		tableCell = cell.find_all('td');
+		if len(tableCell) > 2:
+			C = tableCell[0].strong.contents[0].encode('utf-8')
+			D = tableCell[2].strong.contents[0].encode('utf-8')
+			print (C,D)
+			A = re.search("\/>(.*)<br/>.*\(([0-9]*)\\xc2\\xb0([0-9]*)(.*)\)",str(tableCell[1]))
+			# A0 = A.group(1)
+			# A1 = A.group(2).split('\xc2\xb0')[0]
+			# A2 = A.group(2).split('\xc2\xb0')[1].split('\xe2')[0]
+			print (A.group(1),A.group(2),A.group(3))
+			person[(C,D)] = (A.group(1),A.group(2),A.group(3))
+		return person
+
+
+def setURL(p):
+	## For some reason we need to post men first then women.
+	# url = "https://horoscopes.astro-seek.com/calculate-love-compatibility/?send_calculation=1&muz_narozeni_den=1&muz_narozeni_mesic=1&muz_narozeni_rok=1970&muz_narozeni_hodina=00&muz_narozeni_minuta=00&muz_narozeni_city=London%2C+United+Kingdom&muz_narozeni_mesto_hidden=London&muz_narozeni_stat_hidden=GB&muz_narozeni_podstat_kratky_hidden=England&muz_narozeni_podstat_hidden=England&muz_narozeni_podstat2_kratky_hidden=Greater+London&muz_narozeni_podstat3_kratky_hidden=undefined&muz_narozeni_input_hidden=&muz_narozeni_sirka_stupne=51&muz_narozeni_sirka_minuty=30&muz_narozeni_sirka_smer=0&muz_narozeni_delka_stupne=0&muz_narozeni_delka_minuty=8&muz_narozeni_delka_smer=1&muz_narozeni_timezone_form=auto&muz_narozeni_timezone_dst_form=auto&send_calculation=1&zena_narozeni_den=1&zena_narozeni_mesic=1&zena_narozeni_rok=1970&zena_narozeni_hodina=00&zena_narozeni_minuta=00&zena_narozeni_city=Berlin%2C+Germany&zena_narozeni_mesto_hidden=Berlin&zena_narozeni_stat_hidden=DE&zena_narozeni_podstat_kratky_hidden=Berlin&zena_narozeni_podstat_hidden=Berlin&zena_narozeni_podstat2_kratky_hidden=undefined&zena_narozeni_podstat3_kratky_hidden=undefined&zena_narozeni_input_hidden=&zena_narozeni_sirka_stupne=52&zena_narozeni_sirka_minuty=31&zena_narozeni_sirka_smer=0&zena_narozeni_delka_stupne=13&zena_narozeni_delka_minuty=24&zena_narozeni_delka_smer=0&zena_narozeni_timezone_form=auto&zena_narozeni_timezone_dst_form=auto&switch_interpretations=0&house_system=placidus&uhel_orbis=#tabs_redraw"
+	# payload = {'muz_narozeni_den':'1','muz_narozeni_mesic':'1','muz_narozeni_rok':'1970'}
+	url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
+	mDay = random.randint(1,29)
+	mMonth = random.randint(1,12)
+	mYear = random.randint(1,100)+1917
+	fDay = random.randint(1,29)
+	fMonth = random.randint(1,12)
+	fYear = random.randint(1,100)+1917
+
+	payload = {'?send_calculation':'1','muz_narozeni_den':mDay,'muz_narozeni_mesic':mMonth,'muz_narozeni_rok':mYear,'zena_narozeni_den':fDay,'zena_narozeni_mesic':fMonth,'zena_narozeni_rok':fYear}
+	return (url,payload)
+
+def requestURL(url,payload):
+	r = requests.get(url, params=payload)
+	time.sleep(5)
+	return r
+
+def parseCSV(filename):
+	stream = csv.DictReader(open(filename,'rb'))
+	dictList = []
+	for line in stream:
+		dictList.append(line)
+	return dictList
+
+def printToFile(filename,data):
+	keys = data[0].keys()
+	with open(filename,'w') as stream:
+		dict_writer = csv.DictWriter(stream, keys)
+		dict_writer.writeheader()
+		dict_writer.writerows(data)
+
+def main():
+	# people = dict()
+	people = parseCSV('individuals.csv')
+	horiscopeList = []
+	# people = [1,2,3,4,5]
+	for person in people:
+		print 'parsing person '+ person['ID']
+		url,payload = setURL('')
+		resp = requestURL(url,payload)
+		
+		person['horiscope'] = parsePage(resp)
+		horiscopeList.append(person)
+	print horiscopeList
+	printToFile('outputdata.csv',horiscopeList)
+
+if __name__ == "__main__":
+	main()