Mercurial > hg > horiscopes
diff runme.py @ 0:479b128cc52c
make python files original
author | DaveM |
---|---|
date | Fri, 08 Dec 2017 10:20:44 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Collect love-compatibility results from astro-seek.com for a list of people.

Reads ``individuals.csv``, requests one compatibility page per row using
randomly generated birth dates, attaches the parsed result to the row under
the ``horiscope`` key and writes all rows to ``outputdata.csv``.

NOTE: the script targets Python 2 -- the ``HTMLParser`` import, the binary
CSV file modes and the UTF-8 byte pattern used by ``parsePage`` rely on it.
"""
import calendar
import collections
import csv
import random
import re
import time
from HTMLParser import HTMLParser

# from lxml import html
import requests
from bs4 import BeautifulSoup

# Applied to the markup of the middle <td> of a result row.  Group 1 is the
# text between "/>" and "<br/>"; groups 2 and 3 are the digit runs on either
# side of the degree sign inside the parentheses.  "\xc2\xb0" is the UTF-8
# byte pair of that sign, which is how str(tag) renders it under Python 2.
_CELL_RE = re.compile(r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")


def parsePage(resp):
    """Extract the result entries from a compatibility page.

    Every ``<div class="tc">`` that holds more than two ``<td>`` cells
    contributes one entry.  The key is the pair of ``<strong>`` texts taken
    from the first and third cell (UTF-8 encoded); the value is the first
    three groups ``_CELL_RE`` finds in the second cell.  Divs whose second
    cell does not match the pattern are skipped.

    :param resp: response object exposing the page body as ``resp.content``.
    :returns: dict mapping ``(first, second)`` to
        ``(text, digits before the degree sign, digits after it)``; empty
        when the page has no usable div.
    """
    soup = BeautifulSoup(resp.content, 'lxml')
    # Created once, before the loop, so entries from every div accumulate
    # and a page without any matching div yields an empty dict.
    person = dict()
    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue
        first = tableCell[0].strong.contents[0].encode('utf-8')
        second = tableCell[2].strong.contents[0].encode('utf-8')
        print((first, second))
        found = _CELL_RE.search(str(tableCell[1]))
        if found is None:
            # Unrecognised cell layout: skip it instead of failing on
            # found.group() and losing the rest of the page.
            continue
        value = (found.group(1), found.group(2), found.group(3))
        print(value)
        person[(first, second)] = value
    return person


def _randomBirthDate():
    """Return a random ``(day, month, year)`` that exists in the calendar."""
    day = random.randint(1, 29)
    month = random.randint(1, 12)
    year = random.randint(1, 100) + 1917
    # Day 29 does not exist in February of a non-leap year; clamp the day to
    # the real length of the chosen month.
    return min(day, calendar.monthrange(year, month)[1]), month, year


def setURL(p):
    """Build the URL and query parameters for one randomly generated couple.

    :param p: unused; kept so existing callers keep working.
    :returns: ``(url, payload)`` where ``payload`` is an ordered mapping of
        query parameter names to values.
    """
    ## For some reason we need to post men first then women.
    # An OrderedDict keeps the ``muz_*`` fields ahead of the ``zena_*``
    # fields; a plain dict gives no ordering guarantee on Python 2.
    #
    # Full query string captured from the site, kept for reference (this
    # script only sends the date fields):
    # url = "https://horoscopes.astro-seek.com/calculate-love-compatibility/?send_calculation=1&muz_narozeni_den=1&muz_narozeni_mesic=1&muz_narozeni_rok=1970&muz_narozeni_hodina=00&muz_narozeni_minuta=00&muz_narozeni_city=London%2C+United+Kingdom&muz_narozeni_mesto_hidden=London&muz_narozeni_stat_hidden=GB&muz_narozeni_podstat_kratky_hidden=England&muz_narozeni_podstat_hidden=England&muz_narozeni_podstat2_kratky_hidden=Greater+London&muz_narozeni_podstat3_kratky_hidden=undefined&muz_narozeni_input_hidden=&muz_narozeni_sirka_stupne=51&muz_narozeni_sirka_minuty=30&muz_narozeni_sirka_smer=0&muz_narozeni_delka_stupne=0&muz_narozeni_delka_minuty=8&muz_narozeni_delka_smer=1&muz_narozeni_timezone_form=auto&muz_narozeni_timezone_dst_form=auto&send_calculation=1&zena_narozeni_den=1&zena_narozeni_mesic=1&zena_narozeni_rok=1970&zena_narozeni_hodina=00&zena_narozeni_minuta=00&zena_narozeni_city=Berlin%2C+Germany&zena_narozeni_mesto_hidden=Berlin&zena_narozeni_stat_hidden=DE&zena_narozeni_podstat_kratky_hidden=Berlin&zena_narozeni_podstat_hidden=Berlin&zena_narozeni_podstat2_kratky_hidden=undefined&zena_narozeni_podstat3_kratky_hidden=undefined&zena_narozeni_input_hidden=&zena_narozeni_sirka_stupne=52&zena_narozeni_sirka_minuty=31&zena_narozeni_sirka_smer=0&zena_narozeni_delka_stupne=13&zena_narozeni_delka_minuty=24&zena_narozeni_delka_smer=0&zena_narozeni_timezone_form=auto&zena_narozeni_timezone_dst_form=auto&switch_interpretations=0&house_system=placidus&uhel_orbis=#tabs_redraw"
    # payload = {'muz_narozeni_den':'1','muz_narozeni_mesic':'1','muz_narozeni_rok':'1970'}
    url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
    mDay, mMonth, mYear = _randomBirthDate()
    fDay, fMonth, fYear = _randomBirthDate()

    # The key is 'send_calculation' without a leading '?': requests adds the
    # '?' separator itself and would percent-encode one embedded in a key,
    # producing a parameter name the captured query above does not contain.
    payload = collections.OrderedDict([
        ('send_calculation', '1'),
        ('muz_narozeni_den', mDay),
        ('muz_narozeni_mesic', mMonth),
        ('muz_narozeni_rok', mYear),
        ('zena_narozeni_den', fDay),
        ('zena_narozeni_mesic', fMonth),
        ('zena_narozeni_rok', fYear),
    ])
    return (url, payload)


def requestURL(url, payload, delay=5, timeout=30):
    """GET ``url`` with ``payload`` as query parameters, then pause.

    :param url: address to request.
    :param payload: mapping of query parameter names to values.
    :param delay: seconds to sleep after the request completes, spacing out
        consecutive calls.
    :param timeout: seconds to wait for the server before requests raises;
        without it a stalled connection would block the run indefinitely.
    :returns: the ``requests`` response object.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    time.sleep(delay)
    return r


def parseCSV(filename):
    """Read a CSV file that has a header row into a list of row dicts.

    :param filename: path of the CSV file to read.
    :returns: list with one dict per data row, keyed by the header names.
    """
    # Binary mode is what the Python 2 csv module expects; the with-block
    # closes the handle once all rows have been read.
    with open(filename, 'rb') as stream:
        return list(csv.DictReader(stream))


def printToFile(filename, data):
    """Write a list of dicts to ``filename`` as CSV with a header row.

    The column names are the keys of the first dict.  When ``data`` is empty
    there are no columns to derive, so no file is written.

    :param filename: path of the CSV file to create or overwrite.
    :param data: list of dicts sharing the same keys.
    """
    if not data:
        return
    keys = data[0].keys()
    # Binary mode, as required by the Python 2 csv module, keeps line
    # endings correct on platforms that translate newlines in text mode.
    with open(filename, 'wb') as stream:
        dict_writer = csv.DictWriter(stream, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)


def main():
    """Fetch and parse one compatibility page per row of individuals.csv."""
    people = parseCSV('individuals.csv')
    horiscopeList = []
    for person in people:
        print('parsing person ' + person['ID'])
        url, payload = setURL('')
        resp = requestURL(url, payload)

        person['horiscope'] = parsePage(resp)
        horiscopeList.append(person)
    print(horiscopeList)
    printToFile('outputdata.csv', horiscopeList)


if __name__ == "__main__":
    main()