Mercurial > hg > horiscopes
comparison V2/runme.py @ 2:e541264bcb9b
refactor dir
author | DaveM |
---|---|
date | Fri, 08 Dec 2017 16:18:39 +0000 |
parents | |
children | c2898c2a3cc6 |
comparison
equal
deleted
inserted
replaced
1:27a294084c27 | 2:e541264bcb9b |
---|---|
1 #!/usr/bin/env python | |
2 import requests | |
3 import re | |
4 import time | |
5 import csv | |
6 import random | |
7 from HTMLParser import HTMLParser | |
8 # from lxml import html | |
9 from bs4 import BeautifulSoup | |
10 | |
def parsePage(resp):
    """Collect the table rows found in a compatibility result page.

    The page is parsed with BeautifulSoup/lxml. Every ``<div class="tc">``
    that holds more than two ``<td>`` cells contributes one entry to the
    result.

    Args:
        resp: response object exposing the raw page body as ``resp.content``.

    Returns:
        dict mapping ``(C, D)`` to a 3-tuple, where ``C`` and ``D`` are the
        UTF-8 encoded first children of the ``<strong>`` tags in the first
        and third cell, and the tuple holds regex groups 1-3 captured from
        the second cell (text before ``<br/>``, digits before the degree
        sign, digits after it). Presumably these are the two bodies of an
        aspect, the aspect name and its orb -- TODO confirm against the
        site markup. An empty dict is returned when nothing matches.
    """
    # The pattern is kept identical to the historical one; the raw string
    # only avoids invalid-escape warnings. "\xc2\xb0" is the UTF-8 encoding
    # of the degree sign.
    # NOTE(review): this assumes str(tag) yields UTF-8 bytes (Python 2).
    pattern = r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)"

    soup = BeautifulSoup(resp.content, 'lxml')
    # Created once, outside the loop, so that every matching cell is kept
    # instead of only the last one, and so that a page without matches
    # returns {} rather than failing on an unbound name.
    person = dict()
    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue
        first = tableCell[0].strong
        third = tableCell[2].strong
        if first is None or third is None:
            # Row without the expected <strong> labels: nothing to key on.
            continue
        if not first.contents or not third.contents:
            continue
        C = first.contents[0].encode('utf-8')
        D = third.contents[0].encode('utf-8')
        print((C, D))
        A = re.search(pattern, str(tableCell[1]))
        if A is None:
            # Middle cell does not have the expected layout; skip the row
            # rather than crash on A.group().
            continue
        detail = (A.group(1), A.group(2), A.group(3))
        print(detail)
        person[(C, D)] = detail
    return person
28 | |
29 | |
def _randomBirthDate():
    """Return a random valid ``(day, month, year)`` with year in 1918-2017."""
    import calendar  # local import keeps this block self-contained

    year = random.randint(1, 100) + 1917
    month = random.randint(1, 12)
    # monthrange()[1] is the number of days in that month of that year, so
    # 29 February is only produced in leap years and days 30/31 are
    # reachable for the months that have them.
    day = random.randint(1, calendar.monthrange(year, month)[1])
    return day, month, year


def setURL(p):
    """Build the request URL and query parameters for one comparison.

    Impacting factors the site takes into account:
        Date of birth
        Birth time
        Country of birth
        City of birth (and state of birth)

    Only the two dates of birth are sent at the moment, and both are drawn
    at random. The full query string used by the site additionally carries,
    for each of the ``muz_narozeni_*`` and ``zena_narozeni_*`` groups, the
    fields ``hodina``, ``minuta``, ``city``, ``mesto_hidden``,
    ``stat_hidden``, ``podstat*_hidden``, ``input_hidden``, ``sirka_*``,
    ``delka_*``, ``timezone_form`` and ``timezone_dst_form``, plus the
    global ``switch_interpretations``, ``house_system`` and ``uhel_orbis``.

    Args:
        p: currently unused; kept so existing callers keep working.

    Returns:
        tuple ``(url, payload)`` where ``payload`` is a dict suitable for
        the ``params`` argument of ``requests.get``.
    """
    url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
    mDay, mMonth, mYear = _randomBirthDate()
    fDay, fMonth, fYear = _randomBirthDate()

    # The "muz" fields are listed before the "zena" ones because the site
    # has been observed to need the man posted first, then the woman.
    # NOTE(review): a plain dict only keeps this order on Python 3.7+.
    payload = {
        # No leading "?": requests adds the separator itself and would
        # percent-encode a literal "?" into the parameter name.
        'send_calculation': '1',
        'muz_narozeni_den': mDay,
        'muz_narozeni_mesic': mMonth,
        'muz_narozeni_rok': mYear,
        'zena_narozeni_den': fDay,
        'zena_narozeni_mesic': fMonth,
        'zena_narozeni_rok': fYear,
    }
    return (url, payload)
52 | |
def requestURL(url, payload, timeout=30, delay=5):
    """GET ``url`` with ``payload`` as query parameters, then pause.

    Args:
        url: address to request.
        payload: mapping passed to ``requests.get`` as ``params``.
        timeout: seconds to wait for the server before giving up. Without
            it a stalled connection would block the whole run forever.
        delay: seconds to sleep after the request has completed, so that
            consecutive calls are spaced out.

    Returns:
        The ``requests`` response object.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            including ``Timeout`` when ``timeout`` elapses.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    time.sleep(delay)
    return r
57 | |
def parseCSV(filename):
    """Read a CSV file with a header row into a list of row dicts.

    Args:
        filename: path of the CSV file to read.

    Returns:
        list with one mapping per data row, keyed by the header names.
    """
    import sys  # local import keeps this block self-contained

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        stream = open(filename, 'rb')
    else:
        stream = open(filename, 'r', newline='')
    # The context manager closes the handle, which used to be leaked.
    with stream:
        return list(csv.DictReader(stream))
64 | |
def printToFile(filename, data):
    """Write a list of dicts to ``filename`` as CSV with a header row.

    The column names are taken from the keys of the first row.

    Args:
        filename: path of the CSV file to create or overwrite.
        data: list of dicts. An empty list produces an empty file instead
            of failing on ``data[0]``.

    Raises:
        ValueError: if a later row has a key that the first row lacks
            (raised by ``csv.DictWriter``).
    """
    import sys  # local import keeps this block self-contained

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; otherwise line endings can be doubled
    # on some platforms.
    if sys.version_info[0] < 3:
        stream = open(filename, 'wb')
    else:
        stream = open(filename, 'w', newline='')
    with stream:
        if not data:
            return
        keys = list(data[0].keys())
        dict_writer = csv.DictWriter(stream, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)
71 | |
def main():
    """Fetch a result page for every individual and save the results.

    Reads the rows of 'individuals.csv', requests one comparison page per
    row, stores the parsed page under the row's 'horiscope' key, prints
    the collected rows and writes them to 'outputdata.csv'.
    """
    horiscopeList = []
    for person in parseCSV('individuals.csv'):
        print('parsing person ' + person['ID'])
        url, payload = setURL('')
        person['horiscope'] = parsePage(requestURL(url, payload))
        horiscopeList.append(person)
    print(horiscopeList)
    printToFile('outputdata.csv', horiscopeList)


if __name__ == "__main__":
    main()