DaveM@0
|
1 #!/usr/bin/env python
|
DaveM@0
|
2 import requests
|
DaveM@0
|
3 import re
|
DaveM@0
|
4 import time
|
DaveM@0
|
5 import csv
|
DaveM@0
|
6 import random
|
DaveM@0
|
7 from HTMLParser import HTMLParser
|
DaveM@0
|
8 # from lxml import html
|
DaveM@0
|
9 from bs4 import BeautifulSoup
|
DaveM@0
|
10
|
DaveM@0
|
def parsePage(resp):
    """Collect the table rows found on a compatibility result page.

    The HTML in ``resp.content`` is parsed with BeautifulSoup (lxml parser).
    Every ``<div class="tc">`` that contains more than two ``<td>`` cells is
    treated as one row; divs with fewer cells are ignored.

    Args:
        resp: Response object whose ``content`` attribute holds the page HTML.

    Returns:
        dict: Maps ``(C, D)`` to ``(g1, g2, g3)`` where ``C`` and ``D`` are
        the UTF-8 encoded first child of the ``<strong>`` tag in the first
        and third cell, and ``g1``..``g3`` are the first three groups the
        regex captures from the middle cell: the text between ``/>`` and
        ``<br/>``, the digits before the degree sign and the digits after it
        (presumably sign name, degrees and minutes -- confirm against the
        live page).  Empty when no row matches.
    """
    soup = BeautifulSoup(resp.content, 'lxml')

    # Created once, before the loop, so that every row is kept.  Re-creating
    # it per cell discarded all rows but the last one and left the name
    # undefined when the page contained no 'tc' div at all.
    person = dict()

    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue

        C = tableCell[0].strong.contents[0].encode('utf-8')
        D = tableCell[2].strong.contents[0].encode('utf-8')
        print((C, D))

        # \xc2\xb0 is the UTF-8 byte pair of the degree sign, so the pattern
        # expects str(tag) to be UTF-8 encoded text (the Python 2 behaviour
        # of BeautifulSoup -- NOTE(review): confirm before porting to 3).
        A = re.search(
            r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)",
            str(tableCell[1]))
        if A is None:
            # Cell does not have the expected layout; skip the row rather
            # than abort the whole page with an AttributeError.
            continue

        position = (A.group(1), A.group(2), A.group(3))
        print(position)
        person[(C, D)] = position

    return person
|
DaveM@0
|
28
|
DaveM@0
|
29
|
DaveM@0
|
def _randomBirthDate():
    """Return a random, calendar-valid ``(day, month, year)`` in 1918-2017."""
    import calendar

    year = random.randint(1918, 2017)
    month = random.randint(1, 12)
    # monthrange() gives the real length of the month, so 29 February is
    # only ever produced for leap years.
    day = random.randint(1, calendar.monthrange(year, month)[1])
    return day, month, year


def setURL(p):
    """Build the request URL and query parameters for one compatibility chart.

    Code impacting factors into URL.

    IMPACTING FACTORS
        Date of Birth
        Birth Time
        Country of birth
        City of birth (And state of birth)

    Only the dates of birth are sent at the moment, and they are drawn at
    random for both partners; birth time and birth place are not part of the
    payload.

    Args:
        p: Currently unused; kept so existing callers keep working.

    Returns:
        tuple: ``(url, payload)``.  ``payload`` is an ordered mapping of
        query parameters: ``send_calculation`` first, then the ``muz_*``
        (man) fields, then the ``zena_*`` (woman) fields.
    """
    import collections

    ## For some reason we need to post men first then women.
    # Full example of a working query, kept as a reference for field names:
    # url = "https://horoscopes.astro-seek.com/calculate-love-compatibility/?send_calculation=1&muz_narozeni_den=1&muz_narozeni_mesic=1&muz_narozeni_rok=1970&muz_narozeni_hodina=00&muz_narozeni_minuta=00&muz_narozeni_city=London%2C+United+Kingdom&muz_narozeni_mesto_hidden=London&muz_narozeni_stat_hidden=GB&muz_narozeni_podstat_kratky_hidden=England&muz_narozeni_podstat_hidden=England&muz_narozeni_podstat2_kratky_hidden=Greater+London&muz_narozeni_podstat3_kratky_hidden=undefined&muz_narozeni_input_hidden=&muz_narozeni_sirka_stupne=51&muz_narozeni_sirka_minuty=30&muz_narozeni_sirka_smer=0&muz_narozeni_delka_stupne=0&muz_narozeni_delka_minuty=8&muz_narozeni_delka_smer=1&muz_narozeni_timezone_form=auto&muz_narozeni_timezone_dst_form=auto&send_calculation=1&zena_narozeni_den=1&zena_narozeni_mesic=1&zena_narozeni_rok=1970&zena_narozeni_hodina=00&zena_narozeni_minuta=00&zena_narozeni_city=Berlin%2C+Germany&zena_narozeni_mesto_hidden=Berlin&zena_narozeni_stat_hidden=DE&zena_narozeni_podstat_kratky_hidden=Berlin&zena_narozeni_podstat_hidden=Berlin&zena_narozeni_podstat2_kratky_hidden=undefined&zena_narozeni_podstat3_kratky_hidden=undefined&zena_narozeni_input_hidden=&zena_narozeni_sirka_stupne=52&zena_narozeni_sirka_minuty=31&zena_narozeni_sirka_smer=0&zena_narozeni_delka_stupne=13&zena_narozeni_delka_minuty=24&zena_narozeni_delka_smer=0&zena_narozeni_timezone_form=auto&zena_narozeni_timezone_dst_form=auto&switch_interpretations=0&house_system=placidus&uhel_orbis=#tabs_redraw"
    url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'

    mDay, mMonth, mYear = _randomBirthDate()
    fDay, fMonth, fYear = _randomBirthDate()

    # An OrderedDict keeps the man-before-woman order on every Python
    # version; a plain dict gives no ordering guarantee on Python 2.
    # The key is 'send_calculation' without a leading '?': the HTTP library
    # adds the '?' separator itself and would percent-encode a literal one.
    payload = collections.OrderedDict([
        ('send_calculation', '1'),
        ('muz_narozeni_den', mDay),
        ('muz_narozeni_mesic', mMonth),
        ('muz_narozeni_rok', mYear),
        ('zena_narozeni_den', fDay),
        ('zena_narozeni_mesic', fMonth),
        ('zena_narozeni_rok', fYear),
    ])
    return (url, payload)
|
DaveM@0
|
52
|
DaveM@0
|
def requestURL(url, payload, delay=5, timeout=30):
    """Send a GET request and pause before handing back the response.

    Args:
        url: Address to request.
        payload: Query parameters, passed to ``requests.get`` as ``params``.
        delay: Seconds to sleep after the request has completed.  Defaults
            to 5, the value that used to be hard-coded.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error.  Without a timeout a stalled connection would
            block the script indefinitely.

    Returns:
        The ``requests`` response object.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    # Pause between consecutive calls (presumably to go easy on the remote
    # site -- the original code gives no reason).
    time.sleep(delay)
    return r
|
DaveM@0
|
57
|
DaveM@0
|
def parseCSV(filename):
    """Read a CSV file that has a header row into a list of row mappings.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        list: One dict-like row per data line, keyed by the header names, in
        file order.  Empty when the file has no data lines.
    """
    import sys

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        stream = open(filename, 'rb')
    else:
        stream = open(filename, 'r', newline='')

    # The context manager closes the file even if parsing fails; it was
    # previously left open.
    with stream:
        return list(csv.DictReader(stream))
|
DaveM@0
|
64
|
DaveM@0
|
def printToFile(filename, data):
    """Write a list of dicts to a CSV file with a header row.

    The column names are the keys of the first row; every row is expected to
    have the same keys (``csv.DictWriter`` raises ``ValueError`` for a row
    with extra keys).

    Args:
        filename: Path of the file to create or overwrite.
        data: Sequence of dicts, one per output row.  When it is empty the
            file is still created/truncated but nothing is written to it,
            because there is no row to take the header from.
    """
    import sys

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; anything else doubles the carriage return
    # on platforms that translate line endings.
    if sys.version_info[0] < 3:
        stream = open(filename, 'wb')
    else:
        stream = open(filename, 'w', newline='')

    with stream:
        if not data:
            # data[0] would raise IndexError on an empty result set.
            return
        dict_writer = csv.DictWriter(stream, list(data[0].keys()))
        dict_writer.writeheader()
        dict_writer.writerows(data)
|
DaveM@0
|
71
|
DaveM@0
|
def main(inputFile='individuals.csv', outputFile='outputdata.csv'):
    """Fetch a chart for every person in the input CSV and save the results.

    For each input row a request is built with ``setURL``, sent with
    ``requestURL`` and parsed with ``parsePage``; the parsed result is stored
    on the row under the ``'horiscope'`` key.  All rows are then printed and
    written to the output CSV.

    Args:
        inputFile: CSV file to read; each row must have an ``ID`` column.
            Defaults to the previously hard-coded ``'individuals.csv'``.
        outputFile: CSV file to write.  Defaults to the previously
            hard-coded ``'outputdata.csv'``.
    """
    people = parseCSV(inputFile)
    horiscopeList = []
    for person in people:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('parsing person ' + person['ID'])
        # NOTE(review): the request does not depend on the row -- setURL
        # ignores its argument and randomises the birth dates.
        url, payload = setURL('')
        resp = requestURL(url, payload)

        # The row dict is extended in place, so the output keeps every input
        # column plus the new 'horiscope' column.
        person['horiscope'] = parsePage(resp)
        horiscopeList.append(person)
    print(horiscopeList)
    printToFile(outputFile, horiscopeList)
|
DaveM@0
|
86
|
DaveM@0
|
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|