annotate V1/test1.py @ 10:85c9aa9d90c5

implement refactor and present results, to output to csv
author DaveM
date Mon, 22 Jan 2018 22:31:20 +0000
parents e541264bcb9b
children
rev   line source
#!/usr/bin/env python
"""Fetch an astro-seek love-compatibility page and print its chart cells.

The script sends one GET request with a hard-coded pair of birth dates,
parses every ``<div class="tc">`` in the response and collects the table
rows it finds into the module-level ``person`` dict.

NOTE(review): this file targets Python 2 (``HTMLParser`` import; the regex
below matches the UTF-8 *bytes* of the degree sign, which is presumably what
``str(tag)`` yields for a BeautifulSoup tag under Python 2 -- confirm before
porting to Python 3).
"""

import re
from HTMLParser import HTMLParser

import requests
# from lxml import html
from bs4 import BeautifulSoup


url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'

# Query parameters for the request.  The site's own form URL carries many
# more fields (city lookup results, latitude/longitude, time-zone mode,
# house system); only this subset is sent.
#
# Keys and values are given un-encoded because requests URL-encodes them:
#   * the key is 'send_calculation', not '?send_calculation' -- the '?' is
#     the query separator and would otherwise be sent as '%3F...';
#   * the city is plain text, not 'London%2C+United+Kingdom', which would be
#     encoded a second time ('%252C').
payload = {
    'send_calculation': '1',
    'muz_narozeni_den': '25',
    'muz_narozeni_mesic': '6',
    'muz_narozeni_rok': '1988',
    'muz_narozeni_hodina': '00',
    'muz_narozeni_minuta': '00',
    'muz_narozeni_city': 'London, United Kingdom',
    'zena_narozeni_den': '14',
    'zena_narozeni_mesic': '3',
    'zena_narozeni_rok': '1995',
    'zena_narozeni_hodina': '00',
    'zena_narozeni_minuta': '00',
}

# Applied to the markup of the middle <td> of a row.  Groups:
#   1 - text between a closing "/>" and the following "<br/>"
#   2 - digits before the degree sign (UTF-8 bytes \xc2\xb0)
#   3 - digits after the degree sign
#   4 - whatever remains before the closing parenthesis (unused)
_POSITION_RE = re.compile(r"/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")


def _parse_cells(cells):
    """Collect the chart rows found in *cells* into a single dict.

    Args:
        cells: iterable of BeautifulSoup tags (the ``div.tc`` elements).

    Returns:
        dict mapping ``(first, third)`` -- the UTF-8 encoded ``<strong>``
        text of the first and third ``<td>`` -- to a tuple of regex groups
        1-3 taken from the second ``<td>``.  Cells with fewer than three
        ``<td>`` elements, or whose second ``<td>`` does not match the
        expected markup, are skipped.
    """
    found = {}
    for cell in cells:
        table_cells = cell.find_all('td')
        if len(table_cells) <= 2:
            continue

        first = table_cells[0].strong.contents[0].encode('utf-8')
        third = table_cells[2].strong.contents[0].encode('utf-8')
        print((first, third))

        match = _POSITION_RE.search(str(table_cells[1]))
        if match is None:
            # Markup differs from what the pattern expects; skip the row
            # instead of failing on ``None.group``.
            continue

        value = (match.group(1), match.group(2), match.group(3))
        print(value)
        found[(first, third)] = value
    return found


# Single GET with the parameters encoded into the URL.
r = requests.get(url, params=payload)

soup = BeautifulSoup(r.content, 'lxml')
tcCell = soup.find_all('div', attrs={'class': 'tc'})

# One dict for the whole page, accumulated across every matching cell.
person = _parse_cells(tcCell)