annotate V2/runme.py @ 4:99115e36316b

developing and testing DOB and TOB gathering, and implementing geo-location to latitude coordinates
author DaveM
date Mon, 11 Dec 2017 11:29:38 +0000
parents c2898c2a3cc6
children 3d5ca8e78f8f
rev   line source
DaveM@2 1 #!/usr/bin/env python
DaveM@3 2 import dParse as dp
DaveM@2 3 import requests
DaveM@2 4 import re
DaveM@2 5 import time
DaveM@2 6 import csv
DaveM@2 7 import random
DaveM@2 8 from HTMLParser import HTMLParser
DaveM@2 9 # from lxml import html
DaveM@2 10 from bs4 import BeautifulSoup
DaveM@2 11
def parsePage(resp):
    """Collect the table data of every ``div.tc`` block in a results page.

    For each ``<div class="tc">`` that contains more than two ``<td>``
    cells, the ``<strong>`` text of the first and third cells forms the
    key and three regex groups taken from the second cell form the value.
    The groups are: the text between ``/>`` and ``<br/>``, the digits
    before the degree sign, and the digits after it.

    Args:
        resp: HTTP response object exposing the page body as ``resp.content``.

    Returns:
        dict mapping ``(C, D)`` tuples to ``(group1, group2, group3)``
        tuples, one entry per usable cell; empty when nothing matched.
    """
    # Same pattern as before, written as a raw string so the regex escapes
    # are not also interpreted as string escapes.  ``\xc2\xb0`` is the UTF-8
    # byte pair for the degree sign.
    # NOTE(review): this relies on str(tag) being a UTF-8 byte string
    # (Python 2 behaviour) -- confirm before running under Python 3.
    pattern = re.compile(r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")

    soup = BeautifulSoup(resp.content, 'lxml')
    # Created once, outside the loop, so entries from every cell accumulate
    # and a page without any ``div.tc`` yields an empty dict instead of an
    # unbound-name error.
    person = dict()
    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue
        first = tableCell[0].strong
        third = tableCell[2].strong
        if first is None or third is None:
            # Cell layout differs from what is expected; skip it.
            continue
        C = first.contents[0].encode('utf-8')
        D = third.contents[0].encode('utf-8')
        print((C, D))
        A = pattern.search(str(tableCell[1]))
        if A is None:
            # Middle cell does not have the expected shape; skip it rather
            # than fail on ``None.group``.
            continue
        values = (A.group(1), A.group(2), A.group(3))
        print(values)
        person[(C, D)] = values
    return person
DaveM@2 29
DaveM@2 30
def _randomBirthDate():
    """Return a random, calendar-valid ``(day, month, year)`` in 1918-2017."""
    import calendar  # function-scope import keeps this block self-contained

    year = random.randint(1, 100) + 1917
    month = random.randint(1, 12)
    # monthrange()[1] is the number of days in that month of that year, so
    # 29 Feb only appears in leap years and days 30/31 are reachable.
    day = random.randint(1, calendar.monthrange(year, month)[1])
    return day, month, year


def setURL(p):
    """
    Code impacting factors into URL
    IMPACTING FACTORS
    Date of Birth
    Birth Time
    Country of birth
    City of birth (And state of birth)

    Only the dates of birth are sent at the moment, and they are random:
    the argument ``p`` is accepted but not used yet.

    Args:
        p: currently ignored.

    Returns:
        ``(url, payload)`` where ``url`` is the calculator endpoint and
        ``payload`` is an ordered mapping of query parameters, with the
        man's (``muz_*``) fields before the woman's (``zena_*``) fields.
    """
    from collections import OrderedDict  # function-scope import, see above

    ## For some reason we need to post men first then women.
    # Full example query, kept as a reference for the remaining parameters:
    # url = "https://horoscopes.astro-seek.com/calculate-love-compatibility/?send_calculation=1&muz_narozeni_den=1&muz_narozeni_mesic=1&muz_narozeni_rok=1970&muz_narozeni_hodina=00&muz_narozeni_minuta=00&muz_narozeni_city=London%2C+United+Kingdom&muz_narozeni_mesto_hidden=London&muz_narozeni_stat_hidden=GB&muz_narozeni_podstat_kratky_hidden=England&muz_narozeni_podstat_hidden=England&muz_narozeni_podstat2_kratky_hidden=Greater+London&muz_narozeni_podstat3_kratky_hidden=undefined&muz_narozeni_input_hidden=&muz_narozeni_sirka_stupne=51&muz_narozeni_sirka_minuty=30&muz_narozeni_sirka_smer=0&muz_narozeni_delka_stupne=0&muz_narozeni_delka_minuty=8&muz_narozeni_delka_smer=1&muz_narozeni_timezone_form=auto&muz_narozeni_timezone_dst_form=auto&send_calculation=1&zena_narozeni_den=1&zena_narozeni_mesic=1&zena_narozeni_rok=1970&zena_narozeni_hodina=00&zena_narozeni_minuta=00&zena_narozeni_city=Berlin%2C+Germany&zena_narozeni_mesto_hidden=Berlin&zena_narozeni_stat_hidden=DE&zena_narozeni_podstat_kratky_hidden=Berlin&zena_narozeni_podstat_hidden=Berlin&zena_narozeni_podstat2_kratky_hidden=undefined&zena_narozeni_podstat3_kratky_hidden=undefined&zena_narozeni_input_hidden=&zena_narozeni_sirka_stupne=52&zena_narozeni_sirka_minuty=31&zena_narozeni_sirka_smer=0&zena_narozeni_delka_stupne=13&zena_narozeni_delka_minuty=24&zena_narozeni_delka_smer=0&zena_narozeni_timezone_form=auto&zena_narozeni_timezone_dst_form=auto&switch_interpretations=0&house_system=placidus&uhel_orbis=#tabs_redraw"
    # payload = {'muz_narozeni_den':'1','muz_narozeni_mesic':'1','muz_narozeni_rok':'1970'}
    url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
    mDay, mMonth, mYear = _randomBirthDate()
    fDay, fMonth, fYear = _randomBirthDate()

    # The key is 'send_calculation' without a leading '?': requests adds the
    # query separator itself and would percent-encode a literal '?' in a key.
    # OrderedDict keeps the men-before-women order on every Python version.
    payload = OrderedDict([
        ('send_calculation', '1'),
        ('muz_narozeni_den', mDay),
        ('muz_narozeni_mesic', mMonth),
        ('muz_narozeni_rok', mYear),
        ('zena_narozeni_den', fDay),
        ('zena_narozeni_mesic', fMonth),
        ('zena_narozeni_rok', fYear),
    ])
    return (url, payload)
DaveM@2 53
def requestURL(url, payload, delay=5, timeout=30):
    """Issue a GET request and then pause before returning the response.

    Args:
        url: address to request.
        payload: query parameters, passed to ``requests.get`` as ``params``.
        delay: seconds to sleep after the request (default 5, the value
            that used to be hard-coded).
            NOTE(review): presumably throttles successive calls -- confirm.
        timeout: seconds to wait for the server before ``requests`` gives
            up; without it a stalled connection blocks indefinitely.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: if the server does not answer within
            ``timeout`` seconds.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    time.sleep(delay)
    return r
DaveM@2 58
DaveM@4 59 # def parseCSV(filename):
DaveM@4 60 # stream = csv.DictReader(open(filename,'rb'))
DaveM@4 61 # dictList = []
DaveM@4 62 # for line in stream:
DaveM@4 63 # dictList.append(regulateData(line))
DaveM@3 64
DaveM@4 65 # # dictList = headerParse(dictList)
DaveM@4 66 # # dictList = validateData(dictList)
DaveM@4 67 # return dictList
DaveM@2 68
def printToFile(filename, data):
    """Write a sequence of dict rows to ``filename`` as CSV with a header.

    The header is the union of the keys of all rows, in first-seen order,
    so rows with differing keys are written (missing values become empty
    fields) instead of raising ``ValueError``.  Empty or ``None`` data
    produces an empty file rather than an ``IndexError``.

    Args:
        filename: path of the CSV file to create or overwrite.
        data: sequence of dicts, one per output row.
    """
    rows = list(data or [])

    fieldnames = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)

    # Mode 'w' is kept as in the original for Python 2 compatibility.
    with open(filename, 'w') as stream:
        if not rows:
            # Nothing to write: leave the file empty (no header is known).
            return
        dict_writer = csv.DictWriter(stream, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(rows)
DaveM@2 75
def testMain():
    """Development entry point: parse ``individuals.csv`` with ``dp.parseCSV``.

    The parsed result is bound to a local and otherwise unused; nothing is
    returned.
    """
    parsedPeople = dp.parseCSV('individuals.csv')
DaveM@3 78
def _main():
    """Fetch a result page for every person in the CSV and save the results.

    Reads ``individuals.csv`` through ``dp.parseCSV``.  For each row it
    builds a request with ``setURL``, fetches it with ``requestURL`` and
    stores the ``parsePage`` result under the row's ``'horiscope'`` key.
    The collected rows are then printed and written to ``outputdata.csv``.
    """
    results = []
    for entry in dp.parseCSV('individuals.csv'):
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('parsing person ' + entry['ID'])
        target, query = setURL('')
        entry['horiscope'] = parsePage(requestURL(target, query))
        results.append(entry)
    print(results)
    printToFile('outputdata.csv', results)
DaveM@2 93
# Script entry point.  Running the file calls testMain(), which only parses
# the input CSV; the scraping pipeline in _main() is not invoked from here.
if __name__ == "__main__":
    testMain()