Mercurial > hg > horiscopes
view V2/runme.py @ 30:15c43f44a806
update timesheet
author | DaveM |
---|---|
date | Sun, 13 May 2018 17:40:12 +0100 |
parents | 3d5ca8e78f8f |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Fetch love-compatibility charts from astro-seek.com for a list of people.

People are read from ``individuals.csv`` (via ``dParse``), one request is made
per person, the result tables are parsed, and everything is written out as a
pickle and as a CSV file.

NOTE: this is a Python 2 script (``HTMLParser`` import, byte-oriented regex
in ``parsePage``).
"""

import dParse as dp
import requests
import re
import time
import csv
import random
import pdb
import os
import pickle
from HTMLParser import HTMLParser
# from lxml import html
from bs4 import BeautifulSoup

# Matches the middle <td> of a result row: the text between "/>" and "<br/>",
# then, inside parentheses, the digits before and after a UTF-8 encoded
# degree sign (bytes C2 B0), followed by any trailing text.
# NOTE(review): relies on Python 2 ``str(tag)`` yielding UTF-8 bytes.
_CELL_RE = re.compile(r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")

# Columns dropped from the CSV when ``printToFile`` is asked to remove them.
_ADDITIONAL_KEYS = ('DOB', 'TOB', 'pDOB', 'pTOB', 'COB', 'pCOB', 'horiscope')


def _strongText(cell):
    """Return the UTF-8 encoded first child of ``cell.strong``, or None."""
    strong = cell.strong
    if strong is None or not strong.contents:
        return None
    return strong.contents[0].encode('utf-8')


def parsePage(resp):
    """Parse a result page into a dict of table entries.

    Every ``<div class="tc">`` that holds more than two ``<td>`` cells yields
    one entry: the key is the pair of ``<strong>`` texts from the first and
    third cell, the value is the first three groups of ``_CELL_RE`` matched
    against the second cell.

    Cells that lack a ``<strong>`` element or do not match the pattern are
    skipped, so an unexpected page gives an empty (or partial) dict rather
    than an AttributeError. ``_main`` treats an empty dict as a failed attempt.

    resp -- response object exposing the page body as ``resp.content``
    """
    person = dict()
    soup = BeautifulSoup(resp.content, 'lxml')
    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue
        C = _strongText(tableCell[0])
        D = _strongText(tableCell[2])
        A = _CELL_RE.search(str(tableCell[1]))
        if C is None or D is None or A is None:
            # Layout differs from what we expect; ignore this cell.
            continue
        person[(C, D)] = (A.group(1), A.group(2), A.group(3))
    return person


def setURL(p):
    """Encode a person's impacting factors as a request URL and payload.

    Impacting factors:
        Date of birth
        Birth time
        Country of birth
        City of birth (and state of birth)

    The payload itself is built by ``dp.makePayload``.

    p -- one person record, passed through to ``dp.makePayload``

    Returns a ``(url, payload)`` tuple.
    """
    ## For some reason we need to post men first then women.
    # Example of a complete query URL, kept as a reference for parameter names:
    # url = "https://horoscopes.astro-seek.com/calculate-love-compatibility/?send_calculation=1&muz_narozeni_den=1&muz_narozeni_mesic=1&muz_narozeni_rok=1970&muz_narozeni_hodina=00&muz_narozeni_minuta=00&muz_narozeni_city=London%2C+United+Kingdom&muz_narozeni_mesto_hidden=London&muz_narozeni_stat_hidden=GB&muz_narozeni_podstat_kratky_hidden=England&muz_narozeni_podstat_hidden=England&muz_narozeni_podstat2_kratky_hidden=Greater+London&muz_narozeni_podstat3_kratky_hidden=undefined&muz_narozeni_input_hidden=&muz_narozeni_sirka_stupne=51&muz_narozeni_sirka_minuty=30&muz_narozeni_sirka_smer=0&muz_narozeni_delka_stupne=0&muz_narozeni_delka_minuty=8&muz_narozeni_delka_smer=1&muz_narozeni_timezone_form=auto&muz_narozeni_timezone_dst_form=auto&send_calculation=1&zena_narozeni_den=1&zena_narozeni_mesic=1&zena_narozeni_rok=1970&zena_narozeni_hodina=00&zena_narozeni_minuta=00&zena_narozeni_city=Berlin%2C+Germany&zena_narozeni_mesto_hidden=Berlin&zena_narozeni_stat_hidden=DE&zena_narozeni_podstat_kratky_hidden=Berlin&zena_narozeni_podstat_hidden=Berlin&zena_narozeni_podstat2_kratky_hidden=undefined&zena_narozeni_podstat3_kratky_hidden=undefined&zena_narozeni_input_hidden=&zena_narozeni_sirka_stupne=52&zena_narozeni_sirka_minuty=31&zena_narozeni_sirka_smer=0&zena_narozeni_delka_stupne=13&zena_narozeni_delka_minuty=24&zena_narozeni_delka_smer=0&zena_narozeni_timezone_form=auto&zena_narozeni_timezone_dst_form=auto&switch_interpretations=0&house_system=placidus&uhel_orbis=#tabs_redraw"
    # payload = {'muz_narozeni_den':'1','muz_narozeni_mesic':'1','muz_narozeni_rok':'1970'}
    # payload = {'send_calculation':'1','muz_narozeni_den':mDay,'muz_narozeni_mesic':mMonth,'muz_narozeni_rok':mYear,'zena_narozeni_den':fDay,'zena_narozeni_mesic':fMonth,'zena_narozeni_rok':fYear}
    url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
    payload = dp.makePayload(p)
    return (url, payload)


def requestURL(url, payload, delay=5):
    """GET ``url`` with ``payload`` as query parameters, then pause.

    url     -- address to request
    payload -- mapping of query parameters
    delay   -- seconds to sleep after the request (default 5), which spaces
               out consecutive calls

    Returns the ``requests`` response object.
    """
    r = requests.get(url, params=payload)
    time.sleep(delay)
    return r


def makeURLPayload(url, payload):
    """Return the full URL that ``requestURL`` would ask for.

    The query string is assembled by ``requests`` itself, so parameters are
    percent-encoded and joined exactly as in the real request (the previous
    hand-rolled version emitted ``?&key=value`` with unescaped values).
    """
    return requests.Request('GET', url, params=payload).prepare().url


def printToFile(filename, data, removeAdds=False):
    """Write a list of dict rows to ``filename`` as CSV.

    filename   -- path of the CSV file to create
    data       -- list of dicts, one per output row; it is not modified
    removeAdds -- when true, drop the columns named in ``_ADDITIONAL_KEYS``
                  from every row before writing

    The header is the union of all row keys in first-seen order; values a
    row lacks are written as empty strings. With no rows (or no columns) an
    empty file is produced.
    """
    if removeAdds:
        rows = [dict((key, value) for key, value in row.items()
                     if key not in _ADDITIONAL_KEYS)
                for row in data]
    else:
        rows = list(data)

    # Rows need not share the same keys, so collect every column once.
    keys = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                keys.append(key)

    with open(filename, 'w') as stream:
        if not keys:
            return
        dict_writer = csv.DictWriter(stream, keys, restval='')
        dict_writer.writeheader()
        dict_writer.writerows(rows)


def loadPick(filename):
    """Unpickle and return the object stored in ``filename``.

    NOTE: unpickling can execute arbitrary code; only load files written by
    this script.
    """
    with open(filename, 'rb') as handle:
        b = pickle.load(handle)
    return b


def savePick(filename, data):
    """Pickle ``data`` into ``filename``, replacing any existing file."""
    with open(filename, 'wb') as handle:
        pickle.dump(data, handle)


def tempPF(fName, data):
    """Write the string ``data`` to ``fName``, replacing any existing file."""
    with open(fName, 'w') as f__:
        f__.write(data)


def testMain():
    """Parse ``individuals.csv`` only and return the result (manual check)."""
    people = dp.parseCSV('individuals.csv')
    return people


def _fetchHoriscope(person):
    """Request and parse the page for ``person``, retrying once if empty."""
    url, payload = setURL(person)
    horiscope = parsePage(requestURL(url, payload))
    if not horiscope:  # an empty dict means nothing could be parsed
        print('attempt failed, try again')
        url, payload = setURL(person)
        horiscope = parsePage(requestURL(url, payload))
        if not horiscope:
            print('attempt two failed')
            # pdb.set_trace()
    return horiscope


def _main():
    """Fetch a horiscope for every person and save the results.

    The people list is cached in ``outData.pick``: it is parsed from
    ``individuals.csv`` on the first run and reloaded from the pickle
    afterwards. People without a ``pDOB`` value are skipped. The cache is
    rewritten after every person so that it always holds the full list,
    including the results gathered so far.
    """
    pickFile = 'outData.pick'
    # NOTE(review): the original referenced an undefined ``outFile``; this
    # name is an assumption chosen to parallel ``pickFile`` - confirm.
    outFile = 'outData.csv'

    if not os.path.exists(pickFile):
        print('reParse file')
        people = dp.parseCSV('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)

    horiscopeList = []
    for person in people:
        if person['pDOB'] is None or person['pDOB'] == '':
            print('SKIPPING person ' + person['ID'] + ' pDOB is None')
            continue
        print('parsing person ' + person['ID'])
        person['horiscope'] = _fetchHoriscope(person)
        # Flatten the parsed entries into the person record itself.
        person.update(person['horiscope'])
        horiscopeList.append(person)
        # Checkpoint the whole list; saving only ``person`` here would
        # replace the cached list with a single record and break the next run.
        savePick(pickFile, people)

    print(horiscopeList)
    savePick(pickFile, people)
    savePick('2' + pickFile, horiscopeList)
    printToFile('final_' + outFile, horiscopeList, False)


if __name__ == "__main__":
    _main()