Mercurial > hg > horiscopes
diff V4/runme.py @ 15:50a95089414d
Updating to allow all aspects to be calculated, and removing default value passing for people.
author | DaveM |
---|---|
date | Sun, 04 Mar 2018 17:09:50 +0000 |
parents | a0c217ee4168 |
children | b11cff4b7f83 |
line wrap: on
line diff
--- a/V4/runme.py Sun Mar 04 15:03:15 2018 +0000 +++ b/V4/runme.py Sun Mar 04 17:09:50 2018 +0000 @@ -1,29 +1,33 @@ #!/usr/bin/env python + + import dParse as dp -# import compatibility as comp import synastry as syn import requests -import re import time import csv -import random import pdb import os import pickle -from HTMLParser import HTMLParser -# from lxml import html +import sys from bs4 import BeautifulSoup def parsePage(resp): + gotLocation = 0 horiscope = syn.planetPositions() soup = BeautifulSoup(resp.content, 'lxml') tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) for cell in tcCell: + if "Planets in partner's house" in cell.get_text(): + gotLocation = 1 divList = cell.find_all('div') for i in range(len(divList)): - planetName = divList[i].getText().lower() - if planetName in planetPositions.planetNames: - horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) + planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0] + if planetName in syn.planetPositions.planetNames: + if gotLocation and not '/' in planetName: + horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText()) + else: + horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) return horiscope def makePeople(filename): @@ -36,42 +40,11 @@ # pdb.set_trace() return people -# def setURL(p): -# url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/' -# payload = dp.makePayload(p) -# return (url,payload) - def requestURL(url,payload): r = requests.get(url, params=payload) time.sleep(5) return r -# def makeURLPayload(url,payload): -# url += '?' 
-# for p in payload: -# url += '&' + str(p) -# url += '=' + str(payload[p]) -# return url - -# def printToFile(filename,data,removeAdds): -# if removeAdds == True: -# del data['DOB'] -# del data['TOB'] -# del data['pDOB'] -# del data['pTOB'] -# del data['COB'] -# del data['pCOB'] -# del data['horiscope'] -# # keys = data[0].keys() -# keys = [] -# for d in data: -# keys = keys + d.keys() -# keys = sorted(uniqueList(keys)) -# with open(filename,'w') as stream: -# dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore') -# dict_writer.writeheader() -# dict_writer.writerows(data) - def loadPick(filename): with open(filename, 'rb') as handle: b = pickle.load(handle) @@ -81,102 +54,38 @@ with open(filename, 'wb') as handle: pickle.dump(data,handle) -# def tempPF(fName,data): -# f__ = open(fName,'w') -# f__.write(data) -# f__.close() - def parseHoriscope(people,saveFile): horiscopeList = [] for person in people: - if person.p_dob is None or person.p_dob == '': - print 'SKIPPING person '+ person.id + ' p_dob is None' - # person.horiscope = None - # horiscopeList.append({'ID':person['ID']}) + issue = person.identifyIssues() + if issue is not None: + print 'SKIPPING person '+ person.id + ' error with ' + issue else: print 'parsing person '+ person.id - parseTries = 3 - while parseTries > 0: - try: - person.makePayload() - resp = requestURL(person.url,person.payload) - person.horiscope = parsePage(resp) - pdb.set_trace() - parseTries = 0 - except: - print sys.exc_info()[0] - parseTries -= 1 - # for d in person.horiscope.keys(): - # person[d] = person['horiscope'][d] - # horiscopeList.append(person) - # if saveFile is not None: - # savePick(saveFile,horiscopeList) - # return horiscopeList - # savePick(pickFile,person) - # savePick('2'+pickFile,horiscopeList) - # printToFile('final_'+outFile,horiscopeList) - -# def printDict(d): -# for d_ in d: -# print (d,d_) - -# def refactorHoriscope(hor): -# d = {} -# d['ID'] = hor['ID'] -# for h in hor['horiscope']: -# hs = 
sorted(h) -# d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1 -# d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2])) -# return d - -# def uniqueList(seq): -# # order preserving -# noDupes = [] -# [noDupes.append(i) for i in seq if not noDupes.count(i)] -# return noDupes - -# def merge_two_dicts(x, y): -# z = x.copy() # start with x's keys and values -# z.update(y) # modifies z with y's keys and values & returns None -# return z - -# def findMissing(unique,keyList): -# missing = [] -# for u in unique: -# if u not in keyList: -# missing.append(u) -# return u - -# def presentResults(saveFile): -# data = [] -# data2 = [] -# hlist = loadPick(saveFile) -# keyList = [] -# for h in hlist: -# d = refactorHoriscope(h) -# keyList.append(d.keys()) -# data.append(d) -# uniqueKeys = uniqueList(keyList) -# # for da in data: -# # missingKeys = findMissing(uniqueKeys,da.keys()) -# # # pdb.set_trace() -# # d2 = dict(zip(missingKeys,[0]*len(missingKeys))) -# # da = merge_two_dicts(da,d2) -# # data2.append(da) -# return data - - -def newTest(): - people = makePeople('individuals.csv') - + person.makePayload() + person.resp = requestURL(person.url,person.payload) + person.horiscope = parsePage(person.resp) + # person.horiscope.printPositions() + if saveFile is not None: + savePick(saveFile,people) def testMain(): - pickFile = 'outData.pick' - # people = makePeople('individuals.csv') - # savePick(pickFile,people) - people = loadPick(pickFile) - parseSaveFile = pickFile.split('.')[0]+'_collect.pick' - parseHoriscope(people,parseSaveFile) + restartDataFile = 1 + if(restartDataFile): + pickFile = 'outData.pick' + # people = makePeople('individuals.csv') + # savePick(pickFile,people) + people = loadPick(pickFile) + parseSaveFile = pickFile.split('.')[0]+'_collect.pick' + parseHoriscope(people,parseSaveFile) + else: + people = loadPick('onlineDatacollect.pick') + for p in people: + if p.horiscope is None: + print p.id + else: + p.horiscope.calcAllAspects() 
+ # horiscopeData = presentResults(parseSaveFile) # comRules = comp.parseCompatDef('compatibilityRules.csv') # applyCompatScore(horiscopeData,rules)