Mercurial > hg > horiscopes
diff V4/runme.py @ 13:b253748dbb11
developing V4 - Class based structure, with self calculation of all aspects
author | DaveM |
---|---|
date | Sun, 04 Mar 2018 14:51:43 +0000 |
parents | |
children | a0c217ee4168 |
line wrap: on
line diff
#!/usr/bin/env python
import dParse as dp
# import compatibility as comp
import synastry as syn
import requests
import re
import time
import csv
import random
import pdb
import os
import pickle
from HTMLParser import HTMLParser  # NOTE(review): Python 2 only and unused below -- confirm before removing
# from lxml import html
from bs4 import BeautifulSoup


def parsePage(horiscope, resp):
    """Populate *horiscope* with planet positions scraped from *resp*.

    Parameters:
        horiscope: a ``syn.planetPositions`` instance whose ``planets``
            mapping is filled in place.
        resp: a ``requests`` response; its HTML contains
            'right-sedy-banner-svetlejsi' div cells where each planet-name
            div is followed two and four divs later by its location values.
    """
    # BUG FIX: the original immediately rebound the ``horiscope`` parameter
    # to a fresh object, so the caller's horiscope was never populated, and
    # it referenced the bare name ``planetPositions`` (NameError) instead of
    # the class on the synastry module.
    soup = BeautifulSoup(resp.content, 'lxml')
    tcCell = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in tcCell:
        divList = cell.find_all('div')
        for i in range(len(divList)):
            planetName = divList[i].getText().lower()
            if planetName in syn.planetPositions.planetNames:
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(), divList[i + 4].getText())


def makePeople(filename):
    """Read the CSV *filename* and return a list of ``syn.Person`` objects,
    one per row (rows are normalised through ``dp.regulateData`` first)."""
    # BUG FIX: the original leaked the open file handle and built an unused
    # ``dictList``; a ``with`` block closes the file deterministically.
    people = []
    with open(filename, 'rb') as handle:
        for line in csv.DictReader(handle):
            people.append(syn.Person(dp.regulateData(line)))
    return people


def requestURL(url, payload):
    """GET *url* with query parameters *payload* and return the response.

    Sleeps 5 seconds after every request as a crude rate limit against the
    remote horoscope site.
    """
    r = requests.get(url, params=payload)
    time.sleep(5)
    return r
def loadPick(filename):
    """Return the object unpickled from *filename*.

    NOTE(review): only load pickles this program wrote itself --
    ``pickle.load`` on untrusted data can execute arbitrary code.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


def savePick(filename, data):
    """Pickle *data* into *filename*, overwriting any existing file."""
    with open(filename, 'wb') as handle:
        pickle.dump(data, handle)


def parseHoriscope(people, saveFile):
    """Fetch and parse the horoscope page for every Person in *people*.

    People without a partner date of birth (``p_dob``) are skipped.  Each
    person's request/parse cycle is retried up to three times on error.
    *saveFile* is currently unused -- the save-to-pickle step is disabled.
    """
    for person in people:
        # Guard clause: nothing to request without a partner DOB.
        if person.p_dob is None or person.p_dob == '':
            print('SKIPPING person ' + person.id + ' p_dob is None')
            continue
        print('parsing person ' + person.id)
        parseTries = 3
        while parseTries > 0:
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                parsePage(person.horiscope, resp)
                parseTries = 0  # success: leave the retry loop
            except Exception as exc:
                # BUG FIX: the original used a bare ``except:`` whose handler
                # printed ``sys.exc_info()[0]`` without ``sys`` ever being
                # imported, so the handler itself raised NameError and masked
                # the real failure.  It also left a ``pdb.set_trace()``
                # breakpoint on the success path.
                print(exc)
                parseTries -= 1
def newTest():
    """Smoke test: load the individuals CSV into Person objects and return them."""
    # BUG FIX: the original built the list and silently discarded it.
    return makePeople('individuals.csv')


def testMain():
    """Re-use a previously pickled people list and run the scrape/parse pass."""
    pickFile = 'outData.pick'
    # people = makePeople('individuals.csv')
    # savePick(pickFile, people)
    people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # horiscopeData = presentResults(parseSaveFile)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData, comRules)


def _main():
    """Full pipeline: build the people list (or re-load a cached pickle),
    then run the scrape/parse pass over it.

    The scoring stage is disabled: ``presentResults``, the compatibility
    module and ``applyCompatScore`` only exist as commented-out code.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # BUG FIX: the lines below called three undefined names (presentResults,
    # comp, applyCompatScore) and passed a typo'd variable (``rules`` for
    # ``comRules``), guaranteeing a NameError; they stay disabled until the
    # commented-out helpers elsewhere in this file are restored.
    # horiscopeData = presentResults(parseSaveFile)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData, comRules)


if __name__ == "__main__":
    testMain()