Mercurial > hg > horiscopes
view V4/runme.py @ 14:a0c217ee4168
current edits
author | DaveM |
---|---|
date | Sun, 04 Mar 2018 15:03:15 +0000 |
parents | b253748dbb11 |
children | 50a95089414d |
line wrap: on
line source
#!/usr/bin/env python
"""Scrape horoscope (synastry) data for a list of people.

Python 2 script: uses print statements, HTMLParser, and 'rb' mode for
the csv module. Depends on the project modules dParse and synastry plus
the third-party requests and BeautifulSoup packages.
"""
import dParse as dp
# import compatibility as comp
import synastry as syn
import requests
import re
import time
import csv
import random
import pdb
import os
import pickle
from HTMLParser import HTMLParser
# from lxml import html
from bs4 import BeautifulSoup


def parsePage(resp):
    """Parse an astro-seek result page into a synastry.planetPositions.

    resp -- a requests.Response whose HTML contains the
            'right-sedy-banner-svetlejsi' result cells.
    Returns the populated planetPositions object.
    """
    horiscope = syn.planetPositions()
    soup = BeautifulSoup(resp.content, 'lxml')
    tcCell = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in tcCell:
        divList = cell.find_all('div')
        for i in range(len(divList)):
            planetName = divList[i].getText().lower()
            # BUG FIX: the original tested `planetName in
            # planetPositions.planetNames`, but `planetPositions` is not a
            # name in this module -- it lives in synastry, so every call
            # raised NameError. Qualify it through the imported module.
            if planetName in syn.planetPositions.planetNames:
                # Layout assumption: sign text sits 2 divs after the planet
                # name and degrees 4 after -- TODO confirm against the
                # current page markup.
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope


def makePeople(filename):
    """Read a CSV of birth data and return a list of synastry.Person.

    filename -- path to a CSV with a header row understood by
                dParse.regulateData.
    """
    people = []
    # 'rb' is correct for the Python 2 csv module. The original leaked the
    # open file handle and kept an unused dictList; a with-block closes the
    # file deterministically.
    with open(filename, 'rb') as handle:
        for line in csv.DictReader(handle):
            people.append(syn.Person(dp.regulateData(line)))
    return people


# def setURL(p):
#     url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
#     payload = dp.makePayload(p)
#     return (url,payload)


def requestURL(url, payload, delay=5):
    """GET `url` with query params `payload`, then pause between requests.

    delay -- seconds to sleep after the request; defaults to 5 (the
             original hard-coded value) so existing callers are unchanged,
             but the crawl rate is now tunable.
    """
    r = requests.get(url, params=payload)
    time.sleep(delay)
    return r
# for p in payload: # url += '&' + str(p) # url += '=' + str(payload[p]) # return url # def printToFile(filename,data,removeAdds): # if removeAdds == True: # del data['DOB'] # del data['TOB'] # del data['pDOB'] # del data['pTOB'] # del data['COB'] # del data['pCOB'] # del data['horiscope'] # # keys = data[0].keys() # keys = [] # for d in data: # keys = keys + d.keys() # keys = sorted(uniqueList(keys)) # with open(filename,'w') as stream: # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore') # dict_writer.writeheader() # dict_writer.writerows(data) def loadPick(filename): with open(filename, 'rb') as handle: b = pickle.load(handle) return b def savePick(filename,data): with open(filename, 'wb') as handle: pickle.dump(data,handle) # def tempPF(fName,data): # f__ = open(fName,'w') # f__.write(data) # f__.close() def parseHoriscope(people,saveFile): horiscopeList = [] for person in people: if person.p_dob is None or person.p_dob == '': print 'SKIPPING person '+ person.id + ' p_dob is None' # person.horiscope = None # horiscopeList.append({'ID':person['ID']}) else: print 'parsing person '+ person.id parseTries = 3 while parseTries > 0: try: person.makePayload() resp = requestURL(person.url,person.payload) person.horiscope = parsePage(resp) pdb.set_trace() parseTries = 0 except: print sys.exc_info()[0] parseTries -= 1 # for d in person.horiscope.keys(): # person[d] = person['horiscope'][d] # horiscopeList.append(person) # if saveFile is not None: # savePick(saveFile,horiscopeList) # return horiscopeList # savePick(pickFile,person) # savePick('2'+pickFile,horiscopeList) # printToFile('final_'+outFile,horiscopeList) # def printDict(d): # for d_ in d: # print (d,d_) # def refactorHoriscope(hor): # d = {} # d['ID'] = hor['ID'] # for h in hor['horiscope']: # hs = sorted(h) # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' 
+ str(hor['horiscope'][h][2])) # return d # def uniqueList(seq): # # order preserving # noDupes = [] # [noDupes.append(i) for i in seq if not noDupes.count(i)] # return noDupes # def merge_two_dicts(x, y): # z = x.copy() # start with x's keys and values # z.update(y) # modifies z with y's keys and values & returns None # return z # def findMissing(unique,keyList): # missing = [] # for u in unique: # if u not in keyList: # missing.append(u) # return u # def presentResults(saveFile): # data = [] # data2 = [] # hlist = loadPick(saveFile) # keyList = [] # for h in hlist: # d = refactorHoriscope(h) # keyList.append(d.keys()) # data.append(d) # uniqueKeys = uniqueList(keyList) # # for da in data: # # missingKeys = findMissing(uniqueKeys,da.keys()) # # # pdb.set_trace() # # d2 = dict(zip(missingKeys,[0]*len(missingKeys))) # # da = merge_two_dicts(da,d2) # # data2.append(da) # return data def newTest(): people = makePeople('individuals.csv') def testMain(): pickFile = 'outData.pick' # people = makePeople('individuals.csv') # savePick(pickFile,people) people = loadPick(pickFile) parseSaveFile = pickFile.split('.')[0]+'_collect.pick' parseHoriscope(people,parseSaveFile) # horiscopeData = presentResults(parseSaveFile) # comRules = comp.parseCompatDef('compatibilityRules.csv') # applyCompatScore(horiscopeData,rules) def _main(): pickFile = 'outData.pick' # people = dict() if not os.path.exists(pickFile): print 'reParse file' people = makePeople('individuals.csv') savePick(pickFile,people) else: print 'read in ' + pickFile people = loadPick(pickFile) parseSaveFile = pickFile.split('.')[0]+'_collect.pick' parseHoriscope(people,parseSaveFile) horiscopeData = presentResults(parseSaveFile) comRules = comp.parseCompatDef('compatibilityRules.csv') applyCompatScore(horiscopeData,rules) if __name__ == "__main__": testMain()