#!/usr/bin/env python
import dParse as dp
# import compatibility as comp
import synastry as syn
import requests
import re
import time
import csv
import random
import pdb
import os
import pickle
from HTMLParser import HTMLParser
# from lxml import html
from bs4 import BeautifulSoup


def parsePage(horiscope, resp):
    """Fill a planet-position object from a fetched results page.

    Every <div class="right-sedy-banner-svetlejsi"> in the response is
    scanned; whenever the lower-cased text of a nested <div> is one of
    ``syn.planetPositions.planetNames``, the text of the divs two and four
    positions later is passed to that planet's ``setLocation``.

    horiscope -- object to populate in place.  Anything without a
                 ``planets`` attribute (e.g. None) is replaced by a fresh
                 ``syn.planetPositions()``.
    resp      -- response object; only ``resp.content`` is read.

    Returns the populated object so the caller can keep it even when a
    fresh one had to be created.
    """
    # The original always rebound ``horiscope`` to a new object and returned
    # nothing, so the parsed positions were never visible to the caller.
    if not hasattr(horiscope, 'planets'):
        horiscope = syn.planetPositions()
    # NOTE(review): assumes planetNames is reachable on the class itself;
    # confirm against synastry.planetPositions.
    knownNames = syn.planetPositions.planetNames

    soup = BeautifulSoup(resp.content, 'lxml')
    cells = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in cells:
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            if planetName not in knownNames:
                continue
            # A planet name too close to the end of the list has no
            # companion cells; skip it instead of raising IndexError.
            if i + 4 >= len(divList):
                continue
            horiscope.planets[planetName].setLocation(
                divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope


def makePeople(filename):
    """Build one ``syn.Person`` per row of the CSV file ``filename``.

    Each row is read as a dict, passed through ``dp.regulateData`` and then
    handed to the ``syn.Person`` constructor.  Returns the list of people in
    file order.
    """
    people = []
    # 'rb' is the mode the Python 2 csv module expects; the ``with`` block
    # closes the handle, which the original left open.
    with open(filename, 'rb') as handle:
        for line in csv.DictReader(handle):
            people.append(syn.Person(dp.regulateData(line)))
    # pdb.set_trace()
    return people


# def setURL(p):
#     url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
#     payload = dp.makePayload(p)
#     return (url,payload)

def requestURL(url, payload, delay=5):
    """GET ``url`` with ``payload`` as query parameters and return the response.

    After the request completes the call sleeps for ``delay`` seconds
    (default 5, the value that used to be hard-coded) -- presumably to
    throttle successive requests to the remote site.
    """
    r = requests.get(url, params=payload)
    time.sleep(delay)
    return r


# def makeURLPayload(url,payload):
#     url += '?'
#     for p in payload:
#         url += '&' + str(p)
#         url += '=' + str(payload[p])
#     return url

# def printToFile(filename,data,removeAdds):
#     if removeAdds == True:
#         del data['DOB']
#         del data['TOB']
#         del data['pDOB']
#         del data['pTOB']
#         del data['COB']
#         del data['pCOB']
#         del data['horiscope']
#     # keys = data[0].keys()
#     keys = []
#     for d in data:
#         keys = keys + d.keys()
#     keys = sorted(uniqueList(keys))
#     with open(filename,'w') as stream:
#         dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
#         dict_writer.writeheader()
#         dict_writer.writerows(data)

def loadPick(filename):
    """Return the object unpickled from the file ``filename``."""
    # NOTE: pickle.load executes arbitrary code embedded in the file; only
    # use this on pickle files this program wrote itself.
    with open(filename, 'rb') as handle:
        b = pickle.load(handle)
    return b


def savePick(filename, data):
    """Pickle ``data`` into the file ``filename``, overwriting it."""
    with open(filename, 'wb') as handle:
        pickle.dump(data, handle)


# def tempPF(fName,data):
#     f__ = open(fName,'w')
#     f__.write(data)
#     f__.close()

def parseHoriscope(people, saveFile):
    """Fetch and parse the result page for every person that has a p_dob.

    people   -- iterable of person objects.  Each needs ``id`` and ``p_dob``;
                people that are processed also need ``makePayload()``,
                ``url``, ``payload`` and ``horiscope``.
    saveFile -- currently unused: the code that pickled the collected
                results is disabled (kept as comments at the end).

    A person whose ``p_dob`` is None or '' is skipped with a message.  For
    everyone else the fetch-and-parse sequence is tried up to three times;
    each failure prints the exception class and the next attempt follows.
    Returns None.
    """
    for person in people:
        if person.p_dob is None or person.p_dob == '':
            # %-formatting also copes with non-string ids, which the
            # original string concatenation did not.
            print('SKIPPING person %s p_dob is None' % (person.id,))
            # person.horiscope = None
            # horiscopeList.append({'ID':person['ID']})
            continue

        print('parsing person %s' % (person.id,))
        for _attempt in range(3):
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                parsed = parsePage(person.horiscope, resp)
            except Exception as exc:
                # The original printed sys.exc_info()[0] without importing
                # sys, so the handler itself raised NameError.  type(exc)
                # prints the same class.  Catching Exception rather than
                # everything lets Ctrl-C stop a long run.
                print(type(exc))
                continue
            # Keep the parsed positions if parsePage hands them back; a None
            # return leaves person.horiscope untouched.
            if parsed is not None:
                person.horiscope = parsed
            # A leftover pdb.set_trace() used to sit here and stopped the
            # run after every successful parse; it has been removed.
            break

    # Disabled result collection, kept for reference (it relied on a
    # ``horiscopeList = []`` local that is no longer created):
    #         for d in person.horiscope.keys():
    #             person[d] = person['horiscope'][d]
    #         horiscopeList.append(person)
    #     if saveFile is not None:
    #         savePick(saveFile,horiscopeList)
    # return horiscopeList
    # savePick(pickFile,person)
    # savePick('2'+pickFile,horiscopeList)
    # printToFile('final_'+outFile,horiscopeList)


# def printDict(d):
#     for d_ in d:
#         print (d,d_)

# def refactorHoriscope(hor):
#     d = {}
#     d['ID'] = hor['ID']
#     for h in hor['horiscope']:
#         hs = sorted(h)
#         d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
#         d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
#     return d

# def uniqueList(seq):
#     # order preserving
#     noDupes = []
#     [noDupes.append(i) for i in seq if not noDupes.count(i)]
#     return noDupes

# def merge_two_dicts(x, y):
#     z = x.copy() # start with x's keys and values
#     z.update(y) # modifies z with y's keys and values & returns None
#     return z

# def findMissing(unique,keyList):
#     missing = []
#     for u in unique:
#         if u not in keyList:
#             missing.append(u)
#     return u

# def presentResults(saveFile):
#     data = []
#     data2 = []
#     hlist = loadPick(saveFile)
#     keyList = []
#     for h in hlist:
#         d = refactorHoriscope(h)
#         keyList.append(d.keys())
#         data.append(d)
#     uniqueKeys = uniqueList(keyList)
#     # for da in data:
#     #     missingKeys = findMissing(uniqueKeys,da.keys())
#     #     # pdb.set_trace()
#     #     d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
#     #     da = merge_two_dicts(da,d2)
#     #     data2.append(da)
#     return data

def newTest():
    """Load the people listed in 'individuals.csv' and return them."""
    # The original bound the list to a local and dropped it; returning it
    # makes the helper usable without affecting callers that ignore it.
    return makePeople('individuals.csv')


def testMain():
    """Run the scrape against the people already pickled in 'outData.pick'."""
    pickFile = 'outData.pick'
    # people = makePeople('individuals.csv')
    # savePick(pickFile,people)
    people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # horiscopeData = presentResults(parseSaveFile)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData,rules)


def _main():
    """Full pipeline: load or build the people list, scrape, then score.

    The people list is read from 'outData.pick' when that file exists;
    otherwise it is built from 'individuals.csv' and pickled there first.
    """
    pickFile = 'outData.pick'
    # people = dict()
    if not os.path.exists(pickFile):
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # NOTE(review): presentResults is commented out in this file, the
    # ``compatibility as comp`` import is commented out, and no
    # applyCompatScore definition is visible here -- this tail cannot run
    # until those are restored.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    # The original passed the undefined name ``rules``; the rules parsed on
    # the previous line are what is meant.
    applyCompatScore(horiscopeData, comRules)


if __name__ == "__main__":
    testMain()