annotate V4/runme.py @ 14:a0c217ee4168

current edits
author DaveM
date Sun, 04 Mar 2018 15:03:15 +0000
parents b253748dbb11
children 50a95089414d
rev   line source
DaveM@13 1 #!/usr/bin/env python
DaveM@13 2 import dParse as dp
DaveM@13 3 # import compatibility as comp
DaveM@13 4 import synastry as syn
DaveM@13 5 import requests
DaveM@13 6 import re
DaveM@13 7 import time
DaveM@13 8 import csv
DaveM@13 9 import random
DaveM@13 10 import pdb
DaveM@13 11 import os
DaveM@13 12 import pickle
DaveM@13 13 from HTMLParser import HTMLParser
DaveM@13 14 # from lxml import html
DaveM@13 15 from bs4 import BeautifulSoup
DaveM@13 16
def parsePage(resp):
    """Extract planet positions from a fetched horoscope page.

    Parses ``resp.content`` with BeautifulSoup (lxml backend) and scans every
    ``div`` of class ``right-sedy-banner-svetlejsi``. For each nested ``div``
    whose lower-cased text is one of ``syn.planetPositions.planetNames``, the
    texts of the divs two and four positions further on are passed to that
    planet's ``setLocation``.

    :param resp: response object exposing the page body as ``.content``
    :returns: the populated ``syn.planetPositions`` instance
    :raises IndexError: if a planet label is not followed by at least four
        more divs inside the same cell (left to the caller, which retries)
    """
    horiscope = syn.planetPositions()
    soup = BeautifulSoup(resp.content, 'lxml')
    tcCell = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in tcCell:
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            # The class is only reachable through the ``syn`` module; the bare
            # name ``planetPositions`` is not imported and raised NameError.
            if planetName in syn.planetPositions.planetNames:
                # NOTE(review): what the +2 and +4 divs hold (sign / degree?)
                # is not visible from this file -- confirm against the page.
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope
DaveM@13 28
def makePeople(filename):
    """Build a list of ``syn.Person`` objects from a CSV file.

    Each row is read as a dict by ``csv.DictReader``, normalised through
    ``dp.regulateData`` and wrapped in ``syn.Person``.

    :param filename: path of the CSV file to read
    :returns: list of ``syn.Person`` instances, in file order
    """
    # The file is opened in a ``with`` block so the handle is closed even if a
    # row fails to parse (the original leaked it). 'rb' is the mode the
    # Python 2 csv module expects.
    with open(filename, 'rb') as handle:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(handle)]
DaveM@13 38
DaveM@13 39 # def setURL(p):
DaveM@13 40 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
DaveM@13 41 # payload = dp.makePayload(p)
DaveM@13 42 # return (url,payload)
DaveM@13 43
def requestURL(url, payload, delay=5):
    """Issue a GET request and then pause before returning.

    :param url: address to request
    :param payload: mapping sent as the query-string parameters
    :param delay: seconds to sleep after the request completes; defaults to
        the previously hard-coded 5 seconds so existing callers are unchanged
    :returns: the ``requests`` response object
    """
    response = requests.get(url, params=payload)
    # The pause happens after every request, successful or not, which spaces
    # out consecutive calls made by the retry loop in the caller.
    time.sleep(delay)
    return response
DaveM@13 48
DaveM@13 49 # def makeURLPayload(url,payload):
DaveM@13 50 # url += '?'
DaveM@13 51 # for p in payload:
DaveM@13 52 # url += '&' + str(p)
DaveM@13 53 # url += '=' + str(payload[p])
DaveM@13 54 # return url
DaveM@13 55
DaveM@13 56 # def printToFile(filename,data,removeAdds):
DaveM@13 57 # if removeAdds == True:
DaveM@13 58 # del data['DOB']
DaveM@13 59 # del data['TOB']
DaveM@13 60 # del data['pDOB']
DaveM@13 61 # del data['pTOB']
DaveM@13 62 # del data['COB']
DaveM@13 63 # del data['pCOB']
DaveM@13 64 # del data['horiscope']
DaveM@13 65 # # keys = data[0].keys()
DaveM@13 66 # keys = []
DaveM@13 67 # for d in data:
DaveM@13 68 # keys = keys + d.keys()
DaveM@13 69 # keys = sorted(uniqueList(keys))
DaveM@13 70 # with open(filename,'w') as stream:
DaveM@13 71 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
DaveM@13 72 # dict_writer.writeheader()
DaveM@13 73 # dict_writer.writerows(data)
DaveM@13 74
def loadPick(filename):
    """Return the object stored in the pickle file ``filename``.

    :param filename: path of a file previously written with ``pickle.dump``
    :returns: the unpickled object
    """
    # NOTE: unpickling executes arbitrary code; only load files this script
    # wrote itself.
    with open(filename, 'rb') as source:
        return pickle.load(source)
DaveM@13 79
def savePick(filename, data):
    """Pickle ``data`` into ``filename``, replacing any existing file.

    :param filename: destination path, opened in binary write mode
    :param data: any picklable object
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)
DaveM@13 83
DaveM@13 84 # def tempPF(fName,data):
DaveM@13 85 # f__ = open(fName,'w')
DaveM@13 86 # f__.write(data)
DaveM@13 87 # f__.close()
DaveM@13 88
def parseHoriscope(people, saveFile):
    """Fetch and attach a horoscope to every person that has a ``p_dob``.

    People whose ``p_dob`` is ``None`` or the empty string are reported and
    skipped. For everyone else the payload is built, the page is requested
    and parsed, and the result is stored on ``person.horiscope``. Each person
    gets up to three attempts; after a failure the exception type is printed
    and the next attempt starts.

    :param people: iterable of person objects exposing ``id``, ``p_dob``,
        ``makePayload()``, ``url`` and ``payload``
    :param saveFile: currently unused; kept so existing callers keep working
    :returns: None (the ``people`` objects are updated in place)
    """
    # Local import: the handler needs ``sys`` but the module header does not
    # import it, so the original raised NameError on the first failure.
    import sys

    for person in people:
        if person.p_dob is None or person.p_dob == '':
            # Single-argument print(...) prints the same text under the
            # Python 2 print statement used elsewhere in this file.
            print('SKIPPING person ' + person.id + ' p_dob is None')
            continue

        print('parsing person ' + person.id)
        for _attempt in range(3):
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                person.horiscope = parsePage(resp)
                # The leftover pdb.set_trace() was removed here; it stopped
                # the run after every successful parse.
                break
            except Exception:
                # ``Exception`` (not a bare except) so Ctrl-C still aborts.
                print(sys.exc_info()[0])
DaveM@13 109 # for d in person.horiscope.keys():
DaveM@13 110 # person[d] = person['horiscope'][d]
DaveM@13 111 # horiscopeList.append(person)
DaveM@13 112 # if saveFile is not None:
DaveM@13 113 # savePick(saveFile,horiscopeList)
DaveM@13 114 # return horiscopeList
DaveM@13 115 # savePick(pickFile,person)
DaveM@13 116 # savePick('2'+pickFile,horiscopeList)
DaveM@13 117 # printToFile('final_'+outFile,horiscopeList)
DaveM@13 118
DaveM@13 119 # def printDict(d):
DaveM@13 120 # for d_ in d:
DaveM@13 121 # print (d,d_)
DaveM@13 122
DaveM@13 123 # def refactorHoriscope(hor):
DaveM@13 124 # d = {}
DaveM@13 125 # d['ID'] = hor['ID']
DaveM@13 126 # for h in hor['horiscope']:
DaveM@13 127 # hs = sorted(h)
DaveM@13 128 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
DaveM@13 129 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
DaveM@13 130 # return d
DaveM@13 131
DaveM@13 132 # def uniqueList(seq):
DaveM@13 133 # # order preserving
DaveM@13 134 # noDupes = []
DaveM@13 135 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
DaveM@13 136 # return noDupes
DaveM@13 137
DaveM@13 138 # def merge_two_dicts(x, y):
DaveM@13 139 # z = x.copy() # start with x's keys and values
DaveM@13 140 # z.update(y) # modifies z with y's keys and values & returns None
DaveM@13 141 # return z
DaveM@13 142
DaveM@13 143 # def findMissing(unique,keyList):
DaveM@13 144 # missing = []
DaveM@13 145 # for u in unique:
DaveM@13 146 # if u not in keyList:
DaveM@13 147 # missing.append(u)
DaveM@13 148 # return u
DaveM@13 149
DaveM@13 150 # def presentResults(saveFile):
DaveM@13 151 # data = []
DaveM@13 152 # data2 = []
DaveM@13 153 # hlist = loadPick(saveFile)
DaveM@13 154 # keyList = []
DaveM@13 155 # for h in hlist:
DaveM@13 156 # d = refactorHoriscope(h)
DaveM@13 157 # keyList.append(d.keys())
DaveM@13 158 # data.append(d)
DaveM@13 159 # uniqueKeys = uniqueList(keyList)
DaveM@13 160 # # for da in data:
DaveM@13 161 # # missingKeys = findMissing(uniqueKeys,da.keys())
DaveM@13 162 # # # pdb.set_trace()
DaveM@13 163 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
DaveM@13 164 # # da = merge_two_dicts(da,d2)
DaveM@13 165 # # data2.append(da)
DaveM@13 166 # return data
DaveM@13 167
DaveM@13 168
def newTest():
    """Build the people list from 'individuals.csv'; the result is discarded."""
    makePeople('individuals.csv')
DaveM@13 171
DaveM@13 172
def testMain():
    """Development driver: load the pickled people and fetch their horoscopes.

    Reads 'outData.pick' and hands the people to ``parseHoriscope`` together
    with the derived save-file name 'outData_collect.pick'.
    """
    pickFile = 'outData.pick'
    # To rebuild the pickle from the CSV instead of loading it:
    #     savePick(pickFile, makePeople('individuals.csv'))
    loaded = loadPick(pickFile)
    # Everything before the first '.' is the stem of the save-file name.
    stem = pickFile.partition('.')[0]
    parseHoriscope(loaded, stem + '_collect.pick')
DaveM@13 180 # horiscopeData = presentResults(parseSaveFile)
DaveM@13 181 # comRules = comp.parseCompatDef('compatibilityRules.csv')
DaveM@13 182 # applyCompatScore(horiscopeData,rules)
DaveM@13 183
def _main():
    """Full pipeline: load or build the people list, fetch horoscopes, score.

    If 'outData.pick' does not exist, the people are parsed from
    'individuals.csv' and pickled; otherwise the pickle is loaded. The people
    are then passed to ``parseHoriscope`` and on to the compatibility steps.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        # Single-argument print(...) prints the same text under the Python 2
        # print statement used elsewhere in this file.
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # NOTE(review): ``presentResults`` and ``applyCompatScore`` exist only as
    # commented-out or missing code in the visible file, and the
    # ``compatibility as comp`` import is commented out, so the calls below
    # raise NameError until those are restored.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    # Pass the rules that were just loaded; the original passed the undefined
    # name ``rules``.
    applyCompatScore(horiscopeData, comRules)
DaveM@13 199
# Script entry point: run the development driver when executed directly.
if __name__ == "__main__":
    testMain()