annotate V4/runme.py @ 13:b253748dbb11

developing V4 - Class-based structure, with self-calculation of all aspects
author DaveM
date Sun, 04 Mar 2018 14:51:43 +0000
parents
children a0c217ee4168
rev   line source
DaveM@13 1 #!/usr/bin/env python
DaveM@13 2 import dParse as dp
DaveM@13 3 # import compatibility as comp
DaveM@13 4 import synastry as syn
DaveM@13 5 import requests
DaveM@13 6 import re
DaveM@13 7 import time
DaveM@13 8 import csv
DaveM@13 9 import random
DaveM@13 10 import pdb
DaveM@13 11 import os
DaveM@13 12 import pickle
DaveM@13 13 from HTMLParser import HTMLParser
DaveM@13 14 # from lxml import html
DaveM@13 15 from bs4 import BeautifulSoup
DaveM@13 16
def parsePage(horiscope, resp):
    """Read planet locations out of a fetched page into ``horiscope``.

    Every ``div`` with class ``right-sedy-banner-svetlejsi`` is scanned. When
    the lower-cased text of one of its nested ``div`` elements is a known
    planet name, the text of the divs two and four positions further on is
    handed to that planet's ``setLocation``.

    Args:
        horiscope: record to fill; it is used through
            ``horiscope.planets[name].setLocation(...)``. When ``None`` a new
            ``syn.planetPositions()`` is created instead.
        resp: response object whose ``content`` attribute holds the page HTML.

    Returns:
        The record that was filled, so the caller can keep the result.
    """
    # Populate the caller's record instead of discarding it; only build a new
    # one when none was supplied.
    if horiscope is None:
        horiscope = syn.planetPositions()

    # NOTE(review): the original referenced a bare ``planetPositions`` that is
    # not in scope in this module; it is presumably the class exported by
    # ``synastry`` -- confirm that ``planetNames`` lives there.
    knownPlanets = syn.planetPositions.planetNames

    soup = BeautifulSoup(resp.content, 'lxml')
    for cell in soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'}):
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            if planetName not in knownPlanets:
                continue
            # The two values sit at fixed offsets after the name; skip a
            # trailing name whose value cells are missing rather than raising
            # IndexError.
            if i + 4 >= len(divList):
                continue
            horiscope.planets[planetName].setLocation(
                divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope
DaveM@13 27
def makePeople(filename):
    """Build one ``syn.Person`` per row of a CSV file.

    Each row is read as a dict by ``csv.DictReader``, passed through
    ``dp.regulateData`` and then handed to ``syn.Person``.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of ``syn.Person`` objects in file order.
    """
    # 'rb' is the mode the Python 2 csv module expects. The ``with`` block
    # guarantees the handle is closed, and the rows are consumed inside it
    # because DictReader reads lazily.
    with open(filename, 'rb') as handle:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(handle)]
DaveM@13 37
DaveM@13 38 # def setURL(p):
DaveM@13 39 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
DaveM@13 40 # payload = dp.makePayload(p)
DaveM@13 41 # return (url,payload)
DaveM@13 42
def requestURL(url, payload, delay=5, timeout=None):
    """GET ``url`` with ``payload`` as query parameters, then pause.

    Args:
        url: address to request.
        payload: mapping passed to ``requests.get`` as ``params``.
        delay: seconds to sleep after the request returns. Defaults to 5,
            the pause that used to be hard-coded.
        timeout: forwarded to ``requests.get``. The default ``None`` keeps
            the previous behaviour of waiting indefinitely for the server.

    Returns:
        The response object returned by ``requests.get``.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    # Pause after every request so consecutive calls are spaced out.
    time.sleep(delay)
    return r
DaveM@13 47
DaveM@13 48 # def makeURLPayload(url,payload):
DaveM@13 49 # url += '?'
DaveM@13 50 # for p in payload:
DaveM@13 51 # url += '&' + str(p)
DaveM@13 52 # url += '=' + str(payload[p])
DaveM@13 53 # return url
DaveM@13 54
DaveM@13 55 # def printToFile(filename,data,removeAdds):
DaveM@13 56 # if removeAdds == True:
DaveM@13 57 # del data['DOB']
DaveM@13 58 # del data['TOB']
DaveM@13 59 # del data['pDOB']
DaveM@13 60 # del data['pTOB']
DaveM@13 61 # del data['COB']
DaveM@13 62 # del data['pCOB']
DaveM@13 63 # del data['horiscope']
DaveM@13 64 # # keys = data[0].keys()
DaveM@13 65 # keys = []
DaveM@13 66 # for d in data:
DaveM@13 67 # keys = keys + d.keys()
DaveM@13 68 # keys = sorted(uniqueList(keys))
DaveM@13 69 # with open(filename,'w') as stream:
DaveM@13 70 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
DaveM@13 71 # dict_writer.writeheader()
DaveM@13 72 # dict_writer.writerows(data)
DaveM@13 73
def loadPick(filename):
    """Return the object unpickled from the file at ``filename``.

    NOTE: unpickling can run arbitrary code, so only load files this
    program wrote itself.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)
DaveM@13 78
def savePick(filename, data):
    """Pickle ``data`` into the file at ``filename``, overwriting it."""
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)
DaveM@13 82
DaveM@13 83 # def tempPF(fName,data):
DaveM@13 84 # f__ = open(fName,'w')
DaveM@13 85 # f__.write(data)
DaveM@13 86 # f__.close()
DaveM@13 87
def parseHoriscope(people, saveFile):
    """Fetch and parse the chart page for every person that has a ``p_dob``.

    People whose ``p_dob`` is ``None`` or empty are reported and skipped.
    For the others the payload is built, the page is requested and parsed,
    and the whole sequence is retried up to three times when it raises.

    Args:
        people: iterable of objects exposing ``id``, ``p_dob``, ``url``,
            ``payload``, ``horiscope`` and ``makePayload()``.
        saveFile: currently unused; the code that pickled the collected
            results is commented out below this function.

    Returns:
        None.
    """
    maxTries = 3
    for person in people:
        if person.p_dob is None or person.p_dob == '':
            print('SKIPPING person ' + person.id + ' p_dob is None')
            continue

        print('parsing person ' + person.id)
        for _attempt in range(maxTries):
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                parsePage(person.horiscope, resp)
            except Exception as err:
                # ``sys`` is not imported in this module, so the old
                # ``sys.exc_info()`` report raised NameError itself. Report
                # from the caught exception instead. Catching ``Exception``
                # rather than everything lets Ctrl-C stop the run.
                print(type(err).__name__ + ': ' + str(err))
            else:
                # Parsed successfully: no further attempts needed. The stray
                # ``pdb.set_trace()`` that halted here was removed.
                break
DaveM@13 108 # for d in person.horiscope.keys():
DaveM@13 109 # person[d] = person['horiscope'][d]
DaveM@13 110 # horiscopeList.append(person)
DaveM@13 111 # if saveFile is not None:
DaveM@13 112 # savePick(saveFile,horiscopeList)
DaveM@13 113 # return horiscopeList
DaveM@13 114 # savePick(pickFile,person)
DaveM@13 115 # savePick('2'+pickFile,horiscopeList)
DaveM@13 116 # printToFile('final_'+outFile,horiscopeList)
DaveM@13 117
DaveM@13 118 # def printDict(d):
DaveM@13 119 # for d_ in d:
DaveM@13 120 # print (d,d_)
DaveM@13 121
DaveM@13 122 # def refactorHoriscope(hor):
DaveM@13 123 # d = {}
DaveM@13 124 # d['ID'] = hor['ID']
DaveM@13 125 # for h in hor['horiscope']:
DaveM@13 126 # hs = sorted(h)
DaveM@13 127 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
DaveM@13 128 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
DaveM@13 129 # return d
DaveM@13 130
DaveM@13 131 # def uniqueList(seq):
DaveM@13 132 # # order preserving
DaveM@13 133 # noDupes = []
DaveM@13 134 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
DaveM@13 135 # return noDupes
DaveM@13 136
DaveM@13 137 # def merge_two_dicts(x, y):
DaveM@13 138 # z = x.copy() # start with x's keys and values
DaveM@13 139 # z.update(y) # modifies z with y's keys and values & returns None
DaveM@13 140 # return z
DaveM@13 141
DaveM@13 142 # def findMissing(unique,keyList):
DaveM@13 143 # missing = []
DaveM@13 144 # for u in unique:
DaveM@13 145 # if u not in keyList:
DaveM@13 146 # missing.append(u)
DaveM@13 147 # return u
DaveM@13 148
DaveM@13 149 # def presentResults(saveFile):
DaveM@13 150 # data = []
DaveM@13 151 # data2 = []
DaveM@13 152 # hlist = loadPick(saveFile)
DaveM@13 153 # keyList = []
DaveM@13 154 # for h in hlist:
DaveM@13 155 # d = refactorHoriscope(h)
DaveM@13 156 # keyList.append(d.keys())
DaveM@13 157 # data.append(d)
DaveM@13 158 # uniqueKeys = uniqueList(keyList)
DaveM@13 159 # # for da in data:
DaveM@13 160 # # missingKeys = findMissing(uniqueKeys,da.keys())
DaveM@13 161 # # # pdb.set_trace()
DaveM@13 162 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
DaveM@13 163 # # da = merge_two_dicts(da,d2)
DaveM@13 164 # # data2.append(da)
DaveM@13 165 # return data
DaveM@13 166
DaveM@13 167
def newTest():
    """Smoke test: build the people list from ``individuals.csv``.

    The resulting list is discarded; the call only exercises ``makePeople``.
    """
    makePeople('individuals.csv')
DaveM@13 170
DaveM@13 171
def testMain():
    """Run the parse stage on the people already pickled in ``outData.pick``.

    The save-file name is derived from the pickle name by keeping everything
    before the first dot and appending ``_collect.pick``.
    """
    pickFile = 'outData.pick'
    collectFile = pickFile.partition('.')[0] + '_collect.pick'
    parseHoriscope(loadPick(pickFile), collectFile)
DaveM@13 179 # horiscopeData = presentResults(parseSaveFile)
DaveM@13 180 # comRules = comp.parseCompatDef('compatibilityRules.csv')
DaveM@13 181 # applyCompatScore(horiscopeData,rules)
DaveM@13 182
def _main():
    """Full pipeline: load or build the people list, parse, then score.

    The people list is read from ``outData.pick`` when that file exists;
    otherwise it is built from ``individuals.csv`` and pickled for next time.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)

    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)

    # NOTE(review): the three calls below still rely on names that are not
    # defined in the visible file: ``presentResults`` is commented out,
    # the ``compatibility as comp`` import is commented out, and
    # ``applyCompatScore`` has no definition here. They must be restored
    # before this entry point can run to completion.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    # Pass the rules that were just loaded; the original passed the
    # undefined name ``rules``.
    applyCompatScore(horiscopeData, comRules)
DaveM@13 198
# Script entry point: runs the test driver, not the full ``_main`` pipeline.
if __name__ == "__main__":
    testMain()