Mercurial > hg > horiscopes
comparison V4/runme.py @ 13:b253748dbb11
developing V4 - Class-based structure, with self-calculation of all aspects
author | DaveM |
---|---|
date | Sun, 04 Mar 2018 14:51:43 +0000 |
parents | |
children | a0c217ee4168 |
comparison
equal
deleted
inserted
replaced
12:18e337b2550d | 13:b253748dbb11 |
---|---|
1 #!/usr/bin/env python | |
2 import dParse as dp | |
3 # import compatibility as comp | |
4 import synastry as syn | |
5 import requests | |
6 import re | |
7 import time | |
8 import csv | |
9 import random | |
10 import pdb | |
11 import os | |
12 import pickle | |
13 from HTMLParser import HTMLParser | |
14 # from lxml import html | |
15 from bs4 import BeautifulSoup | |
16 | |
def parsePage(horiscope, resp):
    """Fill a planet-position object from a fetched result page.

    Every ``div`` with class ``right-sedy-banner-svetlejsi`` is scanned; when
    the lower-cased text of a nested ``div`` is a known planet name, the text
    of the cells two and four positions later is handed to that planet's
    ``setLocation``.

    Args:
        horiscope: object to populate (expected to expose ``planetNames`` and
            a ``planets`` mapping, like ``syn.planetPositions``).  When it is
            ``None`` a fresh ``syn.planetPositions()`` is created.
        resp: HTTP response whose ``content`` holds the page HTML.

    Returns:
        The populated position object.
    """
    # The previous version always rebound ``horiscope`` to a new object and
    # returned nothing, so the caller never saw the parsed positions.
    if horiscope is None:
        horiscope = syn.planetPositions()
    soup = BeautifulSoup(resp.content, 'lxml')
    tcCell = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in tcCell:
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            # Look the names up on the instance: the bare ``planetPositions``
            # name used before is not defined in this module (NameError).
            if planetName not in horiscope.planetNames:
                continue
            # Skip a truncated row instead of raising IndexError.
            if i + 4 >= len(divList):
                continue
            # NOTE(review): the meaning of the +2 / +4 cells depends on the
            # page layout and on setLocation's signature - confirm.
            horiscope.planets[planetName].setLocation(
                divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope
27 | |
def makePeople(filename):
    """Build one ``syn.Person`` per row of a CSV file.

    Each row is read as a dict, passed through ``dp.regulateData`` and then
    handed to the ``syn.Person`` constructor.

    Args:
        filename: path of the CSV file to read.

    Returns:
        List of ``syn.Person`` objects in file order.
    """
    # Binary mode is what the Python 2 csv module expects; the ``with`` block
    # makes sure the handle is closed (it used to be leaked).
    with open(filename, 'rb') as handle:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(handle)]
37 | |
38 # def setURL(p): | |
39 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/' | |
40 # payload = dp.makePayload(p) | |
41 # return (url,payload) | |
42 | |
def requestURL(url, payload, delay=5, timeout=60):
    """GET ``url`` with ``payload`` as query parameters, then pause.

    Args:
        url: address to request.
        payload: mapping sent as the query string.
        delay: seconds to sleep after a successful request (default 5, the
            value that used to be hard-coded).
        timeout: seconds to wait for the server before ``requests`` raises;
            without one a stalled connection would block forever.

    Returns:
        The ``requests`` response object.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    # Throttle consecutive calls so the remote site is not hammered.
    time.sleep(delay)
    return r
47 | |
48 # def makeURLPayload(url,payload): | |
49 # url += '?' | |
50 # for p in payload: | |
51 # url += '&' + str(p) | |
52 # url += '=' + str(payload[p]) | |
53 # return url | |
54 | |
55 # def printToFile(filename,data,removeAdds): | |
56 # if removeAdds == True: | |
57 # del data['DOB'] | |
58 # del data['TOB'] | |
59 # del data['pDOB'] | |
60 # del data['pTOB'] | |
61 # del data['COB'] | |
62 # del data['pCOB'] | |
63 # del data['horiscope'] | |
64 # # keys = data[0].keys() | |
65 # keys = [] | |
66 # for d in data: | |
67 # keys = keys + d.keys() | |
68 # keys = sorted(uniqueList(keys)) | |
69 # with open(filename,'w') as stream: | |
70 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore') | |
71 # dict_writer.writeheader() | |
72 # dict_writer.writerows(data) | |
73 | |
def loadPick(filename):
    """Return the object stored in the pickle file ``filename``."""
    # NOTE: pickle executes arbitrary code on load; only open files that
    # this script wrote itself.
    handle = open(filename, 'rb')
    try:
        return pickle.load(handle)
    finally:
        handle.close()
78 | |
def savePick(filename, data):
    """Pickle ``data`` into ``filename``, replacing any existing file."""
    handle = open(filename, 'wb')
    try:
        pickle.dump(data, handle)
    finally:
        handle.close()
83 # def tempPF(fName,data): | |
84 # f__ = open(fName,'w') | |
85 # f__.write(data) | |
86 # f__.close() | |
87 | |
def parseHoriscope(people, saveFile):
    """Fetch and parse the result page for every person that has a ``p_dob``.

    People whose ``p_dob`` is ``None`` or empty are reported and skipped.
    For the others the payload is built, the page is requested and parsed
    into ``person.horiscope``; a failing attempt is reported and retried,
    up to three attempts in total.

    Args:
        people: iterable of person objects exposing ``id``, ``p_dob``,
            ``makePayload()``, ``url``, ``payload`` and ``horiscope``.
        saveFile: currently unused; kept so existing callers keep working.
    """
    for person in people:
        # str() so a numeric id cannot break the message concatenation.
        personId = str(person.id)
        if person.p_dob is None or person.p_dob == '':
            print('SKIPPING person ' + personId + ' p_dob is None')
            continue
        print('parsing person ' + personId)
        for _attempt in range(3):
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                parsePage(person.horiscope, resp)
            except Exception as err:
                # The old handler printed sys.exc_info() without importing
                # sys, so any failure crashed with NameError instead of
                # being retried.
                print('parse failed for person ' + personId + ': ' + repr(err))
            else:
                # Success: no further attempts needed.  (The leftover
                # pdb.set_trace() breakpoint that sat here was removed.)
                break
108 # for d in person.horiscope.keys(): | |
109 # person[d] = person['horiscope'][d] | |
110 # horiscopeList.append(person) | |
111 # if saveFile is not None: | |
112 # savePick(saveFile,horiscopeList) | |
113 # return horiscopeList | |
114 # savePick(pickFile,person) | |
115 # savePick('2'+pickFile,horiscopeList) | |
116 # printToFile('final_'+outFile,horiscopeList) | |
117 | |
118 # def printDict(d): | |
119 # for d_ in d: | |
120 # print (d,d_) | |
121 | |
122 # def refactorHoriscope(hor): | |
123 # d = {} | |
124 # d['ID'] = hor['ID'] | |
125 # for h in hor['horiscope']: | |
126 # hs = sorted(h) | |
127 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1 | |
128 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2])) | |
129 # return d | |
130 | |
131 # def uniqueList(seq): | |
132 # # order preserving | |
133 # noDupes = [] | |
134 # [noDupes.append(i) for i in seq if not noDupes.count(i)] | |
135 # return noDupes | |
136 | |
137 # def merge_two_dicts(x, y): | |
138 # z = x.copy() # start with x's keys and values | |
139 # z.update(y) # modifies z with y's keys and values & returns None | |
140 # return z | |
141 | |
142 # def findMissing(unique,keyList): | |
143 # missing = [] | |
144 # for u in unique: | |
145 # if u not in keyList: | |
146 # missing.append(u) | |
147 # return u | |
148 | |
149 # def presentResults(saveFile): | |
150 # data = [] | |
151 # data2 = [] | |
152 # hlist = loadPick(saveFile) | |
153 # keyList = [] | |
154 # for h in hlist: | |
155 # d = refactorHoriscope(h) | |
156 # keyList.append(d.keys()) | |
157 # data.append(d) | |
158 # uniqueKeys = uniqueList(keyList) | |
159 # # for da in data: | |
160 # # missingKeys = findMissing(uniqueKeys,da.keys()) | |
161 # # # pdb.set_trace() | |
162 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys))) | |
163 # # da = merge_two_dicts(da,d2) | |
164 # # data2.append(da) | |
165 # return data | |
166 | |
167 | |
def newTest():
    """Build the people list from 'individuals.csv' and return it.

    The list used to be bound to a local and dropped, which made the call
    useless to any caller; returning it is backward-compatible.
    """
    return makePeople('individuals.csv')
170 | |
171 | |
def testMain():
    """Development driver: parse pages for the people cached in 'outData.pick'."""
    pickFile = 'outData.pick'
    # people = makePeople('individuals.csv')
    # savePick(pickFile,people)
    cached = loadPick(pickFile)
    # Derive the collection file name from the part before the first dot.
    stem = pickFile.split('.')[0]
    parseHoriscope(cached, stem + '_collect.pick')
    # horiscopeData = presentResults(parseSaveFile)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData,rules)
182 | |
def _main():
    """Full pipeline: load (or build and cache) the people, then parse pages.

    The people list is read from 'outData.pick' when that file exists;
    otherwise it is built from 'individuals.csv' and pickled for next time.
    """
    pickFile = 'outData.pick'
    if os.path.exists(pickFile):
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    else:
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # TODO: the scoring stage is disabled, as in testMain(): presentResults,
    # the ``comp`` module and applyCompatScore are not defined or imported in
    # this file, so calling them raised NameError.  The last call also passed
    # the undefined name ``rules`` instead of ``comRules``.
    # horiscopeData = presentResults(parseSaveFile)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData, comRules)
198 | |
# Script entry point: running the file directly starts the development
# driver rather than the full _main() pipeline.
if __name__ == "__main__":
    testMain()