Mercurial > hg > horiscopes
comparison V4/runme.py @ 15:50a95089414d
Updating to allow all aspects to be calculated, and removing default value passing for people.
author | DaveM |
---|---|
date | Sun, 04 Mar 2018 17:09:50 +0000 |
parents | a0c217ee4168 |
children | b11cff4b7f83 |
comparison
equal
deleted
inserted
replaced
14:a0c217ee4168 | 15:50a95089414d |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
3 | |
2 import dParse as dp | 4 import dParse as dp |
3 # import compatibility as comp | |
4 import synastry as syn | 5 import synastry as syn |
5 import requests | 6 import requests |
6 import re | |
7 import time | 7 import time |
8 import csv | 8 import csv |
9 import random | |
10 import pdb | 9 import pdb |
11 import os | 10 import os |
12 import pickle | 11 import pickle |
13 from HTMLParser import HTMLParser | 12 import sys |
14 # from lxml import html | |
15 from bs4 import BeautifulSoup | 13 from bs4 import BeautifulSoup |
16 | 14 |
17 def parsePage(resp): | 15 def parsePage(resp): |
16 gotLocation = 0 | |
18 horiscope = syn.planetPositions() | 17 horiscope = syn.planetPositions() |
19 soup = BeautifulSoup(resp.content, 'lxml') | 18 soup = BeautifulSoup(resp.content, 'lxml') |
20 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) | 19 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) |
21 for cell in tcCell: | 20 for cell in tcCell: |
21 if "Planets in partner's house" in cell.get_text(): | |
22 gotLocation = 1 | |
22 divList = cell.find_all('div') | 23 divList = cell.find_all('div') |
23 for i in range(len(divList)): | 24 for i in range(len(divList)): |
24 planetName = divList[i].getText().lower() | 25 planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0] |
25 if planetName in planetPositions.planetNames: | 26 if planetName in syn.planetPositions.planetNames: |
26 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) | 27 if gotLocation and not '/' in planetName: |
28 horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText()) | |
29 else: | |
30 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) | |
27 return horiscope | 31 return horiscope |
28 | 32 |
29 def makePeople(filename): | 33 def makePeople(filename): |
30 stream = csv.DictReader(open(filename,'rb')) | 34 stream = csv.DictReader(open(filename,'rb')) |
31 dictList = [] | 35 dictList = [] |
34 thisPerson = syn.Person(dp.regulateData(line)) | 38 thisPerson = syn.Person(dp.regulateData(line)) |
35 people.append(thisPerson) | 39 people.append(thisPerson) |
36 # pdb.set_trace() | 40 # pdb.set_trace() |
37 return people | 41 return people |
38 | 42 |
39 # def setURL(p): | |
40 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/' | |
41 # payload = dp.makePayload(p) | |
42 # return (url,payload) | |
43 | |
44 def requestURL(url,payload): | 43 def requestURL(url,payload): |
45 r = requests.get(url, params=payload) | 44 r = requests.get(url, params=payload) |
46 time.sleep(5) | 45 time.sleep(5) |
47 return r | 46 return r |
48 | |
49 # def makeURLPayload(url,payload): | |
50 # url += '?' | |
51 # for p in payload: | |
52 # url += '&' + str(p) | |
53 # url += '=' + str(payload[p]) | |
54 # return url | |
55 | |
56 # def printToFile(filename,data,removeAdds): | |
57 # if removeAdds == True: | |
58 # del data['DOB'] | |
59 # del data['TOB'] | |
60 # del data['pDOB'] | |
61 # del data['pTOB'] | |
62 # del data['COB'] | |
63 # del data['pCOB'] | |
64 # del data['horiscope'] | |
65 # # keys = data[0].keys() | |
66 # keys = [] | |
67 # for d in data: | |
68 # keys = keys + d.keys() | |
69 # keys = sorted(uniqueList(keys)) | |
70 # with open(filename,'w') as stream: | |
71 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore') | |
72 # dict_writer.writeheader() | |
73 # dict_writer.writerows(data) | |
74 | 47 |
75 def loadPick(filename): | 48 def loadPick(filename): |
76 with open(filename, 'rb') as handle: | 49 with open(filename, 'rb') as handle: |
77 b = pickle.load(handle) | 50 b = pickle.load(handle) |
78 return b | 51 return b |
79 | 52 |
80 def savePick(filename,data): | 53 def savePick(filename,data): |
81 with open(filename, 'wb') as handle: | 54 with open(filename, 'wb') as handle: |
82 pickle.dump(data,handle) | 55 pickle.dump(data,handle) |
83 | 56 |
84 # def tempPF(fName,data): | |
85 # f__ = open(fName,'w') | |
86 # f__.write(data) | |
87 # f__.close() | |
88 | |
89 def parseHoriscope(people,saveFile): | 57 def parseHoriscope(people,saveFile): |
90 horiscopeList = [] | 58 horiscopeList = [] |
91 for person in people: | 59 for person in people: |
92 if person.p_dob is None or person.p_dob == '': | 60 issue = person.identifyIssues() |
93 print 'SKIPPING person '+ person.id + ' p_dob is None' | 61 if issue is not None: |
94 # person.horiscope = None | 62 print 'SKIPPING person '+ person.id + ' error with ' + issue |
95 # horiscopeList.append({'ID':person['ID']}) | |
96 else: | 63 else: |
97 print 'parsing person '+ person.id | 64 print 'parsing person '+ person.id |
98 parseTries = 3 | 65 person.makePayload() |
99 while parseTries > 0: | 66 person.resp = requestURL(person.url,person.payload) |
100 try: | 67 person.horiscope = parsePage(person.resp) |
101 person.makePayload() | 68 # person.horiscope.printPositions() |
102 resp = requestURL(person.url,person.payload) | 69 if saveFile is not None: |
103 person.horiscope = parsePage(resp) | 70 savePick(saveFile,people) |
104 pdb.set_trace() | |
105 parseTries = 0 | |
106 except: | |
107 print sys.exc_info()[0] | |
108 parseTries -= 1 | |
109 # for d in person.horiscope.keys(): | |
110 # person[d] = person['horiscope'][d] | |
111 # horiscopeList.append(person) | |
112 # if saveFile is not None: | |
113 # savePick(saveFile,horiscopeList) | |
114 # return horiscopeList | |
115 # savePick(pickFile,person) | |
116 # savePick('2'+pickFile,horiscopeList) | |
117 # printToFile('final_'+outFile,horiscopeList) | |
118 | |
119 # def printDict(d): | |
120 # for d_ in d: | |
121 # print (d,d_) | |
122 | |
123 # def refactorHoriscope(hor): | |
124 # d = {} | |
125 # d['ID'] = hor['ID'] | |
126 # for h in hor['horiscope']: | |
127 # hs = sorted(h) | |
128 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1 | |
129 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2])) | |
130 # return d | |
131 | |
132 # def uniqueList(seq): | |
133 # # order preserving | |
134 # noDupes = [] | |
135 # [noDupes.append(i) for i in seq if not noDupes.count(i)] | |
136 # return noDupes | |
137 | |
138 # def merge_two_dicts(x, y): | |
139 # z = x.copy() # start with x's keys and values | |
140 # z.update(y) # modifies z with y's keys and values & returns None | |
141 # return z | |
142 | |
143 # def findMissing(unique,keyList): | |
144 # missing = [] | |
145 # for u in unique: | |
146 # if u not in keyList: | |
147 # missing.append(u) | |
148 # return u | |
149 | |
150 # def presentResults(saveFile): | |
151 # data = [] | |
152 # data2 = [] | |
153 # hlist = loadPick(saveFile) | |
154 # keyList = [] | |
155 # for h in hlist: | |
156 # d = refactorHoriscope(h) | |
157 # keyList.append(d.keys()) | |
158 # data.append(d) | |
159 # uniqueKeys = uniqueList(keyList) | |
160 # # for da in data: | |
161 # # missingKeys = findMissing(uniqueKeys,da.keys()) | |
162 # # # pdb.set_trace() | |
163 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys))) | |
164 # # da = merge_two_dicts(da,d2) | |
165 # # data2.append(da) | |
166 # return data | |
167 | |
168 | |
169 def newTest(): | |
170 people = makePeople('individuals.csv') | |
171 | |
172 | 71 |
173 def testMain(): | 72 def testMain(): |
174 pickFile = 'outData.pick' | 73 restartDataFile = 1 |
175 # people = makePeople('individuals.csv') | 74 if(restartDataFile): |
176 # savePick(pickFile,people) | 75 pickFile = 'outData.pick' |
177 people = loadPick(pickFile) | 76 # people = makePeople('individuals.csv') |
178 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' | 77 # savePick(pickFile,people) |
179 parseHoriscope(people,parseSaveFile) | 78 people = loadPick(pickFile) |
79 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' | |
80 parseHoriscope(people,parseSaveFile) | |
81 else: | |
82 people = loadPick('onlineDatacollect.pick') | |
83 for p in people: | |
84 if p.horiscope is None: | |
85 print p.id | |
86 else: | |
87 p.horiscope.calcAllAspects() | |
88 | |
180 # horiscopeData = presentResults(parseSaveFile) | 89 # horiscopeData = presentResults(parseSaveFile) |
181 # comRules = comp.parseCompatDef('compatibilityRules.csv') | 90 # comRules = comp.parseCompatDef('compatibilityRules.csv') |
182 # applyCompatScore(horiscopeData,rules) | 91 # applyCompatScore(horiscopeData,rules) |
183 | 92 |
184 def _main(): | 93 def _main(): |