Mercurial > hg > horiscopes
comparison V4/runme.py @ 19:ae220e89cb3a
fixing parse bugs, and angle calculation bugs
author | DaveM |
---|---|
date | Tue, 06 Mar 2018 17:25:38 +0000 |
parents | b11cff4b7f83 |
children | 0264a7888d54 |
comparison
equal
deleted
inserted
replaced
18:155126861c07 | 19:ae220e89cb3a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 | 3 |
4 import dParse as dp | 4 import dParse as dp |
5 import synastry as syn | 5 import synastry as syn |
6 import requests | 6 # import requests |
7 import time | 7 # import time |
8 import csv | 8 import csv |
9 import pdb | 9 import pdb |
10 import os | 10 import os |
11 import pickle | 11 import pickle |
12 import sys | 12 import sys |
13 from bs4 import BeautifulSoup | 13 from bs4 import BeautifulSoup |
14 | 14 |
15 def parsePage(resp): | 15 # def parsePage(resp): |
16 gotLocation = 0 | 16 # gotLocation = 0 |
17 horiscope = syn.planetPositions() | 17 # horiscope = syn.planetPositions() |
18 soup = BeautifulSoup(resp.content, 'lxml') | 18 # soup = BeautifulSoup(resp.content, 'lxml') |
19 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) | 19 # tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) |
20 for cell in tcCell: | 20 # for cell in tcCell: |
21 if "Planets in partner's house" in cell.get_text(): | 21 # if "Planets in partner's house" in cell.get_text(): |
22 gotLocation = 1 | 22 # gotLocation = 1 |
23 divList = cell.find_all('div') | 23 # divList = cell.find_all('div') |
24 for i in range(len(divList)): | 24 # for i in range(len(divList)): |
25 planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0] | 25 # planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0] |
26 if planetName in syn.planetPositions.planetNames: | 26 # if planetName in syn.planetPositions.planetNames: |
27 if gotLocation and not '/' in planetName: | 27 # if gotLocation and not '/' in planetName: |
28 horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText()) | 28 # horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText()) |
29 else: | 29 # else: |
30 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) | 30 # horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) |
31 return horiscope | 31 # return horiscope |
32 | 32 |
33 def makePeople(filename): | 33 def makePeople(filename): |
34 stream = csv.DictReader(open(filename,'rb')) | 34 stream = csv.DictReader(open(filename,'rb')) |
35 dictList = [] | 35 dictList = [] |
36 people = [] | 36 people = [] |
38 thisPerson = syn.Person(dp.regulateData(line)) | 38 thisPerson = syn.Person(dp.regulateData(line)) |
39 people.append(thisPerson) | 39 people.append(thisPerson) |
40 # pdb.set_trace() | 40 # pdb.set_trace() |
41 return people | 41 return people |
42 | 42 |
43 def requestURL(url,payload): | 43 def uniquify(itemList): |
44 r = requests.get(url, params=payload) | 44 keyDict = {} |
45 time.sleep(5) | 45 for item in itemList: |
46 return r | 46 keyDict[item] = 1 |
| | 47 return keyDict.keys() |
| | 48 |
| | 49 def outputPeople(filename,people): |
| | 50 with open(filename, "wb") as csv_file: |
| | 51 dictKeys = [] |
| | 52 for person in people: |
| | 53 if person.issue is None: |
| | 54 person.horiscope.calcAllAspects() |
| | 55 dictKeys += person.horiscope.aspect.keys() |
| | 56 dictKeys = uniquify(dictKeys) |
| | 57 writer = csv.DictWriter(csv_file, ['id']+dictKeys) |
| | 58 writer.writeheader() |
| | 59 for person in people: |
| | 60 if person.issue is None: |
| | 61 tempDict = {'id':person.id} |
| | 62 tempDict.update(person.horiscope.aspect) |
| | 63 # pdb.set_trace() |
| | 64 writer.writerow(tempDict) |
| | 65 |
| | 66 def outputScores(filename,people): |
| | 67 with open(filename, "wb") as csv_file: |
| | 68 csv_file.write('Person ID, Score \n') |
| | 69 for person in people: |
| | 70 if person.issue is None: |
| | 71 csv_file.write(str(person.id)+','+str(person.score)+'\n') |
| | 72 else: |
| | 73 csv_file.write(str(person.id)+','+str(person.issue)+'\n') |
| | 74 |
| | 75 def outputIssues(filename,people): |
| | 76 with open(filename, "wb") as csv_file: |
| | 77 csv_file.write('Person ID, Issue \n') |
| | 78 for person in people: |
| | 79 if person.issue is not None: |
| | 80 csv_file.write(str(person.id)+','+str(person.issue)+'\n') |
| | 81 |
| | 82 # def requestURL(url,payload): |
| | 83 # r = requests.get(url, params=payload) |
| | 84 # time.sleep(5) |
| | 85 # return r |
47 | 86 |
48 def loadPick(filename): | 87 def loadPick(filename): |
49 with open(filename, 'rb') as handle: | 88 with open(filename, 'rb') as handle: |
50 b = pickle.load(handle) | 89 b = pickle.load(handle) |
51 return b | 90 return b |
60 if issue is not None: | 99 if issue is not None: |
61 print 'SKIPPING person '+ person.id + ' error with ' + issue | 100 print 'SKIPPING person '+ person.id + ' error with ' + issue |
62 else: | 101 else: |
63 print 'parsing person '+ person.id | 102 print 'parsing person '+ person.id |
64 person.makePayload() | 103 person.makePayload() |
65 person.resp = requestURL(person.url,person.payload) | 104 person.requestURL() |
66 person.horiscope = parsePage(person.resp) | 105 person.horiscope = person.parsePage() |
67 # person.horiscope.printPositions() | 106 # person.horiscope.printPositions() |
68 if saveFile is not None: | 107 if saveFile is not None: |
69 savePick(saveFile,people) | 108 savePick(saveFile,people) |
70 | 109 |
71 # def testMain(): | 110 # def testMain(): |
100 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' | 139 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' |
101 if not os.path.exists(parseSaveFile): | 140 if not os.path.exists(parseSaveFile): |
102 parseHoriscope(people,parseSaveFile) | 141 parseHoriscope(people,parseSaveFile) |
103 else: | 142 else: |
104 people = loadPick(parseSaveFile) | 143 people = loadPick(parseSaveFile) |
105 comp = syn.compatibility() | 144 if not os.path.exists('fullResults.pick'): |
106 comp.parseCompatRules('compatibilityRules.csv') | 145 comp = syn.compatibility() |
107 for person in people: | 146 comp.parseCompatRules('compatibilityRules.csv') |
108 person.score = None | 147 for person in people: |
109 if person.issue is None: | 148 person.score = None |
110 person.score = comp.calcCompatibility(person.horiscope) | 149 if person.issue is None: |
111 if person.score is None: | 150 person.score = comp.calcCompatibility(person.horiscope) |
112 person.issue = 'None Planet Locations' | 151 if person.score is None: |
113 savePick('fullResults.pick',people) | 152 person.issue = 'None Planet Locations' |
| | 153 # pdb.set_trace() |
| | 154 f = open('issues/'+str(person.id)+'.html','w') |
| | 155 f.write(person.resp.content) |
| | 156 f.close() |
| | 157 savePick('fullResults.pick',people) |
| | 158 else: |
| | 159 people = loadPick('fullResults.pick') |
| | 160 outputPeople('fullResult.csv',people) |
| | 161 outputScores('scores.csv',people) |
114 | 162 |
115 if __name__ == "__main__": | 163 if __name__ == "__main__": |
116 _main() | 164 _main() |