comparison V4/runme.py @ 19:ae220e89cb3a

fixing parse bugs, and angle calculation bugs
author DaveM
date Tue, 06 Mar 2018 17:25:38 +0000
parents b11cff4b7f83
children 0264a7888d54
comparison
equal deleted inserted replaced
18:155126861c07 19:ae220e89cb3a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 3
4 import dParse as dp 4 import dParse as dp
5 import synastry as syn 5 import synastry as syn
6 import requests 6 # import requests
7 import time 7 # import time
8 import csv 8 import csv
9 import pdb 9 import pdb
10 import os 10 import os
11 import pickle 11 import pickle
12 import sys 12 import sys
13 from bs4 import BeautifulSoup 13 from bs4 import BeautifulSoup
14 14
15 def parsePage(resp): 15 # def parsePage(resp):
16 gotLocation = 0 16 # gotLocation = 0
17 horiscope = syn.planetPositions() 17 # horiscope = syn.planetPositions()
18 soup = BeautifulSoup(resp.content, 'lxml') 18 # soup = BeautifulSoup(resp.content, 'lxml')
19 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) 19 # tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'})
20 for cell in tcCell: 20 # for cell in tcCell:
21 if "Planets in partner's house" in cell.get_text(): 21 # if "Planets in partner's house" in cell.get_text():
22 gotLocation = 1 22 # gotLocation = 1
23 divList = cell.find_all('div') 23 # divList = cell.find_all('div')
24 for i in range(len(divList)): 24 # for i in range(len(divList)):
25 planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0] 25 # planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0]
26 if planetName in syn.planetPositions.planetNames: 26 # if planetName in syn.planetPositions.planetNames:
27 if gotLocation and not '/' in planetName: 27 # if gotLocation and not '/' in planetName:
28 horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText()) 28 # horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText())
29 else: 29 # else:
30 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) 30 # horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText())
31 return horiscope 31 # return horiscope
32 32
33 def makePeople(filename): 33 def makePeople(filename):
34 stream = csv.DictReader(open(filename,'rb')) 34 stream = csv.DictReader(open(filename,'rb'))
35 dictList = [] 35 dictList = []
36 people = [] 36 people = []
38 thisPerson = syn.Person(dp.regulateData(line)) 38 thisPerson = syn.Person(dp.regulateData(line))
39 people.append(thisPerson) 39 people.append(thisPerson)
40 # pdb.set_trace() 40 # pdb.set_trace()
41 return people 41 return people
42 42
43 def requestURL(url,payload): 43 def uniquify(itemList):
44 r = requests.get(url, params=payload) 44 keyDict = {}
45 time.sleep(5) 45 for item in itemList:
46 return r 46 keyDict[item] = 1
47 return keyDict.keys()
48
49 def outputPeople(filename,people):
50 with open(filename, "wb") as csv_file:
51 dictKeys = []
52 for person in people:
53 if person.issue is None:
54 person.horiscope.calcAllAspects()
55 dictKeys += person.horiscope.aspect.keys()
56 dictKeys = uniquify(dictKeys)
57 writer = csv.DictWriter(csv_file, ['id']+dictKeys)
58 writer.writeheader()
59 for person in people:
60 if person.issue is None:
61 tempDict = {'id':person.id}
62 tempDict.update(person.horiscope.aspect)
63 # pdb.set_trace()
64 writer.writerow(tempDict)
65
66 def outputScores(filename,people):
67 with open(filename, "wb") as csv_file:
68 csv_file.write('Person ID, Score \n')
69 for person in people:
70 if person.issue is None:
71 csv_file.write(str(person.id)+','+str(person.score)+'\n')
72 else:
73 csv_file.write(str(person.id)+','+str(person.issue)+'\n')
74
75 def outputIssues(filename,people):
76 with open(filename, "wb") as csv_file:
77 csv_file.write('Person ID, Issue \n')
78 for person in people:
79 if person.issue is not None:
80 csv_file.write(str(person.id)+','+str(person.issue)+'\n')
81
82 # def requestURL(url,payload):
83 # r = requests.get(url, params=payload)
84 # time.sleep(5)
85 # return r
47 86
48 def loadPick(filename): 87 def loadPick(filename):
49 with open(filename, 'rb') as handle: 88 with open(filename, 'rb') as handle:
50 b = pickle.load(handle) 89 b = pickle.load(handle)
51 return b 90 return b
60 if issue is not None: 99 if issue is not None:
61 print 'SKIPPING person '+ person.id + ' error with ' + issue 100 print 'SKIPPING person '+ person.id + ' error with ' + issue
62 else: 101 else:
63 print 'parsing person '+ person.id 102 print 'parsing person '+ person.id
64 person.makePayload() 103 person.makePayload()
65 person.resp = requestURL(person.url,person.payload) 104 person.requestURL()
66 person.horiscope = parsePage(person.resp) 105 person.horiscope = person.parsePage()
67 # person.horiscope.printPositions() 106 # person.horiscope.printPositions()
68 if saveFile is not None: 107 if saveFile is not None:
69 savePick(saveFile,people) 108 savePick(saveFile,people)
70 109
71 # def testMain(): 110 # def testMain():
100 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' 139 parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
101 if not os.path.exists(parseSaveFile): 140 if not os.path.exists(parseSaveFile):
102 parseHoriscope(people,parseSaveFile) 141 parseHoriscope(people,parseSaveFile)
103 else: 142 else:
104 people = loadPick(parseSaveFile) 143 people = loadPick(parseSaveFile)
105 comp = syn.compatibility() 144 if not os.path.exists('fullResults.pick'):
106 comp.parseCompatRules('compatibilityRules.csv') 145 comp = syn.compatibility()
107 for person in people: 146 comp.parseCompatRules('compatibilityRules.csv')
108 person.score = None 147 for person in people:
109 if person.issue is None: 148 person.score = None
110 person.score = comp.calcCompatibility(person.horiscope) 149 if person.issue is None:
111 if person.score is None: 150 person.score = comp.calcCompatibility(person.horiscope)
112 person.issue = 'None Planet Locations' 151 if person.score is None:
113 savePick('fullResults.pick',people) 152 person.issue = 'None Planet Locations'
153 # pdb.set_trace()
154 f = open('issues/'+str(person.id)+'.html','w')
155 f.write(person.resp.content)
156 f.close()
157 savePick('fullResults.pick',people)
158 else:
159 people = loadPick('fullResults.pick')
160 outputPeople('fullResult.csv',people)
161 outputScores('scores.csv',people)
114 162
115 if __name__ == "__main__": 163 if __name__ == "__main__":
116 _main() 164 _main()