comparison V5/runme.py @ 23:11d4e438045e

make version 5
author DaveM
date Mon, 09 Apr 2018 15:07:21 +0100
parents
children d2bd074d9284
comparison
equal deleted inserted replaced
22:a5b8e2b91d8f 23:11d4e438045e
1 #!/usr/bin/env python
2
3
4 import dParse as dp
5 import synastry as syn
6 # import requests
7 # import time
8 import csv
9 import pdb
10 import os
11 import pickle
12 import sys
13 from bs4 import BeautifulSoup
14
15 # def parsePage(resp):
16 # gotLocation = 0
17 # horiscope = syn.planetPositions()
18 # soup = BeautifulSoup(resp.content, 'lxml')
19 # tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'})
20 # for cell in tcCell:
21 # if "Planets in partner's house" in cell.get_text():
22 # gotLocation = 1
23 # divList = cell.find_all('div')
24 # for i in range(len(divList)):
25 # planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0]
26 # if planetName in syn.planetPositions.planetNames:
27 # if gotLocation and not '/' in planetName:
28 # horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText())
29 # else:
30 # horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText())
31 # return horiscope
32
def makePeople(filename):
    """Build one ``syn.Person`` per data row of the CSV file *filename*.

    Each row is read as a dict by ``csv.DictReader``, passed through
    ``dp.regulateData`` and the result is handed to ``syn.Person``.

    Returns the list of people in file order.
    """
    # The with-block closes the handle even if a row fails to convert;
    # 'rb' is kept because that is the mode the Python 2 csv module expects.
    with open(filename, 'rb') as csvFile:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(csvFile)]
42
def uniquify(itemList):
    """Return the distinct items of *itemList* as a sorted list.

    Items must be hashable (they are de-duplicated through a set) and
    mutually orderable (the result is sorted).
    """
    # A set removes duplicates directly; no dict with dummy values needed.
    return sorted(set(itemList))
48
def outputPeople(filename,people):
    """Write the aspect table of every issue-free person to the CSV *filename*.

    First pass: for each person whose ``issue`` is None, call
    ``person.horiscope.makeAllAspectTreple()`` and collect the keys of
    ``person.horiscope.aspectTreple``.  The header is ``id``, ``score``
    followed by the sorted distinct keys (via ``uniquify``).

    Second pass: one row per person whose ``issue`` is None, holding the
    person's id, score and ``aspectTreple`` entries.
    """
    with open(filename, "wb") as csv_file:
        aspectNames = []
        for person in people:
            if person.issue is not None:
                continue
            person.horiscope.makeAllAspectTreple()
            aspectNames.extend(person.horiscope.aspectTreple.keys())

        columns = ['id', 'score']
        columns.extend(uniquify(aspectNames))
        writer = csv.DictWriter(csv_file, columns)
        writer.writeheader()

        for person in people:
            if person.issue is not None:
                continue
            # id/score go in first so aspectTreple entries are applied on top.
            row = {'id': person.id, 'score': person.score}
            row.update(person.horiscope.aspectTreple)
            writer.writerow(row)
65
def outputScores(filename,people):
    """Write a two-column ``Person ID, Score`` listing to *filename*.

    Every person gets one line.  The second column is the person's score
    when ``person.issue`` is None, otherwise the issue itself.
    """
    with open(filename, "wb") as csv_file:
        csv_file.write('Person ID, Score \n')
        for person in people:
            # People with an issue have no usable score; report the issue instead.
            shown = person.score if person.issue is None else person.issue
            csv_file.write(str(person.id)+','+str(shown)+'\n')
74
def outputIssues(filename,people):
    """Write a ``Person ID, Issue`` listing of the people that have an issue.

    People whose ``issue`` is None are left out of the file.
    """
    with open(filename, "wb") as csv_file:
        csv_file.write('Person ID, Issue \n')
        for person in people:
            if person.issue is None:
                continue
            csv_file.write(str(person.id)+','+str(person.issue)+'\n')
81
82 # def requestURL(url,payload):
83 # r = requests.get(url, params=payload)
84 # time.sleep(5)
85 # return r
86
87 # def loadPickUpdate(filename):
88
def loadPick(filename):
    """Unpickle and return the object stored in the file *filename*."""
    # NOTE: unpickling can run arbitrary code; only load files this script wrote.
    with open(filename, 'rb') as pickFile:
        return pickle.load(pickFile)
93
def savePick(filename,data):
    """Pickle *data* into the file *filename*, overwriting existing content."""
    with open(filename, 'wb') as pickFile:
        pickle.Pickler(pickFile).dump(data)
97
def parseHoriscope(people,saveFile):
    """Fetch (where needed) and parse the page of every usable person.

    For each person, ``identifyIssues()`` is consulted first; a non-None
    result means the person is reported and skipped.  Otherwise, if the
    person has no ``resp`` yet, ``makePayload()`` and ``requestURL()`` are
    called and the value returned by ``requestURL()`` is remembered under
    the person's id.  Finally ``parsePage()`` is stored in
    ``person.horiscope``.

    When *saveFile* is not None, the remembered responses are pickled to
    ``'raw_' + saveFile`` and *people* to *saveFile*.

    ``person.id`` is concatenated into the progress messages, so it must
    be a string.
    """
    fetched = {}
    for person in people:
        problem = person.identifyIssues()
        if problem is not None:
            print('SKIPPING person '+ person.id + ' error with ' + problem)
            continue

        print('parsing person '+ person.id)
        if person.resp is None:
            print('Posting Request for person '+ person.id)
            person.makePayload()
            fetched[person.id] = person.requestURL()
        print('parsing person '+ person.id)
        person.horiscope = person.parsePage()

    if saveFile is None:
        return
    savePick('raw_'+saveFile,fetched)
    savePick(saveFile,people)
116
def parseHoriscopeRaw(people,saveFile):
    """Parse every usable person using responses cached in a raw pickle.

    The cache is loaded from ``'raw_' + saveFile`` before anything else,
    so *saveFile* has to be a string in practice even though it is
    compared against None at the end.

    For each person, a non-None ``identifyIssues()`` result means the
    person is reported and skipped.  Otherwise, if the person has no
    ``resp`` yet, ``makePayload()`` is called and ``resp`` is taken from
    the cache entry keyed by the person's id.  ``parsePage()`` is then
    stored in ``person.horiscope``.

    When *saveFile* is not None, *people* is pickled to *saveFile*.
    """
    cached = loadPick('raw_'+saveFile)
    for person in people:
        problem = person.identifyIssues()
        if problem is not None:
            print('SKIPPING person '+ person.id + ' error with ' + problem)
            continue

        print('parsing person '+ person.id)
        if person.resp is None:
            print('Posting Request for person '+ person.id)
            person.makePayload()
            person.resp = cached[person.id]
        print('parsing person '+ person.id)
        person.horiscope = person.parsePage()

    if saveFile is None:
        return
    savePick(saveFile,people)
134
135 # def testMain():
136 # pickFile = 'outData.pick'
137 # restartDataFile = 0
138 # if(restartDataFile):
139 # # people = makePeople('individuals.csv')
140 # # savePick(pickFile,people)
141 # people = loadPick(pickFile)
142 # parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
143 # parseHoriscope(people,parseSaveFile)
144 # else:
145 # people = loadPick('outData_collect.pick')
146 # comp = syn.compatibility()
147 # comp.parseCompatRules('compatibilityRules.csv')
148 # for person in people:
149 # if person.issue is None:
150 # person.score = comp.calcCompatibility(person.horiscope)
151 # if person.score is None:
152 # person.issue = 'None Planet Locations'
153 # else:
154 # print person.id,person.score
155 # pdb.set_trace()
156
def _main():
    """Run the whole pipeline, resuming from whichever pickle already exists.

    Stages, each skipped when its output pickle is already on disk:

    1. ``outData.pick``: people built from ``individuals.csv``.
    2. ``outData_collect.pick``: people with parsed horoscopes, built from
       the raw-response pickle when that exists, otherwise via
       ``parseHoriscope``.
    3. ``fullResults.pick``: people scored with the rules in
       ``compatibilityRules.csv``.

    Finally the three CSV reports are (re)written.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        people = makePeople('individuals.csv')
        savePick(pickFile,people)
    else:
        people = loadPick(pickFile)

    parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
    if not os.path.exists(parseSaveFile):
        if os.path.exists('raw_'+parseSaveFile):
            parseHoriscopeRaw(people,parseSaveFile)
        else:
            parseHoriscope(people,parseSaveFile)
    else:
        people = loadPick(parseSaveFile)

    if not os.path.exists('fullResults.pick'):
        comp = syn.compatibility()
        comp.parseCompatRules('compatibilityRules.csv')
        for person in people:
            print(person.id)
            person.score = None
            if person.issue is None:
                person.score = comp.calcCompatibility(person.horiscope)
                if person.score is None:
                    person.issue = 'None Planet Locations'
                    # Keep the raw page for later inspection.  Create the
                    # folder on demand so a missing directory cannot abort
                    # the run after all the scoring work, and let the
                    # with-block close the handle even if the write fails.
                    if not os.path.isdir('issues'):
                        os.makedirs('issues')
                    with open('issues/'+str(person.id)+'.html','w') as f:
                        f.write(person.resp.content)
        savePick('fullResults.pick',people)
    else:
        people = loadPick('fullResults.pick')

    outputPeople('fullResult.csv',people)
    outputScores('scores.csv',people)
    outputIssues('issues.csv',people)


if __name__ == "__main__":
    _main()