DaveM@23
|
1 #!/usr/bin/env python
|
DaveM@23
|
2
|
DaveM@23
|
3
|
DaveM@23
|
4 import dParse as dp
|
DaveM@23
|
5 import synastry as syn
|
DaveM@23
|
6 # import requests
|
DaveM@23
|
7 # import time
|
DaveM@23
|
8 import csv
|
DaveM@23
|
9 import pdb
|
DaveM@23
|
10 import os
|
DaveM@23
|
11 import pickle
|
DaveM@23
|
12 import sys
|
DaveM@23
|
13 from bs4 import BeautifulSoup
|
DaveM@23
|
14
|
DaveM@23
|
15 # def parsePage(resp):
|
DaveM@23
|
16 # gotLocation = 0
|
DaveM@23
|
17 # horiscope = syn.planetPositions()
|
DaveM@23
|
18 # soup = BeautifulSoup(resp.content, 'lxml')
|
DaveM@23
|
19 # tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'})
|
DaveM@23
|
20 # for cell in tcCell:
|
DaveM@23
|
21 # if "Planets in partner's house" in cell.get_text():
|
DaveM@23
|
22 # gotLocation = 1
|
DaveM@23
|
23 # divList = cell.find_all('div')
|
DaveM@23
|
24 # for i in range(len(divList)):
|
DaveM@23
|
25 # planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0]
|
DaveM@23
|
26 # if planetName in syn.planetPositions.planetNames:
|
DaveM@23
|
27 # if gotLocation and not '/' in planetName:
|
DaveM@23
|
28 # horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText())
|
DaveM@23
|
29 # else:
|
DaveM@23
|
30 # horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText())
|
DaveM@23
|
31 # return horiscope
|
DaveM@23
|
32
|
DaveM@23
|
def makePeople(filename):
    """Build one ``syn.Person`` per data row of a CSV file.

    Every row is read as a dict by ``csv.DictReader``, passed through
    ``dp.regulateData`` and handed to the ``syn.Person`` constructor.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of ``syn.Person`` objects, in file order.
    """
    # Binary mode is what the Python 2 csv module expects.  The ``with`` block
    # closes the handle even when a row fails to convert; the reader is lazy,
    # so all rows must be consumed before leaving the block.
    with open(filename, 'rb') as csvFile:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(csvFile)]
|
DaveM@23
|
42
|
DaveM@23
|
def uniquify(itemList):
    """Return the distinct items of ``itemList`` as a sorted list.

    Items are used as dict keys and then sorted, so they have to be hashable
    and mutually orderable.
    """
    # dict.fromkeys collapses duplicates; sorting a dict sorts its keys.
    return sorted(dict.fromkeys(itemList))
|
DaveM@23
|
48
|
DaveM@23
|
def outputPeople(filename,people):
    """Write the aspect table of every issue-free person to a CSV file.

    The header is ``id``, ``score`` and then the sorted union of all
    ``aspectTreple`` keys found on people whose ``issue`` is None.  Each of
    those people contributes one row; people with an issue are left out.

    Args:
        filename: path of the CSV file to create (opened in ``"wb"`` mode).
        people: sequence of person objects exposing ``issue``, ``id``,
            ``score`` and ``horiscope``.  It is iterated twice.
    """
    with open(filename, "wb") as csv_file:
        # First pass: call makeAllAspectTreple() on each usable chart and
        # gather the column names from its aspectTreple dict.
        aspectNames = []
        for entry in people:
            if entry.issue is not None:
                continue
            entry.horiscope.makeAllAspectTreple()
            aspectNames.extend(entry.horiscope.aspectTreple.keys())
        header = ['id','score']+uniquify(aspectNames)
        writer = csv.DictWriter(csv_file, header)
        writer.writeheader()
        # Second pass: one row per issue-free person.
        for entry in people:
            if entry.issue is not None:
                continue
            row = {'id':entry.id,'score':entry.score}
            row.update(entry.horiscope.aspectTreple)
            writer.writerow(row)
|
DaveM@23
|
65
|
DaveM@23
|
def outputScores(filename,people):
    """Write an ``id,value`` line for every person to ``filename``.

    After the ``'Person ID, Score '`` header line, the second column is the
    person's ``score`` when ``issue`` is None and the ``issue`` otherwise.

    Args:
        filename: path of the file to create (opened in ``"wb"`` mode).
        people: iterable of person objects exposing ``id``, ``score`` and
            ``issue``.
    """
    with open(filename, "wb") as csv_file:
        csv_file.write('Person ID, Score \n')
        for entry in people:
            secondColumn = entry.score if entry.issue is None else entry.issue
            csv_file.write(','.join([str(entry.id), str(secondColumn)])+'\n')
|
DaveM@23
|
74
|
DaveM@23
|
def outputIssues(filename,people):
    """Write an ``id,issue`` line for every person that has an issue.

    The file starts with the ``'Person ID, Issue '`` header line; people whose
    ``issue`` is None are not listed.

    Args:
        filename: path of the file to create (opened in ``"wb"`` mode).
        people: iterable of person objects exposing ``id`` and ``issue``.
    """
    with open(filename, "wb") as csv_file:
        csv_file.write('Person ID, Issue \n')
        for entry in people:
            if entry.issue is None:
                continue
            csv_file.write(','.join([str(entry.id), str(entry.issue)])+'\n')
|
DaveM@23
|
81
|
DaveM@23
|
82 # def requestURL(url,payload):
|
DaveM@23
|
83 # r = requests.get(url, params=payload)
|
DaveM@23
|
84 # time.sleep(5)
|
DaveM@23
|
85 # return r
|
DaveM@23
|
86
|
DaveM@23
|
87 # def loadPickUpdate(filename):
|
DaveM@23
|
88
|
DaveM@23
|
def loadPick(filename):
    """Unpickle and return the object stored in ``filename``.

    NOTE: unpickling can execute arbitrary code, so only point this at
    pickle files from a trusted source.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)
|
DaveM@23
|
93
|
DaveM@23
|
def savePick(filename,data):
    """Pickle ``data`` into ``filename``, overwriting any existing file.

    The pickle module's default protocol is used.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)
|
DaveM@23
|
97
|
DaveM@23
|
def parseHoriscope(people,saveFile):
    """Request (when needed) and parse the page of every usable person.

    ``identifyIssues()`` is consulted first for each person; anyone with an
    issue is reported on stdout and skipped.  Otherwise, if ``resp`` is still
    None, the payload is built and ``requestURL()`` is called, and its return
    value is kept in a raw-response dict keyed by ``person.id``.  The result
    of ``parsePage()`` is then stored on ``person.horiscope``.

    Args:
        people: iterable of person objects; they are updated in place.
        saveFile: pickle path for ``people``.  The raw responses are pickled
            to ``'raw_' + saveFile``.  Pass None to skip saving.
    """
    fetched = {}
    for entry in people:
        problem = entry.identifyIssues()
        if problem is not None:
            # Single-argument print(...) behaves like the print statement.
            print('SKIPPING person '+ entry.id + ' error with ' + problem)
            continue
        if entry.resp is None:
            print('Posting Request for person '+ entry.id)
            entry.makePayload()
            fetched[entry.id] = entry.requestURL()
        entry.horiscope = entry.parsePage()
    if saveFile is None:
        return
    savePick('raw_'+saveFile,fetched)
    savePick(saveFile,people)
|
DaveM@23
|
116
|
DaveM@23
|
def parseHoriscopeRaw(people,saveFile):
    """Parse pages from previously pickled raw responses.

    The raw-response dict is loaded from ``'raw_' + saveFile``.  Each person
    is checked with ``identifyIssues()``; anyone with an issue is reported on
    stdout and skipped.  Otherwise, if ``resp`` is still None, the payload is
    built and ``resp`` is taken from the loaded dict under ``person.id``.  The
    result of ``parsePage()`` is then stored on ``person.horiscope``.

    NOTE(review): ``'raw_' + saveFile`` is evaluated before the None check at
    the end, so ``saveFile=None`` raises TypeError instead of skipping the
    save.  A person missing from the raw dict raises KeyError.

    Args:
        people: iterable of person objects; they are updated in place.
        saveFile: pickle path that ``people`` is written to afterwards.
    """
    cached = loadPick('raw_'+saveFile)
    for entry in people:
        problem = entry.identifyIssues()
        if problem is not None:
            # Single-argument print(...) behaves like the print statement.
            print('SKIPPING person '+ entry.id + ' error with ' + problem)
            continue
        if entry.resp is None:
            print('Reading Request for person '+ entry.id)
            entry.makePayload()
            entry.resp = cached[entry.id]
        entry.horiscope = entry.parsePage()
    if saveFile is not None:
        savePick(saveFile,people)
|
DaveM@23
|
134
|
DaveM@23
|
def _main():
    """Run the whole pipeline: load people, parse charts, score, export.

    Each stage is cached in a pickle file and skipped when that file exists:

    * ``outData.pick`` holds the people built from ``individuals.csv``.
    * ``outData_collect.pick`` holds the people after parsing.  If only
      ``raw_outData_collect.pick`` exists, the saved raw responses are parsed
      instead of requesting them again.
    * ``fullResults.pick`` holds the people after scoring.

    Finally ``fullResult.csv``, ``scores.csv`` and ``issues.csv`` are written.
    A person whose compatibility score comes back as None is flagged with the
    issue ``'None Planet Locations'`` and the response content is dumped to
    ``issues/<id>.html``.
    """
    pickFile = 'outData.pick'
    if os.path.exists(pickFile):
        people = loadPick(pickFile)
    else:
        people = makePeople('individuals.csv')
        savePick(pickFile,people)

    parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
    if os.path.exists(parseSaveFile):
        people = loadPick(parseSaveFile)
    elif os.path.exists('raw_'+parseSaveFile):
        parseHoriscopeRaw(people,parseSaveFile)
    else:
        parseHoriscope(people,parseSaveFile)

    if os.path.exists('fullResults.pick'):
        people = loadPick('fullResults.pick')
    else:
        comp = syn.compatibility()
        comp.parseCompatRules('compatibilityRules.csv')
        for person in people:
            # Single-argument print(...) behaves like the print statement.
            print(person.id)
            person.score = None
            if person.issue is not None:
                continue
            person.score = comp.calcCompatibility(person.horiscope)
            if person.score is None:
                person.issue = 'None Planet Locations'
                # Create the dump directory on first use so a missing folder
                # does not abort the run before the results are pickled.
                if not os.path.isdir('issues'):
                    os.makedirs('issues')
                # ``with`` closes the dump file even if the write fails.
                with open('issues/'+str(person.id)+'.html','w') as dumpFile:
                    dumpFile.write(person.resp.content)
        savePick('fullResults.pick',people)

    outputPeople('fullResult.csv',people)
    outputScores('scores.csv',people)
    outputIssues('issues.csv',people)
|
DaveM@23
|
171
|
DaveM@23
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    _main()
|