DaveM@13
|
1 #!/usr/bin/env python
|
DaveM@13
|
2 import dParse as dp
|
DaveM@13
|
3 # import compatibility as comp
|
DaveM@13
|
4 import synastry as syn
|
DaveM@13
|
5 import requests
|
DaveM@13
|
6 import re
|
DaveM@13
|
7 import time
|
DaveM@13
|
8 import csv
|
DaveM@13
|
9 import random
|
DaveM@13
|
10 import pdb
|
DaveM@13
|
11 import os
|
DaveM@13
|
12 import pickle
|
DaveM@13
|
13 from HTMLParser import HTMLParser
|
DaveM@13
|
14 # from lxml import html
|
DaveM@13
|
15 from bs4 import BeautifulSoup
|
DaveM@13
|
16
|
DaveM@14
|
def parsePage(resp):
    """Build a planet-position object from a fetched chart page.

    The page body (``resp.content``) is parsed with BeautifulSoup/lxml.  Every
    ``div`` carrying the class ``right-sedy-banner-svetlejsi`` is scanned; each
    nested ``div`` whose lower-cased text is a known planet name triggers a
    ``setLocation`` call on that planet, fed with the text of the divs two and
    four positions further along the same list.

    Args:
        resp: response object exposing the raw page as ``.content``.

    Returns:
        The freshly created ``syn.planetPositions`` instance, updated in place
        for every planet found on the page.

    Raises:
        IndexError: if a planet-name div is followed by fewer than four divs.
    """
    horiscope = syn.planetPositions()
    # The original tested against the bare name ``planetPositions``, which is
    # never bound in this module (only ``syn`` is imported) and therefore
    # raised NameError on the first div.  Read the names from the instance.
    planetNames = horiscope.planetNames
    soup = BeautifulSoup(resp.content, 'lxml')
    for cell in soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'}):
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            if planetName in planetNames:
                # NOTE(review): offsets +2 / +4 presumably select the sign and
                # degree cells of the row - confirm against the page markup.
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(), divList[i + 4].getText())
    return horiscope
|
DaveM@13
|
28
|
DaveM@13
|
def makePeople(filename):
    """Create one ``syn.Person`` per data row of a CSV file.

    Each row is read as a dict by ``csv.DictReader``, normalised through
    ``dp.regulateData`` and handed to the ``syn.Person`` constructor.

    Args:
        filename: path of the CSV file; its first row supplies the field names.

    Returns:
        A list of ``syn.Person`` objects in file order.
    """
    # 'rb' is kept from the original: the Python 2 csv module expects files
    # opened in binary mode.  The ``with`` block closes the handle, which the
    # original left open.
    with open(filename, 'rb') as handle:
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(handle)]
|
DaveM@13
|
38
|
DaveM@13
|
39 # def setURL(p):
|
DaveM@13
|
40 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
|
DaveM@13
|
41 # payload = dp.makePayload(p)
|
DaveM@13
|
42 # return (url,payload)
|
DaveM@13
|
43
|
DaveM@13
|
def requestURL(url, payload, delay=5, timeout=30):
    """Issue a GET request and then pause before returning.

    Args:
        url: address to fetch.
        payload: mapping sent as the query string (``params``).
        delay: seconds to sleep after the request completes; the default of 5
            matches the previously hard-coded pause.  A falsy value skips it.
        timeout: seconds ``requests`` waits on the server before raising.
            Without a timeout a stalled server would block the caller forever.

    Returns:
        The ``requests`` response object.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    if delay:
        time.sleep(delay)
    return r
|
DaveM@13
|
48
|
DaveM@13
|
49 # def makeURLPayload(url,payload):
|
DaveM@13
|
50 # url += '?'
|
DaveM@13
|
51 # for p in payload:
|
DaveM@13
|
52 # url += '&' + str(p)
|
DaveM@13
|
53 # url += '=' + str(payload[p])
|
DaveM@13
|
54 # return url
|
DaveM@13
|
55
|
DaveM@13
|
56 # def printToFile(filename,data,removeAdds):
|
DaveM@13
|
57 # if removeAdds == True:
|
DaveM@13
|
58 # del data['DOB']
|
DaveM@13
|
59 # del data['TOB']
|
DaveM@13
|
60 # del data['pDOB']
|
DaveM@13
|
61 # del data['pTOB']
|
DaveM@13
|
62 # del data['COB']
|
DaveM@13
|
63 # del data['pCOB']
|
DaveM@13
|
64 # del data['horiscope']
|
DaveM@13
|
65 # # keys = data[0].keys()
|
DaveM@13
|
66 # keys = []
|
DaveM@13
|
67 # for d in data:
|
DaveM@13
|
68 # keys = keys + d.keys()
|
DaveM@13
|
69 # keys = sorted(uniqueList(keys))
|
DaveM@13
|
70 # with open(filename,'w') as stream:
|
DaveM@13
|
71 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
|
DaveM@13
|
72 # dict_writer.writeheader()
|
DaveM@13
|
73 # dict_writer.writerows(data)
|
DaveM@13
|
74
|
DaveM@13
|
def loadPick(filename):
    """Return the first object pickled in the file at ``filename``.

    The file is opened in binary mode and closed again before returning.
    """
    # NOTE: unpickling can execute arbitrary code - only use on trusted files.
    with open(filename, 'rb') as pickFile:
        return pickle.load(pickFile)
|
DaveM@13
|
79
|
DaveM@13
|
def savePick(filename,data):
    """Pickle ``data`` into the file at ``filename``, replacing its content.

    The file is opened in binary write mode and closed on exit; nothing is
    returned.
    """
    with open(filename, 'wb') as out:
        pickle.Pickler(out).dump(data)
|
DaveM@13
|
83
|
DaveM@13
|
84 # def tempPF(fName,data):
|
DaveM@13
|
85 # f__ = open(fName,'w')
|
DaveM@13
|
86 # f__.write(data)
|
DaveM@13
|
87 # f__.close()
|
DaveM@13
|
88
|
DaveM@13
|
def parseHoriscope(people,saveFile):
    """Fetch and attach a parsed chart to every person that has a ``p_dob``.

    People whose ``p_dob`` is ``None`` or ``''`` are reported and skipped.
    For everyone else the payload is built, the page is requested and the
    parsed result is stored on ``person.horiscope``.  Each person gets up to
    three attempts; a failed attempt is reported and retried.

    Args:
        people: iterable of person objects exposing ``id``, ``p_dob``,
            ``makePayload()``, ``url`` and ``payload``.
        saveFile: accepted for interface compatibility; currently unused
            because the code that saved results is disabled.

    Returns:
        None.  Results are written onto the person objects.
    """
    maxTries = 3
    for person in people:
        if person.p_dob is None or person.p_dob == '':
            # str() so a non-string id cannot break the message.
            print('SKIPPING person ' + str(person.id) + ' p_dob is None')
            continue
        print('parsing person ' + str(person.id))
        for _attempt in range(maxTries):
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                person.horiscope = parsePage(resp)
                break
            # ``Exception`` rather than a bare except, so Ctrl-C still stops
            # the run.  The original handler used ``sys`` without importing
            # it, so reporting a failure itself crashed with NameError.
            except Exception as exc:
                print('%s: %s' % (type(exc).__name__, exc))
|
DaveM@13
|
109 # for d in person.horiscope.keys():
|
DaveM@13
|
110 # person[d] = person['horiscope'][d]
|
DaveM@13
|
111 # horiscopeList.append(person)
|
DaveM@13
|
112 # if saveFile is not None:
|
DaveM@13
|
113 # savePick(saveFile,horiscopeList)
|
DaveM@13
|
114 # return horiscopeList
|
DaveM@13
|
115 # savePick(pickFile,person)
|
DaveM@13
|
116 # savePick('2'+pickFile,horiscopeList)
|
DaveM@13
|
117 # printToFile('final_'+outFile,horiscopeList)
|
DaveM@13
|
118
|
DaveM@13
|
119 # def printDict(d):
|
DaveM@13
|
120 # for d_ in d:
|
DaveM@13
|
121 # print (d,d_)
|
DaveM@13
|
122
|
DaveM@13
|
123 # def refactorHoriscope(hor):
|
DaveM@13
|
124 # d = {}
|
DaveM@13
|
125 # d['ID'] = hor['ID']
|
DaveM@13
|
126 # for h in hor['horiscope']:
|
DaveM@13
|
127 # hs = sorted(h)
|
DaveM@13
|
128 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
|
DaveM@13
|
129 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
|
DaveM@13
|
130 # return d
|
DaveM@13
|
131
|
DaveM@13
|
132 # def uniqueList(seq):
|
DaveM@13
|
133 # # order preserving
|
DaveM@13
|
134 # noDupes = []
|
DaveM@13
|
135 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
|
DaveM@13
|
136 # return noDupes
|
DaveM@13
|
137
|
DaveM@13
|
138 # def merge_two_dicts(x, y):
|
DaveM@13
|
139 # z = x.copy() # start with x's keys and values
|
DaveM@13
|
140 # z.update(y) # modifies z with y's keys and values & returns None
|
DaveM@13
|
141 # return z
|
DaveM@13
|
142
|
DaveM@13
|
143 # def findMissing(unique,keyList):
|
DaveM@13
|
144 # missing = []
|
DaveM@13
|
145 # for u in unique:
|
DaveM@13
|
146 # if u not in keyList:
|
DaveM@13
|
147 # missing.append(u)
|
DaveM@13
|
148 # return u
|
DaveM@13
|
149
|
DaveM@13
|
150 # def presentResults(saveFile):
|
DaveM@13
|
151 # data = []
|
DaveM@13
|
152 # data2 = []
|
DaveM@13
|
153 # hlist = loadPick(saveFile)
|
DaveM@13
|
154 # keyList = []
|
DaveM@13
|
155 # for h in hlist:
|
DaveM@13
|
156 # d = refactorHoriscope(h)
|
DaveM@13
|
157 # keyList.append(d.keys())
|
DaveM@13
|
158 # data.append(d)
|
DaveM@13
|
159 # uniqueKeys = uniqueList(keyList)
|
DaveM@13
|
160 # # for da in data:
|
DaveM@13
|
161 # # missingKeys = findMissing(uniqueKeys,da.keys())
|
DaveM@13
|
162 # # # pdb.set_trace()
|
DaveM@13
|
163 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
|
DaveM@13
|
164 # # da = merge_two_dicts(da,d2)
|
DaveM@13
|
165 # # data2.append(da)
|
DaveM@13
|
166 # return data
|
DaveM@13
|
167
|
DaveM@13
|
168
|
DaveM@13
|
def newTest():
    """Smoke-test ``makePeople`` by loading 'individuals.csv'.

    The resulting list is discarded; the function returns None.
    """
    makePeople('individuals.csv')
|
DaveM@13
|
171
|
DaveM@13
|
172
|
DaveM@13
|
def testMain():
    """Run the chart-parsing step on the people stored in 'outData.pick'.

    The pickle must already exist.  The second argument given to
    ``parseHoriscope`` is the pickle name up to its first dot with
    '_collect.pick' appended.
    """
    pickFile = 'outData.pick'
    stem = pickFile.partition('.')[0]
    parseHoriscope(loadPick(pickFile), stem + '_collect.pick')
|
DaveM@13
|
180 # horiscopeData = presentResults(parseSaveFile)
|
DaveM@13
|
181 # comRules = comp.parseCompatDef('compatibilityRules.csv')
|
DaveM@13
|
182 # applyCompatScore(horiscopeData,rules)
|
DaveM@13
|
183
|
DaveM@13
|
def _main():
    """Run the full pipeline: load people, parse charts, score compatibility.

    People are read from the 'outData.pick' cache when it exists; otherwise
    they are built from 'individuals.csv' and the cache is written.  Charts
    are then fetched with ``parseHoriscope`` and the results are passed on to
    the presentation and compatibility-scoring steps.
    """
    pickFile = 'outData.pick'
    if os.path.exists(pickFile):
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    else:
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # NOTE(review): ``presentResults`` and ``applyCompatScore`` are not
    # defined in the visible file, and the ``compatibility as comp`` import is
    # commented out, so these three calls need those names restored to run.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    # Pass the rules that were just loaded; the original passed the undefined
    # name ``rules``.
    applyCompatScore(horiscopeData, comRules)
|
DaveM@13
|
199
|
DaveM@13
|
# Script entry point: when executed directly (not imported) run testMain().
if __name__ == "__main__":
    testMain()
|