DaveM@13
|
1 #!/usr/bin/env python
|
DaveM@13
|
2 import dParse as dp
|
DaveM@13
|
3 # import compatibility as comp
|
DaveM@13
|
4 import synastry as syn
|
DaveM@13
|
5 import requests
|
DaveM@13
|
6 import re
|
DaveM@13
|
7 import time
|
DaveM@13
|
8 import csv
|
DaveM@13
|
9 import random
|
DaveM@13
|
10 import pdb
|
DaveM@13
|
11 import os
|
DaveM@13
|
12 import pickle
|
DaveM@13
|
13 from HTMLParser import HTMLParser
|
DaveM@13
|
14 # from lxml import html
|
DaveM@13
|
15 from bs4 import BeautifulSoup
|
DaveM@13
|
16
|
DaveM@13
|
def parsePage(horiscope, resp):
    """Fill a planet-positions object from a fetched result page.

    The page is searched for ``div`` elements with the CSS class
    ``right-sedy-banner-svetlejsi``.  Inside each one, every nested ``div``
    whose lower-cased text is one of ``syn.planetPositions.planetNames`` is
    treated as the start of a planet entry: the text of the divs two and
    four positions later is handed to that planet's ``setLocation``.

    Args:
        horiscope: object exposing a ``planets`` mapping to populate.  When
            ``None``, a fresh ``syn.planetPositions()`` is created.
        resp: HTTP response whose ``content`` holds the page markup.

    Returns:
        The populated positions object (the one passed in, or the newly
        created one).

    Raises:
        IndexError: if a planet label is found fewer than four divs before
            the end of its cell.
    """
    # Previously the argument was unconditionally replaced by a new object
    # that was never returned, so the caller never saw any parsed data.
    if horiscope is None:
        horiscope = syn.planetPositions()

    # The original referenced a bare ``planetPositions`` name, which is not
    # imported in this module; the class lives in ``syn``.
    # NOTE(review): assumes planetNames is a class-level attribute - confirm.
    knownPlanets = syn.planetPositions.planetNames

    soup = BeautifulSoup(resp.content, 'lxml')
    cells = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in cells:
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            if planetName in knownPlanets:
                # presumably the +2 / +4 divs are the sign and the degree
                # columns of the table row - verify against the live markup
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(),
                    divList[i + 4].getText())
    return horiscope
|
DaveM@13
|
27
|
DaveM@13
|
def makePeople(filename):
    """Build one ``syn.Person`` per data row of a CSV file.

    Each row is read as a dict by ``csv.DictReader``, passed through
    ``dp.regulateData`` and then to the ``syn.Person`` constructor.

    Args:
        filename: path of the CSV file; its first line supplies the column
            names.

    Returns:
        A list of ``syn.Person`` objects in file order (empty when the file
        has no data rows).
    """
    # The file used to be opened inline and never closed; the context
    # manager releases the handle even if a row fails to convert.
    # 'rb' is kept as in the original (the mode the Python 2 csv module expects).
    with open(filename, 'rb') as handle:
        # Build the list inside the ``with`` because DictReader reads lazily.
        return [syn.Person(dp.regulateData(row))
                for row in csv.DictReader(handle)]
|
DaveM@13
|
37
|
DaveM@13
|
38 # def setURL(p):
|
DaveM@13
|
39 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
|
DaveM@13
|
40 # payload = dp.makePayload(p)
|
DaveM@13
|
41 # return (url,payload)
|
DaveM@13
|
42
|
DaveM@13
|
def requestURL(url, payload, delay=5, timeout=60):
    """Issue a GET request and then pause before returning.

    Args:
        url: address to fetch.
        payload: mapping sent as the query-string parameters.
        delay: seconds to sleep after the request completes (default 5,
            the value that used to be hard-coded).
        timeout: seconds to wait for the server before ``requests`` raises
            a timeout error.  Without it a stalled connection would block
            forever.

    Returns:
        The ``requests`` response object.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    # Pause between consecutive calls so the remote site is not hammered.
    time.sleep(delay)
    return r
|
DaveM@13
|
47
|
DaveM@13
|
48 # def makeURLPayload(url,payload):
|
DaveM@13
|
49 # url += '?'
|
DaveM@13
|
50 # for p in payload:
|
DaveM@13
|
51 # url += '&' + str(p)
|
DaveM@13
|
52 # url += '=' + str(payload[p])
|
DaveM@13
|
53 # return url
|
DaveM@13
|
54
|
DaveM@13
|
55 # def printToFile(filename,data,removeAdds):
|
DaveM@13
|
56 # if removeAdds == True:
|
DaveM@13
|
57 # del data['DOB']
|
DaveM@13
|
58 # del data['TOB']
|
DaveM@13
|
59 # del data['pDOB']
|
DaveM@13
|
60 # del data['pTOB']
|
DaveM@13
|
61 # del data['COB']
|
DaveM@13
|
62 # del data['pCOB']
|
DaveM@13
|
63 # del data['horiscope']
|
DaveM@13
|
64 # # keys = data[0].keys()
|
DaveM@13
|
65 # keys = []
|
DaveM@13
|
66 # for d in data:
|
DaveM@13
|
67 # keys = keys + d.keys()
|
DaveM@13
|
68 # keys = sorted(uniqueList(keys))
|
DaveM@13
|
69 # with open(filename,'w') as stream:
|
DaveM@13
|
70 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
|
DaveM@13
|
71 # dict_writer.writeheader()
|
DaveM@13
|
72 # dict_writer.writerows(data)
|
DaveM@13
|
73
|
DaveM@13
|
def loadPick(filename):
    """Read and return the single pickled object stored in ``filename``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    this program wrote itself.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)
|
DaveM@13
|
78
|
DaveM@13
|
def savePick(filename, data):
    """Pickle ``data`` into ``filename``, replacing any existing file."""
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)
|
DaveM@13
|
82
|
DaveM@13
|
83 # def tempPF(fName,data):
|
DaveM@13
|
84 # f__ = open(fName,'w')
|
DaveM@13
|
85 # f__.write(data)
|
DaveM@13
|
86 # f__.close()
|
DaveM@13
|
87
|
DaveM@13
|
def parseHoriscope(people, saveFile):
    """Fetch and parse the result page for every person that has a p_dob.

    For each person with a non-empty ``p_dob`` the payload is built, the
    page is requested and handed to ``parsePage``.  A failing attempt is
    reported on stdout and retried, up to three attempts in total; after
    that the person is abandoned and the loop moves on.

    Args:
        people: iterable of person objects exposing ``id``, ``p_dob``,
            ``makePayload()``, ``url``, ``payload`` and ``horiscope``.
        saveFile: currently unused; the code that wrote results to it is
            disabled.

    Returns:
        None.
    """
    # ``sys`` is not imported at module level, so the error report in the
    # handler below used to raise NameError and abort the whole run.
    import sys

    for person in people:
        if person.p_dob is None or person.p_dob == '':
            print('SKIPPING person ' + person.id + ' p_dob is None')
            continue

        print('parsing person ' + person.id)
        parseTries = 3
        while parseTries > 0:
            try:
                person.makePayload()
                resp = requestURL(person.url, person.payload)
                parsePage(person.horiscope, resp)
                # Success: leave the retry loop.  (A leftover
                # pdb.set_trace() here used to halt every successful parse.)
                parseTries = 0
            except Exception:
                # Narrower than a bare ``except:`` so Ctrl-C still stops
                # the run; any ordinary failure just costs one attempt.
                print(sys.exc_info()[0])
                parseTries -= 1
|
DaveM@13
|
108 # for d in person.horiscope.keys():
|
DaveM@13
|
109 # person[d] = person['horiscope'][d]
|
DaveM@13
|
110 # horiscopeList.append(person)
|
DaveM@13
|
111 # if saveFile is not None:
|
DaveM@13
|
112 # savePick(saveFile,horiscopeList)
|
DaveM@13
|
113 # return horiscopeList
|
DaveM@13
|
114 # savePick(pickFile,person)
|
DaveM@13
|
115 # savePick('2'+pickFile,horiscopeList)
|
DaveM@13
|
116 # printToFile('final_'+outFile,horiscopeList)
|
DaveM@13
|
117
|
DaveM@13
|
118 # def printDict(d):
|
DaveM@13
|
119 # for d_ in d:
|
DaveM@13
|
120 # print (d,d_)
|
DaveM@13
|
121
|
DaveM@13
|
122 # def refactorHoriscope(hor):
|
DaveM@13
|
123 # d = {}
|
DaveM@13
|
124 # d['ID'] = hor['ID']
|
DaveM@13
|
125 # for h in hor['horiscope']:
|
DaveM@13
|
126 # hs = sorted(h)
|
DaveM@13
|
127 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
|
DaveM@13
|
128 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
|
DaveM@13
|
129 # return d
|
DaveM@13
|
130
|
DaveM@13
|
131 # def uniqueList(seq):
|
DaveM@13
|
132 # # order preserving
|
DaveM@13
|
133 # noDupes = []
|
DaveM@13
|
134 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
|
DaveM@13
|
135 # return noDupes
|
DaveM@13
|
136
|
DaveM@13
|
137 # def merge_two_dicts(x, y):
|
DaveM@13
|
138 # z = x.copy() # start with x's keys and values
|
DaveM@13
|
139 # z.update(y) # modifies z with y's keys and values & returns None
|
DaveM@13
|
140 # return z
|
DaveM@13
|
141
|
DaveM@13
|
142 # def findMissing(unique,keyList):
|
DaveM@13
|
143 # missing = []
|
DaveM@13
|
144 # for u in unique:
|
DaveM@13
|
145 # if u not in keyList:
|
DaveM@13
|
146 # missing.append(u)
|
DaveM@13
|
147 # return u
|
DaveM@13
|
148
|
DaveM@13
|
149 # def presentResults(saveFile):
|
DaveM@13
|
150 # data = []
|
DaveM@13
|
151 # data2 = []
|
DaveM@13
|
152 # hlist = loadPick(saveFile)
|
DaveM@13
|
153 # keyList = []
|
DaveM@13
|
154 # for h in hlist:
|
DaveM@13
|
155 # d = refactorHoriscope(h)
|
DaveM@13
|
156 # keyList.append(d.keys())
|
DaveM@13
|
157 # data.append(d)
|
DaveM@13
|
158 # uniqueKeys = uniqueList(keyList)
|
DaveM@13
|
159 # # for da in data:
|
DaveM@13
|
160 # # missingKeys = findMissing(uniqueKeys,da.keys())
|
DaveM@13
|
161 # # # pdb.set_trace()
|
DaveM@13
|
162 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
|
DaveM@13
|
163 # # da = merge_two_dicts(da,d2)
|
DaveM@13
|
164 # # data2.append(da)
|
DaveM@13
|
165 # return data
|
DaveM@13
|
166
|
DaveM@13
|
167
|
DaveM@13
|
def newTest():
    """Smoke test: build the people list from 'individuals.csv' and discard it."""
    makePeople('individuals.csv')
|
DaveM@13
|
170
|
DaveM@13
|
171
|
DaveM@13
|
def testMain():
    """Run the parsing step against the people already pickled on disk.

    Loads 'outData.pick' and passes the result to ``parseHoriscope``
    together with the derived save-file name 'outData_collect.pick'.
    """
    sourcePickle = 'outData.pick'
    # Text before the first dot, same as split('.')[0].
    stem = sourcePickle.partition('.')[0]
    parseHoriscope(loadPick(sourcePickle), stem + '_collect.pick')
|
DaveM@13
|
179 # horiscopeData = presentResults(parseSaveFile)
|
DaveM@13
|
180 # comRules = comp.parseCompatDef('compatibilityRules.csv')
|
DaveM@13
|
181 # applyCompatScore(horiscopeData,rules)
|
DaveM@13
|
182
|
DaveM@13
|
def _main():
    """Full pipeline: load or build the people list, parse, then score.

    If 'outData.pick' does not exist, the people are built from
    'individuals.csv' and pickled there; otherwise they are loaded from the
    pickle.  The list is then passed to ``parseHoriscope`` and the scoring
    steps follow.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)

    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)

    # NOTE(review): presentResults, comp and applyCompatScore are not
    # defined or imported anywhere in the visible part of this module
    # (their definition/import appears only as commented-out code), so
    # these lines will raise NameError until they are restored.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    # Was ``rules``, an undefined name; the rules parsed just above are
    # bound to ``comRules``.
    applyCompatScore(horiscopeData, comRules)
|
DaveM@13
|
198
|
DaveM@13
|
# Script entry point: running the file directly executes the test driver.
if __name__ == '__main__':
    testMain()
|