comparison V4/runme.py @ 13:b253748dbb11

developing V4 - Class based structure, with self-calculation of all aspects
author DaveM
date Sun, 04 Mar 2018 14:51:43 +0000
parents
children a0c217ee4168
comparison
equal deleted inserted replaced
12:18e337b2550d 13:b253748dbb11
1 #!/usr/bin/env python
import csv
import os
import pdb
import pickle
import random
import re
import sys
import time
from HTMLParser import HTMLParser

import requests
from bs4 import BeautifulSoup
# from lxml import html

import dParse as dp
# import compatibility as comp
import synastry as syn
16
def parsePage(horiscope, resp):
    """Fill *horiscope* in place with planet positions scraped from *resp*.

    horiscope -- a syn.planetPositions instance (callers pass person.horiscope);
                 its planets dict is updated via setLocation.
    resp      -- a requests.Response for the astro-seek results page.

    Fix: the original rebound `horiscope` to a fresh syn.planetPositions()
    on entry, so the caller's object was never populated and the scraped
    data was silently discarded.  The rebind is removed.
    """
    soup = BeautifulSoup(resp.content, 'lxml')
    tcCell = soup.find_all('div', attrs={'class': 'right-sedy-banner-svetlejsi'})
    for cell in tcCell:
        divList = cell.find_all('div')
        for i, div in enumerate(divList):
            planetName = div.getText().lower()
            # planetNames lives on the class in the synastry module; the bare
            # name `planetPositions` used originally was undefined here.
            if planetName in syn.planetPositions.planetNames:
                # Page layout: the position text sits two <div>s after the
                # planet name and the sign four after — TODO confirm against
                # the live page markup.
                horiscope.planets[planetName].setLocation(
                    divList[i + 2].getText(), divList[i + 4].getText())
27
def makePeople(filename):
    """Build a list of syn.Person objects from the CSV rows of *filename*.

    Each row dict is normalised through dp.regulateData before construction.
    Fixes: the file handle is now closed (with-statement) and the unused
    `dictList` local is removed.
    """
    # 'rb' matches the Python 2 csv module's expectation; change to 'r'
    # (with newline='') if this file is ever ported to Python 3.
    with open(filename, 'rb') as stream:
        reader = csv.DictReader(stream)
        people = [syn.Person(dp.regulateData(line)) for line in reader]
    return people
37
38 # def setURL(p):
39 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
40 # payload = dp.makePayload(p)
41 # return (url,payload)
42
def requestURL(url, payload):
    """GET *url* with *payload* as query parameters and return the response.

    Sleeps five seconds after every request to rate-limit scraping.
    """
    response = requests.get(url, params=payload)
    time.sleep(5)
    return response
47
48 # def makeURLPayload(url,payload):
49 # url += '?'
50 # for p in payload:
51 # url += '&' + str(p)
52 # url += '=' + str(payload[p])
53 # return url
54
55 # def printToFile(filename,data,removeAdds):
56 # if removeAdds == True:
57 # del data['DOB']
58 # del data['TOB']
59 # del data['pDOB']
60 # del data['pTOB']
61 # del data['COB']
62 # del data['pCOB']
63 # del data['horiscope']
64 # # keys = data[0].keys()
65 # keys = []
66 # for d in data:
67 # keys = keys + d.keys()
68 # keys = sorted(uniqueList(keys))
69 # with open(filename,'w') as stream:
70 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
71 # dict_writer.writeheader()
72 # dict_writer.writerows(data)
73
def loadPick(filename):
    """Return the object unpickled from *filename*."""
    with open(filename, 'rb') as fh:
        return pickle.load(fh)
78
def savePick(filename, data):
    """Pickle *data* to *filename*, overwriting any existing file."""
    with open(filename, 'wb') as fh:
        pickle.dump(data, fh)
82
83 # def tempPF(fName,data):
84 # f__ = open(fName,'w')
85 # f__.write(data)
86 # f__.close()
87
def parseHoriscope(people, saveFile):
    """Fetch and parse the horoscope page for every person in *people*.

    People with an empty/missing partner date of birth (p_dob) are skipped.
    Each remaining person's horiscope object is populated in place by
    parsePage, retrying the fetch up to three times on failure.

    saveFile -- currently unused; the pickling of intermediate results is
                disabled (see the original's commented-out savePick calls).

    Fixes: removed a leftover pdb.set_trace() debug breakpoint, removed the
    unused `horiscopeList` accumulator, replaced the bare `except:` with
    `except Exception`, and used print() so the error report no longer hits
    a NameError (`sys` is now imported at module level).
    """
    for person in people:
        if person.p_dob is None or person.p_dob == '':
            print('SKIPPING person ' + person.id + ' p_dob is None')
        else:
            print('parsing person ' + person.id)
            parseTries = 3  # allow up to three attempts per person
            while parseTries > 0:
                try:
                    person.makePayload()
                    resp = requestURL(person.url, person.payload)
                    parsePage(person.horiscope, resp)
                    parseTries = 0  # success: leave the retry loop
                except Exception:
                    # Best-effort scrape: report the failure type and retry.
                    print(sys.exc_info()[0])
                    parseTries -= 1
117
118 # def printDict(d):
119 # for d_ in d:
120 # print (d,d_)
121
122 # def refactorHoriscope(hor):
123 # d = {}
124 # d['ID'] = hor['ID']
125 # for h in hor['horiscope']:
126 # hs = sorted(h)
127 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
128 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
129 # return d
130
131 # def uniqueList(seq):
132 # # order preserving
133 # noDupes = []
134 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
135 # return noDupes
136
137 # def merge_two_dicts(x, y):
138 # z = x.copy() # start with x's keys and values
139 # z.update(y) # modifies z with y's keys and values & returns None
140 # return z
141
142 # def findMissing(unique,keyList):
143 # missing = []
144 # for u in unique:
145 # if u not in keyList:
146 # missing.append(u)
147 # return u
148
149 # def presentResults(saveFile):
150 # data = []
151 # data2 = []
152 # hlist = loadPick(saveFile)
153 # keyList = []
154 # for h in hlist:
155 # d = refactorHoriscope(h)
156 # keyList.append(d.keys())
157 # data.append(d)
158 # uniqueKeys = uniqueList(keyList)
159 # # for da in data:
160 # # missingKeys = findMissing(uniqueKeys,da.keys())
161 # # # pdb.set_trace()
162 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
163 # # da = merge_two_dicts(da,d2)
164 # # data2.append(da)
165 # return data
166
167
def newTest():
    """Smoke test: build the people list from the default CSV and return it.

    Fix: the original built the list and discarded it (implicit None
    return); returning it is backward compatible and makes the result
    inspectable by callers.
    """
    people = makePeople('individuals.csv')
    return people
170
171
def testMain():
    """Drive the scrape pipeline from a previously pickled people list."""
    pickle_name = 'outData.pick'
    # people = makePeople('individuals.csv')
    # savePick(pickle_name, people)
    people = loadPick(pickle_name)
    collect_name = pickle_name.split('.')[0] + '_collect.pick'
    parseHoriscope(people, collect_name)
    # horiscopeData = presentResults(collect_name)
    # comRules = comp.parseCompatDef('compatibilityRules.csv')
    # applyCompatScore(horiscopeData, rules)
182
def _main():
    """Entry point: load (or rebuild and pickle) the people list, scrape
    each person's horoscope, then score compatibility.

    Fix: the final call passed the undefined name `rules`; it now passes
    `comRules`, which is assigned on the previous line.
    """
    pickFile = 'outData.pick'
    if not os.path.exists(pickFile):
        # No cached pickle yet: parse the CSV and cache the result.
        print('reParse file')
        people = makePeople('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)
    parseSaveFile = pickFile.split('.')[0] + '_collect.pick'
    parseHoriscope(people, parseSaveFile)
    # NOTE(review): presentResults, comp and applyCompatScore are commented
    # out / undefined elsewhere in this file, so the lines below will raise
    # NameError if _main() is actually run — confirm before enabling.
    horiscopeData = presentResults(parseSaveFile)
    comRules = comp.parseCompatDef('compatibilityRules.csv')
    applyCompatScore(horiscopeData, comRules)
198
# Script entry: runs the pickled-data test pipeline (testMain), not _main.
if __name__ == "__main__":
    testMain()