comparison V4/runme.py @ 15:50a95089414d

Update to allow all aspects to be calculated, and remove default-value passing for people.
author DaveM
date Sun, 04 Mar 2018 17:09:50 +0000
parents a0c217ee4168
children b11cff4b7f83
comparison
equal deleted inserted replaced
14:a0c217ee4168 15:50a95089414d
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
3
2 import dParse as dp 4 import dParse as dp
3 # import compatibility as comp
4 import synastry as syn 5 import synastry as syn
5 import requests 6 import requests
6 import re
7 import time 7 import time
8 import csv 8 import csv
9 import random
10 import pdb 9 import pdb
11 import os 10 import os
12 import pickle 11 import pickle
13 from HTMLParser import HTMLParser 12 import sys
14 # from lxml import html
15 from bs4 import BeautifulSoup 13 from bs4 import BeautifulSoup
16 14
17 def parsePage(resp): 15 def parsePage(resp):
16 gotLocation = 0
18 horiscope = syn.planetPositions() 17 horiscope = syn.planetPositions()
19 soup = BeautifulSoup(resp.content, 'lxml') 18 soup = BeautifulSoup(resp.content, 'lxml')
20 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}) 19 tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'})
21 for cell in tcCell: 20 for cell in tcCell:
21 if "Planets in partner's house" in cell.get_text():
22 gotLocation = 1
22 divList = cell.find_all('div') 23 divList = cell.find_all('div')
23 for i in range(len(divList)): 24 for i in range(len(divList)):
24 planetName = divList[i].getText().lower() 25 planetName = divList[i].getText().lower().strip().replace(':','').split(' ')[0]
25 if planetName in planetPositions.planetNames: 26 if planetName in syn.planetPositions.planetNames:
26 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText()) 27 if gotLocation and not '/' in planetName:
28 horiscope.planets[planetName].setHouse(divList[i+2].getText(),divList[i+4].getText())
29 else:
30 horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText())
27 return horiscope 31 return horiscope
28 32
29 def makePeople(filename): 33 def makePeople(filename):
30 stream = csv.DictReader(open(filename,'rb')) 34 stream = csv.DictReader(open(filename,'rb'))
31 dictList = [] 35 dictList = []
34 thisPerson = syn.Person(dp.regulateData(line)) 38 thisPerson = syn.Person(dp.regulateData(line))
35 people.append(thisPerson) 39 people.append(thisPerson)
36 # pdb.set_trace() 40 # pdb.set_trace()
37 return people 41 return people
38 42
39 # def setURL(p):
40 # url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
41 # payload = dp.makePayload(p)
42 # return (url,payload)
43
44 def requestURL(url,payload): 43 def requestURL(url,payload):
45 r = requests.get(url, params=payload) 44 r = requests.get(url, params=payload)
46 time.sleep(5) 45 time.sleep(5)
47 return r 46 return r
48
49 # def makeURLPayload(url,payload):
50 # url += '?'
51 # for p in payload:
52 # url += '&' + str(p)
53 # url += '=' + str(payload[p])
54 # return url
55
56 # def printToFile(filename,data,removeAdds):
57 # if removeAdds == True:
58 # del data['DOB']
59 # del data['TOB']
60 # del data['pDOB']
61 # del data['pTOB']
62 # del data['COB']
63 # del data['pCOB']
64 # del data['horiscope']
65 # # keys = data[0].keys()
66 # keys = []
67 # for d in data:
68 # keys = keys + d.keys()
69 # keys = sorted(uniqueList(keys))
70 # with open(filename,'w') as stream:
71 # dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
72 # dict_writer.writeheader()
73 # dict_writer.writerows(data)
74 47
75 def loadPick(filename): 48 def loadPick(filename):
76 with open(filename, 'rb') as handle: 49 with open(filename, 'rb') as handle:
77 b = pickle.load(handle) 50 b = pickle.load(handle)
78 return b 51 return b
79 52
80 def savePick(filename,data): 53 def savePick(filename,data):
81 with open(filename, 'wb') as handle: 54 with open(filename, 'wb') as handle:
82 pickle.dump(data,handle) 55 pickle.dump(data,handle)
83 56
84 # def tempPF(fName,data):
85 # f__ = open(fName,'w')
86 # f__.write(data)
87 # f__.close()
88
89 def parseHoriscope(people,saveFile): 57 def parseHoriscope(people,saveFile):
90 horiscopeList = [] 58 horiscopeList = []
91 for person in people: 59 for person in people:
92 if person.p_dob is None or person.p_dob == '': 60 issue = person.identifyIssues()
93 print 'SKIPPING person '+ person.id + ' p_dob is None' 61 if issue is not None:
94 # person.horiscope = None 62 print 'SKIPPING person '+ person.id + ' error with ' + issue
95 # horiscopeList.append({'ID':person['ID']})
96 else: 63 else:
97 print 'parsing person '+ person.id 64 print 'parsing person '+ person.id
98 parseTries = 3 65 person.makePayload()
99 while parseTries > 0: 66 person.resp = requestURL(person.url,person.payload)
100 try: 67 person.horiscope = parsePage(person.resp)
101 person.makePayload() 68 # person.horiscope.printPositions()
102 resp = requestURL(person.url,person.payload) 69 if saveFile is not None:
103 person.horiscope = parsePage(resp) 70 savePick(saveFile,people)
104 pdb.set_trace()
105 parseTries = 0
106 except:
107 print sys.exc_info()[0]
108 parseTries -= 1
109 # for d in person.horiscope.keys():
110 # person[d] = person['horiscope'][d]
111 # horiscopeList.append(person)
112 # if saveFile is not None:
113 # savePick(saveFile,horiscopeList)
114 # return horiscopeList
115 # savePick(pickFile,person)
116 # savePick('2'+pickFile,horiscopeList)
117 # printToFile('final_'+outFile,horiscopeList)
118
119 # def printDict(d):
120 # for d_ in d:
121 # print (d,d_)
122
123 # def refactorHoriscope(hor):
124 # d = {}
125 # d['ID'] = hor['ID']
126 # for h in hor['horiscope']:
127 # hs = sorted(h)
128 # d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
129 # d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
130 # return d
131
132 # def uniqueList(seq):
133 # # order preserving
134 # noDupes = []
135 # [noDupes.append(i) for i in seq if not noDupes.count(i)]
136 # return noDupes
137
138 # def merge_two_dicts(x, y):
139 # z = x.copy() # start with x's keys and values
140 # z.update(y) # modifies z with y's keys and values & returns None
141 # return z
142
143 # def findMissing(unique,keyList):
144 # missing = []
145 # for u in unique:
146 # if u not in keyList:
147 # missing.append(u)
148 # return u
149
150 # def presentResults(saveFile):
151 # data = []
152 # data2 = []
153 # hlist = loadPick(saveFile)
154 # keyList = []
155 # for h in hlist:
156 # d = refactorHoriscope(h)
157 # keyList.append(d.keys())
158 # data.append(d)
159 # uniqueKeys = uniqueList(keyList)
160 # # for da in data:
161 # # missingKeys = findMissing(uniqueKeys,da.keys())
162 # # # pdb.set_trace()
163 # # d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
164 # # da = merge_two_dicts(da,d2)
165 # # data2.append(da)
166 # return data
167
168
169 def newTest():
170 people = makePeople('individuals.csv')
171
172 71
173 def testMain(): 72 def testMain():
174 pickFile = 'outData.pick' 73 restartDataFile = 1
175 # people = makePeople('individuals.csv') 74 if(restartDataFile):
176 # savePick(pickFile,people) 75 pickFile = 'outData.pick'
177 people = loadPick(pickFile) 76 # people = makePeople('individuals.csv')
178 parseSaveFile = pickFile.split('.')[0]+'_collect.pick' 77 # savePick(pickFile,people)
179 parseHoriscope(people,parseSaveFile) 78 people = loadPick(pickFile)
79 parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
80 parseHoriscope(people,parseSaveFile)
81 else:
82 people = loadPick('onlineDatacollect.pick')
83 for p in people:
84 if p.horiscope is None:
85 print p.id
86 else:
87 p.horiscope.calcAllAspects()
88
180 # horiscopeData = presentResults(parseSaveFile) 89 # horiscopeData = presentResults(parseSaveFile)
181 # comRules = comp.parseCompatDef('compatibilityRules.csv') 90 # comRules = comp.parseCompatDef('compatibilityRules.csv')
182 # applyCompatScore(horiscopeData,rules) 91 # applyCompatScore(horiscopeData,rules)
183 92
184 def _main(): 93 def _main():