diff V4/runme.py @ 13:b253748dbb11

developing V4 - Class based structure, with self calculation of all aspects
author DaveM
date Sun, 04 Mar 2018 14:51:43 +0000
parents
children a0c217ee4168
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/V4/runme.py	Sun Mar 04 14:51:43 2018 +0000
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+import dParse as dp
+# import compatibility as comp
+import synastry as syn
+import requests
+import re
+import time
+import csv
+import random
+import pdb
+import os
+import pickle
+from HTMLParser import HTMLParser
+# from lxml import html
+from bs4 import BeautifulSoup
+
def parsePage(horiscope, resp):
	"""Parse an astro-seek compatibility results page into *horiscope*.

	horiscope -- a syn.planetPositions instance to populate; if None, a
	             fresh one is created.  The populated object is returned.
	resp      -- a requests response whose .content holds the HTML page.
	"""
	if horiscope is None:
		# Bug fix: the original unconditionally rebound this parameter to a
		# fresh syn.planetPositions(), so the object the caller passed in
		# (person.horiscope) was never actually populated.
		horiscope = syn.planetPositions()
	soup = BeautifulSoup(resp.content, 'lxml')
	for cell in soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'}):
		divList = cell.find_all('div')
		for i, div in enumerate(divList):
			planetName = div.getText().lower()
			# Bug fix: was a bare `planetPositions.planetNames` (NameError);
			# the class lives in the synastry module.
			if planetName in syn.planetPositions.planetNames:
				# Page layout: planet-name div, sign text two divs later,
				# degrees two divs after that (assumed from the original
				# offsets -- TODO confirm against a live page).
				horiscope.planets[planetName].setLocation(divList[i+2].getText(), divList[i+4].getText())
	return horiscope
+
def makePeople(filename):
	"""Read *filename* (CSV, one row per person) and return a list of
	syn.Person objects built from the normalized row data.
	"""
	# 'rb' matches the Python 2 csv module's expectation of a binary file.
	# Fixes: file handle was opened without `with` and never closed, and an
	# unused `dictList` local was removed.
	with open(filename, 'rb') as stream:
		return [syn.Person(dp.regulateData(line)) for line in csv.DictReader(stream)]
+
+# def setURL(p):
+# 	url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
+# 	payload = dp.makePayload(p)
+# 	return (url,payload)
+
def requestURL(url, payload):
	"""GET *url* with *payload* as query parameters and return the response.

	Sleeps 5 seconds after every request to rate-limit the scraping.
	"""
	response = requests.get(url, params=payload)
	time.sleep(5)  # be polite to the remote server between requests
	return response
+
+# def makeURLPayload(url,payload):
+# 	url += '?'
+# 	for p in payload:
+# 		url += '&' + str(p)
+# 		url += '=' + str(payload[p])
+# 	return url
+
+# def printToFile(filename,data,removeAdds):
+# 	if removeAdds == True:
+# 		del data['DOB']
+# 		del data['TOB']
+# 		del data['pDOB']
+# 		del data['pTOB']
+# 		del data['COB']
+# 		del data['pCOB']
+# 		del data['horiscope']
+# 	# keys = data[0].keys()
+# 	keys = []
+# 	for d in data:
+# 		keys = keys + d.keys()
+# 	keys = sorted(uniqueList(keys))
+# 	with open(filename,'w') as stream:
+# 		dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
+# 		dict_writer.writeheader()
+# 		dict_writer.writerows(data)
+
def loadPick(filename):
	"""Deserialize and return the object stored in the pickle file *filename*."""
	handle = open(filename, 'rb')
	try:
		data = pickle.load(handle)
	finally:
		handle.close()
	return data
+
def savePick(filename, data):
	"""Serialize *data* into the pickle file *filename*, overwriting it."""
	handle = open(filename, 'wb')
	try:
		pickle.dump(data, handle)
	finally:
		handle.close()
+
+# def tempPF(fName,data):
+# 	f__ = open(fName,'w')
+# 	f__.write(data)
+# 	f__.close()
+
+def parseHoriscope(people,saveFile):
+	horiscopeList = []
+	for person in people:
+		if person.p_dob is None or person.p_dob == '':
+			print 'SKIPPING person '+ person.id + ' p_dob is None'
+			# person.horiscope = None
+			# horiscopeList.append({'ID':person['ID']})
+		else:
+			print 'parsing person '+ person.id
+			parseTries = 3
+			while parseTries > 0:
+				try:
+					person.makePayload()
+					resp = requestURL(person.url,person.payload)
+					parsePage(person.horiscope,resp)
+					pdb.set_trace()
+					parseTries = 0
+				except:
+					print sys.exc_info()[0]
+					parseTries -= 1
+			# for d in person.horiscope.keys():
+			# 	person[d] = person['horiscope'][d]
+			# horiscopeList.append(person)
+	# 		if saveFile is not None:
+	# 			savePick(saveFile,horiscopeList)
+	# return horiscopeList
+	# savePick(pickFile,person)
+	# savePick('2'+pickFile,horiscopeList)
+	# printToFile('final_'+outFile,horiscopeList)
+
+# def printDict(d):
+# 	for d_ in d:
+# 		print (d,d_)
+
+# def refactorHoriscope(hor):
+# 	d = {}
+# 	d['ID'] = hor['ID']
+# 	for h in hor['horiscope']:
+# 		hs = sorted(h)
+# 		d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
+# 		d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
+# 	return d
+
+# def uniqueList(seq): 
+#    # order preserving
+#    noDupes = []
+#    [noDupes.append(i) for i in seq if not noDupes.count(i)]
+#    return noDupes
+
+# def merge_two_dicts(x, y):
+#     z = x.copy()   # start with x's keys and values
+#     z.update(y)    # modifies z with y's keys and values & returns None
+#     return z
+
+# def findMissing(unique,keyList):
+# 	missing = []
+# 	for u in unique:
+# 		if u not in keyList:
+# 			missing.append(u)
+# 	return u
+
+# def presentResults(saveFile):
+# 	data = []
+# 	data2 = []
+# 	hlist = loadPick(saveFile)
+# 	keyList = []
+# 	for h in hlist:
+# 		d = refactorHoriscope(h)
+# 		keyList.append(d.keys())
+# 		data.append(d)
+# 	uniqueKeys = uniqueList(keyList)
+# 	# for da in data:
+# 	# 	missingKeys = findMissing(uniqueKeys,da.keys())
+# 	# 	# pdb.set_trace()
+# 	# 	d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
+# 	# 	da = merge_two_dicts(da,d2)
+# 	# 	data2.append(da)
+# 	return data
+
+
def newTest():
	"""Build the people list from individuals.csv and return it.

	Fixes: the original assigned the result to an unused local and
	returned None, discarding the parsed people.
	"""
	return makePeople('individuals.csv')
+
+
def testMain():
	"""Development driver: load previously pickled people and run the
	horoscope scrape/parse step against them.
	"""
	pickFile = 'outData.pick'
	people = loadPick(pickFile)
	collectFile = pickFile.split('.')[0] + '_collect.pick'
	parseHoriscope(people, collectFile)
+
+def _main():
+	pickFile = 'outData.pick'
+	# people = dict()
+	if not os.path.exists(pickFile):
+		print 'reParse file'
+		people = makePeople('individuals.csv')
+		savePick(pickFile,people)
+	else:
+		print 'read in ' + pickFile
+		people = loadPick(pickFile)
+	parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
+	parseHoriscope(people,parseSaveFile)
+	horiscopeData = presentResults(parseSaveFile)
+	comRules = comp.parseCompatDef('compatibilityRules.csv')
+	applyCompatScore(horiscopeData,rules)
+
if __name__ == "__main__":
	# Default entry point during development; switch to _main() for the
	# full build-scrape-score pipeline.
	testMain()