view V4/runme.py @ 14:a0c217ee4168

current edits
author DaveM
date Sun, 04 Mar 2018 15:03:15 +0000
parents b253748dbb11
children 50a95089414d
line wrap: on
line source
#!/usr/bin/env python
import csv
import os
import pdb
import pickle
import random
import re
import sys
import time
from HTMLParser import HTMLParser

import requests
from bs4 import BeautifulSoup
# from lxml import html

import dParse as dp
# import compatibility as comp
import synastry as syn

def parsePage(resp):
	"""Parse an astro-seek response page into a synastry.planetPositions.

	resp -- a requests.Response whose body is the HTML result page.
	Returns a syn.planetPositions with each recognized planet's location
	set from the table cells that follow its name.
	"""
	horiscope = syn.planetPositions()
	soup = BeautifulSoup(resp.content, 'lxml')
	tcCell = soup.find_all('div', attrs={'class':'right-sedy-banner-svetlejsi'})
	for cell in tcCell:
		divList = cell.find_all('div')
		for i in range(len(divList)):
			planetName = divList[i].getText().lower()
			# BUG FIX: was a bare `planetPositions.planetNames`, which is a
			# NameError -- the class lives in the synastry module (cf. the
			# syn.planetPositions() constructor call above).
			if planetName in syn.planetPositions.planetNames:
				# Assumes the sign / degree cells sit 2 and 4 divs after the
				# planet-name div -- TODO confirm against the page layout.
				horiscope.planets[planetName].setLocation(divList[i+2].getText(),divList[i+4].getText())
	return horiscope

def makePeople(filename):
	"""Build a list of synastry.Person objects from a CSV of individuals.

	filename -- path to a CSV file with one header row; each data row is
	normalized through dp.regulateData before constructing the Person.
	Returns the list of Person objects.
	"""
	people = []
	# BUG FIX: the original opened the file and never closed it (handle
	# leak); a context manager guarantees closure.  The unused local
	# `dictList` was removed.  'rb' mode matches the Python 2 csv module.
	with open(filename, 'rb') as handle:
		for line in csv.DictReader(handle):
			people.append(syn.Person(dp.regulateData(line)))
	return people

# def setURL(p):
# 	url = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
# 	payload = dp.makePayload(p)
# 	return (url,payload)

def requestURL(url,payload):
	"""GET *url* with *payload* as the query string; return the response.

	Sleeps five seconds after every request to rate-limit scraping of the
	remote horoscope service.
	"""
	response = requests.get(url, params=payload)
	time.sleep(5)
	return response

# def makeURLPayload(url,payload):
# 	url += '?'
# 	for p in payload:
# 		url += '&' + str(p)
# 		url += '=' + str(payload[p])
# 	return url

# def printToFile(filename,data,removeAdds):
# 	if removeAdds == True:
# 		del data['DOB']
# 		del data['TOB']
# 		del data['pDOB']
# 		del data['pTOB']
# 		del data['COB']
# 		del data['pCOB']
# 		del data['horiscope']
# 	# keys = data[0].keys()
# 	keys = []
# 	for d in data:
# 		keys = keys + d.keys()
# 	keys = sorted(uniqueList(keys))
# 	with open(filename,'w') as stream:
# 		dict_writer = csv.DictWriter(stream, keys, extrasaction='ignore')
# 		dict_writer.writeheader()
# 		dict_writer.writerows(data)

def loadPick(filename):
	"""Return the object unpickled from *filename*."""
	handle = open(filename, 'rb')
	try:
		return pickle.load(handle)
	finally:
		handle.close()

def savePick(filename,data):
	"""Pickle *data* into *filename*, overwriting any existing file."""
	handle = open(filename, 'wb')
	try:
		pickle.dump(data, handle)
	finally:
		handle.close()

# def tempPF(fName,data):
# 	f__ = open(fName,'w')
# 	f__.write(data)
# 	f__.close()

def parseHoriscope(people,saveFile):
	horiscopeList = []
	for person in people:
		if person.p_dob is None or person.p_dob == '':
			print 'SKIPPING person '+ person.id + ' p_dob is None'
			# person.horiscope = None
			# horiscopeList.append({'ID':person['ID']})
		else:
			print 'parsing person '+ person.id
			parseTries = 3
			while parseTries > 0:
				try:
					person.makePayload()
					resp = requestURL(person.url,person.payload)
					person.horiscope = parsePage(resp)
					pdb.set_trace()
					parseTries = 0
				except:
					print sys.exc_info()[0]
					parseTries -= 1
			# for d in person.horiscope.keys():
			# 	person[d] = person['horiscope'][d]
			# horiscopeList.append(person)
	# 		if saveFile is not None:
	# 			savePick(saveFile,horiscopeList)
	# return horiscopeList
	# savePick(pickFile,person)
	# savePick('2'+pickFile,horiscopeList)
	# printToFile('final_'+outFile,horiscopeList)

# def printDict(d):
# 	for d_ in d:
# 		print (d,d_)

# def refactorHoriscope(hor):
# 	d = {}
# 	d['ID'] = hor['ID']
# 	for h in hor['horiscope']:
# 		hs = sorted(h)
# 		d[(hs[0], hs[1], hor['horiscope'][h][0])] = 1
# 		d[(hs[0], hs[1])] = float(str(hor['horiscope'][h][1]) + '.' + str(hor['horiscope'][h][2]))
# 	return d

# def uniqueList(seq): 
#    # order preserving
#    noDupes = []
#    [noDupes.append(i) for i in seq if not noDupes.count(i)]
#    return noDupes

# def merge_two_dicts(x, y):
#     z = x.copy()   # start with x's keys and values
#     z.update(y)    # modifies z with y's keys and values & returns None
#     return z

# def findMissing(unique,keyList):
# 	missing = []
# 	for u in unique:
# 		if u not in keyList:
# 			missing.append(u)
# 	return u

# def presentResults(saveFile):
# 	data = []
# 	data2 = []
# 	hlist = loadPick(saveFile)
# 	keyList = []
# 	for h in hlist:
# 		d = refactorHoriscope(h)
# 		keyList.append(d.keys())
# 		data.append(d)
# 	uniqueKeys = uniqueList(keyList)
# 	# for da in data:
# 	# 	missingKeys = findMissing(uniqueKeys,da.keys())
# 	# 	# pdb.set_trace()
# 	# 	d2 = dict(zip(missingKeys,[0]*len(missingKeys)))
# 	# 	da = merge_two_dicts(da,d2)
# 	# 	data2.append(da)
# 	return data


def newTest():
	"""Smoke-test the CSV-parsing path against the default individuals file."""
	# The resulting list is discarded; only construction is exercised.
	makePeople('individuals.csv')


def testMain():
	"""Abbreviated driver: load cached people and fetch their horoscopes.

	Assumes outData.pick already exists (built previously by makePeople
	+ savePick); use _main() for the cache-or-parse variant.
	"""
	pickFile = 'outData.pick'
	people = loadPick(pickFile)
	collectFile = pickFile.split('.')[0] + '_collect.pick'
	parseHoriscope(people, collectFile)

def _main():
	pickFile = 'outData.pick'
	# people = dict()
	if not os.path.exists(pickFile):
		print 'reParse file'
		people = makePeople('individuals.csv')
		savePick(pickFile,people)
	else:
		print 'read in ' + pickFile
		people = loadPick(pickFile)
	parseSaveFile = pickFile.split('.')[0]+'_collect.pick'
	parseHoriscope(people,parseSaveFile)
	horiscopeData = presentResults(parseSaveFile)
	comRules = comp.parseCompatDef('compatibilityRules.csv')
	applyCompatScore(horiscopeData,rules)

# Entry point: runs the abbreviated driver (expects outData.pick to exist);
# switch to _main() for the full cache-or-parse pipeline.
if __name__ == "__main__":
	testMain()