annotate V2/runme.py @ 10:85c9aa9d90c5

implement refactor and present results, to output to csv
author DaveM
date Mon, 22 Jan 2018 22:31:20 +0000
parents 3d5ca8e78f8f
children
rev   line source
DaveM@2 1 #!/usr/bin/env python
DaveM@3 2 import dParse as dp
DaveM@2 3 import requests
DaveM@2 4 import re
DaveM@2 5 import time
DaveM@2 6 import csv
DaveM@2 7 import random
DaveM@6 8 import pdb
DaveM@6 9 import os
DaveM@6 10 import pickle
DaveM@2 11 from HTMLParser import HTMLParser
DaveM@2 12 # from lxml import html
DaveM@2 13 from bs4 import BeautifulSoup
DaveM@2 14
def parsePage(resp):
    """
    Extract the result table entries from a fetched compatibility page.

    Every <div class="tc"> in the page is scanned. A div holding more than
    two <td> cells contributes one entry:
        key   -- (text of the first cell's <strong>, text of the third
                 cell's <strong>), both UTF-8 encoded
        value -- three strings captured from the middle cell's markup: the
                 text in front of its <br/>, the digits before the degree
                 sign and the digits after it

    resp -- response object exposing the page body as `.content` (the
            value returned by requests.get).

    Returns a dict; it is empty when nothing on the page could be parsed,
    which callers treat as "attempt failed".
    """
    # Raw string: same pattern value as before, without the invalid "\/"
    # and "\(" escapes in a normal literal. \xc2\xb0 is the UTF-8 encoding
    # of the degree sign.
    # NOTE(review): matching the encoded bytes assumes str(tag) is a
    # Python 2 byte string -- confirm before running under Python 3.
    aspectPattern = re.compile(
        r"\/>(.*)<br/>.*\(([0-9]*)\xc2\xb0([0-9]*)(.*)\)")

    person = dict()
    soup = BeautifulSoup(resp.content, 'lxml')
    for cell in soup.find_all('div', attrs={'class': 'tc'}):
        tableCell = cell.find_all('td')
        if len(tableCell) <= 2:
            continue

        first = tableCell[0].strong
        third = tableCell[2].strong
        A = aspectPattern.search(str(tableCell[1]))
        # A cell without the expected markup used to raise AttributeError
        # here; skip it so the caller's empty-dict retry logic can run.
        if first is None or third is None or A is None:
            continue
        if not first.contents or not third.contents:
            continue

        C = first.contents[0].encode('utf-8')
        D = third.contents[0].encode('utf-8')
        person[(C, D)] = (A.group(1), A.group(2), A.group(3))
    return person
DaveM@2 33
DaveM@2 34
def setURL(p):
    """
    Build the request target for one person record.

    The factors that influence the result are encoded into the query
    payload by dp.makePayload:
        Date of Birth
        Birth Time
        Country of birth
        City of birth (And state of birth)

    p -- person record, handed unchanged to dp.makePayload.

    Returns a (url, payload) tuple.
    """
    # For some reason the site needs the man's fields posted first, then
    # the woman's. Its query parameters look like
    # 'muz_narozeni_den' / 'zena_narozeni_den' together with
    # 'send_calculation': '1'.
    endpoint = 'https://horoscopes.astro-seek.com/calculate-love-compatibility/'
    query = dp.makePayload(p)
    return (endpoint, query)
DaveM@2 51
def requestURL(url, payload, delay=5, timeout=None):
    """
    GET *url* with *payload* as query parameters, then pause.

    url     -- address to request.
    payload -- mapping passed to requests.get as `params`.
    delay   -- seconds to sleep after the request completes (default 5,
               the previously hard-coded value).
    timeout -- forwarded to requests.get; the default None keeps the old
               behaviour of waiting indefinitely for the server.

    Returns the response object produced by requests.get.
    """
    r = requests.get(url, params=payload, timeout=timeout)
    # Pause after every request so consecutive calls are spaced out.
    time.sleep(delay)
    return r
DaveM@2 56
def makeURLPayload(url, payload):
    """
    Return *url* with *payload* appended as a query string.

    url     -- base address, without a query string.
    payload -- mapping of parameter name to value.

    Names and values are percent-encoded, so values such as
    'London, United Kingdom' produce a valid URL, and parameters are
    joined with '&' without a stray separator right after the '?'.
    """
    # Local import keeps this helper self-contained on Python 2 and 3.
    try:
        from urllib import urlencode  # Python 2
    except ImportError:
        from urllib.parse import urlencode  # Python 3

    return url + '?' + urlencode(payload)
DaveM@3 63
def printToFile(filename, data, removeAdds=False):
    """
    Write a list of row dicts to *filename* as CSV with a header line.

    filename   -- path of the CSV file to create or overwrite.
    data       -- iterable of dicts, one per output row. The dicts are
                  copied, never modified.
    removeAdds -- when true, the keys 'DOB', 'TOB', 'pDOB', 'pTOB', 'COB',
                  'pCOB' and 'horiscope' are left out of every row. Keys
                  that a row does not have are ignored.

    The header is the union of the keys of all rows in first-seen order,
    so rows may carry different key sets; missing values are written as
    empty strings. With no rows (or no keys) an empty file is produced.
    """
    dropKeys = ('DOB', 'TOB', 'pDOB', 'pTOB', 'COB', 'pCOB', 'horiscope')

    rows = []
    for row in data:
        row = dict(row)  # shallow copy: leave the caller's records intact
        if removeAdds:
            # The keys live in each row, not in the enclosing list.
            for key in dropKeys:
                row.pop(key, None)
        rows.append(row)

    # Collect every column that occurs in any row; taking only the first
    # row's keys makes DictWriter raise on rows with additional keys.
    fieldnames = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)

    with open(filename, 'w') as stream:
        if not fieldnames:
            return
        dict_writer = csv.DictWriter(stream, fieldnames, restval='')
        dict_writer.writeheader()
        dict_writer.writerows(rows)
DaveM@2 78
def loadPick(filename):
    """
    Read back an object that was pickled into *filename*.

    filename -- path of the pickle file; it is opened in binary mode.

    Returns whatever object the file contains.
    """
    # NOTE: unpickling can execute arbitrary code; only load files this
    # script wrote itself.
    with open(filename, 'rb') as stream:
        return pickle.load(stream)
DaveM@6 83
def savePick(filename, data):
    """
    Pickle *data* into *filename*, replacing any previous content.

    filename -- path of the pickle file; it is opened in binary mode.
    data     -- any picklable object.
    """
    with open(filename, 'wb') as stream:
        pickle.dump(data, stream)
DaveM@6 87
def tempPF(fName, data):
    """
    Write the string *data* to the file *fName*, replacing its content.

    fName -- path of the file to create or overwrite (text mode).
    data  -- text to write.
    """
    # The with-block closes the file even when write() raises.
    with open(fName, 'w') as stream:
        stream.write(data)
DaveM@6 92
def testMain():
    """
    Smoke test: run dp.parseCSV on 'individuals.csv'.

    The parsed result is not used and nothing is returned.
    """
    dp.parseCSV('individuals.csv')
DaveM@3 95
def _fetchHoriscope(person):
    """Request and parse one person's result page, retrying once when nothing was parsed."""
    horiscope = dict()
    for failureNote in ('attempt failed, try again', 'attempt two failed'):
        url, payload = setURL(person)
        horiscope = parsePage(requestURL(url, payload))
        if horiscope:
            break
        print(failureNote)
    return horiscope


def _main():
    """
    Fetch the result page for every person and store the outcome.

    Steps:
      1. Load the people records from the pickle cache 'outData.pick', or
         parse 'individuals.csv' and create the cache when it is missing.
      2. For each person with a non-empty 'pDOB', request and parse the
         result page (one retry), store the parsed dict under
         'horiscope' and copy its entries onto the person record.
      3. Save the updated people list back to the cache, the processed
         records to '2outData.pick', and write them as CSV.

    print() is called with a single argument throughout, which behaves
    the same under Python 2 and Python 3.
    """
    pickFile = 'outData.pick'
    # NOTE(review): outFile was referenced but never defined (NameError);
    # this name is a placeholder -- confirm the intended CSV file name.
    outFile = 'outData.csv'

    if not os.path.exists(pickFile):
        print('reParse file')
        people = dp.parseCSV('individuals.csv')
        savePick(pickFile, people)
    else:
        print('read in ' + pickFile)
        people = loadPick(pickFile)

    horiscopeList = []
    for person in people:
        if person['pDOB'] is None or person['pDOB'] == '':
            print('SKIPPING person ' + person['ID'] + ' pDOB is None')
            continue

        print('parsing person ' + person['ID'])
        person['horiscope'] = _fetchHoriscope(person)
        # Flatten the parsed entries onto the record itself.
        person.update(person['horiscope'])
        horiscopeList.append(person)
        # Checkpoint the whole list. Saving only the current person
        # replaced the cache with a single dict, which the next run could
        # not iterate as a list of people.
        savePick(pickFile, people)

    print(horiscopeList)
    savePick(pickFile, people)
    savePick('2' + pickFile, horiscopeList)
    # removeAdds is passed explicitly because printToFile requires it;
    # False writes every field of each record.
    printToFile('final_' + outFile, horiscopeList, False)
DaveM@2 132
# Run the scraper only when this file is executed as a script; importing
# it as a module has no side effects.
if __name__ == '__main__':
    _main()