comparison python/copyRemoteSaves.py @ 3140:7180d6a2a271

Add super basic web scraper to collect remote tests
author Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk>
date Wed, 24 Mar 2021 17:07:59 +0000
parents
children
comparison
equal deleted inserted replaced
3139:bc0ef78bb07a 3140:7180d6a2a271
1 #!/usr/bin/python
2
3 import xml.etree.ElementTree as ET
4 import os
5 import sys
6 from lxml import html
7 import requests
8
9
# Seconds to wait on the server before giving up on a request.
REQUEST_TIMEOUT = 30


def normalise_saves_url(url):
    """Return *url* pointing at the remote ``saves/`` directory.

    The result always ends in ``/saves/``. The trailing slash matters
    because file names are appended directly to this URL; without it
    ``.../saves`` + ``a.xml`` would become ``.../savesa.xml``.
    """
    if not url.endswith('/'):
        url = url + '/'
    if not url.endswith('/saves/'):
        url = url + 'saves/'
    return url


def local_save_path(output, name):
    """Return the path inside *output* where remote file *name* is stored.

    *name* comes from the remote page and is therefore untrusted: only its
    final path component is kept, so a link such as ``../../x.xml`` cannot
    be written outside the chosen directory.
    """
    return os.path.join(output, os.path.basename(name))


def main():
    """Prompt for a remote WAET URL and a local directory, then download
    every ``.xml`` file linked from the remote ``saves/`` listing."""
    url = input('Where is the remote WAET URL? ')
    output = input('Where am I saving all these? (Provide the full path using pwd to the saves directory) ')
    url = normalise_saves_url(url)
    print(url)

    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Stop here on an HTTP error rather than scraping links from an error page.
    page.raise_for_status()
    tree = html.fromstring(page.content)
    print(tree)

    # The visible text of each anchor is used as the file name.
    link_texts = tree.xpath('//a/text()')
    for name in link_texts:
        if not name.endswith('.xml'):
            continue
        response = requests.get(url + name, allow_redirects=True,
                                timeout=REQUEST_TIMEOUT)
        if not response.ok:
            # Keep going: one missing file should not abort the whole copy,
            # but an error body must not be saved as if it were a result.
            print('Skipping {}: HTTP {}'.format(name, response.status_code))
            continue
        with open(local_save_path(output, name), 'wb') as save_file:
            save_file.write(response.content)
    print(link_texts)


if __name__ == '__main__':
    main()