Mercurial > hg > webaudioevaluationtool
comparison python/copyRemoteSaves.py @ 3140:7180d6a2a271
Add super basic web scraper to collect remote tests
author | Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk> |
---|---|
date | Wed, 24 Mar 2021 17:07:59 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3139:bc0ef78bb07a | 3140:7180d6a2a271 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import xml.etree.ElementTree as ET | |
4 import os | |
5 import sys | |
6 from lxml import html | |
7 import requests | |
8 | |
9 | |
# Seconds to wait on the remote server before giving up on a request.
REQUEST_TIMEOUT = 30


def saves_url(base_url):
    """Return the URL of the remote ``saves/`` directory, ending in a slash.

    ``base_url`` may be the root of the remote WAET installation or the
    ``saves`` directory itself, with or without a trailing slash.  The
    result always ends in ``/saves/`` so that a file name can be appended
    directly to form a download URL.
    """
    trimmed = base_url.strip().rstrip('/')
    if not trimmed.endswith('/saves'):
        trimmed += '/saves'
    # The trailing slash matters: without it, url + name would yield
    # ".../savesNAME.xml" instead of ".../saves/NAME.xml".
    return trimmed + '/'


def xml_save_names(link_texts):
    """Return the link texts that name a plain ``.xml`` file.

    Surrounding whitespace is stripped.  Entries containing a directory
    component (for example ``../x.xml``) are dropped, because each name is
    later used as a local file name and must not escape the output
    directory.
    """
    names = []
    for text in link_texts:
        name = text.strip()
        if name.endswith('.xml') and os.path.basename(name) == name:
            names.append(name)
    return names


def main():
    """Prompt for a remote WAET URL and a local directory, then copy every
    ``.xml`` file linked from the remote ``saves`` page into that directory.
    """
    url = saves_url(input('Where is the remote WAET URL? '))
    output = input('Where am I saving all these? (Provide the full path using pwd to the saves directory) ')
    print(url)

    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on a bad listing instead of scraping an error page.
    page.raise_for_status()
    tree = html.fromstring(page.content)

    # The visible text of each link is taken as the file name.
    names = xml_save_names(tree.xpath('//a/text()'))
    for name in names:
        response = requests.get(url + name, allow_redirects=True,
                                timeout=REQUEST_TIMEOUT)
        if not response.ok:
            # Skip this file rather than storing an HTTP error body as if
            # it were a save; keep going with the remaining files.
            print('Skipping', name, '- HTTP status', response.status_code)
            continue
        # os.path.join copes with a missing trailing slash on the directory,
        # and the with-block guarantees the file is flushed and closed.
        with open(os.path.join(output, name), 'wb') as save_file:
            save_file.write(response.content)
    print(names)


if __name__ == '__main__':
    main()