Mercurial > hg > webaudioevaluationtool
view python/copyRemoteSaves.py @ 3140:7180d6a2a271
Add super basic web scraper to collect remote tests
author | Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk> |
---|---|
date | Wed, 24 Mar 2021 17:07:59 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/python
"""Copy the XML result files from a remote WAET ``saves`` directory listing.

The script asks for the base URL of a remote WAET installation and for a
local directory, fetches the HTML index of ``<url>/saves/``, and downloads
every linked entry whose link text ends in ``.xml`` into the local directory.
"""
import xml.etree.ElementTree as ET
import os
import sys
from lxml import html
import requests

url = input('Where is the remote WAET URL? ')
output = input('Where am I saving all these? (Provide the full path using pwd to the saves directory) ')

# Normalise the URL so it always ends in '/saves/'. The trailing slash is
# required: file names are appended directly to this URL below, so without it
# 'saves' and the file name would be glued together ('.../savesfoo.xml').
if not url.endswith(('/saves/', '/saves')):
    if not url.endswith('/'):
        url = url + '/'
    url = url + 'saves'
if not url.endswith('/'):
    url = url + '/'
print(url)

# Fetch the directory index and fail loudly on an HTTP error instead of
# parsing an error page as if it were the listing.
page = requests.get(url)
page.raise_for_status()
tree = html.fromstring(page.content)
print(tree)

# The visible text of every anchor in the index is treated as a file name.
ahref = tree.xpath('//a/text()')
for a in ahref:
    if not a.endswith('.xml'):
        continue
    r = requests.get(url + a, allow_redirects=True)
    if not r.ok:
        # Do not store an HTTP error body on disk as if it were a result file.
        sys.stderr.write('Skipping ' + a + ': HTTP ' + str(r.status_code) + '\n')
        continue
    # The link text comes from the remote page, so keep only its final path
    # component to prevent writing outside the chosen output directory.
    target = os.path.join(output, os.path.basename(a))
    # The context manager closes the file even if the write fails.
    with open(target, 'wb') as save_file:
        save_file.write(r.content)
print(ahref)