Mercurial > hg > webaudioevaluationtool
diff python/copyRemoteSaves.py @ 3140:7180d6a2a271
Add super basic web scraper to collect remote tests
author | Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk> |
---|---|
date | Wed, 24 Mar 2021 17:07:59 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/python
"""Copy the XML files listed by a remote WAET ``saves`` directory page.

The script asks for the URL of a remote WAET installation and for a local
directory, fetches the HTML page served for the remote ``saves/`` directory,
and downloads every link whose text ends in ``.xml`` into the local directory.
"""

import os
import sys
import xml.etree.ElementTree as ET

# Seconds to wait for the remote server before giving up on a request.
REQUEST_TIMEOUT = 30


def normalise_saves_url(url):
    """Return *url* pointing at the ``saves/`` directory, ending in ``/``.

    The trailing slash matters: file names are appended directly to the
    returned string, so ``.../saves`` + ``a.xml`` would otherwise produce
    the non-existent ``.../savesa.xml``.
    """
    url = url.strip()
    if not url.endswith('/'):
        url += '/'
    if not url.endswith('/saves/'):
        url += 'saves/'
    return url


def select_xml_names(link_texts):
    """Return the link texts that are plain ``.xml`` file names.

    Surrounding whitespace is stripped. Entries containing a path separator
    are dropped so that text supplied by the remote page cannot make the
    script write outside the chosen output directory.
    """
    names = []
    for text in link_texts:
        name = text.strip()
        if not name.endswith('.xml'):
            continue
        if '/' in name or '\\' in name:
            continue
        names.append(name)
    return names


def main():
    """Prompt for the remote URL and output directory, then copy the files."""
    # Imported here rather than at module level so the helpers above can be
    # imported (and tested) on machines without the scraping dependencies.
    from lxml import html
    import requests

    url = normalise_saves_url(input('Where is the remote WAET URL? '))
    output = input('Where am I saving all these? (Provide the full path using pwd to the saves directory) ')
    # An empty answer means "here"; it must not turn into the filesystem root.
    output = output.strip() or '.'
    if not os.path.isdir(output):
        sys.exit('Output directory does not exist: ' + output)

    print(url)
    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as a directory listing.
    page.raise_for_status()
    tree = html.fromstring(page.content)

    saved = []
    for name in select_xml_names(tree.xpath('//a/text()')):
        response = requests.get(url + name, allow_redirects=True,
                                timeout=REQUEST_TIMEOUT)
        if not response.ok:
            # Do not store an HTTP error body as if it were a result file.
            print('Skipping %s (HTTP %d)' % (name, response.status_code),
                  file=sys.stderr)
            continue
        with open(os.path.join(output, name), 'wb') as handle:
            handle.write(response.content)
        saved.append(name)
    print(saved)


if __name__ == '__main__':
    main()