# HG changeset patch
# User Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk>
# Date 1616605679 0
# Node ID 7180d6a2a271ecd3a3992309a43c162ec653e8ae
# Parent  bc0ef78bb07acd722291f79e84a582f351c97051
Add super basic web scraper to collect remote tests

diff -r bc0ef78bb07a -r 7180d6a2a271 python/copyRemoteSaves.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/copyRemoteSaves.py	Wed Mar 24 17:07:59 2021 +0000
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+"""Download the XML files listed in a remote WAET saves directory.
+
+Prompts for the remote WAET URL and a local output directory, requests the
+HTML listing of the remote ``saves/`` directory, and downloads every link
+whose text ends in ``.xml`` into the output directory.
+"""
+
+import xml.etree.ElementTree as ET
+import os
+import sys
+from lxml import html
+import requests
+
+
+# Seconds to wait for the server; requests waits indefinitely by default.
+TIMEOUT = 30
+
+url = input('Where is the remote WAET URL? ').strip()
+output = input(
+    'Where am I saving all these? '
+    '(Provide the full path using pwd to the saves directory) '
+).strip()
+
+# Always finish on '/saves/': file names are appended straight onto this URL,
+# so without the trailing slash 'saves' and the file name would run together.
+if not url.endswith('/'):
+    url = url + '/'
+if not url.endswith('/saves/'):
+    url = url + 'saves/'
+print(url)
+
+page = requests.get(url, timeout=TIMEOUT)
+# Stop on an HTTP error rather than scraping links out of an error page.
+page.raise_for_status()
+tree = html.fromstring(page.content)
+print(tree)
+
+# The text of each anchor in the listing is used as the file name.
+ahref = tree.xpath('//a/text()')
+for a in ahref:
+    if not a.endswith('.xml'):
+        continue
+    r = requests.get(url + a, allow_redirects=True, timeout=TIMEOUT)
+    if not r.ok:
+        # Skip rather than save the server's error page as an .xml file.
+        sys.stderr.write('Skipping ' + a + ': HTTP ' + str(r.status_code) + '\n')
+        continue
+    # basename() stops a link text containing path separators from writing
+    # outside the output directory.
+    with open(os.path.join(output, os.path.basename(a)), 'wb') as f:
+        f.write(r.content)
+print(ahref)