diff python/copyRemoteSaves.py @ 3140:7180d6a2a271

Add super basic web scraper to collect remote tests
author Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk>
date Wed, 24 Mar 2021 17:07:59 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/copyRemoteSaves.py	Wed Mar 24 17:07:59 2021 +0000
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+"""Copy the saved WAET result files (*.xml) from a remote server.
+
+Prompts for the remote WAET URL and a local directory, fetches the remote
+'saves' index page, and downloads every link whose text ends in '.xml'
+into the local directory.
+"""
+
+import xml.etree.ElementTree as ET
+import os
+import sys
+from lxml import html
+import requests
+
+
+url = input('Where is the remote WAET URL? ')
+output = input('Where am I saving all these? (Provide the full path using pwd to the saves directory) ')
+if not output.endswith('/'):
+    output = output + '/'
+# Normalise the URL so that it always ends in '/saves/'. The trailing slash
+# matters: file names are appended directly to it below, and without it the
+# request would go to '.../savesNAME.xml' instead of '.../saves/NAME.xml'.
+if not url.endswith(('/saves/', '/saves')):
+    if not url.endswith('/'):
+        url = url + '/'
+    url = url + 'saves'
+if not url.endswith('/'):
+    url = url + '/'
+print(url)
+page = requests.get(url)
+# Stop on a 4xx/5xx response rather than scraping an error page for links.
+page.raise_for_status()
+tree = html.fromstring(page.content)
+print(tree)
+# Text of every anchor on the index page, used as the remote file name.
+ahref = tree.xpath('//a/text()')
+for a in ahref:
+    if a.endswith('.xml'):
+        r = requests.get(url + a, allow_redirects=True)
+        if not r.ok:
+            # Do not save an HTTP error body under the .xml name.
+            print('Skipping ' + a + ' (HTTP ' + str(r.status_code) + ')', file=sys.stderr)
+            continue
+        # basename() stops link text such as '../x.xml' from escaping the
+        # output directory.
+        with open(output + os.path.basename(a), 'wb') as f:
+            f.write(r.content)
+print(ahref)
\ No newline at end of file