Mercurial > hg > sworduploader
view sword2-libraries-pyinstaller-compatible/sword2/service_document.py @ 20:8b9e7f2f80e2 timeouts
Updated to:
(i) allow timeout and password as parameters
(ii) use connection/collection/item/file objects
author | Steve Welburn <stephen.welburn@eecs.qmul.ac.uk> |
---|---|
date | Tue, 22 Jan 2013 13:41:24 +0000 |
parents | 8b69bba225c9 |
children |
line wrap: on
line source
#!/usr/bin/env python # -*- coding: utf-8 -*- """ Class to accept, parse and make queriable the Service Document response. Example: >>> doc = '''<?xml version="1.0" ?> <service xmlns:dcterms="http://purl.org/dc/terms/" xmlns:sword="http://purl.org/net/sword/terms/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns="http://www.w3.org/2007/app"> <sword:version>2.0</sword:version> <sword:maxUploadSize>16777216</sword:maxUploadSize> <workspace> <atom:title>Main Site</atom:title> <collection href="http://swordapp.org/col-iri/43"> <atom:title>Collection 43</atom:title> <accept>*/*</accept> <accept alternate="multipart-related">*/*</accept> <sword:collectionPolicy>Collection Policy</sword:collectionPolicy> <dcterms:abstract>Collection Description</dcterms:abstract> <sword:mediation>false</sword:mediation> <sword:treatment>Treatment description</sword:treatment> <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging> <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging> <sword:service>http://swordapp.org/sd-iri/e4</sword:service> </collection> </workspace> </service>''' >>> from sword2 import ServiceDocument >>> s = ServiceDocument(doc) >>> s.maxUploadSize 16777216 >>> s.workspaces [('Main Site', [<sword2.service_document.Collection object at 0x167be10>])] >>> for c in s.workspaces[0][1]: print c ... Collection: 'Collection 43' @ 'http://swordapp.org/col-iri/43'. Accept:[] SWORD: Collection Policy - 'Collection Policy' SWORD: Treatment - 'Treatment description' SWORD: Accept Packaging: '['http://purl.org/net/sword/package/SimpleZip', 'http://purl.org/net/sword/package/METSDSpaceSIP']' SWORD: Nested Service Documents - 'http://swordapp.org/sd-iri/e4' """ from sword2_logging import logging sd_l = logging.getLogger(__name__) from collection import SDCollection from compatible_libs import etree from utils import NS, get_text class ServiceDocument(object): def __init__(self, xml_response=None, sd_uri=None): self.sd_uri = sd_uri # Used mainly for debugging and logging self.parsed = False self.valid = False self.maxUploadSize = 0 # Zero implies no limit as default, as per spec self.version = None # Default to an empty string before attempting to parse self.workspaces = [] # Once enumerated, this will be a list of tuples, # of the form: ("Workspace Title", [list of SDCollection instances]) if xml_response: self.load_document(xml_response) def load_document(self, xml_response): try: if self.sd_uri: sd_l.debug("Attempting to load service document for %s" % self.sd_uri) else: sd_l.debug("Attempting to load service document") self.raw_response = xml_response self.service_dom = etree.fromstring(xml_response) self.parsed = True self.valid = self.validate() sd_l.info("Initial SWORD2 validation checks on service document - Valid document? %s" % self.valid) self._enumerate_workspaces() except Exception, e: # Due to variability of underlying etree implementations, catching all # exceptions... sd_l.error("Could not parse the Service Document response from the server - %s" % e) sd_l.debug("Received the following raw response:") sd_l.debug(self.raw_response) def validate(self): valid = True if not self.parsed: return False # The SWORD server MUST specify the sword:version element with a value of 2.0 # -- MUST have sword:version element # -- MUST have value of '2.0' self.version = get_text(self.service_dom, NS['sword'] % "version") if self.version: if self.version != "2.0": # Not a SWORD2 server... # Fail here? sd_l.error("The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s" % self.version) valid = False else: sd_l.error("The service document did not have a sword:version") valid = False # The SWORD server MAY specify the sword:maxUploadSize (in kB) of content that can be uploaded in one request [SWORD003] as a child of the app:service element. If provided this MUST contain an integer. maxupload = get_text(self.service_dom, NS['sword'] % "maxUploadSize") if maxupload: try: self.maxUploadSize = int(maxupload) except ValueError: # Unparsable as an integer. Enough to fail a validation? # Strictly... yep sd_l.error("The service document did not have maximum upload size parseable as an integer.") valid = False # Check for the first workspace for a collection element, just to make sure there is something there. test_workspace = self.service_dom.find(NS['app'] % "workspace") if test_workspace != None: sd_l.debug("At least one app:workspace found, with at least one app:collection within it.") else: valid = False sd_l.error("Could not find a app:workspace element in the service document.") return valid def _enumerate_workspaces(self): if not self.valid: sd_l.error("The service document didn't pass the SWORD2 validation steps ('MUST' statements in spec). The workspaces and collections will not be enumerated.") return if self.sd_uri: sd_l.info("Enumerating workspaces and collections from the service document for %s" % self.sd_uri) # Reset the internally cached set self.workspaces = [] for workspace in self.service_dom.findall(NS['app'] % "workspace"): workspace_title = get_text(workspace, NS['atom'] % 'title') sd_l.debug("Found workspace '%s'" % workspace_title) collections = [] for collection_element in workspace.findall(NS['app'] % 'collection'): # app:collection + sword extensions c = SDCollection() c.load_from_etree(collection_element) collections.append(c) self.workspaces.append( (workspace_title, collections) ) # Add tuple