#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This module provides `Deposit_Receipt`, a convenient class for extracting information from the Deposit Receipts sent back by the
SWORD2-compliant server for many transactions.

#BETASWORD2URL
See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

"""

from sword2_logging import logging
d_l = logging.getLogger(__name__)

from atom_objects import Category

from compatible_libs import etree
from utils import NS, get_text


class Deposit_Receipt(object):
    """Parsed view of a SWORD2 Deposit Receipt (an Atom entry document)."""

    # Both spellings of the Atom feed media type are accepted for an
    # edit-media link; servers differ on the space after the semicolon.
    _FEED_TYPES = ("application/atom+xml; type=feed",
                   "application/atom+xml;type=feed")

    def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
        """
        `Deposit_Receipt` - provides convenience methods for extracting information from the Deposit Receipts sent back by the
        SWORD2-compliant server for many transactions.

        #BETASWORD2URL
        See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

        Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit receipt document is sent back by the server.

        Parameters:

        `xml_deposit_receipt`   --  The deposit receipt as XML text. Parsed with `etree.fromstring` if given (and non-empty).
        `dom`                   --  An already-parsed element to use instead; only consulted when `xml_deposit_receipt` is not given.
        `response_headers`      --  The HTTP response headers that accompanied the receipt. Defaults to a fresh, empty `dict`.
        `location`              --  The location, if given (from HTTP Header: "Location: ....")
        `code`                  --  Stored unchanged on `self.code` (presumably the HTTP status code - confirm against the caller).

        Usage:

        >>> from sword2 import Deposit_Receipt

        .... get the XML text for a Deposit Receipt in the variable `doc`

        # Parse the response:
        >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

        # Check that the response is parsable (valid XML)
        >>> assert dr.parsed == True

        Available attributes:

        `self.parsed`   --  `True` once a DOM has been obtained, `False` otherwise      (`bool`)
        `self.dom`      --  The parsed element, or `None` if nothing was parsed

        Atom convenience attribs -- corresponds to (type of object that is held)
        `self.title`        -- <atom:title>     (`str`)
        `self.id`           -- <atom:id>        (`str`)
        `self.updated`      -- <atom:updated>   (`str`)
        `self.summary`      -- <atom:summary>   (`str`)
        `self.categories`   -- <atom:category>  (`list` of `sword2.Category`)

        IRI/URIs
        `self.edit`     --  The Edit-IRI        (`str`)
                            <link rel="edit" href="...."/>
        `self.edit_media`   --  The Edit-Media-IRI      (`str`)
                            <link rel="edit-media" href="...."/>
                            (no `type` attribute, or type "application/zip")
        `self.edit_media_feed`  --  The Edit-Media-IRI [Atom Feed]  (`str`)
                            <link rel="edit-media" type="application/atom+xml;type=feed" href="...."/>
        `self.alternate`    --  A link which, according to the spec,    (`str`)
                            "points to the splash page of the item on the server"
        `self.se_iri`   --  The SWORD2 Edit IRI (SE-IRI), defined by    (`str`)
                            <link rel="http://purl.org/net/sword/terms/add" href="...."/>
                            which MAY be the same as the Edit-IRI

        `self.cont_iri` --  The Content-IRI     (`str`)
                            eg `src` from <content type="application/zip" src="...."/>
        `self.content`  --  All Content-IRIs    (`dict` with the src or Content-IRI as the key, with a `dict` of the other attributes as its value)

        `self.links`    --  All links elements in a `dict`, with the 'rel' value being used as its key. The values of this are `list`s
                            with a `dict` of attributes for each item, corresponding to the information in a single <link> element.

                            SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and "http://purl.org/net/sword/terms/derivedResource"
                            are to be found in `self.links`

                            eg
                            >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                            [{'href': "....", 'type':'application/pdf'}]


        General metadata:
        `self.metadata` --  Simple metadata access.
                            A `dict` where the keys are equivalent to the prefixed element names, with an underscore(_) replacing the colon (:)
                            eg "<dcterms:title>The Origin of Species</dcterms:title>" in the deposit receipt would be accessible in this attribute, under
                            the key of 'dcterms_title'

                            eg
                            >>> dr.metadata.get("dcterms_title")
                            "The Origin of Species"

                            >>> dr.metadata.get("dcterms_madeupelement")
                            `None`

                            An element that occurs more than once is held as a `list` of its text values.

        `self.packaging`    --  sword:packaging elements declaring the formats that the Media Resource can be retrieved in  (`list` of `str`)

        `self.response_headers` --  The HTTP response headers that accompanied this receipt

        `self.location`     --  The location, if given (from HTTP Header: "Location: ....")

        `self.code`         --  The `code` argument, as passed in
        """
        self.parsed = False
        # A fresh dict per instance: a `{}` default in the signature would be
        # shared (and mutated) across every Deposit_Receipt ever created.
        self.response_headers = {} if response_headers is None else response_headers
        self.location = location
        self.code = code
        # Always defined, so callers can test `dr.dom is None` rather than
        # hitting AttributeError on an unparsed receipt.
        self.dom = None
        self.metadata = {}
        self.links = {}
        self.edit = None
        self.edit_media = None
        self.edit_media_feed = None
        self.alternate = None
        self.se_iri = None
        # Atom convenience attribs
        self.title = None
        self.id = None
        self.updated = None
        self.summary = None

        self.packaging = []
        self.categories = []
        self.content = {}
        self.cont_iri = None

        if xml_deposit_receipt:
            try:
                self.dom = etree.fromstring(xml_deposit_receipt)
            except Exception:
                # Best effort: an unparsable receipt leaves `parsed` False
                # and every attribute at its empty default.
                d_l.error("Was not able to parse the deposit receipt as XML.")
                return
            self.parsed = True
            self.handle_metadata()
        elif dom is not None:
            # Identity test on purpose: element truthiness depends on the
            # number of children, and `!=` may be overloaded.
            self.dom = dom
            self.parsed = True
            self.handle_metadata()

    def handle_metadata(self):
        """Method that walks the direct children of `self.dom`, assigning the information to the object's attributes.

        Each child whose tag lies in a namespace known to `NS` is given a field name
        of the form "<prefix>_<tagname>" and dispatched on that name. Children in
        unknown namespaces are ignored."""
        # Iterating the element yields its direct children; this replaces
        # `getchildren()`, which newer ElementTree versions no longer provide.
        for e in self.dom:
            for nmsp, prefix in NS.items():
                # `prefix` is a "{namespace-uri}%s" template; filling it with
                # "" leaves just the "{namespace-uri}" part to match on.
                if not str(e.tag).startswith(prefix % ""):
                    continue
                _, tagname = e.tag.rsplit("}", 1)
                field = "%s_%s" % (nmsp, tagname)
                d_l.debug("Attempting to intepret field: '%s'" % field)
                if field == "atom_link":
                    self.handle_link(e)
                elif field == "atom_content":
                    self.handle_content(e)
                elif field == "atom_generator":
                    # Fold the generator's attributes into its text, working
                    # on a local copy so the DOM itself (and therefore
                    # `to_xml()`) is left untouched. `or ""` also covers an
                    # element with neither text nor attributes.
                    text = e.text or ""
                    for ak, av in e.attrib.items():
                        text += " %s:\"%s\"" % (ak, av)
                    self.metadata[field] = text.strip()
                elif field == "sword_packaging":
                    self.packaging.append(e.text)
                else:
                    if field == "atom_title":
                        self.title = e.text
                    if field == "atom_id":
                        self.id = e.text
                    if field == "atom_updated":
                        self.updated = e.text
                    if field == "atom_summary":
                        self.summary = e.text
                    if field == "atom_category":
                        self.categories.append(Category(dom=e))
                    # Generic metadata: first occurrence is stored as a plain
                    # value, repeats promote the entry to a list of values.
                    if field in self.metadata:
                        if isinstance(self.metadata[field], list):
                            self.metadata[field].append(e.text)
                        else:
                            self.metadata[field] = [self.metadata[field], e.text]
                    else:
                        self.metadata[field] = e.text

    def handle_link(self, e):
        """Method that handles the interpreting of <atom:link> element information and placing it into the anticipated attributes.

        Links without a (non-empty) `rel` attribute are ignored entirely."""
        # MUST have rel
        rel = e.attrib.get('rel', None)
        if not rel:
            return

        href = e.attrib.get('href', None)
        if rel == "edit":
            self.edit = href
        elif rel == "edit-media":
            # only put the edit-media iri in the convenience attribute if
            # there is no 'type' (or it is the zip media resource)
            link_type = e.attrib.get('type', None)
            if link_type is None:
                self.edit_media = href
            elif link_type in self._FEED_TYPES:
                # Membership test rather than `== (a or b)`, which only
                # ever compared against the first spelling.
                self.edit_media_feed = href
            elif link_type == "application/zip":
                self.edit_media = href
        elif rel == "http://purl.org/net/sword/terms/add":
            self.se_iri = href
        elif rel == "alternate":
            self.alternate = href

        # Put all links into .links attribute, with all element attribs
        attribs = {}
        for k, v in e.attrib.items():
            if k != "rel":
                attribs[k] = v
        if rel in self.links:
            self.links[rel].append(attribs)
        else:
            self.links[rel] = [attribs]

    def handle_content(self, e):
        """Method to interpret the <atom:content> elements.

        Only elements carrying a `src` attribute are recorded; the most recently
        seen `src` becomes `self.cont_iri`."""
        # eg <content type="application/zip" src="...."/>
        if "src" in e.attrib:
            src = e.attrib['src']
            # Copy into a plain dict so deleting 'src' cannot touch the DOM.
            info = dict(e.attrib)
            del info['src']
            self.content[src] = info
            self.cont_iri = src

    def to_xml(self):
        """Convenience method for outputting the DOM as a (byte)string.

        Returns `None` if no deposit receipt was parsed."""
        if self.dom is None:
            return None
        return etree.tostring(self.dom)

    def __str__(self):
        """Method for producing a human-readable report about the information in this object, suitable
        for CLI or other logging.

        NB does not report all information, just key parts."""
        _s = []
        for k in sorted(self.metadata.keys()):
            _s.append("%s: '%s'" % (k, self.metadata[k]))
        if self.edit:
            _s.append("Edit IRI: %s" % self.edit)
        if self.edit_media:
            _s.append("Edit-Media IRI: %s" % self.edit_media)
        if self.se_iri:
            _s.append("SWORD2 Add IRI: %s" % self.se_iri)
        for c in self.categories:
            _s.append(str(c))
        if self.packaging:
            _s.append("SWORD2 Package formats available: %s" % self.packaging)
        if self.alternate:
            _s.append("Alternate IRI: %s" % self.alternate)
        for k, v in self.links.items():
            _s.append("Link rel:'%s' -- %s" % (k, v))
        return "\n".join(_s)