Mercurial > hg > sworduploader

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This module provides `Deposit_Receipt`, a convenient class for extracting information from the Deposit Receipts sent back by the
SWORD2-compliant server for many transactions.

#BETASWORD2URL
See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

"""

from sword2_logging import logging
d_l = logging.getLogger(__name__)

from atom_objects import Category

from compatible_libs import etree
from utils import NS, get_text

class Deposit_Receipt(object):
    """Convenience wrapper around a Deposit Receipt sent back by a SWORD2-compliant server.

    #BETASWORD2URL
    See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

    Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit
    receipt document is sent back by the server.

    Usage:

    >>> from sword2 import Deposit_Receipt

    .... get the XML text for a Deposit Receipt in the variable `doc`

    # Parse the response:
    >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

    # Check that the response is parsable (valid XML)
    >>> assert dr.parsed == True

    Available attributes:

        `self.parsed`           -- `True` once a DOM has been obtained (parsed from text or passed in)
        `self.dom`              -- The root element of the receipt, or `None` if there is none
        `self.code`             -- The `code` value handed to the constructor

        Atom convenience attribs -- corresponds to (type of object that is held)
        `self.title`            -- <atom:title>   (`str`)
        `self.id`               -- <id>           (`str`)
        `self.updated`          -- <updated>      (`str`)
        `self.summary`          -- <atom:summary> (`str`)
        `self.categories`       -- <category>     (`list` of `sword2.Category`)

        IRI/URIs
        `self.edit`             -- The Edit-IRI         (`str`)
                                    <link rel="edit">
        `self.edit_media`       -- The Edit-Media-IRI   (`str`)
                                    <link rel="edit-media"> with no 'type', or with type="application/zip"
        `self.edit_media_feed`  -- The Edit-Media-IRI [Atom Feed]  (`str`)
                                    <link rel="edit-media" type="application/atom+xml;type=feed">
        `self.alternate`        -- A link which, according to the spec,                     (`str`)
                                   "points to the splash page of the item on the server"
        `self.se_iri`           -- The SWORD2 Edit IRI (SE-IRI), defined by                 (`str`)
                                    <link rel="http://purl.org/net/sword/terms/add">
                                    which MAY be the same as the Edit-IRI

        `self.cont_iri`         -- The Content-IRI     (`str`)
                                    eg `src` from <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
                                    If several <content> elements carry a `src`, this holds the last one seen.
        `self.content`          -- All Content-IRIs    (`dict` with the src or Content-IRI as the key, with a `dict`
                                    of the other attributes as its value)

        `self.links`            -- All link elements that have a 'rel', in a `dict` keyed by the 'rel' value.
                                    Each value is a `list` holding one `dict` of the remaining attributes per
                                    <link> element.

                                    SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and
                                    "http://purl.org/net/sword/terms/derivedResource" are to be found in `self.links`

                                    eg
                                    >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                                    [{'href': "....", 'type': 'application/pdf'}]

        General metadata:
        `self.metadata`         -- Simple metadata access.
                                    A `dict` where the keys are equivalent to the prefixed element names, with an
                                    underscore(_) replacing the colon (:)
                                    eg "<dcterms:title>" in the deposit receipt would be accessible in this
                                    attribute, under the key of 'dcterms_title'. An element that occurs more than
                                    once is held as a `list` of its texts.

                                    eg
                                    >>> dr.metadata.get("dcterms_title")
                                    "The Origin of Species"

                                    >>> dr.metadata.get("dcterms_madeupelement")
                                    `None`

        `self.packaging`        -- sword:packaging elements declaring the formats that the Media Resource can be
                                    retrieved in   (`list` of `str`)

        `self.response_headers` -- The HTTP response headers that accompanied this receipt

        `self.location`         -- The location, if given (from HTTP Header: "Location: ....")
    """

    # Canonical (whitespace-free) spelling of the Atom feed media type; link types are compared
    # against it after whitespace has been stripped, so "; type=feed" and ";type=feed" both match.
    _ATOM_FEED_TYPE = "application/atom+xml;type=feed"

    def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
        """Build a receipt from XML text or from an already-parsed element.

        `xml_deposit_receipt` -- the receipt as XML text; parsed with `etree.fromstring`. Takes priority over `dom`.
        `dom`                 -- an already-parsed root element, used only when no XML text is given.
        `response_headers`    -- the HTTP response headers; defaults to a fresh empty `dict` per instance.
        `location`            -- value of the HTTP "Location" header, if any.
        `code`                -- stored as-is on `self.code` (presumably the HTTP status code -- confirm with callers).

        A parse failure is logged and leaves `self.parsed` as `False`; it is not raised to the caller.
        """
        self.parsed = False
        self.dom = None
        # A `{}` default in the signature would be one dict shared by every instance.
        self.response_headers = {} if response_headers is None else response_headers
        self.location = location
        self.code = code
        self.metadata = {}
        self.links = {}
        self.edit = None
        self.edit_media = None
        self.edit_media_feed = None
        self.alternate = None
        self.se_iri = None
        # Atom convenience attribs
        self.title = None
        self.id = None
        self.updated = None
        self.summary = None

        self.packaging = []
        self.categories = []
        self.content = {}
        self.cont_iri = None

        if xml_deposit_receipt:
            try:
                self.dom = etree.fromstring(xml_deposit_receipt)
            except Exception:
                # Best-effort by design: callers inspect `self.parsed` instead of catching.
                d_l.error("Was not able to parse the deposit receipt as XML.")
                return
            self.parsed = True
            self.handle_metadata()
        elif dom is not None:
            self.dom = dom
            self.parsed = True
            self.handle_metadata()

    def handle_metadata(self):
        """Walk the direct children of `self.dom`, assigning their information to this object's attributes.

        Only elements whose namespace appears in `NS` are looked at; each is identified by a
        "<prefix>_<localname>" field name. (`NS` is used here as a mapping of prefix to a
        "{namespace-uri}%s" style template.)
        """
        for e in self.dom:
            for nmsp, prefix in NS.items():
                if not str(e.tag).startswith(prefix % ""):
                    continue
                _, tagname = e.tag.rsplit("}", 1)
                field = "%s_%s" % (nmsp, tagname)
                d_l.debug("Attempting to intepret field: '%s'" % field)
                if field == "atom_link":
                    self.handle_link(e)
                elif field == "atom_content":
                    self.handle_content(e)
                elif field == "atom_generator":
                    # Summarise the generator as its text followed by its attributes. This is built
                    # in a local string so the DOM is left untouched, and an element with neither
                    # text nor attributes yields "" rather than failing on `None`.
                    text = e.text or ""
                    for ak, av in e.attrib.items():
                        text += " %s:\"%s\"" % (ak, av)
                    self.metadata[field] = text.strip()
                elif field == "sword_packaging":
                    self.packaging.append(e.text)
                else:
                    if field == "atom_title":
                        self.title = e.text
                    elif field == "atom_id":
                        self.id = e.text
                    elif field == "atom_updated":
                        self.updated = e.text
                    elif field == "atom_summary":
                        self.summary = e.text
                    elif field == "atom_category":
                        self.categories.append(Category(dom=e))
                    self._add_metadata(field, e.text)

    def _add_metadata(self, field, value):
        """Record `value` under `field` in `self.metadata`, turning repeated fields into a `list`."""
        if field not in self.metadata:
            self.metadata[field] = value
            return
        existing = self.metadata[field]
        if isinstance(existing, list):
            existing.append(value)
        else:
            self.metadata[field] = [existing, value]

    def handle_link(self, e):
        """Interpret an <atom:link> element, filling the convenience IRI attributes and `self.links`.

        Links without a 'rel' attribute are ignored entirely.
        """
        # MUST have rel
        rel = e.attrib.get('rel', None)
        if not rel:
            return

        href = e.attrib.get('href', None)
        if rel == "edit":
            self.edit = href
        elif rel == "edit-media":
            link_type = e.attrib.get('type', None)
            if link_type is None:
                # An untyped edit-media link is the plain Edit-Media-IRI.
                self.edit_media = href
            elif "".join(link_type.split()) == self._ATOM_FEED_TYPE:
                # Whitespace is removed before comparing so both spellings of the parameter match.
                self.edit_media_feed = href
            elif link_type == "application/zip":
                self.edit_media = href
        elif rel == "http://purl.org/net/sword/terms/add":
            self.se_iri = href
        elif rel == "alternate":
            self.alternate = href

        # Put all links into .links attribute, with all element attribs apart from 'rel' (it is the key)
        attribs = dict((k, v) for k, v in e.attrib.items() if k != "rel")
        self.links.setdefault(rel, []).append(attribs)

    def handle_content(self, e):
        """Interpret an <atom:content> element; elements without a 'src' attribute are ignored."""
        # eg <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
        if "src" not in e.attrib:
            return
        info = dict(e.attrib)
        src = info.pop('src')
        self.content[src] = info
        self.cont_iri = src

    def to_xml(self):
        """Convenience method for outputting the DOM as a (byte)string via `etree.tostring`.

        Only meaningful when `self.parsed` is `True`; otherwise `self.dom` is `None`.
        """
        return etree.tostring(self.dom)

    def __str__(self):
        """Produce a human-readable report about the information in this object, suitable
        for CLI or other logging.

        NB does not report all information, just key parts."""
        _s = []
        for k in sorted(self.metadata.keys()):
            _s.append("%s: '%s'" % (k, self.metadata[k]))
        if self.edit:
            _s.append("Edit IRI: %s" % self.edit)
        if self.edit_media:
            _s.append("Edit-Media IRI: %s" % self.edit_media)
        if self.se_iri:
            _s.append("SWORD2 Add IRI: %s" % self.se_iri)
        for c in self.categories:
            _s.append(str(c))
        if self.packaging:
            _s.append("SWORD2 Package formats available: %s" % self.packaging)
        if self.alternate:
            _s.append("Alternate IRI: %s" % self.alternate)
        for k, v in self.links.items():
            _s.append("Link rel:'%s' -- %s" % (k, v))
        return "\n".join(_s)
author	Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date	Tue, 29 May 2012 12:42:49 +0100
parents
children