annotate sword2-libraries-pyinstaller-compatible/sword2/deposit_receipt.py @ 22:d1752c7031e4 timeouts tip

Updated .hgignore to ignore sword2_logging.conf and anything in .cache
author Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date Tue, 22 Jan 2013 14:43:42 +0000
parents 8b69bba225c9
children
rev   line source
marco@16 1 #!/usr/bin/env python
marco@16 2 # -*- coding: utf-8 -*-
marco@16 3
marco@16 4 """
marco@16 5 This module provides `Deposit_Receipt`, a convenient class for extracting information from the Deposit Receipts sent back by the
marco@16 6 SWORD2-compliant server for many transactions.
marco@16 7
marco@16 8 #BETASWORD2URL
marco@16 9 See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt
marco@16 10
marco@16 11 """
marco@16 12
marco@16 13 from sword2_logging import logging
marco@16 14 d_l = logging.getLogger(__name__)
marco@16 15
marco@16 16 from atom_objects import Category
marco@16 17
marco@16 18 from compatible_libs import etree
marco@16 19 from utils import NS, get_text
marco@16 20
class Deposit_Receipt(object):
    """Parsed view of a SWORD2 Deposit Receipt (an Atom entry document)."""

    # Both spellings of the Atom feed media type that may appear on an
    # edit-media link (with and without a space after the semicolon).
    _FEED_TYPES = (
        "application/atom+xml; type=feed",
        "application/atom+xml;type=feed",
    )

    # Atom elements whose text is mirrored onto a convenience attribute.
    _ATOM_TEXT_ATTRS = {
        "atom_title": "title",
        "atom_id": "id",
        "atom_updated": "updated",
        "atom_summary": "summary",
    }

    def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
        """
        `Deposit_Receipt` - provides convenience methods for extracting information from the Deposit Receipts sent back by the
        SWORD2-compliant server for many transactions.

        #BETASWORD2URL
        See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

        Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit receipt document is sent back by the server.

        Parameters:

        `xml_deposit_receipt` -- the receipt as XML text; parsed with `etree.fromstring`. Takes precedence over `dom`.
        `dom`                 -- an already-parsed element to use instead of XML text.
        `response_headers`    -- the HTTP response headers (defaults to a new, empty `dict` per instance).
        `location`            -- value of the HTTP "Location" header, if any.
        `code`                -- stored as-is on `self.code` (default 0).

        Usage:

        >>> from sword2 import Deposit_Receipt

        .... get the XML text for a Deposit Receipt in the variable `doc`

        # Parse the response:
        >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

        # Check that the response was parsable (valid XML)
        >>> assert dr.parsed == True

        Available attributes:

        Atom convenience attribs -- corresponds to (type of object that is held)
        `self.title`      -- <atom:title>   (`str`)
        `self.id`         -- <id>           (`str`)
        `self.updated`    -- <updated>      (`str`)
        `self.summary`    -- <atom:summary> (`str`)
        `self.categories` -- <category>     (`list` of `sword2.Category`)

        IRI/URIs
        `self.edit`            -- The Edit-IRI (`str`)
                                  <link rel="edit">
        `self.edit_media`      -- The Edit-Media-IRI (`str`)
                                  <link rel="edit-media">
        `self.edit_media_feed` -- The Edit-Media-IRI [Atom Feed] (`str`)
                                  <link rel="edit-media" type="application/atom+xml;type=feed">
        `self.alternate`       -- A link which, according to the spec, (`str`)
                                  "points to the splash page of the item on the server"
        `self.se_iri`          -- The SWORD2 Edit IRI (SE-IRI), defined by (`str`)
                                  <link rel="http://purl.org/net/sword/terms/add">
                                  which MAY be the same as the Edit-IRI

        `self.cont_iri`        -- The Content-IRI (`str`)
                                  eg `src` from <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
        `self.content`         -- All Content-IRIs (`dict` with the src or Content-IRI as the key, with a `dict` of the other attributes as its value)

        `self.links`           -- All links elements in a `dict`, with the 'rel' value being used as its key. The values of this are `list`s
                                  with a `dict` of attributes for each item, corresponding to the information in a single <link> element.

                                  SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and "http://purl.org/net/sword/terms/derivedResource"
                                  are to be found in `self.links`

                                  eg
                                  >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                                  [{'href': "....", 'type':'application/pdf'}]


        General metadata:
        `self.metadata`        -- Simple metadata access.
                                  A `dict` where the keys are equivalent to the prefixed element names, with an underscore(_) replacing the colon (:)
                                  eg "<dcterms:title>" in the deposit receipt would be accessible in this attribute, under
                                  the key of 'dcterms_title'. A field that occurs more than once holds a `list` of its values.

                                  eg
                                  >>> dr.metadata.get("dcterms_title")
                                  "The Origin of Species"

                                  >>> dr.metadata.get("dcterms_madeupelement")
                                  `None`

        `self.packaging`       -- sword:packaging elements declaring the formats that the Media Resource can be retrieved in (`list` of `str`)

        `self.response_headers` -- The HTTP response headers that accompanied this receipt

        `self.location`        -- The location, if given (from HTTP Header: "Location: ....")

        `self.dom`             -- The parsed element, or `None` when nothing was (successfully) parsed.
        """
        self.parsed = False
        self.dom = None
        # A fresh dict per instance: a literal `{}` default would be shared
        # between every receipt created without explicit headers.
        self.response_headers = {} if response_headers is None else response_headers
        self.location = location
        self.code = code
        self.metadata = {}
        self.links = {}
        self.edit = None
        self.edit_media = None
        self.edit_media_feed = None
        self.alternate = None
        self.se_iri = None
        # Atom convenience attribs
        self.title = None
        self.id = None
        self.updated = None
        self.summary = None

        self.packaging = []
        self.categories = []
        self.content = {}
        self.cont_iri = None

        if xml_deposit_receipt:
            try:
                self.dom = etree.fromstring(xml_deposit_receipt)
            except Exception:
                # Deliberately broad: the concrete parse-error class depends
                # on which etree implementation `compatible_libs` provides.
                d_l.error("Was not able to parse the deposit receipt as XML.")
                return
            self.parsed = True
            self.handle_metadata()
        elif dom is not None:
            # Identity test: an element may define its own comparison and
            # truthiness, so `!= None` / bare truth tests are unreliable.
            self.dom = dom
            self.parsed = True
            self.handle_metadata()

    def handle_metadata(self):
        """Method that walks the children of `self.dom`, assigning the information to the object's attributes."""
        for e in self.dom:
            # str() guards against non-string tags (e.g. comment nodes).
            tag = str(e.tag)
            for nmsp, prefix in NS.items():
                if not tag.startswith(prefix % ""):
                    continue
                tagname = tag.rsplit("}", 1)[1]
                field = "%s_%s" % (nmsp, tagname)
                d_l.debug("Attempting to intepret field: '%s'", field)
                if field == "atom_link":
                    self.handle_link(e)
                elif field == "atom_content":
                    self.handle_content(e)
                elif field == "atom_generator":
                    self.metadata[field] = self._describe_generator(e)
                elif field == "sword_packaging":
                    self.packaging.append(e.text)
                else:
                    attr_name = self._ATOM_TEXT_ATTRS.get(field)
                    if attr_name is not None:
                        setattr(self, attr_name, e.text)
                    elif field == "atom_category":
                        self.categories.append(Category(dom=e))
                    self._record_metadata(field, e.text)

    @staticmethod
    def _describe_generator(e):
        """Return the <atom:generator> text followed by its attributes as ` key:"value"` pairs."""
        # Built in a local string so the parsed DOM is left untouched, and
        # so an element with neither text nor attributes yields "".
        parts = [e.text or ""]
        parts.extend(" %s:\"%s\"" % (ak, av) for ak, av in e.attrib.items())
        return "".join(parts).strip()

    def _record_metadata(self, field, value):
        """Store `value` under `field`, promoting repeated fields to a `list` of values."""
        if field not in self.metadata:
            self.metadata[field] = value
            return
        existing = self.metadata[field]
        if isinstance(existing, list):
            existing.append(value)
        else:
            self.metadata[field] = [existing, value]

    def handle_link(self, e):
        """Method that handles the intepreting of <atom:link> element information and placing it into the anticipated attributes."""
        # MUST have rel
        rel = e.attrib.get('rel')
        if not rel:
            return
        href = e.attrib.get('href')
        if rel == "edit":
            self.edit = href
        elif rel == "edit-media":
            link_type = e.attrib.get('type')
            # The plain edit-media convenience attribute is only filled from a
            # link with no 'type' or with the zip type; the Atom feed variant
            # goes to its own attribute. Other types are only kept in .links
            if link_type is None or link_type == "application/zip":
                self.edit_media = href
            elif link_type in self._FEED_TYPES:
                self.edit_media_feed = href
        elif rel == "http://purl.org/net/sword/terms/add":
            self.se_iri = href
        elif rel == "alternate":
            self.alternate = href
        # Put all links into .links attribute, with all element attribs
        attribs = dict((k, v) for k, v in e.attrib.items() if k != "rel")
        self.links.setdefault(rel, []).append(attribs)

    def handle_content(self, e):
        """Method to intepret the <atom:content> elements."""
        # eg <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
        if "src" not in e.attrib:
            return
        src = e.attrib['src']
        self.content[src] = dict((k, v) for k, v in e.attrib.items() if k != "src")
        # The most recently seen <content src=...> wins.
        self.cont_iri = src

    def to_xml(self):
        """Convenience method for outputing the DOM as a (byte)string.

        Only meaningful when `self.parsed` is True; otherwise `self.dom` is `None`."""
        return etree.tostring(self.dom)

    def __str__(self):
        """Method for producing a human-readable report about the information in this object, suitable
        for CLI or other logging.

        NB does not report all information, just key parts."""
        _s = []
        for k in sorted(self.metadata.keys()):
            _s.append("%s: '%s'" % (k, self.metadata[k]))
        if self.edit:
            _s.append("Edit IRI: %s" % self.edit)
        if self.edit_media:
            _s.append("Edit-Media IRI: %s" % self.edit_media)
        if self.se_iri:
            _s.append("SWORD2 Add IRI: %s" % self.se_iri)
        for c in self.categories:
            _s.append(str(c))
        if self.packaging:
            _s.append("SWORD2 Package formats available: %s" % self.packaging)
        if self.alternate:
            _s.append("Alternate IRI: %s" % self.alternate)
        for k, v in self.links.items():
            _s.append("Link rel:'%s' -- %s" % (k, v))
        return "\n".join(_s)