marco@16
|
1 #!/usr/bin/env python
|
marco@16
|
2 # -*- coding: utf-8 -*-
|
marco@16
|
3
|
marco@16
|
4 """
|
marco@16
|
5 This module provides `Deposit_Receipt`, a convenient class for extracting information from the Deposit Receipts sent back by the
|
marco@16
|
6 SWORD2-compliant server for many transactions.
|
marco@16
|
7
|
marco@16
|
8 #BETASWORD2URL
|
marco@16
|
9 See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt
|
marco@16
|
10
|
marco@16
|
11 """
|
marco@16
|
12
|
marco@16
|
13 from sword2_logging import logging
|
marco@16
|
14 d_l = logging.getLogger(__name__)
|
marco@16
|
15
|
marco@16
|
16 from atom_objects import Category
|
marco@16
|
17
|
marco@16
|
18 from compatible_libs import etree
|
marco@16
|
19 from utils import NS, get_text
|
marco@16
|
20
|
marco@16
|
class Deposit_Receipt(object):
    """
    `Deposit_Receipt` - provides convenience methods for extracting information from the Deposit Receipts sent back by the
    SWORD2-compliant server for many transactions.

    #BETASWORD2URL
    See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

    Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit receipt document is sent back by the server.

    Usage:

    >>> from sword2 import Deposit_Receipt

    .... get the XML text for a Deposit Receipt in the variable `doc`

    # Parse the response:
    >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

    # Check that the response was parsable (well-formed XML)
    >>> assert dr.parsed == True

    Available attributes:

    Atom convenience attribs -- corresponds to (type of object that is held)
    `self.title`      -- <atom:title>   (`str`)
    `self.id`         -- <id>           (`str`)
    `self.updated`    -- <updated>      (`str`)
    `self.summary`    -- <atom:summary> (`str`)
    `self.categories` -- <category>     (`list` of `sword2.Category`)

    IRI/URIs
    `self.edit`            -- The Edit-IRI (`str`)
                              <link rel="edit">
    `self.edit_media`      -- The Edit-Media-IRI (`str`)
                              <link rel="edit-media"> (no `type`, or type="application/zip")
    `self.edit_media_feed` -- The Edit-Media-IRI [Atom Feed] (`str`)
                              <link rel="edit-media" type="application/atom+xml;type=feed">
    `self.alternate`       -- A link which, according to the spec, (`str`)
                              "points to the splash page of the item on the server"
    `self.se_iri`          -- The SWORD2 Edit IRI (SE-IRI), defined by (`str`)
                              <link rel="http://purl.org/net/sword/terms/add">
                              which MAY be the same as the Edit-IRI

    `self.cont_iri`        -- The Content-IRI (`str`); the `src` of the last <content> element seen
                              eg `src` from <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
    `self.content`         -- All Content-IRIs (`dict` with the src or Content-IRI as the key, with a `dict` of the other attributes as its value)

    `self.links`           -- All link elements in a `dict`, with the 'rel' value being used as its key. The values of this are `list`s
                              with a `dict` of attributes for each item, corresponding to the information in a single <link> element.

                              SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and "http://purl.org/net/sword/terms/derivedResource"
                              are to be found in `self.links`

                              eg
                              >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                              [{'href': "....", 'type':'application/pdf'}]

    General metadata:
    `self.metadata`        -- Simple metadata access.
                              A `dict` where the keys are equivalent to the prefixed element names, with an underscore(_) replacing the colon (:)
                              eg "<dcterms:title>" in the deposit receipt would be accessible in this attribute, under
                              the key of 'dcterms_title'. A field that occurs more than once holds a `list` of its values.

                              eg
                              >>> dr.metadata.get("dcterms_title")
                              "The Origin of Species"

                              >>> dr.metadata.get("dcterms_madeupelement")
                              `None`

    `self.packaging`       -- sword:packaging elements declaring the formats that the Media Resource can be retrieved in (`list` of `str`)

    `self.response_headers` -- The HTTP response headers that accompanied this receipt

    `self.location`        -- The location, if given (from HTTP Header: "Location: ....")

    `self.code`            -- The `code` argument as passed in (default 0)
                              NOTE(review): presumably the HTTP status code -- confirm against the caller.

    `self.parsed`          -- `True` once a DOM has been obtained (parsed from text or passed in), otherwise `False`

    `self.dom`             -- The element the receipt was read from, or `None` if nothing was parsed
    """

    def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
        """Create the receipt, reading it from XML text or from an already-parsed element.

        Arguments:
        `xml_deposit_receipt` -- XML text of the deposit receipt; takes precedence over `dom` when truthy.
        `dom`                 -- an already-parsed root element, used when no XML text is given.
        `response_headers`    -- headers to keep in `self.response_headers` (a fresh `dict` when omitted).
        `location`            -- kept in `self.location`.
        `code`                -- kept in `self.code`.

        If the XML text cannot be parsed, the error is logged, `self.parsed` stays `False`
        and the object is left with its empty defaults.
        """
        self.parsed = False
        # A new dict per instance: a `{}` default argument would be shared by every receipt.
        self.response_headers = {} if response_headers is None else response_headers
        self.location = location
        self.code = code
        self.dom = None
        self.metadata = {}
        self.links = {}
        self.edit = None
        self.edit_media = None
        self.edit_media_feed = None
        self.alternate = None
        self.se_iri = None
        # Atom convenience attribs
        self.title = None
        self.id = None
        self.updated = None
        self.summary = None

        self.packaging = []
        self.categories = []
        self.content = {}
        self.cont_iri = None

        if xml_deposit_receipt:
            try:
                self.dom = etree.fromstring(xml_deposit_receipt)
                self.parsed = True
            except Exception:
                # Deliberately broad: the concrete parse-error class depends on
                # which etree implementation `compatible_libs` supplies.
                d_l.error("Was not able to parse the deposit receipt as XML.")
                return
            self.handle_metadata()
        elif dom is not None:
            self.dom = dom
            self.parsed = True
            self.handle_metadata()

    def handle_metadata(self):
        """Method that walks the children of `self.dom`, assigning the information to the object's attributes.

        Each child whose tag begins with one of the namespace prefixes in `NS` is filed under the
        field name "<NS key>_<local tag name>". Links, content, generator and sword:packaging
        elements get dedicated handling; every other field is stored in `self.metadata`.
        """
        # Iterating the element yields its children; `getchildren()` is deprecated.
        for e in self.dom:
            for nmsp, prefix in NS.items():
                # NOTE(review): `prefix % ""` assumes each NS value is a "{namespace-uri}%s"
                # style template -- confirm against utils.NS.
                if not str(e.tag).startswith(prefix % ""):
                    continue
                _, tagname = e.tag.rsplit("}", 1)
                field = "%s_%s" % (nmsp, tagname)
                d_l.debug("Attempting to intepret field: '%s'" % field)
                if field == "atom_link":
                    self.handle_link(e)
                elif field == "atom_content":
                    self.handle_content(e)
                elif field == "atom_generator":
                    # Build the summary in a local string so the parsed DOM (and thus
                    # `to_xml()`) is left untouched, and so an element with neither
                    # text nor attributes yields "" instead of failing on `None`.
                    generator = e.text or ""
                    for ak, av in e.attrib.items():
                        generator += " %s:\"%s\"" % (ak, av)
                    self.metadata[field] = generator.strip()
                elif field == "sword_packaging":
                    self.packaging.append(e.text)
                else:
                    if field == "atom_title":
                        self.title = e.text
                    if field == "atom_id":
                        self.id = e.text
                    if field == "atom_updated":
                        self.updated = e.text
                    if field == "atom_summary":
                        self.summary = e.text
                    if field == "atom_category":
                        self.categories.append(Category(dom=e))
                    # A repeated field is promoted from a single value to a list of values.
                    if field in self.metadata:
                        if isinstance(self.metadata[field], list):
                            self.metadata[field].append(e.text)
                        else:
                            self.metadata[field] = [self.metadata[field], e.text]
                    else:
                        self.metadata[field] = e.text

    def handle_link(self, e):
        """Method that handles the intepreting of <atom:link> element information and placing it into the anticipated attributes.

        A link without a `rel` attribute is ignored. Every other link is appended to
        `self.links[rel]` as a `dict` of its attributes (minus `rel`), and the well-known
        rel values also fill the matching convenience attribute.
        """
        # MUST have rel
        rel = e.attrib.get('rel', None)
        if not rel:
            return

        href = e.attrib.get('href', None)
        if rel == "edit":
            self.edit = href
        elif rel == "edit-media":
            # only put the edit-media iri in the convenience attribute if
            # there is no 'type' (or it is the zip media resource)
            if 'type' not in e.attrib:
                self.edit_media = href
            elif e.attrib['type'] in ("application/atom+xml; type=feed", "application/atom+xml;type=feed"):
                # Both spellings are accepted; comparing against `(a or b)` only ever tested `a`.
                self.edit_media_feed = href
            elif e.attrib['type'] == "application/zip":
                self.edit_media = href
        elif rel == "http://purl.org/net/sword/terms/add":
            self.se_iri = href
        elif rel == "alternate":
            self.alternate = href

        # Put all links into .links attribute, with all element attribs
        attribs = dict((k, v) for k, v in e.attrib.items() if k != "rel")
        self.links.setdefault(rel, []).append(attribs)

    def handle_content(self, e):
        """Method to intepret the <atom:content> elements.

        Only elements carrying a `src` attribute are recorded: the remaining attributes are
        stored in `self.content[src]` and `self.cont_iri` is set to that `src`.
        """
        # eg <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
        if "src" in e.attrib:
            info = dict(e.attrib)
            src = info.pop('src')
            self.content[src] = info
            self.cont_iri = src

    def to_xml(self):
        """Convenience method for outputing the DOM as a (byte)string.

        Returns `None` when no DOM is held (nothing was passed in, or parsing failed).
        """
        if self.dom is None:
            return None
        return etree.tostring(self.dom)

    def __str__(self):
        """Method for producing a human-readable report about the information in this object, suitable
        for CLI or other logging.

        NB does not report all information, just key parts."""
        report = []
        for k in sorted(self.metadata.keys()):
            report.append("%s: '%s'" % (k, self.metadata[k]))
        if self.edit:
            report.append("Edit IRI: %s" % self.edit)
        if self.edit_media:
            report.append("Edit-Media IRI: %s" % self.edit_media)
        if self.se_iri:
            report.append("SWORD2 Add IRI: %s" % self.se_iri)
        for c in self.categories:
            report.append(str(c))
        if self.packaging:
            report.append("SWORD2 Package formats available: %s" % self.packaging)
        if self.alternate:
            report.append("Alternate IRI: %s" % self.alternate)
        for k, v in self.links.items():
            report.append("Link rel:'%s' -- %s" % (k, v))
        return "\n".join(report)
|