marco@16
|
1 #!/usr/bin/env python
|
marco@16
|
2 # -*- coding: utf-8 -*-
|
marco@16
|
3
|
marco@16
|
4 """Non-SWORD2 specific Atom/APP helper classes.
|
marco@16
|
5
|
marco@16
|
6 Most often used class will be 'Entry' - it provides an easy means to make an atom:entry
|
marco@16
|
7 document which can be used directly as the metadata entry.
|
marco@16
|
8
|
marco@16
|
9 Also provides Category, a convenience class that simplifies reading category information from an atom:entry
|
marco@16
|
10 """
|
marco@16
|
11
|
marco@16
|
12 from sword2_logging import logging
|
marco@16
|
13 from implementation_info import __version__
|
marco@16
|
14 coll_l = logging.getLogger(__name__)
|
marco@16
|
15
|
marco@16
|
16 from compatible_libs import etree
|
marco@16
|
17 from utils import NS, get_text
|
marco@16
|
18
|
marco@16
|
19 from datetime import datetime
|
marco@16
|
20
|
marco@16
|
class Category(object):
    """Convenience class to aid in interpreting atom:category elements in XML.

    Currently, this is read-only: it copies values out of an element but never
    writes anything back to it.

    Usage:

    >>> from sword2 import Category

    `Category` expects an etree element (`c_node` in this example) referencing
    an <atom:category> element:

        <atom:category term="...." scheme="...." label="....."> .... </atom:category>

    # Load a `Category` instance:
    >>> c = Category(dom=c_node)

    # Overrides `__str__` to provide a simple means to view the content
    >>> print(c)
    Category scheme:http://purl.org/net/sword/terms/ term:http://purl.org/net/sword/terms/originalDeposit label:Orignal Deposit text:'None'

    # Element attributes appear as object attributes:
    >>> c.scheme
    'http://purl.org/net/sword/terms/'

    # Element text will be in the text attribute, if text is present
    >>> c.text is None
    True
    """

    def __init__(self, term=None, scheme=None, label=None, text=None, dom=None):
        """Init a `Category` - 99% of the time, this is done by setting `dom`.

        However, if (for testing) there is a need to 'fake' a `Category`, all
        the attributes can be set directly through the constructor.

        Parameters:
            term, scheme, label, text: initial attribute values. Any value
                found on `dom` overrides the corresponding keyword value.
            dom: optional element exposing `.attrib` (a mapping) and `.text`.
        """
        self.term = term
        self.scheme = scheme
        self.label = label
        self.text = text
        # Always define `dom`, so the attribute exists on hand-built instances
        # too instead of raising AttributeError when it is read.
        self.dom = dom
        # Identity test: an element without children is falsy in etree, so a
        # truthiness check would wrongly skip valid (empty) elements.
        if dom is not None:
            self._from_element(dom)

    def _from_element(self, e):
        """Load the internal attributes from the element `e`.

        Attribute names are matched by suffix, so namespace-qualified names
        such as '{uri}term' are recognised as well as plain 'term'. The
        element text only replaces `self.text` when it is non-empty.
        """
        for name, value in e.attrib.items():
            if name.endswith("scheme"):
                self.scheme = value
            elif name.endswith("term"):
                self.term = value
            elif name.endswith("label"):
                self.label = value
        if e.text:
            self.text = e.text

    def __str__(self):
        """Rudimentary way to display the data held, in a way amenable to stdout."""
        return "Category scheme:%s term:%s label:%s text:'%s'" % (self.scheme,
                                                                  self.term,
                                                                  self.label,
                                                                  self.text)
|
marco@16
|
83
|
marco@16
|
84
|
marco@16
|
class Entry(object):
    """Used to create `Entry`s - for multipart/metadata submission.

    Has a simple and extendable way to add in namespace-aware key-value pairs.

    Example of use:

    >>> from sword2 import Entry
    >>> e = Entry()    # it can be opened blank, but more usefully...
    >>> e = Entry(id="atom id",
    ...           title="atom title",
    ...           dcterms_identifier="some other id")

    # Getting the document as a string
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
    <updated>2011-06-05T16:20:34.914474</updated><dcterms:identifier>some other id</dcterms:identifier><id>atom id</id><title>atom title</title></entry>

    # Adding fields to the metadata entry
    # dcterms (and other, non-atom fields) can be used by passing in a parameter
    # with an underscore between the prefix and element name, eg:
    >>> e.add_fields(dcterms_title="dcterms title", dcterms_some_other_field="other")

    # atom:author field is treated slightly differently than all the other fields:
    # a dictionary is required
    >>> e.add_fields(author={"name": "Ben", "email": "foo@example.org"})
    >>> print(str(e))
    <?xml version="1.0"?>
    <entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
    </entry>

    # Other namespaces - use `Entry.register_namespace` to add them to the list
    # of those considered (prefix, URL):
    >>> e.register_namespace("myschema", "http://example.org")
    >>> e.add_fields(myschema_foo="bar")
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
        <myschema:foo xmlns:myschema="http://example.org">bar</myschema:foo>
    </entry>

    This class doesn't provide editing/updating functions as the full etree API
    is exposed through the attribute 'entry'. For example:

    >>> len(e.entry.getchildren())
    14
    """
    # Un-prefixed atom elements that may appear at most once in the entry.
    atom_fields = ['title', 'id', 'updated', 'summary']
    # Namespace prefixes accepted in 'prefix_tag' keys. NOTE: this list lives on
    # the class, so prefixes added by `register_namespace` are visible to every
    # `Entry` instance, not only the one they were registered through.
    add_ns = ['dcterms', 'atom', 'app']
    # Skeleton document every entry starts from.
    bootstrap = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
       xmlns:dcterms="http://purl.org/dc/terms/">
    <generator uri="http://bitbucket.org/beno/python-sword2" version="%s"/>
</entry>""" % __version__

    def __init__(self, **kw):
        """Create a basic `Entry` document, setting the generator and a timestamp
        for the updated element value.

        Any keyword parameters passed in will be passed to the add_fields method
        and added to the entry bootstrap document. It's currently not possible to
        add a namespace and use it within the init call.
        """
        self.entry = etree.fromstring(self.bootstrap)
        # Default the timestamp only when the caller did not supply one.
        if 'updated' not in kw:
            kw['updated'] = datetime.now().isoformat()
        self.add_fields(**kw)

    def register_namespace(self, prefix, uri):
        """Register a namespace, making it available for use when adding
        subsequent fields to the entry.

        Registration will also affect the XML export, adding in the
        xmlns:prefix="url" attribute when required.
        """
        etree.register_namespace(prefix, uri)
        # Registering the same prefix repeatedly must not grow the shared list.
        if prefix not in self.add_ns:
            self.add_ns.append(prefix)
        # An existing NS mapping for this prefix is kept as-is, never overwritten.
        if prefix not in NS.keys():
            NS[prefix] = "{%s}%%s" % uri

    def add_field(self, k, v):
        """Append a single key-value pair to the `Entry` document.

        eg

        >>> e.add_field("myprefix_fooo", "value")

        It is advisable to use the `Entry.add_fields` method instead as this is
        neater and simplifies element entry.

        Note that the atom:author field is handled differently, as it requires
        certain fields from the author:

        >>> e.add_field("author", {'name': ".....",
        ...                        'email': "....",
        ...                        'uri': "...."})

        Note that this means of entry is not supported for other elements.

        Keys that are neither a known atom field, a 'prefix_tag' pair with a
        registered prefix, nor 'author' with a dict value are silently ignored.
        """
        if k in self.atom_fields:
            # These should be unique: reuse the element when it already exists.
            # `is None` is required - a childless element is falsy.
            node = self.entry.find(NS['atom'] % k)
            if node is None:
                node = etree.SubElement(self.entry, NS['atom'] % k)
            node.text = v
        elif "_" in k:
            # Possible XML namespace, eg 'dcterms_title'. Only the first
            # underscore splits, so the tag itself may contain underscores.
            nmsp, tag = k.split("_", 1)
            if nmsp in self.add_ns:
                e = etree.SubElement(self.entry, NS[nmsp] % tag)
                e.text = v
        elif k == "author" and isinstance(v, dict):
            self.add_author(**v)

    def add_fields(self, **kw):
        """Add in multiple elements in one method call.

        Eg:

        >>> e.add_fields(dcterms_title="Origin of the Species",
        ...              dcterms_contributor="Darwin, Charles")
        """
        # items() rather than the Python 2-only iteritems().
        for k, v in kw.items():
            self.add_field(k, v)

    def add_author(self, name, uri=None, email=None):
        """Convenience function to add in the atom:author elements in the
        fashion required for Atom.

        A new atom:author element is appended on every call; `uri` and `email`
        children are only created when a truthy value is given.
        """
        a = etree.SubElement(self.entry, NS['atom'] % 'author')
        n = etree.SubElement(a, NS['atom'] % 'name')
        n.text = name
        if uri:
            u = etree.SubElement(a, NS['atom'] % 'uri')
            u.text = uri
        if email:
            e = etree.SubElement(a, NS['atom'] % 'email')
            e.text = email

    def __str__(self):
        """Export the XML to a string, ready for use.

        An XML declaration is prepended when the serialiser did not emit one.
        """
        xml_str = etree.tostring(self.entry)
        # Where bytes and str are distinct types, tostring() yields bytes, but
        # __str__ must return str. Where bytes is str this branch never runs.
        if isinstance(xml_str, bytes) and not isinstance(xml_str, str):
            xml_str = xml_str.decode("utf-8")
        if not xml_str.startswith('<?xml version="1.0"?>'):
            xml_str = '<?xml version="1.0"?>' + xml_str
        return xml_str

    def pretty_print(self):
        """A version of the XML document which should be slightly more readable
        on the command line."""
        # NOTE(review): relies on the etree implementation accepting a
        # `pretty_print` keyword in tostring() - confirm for every backend that
        # compatible_libs may provide.
        return etree.tostring(self.entry, pretty_print=True)
|