annotate sword2-libraries-pyinstaller-compatible/sword2/atom_objects.py @ 22:d1752c7031e4 timeouts tip

Updated .hgignore to ignore sword2_logging.conf and anything in .cache
author Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date Tue, 22 Jan 2013 14:43:42 +0000
parents 8b69bba225c9
children
rev   line source
marco@16 1 #!/usr/bin/env python
marco@16 2 # -*- coding: utf-8 -*-
marco@16 3
marco@16 4 """Non-SWORD2 specific Atom/APP helper classes.
marco@16 5
marco@16 6 Most often used class will be 'Entry' - it provides an easy means to make an atom:entry
marco@16 7 document which can be used directly as the metadata entry.
marco@16 8
marco@16 9 Also provides Category, a convenience class that simplifies reading category information from an atom:entry
marco@16 10 """
marco@16 11
marco@16 12 from sword2_logging import logging
marco@16 13 from implementation_info import __version__
marco@16 14 coll_l = logging.getLogger(__name__)
marco@16 15
marco@16 16 from compatible_libs import etree
marco@16 17 from utils import NS, get_text
marco@16 18
marco@16 19 from datetime import datetime
marco@16 20
class Category(object):
    """Convenience class to aid in interpreting atom:category elements in XML.

    Currently, this is read-only.

    Usage:

    >>> from sword2 import Category

    ... # `Category` expects an etree element (`c_node` in this example) referencing an <atom:category> element:
    <atom:category term="...." scheme="...." label="....."> .... </atom:category>

    # Load a `Category` instance:
    >>> c = Category(dom = c_node)

    # Overrides `__str__` to provide a simple means to view the content
    >>> print(c)
    "Category scheme:http://purl.org/net/sword/terms/ term:http://purl.org/net/sword/terms/originalDeposit label:Original Deposit text:'None'"

    # Element attributes appear as object attributes:
    >>> c.scheme
    'http://purl.org/net/sword/terms/'

    # Element text will be in the text attribute, if text is present
    >>> c.text
    None

    """

    def __init__(self, term=None,
                 scheme=None,
                 label=None,
                 text=None,
                 dom=None):
        """Init a `Category` - 99% of the time, this will be done by setting the dom parameter.

        However, if (for testing) there is a need to 'fake' a `Category`, all the
        attributes can be set in the constructor.

        Parameters:
            term, scheme, label -- initial values for the matching attributes.
            text -- initial value for the element text.
            dom -- optional element to load from; values found on it override
                   the keyword values given above.
        """
        self.term = term
        self.scheme = scheme
        self.label = label
        self.text = text
        # Always define the attribute so that `c.dom` never raises
        # AttributeError on a hand-built (dom-less) instance.
        self.dom = dom
        # Identity test rather than equality/truthiness: etree elements
        # without children evaluate as false.
        if dom is not None:
            self._from_element(dom)

    def _from_element(self, e):
        """Load the `Category`'s attributes from the element `e`.

        Only values present on the element are copied; anything missing keeps
        the value it already had.
        """
        # Suffix matching means a qualified attribute name (one that merely
        # ends in 'scheme', 'term' or 'label') is picked up as well.
        for name, value in e.attrib.items():
            if name.endswith("scheme"):
                self.scheme = value
            elif name.endswith("term"):
                self.term = value
            elif name.endswith("label"):
                self.label = value
        # Empty or missing element text leaves self.text untouched.
        if e.text:
            self.text = e.text

    def __str__(self):
        """Rudimentary way to display the data held, in a way amenable to stdout."""
        return "Category scheme:%s term:%s label:%s text:'%s'" % (self.scheme,
                                                                  self.term,
                                                                  self.label,
                                                                  self.text)
marco@16 83
marco@16 84
class Entry(object):
    """Used to create `Entry`s - for multipart/metadata submission. Has a simple and extendable way to add in
    namespace-aware key-value pairs.

    Example of use:

    >>> from sword2 import Entry
    >>> e = Entry()    # it can be opened blank, but more usefully...
    >>> e = Entry(id="atom id",
                  title="atom title",
                  dcterms_identifier="some other id")

    # Getting the document as a string
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
    <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
    <updated>2011-06-05T16:20:34.914474</updated><dcterms:identifier>some other id</dcterms:identifier><id>atom id</id><title>atom title</title></entry>


    # Adding fields to the metadata entry
    # dcterms (and other, non-atom fields) can be used by passing in a parameter with an underscore between the
    # prefix and element name, eg:
    >>> e.add_fields(dcterms_title= "dcterms title", dcterms_some_other_field = "other")

    # atom:author field is treated slightly differently than all the other fields:
    # dictionary is required
    >>> e.add_fields(author={"name":"Ben", "email":"foo@example.org"})
    >>> print(str(e))
    <?xml version="1.0"?>
    <entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
    </entry>
    >>>

    # Other namespaces - use `Entry.register_namespace` to add them to the list of those considered (prefix, URL):
    >>> e.register_namespace("myschema", "http://example.org")
    >>> e.add_fields(myschema_foo = "bar")
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
        <myschema:foo xmlns:myschema="http://example.org">bar</myschema:foo>
    </entry>

    This class doesn't provide editing/updating functions as the full etree API is exposed through the
    attribute 'entry'. For example:

    >>> len(e.entry.getchildren())
    14
    """
    # Atom elements that may appear only once; add_field overwrites these
    # instead of appending a second element.
    atom_fields = ['title', 'id', 'updated', 'summary']
    # Prefixes accepted in 'prefix_tag' keys. NOTE: this is a class-level list,
    # so prefixes added through register_namespace are visible to every
    # Entry instance.
    add_ns = ['dcterms', 'atom', 'app']
    # Skeleton document every Entry starts from.
    bootstrap = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:dcterms="http://purl.org/dc/terms/">
<generator uri="http://bitbucket.org/beno/python-sword2" version="%s"/>
</entry>""" % __version__

    def __init__(self, **kw):
        """Create a basic `Entry` document, setting the generator and a timestamp for the updated element value.

        Any keyword parameters passed in will be passed to the add_fields method and added to the entry
        bootstrap document. It's currently not possible to add a namespace and use it within the init call."""
        self.entry = etree.fromstring(self.bootstrap)
        # Supply a timestamp only when the caller did not provide one.
        if 'updated' not in kw:
            kw['updated'] = datetime.now().isoformat()
        self.add_fields(**kw)

    def register_namespace(self, prefix, uri):
        """Registers a namespace, making it available for use when adding subsequent fields to the entry.

        Registration will also affect the XML export, adding in the xmlns:prefix="url" attribute when required.

        Registering the same prefix again does not add a second entry to the
        prefix list, and an existing `NS` template for that prefix is kept."""
        etree.register_namespace(prefix, uri)
        if prefix not in self.add_ns:
            self.add_ns.append(prefix)
        if prefix not in NS:
            # The stored value is itself a %-format template ("{uri}%s"), so
            # any literal '%' in the URI (e.g. percent-encoding) is doubled to
            # survive the later `NS[prefix] % tag` substitution.
            NS[prefix] = "{%s}%%s" % uri.replace("%", "%%")

    def add_field(self, k, v):
        """Append a single key-value pair to the `Entry` document.

        eg

        >>> e.add_field("myprefix_fooo", "value")

        It is advisable to use the `Entry.add_fields` method instead as this is neater and simplifies element entry.

        Note that the atom:author field is handled differently, as it requires certain fields from the author:

        >>> e.add_field("author", {'name':".....",
                                   'email':"....",
                                   'uri':"...."} )

        Note that this means of entry is not supported for other elements.

        Keys that match none of the cases below (an unregistered prefix, or an
        'author' value that is not a dict) are ignored without error."""
        if k in self.atom_fields:
            # These should be unique! Reuse the existing element if present.
            qname = NS['atom'] % k
            element = self.entry.find(qname)
            if element is None:
                element = etree.SubElement(self.entry, qname)
            element.text = v
        elif "_" in k:
            # possible XML namespace, eg 'dcterms_title'; only the first
            # underscore separates prefix from tag.
            nmsp, tag = k.split("_", 1)
            if nmsp in self.add_ns:
                element = etree.SubElement(self.entry, NS[nmsp] % tag)
                element.text = v
        elif k == "author" and isinstance(v, dict):
            self.add_author(**v)

    def add_fields(self, **kw):
        """Add in multiple elements in one method call.

        Eg:

        >>> e.add_fields(dcterms_title="Origin of the Species",
                         dcterms_contributor="Darwin, Charles")
        """
        # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
        for k, v in kw.items():
            self.add_field(k, v)

    def add_author(self, name, uri=None, email=None):
        """Convenience function to add in the atom:author elements in the fashion
        required for Atom.

        `name` is always written; `uri` and `email` are written only when truthy."""
        author = etree.SubElement(self.entry, NS['atom'] % 'author')
        name_el = etree.SubElement(author, NS['atom'] % 'name')
        name_el.text = name
        if uri:
            uri_el = etree.SubElement(author, NS['atom'] % 'uri')
            uri_el.text = uri
        if email:
            email_el = etree.SubElement(author, NS['atom'] % 'email')
            email_el.text = email

    def __str__(self):
        """Export the XML as a string, prefixed with an XML declaration if one is missing."""
        xml_str = etree.tostring(self.entry)
        if not isinstance(xml_str, str):
            # On Python 3 tostring() yields bytes, but __str__ must return
            # str. On Python 2 the result is already `str`, so this is skipped.
            xml_str = xml_str.decode("utf-8")
        if not xml_str.startswith('<?xml version="1.0"?>'):
            xml_str = '<?xml version="1.0"?>' + xml_str
        return xml_str

    def pretty_print(self):
        """A version of the XML document which should be slightly more readable on the command line.

        NOTE(review): relies on the `pretty_print` keyword of `etree.tostring`,
        which looks lxml-specific - confirm what `compatible_libs.etree` provides."""
        return etree.tostring(self.entry, pretty_print=True)