#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Non-SWORD2 specific Atom/APP helper classes.

The most often used class will be `Entry` - it provides an easy means to make an atom:entry
document which can be used directly as the metadata entry.

Also provides `Category`, a convenience class to simplify reading in category information from an atom:entry.
"""

from sword2_logging import logging
from implementation_info import __version__
coll_l = logging.getLogger(__name__)

from compatible_libs import etree
from utils import NS, get_text

from datetime import datetime


class Category(object):
    """Convenience class to aid in the interpreting of atom:category elements in XML. Currently, this is read-only.

    Usage:

    >>> from sword2 import Category

    # `Category` expects an etree element (`c_node` in this example) for an atom:category
    # element carrying `scheme`, `term` and `label` attributes.

    # Load a `Category` instance:
    >>> c = Category(dom=c_node)

    # `__str__` is overridden to provide a simple means to view the content:
    >>> print c
    Category scheme:http://purl.org/net/sword/terms/ term:http://purl.org/net/sword/terms/originalDeposit label:Original Deposit text:'None'

    # Element attributes appear as object attributes:
    >>> c.scheme
    'http://purl.org/net/sword/terms/'

    # Element text will be in the `text` attribute, if text is present (otherwise it stays None):
    >>> c.text is None
    True
    """

    def __init__(self, term=None, scheme=None, label=None, text=None, dom=None):
        """Init a `Category` - 99% of the time, this will be done by setting the `dom` parameter.

        However, if (for testing) there is a need to 'fake' a `Category`, all the attributes can be
        set in the constructor. Values found on `dom` override the ones passed in explicitly.

        Parameters:
            term, scheme, label -- values for the matching atom:category attributes.
            text -- text content of the element.
            dom -- an etree element to read the attributes and text from, or None.
        """
        self.term = term
        self.scheme = scheme
        self.label = label
        self.text = text
        # Always define `dom`, so that a hand-built instance can be inspected
        # without raising AttributeError.
        self.dom = dom
        if dom is not None:
            self._from_element(dom)

    def _from_element(self, e):
        """Load the `Category`'s attributes from the etree element `e`.

        Attribute names are matched by suffix, which also accepts namespace-qualified names.
        Attributes or text missing from `e` leave the current values untouched.
        """
        for name, value in e.attrib.items():
            if name.endswith("scheme"):
                self.scheme = value
            elif name.endswith("term"):
                self.term = value
            elif name.endswith("label"):
                self.label = value
        if e.text:
            self.text = e.text

    def __str__(self):
        """Rudimentary way to display the data held, in a way amenable to stdout."""
        return "Category scheme:%s term:%s label:%s text:'%s'" % (self.scheme,
                                                                 self.term,
                                                                 self.label,
                                                                 self.text)

class Entry(object):
    """Used to create `Entry`s - for multipart/metadata submission. Has a simple and extendable way to add in
    namespace-aware key-value pairs.

    Example of use:

    >>> from sword2 import Entry
    >>> e = Entry()    # it can be opened blank, but more usefully...
    >>> e = Entry(id="atom id",
    ...           title="atom title",
    ...           dcterms_identifier="some other id")

    # Getting the serialised document (an atom:entry holding the generator, an `updated`
    # timestamp and the fields given above):
    >>> xml = str(e)

    # Adding fields to the metadata entry.
    # dcterms (and other, non-atom fields) can be used by passing in a parameter with an underscore
    # between the prefix and element name, eg:
    >>> e.add_fields(dcterms_title="dcterms title", dcterms_some_other_field="other")

    # The atom:author field is treated slightly differently than all the other fields:
    # a dictionary is required
    >>> e.add_fields(author={"name": "Ben", "email": "foo@example.org"})

    # Other namespaces - use `Entry.register_namespace` to add them to the list of those
    # considered (prefix, URL):
    >>> e.register_namespace("myschema", "http://example.org")
    >>> e.add_fields(myschema_foo="bar")

    This class doesn't provide editing/updating functions as the full etree API is exposed through the
    attribute `entry`. For example:

    >>> title_element = e.entry.find(NS['atom'] % 'title')
    """

    # Atom elements that may appear only once; adding one again overwrites its text.
    atom_fields = ['title', 'id', 'updated', 'summary']
    # Prefixes accepted in 'prefix_name' field keys. This is a class attribute, so it is shared
    # by every `Entry` (as are the `NS` table and etree's namespace registry).
    add_ns = ['dcterms', 'atom', 'app']
    # NOTE(review): the markup inside this literal (and the XML declaration used in `__str__`)
    # was missing from the copy of the file under review; it has been restored from the upstream
    # python-sword2 sources - confirm against version control before merging.
    bootstrap = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
        xmlns:dcterms="http://purl.org/dc/terms/">
    <generator uri="http://bitbucket.org/beno/python-sword2" version="%s"/>
</entry>""" % __version__

    def __init__(self, **kw):
        """Create a basic `Entry` document, setting the generator and a timestamp for the updated element value.

        Any keyword parameters passed in will be passed to the `add_fields` method and added to the entry
        bootstrap document. It's currently not possible to add a namespace and use it within the init call.
        """
        self.entry = etree.fromstring(self.bootstrap)
        if 'updated' not in kw:
            # NOTE(review): this is a naive local timestamp without a UTC offset; RFC 4287 date
            # constructs expect an RFC 3339 value including one - confirm whether servers care.
            kw['updated'] = datetime.now().isoformat()
        self.add_fields(**kw)

    def register_namespace(self, prefix, uri):
        """Register a namespace, making it available for use when adding subsequent fields to the entry.

        Registration will also affect the XML export, adding in the xmlns:prefix="url" attribute when required.
        Registering the same prefix again is harmless; an existing `NS` template for the prefix is kept.
        """
        etree.register_namespace(prefix, uri)
        # Guard against duplicates piling up in the shared class-level list.
        if prefix not in self.add_ns:
            self.add_ns.append(prefix)
        if prefix not in NS:
            # Clark-notation template: NS[prefix] % "tag" -> "{uri}tag"
            NS[prefix] = "{%s}%%s" % uri

    def add_field(self, k, v):
        """Append a single key-value pair to the `Entry` document.

        eg

        >>> e.add_field("myprefix_fooo", "value")

        It is advisable to use the `Entry.add_fields` method instead as this is neater and simplifies element entry.

        Note that the atom:author field is handled differently, as it requires certain fields from the author:

        >>> e.add_field("author", {'name': ".....",
        ...                        'email': "....",
        ...                        'uri': "...."})

        Note that this means of entry is not supported for other elements.

        Keys that match none of the supported forms (or use an unregistered prefix) are not added;
        a warning is logged so that the dropped metadata does not go unnoticed.
        """
        if k in self.atom_fields:
            # These should be unique: reuse the existing element if there is one.
            tag = NS['atom'] % k
            element = self.entry.find(tag)
            if element is None:
                element = etree.SubElement(self.entry, tag)
            element.text = v
        elif "_" in k:
            # possible XML namespace, eg 'dcterms_title'
            nmsp, tag = k.split("_", 1)
            if nmsp in self.add_ns:
                element = etree.SubElement(self.entry, NS[nmsp] % tag)
                element.text = v
            else:
                coll_l.warning("Ignoring field %r: namespace prefix %r has not been registered", k, nmsp)
        elif k == "author" and isinstance(v, dict):
            self.add_author(**v)
        else:
            coll_l.warning("Ignoring unrecognised field %r", k)

    def add_fields(self, **kw):
        """Add in multiple elements in one method call.

        Eg:

        >>> e.add_fields(dcterms_title="Origin of the Species",
        ...              dcterms_contributor="Darwin, Charles")
        """
        # `items()` rather than `iteritems()` so this also runs on Python 3.
        for k, v in kw.items():
            self.add_field(k, v)

    def add_author(self, name, uri=None, email=None):
        """Convenience function to add in the atom:author elements in the fashion
        required for Atom.

        `name` is always written; `uri` and `email` children are only added when given.
        """
        author = etree.SubElement(self.entry, NS['atom'] % 'author')
        etree.SubElement(author, NS['atom'] % 'name').text = name
        if uri:
            etree.SubElement(author, NS['atom'] % 'uri').text = uri
        if email:
            etree.SubElement(author, NS['atom'] % 'email').text = email

    def __str__(self):
        """Export the XML to a string, ready for use, prefixed with an XML declaration."""
        declaration = '<?xml version="1.0"?>'
        xml_str = etree.tostring(self.entry)
        if not isinstance(xml_str, str):
            # On Python 3 `tostring` returns bytes, while `__str__` must return str.
            xml_str = xml_str.decode("utf-8")
        if not xml_str.startswith(declaration):
            xml_str = declaration + xml_str
        return xml_str

    def pretty_print(self):
        """A version of the XML document which should be slightly more readable on the command line."""
        try:
            return etree.tostring(self.entry, pretty_print=True)
        except TypeError:
            # `pretty_print` is an lxml keyword; presumably `compatible_libs` may supply a
            # different etree implementation that rejects it, so fall back to the plain output.
            return etree.tostring(self.entry)