#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Collection classes

These classes are used in their documented manner but most collect or group various other items
to make them suitable for use.

The key class is `SDCollection`, which presents a simple read-only object which represents the
information held within a collection element in a SWORD2 document such as the Service Document.

Two other classes, `Collection_Feed` and `Sword_Statement` are works in progress for now, with limited support
for the things they logically handle.

"""

from sword2_logging import logging
from implementation_info import __version__
coll_l = logging.getLogger(__name__)

from compatible_libs import etree
from utils import NS, get_text

from deposit_receipt import Deposit_Receipt

from atom_objects import Category

from datetime import datetime


class SDCollection(object):
    """
    `SDCollection` - holds, parses and presents simple attributes with information taken from a collection entry
    within a SWORD2 Service Document.

    This will be instantiated by a `sword2.Service_Document` and as such, is unlikely to be called explicitly.

    Usage:

    >>> from sword2 import SDCollection
    >>> c = SDCollection()

    .... pull an `etree.SubElement` from a service document into `collection_node`

    >>> c.load_from_etree(collection_node)
    >>> c.collectionPolicy
    "This collection has the following policy for deposits"
    >>> c.title
    "Thesis Deposit"
    """
    def __init__(self, title=None,
                       href=None,
                       accept=None,
                       accept_multipart=None,
                       categories=None,
                       collectionPolicy=None,
                       description=None,
                       mediation=None,
                       treatment=None,
                       acceptPackaging=None,
                       service=None,
                       dom=None):
        """
        Creates an `SDCollection` object - as used by `sword2.Service_Document`

        #BETASWORD2URL
        See http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#protocoloperations_retreivingservicedocument
        for more details about the SWORD2 Service Document.

        Usage:

        Read useful information from the attributes of this object once loaded.

        The list-valued parameters default to `None`, which is turned into a fresh empty list for
        each instance (so that instances never share one list object).

        Attributes::

        title                -- <atom:title> - Title of collection, (`str`)
        href                 -- <collection href=... > - Collection IRI (`str`)
        accept               -- <accept>*</accept> - the formats which this collection can take in (`list` of `str`)
        accept_multipart     -- <accept alternate="multipart-related">*</accept> - the formats which this collection can take
                                in via multipart-related (`list` of `str`)
        categories           -- <atom:category> - Collection category (`list` of `sword2.Category`'s)
        collectionPolicy     -- <sword:collectionPolicy> - Collection policy (`str`)
        description          -- <dcterms:abstract> - Collection descriptive text (`str`)
        mediation            -- <sword:mediation> - Support for mediated deposit (`True` or `False`)
        treatment            -- <sword:treatment> - from the SWORD2 specifications:
                                ".. either a human-readable statement describing treatment the deposited resource
                                has received or a IRI that dereferences to such a description."
        acceptPackaging      -- <sword:acceptPackaging> - Accepted package types (`list` of `str`)
                                from the SWORD2 specifications: "The value SHOULD be a IRI for a known packaging format"
        service              -- <sword:service> - References to nested service descriptions (`list` of `str`)
        dom                  -- optional etree element for the collection; when given it is parsed
                                immediately via `load_from_etree`

        Example XML fragment that is expected: (xmlns="http://www.w3.org/2007/app")
        (the `href` value below is illustrative only)

        ...

        <collection href="http://swordapp.org/col-iri/43">
            <atom:title>Collection 43</atom:title>
            <accept>*/*</accept>
            <accept alternate="multipart-related">*/*</accept>
            <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
            <dcterms:abstract>Collection Description</dcterms:abstract>
            <sword:mediation>false</sword:mediation>
            <sword:treatment>Treatment description</sword:treatment>
            <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
            <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging>
            <sword:service>http://swordapp.org/sd-iri/e4</sword:service>
        </collection>

        ...

        Parsing this fragment:

        Again, this step is done by the `sword2.Service_Document`, but if the above XML was in the `doc` variable:

            # Get an etree-compatible library, such as from `lxml.etree`, `xml.etree` or `elementtree.ElementTree`
            >>> from sword2.compatible_libs import etree
            >>> from sword2 import SDCollection
            >>> dom = etree.fromstring(doc)

            # create an `SDCollection` instance from this XML document
            >>> c = SDCollection(dom = dom)

            # query it
            >>> c.treatment
            "Treatment description"
            # Non-unique elements, for example:
            >>> c.service
            ["http://swordapp.org/sd-iri/e4"]
            >>> c.accept
            ["*/*"]

        """
        # APP/Atom
        self.title = title
        self.href = href
        # A fresh list per instance: a literal `[]` default would be shared by every instance.
        self.accept = [] if accept is None else accept
        self.accept_multipart = [] if accept_multipart is None else accept_multipart
        # SWORD
        self.mediation = mediation
        self.description = description
        self.treatment = treatment
        self.collectionPolicy = collectionPolicy
        self.acceptPackaging = [] if acceptPackaging is None else acceptPackaging
        self.service = [] if service is None else service
        self.categories = [] if categories is None else categories
        # Most recently parsed element; stays None until `load_from_etree` is called.
        self.dom = None
        if dom is not None:
            # NB `load_from_etree` blanks every attribute before parsing, so any values passed to
            # the constructor alongside `dom` are replaced by what the XML element provides.
            self.load_from_etree(dom)

    def _reset(self):
        """Blank this instance of `SDCollection` (the cached `self.dom` is left untouched)."""
        self.title = None
        self.href = None
        self.accept = []
        self.accept_multipart = []
        # SWORD
        self.mediation = None
        self.description = None
        self.treatment = None
        self.collectionPolicy = None
        self.acceptPackaging = []
        # An empty list, in line with the other list-valued attributes.
        self.service = []
        self.categories = []

    def load_from_etree(self, collection):
        """
        Parse an `etree.SubElement` into attributes in this object.

        All attributes are blanked first, so nothing from a previous load (or from the
        constructor arguments) survives.

        Also, caches the most recently used DOM object it is passed in
        `self.dom`
        """
        self._reset()
        self.dom = collection
        self.title = get_text(collection, NS['atom'] % 'title')
        # MUST have href attribute
        self.href = collection.attrib.get('href', None)
        # Accept and Accept multipart
        for accept in collection.findall(NS['app'] % 'accept'):
            if accept.attrib.get("alternate", None) == "multipart-related":
                self.accept_multipart.append(accept.text)
            else:
                self.accept.append(accept.text)
        # Categories
        for category_element in collection.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=category_element))
        # SWORD extensions:
        self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')

        # Mediation: True/False
        mediation = get_text(collection, NS['sword'] % 'mediation')
        # `get_text` presumably yields None when the element is missing (TODO confirm against
        # `utils.get_text`); treat a missing or empty value as "no mediation" rather than
        # failing on `.lower()`.
        self.mediation = bool(mediation) and mediation.lower() == "true"

        self.treatment = get_text(collection, NS['sword'] % 'treatment')
        self.description = get_text(collection, NS['dcterms'] % 'abstract')
        self.service = get_text(collection, NS['sword'] % 'service', plural = True)
        self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging', plural = True)

        # Log collection details:
        coll_l.debug(str(self))

    def __str__(self):
        """Provides a simple display of the pertinent information in this object suitable for CLI logging."""
        _s = ["Collection: '%s' @ '%s'. Accept:%s" % (self.title, self.href, self.accept)]
        if self.description:
            _s.append("SWORD: Description - '%s'" % self.description)
        if self.collectionPolicy:
            _s.append("SWORD: Collection Policy - '%s'" % self.collectionPolicy)
        if self.mediation:
            _s.append("SWORD: Mediation? - '%s'" % self.mediation)
        if self.treatment:
            _s.append("SWORD: Treatment - '%s'" % self.treatment)
        if self.acceptPackaging:
            _s.append("SWORD: Accept Packaging: '%s'" % self.acceptPackaging)
        if self.service:
            _s.append("SWORD: Nested Service Documents - '%s'" % self.service)
        for c in self.categories:
            _s.append(str(c))
        return "\n".join(_s)

    def __repr__(self):
        """Provides the atom.title of the collection as part of the repr reply"""
        # The format string needs a `%s` placeholder; with an empty template the `%`
        # operation raises TypeError for any title.
        return "<sword2.SDCollection - title: %s>" % self.title

    def to_json(self):
        """Provides a simple means to turn the important parsed information into a simple JSON-encoded form.

        NB this uses the attributes of the object, not the cached DOM object, so information can be altered/added
        on the fly.

        Categories are emitted as their `str()` form, as in `__str__`.

        Returns the JSON text, or `None` (after logging an error) if no JSON library is available."""
        from compatible_libs import json
        if json:
            _j = {'title':self.title,
                  'href':self.href,
                  'description':self.description,
                  'accept':self.accept,
                  'accept_multipart':self.accept_multipart,
                  'mediation':self.mediation,
                  'treatment':self.treatment,
                  'collectionPolicy':self.collectionPolicy,
                  'acceptPackaging':self.acceptPackaging,
                  'service':self.service,
                  # `Category` instances cannot be handed to `json.dumps` directly.
                  'categories':[str(c) for c in self.categories]}
            return json.dumps(_j)
        else:
            coll_l.error("Could not return information about Collection '%s' as JSON" % self.title)
            return


class Collection_Feed(object):
    """Nothing to see here yet. Move along.

    For now this only stores the values it is constructed with."""
    def __init__(self, feed_iri=None, http_client=None, feed_xml=None):
        self.feed_xml = feed_xml
        self.feed_iri = feed_iri
        self._cached = []
        self.h = http_client


class Sword_Statement(object):
    """Beginning SWORD2 Sword Statement support.

    The aim is for the sword statements to be available through attributes on this object.

    In the meantime, please use the low-level `self.feed` for access to an etree.Element containing the
    parsed form of the `xml_document` it is passed.

    NB if `self.parsed` is not `True`, then there has been a problem parsing the xml document so check the original text,
    cached in `self.xml_document`. In that case `self.feed` is `None` and `self.categories` and `self.entries`
    stay empty.
    """
    def __init__(self, xml_document):
        self.xml_document = xml_document
        self.parsed = False
        # Defined up front so the attribute exists even when parsing fails.
        self.feed = None
        self.first = None
        self.next = None
        self.previous = None
        self.last = None
        self.categories = []
        self.entries = []
        try:
            coll_l.info("Attempting to parse the Feed XML document")
            self.feed = etree.fromstring(xml_document)
            self.parsed = True
        except Exception as e:
            # Deliberately broad: the parse error type depends on which etree library
            # `compatible_libs` selected.
            coll_l.error("Failed to parse document - %s" % e)
            coll_l.error("XML document begins:\n %s" % xml_document[:300])
        if self.parsed:
            # Only walk the feed when there is one; a failed parse is reported via `self.parsed`.
            self.enumerate_feed()

    def enumerate_feed(self):
        """Fill `self.categories` and `self.entries` from the direct children of `self.feed`.

        Does nothing when no feed has been parsed."""
        if self.feed is None:
            return
        # Handle Categories
        for cate in self.feed.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom = cate))
        # handle entries - each one is compatible with a Deposit receipt, so using that
        for entry in self.feed.findall(NS['atom'] % 'entry'):
            self.entries.append(Deposit_Receipt(dom=entry))
        # TODO handle multipage first/last pagination