annotate sword2-libraries-pyinstaller-compatible/sword2/collection.py @ 22:d1752c7031e4 timeouts tip

Updated .hgignore to ignore sword2_logging.conf and anything in .cache
author Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date Tue, 22 Jan 2013 14:43:42 +0000
parents 8b69bba225c9
children
rev   line source
marco@16 1 #!/usr/bin/env python
marco@16 2 # -*- coding: utf-8 -*-
marco@16 3
marco@16 4 """ Collection classes
marco@16 5
marco@16 6 These classes are used in their documented manner but most collect or group various other items
marco@16 7 to make them suitable for use.
marco@16 8
marco@16 9 The key class is `SDCollection`, which presents a simple read-only object which represents the
marco@16 10 information held within a collection element in a SWORD2 document such as the Service Document.
marco@16 11
marco@16 12 Two other classes, `Collection_Feed` and `Sword_Statement` are works in progress for now, with limited support
marco@16 13 for the things they logically handle.
marco@16 14
marco@16 15 """
marco@16 16
marco@16 17 from sword2_logging import logging
marco@16 18 from implementation_info import __version__
marco@16 19 coll_l = logging.getLogger(__name__)
marco@16 20
marco@16 21 from compatible_libs import etree
marco@16 22 from utils import NS, get_text
marco@16 23
marco@16 24 from deposit_receipt import Deposit_Receipt
marco@16 25
marco@16 26 from atom_objects import Category
marco@16 27
marco@16 28 from datetime import datetime
marco@16 29
marco@16 30
class SDCollection(object):
    """
    `SDCollection` - holds, parses and presents simple attributes with information taken from a collection entry
    within a SWORD2 Service Document.

    This will be instantiated by a `sword2.Service_Document` and as such, is unlikely to be called explicitly.

    Usage:

    >>> from sword2 import SDCollection
    >>> c = SDCollection()

    .... pull an `etree.SubElement` from a service document into `collection_node`

    >>> c.load_from_etree(collection_node)
    >>> c.collectionPolicy
    "This collection has the following policy for deposits"
    >>> c.title
    "Thesis Deposit"
    """

    def __init__(self, title=None,
                 href=None,
                 accept=None,
                 accept_multipart=None,
                 categories=None,
                 collectionPolicy=None,
                 description=None,
                 mediation=None,
                 treatment=None,
                 acceptPackaging=None,
                 service=None,
                 dom=None):
        """
        Creates an `SDCollection` object - as used by `sword2.Service_Document`

        #BETASWORD2URL
        See http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#protocoloperations_retreivingservicedocument
        for more details about the SWORD2 Service Document.

        Usage:

        Read useful information from the attributes of this object once loaded.

        Every list-valued parameter defaults to `None`, which is turned into a fresh empty
        list for each instance (so instances never share one default list).

        If `dom` is given it is parsed with `load_from_etree`, which blanks the instance
        first: the other constructor arguments are then discarded.

        Attributes::

            title            -- <atom:title> - Title of collection, (`str`)
            href             -- <collection href=... > - Collection IRI (`str`)
            accept           -- <accept>*</accept> - the formats which this collection can take in (`list` of `str`)
            accept_multipart -- <accept alternate="multipart-related">*</accept> - the formats which this collection can take
                                in via multipart-related (`list` of `str`)
            categories       -- <atom:category> - Collection category (`list` of `sword2.Category`'s)
            collectionPolicy -- <sword:collectionPolicy> - Collection policy (`str`)
            description      -- <dcterms:abstract> - Collection descriptive text (`str`)
            mediation        -- <sword:mediation> - Support for mediated deposit (`True` or `False`)
            treatment        -- <sword:treatment> - from the SWORD2 specifications:
                                ".. either a human-readable statement describing treatment the deposited resource
                                has received or a IRI that dereferences to such a description."
            acceptPackaging  -- <sword:acceptPackaging> - Accepted package types (`list` of `str`)
                                from the SWORD2 specifications: "The value SHOULD be a IRI for a known packaging format"
            service          -- <sword:service> - References to nested service descriptions (`list` of `str`)
            dom              -- the element most recently passed to `load_from_etree`, or `None`

        Example XML fragment that is expected: (xmlns="http://www.w3.org/2007/app")

        ...

        <collection href="http://swordapp.org/col-iri/43">
            <atom:title>Collection 43</atom:title>
            <accept>*/*</accept>
            <accept alternate="multipart-related">*/*</accept>
            <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
            <dcterms:abstract>Collection Description</dcterms:abstract>
            <sword:mediation>false</sword:mediation>
            <sword:treatment>Treatment description</sword:treatment>
            <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
            <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging>
            <sword:service>http://swordapp.org/sd-iri/e4</sword:service>
        </collection>
        ...

        Parsing this fragment:

        Again, this step is done by the `sword2.Service_Document`, but if the above XML was in the `doc` variable:

            # Get an etree-compatible library, such as from `lxml.etree`, `xml.etree` or `elementtree.ElementTree`
            >>> from sword2.compatible_libs import etree
            >>> from sword2 import SDCollection
            >>> dom = etree.fromstring(doc)

            # create an `SDCollection` instance from this XML document
            >>> c = SDCollection(dom = dom)

            # query it
            >>> c.treatment
            "Treatment description"
            # Non-unique elements, for example:
            >>> c.service
            ["http://swordapp.org/sd-iri/e4"]
            >>> c.accept
            ["*/*"]

        """
        # APP/Atom
        self.title = title
        self.href = href
        # A list passed by the caller is kept as-is; only the "not given" case
        # gets a new list, so no two instances ever share a default.
        self.accept = [] if accept is None else accept
        self.accept_multipart = [] if accept_multipart is None else accept_multipart
        # SWORD
        self.mediation = mediation
        self.description = description
        self.treatment = treatment
        self.collectionPolicy = collectionPolicy
        self.acceptPackaging = [] if acceptPackaging is None else acceptPackaging
        self.service = [] if service is None else service
        self.categories = [] if categories is None else categories
        # Always defined, so `self.dom` can be read before anything is loaded.
        self.dom = None
        # Identity test on purpose: an element without children can be falsy,
        # so plain truthiness must not be used here.
        if dom is not None:
            self.load_from_etree(dom)

    def _reset(self):
        """Blank this instance of `SDCollection` (the cached `self.dom` is left untouched)."""
        self.title = None
        self.href = None
        self.accept = []
        self.accept_multipart = []
        # SWORD
        self.mediation = None
        self.description = None
        self.treatment = None
        self.collectionPolicy = None
        self.acceptPackaging = []
        # A list, to agree with the constructor default and the documented type.
        self.service = []
        self.categories = []

    def load_from_etree(self, collection):
        """
        Parse an `etree.SubElement` into attributes in this object.

        The instance is blanked first, so nothing from a previous load (or from
        the constructor arguments) survives.

        Also, caches the most recently used DOM object it is passed in
        `self.dom`
        """
        self._reset()
        self.dom = collection
        self.title = get_text(collection, NS['atom'] % 'title')
        # MUST have href attribute
        self.href = collection.attrib.get('href', None)
        # Accept and Accept multipart
        for accept in collection.findall(NS['app'] % 'accept'):
            if accept.attrib.get("alternate", None) == "multipart-related":
                self.accept_multipart.append(accept.text)
            else:
                self.accept.append(accept.text)
        # Categories
        for category_element in collection.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=category_element))
        # SWORD extensions:
        self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')

        # Mediation: True/False
        # NOTE(review): get_text presumably returns None when the element is
        # absent - confirm against utils.get_text. Guarding here means a
        # collection without <sword:mediation> reads as "not mediated" instead
        # of raising AttributeError on None.lower().
        mediation = get_text(collection, NS['sword'] % 'mediation')
        self.mediation = mediation is not None and mediation.lower() == "true"

        self.treatment = get_text(collection, NS['sword'] % 'treatment')
        self.description = get_text(collection, NS['dcterms'] % 'abstract')
        self.service = get_text(collection, NS['sword'] % 'service', plural=True)
        self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging', plural=True)

        # Log collection details:
        coll_l.debug(str(self))

    def __str__(self):
        """Provides a simple display of the pertinent information in this object suitable for CLI logging.

        One line per populated attribute; empty/falsy attributes are skipped."""
        _s = ["Collection: '%s' @ '%s'. Accept:%s" % (self.title, self.href, self.accept)]
        if self.description:
            _s.append("SWORD: Description - '%s'" % self.description)
        if self.collectionPolicy:
            _s.append("SWORD: Collection Policy - '%s'" % self.collectionPolicy)
        if self.mediation:
            _s.append("SWORD: Mediation? - '%s'" % self.mediation)
        if self.treatment:
            _s.append("SWORD: Treatment - '%s'" % self.treatment)
        if self.acceptPackaging:
            _s.append("SWORD: Accept Packaging: '%s'" % self.acceptPackaging)
        if self.service:
            _s.append("SWORD: Nested Service Documents - '%s'" % self.service)
        _s.extend(str(c) for c in self.categories)
        return "\n".join(_s)

    def __repr__(self):
        """Provides the atom.title of the collection as part of the repr reply"""
        return "<sword2.SDCollection - title: %s>" % self.title

    def to_json(self):
        """Provides a simple means to turn the important parsed information into a simple JSON-encoded form.

        NB this uses the attributes of the object, not the cached DOM object, so information can be altered/added
        on the fly.

        Returns the JSON text, or `None` (after logging an error) when
        `compatible_libs` could not supply a json library."""
        from compatible_libs import json
        if not json:
            coll_l.error("Could not return information about Collection '%s' as JSON" % self.title)
            return None
        _j = {'title': self.title,
              'href': self.href,
              'description': self.description,
              'accept': self.accept,
              'accept_multipart': self.accept_multipart,
              'mediation': self.mediation,
              'treatment': self.treatment,
              'collectionPolicy': self.collectionPolicy,
              'acceptPackaging': self.acceptPackaging,
              'service': self.service,
              'categories': self.categories}
        # `default=str` is only consulted for values the encoder cannot handle
        # itself. It is here for the `Category` objects in `categories`, which
        # are presumably plain objects (defined in atom_objects - confirm) and
        # would otherwise make dumps() raise TypeError; they are emitted in
        # the same str() form that __str__ uses.
        return json.dumps(_j, default=str)
marco@16 246
class Collection_Feed(object):
    """Placeholder for collection feed support - it only stores what it is given.

    Attributes:
        feed_iri -- the `feed_iri` argument, unchanged
        feed_xml -- the `feed_xml` argument, unchanged
        h        -- the `http_client` argument, unchanged
        _cached  -- an empty list, created per instance
    """

    def __init__(self, feed_iri=None, http_client=None, feed_xml=None):
        # Pure bookkeeping: nothing is fetched or parsed at construction time.
        self.feed_iri, self.feed_xml = feed_iri, feed_xml
        self.h = http_client
        self._cached = list()
marco@16 254
class Sword_Statement(object):
    """Beginning SWORD2 Sword Statement support.

    The aim is for the sword statements to be available through attributes on this object.

    In the meantime, please use the low-level `self.feed` for access to an etree.Element containing the
    parsed form of the `xml_document` it is passed.

    NB if `self.parsed` is not `True`, then there has been a problem parsing the xml document so check the original text,
    cached in `self.xml_document`. In that case `self.feed` is `None` and `self.categories` / `self.entries`
    stay empty.
    """

    def __init__(self, xml_document):
        """Parse `xml_document` and collect its categories and entries.

        A document that cannot be parsed does not raise: the failure is logged
        and the instance is left with `parsed == False`.
        """
        self.xml_document = xml_document
        self.parsed = False
        # Defined up front so the attribute exists even when parsing fails.
        self.feed = None
        self.first = None
        self.next = None
        self.previous = None
        self.last = None
        self.categories = []
        self.entries = []
        coll_l.info("Attempting to parse the Feed XML document")
        try:
            self.feed = etree.fromstring(xml_document)
            self.parsed = True
        except Exception as e:
            # Deliberately broad: `etree` comes from compatible_libs, and the
            # concrete parse-error type presumably depends on which backend
            # that module selected.
            coll_l.error("Failed to parse document - %s" % e)
            # Logging the start of the document must not raise in turn, e.g.
            # when `xml_document` is None or not sliceable.
            try:
                beginning = xml_document[:300]
            except TypeError:
                beginning = xml_document
            coll_l.error("XML document begins:\n %s" % beginning)
        # Only walk the feed when there is one; previously a parse failure led
        # straight to an AttributeError on the missing `self.feed`.
        if self.parsed:
            self.enumerate_feed()

    def enumerate_feed(self):
        """Fill `self.categories` and `self.entries` from `self.feed`.

        Does nothing when no feed has been parsed. Results are appended, so
        calling this more than once duplicates the items.
        """
        if self.feed is None:
            return
        # Handle Categories
        for cate in self.feed.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=cate))
        # handle entries - each one is compatible with a Deposit receipt, so using that
        for entry in self.feed.findall(NS['atom'] % 'entry'):
            self.entries.append(Deposit_Receipt(dom=entry))
        # TODO handle multipage first/last pagination
marco@16 291 # TODO handle multipage first/last pagination
marco@16 292
marco@16 293