#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Utility methods used within the module
"""

from base64 import b64encode
from datetime import datetime
import mimetypes
from time import time

try:
    from hashlib import md5
except ImportError:
    # Very old interpreters have no hashlib. Import the constructor itself:
    # a bare `import md5` would bind the module and make `md5()` uncallable.
    from md5 import md5

try:
    # The package's own module supplies the `logging` object used here.
    from sword2_logging import logging
except ImportError:
    # Fall back to the standard library so these helpers stay usable when
    # the package-level logging module cannot be imported.
    import logging

utils_l = logging.getLogger(__name__)

# Format strings that expand a local tag name into a namespace-qualified one,
# e.g. NS['atom'] % "entry" -> "{http://www.w3.org/2005/Atom}entry"
NS = {}
NS['dcterms'] = "{http://purl.org/dc/terms/}%s"
NS['sword'] = "{http://purl.org/net/sword/terms/}%s"
NS['atom'] = "{http://www.w3.org/2005/Atom}%s"
NS['app'] = "{http://www.w3.org/2007/app}%s"

# Read size used when hashing file-like objects (1Mb).
_CHUNK_SIZE = 1024 * 1024

# The byte-string type of the running interpreter (`str` on Python 2, `bytes`
# on Python 3), obtained without relying on the `bytes` builtin or on b''
# literals so that it also works on interpreters that predate both.
_BYTES_TYPE = type(''.encode('ascii'))


def _to_bytes(value):
    """Return `value` unchanged if it is a byte string, else UTF-8 encode it."""
    if isinstance(value, _BYTES_TYPE):
        return value
    return value.encode('utf-8')


def get_text(parent, tag, plural=False):
    """Takes an `etree.Element` and a tag name to search for and retrieves the text attribute from any
    of the parent element's direct children.

    Returns `None` if no element matches, the text of the element if exactly one matches (wrapped in
    a list when `plural` is true), or a list with one entry per element if several match. Elements
    without text contribute `None` to the list. Ignores element attributes, returning only the text."""
    # Collect every match first: deciding on the truthiness of the running
    # result would silently drop a leading element whose text is empty.
    texts = [item.text for item in parent.findall(tag)]
    if not texts:
        return None
    if plural or len(texts) > 1:
        return texts
    return texts[0]


def get_md5(data):
    """Takes either a string or a file-like object and passes back a tuple containing (md5sum, filesize)

    The file is streamed as 1Mb chunks so should work for large files. File-like object must support
    `seek()`; it is rewound to the start once it has been read.

    A text (non-byte) string is encoded as UTF-8 before hashing and the size reported is the length
    of the encoded bytes.
    """
    if hasattr(data, "read") and hasattr(data, 'seek'):
        digest = md5()
        f_size = 0
        chunk = data.read(_CHUNK_SIZE)
        while chunk:
            f_size += len(chunk)
            digest.update(chunk)
            chunk = data.read(_CHUNK_SIZE)
        # Leave the stream ready to be read again by the caller.
        data.seek(0)
        return digest.hexdigest(), f_size

    # Plain string
    raw = _to_bytes(data)
    return md5(raw).hexdigest(), len(raw)


class Timer(object):
    """Simple timer, providing a 'stopwatch' mechanism.

    Usage example:

    >>> from sword2.utils import Timer
    >>> from time import sleep
    >>> t = Timer()
    >>> t.get_timestamp()
    datetime.datetime(2011, 6, 7, 7, 40, 53, 87248)
    >>> t.get_loggable_timestamp()
    '2011-06-07T07:40:53.087516'

    >>> # Start a few timers
    ... t.start("kaylee", "river", "inara")
    >>> sleep(3)     # wait a little while
    >>> t.time_since_start("kaylee")
    (0, 3.0048139095306396)

    # tuple -> (index of the logged .duration, time since the .start method was called)
    # eg 't.duration['kaylee'][0]' would equal 3.00481....

    >>> sleep(2)
    >>> t.time_since_start("kaylee", "inara")
    [(1, 5.00858998298645), (0, 5.00858998298645)]
    >>> sleep(5)
    >>> t.time_since_start("kaylee", "river")
    [(2, 10.015379905700684), (0, 10.015379905700684)]
    >>> sleep(4)
    >>> t.time_since_start("kaylee", "inara", "river")
    [(3, 14.021538972854614), (1, 14.021538972854614), (1, 14.021538972854614)]

    # The order of the response is the same as the order of the names in the method call.

    >>> # report back
    ... t.duration['kaylee']
    [3.0048139095306396, 5.00858998298645, 10.015379905700684, 14.021538972854614]
    >>> t.duration['inara']
    [5.00858998298645, 14.021538972854614]
    >>> t.duration['river']
    [10.015379905700684, 14.021538972854614]
    >>>
    """
    def __init__(self):
        self.reset_all()

    def reset_all(self):
        """Discard every start time, recorded duration and stop time."""
        # name -> time() value recorded by start()
        self.counts = {}
        # name -> list of durations recorded by time_since_start()
        self.duration = {}
        # name -> time() value recorded by stop(). This must not be called
        # `stop`: an instance attribute of that name hides the stop() method
        # and makes it impossible to call.
        self.stop_times = {}

    def reset(self, name):
        """Set the start time of an existing timer `name` to 0; unknown names are ignored."""
        if name in self.counts:
            self.counts[name] = 0

    def read_raw(self, name):
        """Return the raw start time stored for `name`, or None if it was never started."""
        return self.counts.get(name, None)

    def read(self, name):
        """Return the start time of `name` as a `datetime`, or None if it was never started."""
        if name in self.counts:
            return datetime.fromtimestamp(self.counts[name])
        return None

    def start(self, *args):
        """Start (or restart) a timer for each name given; all share one start time."""
        st_time = time()
        for arg in args:
            self.counts[arg] = st_time

    def stop(self, *args):
        """Record the current time in `stop_times` for each name given."""
        st_time = time()
        for arg in args:
            self.stop_times[arg] = st_time

    def get_timestamp(self):
        # Convenience function
        return datetime.now()

    def get_loggable_timestamp(self):
        """Human-readable by intent"""
        return datetime.now().isoformat()

    def time_since_start(self, *args):
        """Log and return the time elapsed since `start` was called for each name.

        Each result is a tuple (index of the new entry in `duration[name]`, elapsed seconds);
        a name that was never started yields (0, 0) and logs nothing. Returns the bare tuple
        when exactly one name is given, otherwise a list in the same order as the names.
        """
        results = []
        now = time()
        for name in args:
            if name in self.counts:
                elapsed = now - self.counts[name]
                # setdefault replaces dict.has_key(), which Python 3 removed.
                logged = self.duration.setdefault(name, [])
                logged.append(elapsed)
                results.append((len(logged) - 1, elapsed))
            else:
                results.append((0, 0))
        if len(results) == 1:
            return results.pop()
        return results


def get_content_type(filename):
    """Guess a mimetype from the extension of `filename`.

    Returns 'application/octet-stream' when nothing can be guessed, including when no
    filename is given at all (`None` or an empty string).
    """
    # Does a simple .ext -> mimetype mapping.
    # Generally better to specify the mimetype upfront.
    if not filename:
        # mimetypes.guess_type() raises on None, so answer directly.
        return 'application/octet-stream'
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'


def create_multipart_related(payloads):
    """ Expected: list of dicts with keys 'key', 'type'='content type','filename'=optional,'data'=payload, 'headers'={}

    TODO: More mem-efficient to spool this to disc rather than hold in RAM, but until Httplib2 bug gets fixed (issue 151)
    this might be in vain.

    Can handle more than just two files.

    SWORD2 multipart POST/PUT expects two attachments - key = 'atom' w/ Atom Entry (metadata)
                                                        key = 'payload' (file)

    The part whose key is 'payload' is base64 encoded; every other part is included as given.

    Returns a tuple (content_type, body) where content_type is the value for the request's
    Content-Type header, including the boundary, and body is the assembled message as a byte
    string. Text parts and header lines are encoded as UTF-8.
    """
    # Generate random boundary code
    # TODO check that it does not occur in the payload data
    bhash = md5(_to_bytes(datetime.now().isoformat())).hexdigest()  # eg 'd8bb3ea6f4e0a4b4682be0cfb4e0a24e'
    BOUNDARY = '===========%s_$' % bhash
    CRLF = '\r\n'   # As some servers might barf without this.
    body = []
    for payload in payloads:   # predictable ordering...
        body.append('--' + BOUNDARY)
        if payload.get('type', None):
            body.append('Content-Type: %(type)s' % payload)
        else:
            body.append('Content-Type: %s' % get_content_type(payload.get("filename")))

        if payload.get('filename', None):
            body.append('Content-Disposition: attachment; name="%(key)s"; filename="%(filename)s"' % (payload))
        else:
            body.append('Content-Disposition: attachment; name="%(key)s"' % (payload))

        if "headers" in payload:
            for f, v in payload['headers'].items():
                body.append("%s: %s" % (f, v))     # TODO force ASCII?

        body.append('MIME-Version: 1.0')
        if payload['key'] == 'payload':
            body.append('Content-Transfer-Encoding: base64')
            body.append('')
            body.append(b64encode(_to_bytes(payload['data'])))
        else:
            body.append('')
            body.append(payload['data'])
    body.append('--' + BOUNDARY + '--')
    body.append('')
    # Join as bytes: the parts mix header text with (possibly binary) data,
    # which cannot be joined directly on interpreters that keep text and
    # bytes apart.
    body_bytes = _to_bytes(CRLF).join([_to_bytes(part) for part in body])
    content_type = 'multipart/related; boundary="%s"' % BOUNDARY
    return content_type, body_bytes