annotate sword2-libraries-pyinstaller-compatible/sword2/utils.py @ 22:d1752c7031e4 timeouts tip

Updated .hgignore to ignore sword2_logging.conf and anything in .cache
author Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date Tue, 22 Jan 2013 14:43:42 +0000
parents 8b69bba225c9
children
rev   line source
marco@16 1 #!/usr/bin/env python
marco@16 2 # -*- coding: utf-8 -*-
marco@16 3
marco@16 4 """
marco@16 5 Utility methods used within the module
marco@16 6 """
marco@16 7
marco@16 8 from sword2_logging import logging
marco@16 9 utils_l = logging.getLogger(__name__)
marco@16 10
marco@16 11 from time import time
marco@16 12 from datetime import datetime
marco@16 13
marco@16 14 from base64 import b64encode
marco@16 15
marco@16 16 try:
marco@16 17 from hashlib import md5
marco@16 18 except ImportError:
marco@16 19 import md5
marco@16 20
marco@16 21 import mimetypes
marco@16 22
# Templates for namespace-qualified tag names in "{namespace-uri}localname"
# form. Fill in the local name with the % operator,
# e.g. NS['atom'] % 'entry' -> '{http://www.w3.org/2005/Atom}entry'.
NS = {
    'dcterms': "{http://purl.org/dc/terms/}%s",
    'sword': "{http://purl.org/net/sword/terms/}%s",
    'atom': "{http://www.w3.org/2005/Atom}%s",
    'app': "{http://www.w3.org/2007/app}%s",
}
marco@16 28
def get_text(parent, tag, plural=False):
    """Return the text of the elements of `parent` that match `tag`.

    `parent` is an `etree.Element`; `tag` is passed straight to
    `parent.findall()`, so a plain tag name matches direct children only.
    Element attributes are ignored; only the `.text` value is returned.

    Returns:
      * `None` if nothing matches (even when `plural` is true),
      * the single text value if exactly one element matches and `plural`
        is false,
      * a list of text values, in document order, if several elements match
        or if `plural` is true.

    An element without text contributes its `.text` value (`None`) to the
    result, so the list always has one entry per matching element.
    """
    # Collect everything first. The result shape must depend on how many
    # elements matched, not on whether the first one had empty text.
    texts = [item.text for item in parent.findall(tag)]
    if not texts:
        return None
    if plural or len(texts) > 1:
        return texts
    return texts[0]
marco@16 48
def _md5_as_bytes(value):
    """Return `value` UTF-8 encoded if it is text; byte strings pass through unchanged."""
    if not isinstance(value, bytes) and hasattr(value, "encode"):
        return value.encode("utf-8")
    return value


def get_md5(data):
    """Return `(md5 hex digest, size)` for a string or a file-like object.

    `data` is treated as a file-like object when it has both `read` and
    `seek`; anything else is hashed as a single in-memory string.

    File-like objects are rewound to the start, read in 1Mb chunks (so large
    files are never held in memory at once) and rewound again before
    returning, so the caller can read the content afterwards.

    Text (unicode) input is encoded as UTF-8 before hashing, and the size
    returned is the number of bytes hashed.
    """
    digest = md5()
    if hasattr(data, "read") and hasattr(data, "seek"):
        chunk_size = 1024 * 1024  # 1Mb
        # Hash from the beginning: the stream is reset to 0 on exit, so the
        # digest and size must describe what the caller will then read.
        data.seek(0)
        f_size = 0
        chunk = data.read(chunk_size)
        while chunk:
            chunk = _md5_as_bytes(chunk)
            f_size += len(chunk)
            digest.update(chunk)
            chunk = data.read(chunk_size)
        data.seek(0)
        return digest.hexdigest(), f_size

    # Plain in-memory string.
    raw = _md5_as_bytes(data)
    digest.update(raw)
    return digest.hexdigest(), len(raw)
marco@16 69
marco@16 70
class Timer(object):
    """Simple timer, providing a 'stopwatch' mechanism.

    State kept on the instance:

      * `counts`     - name -> start time (seconds since the epoch, from `time()`)
      * `duration`   - name -> list of elapsed times recorded by `time_since_start`
      * `stop_times` - name -> time at which `stop` was last called for that name

    Usage example (the timings shown are illustrative):

    >>> from sword2.utils import Timer
    >>> from time import sleep
    >>> t = Timer()
    >>> t.get_timestamp()
    datetime.datetime(2011, 6, 7, 7, 40, 53, 87248)
    >>> t.get_loggable_timestamp()
    '2011-06-07T07:40:53.087516'

    >>> # Start a few timers
    ... t.start("kaylee", "river", "inara")
    >>> sleep(3)        # wait a little while
    >>> t.time_since_start("kaylee")
    (0, 3.0048139095306396)

    # tuple -> (index of the logged .duration, time since the .start method was called)
    # eg 't.duration['kaylee'][0]' would equal 3.00481....

    >>> sleep(2)
    >>> t.time_since_start("kaylee", "inara")
    [(1, 5.00858998298645), (0, 5.00858998298645)]
    >>> sleep(5)
    >>> t.time_since_start("kaylee", "river")
    [(2, 10.015379905700684), (0, 10.015379905700684)]
    >>> sleep(4)
    >>> t.time_since_start("kaylee", "inara", "river")
    [(3, 14.021538972854614), (1, 14.021538972854614), (1, 14.021538972854614)]

    # The order of the response is the same as the order of the names in the method call.

    >>> # report back
    ... t.duration['kaylee']
    [3.0048139095306396, 5.00858998298645, 10.015379905700684, 14.021538972854614]
    >>> t.duration['inara']
    [5.00858998298645, 14.021538972854614]
    >>> t.duration['river']
    [10.015379905700684, 14.021538972854614]
    >>>
    """

    def __init__(self):
        self.reset_all()

    def reset_all(self):
        """Forget every start time, recorded duration and stop time."""
        self.counts = {}
        self.duration = {}
        # Deliberately not called `stop`: an instance attribute of that name
        # would shadow the stop() method and make it uncallable.
        self.stop_times = {}

    def reset(self, name):
        """Set the start time of `name` to 0 if that timer exists; otherwise do nothing."""
        if name in self.counts:
            self.counts[name] = 0

    def read_raw(self, name):
        """Return the raw start time of `name` (epoch seconds), or None if unknown."""
        return self.counts.get(name, None)

    def read(self, name):
        """Return the start time of `name` as a `datetime`, or None if unknown."""
        if name in self.counts:
            return datetime.fromtimestamp(self.counts[name])
        return None

    def start(self, *args):
        """Start (or restart) every named timer, all with the same start time."""
        st_time = time()
        for arg in args:
            self.counts[arg] = st_time

    def stop(self, *args):
        """Record the current time as the stop time of every named timer."""
        st_time = time()
        for arg in args:
            self.stop_times[arg] = st_time

    def get_timestamp(self):
        """Convenience function: return the current local time as a `datetime`."""
        return datetime.now()

    def get_loggable_timestamp(self):
        """Human-readable by intent: the current local time as an ISO 8601 string."""
        return datetime.now().isoformat()

    def time_since_start(self, *args):
        """Record and return the time elapsed since each named timer was started.

        For every started name the elapsed time is appended to
        `self.duration[name]` and reported as `(index, elapsed)`, where
        `index` is the position of that value in `self.duration[name]`.
        Names that were never started are reported as `(0, 0)` and nothing
        is recorded for them.

        Returns a single tuple when exactly one name is given, otherwise a
        list of tuples in the same order as the names passed in.
        """
        now = time()  # one reading, so all names share the same reference point
        results = []
        for name in args:
            if name in self.counts:
                elapsed = now - self.counts[name]
                laps = self.duration.setdefault(name, [])
                laps.append(elapsed)
                results.append((len(laps) - 1, elapsed))
            else:
                results.append((0, 0))
        if len(results) == 1:
            return results.pop()
        return results
marco@16 169
marco@16 170
def get_content_type(filename):
    """Guess a mimetype from the extension of `filename`.

    Does a simple .ext -> mimetype mapping via `mimetypes.guess_type`.
    Generally better to specify the mimetype upfront.

    Returns 'application/octet-stream' when the type cannot be guessed or
    when no filename is given (`None` or an empty string).
    """
    default_type = 'application/octet-stream'
    # Callers may not have a filename at all; guess_type() rejects None.
    if not filename:
        return default_type
    return mimetypes.guess_type(filename)[0] or default_type
marco@16 175
def _multipart_as_bytes(value):
    """Return `value` UTF-8 encoded if it is text; byte strings pass through unchanged."""
    if not isinstance(value, bytes) and hasattr(value, "encode"):
        return value.encode("utf-8")
    return value


def create_multipart_related(payloads):
    """Build a multipart/related request body from a list of payload dicts.

    Expected: list of dicts with keys
        'key'      - value used for name="..." in Content-Disposition (required)
        'type'     - content type; guessed from 'filename' when absent or empty
        'filename' - optional
        'data'     - the payload itself (required)
        'headers'  - optional dict of extra header lines for that part

    Can handle more than just two files. Parts are emitted in list order.

    SWORD2 multipart POST/PUT expects two attachments - key = 'atom' w/ Atom Entry (metadata)
                                                        key = 'payload' (file)
    The part whose key is 'payload' is base64 encoded; every other part is
    written as-is.

    Returns `(content_type, body_bytes)`: the Content-Type header value
    (including the boundary) and the body as a byte string. Text pieces are
    UTF-8 encoded so header lines and binary data can be joined safely.

    TODO: More mem-efficient to spool this to disc rather than hold in RAM, but until Httplib2 bug gets fixed (issue 151)
          this might be in vain.
    """
    # Generate a boundary code from the current timestamp.
    # TODO check that it does not occur in the payload data
    timestamp = datetime.now().isoformat()
    bhash = md5(timestamp.encode("utf-8")).hexdigest()  # eg 'd8bb3ea6f4e0a4b4682be0cfb4e0a24e'
    BOUNDARY = '===========%s_$' % bhash
    CRLF = b'\r\n'  # As some servers might barf without this.
    body = []
    for payload in payloads:  # predictable ordering...
        body.append('--' + BOUNDARY)
        if payload.get('type', None):
            body.append('Content-Type: %(type)s' % payload)
        else:
            body.append('Content-Type: %s' % get_content_type(payload.get("filename")))

        if payload.get('filename', None):
            body.append('Content-Disposition: attachment; name="%(key)s"; filename="%(filename)s"' % (payload))
        else:
            body.append('Content-Disposition: attachment; name="%(key)s"' % (payload))

        if "headers" in payload:
            for f, v in payload['headers'].items():
                body.append("%s: %s" % (f, v))  # TODO force ASCII?

        body.append('MIME-Version: 1.0')
        if payload['key'] == 'payload':
            body.append('Content-Transfer-Encoding: base64')
            body.append('')
            # b64encode needs a byte string, so text data is encoded first.
            body.append(b64encode(_multipart_as_bytes(payload['data'])))
        else:
            body.append('')
            body.append(payload['data'])
    body.append('--' + BOUNDARY + '--')
    body.append('')  # so the body ends with a trailing CRLF
    # Join as bytes: mixing text header lines with binary data in one join
    # either fails outright or forces an implicit (lossy) text conversion.
    body_bytes = CRLF.join([_multipart_as_bytes(part) for part in body])
    content_type = 'multipart/related; boundary="%s"' % BOUNDARY
    return content_type, body_bytes