sworduploader: sword2-libraries-pyinstaller-compatible/sword2/utils.py annotate

annotate sword2-libraries-pyinstaller-compatible/sword2/utils.py @ 22:d1752c7031e4 timeouts tip

Updated .hgignore to ignore sword2_logging.conf and anything in .cache

author	Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date	Tue, 22 Jan 2013 14:43:42 +0000
parents	8b69bba225c9
children

rev	line source
marco@16	1 #!/usr/bin/env python
marco@16	2 # -- coding: utf-8 --
marco@16	3
marco@16	4 """
marco@16	5 Utility methods used within the module
marco@16	6 """
marco@16	7
marco@16	8 from sword2_logging import logging
marco@16	9 utils_l = logging.getLogger(__name__)
marco@16	10
marco@16	11 from time import time
marco@16	12 from datetime import datetime
marco@16	13
marco@16	14 from base64 import b64encode
marco@16	15
marco@16	16 try:
marco@16	17 from hashlib import md5
marco@16	18 except ImportError:
marco@16	19 import md5
marco@16	20
marco@16	21 import mimetypes
marco@16	22
# Namespace-qualified tag templates: interpolate a local tag name with `%`
# (e.g. NS['atom'] % 'entry') to get the "{namespace-uri}tag" form.
NS = {
    'dcterms': "{http://purl.org/dc/terms/}%s",
    'sword': "{http://purl.org/net/sword/terms/}%s",
    'atom': "{http://www.w3.org/2005/Atom}%s",
    'app': "{http://www.w3.org/2007/app}%s",
}
marco@16	28
def get_text(parent, tag, plural = False):
    """Collect the `.text` of every element returned by `parent.findall(tag)`.

    Returns `None` when nothing matches, the bare text when exactly one element
    matches (wrapped in a one-item list if `plural` is true), and a list of
    texts when several elements match. Element attributes are ignored.

    Note: while the value gathered so far is falsy (`None` or an empty string)
    the next element's text replaces it instead of being appended to it.
    """
    collected = None
    for element in parent.findall(tag):
        value = element.text
        if isinstance(collected, list):
            # Already accumulating several results.
            collected.append(value)
        elif collected:
            # Second hit after a single non-empty text: promote to a list.
            collected = [collected, value]
        else:
            # Nothing (or only falsy text) gathered so far.
            collected = [value] if plural else value
    return collected
marco@16	48
def get_md5(data):
    """Return a `(md5 hex digest, size in bytes)` tuple for `data`.

    `data` may be:

    * a file-like object exposing both `read()` and `seek()`. It is rewound to
      the start, hashed in 1Mb chunks (so large files are never held in memory
      at once) and rewound again so the caller can read it from the beginning.
    * a byte string, hashed in one go.
    * a text string, which is encoded as UTF-8 before hashing; the reported
      size is the length of the encoded bytes.
    """
    chunk_size = 1024*1024 # 1Mb
    m = md5()
    if hasattr(data, "read") and hasattr(data, 'seek'):
        # Start from the beginning: the stream is rewound to 0 afterwards, so
        # the digest and size must describe the same bytes the caller will read.
        data.seek(0)
        f_size = 0
        chunk = data.read(chunk_size)
        while chunk:
            f_size += len(chunk)
            m.update(chunk)
            chunk = data.read(chunk_size)
        data.seek(0)
        return m.hexdigest(), f_size
    # Plain string: md5 only accepts bytes, so text has to be encoded first.
    if hasattr(data, "encode") and not isinstance(data, bytes):
        data = data.encode("utf-8")
    m.update(data)
    return m.hexdigest(), len(data)
marco@16	69
marco@16	70
class Timer(object):
    """Simple timer, providing a 'stopwatch' mechanism.

    Usage example:

    >>> from sword2.utils import Timer
    >>> from time import sleep
    >>> t = Timer()
    >>> t.get_timestamp()
    datetime.datetime(2011, 6, 7, 7, 40, 53, 87248)
    >>> t.get_loggable_timestamp()
    '2011-06-07T07:40:53.087516'

    >>> # Start a few timers
    ... t.start("kaylee", "river", "inara")
    >>> sleep(3) # wait a little while
    >>> t.time_since_start("kaylee")
    (0, 3.0048139095306396)

    # tuple -> (index of the logged .duration, time since the .start method was called)
    # eg 't.duration['kaylee'][0]' would equal 3.00481....

    >>> sleep(2)
    >>> t.time_since_start("kaylee", "inara")
    [(1, 5.00858998298645), (0, 5.00858998298645)]
    >>> sleep(5)
    >>> t.time_since_start("kaylee", "river")
    [(2, 10.015379905700684), (0, 10.015379905700684)]
    >>> sleep(4)
    >>> t.time_since_start("kaylee", "inara", "river")
    [(3, 14.021538972854614), (1, 14.021538972854614), (1, 14.021538972854614)]

    # The order of the response is the same as the order of the names in the method call.

    >>> # report back
    ... t.duration['kaylee']
    [3.0048139095306396, 5.00858998298645, 10.015379905700684, 14.021538972854614]
    >>> t.duration['inara']
    [5.00858998298645, 14.021538972854614]
    >>> t.duration['river']
    [10.015379905700684, 14.021538972854614]
    >>>
    """
    def __init__(self):
        self.reset_all()

    def reset_all(self):
        """Discard every start time, logged duration and stop time."""
        # name -> time() value captured by start()
        self.counts = {}
        # name -> list of elapsed times logged by time_since_start()
        self.duration = {}
        # name -> time() value captured by stop(). Deliberately NOT called
        # `stop`: an instance attribute of that name would shadow the stop()
        # method and make it uncallable.
        self.stop_times = {}

    def reset(self, name):
        """Set the start time of an already-started timer `name` to 0."""
        if name in self.counts:
            self.counts[name] = 0

    def read_raw(self, name):
        """Return the raw start time stored for `name`, or None if it was never started."""
        return self.counts.get(name, None)

    def read(self, name):
        """Return the start time of `name` as a `datetime`, or None if it was never started."""
        if name in self.counts:
            return datetime.fromtimestamp(self.counts[name])
        else:
            return None

    def start(self, *args):
        """Start (or restart) every named timer, all sharing one timestamp."""
        st_time = time()
        for arg in args:
            self.counts[arg] = st_time

    def stop(self, *args):
        """Record the current time as the stop time of every named timer."""
        st_time = time()
        for arg in args:
            self.stop_times[arg] = st_time

    def get_timestamp(self):
        # Convenience function
        return datetime.now()

    def get_loggable_timestamp(self):
        """Human-readable by intent"""
        return datetime.now().isoformat()

    def time_since_start(self, *args):
        """Log and return the time elapsed since `start()` for each named timer.

        For each name a tuple `(index, elapsed)` is produced, where `index` is
        the position of `elapsed` within `self.duration[name]`. Names that were
        never started yield `(0, 0)` and log nothing.

        Returns the single tuple when exactly one name is given, otherwise a
        list of tuples in the same order as the names.
        """
        r = []
        st_time = time() # one reading, shared by every name in this call
        for name in args:
            if name in self.counts:
                duration = st_time - self.counts[name]
                history = self.duration.setdefault(name, [])
                history.append(duration)
                r.append((len(history) - 1, duration))
            else:
                r.append((0, 0))
        if len(r) == 1:
            return r.pop()
        else:
            return r
marco@16	169
marco@16	170
def get_content_type(filename):
    """Guess a MIME type from the extension of `filename`.

    Returns the guessed type, or 'application/octet-stream' when the extension
    is unknown or when `filename` is missing (None or empty).
    """
    # Does a simple .ext -> mimetype mapping.
    # Generally better to specify the mimetype upfront.
    if not filename:
        # mimetypes.guess_type() raises on None, so answer with the default.
        return 'application/octet-stream'
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
marco@16	175
def create_multipart_related(payloads):
    """Build a multipart/related request body from a list of payload dicts.

    Expected: list of dicts with keys 'key', 'type'='content type','filename'=optional,'data'=payload, 'headers'={}

    Each payload becomes one part, in list order. The part whose 'key' is
    'payload' is base64 encoded; every other part's 'data' is inserted as-is.
    A part without a 'type' gets its content type guessed from 'filename', or
    'application/octet-stream' when there is no filename either.

    Returns a `(content_type, body)` tuple, where `content_type` is the
    Content-Type header value carrying the boundary and `body` is the parts
    joined with CRLF.

    TODO: More mem-efficient to spool this to disc rather than hold in RAM, but until Httplib2 bug gets fixed (issue 151)
    this might be in vain.

    Can handle more than just two files.

    SWORD2 multipart POST/PUT expects two attachments - key = 'atom' w/ Atom Entry (metadata)
    key = 'payload' (file)
    """
    # Generate a boundary code from the current timestamp
    # TODO check that it does not occur in the payload data
    # md5 needs bytes, hence the explicit encode of the (ASCII) timestamp.
    bhash = md5(datetime.now().isoformat().encode('ascii')).hexdigest() # eg 'd8bb3ea6f4e0a4b4682be0cfb4e0a24e'
    BOUNDARY = '===========%s_$' % bhash
    CRLF = '\r\n' # As some servers might barf without this.
    body = []
    for payload in payloads: # predictable ordering...
        body.append('--' + BOUNDARY)
        if payload.get('type', None):
            body.append('Content-Type: %(type)s' % payload)
        elif payload.get('filename', None):
            body.append('Content-Type: %s' % get_content_type(payload['filename']))
        else:
            # Nothing to guess from; never hand None to the mimetype guesser.
            body.append('Content-Type: application/octet-stream')

        if payload.get('filename', None):
            body.append('Content-Disposition: attachment; name="%(key)s"; filename="%(filename)s"' % (payload))
        else:
            body.append('Content-Disposition: attachment; name="%(key)s"' % (payload))

        if "headers" in payload:
            for f, v in payload['headers'].items():
                body.append("%s: %s" % (f, v)) # TODO force ASCII?

        body.append('MIME-Version: 1.0')
        if payload['key'] == 'payload':
            body.append('Content-Transfer-Encoding: base64')
            body.append('')
            encoded = b64encode(payload['data'])
            if not isinstance(encoded, str):
                # b64encode may hand back bytes; the rest of the body is built
                # from native strings, so bring it to the same type for join().
                encoded = encoded.decode('ascii')
            body.append(encoded)
        else:
            body.append('')
            body.append(payload['data'])
    body.append('--' + BOUNDARY + '--')
    body.append('')
    body_bytes = CRLF.join(body)
    content_type = 'multipart/related; boundary="%s"' % BOUNDARY
    return content_type, body_bytes

Mercurial > hg > sworduploader

annotate sword2-libraries-pyinstaller-compatible/sword2/utils.py @ 22:d1752c7031e4 timeouts tip