Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/_iotools.py @ 87:2a2c65a20a8b
Add Python libs and headers
author | Chris Cannam |
---|---|
date | Wed, 25 Feb 2015 14:05:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
86:413a9d26189e | 87:2a2c65a20a8b |
---|---|
1 """A collection of functions designed to help I/O with ascii files. | |
2 | |
3 """ | |
4 from __future__ import division, absolute_import, print_function | |
5 | |
6 __docformat__ = "restructuredtext en" | |
7 | |
8 import sys | |
9 import numpy as np | |
10 import numpy.core.numeric as nx | |
11 from numpy.compat import asbytes, bytes, asbytes_nested, basestring | |
12 | |
13 if sys.version_info[0] >= 3: | |
14 from builtins import bool, int, float, complex, object, str | |
15 unicode = str | |
16 else: | |
17 from __builtin__ import bool, int, float, complex, object, unicode, str | |
18 | |
19 | |
20 if sys.version_info[0] >= 3: | |
21 def _bytes_to_complex(s): | |
22 return complex(s.decode('ascii')) | |
23 | |
24 def _bytes_to_name(s): | |
25 return s.decode('ascii') | |
26 else: | |
27 _bytes_to_complex = complex | |
28 _bytes_to_name = str | |
29 | |
30 def _is_string_like(obj): | |
31 """ | |
32 Check whether obj behaves like a string. | |
33 """ | |
34 try: | |
35 obj + '' | |
36 except (TypeError, ValueError): | |
37 return False | |
38 return True | |
39 | |
def _is_bytes_like(obj):
    """
    Report whether `obj` supports concatenation with a bytes object.

    Returns True when ``obj + asbytes('')`` succeeds, and False when it
    raises TypeError or ValueError.
    """
    try:
        obj + asbytes('')
    except (TypeError, ValueError):
        return False
    else:
        return True
49 | |
50 | |
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.

    If the string ends in '.gz' or '.bz2', the file is opened through
    `gzip` or `bz2` respectively, so that it is transparently decompressed.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned, or an object
        that already exposes a ``seek`` attribute.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only returned if `return_opened` is True. True when this function
        opened the file itself, False when `fname` was already a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with ``seek``.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Forward `flag`; it used to be dropped, so 'w' opened for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `open` exists on both Python 2 and 3, whereas the `file`
            # builtin was removed in Python 3.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
83 | |
84 | |
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False otherwise
        (including when `ndtype` has no named fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    field_names = ndtype.names or ()
    return any(ndtype[key].names for key in field_names)
110 | |
111 | |
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Field names are not kept in the output.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : {False, True}, optional
        If True, a field with a shape contributes one entry per element
        of that shape; otherwise it contributes a single entry.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order, with nested fields expanded
        depth-first.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', 'f8'), ('y', 'f8'),
    ...                ('block', 'i4', (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int32')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int32'),
     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
     dtype('int32')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field, in declaration order.
        flattened = []
        for key in field_names:
            subdtype = ndtype.fields[key][0]
            flattened.extend(flatten_dtype(subdtype, flatten_base))
        return flattened
    # Plain (possibly sub-array) dtype.
    if flatten_base:
        repeats = int(np.prod(ndtype.shape))
        return [ndtype.base] * repeats
    return [ndtype.base]
150 | |
151 | |
class LineSplitter(object):
    """
    Callable that cuts a line into fields, either at a delimiter or at
    fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
        If None (default), fields are separated by runs of white space.
    comments : str, optional
        Marker for the beginning of a comment; everything from the marker
        on is dropped before splitting. Default is '#'. Use None to keep
        the whole line.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every element of its output is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking a single argument and returning the list of
            the white-space-stripped items produced by `method`.

        """
        def wrapped(line):
            return [field.strip() for field in method(line)]
        return wrapped

    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
        self.comments = comments
        # Text delimiters are handled as ASCII bytes.
        if isinstance(delimiter, unicode):
            delimiter = delimiter.encode('ascii')
        if delimiter is None or _is_bytes_like(delimiter):
            # Character delimiter (an empty one means "any white space").
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for (start, stop) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero integer: constant field width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _delimited_splitter(self, line):
        """Split `line` at each occurrence of the delimiter."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        payload = line.strip(asbytes(" \r\n"))
        return payload.split(self.delimiter) if payload else []

    def _fixedwidth_splitter(self, line):
        """Split `line` into consecutive chunks of constant width."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        line = line.strip(asbytes("\r\n"))
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Split `line` using the precomputed per-field slices."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        if not line:
            return []
        return [line[chunk] for chunk in self.delimiter]

    def __call__(self, line):
        return self._handyman(line)
246 | |
247 | |
class NameValidator(object):
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any character listed as invalid, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the format
    of the default name used when a name ends up empty. By default this is
    'f%i', so that names will default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default names
        ['return', 'file', 'print'] are always excluded as well. Excluded
        names are appended an underscore: for example, `file` becomes
        `file_` if supplied. The sequence passed in is not modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote is always deleted.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive='lower')
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('excl_', 'field2', 'no_', 'with_space', 'case')

    """
    #
    defaultexcludelist = ['return', 'file', 'print']
    # Raw string: the backslash is one of the characters to delete, not an
    # escape sequence.
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
    #

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. Build a new list so that neither the
        # caller's sequence nor the class-level default is modified.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + list(self.defaultexcludelist)
        # Process the list of characters to delete. Copy the class-level
        # default before adding to it, so the shared set stays untouched.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or ('u' in case_sensitive):
            self.case_converter = lambda x: x.upper()
        elif 'l' in case_sensitive:
            self.case_converter = lambda x: x.lower()
        else:
            self.case_converter = lambda x: x
        #
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, basestring):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to the requested length.
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initialize some variables
        validatednames = []
        seen = dict()
        nbempty = 0
        #
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Generate a default name that does not collide with any
                # of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)
    #

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
403 | |
404 | |
def str2bool(value):
    """
    Convert a string spelling a boolean into the matching bool.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean. It is upper-cased
        and compared against ``asbytes('TRUE')`` and ``asbytes('FALSE')``.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool(b'TRUE')
    True
    >>> np.lib._iotools.str2bool(b'false')
    False

    """
    upper = value.upper()
    if upper == asbytes('TRUE'):
        return True
    if upper == asbytes('FALSE'):
        return False
    raise ValueError("Invalid boolean")
439 | |
440 | |
class ConverterError(Exception):
    """
    Error raised when a converter for string values fails.

    """
447 | |
class ConverterLockError(ConverterError):
    """
    Error raised when an upgrade is attempted on a locked converter.

    """
454 | |
class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
466 | |
467 | |
class StringConverter(object):
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`. If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : sequence of str, optional
        Sequence of strings indicating a missing value.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    # Ordered conversion ladder: (type, conversion function, default fill).
    # Upgrading a converter moves it one step further down this list.
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    # This is a bit annoying. We want to return the "general" type in most
    # cases (ie. "string" rather than "S10"), but we want to return the
    # specific type for datetime64 (ie. "datetime64[us]" rather than
    # "datetime64").
    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper. The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence of tuples/lists
            is inserted as-is, each item being used as a full mapper entry.
        default : any or sequence, optional
            Default value for `func`, or one default per function when
            `func` is a sequence. Missing trailing defaults are None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Ready-made mapper entries: insert them unchanged.
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default.
                # `extend` is required here: `append` would add a single
                # nested list and leave the trailing functions unpaired.
                default.extend([None] * (len(func) - len(default)))
            for (fct, dft) in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Convert unicode (for Py3)
        if isinstance(missing_values, unicode):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the set of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            self.missing_values = set(list(missing_values) + [asbytes('')])
        #
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default on any ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; only recognized missing values map to the default.

        Raises ValueError when the conversion fails and the stripped
        `value` is not one of `missing_values`.
        """
        try:
            return self.func(value)
        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move the converter one step down `_mapper`.

        Shared by `upgrade` and `iterupgrade`. Raises `ConverterLockError`
        if the converter is locked.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        # When already on the last entry, the status is left unchanged and
        # the last mapper entry is (re)installed.
        (self.type, self.func, default) = self._mapper[_status]
        self._status = _status
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried. The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        Raises
        ------
        ConverterLockError
            If the conversion fails and the converter is locked.

        """
        self._checked = True
        try:
            # Return the converted value, as documented.
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` can be converted.

        `value` may be a single item or an iterable of items. Nothing is
        returned; only the state of the converter is updated.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values=asbytes(''), locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given, the current
            default is kept and the output type is guessed by calling
            `func` on `testing_value`.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : sequence of str, optional
            Sequence of strings indicating a missing value. They are added
            to the existing ones; passing None clears the existing ones.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked
        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or asbytes('1'))
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))
        # Add the missing values to the existing set
        if missing_values is not None:
            if _is_bytes_like(missing_values):
                self.missing_values.add(missing_values)
            elif hasattr(missing_values, '__iter__'):
                for val in missing_values:
                    self.missing_values.add(val)
        else:
            # Keep a set (not a list) so that a later `update` can still
            # call `.add` on it.
            self.missing_values = set()
814 | |
815 | |
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a loose description and optional field names.

    The input is first passed to `np.dtype`; if that fails with a
    TypeError it is treated as a sequence of formats. Field names are
    run through a `NameValidator` before being attached.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not understood by np.dtype: treat the input as a sequence of
        # formats and pair it with validated names.
        validator = NameValidator(**validationargs)
        count = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, basestring):
            names = names.split(",")
        names = validator(names, nbfields=count, defaultfmt=defaultfmt)
        return np.dtype(dict(formats=ndtype, names=names))

    nbtypes = len(ndtype)

    if names is None:
        # No explicit names: only a structured dtype needs any work.
        if nbtypes > 0:
            validator = NameValidator(**validationargs)
            autonames = tuple("f%i" % i for i in range(nbtypes))
            if (ndtype.names == autonames) and (defaultfmt != "f%i"):
                # Default initial names: regenerate them with the
                # requested format.
                ndtype.names = validator([''] * nbtypes,
                                         defaultfmt=defaultfmt)
            else:
                # Explicit initial names: just validate them.
                ndtype.names = validator(ndtype.names, defaultfmt=defaultfmt)
        return ndtype

    # Explicit names were given.
    validator = NameValidator(**validationargs)
    if isinstance(names, basestring):
        names = names.split(",")
    if nbtypes == 0:
        # Simple dtype: repeat it to match the number of names.
        formats = tuple([ndtype.type] * len(names))
        names = validator(names, defaultfmt=defaultfmt)
        return np.dtype(list(zip(names, formats)))
    # Structured dtype: validate the names against the number of fields.
    ndtype.names = validator(names, nbfields=nbtypes, defaultfmt=defaultfmt)
    return ndtype