comparison DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/_iotools.py @ 87:2a2c65a20a8b

Add Python libs and headers
author Chris Cannam
date Wed, 25 Feb 2015 14:05:22 +0000
parents
children
comparison
equal deleted inserted replaced
86:413a9d26189e 87:2a2c65a20a8b
1 """A collection of functions designed to help I/O with ascii files.
2
3 """
4 from __future__ import division, absolute_import, print_function
5
6 __docformat__ = "restructuredtext en"
7
8 import sys
9 import numpy as np
10 import numpy.core.numeric as nx
11 from numpy.compat import asbytes, bytes, asbytes_nested, basestring
12
13 if sys.version_info[0] >= 3:
14 from builtins import bool, int, float, complex, object, str
15 unicode = str
16 else:
17 from __builtin__ import bool, int, float, complex, object, unicode, str
18
19
20 if sys.version_info[0] >= 3:
21 def _bytes_to_complex(s):
22 return complex(s.decode('ascii'))
23
24 def _bytes_to_name(s):
25 return s.decode('ascii')
26 else:
27 _bytes_to_complex = complex
28 _bytes_to_name = str
29
30 def _is_string_like(obj):
31 """
32 Check whether obj behaves like a string.
33 """
34 try:
35 obj + ''
36 except (TypeError, ValueError):
37 return False
38 return True
39
def _is_bytes_like(obj):
    """
    Report whether `obj` can be concatenated with a bytes object.

    Returns False when ``obj + asbytes('')`` raises TypeError or
    ValueError, and True otherwise.
    """
    bytes_like = True
    try:
        obj + asbytes('')
    except (TypeError, ValueError):
        bytes_like = False
    return bytes_like
49
50
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.

    If the string ends in '.gz' the file is opened with `gzip`; if it ends
    in '.bz2' it is opened with `bz2`. Any other name is opened with the
    builtin `open`. An object with a ``seek`` attribute is assumed to be an
    already opened file and is returned unchanged.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only returned if `return_opened` is True. True when the file was
        opened by this function, False when `fname` already was a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with a ``seek``
        attribute.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour the requested mode instead of always opening for read.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # ``open`` exists on both Python 2 and 3; ``file`` is Python 2 only.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
83
84
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False otherwise
        (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # An unstructured dtype has ``names`` set to None: nothing to inspect.
    field_names = ndtype.names or ()
    return any(ndtype[field].names for field in field_names)
110
111
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured data-type into a flat list of
    base data-types.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : {False, True}, optional
        If True, a field with a shape contributes one entry per element of
        that shape; otherwise it contributes a single entry.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', np.int32, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
     dtype('int32')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field, in declaration order.
        collected = []
        for key in field_names:
            field_dtype = ndtype.fields[key][0]
            collected += flatten_dtype(field_dtype, flatten_base)
        return collected
    # Leaf dtype: repeat the base type once per element when asked to.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat
150
151
class LineSplitter(object):
    """
    Callable that splits a line at a delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every item it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking a single input argument and returning the list
            of items produced by `method`, each with ``strip()`` applied.

        """
        def wrapped(input):
            return [field.strip() for field in method(input)]
        return wrapped

    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
        self.comments = comments
        # Text delimiters are converted to ASCII bytes before use.
        if isinstance(delimiter, unicode):
            delimiter = delimiter.encode('ascii')
        if delimiter is None or _is_bytes_like(delimiter):
            # Character delimiter; an empty one is normalized to None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for (start, stop) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero integer: every field has that width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _delimited_splitter(self, line):
        """Split `line` on ``self.delimiter`` after removing any comment."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        line = line.strip(asbytes(" \r\n"))
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Cut `line` into consecutive chunks of ``self.delimiter`` items."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        line = line.strip(asbytes("\r\n"))
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Cut `line` with the slices precomputed in ``self.delimiter``."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        if not line:
            return []
        return [line[piece] for piece in self.delimiter]

    def __call__(self, line):
        return self._handyman(line)
246
247
class NameValidator(object):
    """
    Object to validate a list of strings to use as field names.

    Characters listed in the deletion set are removed from the strings, and
    spaces are replaced by '_'. During instantiation, the user can define a
    list of names to exclude, as well as a list of invalid characters. Names
    in the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the default
    name in case of ambiguity. By default this is 'f%i', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default names
        ['return', 'file', 'print'] are always excluded as well. Excluded
        names are appended an underscore: for example, `file` becomes
        `file_` if supplied. The sequence passed in is not modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote is always deleted.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive='lower')
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('excl_', 'field2', 'no_', 'with_space', 'case')

    """
    defaultexcludelist = ['return', 'file', 'print']
    # Raw string: the backslash is a literal character to delete, not an
    # escape sequence.
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list: work on a copy so that the caller's
        # sequence is never modified (and so that tuples are accepted).
        excludelist = [] if excludelist is None else list(excludelist)
        excludelist.extend(self.defaultexcludelist)
        self.excludelist = excludelist
        # Process the list of characters to delete: copy the class-level
        # default so that adding '"' does not alter the shared set.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or ('u' in case_sensitive):
            self.case_converter = lambda x: x.upper()
        elif 'l' in case_sensitive:
            self.case_converter = lambda x: x.lower()
        else:
            self.case_converter = lambda x: x
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if (names is None):
            if (nbfields is None):
                return None
            names = []
        if isinstance(names, basestring):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to reach `nbfields` entries.
            nbnames = len(names)
            if (nbnames < nbfields):
                names = list(names) + [''] * (nbfields - nbnames)
            elif (nbnames > nbfields):
                names = names[:nbfields]
        # Set some shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initialize some variables
        validatednames = []
        seen = dict()
        nbempty = 0
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Generate a default name that does not clash with any of
                # the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
403
404
def str2bool(value):
    """
    Tries to transform a string supposed to represent a boolean to a boolean.

    Parameters
    ----------
    value : str or bytes
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    value = value.upper()
    # Accept both byte strings and text: on Python 3 a bytes object never
    # compares equal to a str, so normalize bytes to text before comparing.
    # Undecodable bytes are replaced and simply fail the comparison below.
    if isinstance(value, bytes):
        value = value.decode('ascii', 'replace')
    if value == 'TRUE':
        return True
    elif value == 'FALSE':
        return False
    else:
        raise ValueError("Invalid boolean")
439
440
class ConverterError(Exception):
    """
    Raised when a converter for string values encounters an error.

    """


class ConverterLockError(ConverterError):
    """
    Raised when an upgrade is attempted on a locked converter.

    """
454
class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
466
467
class StringConverter(object):
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`. If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : sequence of str, optional
        Sequence of strings indicating a missing value.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    # Ordered from the most to the least restrictive conversion; `_status`
    # is an index into this list.
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    # This is a bit annoying. We want to return the "general" type in most
    # cases (ie. "string" rather than "S10"), but we want to return the
    # specific type for datetime64 (ie. "datetime64[us]" rather than
    # "datetime64").
    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper. The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first item
            is a tuple or list is taken as a sequence of ready-made mapper
            entries and inserted as is.
        default : any or sequence, optional
            Default value associated with `func`, or sequence of default
            values when `func` is a sequence of functions. A sequence
            shorter than `func` is padded with None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with individual None entries (``extend``, not
                # ``append``) so that each function gets its own default.
                default.extend([None] * (len(func) - len(default)))
            for (fct, dft) in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Convert unicode (for Py3)
        if isinstance(missing_values, unicode):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            # (`func` is the loop variable left over from the search above)
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            self.missing_values = set(list(missing_values) + [asbytes('')])
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if `func` raises ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; on ValueError return the default only if `value` is
        a registered missing value, otherwise raise ValueError.
        """
        try:
            return self.func(value)
        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Switch to the next entry of `_mapper`.

        Raises
        ------
        ConverterLockError
            If the converter is locked.
        ConverterError
            If the converter already uses the last entry of `_mapper`.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        # Complain if there is nothing left to upgrade to; without this
        # check the callers would retry the same converter forever.
        _status = self._status
        if _status >= len(self._mapper) - 1:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        _status += 1
        (self.type, self.func, default) = self._mapper[_status]
        self._status = _status
        # A default given at construction takes precedence over the
        # mapper's default.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried. The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        Raises
        ------
        ConverterLockError
            If an upgrade is needed but the converter is locked.
        ConverterError
            If no entry of the mapper can convert `value`.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until it can convert every item of `value`.

        Parameters
        ----------
        value : str or iterable of str
            The string(s) to test. An object without an ``__iter__``
            attribute is treated as a single item.

        Raises
        ------
        ConverterLockError
            If an upgrade is needed but the converter is locked.
        ConverterError
            If no entry of the mapper can convert all the items.

        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values=asbytes(''), locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : sequence of str, optional
            Sequence of strings indicating a missing value. If None, the
            existing set of missing values is cleared.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked
        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or asbytes('1'))
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))
        # Add the missing values to the existing set
        if missing_values is not None:
            if _is_bytes_like(missing_values):
                self.missing_values.add(missing_values)
            elif hasattr(missing_values, '__iter__'):
                for val in missing_values:
                    self.missing_values.add(val)
        else:
            # Keep the attribute a set so that later updates can still add
            # values to it.
            self.missing_values = set()
814
815
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Convenience function to create a `np.dtype` object.

    The input `ndtype` is converted to a dtype and its field names are
    matched with (or generated from) `names`.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # `np.dtype` rejected the input: treat it as a sequence of formats,
        # one per field.
        validator = NameValidator(**validationargs)
        nbfields = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, basestring):
            names = names.split(",")
        names = validator(names, nbfields=nbfields, defaultfmt=defaultfmt)
        return np.dtype(dict(formats=ndtype, names=names))

    nbtypes = len(ndtype)
    if names is not None:
        # Explicit names
        validator = NameValidator(**validationargs)
        if isinstance(names, basestring):
            names = names.split(",")
        if nbtypes == 0:
            # Simple dtype: repeat to match the nb of names
            formats = tuple([ndtype.type] * len(names))
            names = validator(names, defaultfmt=defaultfmt)
            ndtype = np.dtype(list(zip(names, formats)))
        else:
            # Structured dtype: just validate the names as needed
            ndtype.names = validator(names, nbfields=nbtypes,
                                     defaultfmt=defaultfmt)
    elif nbtypes > 0:
        # No explicit names, but the dtype already has fields
        validator = NameValidator(**validationargs)
        autonames = tuple("f%i" % i for i in range(nbtypes))
        if ndtype.names == autonames and defaultfmt != "f%i":
            # Default initial names: regenerate them with `defaultfmt`
            ndtype.names = validator([''] * nbtypes, defaultfmt=defaultfmt)
        else:
            # Explicit initial names: just validate
            ndtype.names = validator(ndtype.names, defaultfmt=defaultfmt)
    return ndtype