Chris@87: """A collection of functions designed to help I/O with ascii files.
Chris@87: 
Chris@87: """
Chris@87: from __future__ import division, absolute_import, print_function
Chris@87: 
Chris@87: __docformat__ = "restructuredtext en"
Chris@87: 
Chris@87: import sys
Chris@87: import numpy as np
Chris@87: import numpy.core.numeric as nx
Chris@87: from numpy.compat import asbytes, bytes, asbytes_nested, basestring
Chris@87: 
Chris@87: if sys.version_info[0] >= 3:
Chris@87:     from builtins import bool, int, float, complex, object, str
Chris@87:     unicode = str
Chris@87: else:
Chris@87:     from __builtin__ import bool, int, float, complex, object, unicode, str
Chris@87: 
Chris@87: 
Chris@87: if sys.version_info[0] >= 3:
Chris@87:     def _bytes_to_complex(s):
Chris@87:         return complex(s.decode('ascii'))
Chris@87: 
Chris@87:     def _bytes_to_name(s):
Chris@87:         return s.decode('ascii')
Chris@87: else:
Chris@87:     _bytes_to_complex = complex
Chris@87:     _bytes_to_name = str
Chris@87: 
Chris@87: def _is_string_like(obj):
Chris@87:     """
Chris@87:     Check whether obj behaves like a string.
Chris@87:     """
Chris@87:     try:
Chris@87:         obj + ''
Chris@87:     except (TypeError, ValueError):
Chris@87:         return False
Chris@87:     return True
Chris@87: 
Chris@87: def _is_bytes_like(obj):
Chris@87:     """
Chris@87:     Check whether obj behaves like a bytes object.
Chris@87:     """
Chris@87:     try:
Chris@87:         obj + asbytes('')
Chris@87:     except (TypeError, ValueError):
Chris@87:         return False
Chris@87:     return True
Chris@87: 
Chris@87: 
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.

    If the string ends in '.gz' the file is opened with `gzip`; if it ends
    in '.bz2' it is opened with `bz2`.  Any other string is opened with the
    builtin `open`.  An object that is not string-like but has a ``seek``
    attribute is assumed to be an open file and is returned unchanged.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only returned if `return_opened` is True.  True when the file was
        opened by this function, False when `fname` already was a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with ``seek``.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Forward the requested mode; otherwise a write request would
            # silently open the archive for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `open` exists on both Python 2 and 3, unlike `file`.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Chris@87: 
Chris@87: 
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False otherwise
        (including when `ndtype` has no named fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    field_names = ndtype.names or ()
    return any(ndtype[key].names for key in field_names)
Chris@87: 
Chris@87: 
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : {False, True}, optional
        If True, a field with a shape contributes one entry per element of
        that shape; if False it contributes a single entry.

    Returns
    -------
    types : list of dtype
        Base dtypes of the leaf fields, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', 'f8'), ('y', 'f8'),
    ...                ('block', 'i4', (2, 3))])
    >>> len(np.lib._iotools.flatten_dtype(dt))
    4
    >>> len(np.lib._iotools.flatten_dtype(dt, flatten_base=True))
    9

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field and concatenate.
        flattened = []
        for key in field_names:
            subtype = ndtype.fields[key][0]
            flattened += flatten_dtype(subtype, flatten_base)
        return flattened
    # Leaf dtype: repeat the base once per element only when requested.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat
Chris@87: 
Chris@87: 
class LineSplitter(object):
    """
    Callable that breaks a line into a list of fields, either at a
    delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
        If None (or empty), fields are separated by any white space.
    comments : str, optional
        Marker for the beginning of a comment; everything from the first
        occurrence onward is discarded. Default is '#'. If None, no comment
        removal takes place.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every field it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking the same single argument as `method` and
            returning a list with ``strip()`` applied to each item of
            `method`'s output.

        """
        def wrapped(line):
            return [field.strip() for field in method(line)]
        return wrapped

    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
        self.comments = comments
        # Text delimiters are converted to bytes before being classified.
        if isinstance(delimiter, unicode):
            delimiter = delimiter.encode('ascii')
        if (delimiter is None) or _is_bytes_like(delimiter):
            # Character delimiter; an empty one is treated like None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(lo, hi)
                         for (lo, hi) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero width shared by every field.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _delimited_splitter(self, line):
        """Drop the comment, trim the line, and split it at the delimiter."""
        comments = self.comments
        if comments is not None:
            line = line.split(comments)[0]
        line = line.strip(asbytes(" \r\n"))
        return line.split(self.delimiter) if line else []

    def _fixedwidth_splitter(self, line):
        """Drop the comment and cut the line into equal-width pieces."""
        comments = self.comments
        if comments is not None:
            line = line.split(comments)[0]
        line = line.strip(asbytes("\r\n"))
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Drop the comment and cut the line with the precomputed slices."""
        comments = self.comments
        if comments is not None:
            line = line.split(comments)[0]
        if not line:
            return []
        return [line[piece] for piece in self.delimiter]

    def __call__(self, line):
        """Split `line` with the splitter selected at construction time."""
        return self._handyman(line)
Chris@87: 
Chris@87: 
class NameValidator(object):
    """
    Object to validate a list of strings to use as field names.

    Characters listed as invalid are removed from each name, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created.  The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the format
    of the name used when a name ends up empty. By default this is 'f%i',
    so that names will default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default list
        ['return', 'file', 'print'] is added to it. Excluded names are
        appended an underscore: for example, `file` becomes `file_` if
        supplied. The sequence passed in is not modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double-quote character is always deleted as well.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True (or None), field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is None, which behaves like True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive='lower')
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('excl_', 'field2', 'no_', 'with_space', 'case')

    """
    #
    defaultexcludelist = ['return', 'file', 'print']
    # Raw string: the set must contain a literal backslash followed by '|'.
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
    #

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list: build a new list so that neither the
        # caller's sequence nor the class-level default is modified.
        self.excludelist = list(excludelist or []) + self.defaultexcludelist
        # Process the list of characters to delete; copy the class default
        # so that adding '"' below does not alter shared state.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or ('u' in case_sensitive):
            self.case_converter = lambda x: x.upper()
        elif 'l' in case_sensitive:
            self.case_converter = lambda x: x.lower()
        else:
            self.case_converter = lambda x: x
        #
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, basestring):
            names = [names, ]
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                # Pad with empty names; they get default names below.
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = {}
        nbempty = 0
        #
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Pick the first default name not already among the inputs.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Repeated names get a numeric suffix: name, name_1, name_2...
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)
    #

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        """Shortcut for `validate` with the same arguments."""
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
Chris@87: 
Chris@87: 
def str2bool(value):
    """
    Tries to transform a string supposed to represent a boolean to a boolean.

    Both byte strings and text strings are accepted.

    Parameters
    ----------
    value : str or bytes
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    # Normalize byte strings to text so that a single comparison works on
    # both Python 2 and 3.  latin-1 maps every byte to a character, so the
    # decoding step itself cannot fail.
    if isinstance(value, bytes):
        value = value.decode('latin-1')
    value = value.upper()
    if value == 'TRUE':
        return True
    elif value == 'FALSE':
        return False
    else:
        raise ValueError("Invalid boolean")
Chris@87: 
Chris@87: 
class ConverterError(Exception):
    """
    Error raised when a converter for string values fails.

    """
Chris@87: 
class ConverterLockError(ConverterError):
    """
    Error raised when an upgrade is attempted on a locked converter.

    """
Chris@87: 
class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    `genfromtxt` issues a `ConversionWarning` when raising exceptions has
    been explicitly switched off through its "invalid_raise" keyword.

    """
Chris@87: 
Chris@87: 
Chris@87: class StringConverter(object):
Chris@87:     """
Chris@87:     Factory class for function transforming a string into another object
Chris@87:     (int, float).
Chris@87: 
Chris@87:     After initialization, an instance can be called to transform a string
Chris@87:     into another object. If the string is recognized as representing a
Chris@87:     missing value, a default value is returned.
Chris@87: 
Chris@87:     Attributes
Chris@87:     ----------
Chris@87:     func : function
Chris@87:         Function used for the conversion.
Chris@87:     default : any
Chris@87:         Default value to return when the input corresponds to a missing
Chris@87:         value.
Chris@87:     type : type
Chris@87:         Type of the output.
Chris@87:     _status : int
Chris@87:         Integer representing the order of the conversion.
Chris@87:     _mapper : sequence of tuples
Chris@87:         Sequence of tuples (dtype, function, default value) to evaluate in
Chris@87:         order.
Chris@87:     _locked : bool
Chris@87:         Holds `locked` parameter.
Chris@87: 
Chris@87:     Parameters
Chris@87:     ----------
Chris@87:     dtype_or_func : {None, dtype, function}, optional
Chris@87:         If a `dtype`, specifies the input data type, used to define a basic
Chris@87:         function and a default value for missing data. For example, when
Chris@87:         `dtype` is float, the `func` attribute is set to `float` and the
Chris@87:         default value to `np.nan`.  If a function, this function is used to
Chris@87:         convert a string to another object. In this case, it is recommended
Chris@87:         to give an associated default value as input.
Chris@87:     default : any, optional
Chris@87:         Value to return by default, that is, when the string to be
Chris@87:         converted is flagged as missing. If not given, `StringConverter`
Chris@87:         tries to supply a reasonable default value.
Chris@87:     missing_values : sequence of str, optional
Chris@87:         Sequence of strings indicating a missing value.
Chris@87:     locked : bool, optional
Chris@87:         Whether the StringConverter should be locked to prevent automatic
Chris@87:         upgrade or not. Default is False.
Chris@87: 
Chris@87:     """
Chris@87:     #
    # Ordered table of (type, conversion function, default value) triples.
    # An instance's `_status` is an index into this list, and upgrading a
    # converter moves it to the next entry, so the order goes from the most
    # restrictive conversion (bool) to the most permissive one (bytes).
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    # Column-wise snapshot of the table taken at class-creation time: all
    # types, all functions, and all default values, each as a tuple.
    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
Chris@87:     #
Chris@87: 
Chris@87:     @classmethod
Chris@87:     def _getdtype(cls, val):
Chris@87:         """Returns the dtype of the input variable."""
Chris@87:         return np.array(val).dtype
Chris@87:     #
Chris@87: 
Chris@87:     @classmethod
Chris@87:     def _getsubdtype(cls, val):
Chris@87:         """Returns the type of the dtype of the input variable."""
Chris@87:         return np.array(val).dtype.type
Chris@87:     #
Chris@87:     # This is a bit annoying. We want to return the "general" type in most
Chris@87:     # cases (ie. "string" rather than "S10"), but we want to return the
Chris@87:     # specific type for datetime64 (ie. "datetime64[us]" rather than
Chris@87:     # "datetime64").
Chris@87: 
Chris@87:     @classmethod
Chris@87:     def _dtypeortype(cls, dtype):
Chris@87:         """Returns dtype for datetime64 and type of dtype otherwise."""
Chris@87:         if dtype.type == np.datetime64:
Chris@87:             return dtype
Chris@87:         return dtype.type
Chris@87:     #
Chris@87: 
Chris@87:     @classmethod
Chris@87:     def upgrade_mapper(cls, func, default=None):
Chris@87:         """
Chris@87:     Upgrade the mapper of a StringConverter by adding a new function and
Chris@87:     its corresponding default.
Chris@87: 
Chris@87:     The input function (or sequence of functions) and its associated
Chris@87:     default value (if any) is inserted in penultimate position of the
Chris@87:     mapper.  The corresponding type is estimated from the dtype of the
Chris@87:     default value.
Chris@87: 
Chris@87:     Parameters
Chris@87:     ----------
Chris@87:     func : var
Chris@87:         Function, or sequence of functions
Chris@87: 
Chris@87:     Examples
Chris@87:     --------
Chris@87:     >>> import dateutil.parser
Chris@87:     >>> import datetime
Chris@87:     >>> dateparser = datetustil.parser.parse
Chris@87:     >>> defaultdate = datetime.date(2000, 1, 1)
Chris@87:     >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
Chris@87:         """
Chris@87:         # Func is a single functions
Chris@87:         if hasattr(func, '__call__'):
Chris@87:             cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
Chris@87:             return
Chris@87:         elif hasattr(func, '__iter__'):
Chris@87:             if isinstance(func[0], (tuple, list)):
Chris@87:                 for _ in func:
Chris@87:                     cls._mapper.insert(-1, _)
Chris@87:                 return
Chris@87:             if default is None:
Chris@87:                 default = [None] * len(func)
Chris@87:             else:
Chris@87:                 default = list(default)
Chris@87:                 default.append([None] * (len(func) - len(default)))
Chris@87:             for (fct, dft) in zip(func, default):
Chris@87:                 cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
Chris@87:     #
Chris@87: 
    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        """
        Select the conversion function, default value, and mapper status
        from `dtype_or_func`, and record the set of missing-value strings.

        Raises
        ------
        TypeError
            If `dtype_or_func` can neither be turned into a dtype nor be
            called.
        """
        # Convert unicode (for Py3)
        if isinstance(missing_values, unicode):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            # Start at the first mapper entry (bool).
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                # `func` stays None on this path; it is filled in from the
                # mapper once the status is known.
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                # The dtype is inferred from the (possibly guessed) default.
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            # The first mapper entry whose type `dtype` is a sub-dtype of
            # wins.
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            # (`func` is the loop variable left over from the `for` above:
            # the matching entry's function, or the last entry's function
            # when nothing matched).
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    # Go through float before converting to int.
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            # A single byte string is read as a comma-separated list.
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            # The empty string always counts as missing.
            self.missing_values = set(list(missing_values) + [asbytes('')])
        #
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        # Kept so that upgrades can preserve a default value that was
        # supplied (or guessed) here.
        self._initial_default = default
Chris@87: 
Chris@87:     def _loose_call(self, value):
Chris@87:         try:
Chris@87:             return self.func(value)
Chris@87:         except ValueError:
Chris@87:             return self.default
Chris@87:     #
Chris@87: 
Chris@87:     def _strict_call(self, value):
Chris@87:         try:
Chris@87:             return self.func(value)
Chris@87:         except ValueError:
Chris@87:             if value.strip() in self.missing_values:
Chris@87:                 if not self._status:
Chris@87:                     self._checked = False
Chris@87:                 return self.default
Chris@87:             raise ValueError("Cannot convert string '%s'" % value)
Chris@87:     #
Chris@87: 
Chris@87:     def __call__(self, value):
Chris@87:         return self._callingfunction(value)
Chris@87:     #
Chris@87: 
Chris@87:     def upgrade(self, value):
Chris@87:         """
Chris@87:         Find the best converter for a given string, and return the result.
Chris@87: 
Chris@87:         The supplied string `value` is converted by testing different
Chris@87:         converters in order. First the `func` method of the
Chris@87:         `StringConverter` instance is tried, if this fails other available
Chris@87:         converters are tried.  The order in which these other converters
Chris@87:         are tried is determined by the `_status` attribute of the instance.
Chris@87: 
Chris@87:         Parameters
Chris@87:         ----------
Chris@87:         value : str
Chris@87:             The string to convert.
Chris@87: 
Chris@87:         Returns
Chris@87:         -------
Chris@87:         out : any
Chris@87:             The result of converting `value` with the appropriate converter.
Chris@87: 
Chris@87:         """
Chris@87:         self._checked = True
Chris@87:         try:
Chris@87:             self._strict_call(value)
Chris@87:         except ValueError:
Chris@87:             # Raise an exception if we locked the converter...
Chris@87:             if self._locked:
Chris@87:                 errmsg = "Converter is locked and cannot be upgraded"
Chris@87:                 raise ConverterLockError(errmsg)
Chris@87:             _statusmax = len(self._mapper)
Chris@87:             # Complains if we try to upgrade by the maximum
Chris@87:             _status = self._status
Chris@87:             if _status == _statusmax:
Chris@87:                 errmsg = "Could not find a valid conversion function"
Chris@87:                 raise ConverterError(errmsg)
Chris@87:             elif _status < _statusmax - 1:
Chris@87:                 _status += 1
Chris@87:             (self.type, self.func, default) = self._mapper[_status]
Chris@87:             self._status = _status
Chris@87:             if self._initial_default is not None:
Chris@87:                 self.default = self._initial_default
Chris@87:             else:
Chris@87:                 self.default = default
Chris@87:             self.upgrade(value)
Chris@87: 
Chris@87:     def iterupgrade(self, value):
Chris@87:         self._checked = True
Chris@87:         if not hasattr(value, '__iter__'):
Chris@87:             value = (value,)
Chris@87:         _strict_call = self._strict_call
Chris@87:         try:
Chris@87:             for _m in value:
Chris@87:                 _strict_call(_m)
Chris@87:         except ValueError:
Chris@87:             # Raise an exception if we locked the converter...
Chris@87:             if self._locked:
Chris@87:                 errmsg = "Converter is locked and cannot be upgraded"
Chris@87:                 raise ConverterLockError(errmsg)
Chris@87:             _statusmax = len(self._mapper)
Chris@87:             # Complains if we try to upgrade by the maximum
Chris@87:             _status = self._status
Chris@87:             if _status == _statusmax:
Chris@87:                 raise ConverterError(
Chris@87:                     "Could not find a valid conversion function"
Chris@87:                     )
Chris@87:             elif _status < _statusmax - 1:
Chris@87:                 _status += 1
Chris@87:             (self.type, self.func, default) = self._mapper[_status]
Chris@87:             if self._initial_default is not None:
Chris@87:                 self.default = self._initial_default
Chris@87:             else:
Chris@87:                 self.default = default
Chris@87:             self._status = _status
Chris@87:             self.iterupgrade(value)
Chris@87: 
Chris@87:     def update(self, func, default=None, testing_value=None,
Chris@87:                missing_values=asbytes(''), locked=False):
Chris@87:         """
Chris@87:         Set StringConverter attributes directly.
Chris@87: 
Chris@87:         Parameters
Chris@87:         ----------
Chris@87:         func : function
Chris@87:             Conversion function.
Chris@87:         default : any, optional
Chris@87:             Value to return by default, that is, when the string to be
Chris@87:             converted is flagged as missing. If not given,
Chris@87:             `StringConverter` tries to supply a reasonable default value.
Chris@87:         testing_value : str, optional
Chris@87:             A string representing a standard input value of the converter.
Chris@87:             This string is used to help defining a reasonable default
Chris@87:             value.
Chris@87:         missing_values : sequence of str, optional
Chris@87:             Sequence of strings indicating a missing value.
Chris@87:         locked : bool, optional
Chris@87:             Whether the StringConverter should be locked to prevent
Chris@87:             automatic upgrade or not. Default is False.
Chris@87: 
Chris@87:         Notes
Chris@87:         -----
Chris@87:         `update` takes the same parameters as the constructor of
Chris@87:         `StringConverter`, except that `func` does not accept a `dtype`
Chris@87:         whereas `dtype_or_func` in the constructor does.
Chris@87: 
Chris@87:         """
Chris@87:         self.func = func
Chris@87:         self._locked = locked
Chris@87:         # Don't reset the default to None if we can avoid it
Chris@87:         if default is not None:
Chris@87:             self.default = default
Chris@87:             self.type = self._dtypeortype(self._getdtype(default))
Chris@87:         else:
Chris@87:             try:
Chris@87:                 tester = func(testing_value or asbytes('1'))
Chris@87:             except (TypeError, ValueError):
Chris@87:                 tester = None
Chris@87:             self.type = self._dtypeortype(self._getdtype(tester))
Chris@87:         # Add the missing values to the existing set
Chris@87:         if missing_values is not None:
Chris@87:             if _is_bytes_like(missing_values):
Chris@87:                 self.missing_values.add(missing_values)
Chris@87:             elif hasattr(missing_values, '__iter__'):
Chris@87:                 for val in missing_values:
Chris@87:                     self.missing_values.add(val)
Chris@87:         else:
Chris@87:             self.missing_values = []
Chris@87: 
Chris@87: 
Chris@87: def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
Chris@87:     """
Chris@87:     Convenience function to create a `np.dtype` object.
Chris@87: 
Chris@87:     The function processes the input `dtype` and matches it with the given
Chris@87:     names.
Chris@87: 
Chris@87:     Parameters
Chris@87:     ----------
Chris@87:     ndtype : var
Chris@87:         Definition of the dtype. Can be any string or dictionary recognized
Chris@87:         by the `np.dtype` function, or a sequence of types.
Chris@87:     names : str or sequence, optional
Chris@87:         Sequence of strings to use as field names for a structured dtype.
Chris@87:         For convenience, `names` can be a string of a comma-separated list
Chris@87:         of names.
Chris@87:     defaultfmt : str, optional
Chris@87:         Format string used to define missing names, such as ``"f%i"``
Chris@87:         (default) or ``"fields_%02i"``.
Chris@87:     validationargs : optional
Chris@87:         A series of optional arguments used to initialize a
Chris@87:         `NameValidator`.
Chris@87: 
Chris@87:     Examples
Chris@87:     --------
Chris@87:     >>> np.lib._iotools.easy_dtype(float)
Chris@87:     dtype('float64')
Chris@87:     >>> np.lib._iotools.easy_dtype("i4, f8")
Chris@87:     dtype([('f0', '<i4'), ('f1', '<f8')])
Chris@87:     >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
Chris@87:     dtype([('field_000', '<i4'), ('field_001', '<f8')])
Chris@87: 
Chris@87:     >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
Chris@87:     dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
Chris@87:     >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
Chris@87:     dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
Chris@87: 
Chris@87:     """
Chris@87:     try:
Chris@87:         ndtype = np.dtype(ndtype)
Chris@87:     except TypeError:
Chris@87:         validate = NameValidator(**validationargs)
Chris@87:         nbfields = len(ndtype)
Chris@87:         if names is None:
Chris@87:             names = [''] * len(ndtype)
Chris@87:         elif isinstance(names, basestring):
Chris@87:             names = names.split(",")
Chris@87:         names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
Chris@87:         ndtype = np.dtype(dict(formats=ndtype, names=names))
Chris@87:     else:
Chris@87:         nbtypes = len(ndtype)
Chris@87:         # Explicit names
Chris@87:         if names is not None:
Chris@87:             validate = NameValidator(**validationargs)
Chris@87:             if isinstance(names, basestring):
Chris@87:                 names = names.split(",")
Chris@87:             # Simple dtype: repeat to match the nb of names
Chris@87:             if nbtypes == 0:
Chris@87:                 formats = tuple([ndtype.type] * len(names))
Chris@87:                 names = validate(names, defaultfmt=defaultfmt)
Chris@87:                 ndtype = np.dtype(list(zip(names, formats)))
Chris@87:             # Structured dtype: just validate the names as needed
Chris@87:             else:
Chris@87:                 ndtype.names = validate(names, nbfields=nbtypes,
Chris@87:                                         defaultfmt=defaultfmt)
Chris@87:         # No implicit names
Chris@87:         elif (nbtypes > 0):
Chris@87:             validate = NameValidator(**validationargs)
Chris@87:             # Default initial names : should we change the format ?
Chris@87:             if ((ndtype.names == tuple("f%i" % i for i in range(nbtypes))) and
Chris@87:                     (defaultfmt != "f%i")):
Chris@87:                 ndtype.names = validate([''] * nbtypes, defaultfmt=defaultfmt)
Chris@87:             # Explicit initial names : just validate
Chris@87:             else:
Chris@87:                 ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
Chris@87:     return ndtype