Chris@87: """A collection of functions designed to help I/O with ascii files. Chris@87: Chris@87: """ Chris@87: from __future__ import division, absolute_import, print_function Chris@87: Chris@87: __docformat__ = "restructuredtext en" Chris@87: Chris@87: import sys Chris@87: import numpy as np Chris@87: import numpy.core.numeric as nx Chris@87: from numpy.compat import asbytes, bytes, asbytes_nested, basestring Chris@87: Chris@87: if sys.version_info[0] >= 3: Chris@87: from builtins import bool, int, float, complex, object, str Chris@87: unicode = str Chris@87: else: Chris@87: from __builtin__ import bool, int, float, complex, object, unicode, str Chris@87: Chris@87: Chris@87: if sys.version_info[0] >= 3: Chris@87: def _bytes_to_complex(s): Chris@87: return complex(s.decode('ascii')) Chris@87: Chris@87: def _bytes_to_name(s): Chris@87: return s.decode('ascii') Chris@87: else: Chris@87: _bytes_to_complex = complex Chris@87: _bytes_to_name = str Chris@87: Chris@87: def _is_string_like(obj): Chris@87: """ Chris@87: Check whether obj behaves like a string. Chris@87: """ Chris@87: try: Chris@87: obj + '' Chris@87: except (TypeError, ValueError): Chris@87: return False Chris@87: return True Chris@87: Chris@87: def _is_bytes_like(obj): Chris@87: """ Chris@87: Check whether obj behaves like a bytes object. Chris@87: """ Chris@87: try: Chris@87: obj + asbytes('') Chris@87: except (TypeError, ValueError): Chris@87: return False Chris@87: return True Chris@87: Chris@87: Chris@87: def _to_filehandle(fname, flag='r', return_opened=False): Chris@87: """ Chris@87: Returns the filehandle corresponding to a string or a file. Chris@87: If the string ends in '.gz', the file is automatically unzipped. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname : string, filehandle Chris@87: Name of the file whose filehandle must be returned. Chris@87: flag : string, optional Chris@87: Flag indicating the status of the file ('r' for read, 'w' for write). Chris@87: return_opened : boolean, optional Chris@87: Whether to return the opening status of the file. Chris@87: """ Chris@87: if _is_string_like(fname): Chris@87: if fname.endswith('.gz'): Chris@87: import gzip Chris@87: fhd = gzip.open(fname, flag) Chris@87: elif fname.endswith('.bz2'): Chris@87: import bz2 Chris@87: fhd = bz2.BZ2File(fname) Chris@87: else: Chris@87: fhd = file(fname, flag) Chris@87: opened = True Chris@87: elif hasattr(fname, 'seek'): Chris@87: fhd = fname Chris@87: opened = False Chris@87: else: Chris@87: raise ValueError('fname must be a string or file handle') Chris@87: if return_opened: Chris@87: return fhd, opened Chris@87: return fhd Chris@87: Chris@87: Chris@87: def has_nested_fields(ndtype): Chris@87: """ Chris@87: Returns whether one or several fields of a dtype are nested. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: ndtype : dtype Chris@87: Data-type of a structured array. Chris@87: Chris@87: Raises Chris@87: ------ Chris@87: AttributeError Chris@87: If `ndtype` does not have a `names` attribute. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)]) Chris@87: >>> np.lib._iotools.has_nested_fields(dt) Chris@87: False Chris@87: Chris@87: """ Chris@87: for name in ndtype.names or (): Chris@87: if ndtype[name].names: Chris@87: return True Chris@87: return False Chris@87: Chris@87: Chris@87: def flatten_dtype(ndtype, flatten_base=False): Chris@87: """ Chris@87: Unpack a structured data-type by collapsing nested fields and/or fields Chris@87: with a shape. Chris@87: Chris@87: Note that the field names are lost. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: ndtype : dtype Chris@87: The datatype to collapse Chris@87: flatten_base : {False, True}, optional Chris@87: Whether to transform a field with a shape into several fields or not. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), Chris@87: ... ('block', int, (2, 3))]) Chris@87: >>> np.lib._iotools.flatten_dtype(dt) Chris@87: [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')] Chris@87: >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True) Chris@87: [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'), Chris@87: dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'), Chris@87: dtype('int32')] Chris@87: Chris@87: """ Chris@87: names = ndtype.names Chris@87: if names is None: Chris@87: if flatten_base: Chris@87: return [ndtype.base] * int(np.prod(ndtype.shape)) Chris@87: return [ndtype.base] Chris@87: else: Chris@87: types = [] Chris@87: for field in names: Chris@87: info = ndtype.fields[field] Chris@87: flat_dt = flatten_dtype(info[0], flatten_base) Chris@87: types.extend(flat_dt) Chris@87: return types Chris@87: Chris@87: Chris@87: class LineSplitter(object): Chris@87: """ Chris@87: Object to split a string at a given delimiter or at given places. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: delimiter : str, int, or sequence of ints, optional Chris@87: If a string, character used to delimit consecutive fields. Chris@87: If an integer or a sequence of integers, width(s) of each field. Chris@87: comment : str, optional Chris@87: Character used to mark the beginning of a comment. Default is '#'. Chris@87: autostrip : bool, optional Chris@87: Whether to strip each individual field. Default is True. Chris@87: Chris@87: """ Chris@87: Chris@87: def autostrip(self, method): Chris@87: """ Chris@87: Wrapper to strip each member of the output of `method`. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: method : function Chris@87: Function that takes a single argument and returns a sequence of Chris@87: strings. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: wrapped : function Chris@87: The result of wrapping `method`. `wrapped` takes a single input Chris@87: argument and returns a list of strings that are stripped of Chris@87: white-space. Chris@87: Chris@87: """ Chris@87: return lambda input: [_.strip() for _ in method(input)] Chris@87: # Chris@87: Chris@87: def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True): Chris@87: self.comments = comments Chris@87: # Delimiter is a character Chris@87: if isinstance(delimiter, unicode): Chris@87: delimiter = delimiter.encode('ascii') Chris@87: if (delimiter is None) or _is_bytes_like(delimiter): Chris@87: delimiter = delimiter or None Chris@87: _handyman = self._delimited_splitter Chris@87: # Delimiter is a list of field widths Chris@87: elif hasattr(delimiter, '__iter__'): Chris@87: _handyman = self._variablewidth_splitter Chris@87: idx = np.cumsum([0] + list(delimiter)) Chris@87: delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])] Chris@87: # Delimiter is a single integer Chris@87: elif int(delimiter): Chris@87: (_handyman, delimiter) = ( Chris@87: self._fixedwidth_splitter, int(delimiter)) Chris@87: else: Chris@87: (_handyman, delimiter) = (self._delimited_splitter, None) Chris@87: self.delimiter = delimiter Chris@87: if autostrip: Chris@87: self._handyman = self.autostrip(_handyman) Chris@87: else: Chris@87: self._handyman = _handyman Chris@87: # Chris@87: Chris@87: def _delimited_splitter(self, line): Chris@87: if self.comments is not None: Chris@87: line = line.split(self.comments)[0] Chris@87: line = line.strip(asbytes(" \r\n")) Chris@87: if not line: Chris@87: return [] Chris@87: return line.split(self.delimiter) Chris@87: # Chris@87: Chris@87: def _fixedwidth_splitter(self, line): Chris@87: if self.comments is not None: Chris@87: line = line.split(self.comments)[0] Chris@87: line = line.strip(asbytes("\r\n")) Chris@87: if not line: Chris@87: return [] Chris@87: fixed = self.delimiter Chris@87: slices = [slice(i, i + fixed) for i in range(0, len(line), fixed)] Chris@87: return [line[s] for s in slices] Chris@87: # Chris@87: Chris@87: def _variablewidth_splitter(self, line): Chris@87: if self.comments is not None: Chris@87: line = line.split(self.comments)[0] Chris@87: if not line: Chris@87: return [] Chris@87: slices = self.delimiter Chris@87: return [line[s] for s in slices] Chris@87: # Chris@87: Chris@87: def __call__(self, line): Chris@87: return self._handyman(line) Chris@87: Chris@87: Chris@87: class NameValidator(object): Chris@87: """ Chris@87: Object to validate a list of strings to use as field names. Chris@87: Chris@87: The strings are stripped of any non alphanumeric character, and spaces Chris@87: are replaced by '_'. During instantiation, the user can define a list Chris@87: of names to exclude, as well as a list of invalid characters. Names in Chris@87: the exclusion list are appended a '_' character. Chris@87: Chris@87: Once an instance has been created, it can be called with a list of Chris@87: names, and a list of valid names will be created. The `__call__` Chris@87: method accepts an optional keyword "default" that sets the default name Chris@87: in case of ambiguity. By default this is 'f', so that names will Chris@87: default to `f0`, `f1`, etc. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: excludelist : sequence, optional Chris@87: A list of names to exclude. This list is appended to the default Chris@87: list ['return', 'file', 'print']. Excluded names are appended an Chris@87: underscore: for example, `file` becomes `file_` if supplied. Chris@87: deletechars : str, optional Chris@87: A string combining invalid characters that must be deleted from the Chris@87: names. Chris@87: casesensitive : {True, False, 'upper', 'lower'}, optional Chris@87: * If True, field names are case-sensitive. Chris@87: * If False or 'upper', field names are converted to upper case. Chris@87: * If 'lower', field names are converted to lower case. Chris@87: Chris@87: The default value is True. Chris@87: replace_space : '_', optional Chris@87: Character(s) used in replacement of white spaces. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: Calling an instance of `NameValidator` is the same as calling its Chris@87: method `validate`. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> validator = np.lib._iotools.NameValidator() Chris@87: >>> validator(['file', 'field2', 'with space', 'CaSe']) Chris@87: ['file_', 'field2', 'with_space', 'CaSe'] Chris@87: Chris@87: >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'], Chris@87: deletechars='q', Chris@87: case_sensitive='False') Chris@87: >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe']) Chris@87: ['excl_', 'field2', 'no_', 'with_space', 'case'] Chris@87: Chris@87: """ Chris@87: # Chris@87: defaultexcludelist = ['return', 'file', 'print'] Chris@87: defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""") Chris@87: # Chris@87: Chris@87: def __init__(self, excludelist=None, deletechars=None, Chris@87: case_sensitive=None, replace_space='_'): Chris@87: # Process the exclusion list .. Chris@87: if excludelist is None: Chris@87: excludelist = [] Chris@87: excludelist.extend(self.defaultexcludelist) Chris@87: self.excludelist = excludelist Chris@87: # Process the list of characters to delete Chris@87: if deletechars is None: Chris@87: delete = self.defaultdeletechars Chris@87: else: Chris@87: delete = set(deletechars) Chris@87: delete.add('"') Chris@87: self.deletechars = delete Chris@87: # Process the case option ..... Chris@87: if (case_sensitive is None) or (case_sensitive is True): Chris@87: self.case_converter = lambda x: x Chris@87: elif (case_sensitive is False) or ('u' in case_sensitive): Chris@87: self.case_converter = lambda x: x.upper() Chris@87: elif 'l' in case_sensitive: Chris@87: self.case_converter = lambda x: x.lower() Chris@87: else: Chris@87: self.case_converter = lambda x: x Chris@87: # Chris@87: self.replace_space = replace_space Chris@87: Chris@87: def validate(self, names, defaultfmt="f%i", nbfields=None): Chris@87: """ Chris@87: Validate a list of strings as field names for a structured array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: names : sequence of str Chris@87: Strings to be validated. Chris@87: defaultfmt : str, optional Chris@87: Default format string, used if validating a given string Chris@87: reduces its length to zero. Chris@87: nboutput : integer, optional Chris@87: Final number of validated names, used to expand or shrink the Chris@87: initial list of names. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: validatednames : list of str Chris@87: The list of validated field names. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: A `NameValidator` instance can be called directly, which is the Chris@87: same as calling `validate`. For examples, see `NameValidator`. Chris@87: Chris@87: """ Chris@87: # Initial checks .............. Chris@87: if (names is None): Chris@87: if (nbfields is None): Chris@87: return None Chris@87: names = [] Chris@87: if isinstance(names, basestring): Chris@87: names = [names, ] Chris@87: if nbfields is not None: Chris@87: nbnames = len(names) Chris@87: if (nbnames < nbfields): Chris@87: names = list(names) + [''] * (nbfields - nbnames) Chris@87: elif (nbnames > nbfields): Chris@87: names = names[:nbfields] Chris@87: # Set some shortcuts ........... Chris@87: deletechars = self.deletechars Chris@87: excludelist = self.excludelist Chris@87: case_converter = self.case_converter Chris@87: replace_space = self.replace_space Chris@87: # Initializes some variables ... Chris@87: validatednames = [] Chris@87: seen = dict() Chris@87: nbempty = 0 Chris@87: # Chris@87: for item in names: Chris@87: item = case_converter(item).strip() Chris@87: if replace_space: Chris@87: item = item.replace(' ', replace_space) Chris@87: item = ''.join([c for c in item if c not in deletechars]) Chris@87: if item == '': Chris@87: item = defaultfmt % nbempty Chris@87: while item in names: Chris@87: nbempty += 1 Chris@87: item = defaultfmt % nbempty Chris@87: nbempty += 1 Chris@87: elif item in excludelist: Chris@87: item += '_' Chris@87: cnt = seen.get(item, 0) Chris@87: if cnt > 0: Chris@87: validatednames.append(item + '_%d' % cnt) Chris@87: else: Chris@87: validatednames.append(item) Chris@87: seen[item] = cnt + 1 Chris@87: return tuple(validatednames) Chris@87: # Chris@87: Chris@87: def __call__(self, names, defaultfmt="f%i", nbfields=None): Chris@87: return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields) Chris@87: Chris@87: Chris@87: def str2bool(value): Chris@87: """ Chris@87: Tries to transform a string supposed to represent a boolean to a boolean. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: value : str Chris@87: The string that is transformed to a boolean. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: boolval : bool Chris@87: The boolean representation of `value`. Chris@87: Chris@87: Raises Chris@87: ------ Chris@87: ValueError Chris@87: If the string is not 'True' or 'False' (case independent) Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> np.lib._iotools.str2bool('TRUE') Chris@87: True Chris@87: >>> np.lib._iotools.str2bool('false') Chris@87: False Chris@87: Chris@87: """ Chris@87: value = value.upper() Chris@87: if value == asbytes('TRUE'): Chris@87: return True Chris@87: elif value == asbytes('FALSE'): Chris@87: return False Chris@87: else: Chris@87: raise ValueError("Invalid boolean") Chris@87: Chris@87: Chris@87: class ConverterError(Exception): Chris@87: """ Chris@87: Exception raised when an error occurs in a converter for string values. Chris@87: Chris@87: """ Chris@87: pass Chris@87: Chris@87: class ConverterLockError(ConverterError): Chris@87: """ Chris@87: Exception raised when an attempt is made to upgrade a locked converter. Chris@87: Chris@87: """ Chris@87: pass Chris@87: Chris@87: class ConversionWarning(UserWarning): Chris@87: """ Chris@87: Warning issued when a string converter has a problem. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: In `genfromtxt` a `ConversionWarning` is issued if raising exceptions Chris@87: is explicitly suppressed with the "invalid_raise" keyword. Chris@87: Chris@87: """ Chris@87: pass Chris@87: Chris@87: Chris@87: class StringConverter(object): Chris@87: """ Chris@87: Factory class for function transforming a string into another object Chris@87: (int, float). Chris@87: Chris@87: After initialization, an instance can be called to transform a string Chris@87: into another object. If the string is recognized as representing a Chris@87: missing value, a default value is returned. Chris@87: Chris@87: Attributes Chris@87: ---------- Chris@87: func : function Chris@87: Function used for the conversion. Chris@87: default : any Chris@87: Default value to return when the input corresponds to a missing Chris@87: value. Chris@87: type : type Chris@87: Type of the output. Chris@87: _status : int Chris@87: Integer representing the order of the conversion. Chris@87: _mapper : sequence of tuples Chris@87: Sequence of tuples (dtype, function, default value) to evaluate in Chris@87: order. Chris@87: _locked : bool Chris@87: Holds `locked` parameter. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: dtype_or_func : {None, dtype, function}, optional Chris@87: If a `dtype`, specifies the input data type, used to define a basic Chris@87: function and a default value for missing data. For example, when Chris@87: `dtype` is float, the `func` attribute is set to `float` and the Chris@87: default value to `np.nan`. If a function, this function is used to Chris@87: convert a string to another object. In this case, it is recommended Chris@87: to give an associated default value as input. Chris@87: default : any, optional Chris@87: Value to return by default, that is, when the string to be Chris@87: converted is flagged as missing. If not given, `StringConverter` Chris@87: tries to supply a reasonable default value. Chris@87: missing_values : sequence of str, optional Chris@87: Sequence of strings indicating a missing value. Chris@87: locked : bool, optional Chris@87: Whether the StringConverter should be locked to prevent automatic Chris@87: upgrade or not. Default is False. Chris@87: Chris@87: """ Chris@87: # Chris@87: _mapper = [(nx.bool_, str2bool, False), Chris@87: (nx.integer, int, -1), Chris@87: (nx.floating, float, nx.nan), Chris@87: (complex, _bytes_to_complex, nx.nan + 0j), Chris@87: (nx.string_, bytes, asbytes('???'))] Chris@87: (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) Chris@87: # Chris@87: Chris@87: @classmethod Chris@87: def _getdtype(cls, val): Chris@87: """Returns the dtype of the input variable.""" Chris@87: return np.array(val).dtype Chris@87: # Chris@87: Chris@87: @classmethod Chris@87: def _getsubdtype(cls, val): Chris@87: """Returns the type of the dtype of the input variable.""" Chris@87: return np.array(val).dtype.type Chris@87: # Chris@87: # This is a bit annoying. We want to return the "general" type in most Chris@87: # cases (ie. "string" rather than "S10"), but we want to return the Chris@87: # specific type for datetime64 (ie. "datetime64[us]" rather than Chris@87: # "datetime64"). Chris@87: Chris@87: @classmethod Chris@87: def _dtypeortype(cls, dtype): Chris@87: """Returns dtype for datetime64 and type of dtype otherwise.""" Chris@87: if dtype.type == np.datetime64: Chris@87: return dtype Chris@87: return dtype.type Chris@87: # Chris@87: Chris@87: @classmethod Chris@87: def upgrade_mapper(cls, func, default=None): Chris@87: """ Chris@87: Upgrade the mapper of a StringConverter by adding a new function and Chris@87: its corresponding default. Chris@87: Chris@87: The input function (or sequence of functions) and its associated Chris@87: default value (if any) is inserted in penultimate position of the Chris@87: mapper. The corresponding type is estimated from the dtype of the Chris@87: default value. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: func : var Chris@87: Function, or sequence of functions Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> import dateutil.parser Chris@87: >>> import datetime Chris@87: >>> dateparser = datetustil.parser.parse Chris@87: >>> defaultdate = datetime.date(2000, 1, 1) Chris@87: >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate) Chris@87: """ Chris@87: # Func is a single functions Chris@87: if hasattr(func, '__call__'): Chris@87: cls._mapper.insert(-1, (cls._getsubdtype(default), func, default)) Chris@87: return Chris@87: elif hasattr(func, '__iter__'): Chris@87: if isinstance(func[0], (tuple, list)): Chris@87: for _ in func: Chris@87: cls._mapper.insert(-1, _) Chris@87: return Chris@87: if default is None: Chris@87: default = [None] * len(func) Chris@87: else: Chris@87: default = list(default) Chris@87: default.append([None] * (len(func) - len(default))) Chris@87: for (fct, dft) in zip(func, default): Chris@87: cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft)) Chris@87: # Chris@87: Chris@87: def __init__(self, dtype_or_func=None, default=None, missing_values=None, Chris@87: locked=False): Chris@87: # Convert unicode (for Py3) Chris@87: if isinstance(missing_values, unicode): Chris@87: missing_values = asbytes(missing_values) Chris@87: elif isinstance(missing_values, (list, tuple)): Chris@87: missing_values = asbytes_nested(missing_values) Chris@87: # Defines a lock for upgrade Chris@87: self._locked = bool(locked) Chris@87: # No input dtype: minimal initialization Chris@87: if dtype_or_func is None: Chris@87: self.func = str2bool Chris@87: self._status = 0 Chris@87: self.default = default or False Chris@87: dtype = np.dtype('bool') Chris@87: else: Chris@87: # Is the input a np.dtype ? Chris@87: try: Chris@87: self.func = None Chris@87: dtype = np.dtype(dtype_or_func) Chris@87: except TypeError: Chris@87: # dtype_or_func must be a function, then Chris@87: if not hasattr(dtype_or_func, '__call__'): Chris@87: errmsg = ("The input argument `dtype` is neither a" Chris@87: " function nor a dtype (got '%s' instead)") Chris@87: raise TypeError(errmsg % type(dtype_or_func)) Chris@87: # Set the function Chris@87: self.func = dtype_or_func Chris@87: # If we don't have a default, try to guess it or set it to Chris@87: # None Chris@87: if default is None: Chris@87: try: Chris@87: default = self.func(asbytes('0')) Chris@87: except ValueError: Chris@87: default = None Chris@87: dtype = self._getdtype(default) Chris@87: # Set the status according to the dtype Chris@87: _status = -1 Chris@87: for (i, (deftype, func, default_def)) in enumerate(self._mapper): Chris@87: if np.issubdtype(dtype.type, deftype): Chris@87: _status = i Chris@87: if default is None: Chris@87: self.default = default_def Chris@87: else: Chris@87: self.default = default Chris@87: break Chris@87: if _status == -1: Chris@87: # We never found a match in the _mapper... Chris@87: _status = 0 Chris@87: self.default = default Chris@87: self._status = _status Chris@87: # If the input was a dtype, set the function to the last we saw Chris@87: if self.func is None: Chris@87: self.func = func Chris@87: # If the status is 1 (int), change the function to Chris@87: # something more robust. Chris@87: if self.func == self._mapper[1][1]: Chris@87: if issubclass(dtype.type, np.uint64): Chris@87: self.func = np.uint64 Chris@87: elif issubclass(dtype.type, np.int64): Chris@87: self.func = np.int64 Chris@87: else: Chris@87: self.func = lambda x: int(float(x)) Chris@87: # Store the list of strings corresponding to missing values. Chris@87: if missing_values is None: Chris@87: self.missing_values = set([asbytes('')]) Chris@87: else: Chris@87: if isinstance(missing_values, bytes): Chris@87: missing_values = missing_values.split(asbytes(",")) Chris@87: self.missing_values = set(list(missing_values) + [asbytes('')]) Chris@87: # Chris@87: self._callingfunction = self._strict_call Chris@87: self.type = self._dtypeortype(dtype) Chris@87: self._checked = False Chris@87: self._initial_default = default Chris@87: # Chris@87: Chris@87: def _loose_call(self, value): Chris@87: try: Chris@87: return self.func(value) Chris@87: except ValueError: Chris@87: return self.default Chris@87: # Chris@87: Chris@87: def _strict_call(self, value): Chris@87: try: Chris@87: return self.func(value) Chris@87: except ValueError: Chris@87: if value.strip() in self.missing_values: Chris@87: if not self._status: Chris@87: self._checked = False Chris@87: return self.default Chris@87: raise ValueError("Cannot convert string '%s'" % value) Chris@87: # Chris@87: Chris@87: def __call__(self, value): Chris@87: return self._callingfunction(value) Chris@87: # Chris@87: Chris@87: def upgrade(self, value): Chris@87: """ Chris@87: Find the best converter for a given string, and return the result. Chris@87: Chris@87: The supplied string `value` is converted by testing different Chris@87: converters in order. First the `func` method of the Chris@87: `StringConverter` instance is tried, if this fails other available Chris@87: converters are tried. The order in which these other converters Chris@87: are tried is determined by the `_status` attribute of the instance. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: value : str Chris@87: The string to convert. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: out : any Chris@87: The result of converting `value` with the appropriate converter. Chris@87: Chris@87: """ Chris@87: self._checked = True Chris@87: try: Chris@87: self._strict_call(value) Chris@87: except ValueError: Chris@87: # Raise an exception if we locked the converter... Chris@87: if self._locked: Chris@87: errmsg = "Converter is locked and cannot be upgraded" Chris@87: raise ConverterLockError(errmsg) Chris@87: _statusmax = len(self._mapper) Chris@87: # Complains if we try to upgrade by the maximum Chris@87: _status = self._status Chris@87: if _status == _statusmax: Chris@87: errmsg = "Could not find a valid conversion function" Chris@87: raise ConverterError(errmsg) Chris@87: elif _status < _statusmax - 1: Chris@87: _status += 1 Chris@87: (self.type, self.func, default) = self._mapper[_status] Chris@87: self._status = _status Chris@87: if self._initial_default is not None: Chris@87: self.default = self._initial_default Chris@87: else: Chris@87: self.default = default Chris@87: self.upgrade(value) Chris@87: Chris@87: def iterupgrade(self, value): Chris@87: self._checked = True Chris@87: if not hasattr(value, '__iter__'): Chris@87: value = (value,) Chris@87: _strict_call = self._strict_call Chris@87: try: Chris@87: for _m in value: Chris@87: _strict_call(_m) Chris@87: except ValueError: Chris@87: # Raise an exception if we locked the converter... Chris@87: if self._locked: Chris@87: errmsg = "Converter is locked and cannot be upgraded" Chris@87: raise ConverterLockError(errmsg) Chris@87: _statusmax = len(self._mapper) Chris@87: # Complains if we try to upgrade by the maximum Chris@87: _status = self._status Chris@87: if _status == _statusmax: Chris@87: raise ConverterError( Chris@87: "Could not find a valid conversion function" Chris@87: ) Chris@87: elif _status < _statusmax - 1: Chris@87: _status += 1 Chris@87: (self.type, self.func, default) = self._mapper[_status] Chris@87: if self._initial_default is not None: Chris@87: self.default = self._initial_default Chris@87: else: Chris@87: self.default = default Chris@87: self._status = _status Chris@87: self.iterupgrade(value) Chris@87: Chris@87: def update(self, func, default=None, testing_value=None, Chris@87: missing_values=asbytes(''), locked=False): Chris@87: """ Chris@87: Set StringConverter attributes directly. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: func : function Chris@87: Conversion function. Chris@87: default : any, optional Chris@87: Value to return by default, that is, when the string to be Chris@87: converted is flagged as missing. If not given, Chris@87: `StringConverter` tries to supply a reasonable default value. Chris@87: testing_value : str, optional Chris@87: A string representing a standard input value of the converter. Chris@87: This string is used to help defining a reasonable default Chris@87: value. Chris@87: missing_values : sequence of str, optional Chris@87: Sequence of strings indicating a missing value. Chris@87: locked : bool, optional Chris@87: Whether the StringConverter should be locked to prevent Chris@87: automatic upgrade or not. Default is False. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: `update` takes the same parameters as the constructor of Chris@87: `StringConverter`, except that `func` does not accept a `dtype` Chris@87: whereas `dtype_or_func` in the constructor does. Chris@87: Chris@87: """ Chris@87: self.func = func Chris@87: self._locked = locked Chris@87: # Don't reset the default to None if we can avoid it Chris@87: if default is not None: Chris@87: self.default = default Chris@87: self.type = self._dtypeortype(self._getdtype(default)) Chris@87: else: Chris@87: try: Chris@87: tester = func(testing_value or asbytes('1')) Chris@87: except (TypeError, ValueError): Chris@87: tester = None Chris@87: self.type = self._dtypeortype(self._getdtype(tester)) Chris@87: # Add the missing values to the existing set Chris@87: if missing_values is not None: Chris@87: if _is_bytes_like(missing_values): Chris@87: self.missing_values.add(missing_values) Chris@87: elif hasattr(missing_values, '__iter__'): Chris@87: for val in missing_values: Chris@87: self.missing_values.add(val) Chris@87: else: Chris@87: self.missing_values = [] Chris@87: Chris@87: Chris@87: def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs): Chris@87: """ Chris@87: Convenience function to create a `np.dtype` object. Chris@87: Chris@87: The function processes the input `dtype` and matches it with the given Chris@87: names. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: ndtype : var Chris@87: Definition of the dtype. Can be any string or dictionary recognized Chris@87: by the `np.dtype` function, or a sequence of types. Chris@87: names : str or sequence, optional Chris@87: Sequence of strings to use as field names for a structured dtype. Chris@87: For convenience, `names` can be a string of a comma-separated list Chris@87: of names. Chris@87: defaultfmt : str, optional Chris@87: Format string used to define missing names, such as ``"f%i"`` Chris@87: (default) or ``"fields_%02i"``. Chris@87: validationargs : optional Chris@87: A series of optional arguments used to initialize a Chris@87: `NameValidator`. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> np.lib._iotools.easy_dtype(float) Chris@87: dtype('float64') Chris@87: >>> np.lib._iotools.easy_dtype("i4, f8") Chris@87: dtype([('f0', '>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i") Chris@87: dtype([('field_000', '>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c") Chris@87: dtype([('a', '>> np.lib._iotools.easy_dtype(float, names="a,b,c") Chris@87: dtype([('a', ' 0): Chris@87: validate = NameValidator(**validationargs) Chris@87: # Default initial names : should we change the format ? Chris@87: if ((ndtype.names == tuple("f%i" % i for i in range(nbtypes))) and Chris@87: (defaultfmt != "f%i")): Chris@87: ndtype.names = validate([''] * nbtypes, defaultfmt=defaultfmt) Chris@87: # Explicit initial names : just validate Chris@87: else: Chris@87: ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt) Chris@87: return ndtype