annotate DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/_iotools.py @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents 2a2c65a20a8b
children
rev   line source
Chris@87 1 """A collection of functions designed to help I/O with ascii files.
Chris@87 2
Chris@87 3 """
Chris@87 4 from __future__ import division, absolute_import, print_function
Chris@87 5
Chris@87 6 __docformat__ = "restructuredtext en"
Chris@87 7
Chris@87 8 import sys
Chris@87 9 import numpy as np
Chris@87 10 import numpy.core.numeric as nx
Chris@87 11 from numpy.compat import asbytes, bytes, asbytes_nested, basestring
Chris@87 12
Chris@87 13 if sys.version_info[0] >= 3:
Chris@87 14 from builtins import bool, int, float, complex, object, str
Chris@87 15 unicode = str
Chris@87 16 else:
Chris@87 17 from __builtin__ import bool, int, float, complex, object, unicode, str
Chris@87 18
Chris@87 19
Chris@87 20 if sys.version_info[0] >= 3:
Chris@87 21 def _bytes_to_complex(s):
Chris@87 22 return complex(s.decode('ascii'))
Chris@87 23
Chris@87 24 def _bytes_to_name(s):
Chris@87 25 return s.decode('ascii')
Chris@87 26 else:
Chris@87 27 _bytes_to_complex = complex
Chris@87 28 _bytes_to_name = str
Chris@87 29
Chris@87 30 def _is_string_like(obj):
Chris@87 31 """
Chris@87 32 Check whether obj behaves like a string.
Chris@87 33 """
Chris@87 34 try:
Chris@87 35 obj + ''
Chris@87 36 except (TypeError, ValueError):
Chris@87 37 return False
Chris@87 38 return True
Chris@87 39
Chris@87 40 def _is_bytes_like(obj):
Chris@87 41 """
Chris@87 42 Check whether obj behaves like a bytes object.
Chris@87 43 """
Chris@87 44 try:
Chris@87 45 obj + asbytes('')
Chris@87 46 except (TypeError, ValueError):
Chris@87 47 return False
Chris@87 48 return True
Chris@87 49
Chris@87 50
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.

    If the string ends in '.gz' the file is opened with `gzip`; if it ends
    in '.bz2' it is opened with `bz2`.

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned, or an object
        that already exposes a ``seek`` attribute.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
        It is forwarded to whichever opener is selected.
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only returned if `return_opened` is True. True when this function
        opened the file itself, False when `fname` was already a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with ``seek``.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Forward the mode: without it the file is always opened for
            # reading, even when the caller asked for 'w'.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `open` exists on both Python 2 and 3; the `file` builtin is
            # Python 2 only.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Chris@87 83
Chris@87 84
def has_nested_fields(ndtype):
    """
    Report whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True as soon as one field has field names of its own, False
        otherwise (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # `names` is None for unstructured dtypes: fall back to "no fields".
    fieldnames = ndtype.names or ()
    return any(ndtype[key].names for key in fieldnames)
Chris@87 110
Chris@87 111
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse.
    flatten_base : {False, True}, optional
        If True, a field with a shape contributes one entry per element
        of that shape; if False it contributes a single entry.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
     dtype('int32')]

    """
    if ndtype.names is None:
        # Leaf: repeat the base dtype once per element when asked to.
        repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
        return [ndtype.base] * repeat
    collected = []
    for key in ndtype.names:
        # fields[key] is a tuple whose first item is the field's dtype.
        collected += flatten_dtype(ndtype.fields[key][0], flatten_base)
    return collected
Chris@87 150
Chris@87 151
class LineSplitter(object):
    """
    Callable that cuts a line into fields, either at a delimiter or at
    fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Marker for the beginning of a comment; everything from its first
        occurrence on is discarded. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every item it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking a single argument, calling `method` on it and
            returning the list of stripped results.

        """
        def wrapped(line):
            return [field.strip() for field in method(line)]
        return wrapped

    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
        self.comments = comments
        # Text delimiters are converted to ASCII bytes first.
        if isinstance(delimiter, unicode):
            delimiter = delimiter.encode('ascii')
        if delimiter is None or _is_bytes_like(delimiter):
            # Character delimiter; an empty one is normalised to None.
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute one slice per field.
            splitter = self._variablewidth_splitter
            edges = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for (start, stop) in zip(edges[:-1], edges[1:])]
        elif int(delimiter):
            # Single non-zero integer: constant field width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _drop_comments(self, line):
        """Return `line` cut at the first comment marker, if one is set."""
        if self.comments is None:
            return line
        return line.split(self.comments)[0]

    def _delimited_splitter(self, line):
        """Split `line` at `self.delimiter` after removing comments."""
        content = self._drop_comments(line).strip(asbytes(" \r\n"))
        if content:
            return content.split(self.delimiter)
        return []

    def _fixedwidth_splitter(self, line):
        """Cut `line` into consecutive chunks of `self.delimiter` items."""
        content = self._drop_comments(line).strip(asbytes("\r\n"))
        if not content:
            return []
        width = self.delimiter
        return [content[start:start + width]
                for start in range(0, len(content), width)]

    def _variablewidth_splitter(self, line):
        """Cut `line` with the slices precomputed in `self.delimiter`."""
        content = self._drop_comments(line)
        if not content:
            return []
        return [content[piece] for piece in self.delimiter]

    def __call__(self, line):
        return self._handyman(line)
Chris@87 246
Chris@87 247
class NameValidator(object):
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the default
    name in case of ambiguity. By default this is 'f%i', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default names
        ['return', 'file', 'print'] are always excluded as well. Excluded
        names are appended an underscore: for example, `file` becomes
        `file_` if supplied. The sequence passed in is not modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote is always deleted.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive='lower')
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('excl_', 'field2', 'no_', 'with_space', 'case')

    """

    defaultexcludelist = ['return', 'file', 'print']
    # Raw literal: the backslash is a character to delete, not an escape.
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Build a fresh list so that neither the caller's sequence nor the
        # class-level default is modified.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + list(self.defaultexcludelist)
        # Work on a copy: adding '"' must not leak into the shared
        # class-level set.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Select the case conversion applied to every name.
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or ('u' in case_sensitive):
            self.case_converter = lambda x: x.upper()
        elif 'l' in case_sensitive:
            self.case_converter = lambda x: x.lower()
        else:
            self.case_converter = lambda x: x
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-item list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, basestring):
            names = [names, ]
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                # Pad with empty names; they get default names below.
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Local shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        validatednames = []
        seen = {}       # validated name -> number of times already produced
        nbempty = 0     # next index to try for a default name
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Pick the first default name that does not collide with
                # one of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
Chris@87 403
Chris@87 404
def str2bool(value):
    """
    Tries to transform a string supposed to represent a boolean to a boolean.

    Parameters
    ----------
    value : str or bytes
        The string that is transformed to a boolean. Both text and byte
        strings are accepted.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    # Normalise byte strings to text so a single comparison works on both
    # Python 2 and Python 3 (where text and bytes never compare equal).
    # latin1 maps every byte, so decoding itself cannot fail.
    if isinstance(value, bytes):
        value = value.decode('latin1')
    value = value.upper()
    if value == u'TRUE':
        return True
    elif value == u'FALSE':
        return False
    else:
        raise ValueError("Invalid boolean")
Chris@87 439
Chris@87 440
class ConverterError(Exception):
    """
    Raised when a converter for string values runs into an error.

    """
Chris@87 447
class ConverterLockError(ConverterError):
    """
    Raised when an upgrade is attempted on a locked converter.

    """
Chris@87 454
class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter has a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
Chris@87 466
Chris@87 467
class StringConverter(object):
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`. If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : sequence of str, optional
        Sequence of strings indicating a missing value.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """

    # Ordered conversion attempts: (type, conversion function, default).
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    # This is a bit annoying. We want to return the "general" type in most
    # cases (ie. "string" rather than "S10"), but we want to return the
    # specific type for datetime64 (ie. "datetime64[us]" rather than
    # "datetime64").
    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper. The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence of ready-made
            (type, function, default) tuples/lists is inserted as is.
        default : any or sequence, optional
            Default value for `func`, or one default per function when
            `func` is a sequence. Missing defaults are filled with None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default
                # (`extend`, so the padding is not nested as one item).
                default.extend([None] * (len(func) - len(default)))
            for (fct, dft) in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Convert unicode (for Py3)
        if isinstance(missing_values, unicode):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            # (`func` is the loop variable left over from the scan above).
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            self.missing_values = set(list(missing_values) + [asbytes('')])
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default on any `ValueError`."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; only registered missing values fall back to the
        default, any other failure raises `ValueError`.
        """
        try:
            return self.func(value)
        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move this converter to the next entry of `_mapper`.

        Raises
        ------
        ConverterLockError
            If the converter is locked.
        ConverterError
            If `_status` equals the length of `_mapper`.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        # NOTE(review): `_status` is used as an index into `_mapper`, so it
        # looks like it can never equal len(_mapper); check kept as is.
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        # Re-read the mapper entry even when the status did not advance.
        (self.type, self.func, default) = self._mapper[_status]
        self._status = _status
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried. The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        Raises
        ------
        ConverterLockError
            If an upgrade is needed but the converter is locked.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` converts.

        Parameters
        ----------
        value : str or iterable of str
            The string(s) to test. An object without ``__iter__`` is
            treated as a single item.

        Raises
        ------
        ConverterLockError
            If an upgrade is needed but the converter is locked.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values=asbytes(''), locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : sequence of str, optional
            Sequence of strings indicating a missing value. They are added
            to the existing set; passing None clears the set instead.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked
        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or asbytes('1'))
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))
        # Add the missing values to the existing set
        if missing_values is not None:
            if _is_bytes_like(missing_values):
                self.missing_values.add(missing_values)
            elif hasattr(missing_values, '__iter__'):
                for val in missing_values:
                    self.missing_values.add(val)
        else:
            # Keep the attribute a set so later `.add` calls still work.
            self.missing_values = set()
Chris@87 814
Chris@87 815
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a loose description and optional field names.

    The input `ndtype` is converted to a dtype and its field names are
    matched with (and validated against) the given `names`.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not directly understood by np.dtype: treat the input as a
        # sequence of formats and pair it with validated names.
        checker = NameValidator(**validationargs)
        count = len(ndtype)
        if names is None:
            names = [''] * count
        elif isinstance(names, basestring):
            names = names.split(",")
        names = checker(names, nbfields=count, defaultfmt=defaultfmt)
        return np.dtype({'formats': ndtype, 'names': names})

    count = len(ndtype)
    if names is None and count <= 0:
        # Plain dtype and nothing to name: return it untouched.
        return ndtype

    checker = NameValidator(**validationargs)
    if names is not None:
        # Explicit names
        if isinstance(names, basestring):
            names = names.split(",")
        if count == 0:
            # Simple dtype: repeat to match the nb of names
            formats = tuple([ndtype.type] * len(names))
            names = checker(names, defaultfmt=defaultfmt)
            return np.dtype(list(zip(names, formats)))
        # Structured dtype: just validate the names as needed
        ndtype.names = checker(names, nbfields=count, defaultfmt=defaultfmt)
        return ndtype

    # No explicit names, structured dtype.
    autonames = tuple("f%i" % i for i in range(count))
    if ndtype.names == autonames and defaultfmt != "f%i":
        # Default initial names with a custom format: regenerate them.
        ndtype.names = checker([''] * count, defaultfmt=defaultfmt)
    else:
        # Explicit initial names : just validate
        ndtype.names = checker(ndtype.names, defaultfmt=defaultfmt)
    return ndtype