vamp-build-and-test: DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/

annotate DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/_iotools.py @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)

author	Chris Cannam
date	Tue, 30 Jul 2019 12:25:44 +0100
parents	2a2c65a20a8b
children

rev	line source
Chris@87	1 """A collection of functions designed to help I/O with ascii files.
Chris@87	2
Chris@87	3 """
Chris@87	4 from __future__ import division, absolute_import, print_function
Chris@87	5
Chris@87	6 __docformat__ = "restructuredtext en"
Chris@87	7
Chris@87	8 import sys
Chris@87	9 import numpy as np
Chris@87	10 import numpy.core.numeric as nx
Chris@87	11 from numpy.compat import asbytes, bytes, asbytes_nested, basestring
Chris@87	12
Chris@87	13 if sys.version_info[0] >= 3:
Chris@87	14 from builtins import bool, int, float, complex, object, str
Chris@87	15 unicode = str
Chris@87	16 else:
Chris@87	17 from __builtin__ import bool, int, float, complex, object, unicode, str
Chris@87	18
Chris@87	19
Chris@87	20 if sys.version_info[0] >= 3:
Chris@87	21 def _bytes_to_complex(s):
Chris@87	22 return complex(s.decode('ascii'))
Chris@87	23
Chris@87	24 def _bytes_to_name(s):
Chris@87	25 return s.decode('ascii')
Chris@87	26 else:
Chris@87	27 _bytes_to_complex = complex
Chris@87	28 _bytes_to_name = str
Chris@87	29
Chris@87	30 def _is_string_like(obj):
Chris@87	31 """
Chris@87	32 Check whether obj behaves like a string.
Chris@87	33 """
Chris@87	34 try:
Chris@87	35 obj + ''
Chris@87	36 except (TypeError, ValueError):
Chris@87	37 return False
Chris@87	38 return True
Chris@87	39
Chris@87	40 def _is_bytes_like(obj):
Chris@87	41 """
Chris@87	42 Check whether obj behaves like a bytes object.
Chris@87	43 """
Chris@87	44 try:
Chris@87	45 obj + asbytes('')
Chris@87	46 except (TypeError, ValueError):
Chris@87	47 return False
Chris@87	48 return True
Chris@87	49
Chris@87	50
def _to_filehandle(fname, flag='r', return_opened=False):
    """
    Returns the filehandle corresponding to a string or a file.
    If the string ends in '.gz' or '.bz2', the file is opened through the
    matching decompression module (`gzip` or `bz2`).

    Parameters
    ----------
    fname : string, filehandle
        Name of the file whose filehandle must be returned, or an object
        with a ``seek`` attribute, which is returned unchanged.
    flag : string, optional
        Flag indicating the status of the file ('r' for read, 'w' for write).
    return_opened : boolean, optional
        Whether to return the opening status of the file.

    Returns
    -------
    fhd : filehandle
        The opened (or passed-through) file object.
    opened : bool
        Only returned if `return_opened` is True.  True when the file was
        opened by this function, False when `fname` already was a handle.

    Raises
    ------
    ValueError
        If `fname` is neither string-like nor an object with ``seek``.
    """
    if _is_string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fhd = gzip.open(fname, flag)
        elif fname.endswith('.bz2'):
            import bz2
            # Honour the requested mode; it used to be silently ignored,
            # so asking for 'w' opened the archive for reading.
            fhd = bz2.BZ2File(fname, flag)
        else:
            # `open` exists on both Python 2 and 3, whereas the `file`
            # builtin is Python 2 only.
            fhd = open(fname, flag)
        opened = True
    elif hasattr(fname, 'seek'):
        fhd = fname
        opened = False
    else:
        raise ValueError('fname must be a string or file handle')
    if return_opened:
        return fhd, opened
    return fhd
Chris@87	83
Chris@87	84
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False
        otherwise (including when `ndtype` has no named fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    field_names = ndtype.names or ()
    return any(ndtype[field].names for field in field_names)
Chris@87	110
Chris@87	111
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Field names are not kept in the output.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : {False, True}, optional
        If True, a field with a shape contributes one entry per element
        of that shape; if False it contributes a single entry.

    Returns
    -------
    types : list of dtype
        The base dtypes, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int32')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int32'),
     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
     dtype('int32')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field, in declaration order.
        flattened = []
        for field_name in field_names:
            field_dtype = ndtype.fields[field_name][0]
            flattened += flatten_dtype(field_dtype, flatten_base)
        return flattened
    # Plain (possibly sub-array) dtype.
    repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
    return [ndtype.base] * repeat
Chris@87	150
Chris@87	151
class LineSplitter(object):
    """
    Callable that splits a line at a delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
        If None (default), empty, or zero, the line is split on
        whitespace.
    comments : str, optional
        Marker of the beginning of a comment; everything after it is
        dropped before splitting. Default is '#'. Use None to disable
        comment removal.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that each item it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function taking a single argument, calling `method` on it and
            returning the list of the results with `strip` applied to
            each of them.

        """
        def wrapped(line):
            return [field.strip() for field in method(line)]
        return wrapped

    def __init__(self, delimiter=None, comments=b'#', autostrip=True):
        self.comments = comments
        # Text delimiters are handled as ASCII bytes.
        if isinstance(delimiter, unicode):
            delimiter = delimiter.encode('ascii')
        if delimiter is None or _is_bytes_like(delimiter):
            # Character delimiter; an empty one means "any whitespace".
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # Sequence of field widths: precompute the matching slices.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for (start, stop) in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # Single non-zero field width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            # Zero width: fall back on whitespace splitting.
            splitter = self._delimited_splitter
            delimiter = None
        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter

    def _delimited_splitter(self, line):
        """Drop the comment, trim the line, split it at `self.delimiter`."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(b" \r\n")
        if line:
            return line.split(self.delimiter)
        return []

    def _fixedwidth_splitter(self, line):
        """Drop the comment, cut the line in chunks of `self.delimiter`."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(b"\r\n")
        if not line:
            return []
        width = self.delimiter
        return [line[start:start + width]
                for start in range(0, len(line), width)]

    def _variablewidth_splitter(self, line):
        """Drop the comment, cut the line with the slices in `self.delimiter`."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        if not line:
            return []
        return [line[field_slice] for field_slice in self.delimiter]

    def __call__(self, line):
        return self._handyman(line)
Chris@87	246
Chris@87	247
class NameValidator(object):
    """
    Object to validate a list of strings to use as field names.

    Invalid characters are deleted from the strings, and spaces are
    replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a tuple of valid names will be created. The `__call__`
    method accepts an optional keyword "defaultfmt" that sets the default
    name used when a name is empty after validation. By default this is
    'f%i', so that names will default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. The default list
        ['return', 'file', 'print'] is appended to it. Excluded names are
        appended an underscore: for example, `file` becomes `file_` if
        supplied. The input sequence itself is left unmodified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double quote is always deleted.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive='lower')
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('excl_', 'field2', 'no_', 'with_space', 'case')

    """
    #
    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set("""~!@#$%^&*()-=+~\\|]}[{';: /?.>,<""")
    #

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list: build a new list so that neither the
        # caller's sequence nor the class-level default is modified.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + list(self.defaultexcludelist)
        # Process the list of characters to delete: work on a copy so that
        # adding '"' does not leak into the shared class-level default set.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or ('u' in case_sensitive):
            self.case_converter = lambda x: x.upper()
        elif 'l' in case_sensitive:
            self.case_converter = lambda x: x.lower()
        else:
            self.case_converter = lambda x: x
        #
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-item list.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if (names is None):
            if (nbfields is None):
                return None
            names = []
        if isinstance(names, basestring):
            names = [names, ]
        if nbfields is not None:
            nbnames = len(names)
            if (nbnames < nbfields):
                # Pad with empty names, replaced by defaults below
                names = list(names) + [''] * (nbfields - nbnames)
            elif (nbnames > nbfields):
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = dict()
        nbempty = 0
        #
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Pick the first default name not already among the inputs
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Duplicates get a numeric suffix: 'a', 'a_1', 'a_2', ...
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)
    #

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
Chris@87	403
Chris@87	404
def str2bool(value):
    """
    Convert a string spelling 'true' or 'false' into the matching boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean. It is upper-cased and
        compared with the byte strings ``b'TRUE'`` and ``b'FALSE'``.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool(b'TRUE')
    True
    >>> np.lib._iotools.str2bool(b'false')
    False

    """
    upper = value.upper()
    if upper == b'TRUE':
        return True
    if upper == b'FALSE':
        return False
    raise ValueError("Invalid boolean")
Chris@87	439
Chris@87	440
class ConverterError(Exception):
    """
    Error raised when a converter for string values fails.

    """
Chris@87	447
class ConverterLockError(ConverterError):
    """
    Error raised when trying to upgrade a converter that is locked.

    """
Chris@87	454
class ConversionWarning(UserWarning):
    """
    Warning emitted when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
Chris@87	466
Chris@87	467
class StringConverter(object):
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : sequence of str, optional
        Sequence of strings indicating a missing value.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    #
    # Conversion ladder shared by all instances: `upgrade` walks it from
    # the first entry towards the last one.
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
    #

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype
    #

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type
    #
    # This is a bit annoying. We want to return the "general" type in most
    # cases (ie. "string" rather than "S10"), but we want to return the
    # specific type for datetime64 (ie. "datetime64[us]" rather than
    # "datetime64").

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type
    #

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, sequence of functions, or sequence of ready-made
            (type, function, default) mapper entries.
        default : any or sequence, optional
            Default value associated with `func`, or sequence of default
            values when `func` is a sequence of functions.  Missing
            defaults are taken as None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Sequence of complete mapper entries: insert them as they are
            if isinstance(func[0], (tuple, list)):
                for entry in func:
                    cls._mapper.insert(-1, entry)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default.
                # (`append` would add a single nested list instead, and
                # the `zip` below would then drop the remaining functions.)
                default.extend([None] * (len(func) - len(default)))
            for (fct, dft) in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
    #

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Convert unicode (for Py3)
        if isinstance(missing_values, unicode):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            # (`func` is the loop variable left over from the scan above)
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            self.missing_values = set(list(missing_values) + [asbytes('')])
        #
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default
    #

    def _loose_call(self, value):
        """Convert `value`, returning the default if a ValueError occurs."""
        try:
            return self.func(value)
        except ValueError:
            return self.default
    #

    def _strict_call(self, value):
        """
        Convert `value`; a ValueError is only tolerated (and the default
        returned) when the stripped value is one of the missing values.
        """
        try:
            return self.func(value)
        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)
    #

    def __call__(self, value):
        return self._callingfunction(value)
    #

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        Raises
        ------
        ConverterLockError
            If the conversion fails and the converter is locked.
        ConverterError
            If the status is past the end of the mapper.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            # Raise an exception if we locked the converter...
            if self._locked:
                errmsg = "Converter is locked and cannot be upgraded"
                raise ConverterLockError(errmsg)
            _statusmax = len(self._mapper)
            # Complains if we try to upgrade by the maximum
            _status = self._status
            if _status == _statusmax:
                errmsg = "Could not find a valid conversion function"
                raise ConverterError(errmsg)
            elif _status < _statusmax - 1:
                _status += 1
            (self.type, self.func, default) = self._mapper[_status]
            self._status = _status
            if self._initial_default is not None:
                self.default = self._initial_default
            else:
                self.default = default
            # Retry with the next converter and hand its result back
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until it can convert every item of `value`.

        Parameters
        ----------
        value : str or iterable of str
            The string(s) the converter must be able to convert. An object
            without an ``__iter__`` attribute is treated as a single item.

        Raises
        ------
        ConverterLockError
            If a conversion fails and the converter is locked.
        ConverterError
            If the status is past the end of the mapper.

        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            # Raise an exception if we locked the converter...
            if self._locked:
                errmsg = "Converter is locked and cannot be upgraded"
                raise ConverterLockError(errmsg)
            _statusmax = len(self._mapper)
            # Complains if we try to upgrade by the maximum
            _status = self._status
            if _status == _statusmax:
                raise ConverterError(
                    "Could not find a valid conversion function"
                    )
            elif _status < _statusmax - 1:
                _status += 1
            (self.type, self.func, default) = self._mapper[_status]
            if self._initial_default is not None:
                self.default = self._initial_default
            else:
                self.default = default
            self._status = _status
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values=asbytes(''), locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : sequence of str, optional
            Sequence of strings indicating a missing value. They are added
            to the existing ones; None discards all existing ones.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked
        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            # Guess the output type from a trial conversion
            try:
                tester = func(testing_value or asbytes('1'))
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))
        # Add the missing values to the existing set
        if missing_values is not None:
            if _is_bytes_like(missing_values):
                self.missing_values.add(missing_values)
            elif hasattr(missing_values, '__iter__'):
                for val in missing_values:
                    self.missing_values.add(val)
        else:
            # Keep a set (not a list) so that a later `update` call can
            # still `add` values to it.
            self.missing_values = set()
Chris@87	814
Chris@87	815
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a loose dtype description and optional names.

    The input `ndtype` is converted to a dtype and its field names are
    matched with (or generated from) the given `names`.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting dtype.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not a dtype definition by itself: handle it as a sequence of
        # formats, one per field.
        validator = NameValidator(**validationargs)
        field_count = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, basestring):
            names = names.split(",")
        names = validator(names, nbfields=field_count, defaultfmt=defaultfmt)
        return np.dtype(dict(formats=ndtype, names=names))

    type_count = len(ndtype)
    if names is not None:
        # Names were given explicitly
        validator = NameValidator(**validationargs)
        if isinstance(names, basestring):
            names = names.split(",")
        if type_count == 0:
            # Simple dtype: repeat it once per name
            formats = tuple([ndtype.type] * len(names))
            names = validator(names, defaultfmt=defaultfmt)
            ndtype = np.dtype(list(zip(names, formats)))
        else:
            # Structured dtype: rename its fields with the validated names
            ndtype.names = validator(names, nbfields=type_count,
                                     defaultfmt=defaultfmt)
    elif type_count > 0:
        # No names given, but the dtype is structured
        validator = NameValidator(**validationargs)
        auto_names = tuple("f%i" % i for i in range(type_count))
        if ndtype.names == auto_names and defaultfmt != "f%i":
            # Automatic names: regenerate them with the requested format
            ndtype.names = validator([''] * type_count,
                                     defaultfmt=defaultfmt)
        else:
            # Explicit initial names: just validate them
            ndtype.names = validator(ndtype.names, defaultfmt=defaultfmt)
    return ndtype

Mercurial > hg > vamp-build-and-test

annotate DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/_iotools.py @ 133:4acb5d8d80b6 tip