from __future__ import division, absolute_import, print_function

import sys
import os
import re
import itertools
import warnings
import weakref
from operator import itemgetter

import numpy as np
from . import format
from ._datasource import DataSource
from ._compiled_base import packbits, unpackbits
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields,
    flatten_dtype, easy_dtype, _bytes_to_name
    )

from numpy.compat import (
    asbytes, asstr, asbytes_nested, bytes, basestring, unicode
    )

if sys.version_info[0] >= 3:
    import pickle
else:
    import cPickle as pickle
    from future_builtins import map

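# Module-level alias for pickle.loads, re-exported through __all__ below.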
loads = pickle.loads

__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]


def seek_gzip_factory(f):
    """Use this factory to produce the class so that we can do a lazy
    import on gzip.

    """
    import gzip

    class GzipFile(gzip.GzipFile):

        def seek(self, offset, whence=0):
            # figure out new position (we can only seek forwards)
            if whence == 1:
                offset = self.offset + offset

            if whence not in [0, 1]:
                raise IOError("Illegal argument")

            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        def tell(self):
            return self.offset

    if isinstance(f, str):
        f = GzipFile(f)
    elif isinstance(f, gzip.GzipFile):
        # cast to our GzipFile if it's already a gzip.GzipFile

        try:
            name = f.name
        except AttributeError:
            # Backward compatibility for <= 2.5
            name = f.filename
        mode = f.mode

        f = GzipFile(fileobj=f.fileobj, filename=name)
        f.mode = mode

    return f


class BagObj(object):
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo(object):
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
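        # Look up _obj via object.__getattribute__ so that this override
        # does not recurse into itself.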
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key)


def zipfile_factory(*args, **kwargs):
    import zipfile
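    # Enable ZIP64 extensions so that archives larger than 2 GiB can be
    # written and read.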
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(*args, **kwargs)


class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.io.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
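        # A key may refer to an archive member with or without the '.npy'
        # extension; check both spellings before giving up.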
        member = 0
        if key in self._files:
            member = 1
        elif key in self.files:
            member = 1
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return files in the archive with a ``.npy`` extension."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)

def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        fid.seek(-N, 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            return NpzFile(fid, own_fid=tmp)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Try a pickle
            try:
                return pickle.load(fid)
            except:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()

def save(file, arr):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string, a ``.npy``
        extension will be appended to the file name if it does not already
        have one.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    own_fid = False
    if isinstance(file, basestring):
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True
    else:
        fid = file

    try:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr)
    finally:
        if own_fid:
            fid.close()

def savez(file, *args, **kwds):
    """
    Save several arrays into a single file in uncompressed ``.npz`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword
    arguments are given, the corresponding variable names, in the ``.npz``
    file, will match the keyword names.

    Parameters
    ----------
    file : str or file
        Either the file name (string) or an open file (file-like object)
        where the data will be saved. If file is a string, the ``.npz``
        extension will be appended to the file name if it is not already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain. The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see `format`.

    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_1', 'arr_0']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['y', 'x']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False)

def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    If keyword arguments are given, then filenames are taken from the keywords.
    If arguments are passed in with no keywords, then stored file names are
    arr_0, arr_1, etc.

    Parameters
    ----------
    file : str
        File name of ``.npz`` file.
    args : Arguments
        Function arguments.
    kwds : Keyword arguments
        Keywords.

    See Also
    --------
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.
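
    Examples
    --------
    A minimal usage sketch: arrays are saved and loaded back exactly as with
    `savez`, only the archive itself is compressed.

    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> np.savez_compressed(outfile, x=x)
    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])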

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile
    # Import deferred for startup time improvement
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    # Stage arrays in a temporary file on disk, before writing to zip.
    fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
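    # mkstemp returns an already-open OS-level descriptor; close it, since the
    # staging file is reopened below with a regular file object for each array.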
    os.close(fd)
    try:
        for key, val in namedict.items():
            fname = key + '.npy'
            fid = open(tmpfile, 'wb')
            try:
                format.write_array(fid, np.asanyarray(val))
                fid.close()
                fid = None
                zipf.write(tmpfile, arcname=fname)
            finally:
                if fid:
                    fid.close()
    finally:
        os.remove(tmpfile)

    zipf.close()


def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """
    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.floating):
        return float
    elif issubclass(typ, np.complex):
        return complex
    elif issubclass(typ, np.bytes_):
        return bytes
    else:
        return str

def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read. If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float. If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array. In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values. By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float. E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``. Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``. When used with a record
        data-type, arrays are returned for each field. Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files. The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

    """
    # Type conversions for Py3 convenience
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    def flatten_dtype(dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
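                        # Fold in each outer dimension: the flat element
                        # count grows by `dim` and the previous packing is
                        # repeated `dim` times.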
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = flatten_dtype(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            converters[i] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[i] for i in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)
    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X

def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# '):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format. `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
                like `' (%s+%sj)' % (fmt, fmt)`
            b) a full string specifying every real and imaginary part, e.g.
                `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns
            c) a list of specifiers, one per column - in this case, the real
                and imaginary part must have separate specifiers,
                e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape. %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
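        # Complex values are written as two columns each (real part followed
        # by imaginary part), matching the doubled format string built above.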
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(format % tuple(row2) + newline))
        else:
            for row in X:
                fh.write(asbytes(format % tuple(row) + newline))
        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        if own_fh:
            fh.close()

def fromregex(file, regexp, dtype):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        File name or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> f.write("1312 foo\\n1534  bar\\n444   qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
          dtype=[('num', '<i8'), ('key', '|S3')])
    >>> output['num']
    array([1312, 1534,  444], dtype=int64)

    """
    own_fh = False
    if not hasattr(file, "read"):
        file = open(file, 'rb')
        own_fh = True

    try:
        if not hasattr(regexp, 'match'):
            regexp = re.compile(asbytes(regexp))
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        seq = regexp.findall(file.read())
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            # re-interpret as a single-field structured array.
            newdtype = np.dtype(dtype[dtype.names[0]])
            output = np.array(seq, dtype=newdtype)
            output.dtype = dtype
        else:
            output = np.array(seq, dtype=dtype)

        return output
    finally:
        if own_fh:
            file.close()


#####--------------------------------------------------------------------------
#---- --- ASCII functions ---
#####--------------------------------------------------------------------------

Chris@87
|
1180 def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
|
Chris@87
|
1181 skiprows=0, skip_header=0, skip_footer=0, converters=None,
|
Chris@87
|
1182 missing='', missing_values=None, filling_values=None,
|
Chris@87
|
1183 usecols=None, names=None,
|
Chris@87
|
1184 excludelist=None, deletechars=None, replace_space='_',
|
Chris@87
|
1185 autostrip=False, case_sensitive=True, defaultfmt="f%i",
|
Chris@87
|
1186 unpack=None, usemask=False, loose=True, invalid_raise=True):
|
Chris@87
|
1187 """
|
Chris@87
|
1188 Load data from a text file, with missing values handled as specified.
|
Chris@87
|
1189
|
Chris@87
|
1190 Each line past the first `skip_header` lines is split at the `delimiter`
|
Chris@87
|
1191 character, and characters following the `comments` character are discarded.
|
Chris@87
|
1192
|
Chris@87
|
1193 Parameters
|
Chris@87
|
1194 ----------
|
Chris@87
|
1195 fname : file or str
|
Chris@87
|
1196 File, filename, or generator to read. If the filename extension is
|
Chris@87
|
1197 `.gz` or `.bz2`, the file is first decompressed. Note that
|
Chris@87
|
1198 generators must return byte strings in Python 3k.
|
Chris@87
|
1199 dtype : dtype, optional
|
Chris@87
|
1200 Data type of the resulting array.
|
Chris@87
|
1201 If None, the dtypes will be determined by the contents of each
|
Chris@87
|
1202 column, individually.
|
Chris@87
|
1203 comments : str, optional
|
Chris@87
|
1204 The character used to indicate the start of a comment.
|
Chris@87
|
1205 All the characters occurring on a line after a comment are discarded
|
Chris@87
|
1206 delimiter : str, int, or sequence, optional
|
Chris@87
|
1207 The string used to separate values. By default, any consecutive
|
Chris@87
|
1208 whitespaces act as delimiter. An integer or sequence of integers
|
Chris@87
|
1209 can also be provided as width(s) of each field.
|
Chris@87
|
1210 skip_rows : int, optional
|
Chris@87
|
1211 `skip_rows` was deprecated in numpy 1.5, and will be removed in
|
Chris@87
|
1212 numpy 2.0. Please use `skip_header` instead.
|
Chris@87
|
1213 skip_header : int, optional
|
Chris@87
|
1214 The number of lines to skip at the beginning of the file.
|
Chris@87
|
1215 skip_footer : int, optional
|
Chris@87
|
1216 The number of lines to skip at the end of the file.
|
Chris@87
|
1217 converters : variable, optional
|
Chris@87
|
1218 The set of functions that convert the data of a column to a value.
|
Chris@87
|
1219 The converters can also be used to provide a default value
|
Chris@87
|
1220 for missing data: ``converters = {3: lambda s: float(s or 0)}``.
|
Chris@87
|
1221 missing : variable, optional
|
Chris@87
|
1222 `missing` was deprecated in numpy 1.5, and will be removed in
|
Chris@87
|
1223 numpy 2.0. Please use `missing_values` instead.
|
Chris@87
|
1224 missing_values : variable, optional
|
Chris@87
|
1225 The set of strings corresponding to missing data.
|
Chris@87
|
1226 filling_values : variable, optional
|
Chris@87
|
1227 The set of values to be used as default when the data are missing.
|
Chris@87
|
    usecols : sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid line
        after the first `skip_header` lines.
        If `names` is a sequence or a single string of comma-separated names,
        the names will be used to define the field names in a structured dtype.
        If `names` is None, the names of the dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return', 'file', 'print']. Excluded names have an underscore
        appended: for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used to replace white spaces in the variable names.
        By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = genfromtxt(...)``
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.

    References
    ----------
    .. [1] Numpy User Guide, section `I/O with Numpy
           <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    --------
    >>> from StringIO import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Using dtype = None

    >>> s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Specifying dtype and names

    >>> s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
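
    An illustrative sketch of filling missing entries with `filling_values`
    (the exact repr may differ slightly between numpy versions)

    >>> s = StringIO("1,,3")
    >>> np.genfromtxt(s, delimiter=",", filling_values=-999)
    array([   1., -999.,    3.])
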
    """
    # Py3 data conversions to bytes, for convenience
    if comments is not None:
        comments = asbytes(comments)
    if isinstance(delimiter, unicode):
        delimiter = asbytes(delimiter)
    if isinstance(missing, unicode):
        missing = asbytes(missing)
    if isinstance(missing_values, (unicode, list, tuple)):
        missing_values = asbytes_nested(missing_values)

    #
    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converters' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

    # Initialize the filehandle, the LineSplitter and the NameValidator
    own_fhd = False
    try:
        if isinstance(fname, basestring):
            if sys.version_info[0] == 2:
                fhd = iter(np.lib._datasource.open(fname, 'rbU'))
            else:
                fhd = iter(np.lib._datasource.open(fname, 'rb'))
            own_fhd = True
        else:
            fhd = iter(fname)
    except TypeError:
        raise TypeError(
            "fname must be a string, filehandle, or generator. "
            "(got %s instead)" % type(fname))

    split_line = LineSplitter(delimiter=delimiter, comments=comments,
                              autostrip=autostrip)._handyman
    validate_names = NameValidator(excludelist=excludelist,
                                   deletechars=deletechars,
                                   case_sensitive=case_sensitive,
                                   replace_space=replace_space)
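    # Illustrative note: with the defaults, validate_names(['max speed',
    # 'return']) would yield ['max_speed', 'return_'] -- spaces are replaced
    # by `replace_space` and names from `excludelist` get a trailing
    # underscore.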

    # Get the first valid lines after the first skiprows ones ..
    if skiprows:
        warnings.warn(
            "The use of `skiprows` is deprecated, it will be removed in "
            "numpy 2.0.\nPlease use `skip_header` instead.",
            DeprecationWarning)
        skip_header = skiprows
    # Skip the first `skip_header` rows
    for i in range(skip_header):
        next(fhd)

    # Keep on until we find the first valid values
    first_values = None
    try:
        while not first_values:
            first_line = next(fhd)
            if names is True:
                if comments in first_line:
                    first_line = (
                        asbytes('').join(first_line.split(comments)[1:]))
            first_values = split_line(first_line)
    except StopIteration:
        # return an empty array if the datafile is empty
        first_line = asbytes('')
        first_values = []
        warnings.warn('genfromtxt: Empty input file: "%s"' % fname)

    # Should we take the first values as names?
    if names is True:
        fval = first_values[0].strip()
        if fval in comments:
            del first_values[0]

    # Check the columns to use: make sure `usecols` is a list
    if usecols is not None:
        try:
            usecols = [_.strip() for _ in usecols.split(",")]
        except AttributeError:
            try:
                usecols = list(usecols)
            except TypeError:
                usecols = [usecols, ]
    nbcols = len(usecols or first_values)
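    # Illustrative note: at this point usecols="a, c" has become ['a', 'c'],
    # usecols=(1, 3) has become [1, 3] and a scalar usecols=2 has become [2];
    # string entries are matched against `names` and negative indices are
    # wrapped further below.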

    # Check the names and overwrite the dtype.names if needed
    if names is True:
        names = validate_names([_bytes_to_name(_.strip())
                                for _ in first_values])
        first_line = asbytes('')
    elif _is_string_like(names):
        names = validate_names([_.strip() for _ in names.split(',')])
    elif names:
        names = validate_names(names)
    # Get the dtype
    if dtype is not None:
        dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names)
    # Make sure the names is a list (for 2.5)
    if names is not None:
        names = list(names)

    if usecols:
        for (i, current) in enumerate(usecols):
            # if usecols is a list of names, convert to a list of indices
            if _is_string_like(current):
                usecols[i] = names.index(current)
            elif current < 0:
                usecols[i] = current + len(first_values)
        # If the dtype is not None, make sure we update it
        if (dtype is not None) and (len(dtype) > nbcols):
            descr = dtype.descr
            dtype = np.dtype([descr[_] for _ in usecols])
            names = list(dtype.names)
        # If `names` is not None, update the names
        elif (names is not None) and (len(names) > nbcols):
            names = [names[_] for _ in usecols]
    elif (names is not None) and (dtype is not None):
        names = list(dtype.names)

    # Process the missing values ...............................
    # Rename missing_values for convenience
    user_missing_values = missing_values or ()

    # Define the list of missing_values (one column: one list)
    missing_values = [list([asbytes('')]) for _ in range(nbcols)]

    # We have a dictionary: process it field by field
    if isinstance(user_missing_values, dict):
        # Loop on the items
        for (key, val) in user_missing_values.items():
            # Is the key a string?
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped
                    continue
            # Redefine the key as needed if it's a column number
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Transform the value into a list of strings
            if isinstance(val, (list, tuple)):
                val = [str(_) for _ in val]
            else:
                val = [str(val), ]
            # Add the value(s) to the current list of missing
            if key is None:
                # None acts as default
                for miss in missing_values:
                    miss.extend(val)
            else:
                missing_values[key].extend(val)
    # We have a sequence: each item matches a column
    elif isinstance(user_missing_values, (list, tuple)):
        for (value, entry) in zip(user_missing_values, missing_values):
            value = str(value)
            if value not in entry:
                entry.append(value)
    # We have a string: apply it to all entries
    elif isinstance(user_missing_values, bytes):
        user_value = user_missing_values.split(asbytes(","))
        for entry in missing_values:
            entry.extend(user_value)
    # We have something else: apply it to all entries
    else:
        for entry in missing_values:
            entry.extend([str(user_missing_values)])
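    # Illustrative summary of the forms accepted above:
    #   missing_values={'a': "N/A", None: "???"}  -> per column, None = all
    #   missing_values=("N/A", "???")             -> one entry per column
    #   missing_values="N/A,???"                  -> applied to every column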

    # Process the deprecated `missing`
    if missing != asbytes(''):
        warnings.warn(
            "The use of `missing` is deprecated, it will be removed in "
            "Numpy 2.0.\nPlease use `missing_values` instead.",
            DeprecationWarning)
        values = [str(_) for _ in missing.split(asbytes(","))]
        for entry in missing_values:
            entry.extend(values)

    # Process the filling_values ...............................
    # Rename the input for convenience
    user_filling_values = filling_values
    if user_filling_values is None:
        user_filling_values = []
    # Define the default
    filling_values = [None] * nbcols
    # We have a dictionary: update each entry individually
    if isinstance(user_filling_values, dict):
        for (key, val) in user_filling_values.items():
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped
                    continue
            # Redefine the key if it's a column number and usecols is defined
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Add the value to the list
            filling_values[key] = val
    # We have a sequence: update on a one-to-one basis
    elif isinstance(user_filling_values, (list, tuple)):
        n = len(user_filling_values)
        if (n <= nbcols):
            filling_values[:n] = user_filling_values
        else:
            filling_values = user_filling_values[:nbcols]
    # We have something else: use it for all entries
    else:
        filling_values = [user_filling_values] * nbcols
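    # Illustrative summary of the forms accepted above:
    #   filling_values={'a': 0}      -> fill for a single named column
    #   filling_values=(0, 0.0, "")  -> one fill value per column
    #   filling_values=0             -> the same fill for every column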

    # Initialize the converters ................................
    if dtype is None:
        # Note: we can't use a [...]*nbcols, as we would have 3 times the same
        # ... converter, instead of 3 different converters.
        converters = [StringConverter(None, missing_values=miss, default=fill)
                      for (miss, fill) in zip(missing_values, filling_values)]
    else:
        dtype_flat = flatten_dtype(dtype, flatten_base=True)
        # Initialize the converters
        if len(dtype_flat) > 1:
            # Flexible type: get a converter from each dtype
            zipit = zip(dtype_flat, missing_values, filling_values)
            converters = [StringConverter(dt, locked=True,
                                          missing_values=miss, default=fill)
                          for (dt, miss, fill) in zipit]
        else:
            # Set to a default converter (but w/ different missing values)
            zipit = zip(missing_values, filling_values)
            converters = [StringConverter(dtype, locked=True,
                                          missing_values=miss, default=fill)
                          for (miss, fill) in zipit]
    # Update the converters to use the user-defined ones
    uc_update = []
    for (j, conv) in user_converters.items():
        # If the converter is specified by column names, use the index instead
        if _is_string_like(j):
            try:
                j = names.index(j)
                i = j
            except ValueError:
                continue
        elif usecols:
            try:
                i = usecols.index(j)
            except ValueError:
                # Unused converter specified
                continue
        else:
            i = j
        # Find the value to test - first_line is not filtered by usecols:
        if len(first_line):
            testing_value = first_values[j]
        else:
            testing_value = None
        converters[i].update(conv, locked=True,
                             testing_value=testing_value,
                             default=filling_values[i],
                             missing_values=missing_values[i],)
        uc_update.append((i, conv))
    # Make sure we have the corrected keys in user_converters...
    user_converters.update(uc_update)

    # Fixme: possible error as following variable never used.
    #miss_chars = [_.missing_values for _ in converters]

    # Initialize the output lists ...
    # ... rows
    rows = []
    append_to_rows = rows.append
    # ... masks
    if usemask:
        masks = []
        append_to_masks = masks.append
    # ... invalid
    invalid = []
    append_to_invalid = invalid.append

    # Parse each line
    for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
        values = split_line(line)
        nbvalues = len(values)
        # Skip an empty line
        if nbvalues == 0:
            continue
        # Select only the columns we need
        if usecols:
            try:
                values = [values[_] for _ in usecols]
            except IndexError:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue
        elif nbvalues != nbcols:
            append_to_invalid((i + skip_header + 1, nbvalues))
            continue
        # Store the values
        append_to_rows(tuple(values))
        if usemask:
            append_to_masks(tuple([v.strip() in m
                                   for (v, m) in zip(values, missing_values)]))

    if own_fhd:
        fhd.close()

    # Upgrade the converters (if needed)
    if dtype is None:
        for (i, converter) in enumerate(converters):
            current_column = [itemgetter(i)(_m) for _m in rows]
            try:
                converter.iterupgrade(current_column)
            except ConverterLockError:
                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                current_column = map(itemgetter(i), rows)
                for (j, value) in enumerate(current_column):
                    try:
                        converter.upgrade(value)
                    except (ConverterError, ValueError):
                        errmsg += "(occurred line #%i for value '%s')"
                        errmsg %= (j + 1 + skip_header, value)
                        raise ConverterError(errmsg)

    # Check that we don't have invalid values
    nbinvalid = len(invalid)
    if nbinvalid > 0:
        nbrows = len(rows) + nbinvalid - skip_footer
        # Construct the error message
        template = "    Line #%%i (got %%i columns instead of %i)" % nbcols
        if skip_footer > 0:
            nbinvalid_skipped = len([_ for _ in invalid
                                     if _[0] > nbrows + skip_header])
            invalid = invalid[:nbinvalid - nbinvalid_skipped]
            skip_footer -= nbinvalid_skipped
#
#            nbrows -= skip_footer
#            errmsg = [template % (i, nb)
#                      for (i, nb) in invalid if i < nbrows]
#        else:
        errmsg = [template % (i, nb)
                  for (i, nb) in invalid]
        if len(errmsg):
            errmsg.insert(0, "Some errors were detected !")
            errmsg = "\n".join(errmsg)
            # Raise an exception?
            if invalid_raise:
                raise ValueError(errmsg)
            # Issue a warning?
            else:
                warnings.warn(errmsg, ConversionWarning)

    # Strip the last skip_footer data
    if skip_footer > 0:
        rows = rows[:-skip_footer]
        if usemask:
            masks = masks[:-skip_footer]

    # Convert each value according to the converter:
    # We want to modify the list in place to avoid creating a new one...
    if loose:
        rows = list(
            zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))
    else:
        rows = list(
            zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))

    # Reset the dtype
    data = rows
    if dtype is None:
        # Get the dtypes from the types of the converters
        column_types = [conv.type for conv in converters]
        # Find the columns with strings...
        strcolidx = [i for (i, v) in enumerate(column_types)
                     if v in (type('S'), np.string_)]
        # ... and take the largest number of chars.
        for i in strcolidx:
            column_types[i] = "|S%i" % max(len(row[i]) for row in data)
        #
        if names is None:
            # If the dtype is uniform, don't define names, else use ''
            base = set([c.type for c in converters if c._checked])
            if len(base) == 1:
                (ddtype, mdtype) = (list(base)[0], np.bool)
            else:
                ddtype = [(defaultfmt % i, dt)
                          for (i, dt) in enumerate(column_types)]
                if usemask:
                    mdtype = [(defaultfmt % i, np.bool)
                              for (i, dt) in enumerate(column_types)]
        else:
            ddtype = list(zip(names, column_types))
            mdtype = list(zip(names, [np.bool] * len(column_types)))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed
        if names and dtype.names:
            dtype.names = names
        # Case 1. We have a structured type
        if len(dtype_flat) > 1:
            # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
            # First, create the array using a flattened dtype:
            # [('a', int), ('b1', int), ('b2', float)]
            # Then, view the array using the specified dtype.
            if 'O' in (_.char for _ in dtype_flat):
                if has_nested_fields(dtype):
                    raise NotImplementedError(
                        "Nested fields involving objects are not supported...")
                else:
                    output = np.array(data, dtype=dtype)
            else:
                rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                output = rows.view(dtype)
            # Now, process the rowmasks the same way
            if usemask:
                rowmasks = np.array(
                    masks, dtype=np.dtype([('', np.bool) for t in dtype_flat]))
                # Construct the new dtype
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)
        # Case 2. We have a basic dtype
        else:
            # We used some user-defined converters
            if user_converters:
                ishomogeneous = True
                descr = []
                for i, ttype in enumerate([conv.type for conv in converters]):
                    # Keep the dtype of the current converter
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if ttype == np.string_:
                            ttype = "|S%i" % max(len(row[i]) for row in data)
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))
                # So we changed the dtype?
                if not ishomogeneous:
                    # We have more than one field
                    if len(descr) > 1:
                        dtype = np.dtype(descr)
                    # We have only one field: drop the name if not needed.
                    else:
                        dtype = np.dtype(ttype)
            #
            output = np.array(data, dtype)
            if usemask:
                if dtype.names:
                    mdtype = [(_, np.bool) for _ in dtype.names]
                else:
                    mdtype = np.bool
                outputmask = np.array(masks, dtype=mdtype)
    # Try to take care of the missing data we missed
    names = output.dtype.names
    if usemask and names:
        for (name, conv) in zip(names or (), converters):
            missing_values = [conv(_) for _ in conv.missing_values
                              if _ != asbytes('')]
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)
    # Construct the final array
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    if unpack:
        return output.squeeze().T
    return output.squeeze()


def ndfromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a file and return it as a single array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function.

    """
    kwargs['usemask'] = False
    return genfromtxt(fname, **kwargs)


def mafromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a text file and return a masked array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

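    Examples
    --------
    A minimal sketch: a list of strings stands in for a file (on Python 3
    the lines would need to be bytes), the empty second field of the second
    row is masked, and the exact repr may vary between numpy versions.

    >>> arr = np.mafromtxt(["1,2", ",4"], delimiter=",")
    >>> arr.mask
    array([[False, False],
           [ True, False]], dtype=bool)
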
    """
    kwargs['usemask'] = True
    return genfromtxt(fname, **kwargs)


def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned,
    if ``usemask=True`` a MaskedRecords array is returned.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

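    Examples
    --------
    A minimal sketch: a list of strings stands in for a file (on Python 3
    the lines would need to be bytes), and the exact repr may vary between
    numpy versions.

    >>> r = np.recfromtxt(["a b", "1 10", "2 20"], names=True)
    >>> r.b
    array([10, 20])
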
    """
    kwargs.setdefault("dtype", None)
    usemask = kwargs.get('usemask', False)
    output = genfromtxt(fname, **kwargs)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output


def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

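    Examples
    --------
    A minimal sketch: a list of strings stands in for a CSV file (on
    Python 3 the lines would need to be bytes), and the lower-case field
    names come from the default ``case_sensitive='lower'`` setting.

    >>> data = np.recfromcsv(["A,B", "1,2", "3,4"])
    >>> data.a
    array([1, 3])
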
    """
    # Set default kwargs for genfromtxt as relevant to csv import.
    kwargs.setdefault("case_sensitive", "lower")
    kwargs.setdefault("names", True)
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("dtype", None)
    output = genfromtxt(fname, **kwargs)

    usemask = kwargs.get("usemask", False)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output