Chris@87: from __future__ import division, absolute_import, print_function Chris@87: Chris@87: import sys Chris@87: import os Chris@87: import re Chris@87: import itertools Chris@87: import warnings Chris@87: import weakref Chris@87: from operator import itemgetter Chris@87: Chris@87: import numpy as np Chris@87: from . import format Chris@87: from ._datasource import DataSource Chris@87: from ._compiled_base import packbits, unpackbits Chris@87: from ._iotools import ( Chris@87: LineSplitter, NameValidator, StringConverter, ConverterError, Chris@87: ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields, Chris@87: flatten_dtype, easy_dtype, _bytes_to_name Chris@87: ) Chris@87: Chris@87: from numpy.compat import ( Chris@87: asbytes, asstr, asbytes_nested, bytes, basestring, unicode Chris@87: ) Chris@87: Chris@87: if sys.version_info[0] >= 3: Chris@87: import pickle Chris@87: else: Chris@87: import cPickle as pickle Chris@87: from future_builtins import map Chris@87: Chris@87: loads = pickle.loads Chris@87: Chris@87: __all__ = [ Chris@87: 'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', Chris@87: 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', Chris@87: 'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource' Chris@87: ] Chris@87: Chris@87: Chris@87: def seek_gzip_factory(f): Chris@87: """Use this factory to produce the class so that we can do a lazy Chris@87: import on gzip. 
Chris@87: Chris@87: """ Chris@87: import gzip Chris@87: Chris@87: class GzipFile(gzip.GzipFile): Chris@87: Chris@87: def seek(self, offset, whence=0): Chris@87: # figure out new position (we can only seek forwards) Chris@87: if whence == 1: Chris@87: offset = self.offset + offset Chris@87: Chris@87: if whence not in [0, 1]: Chris@87: raise IOError("Illegal argument") Chris@87: Chris@87: if offset < self.offset: Chris@87: # for negative seek, rewind and do positive seek Chris@87: self.rewind() Chris@87: count = offset - self.offset Chris@87: for i in range(count // 1024): Chris@87: self.read(1024) Chris@87: self.read(count % 1024) Chris@87: Chris@87: def tell(self): Chris@87: return self.offset Chris@87: Chris@87: if isinstance(f, str): Chris@87: f = GzipFile(f) Chris@87: elif isinstance(f, gzip.GzipFile): Chris@87: # cast to our GzipFile if its already a gzip.GzipFile Chris@87: Chris@87: try: Chris@87: name = f.name Chris@87: except AttributeError: Chris@87: # Backward compatibility for <= 2.5 Chris@87: name = f.filename Chris@87: mode = f.mode Chris@87: Chris@87: f = GzipFile(fileobj=f.fileobj, filename=name) Chris@87: f.mode = mode Chris@87: Chris@87: return f Chris@87: Chris@87: Chris@87: class BagObj(object): Chris@87: """ Chris@87: BagObj(obj) Chris@87: Chris@87: Convert attribute look-ups to getitems on the object passed in. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: obj : class instance Chris@87: Object on which attribute look-up is performed. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from numpy.lib.npyio import BagObj as BO Chris@87: >>> class BagDemo(object): Chris@87: ... def __getitem__(self, key): # An instance of BagObj(BagDemo) Chris@87: ... # will call this method when any Chris@87: ... # attribute look-up is required Chris@87: ... result = "Doesn't matter what you want, " Chris@87: ... return result + "you're gonna get this" Chris@87: ... 
def zipfile_factory(*args, **kwargs):
    """Create a ZipFile with ZIP64 extensions always enabled.

    The zipfile import is deferred because zipfile depends on gzip,
    an optional component of the standard library.
    """
    import zipfile
    # allowZip64 lets archives exceed 2 GiB, which .npz files easily can.
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(*args, **kwargs)


class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension,
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute can be performed as an alternative
        to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.io.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        for x in self._files:
            if x.endswith('.npy'):
                # Expose archive members under their stem, without '.npy'.
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = 0
        if key in self._files:
            member = 1
        elif key in self.files:
            member = 1
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            # Peek at the magic prefix to decide between a .npy array
            # and a plain (raw bytes) archive member.
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return files in the archive with a ``.npy`` extension."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)


def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Code to distinguish from NumPy binary files and pickles.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        fid.seek(-N, 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            return NpzFile(fid, own_fid=tmp)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Try a pickle.  Catch Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate to the caller.
            try:
                return pickle.load(fid)
            except Exception:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()
def save(file, arr):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string, a ``.npy``
        extension will be appended to the file name if it does not already
        have one.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Only close the handle if we opened it ourselves; callers keep
    # ownership of file objects they pass in.
    own_fid = False
    if isinstance(file, basestring):
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True
    else:
        fid = file

    try:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr)
    finally:
        if own_fid:
            fid.close()
Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from tempfile import TemporaryFile Chris@87: >>> outfile = TemporaryFile() Chris@87: Chris@87: >>> x = np.arange(10) Chris@87: >>> np.save(outfile, x) Chris@87: Chris@87: >>> outfile.seek(0) # Only needed here to simulate closing & reopening file Chris@87: >>> np.load(outfile) Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: """ Chris@87: own_fid = False Chris@87: if isinstance(file, basestring): Chris@87: if not file.endswith('.npy'): Chris@87: file = file + '.npy' Chris@87: fid = open(file, "wb") Chris@87: own_fid = True Chris@87: else: Chris@87: fid = file Chris@87: Chris@87: try: Chris@87: arr = np.asanyarray(arr) Chris@87: format.write_array(fid, arr) Chris@87: finally: Chris@87: if own_fid: Chris@87: fid.close() Chris@87: Chris@87: Chris@87: def savez(file, *args, **kwds): Chris@87: """ Chris@87: Save several arrays into a single file in uncompressed ``.npz`` format. Chris@87: Chris@87: If arguments are passed in with no keywords, the corresponding variable Chris@87: names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword Chris@87: arguments are given, the corresponding variable names, in the ``.npz`` Chris@87: file will match the keyword names. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str or file Chris@87: Either the file name (string) or an open file (file-like object) Chris@87: where the data will be saved. If file is a string, the ``.npz`` Chris@87: extension will be appended to the file name if it is not already there. Chris@87: args : Arguments, optional Chris@87: Arrays to save to the file. Since it is not possible for Python to Chris@87: know the names of the arrays outside `savez`, the arrays will be saved Chris@87: with names "arr_0", "arr_1", and so on. These arguments can be any Chris@87: expression. Chris@87: kwds : Keyword arguments, optional Chris@87: Arrays to save to the file. 
Arrays will be saved in the file with the Chris@87: keyword names. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: None Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: save : Save a single array to a binary file in NumPy format. Chris@87: savetxt : Save an array to a file as plain text. Chris@87: savez_compressed : Save several arrays into a compressed ``.npz`` archive Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: The ``.npz`` file format is a zipped archive of files named after the Chris@87: variables they contain. The archive is not compressed and each file Chris@87: in the archive contains one variable in ``.npy`` format. For a Chris@87: description of the ``.npy`` format, see `format`. Chris@87: Chris@87: When opening the saved ``.npz`` file with `load` a `NpzFile` object is Chris@87: returned. This is a dictionary-like object which can be queried for Chris@87: its list of arrays (with the ``.files`` attribute), and for the arrays Chris@87: themselves. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from tempfile import TemporaryFile Chris@87: >>> outfile = TemporaryFile() Chris@87: >>> x = np.arange(10) Chris@87: >>> y = np.sin(x) Chris@87: Chris@87: Using `savez` with \\*args, the arrays are saved with default names. Chris@87: Chris@87: >>> np.savez(outfile, x, y) Chris@87: >>> outfile.seek(0) # Only needed here to simulate closing & reopening file Chris@87: >>> npzfile = np.load(outfile) Chris@87: >>> npzfile.files Chris@87: ['arr_1', 'arr_0'] Chris@87: >>> npzfile['arr_0'] Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: Using `savez` with \\**kwds, the arrays are saved with the keyword names. 
Chris@87: Chris@87: >>> outfile = TemporaryFile() Chris@87: >>> np.savez(outfile, x=x, y=y) Chris@87: >>> outfile.seek(0) Chris@87: >>> npzfile = np.load(outfile) Chris@87: >>> npzfile.files Chris@87: ['y', 'x'] Chris@87: >>> npzfile['x'] Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: """ Chris@87: _savez(file, args, kwds, False) Chris@87: Chris@87: Chris@87: def savez_compressed(file, *args, **kwds): Chris@87: """ Chris@87: Save several arrays into a single file in compressed ``.npz`` format. Chris@87: Chris@87: If keyword arguments are given, then filenames are taken from the keywords. Chris@87: If arguments are passed in with no keywords, then stored file names are Chris@87: arr_0, arr_1, etc. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str Chris@87: File name of ``.npz`` file. Chris@87: args : Arguments Chris@87: Function arguments. Chris@87: kwds : Keyword arguments Chris@87: Keywords. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.savez : Save several arrays into an uncompressed ``.npz`` file format Chris@87: numpy.load : Load the files created by savez_compressed. Chris@87: Chris@87: """ Chris@87: _savez(file, args, kwds, True) Chris@87: Chris@87: Chris@87: def _savez(file, args, kwds, compress): Chris@87: # Import is postponed to here since zipfile depends on gzip, an optional Chris@87: # component of the so-called standard library. 
Chris@87: import zipfile Chris@87: # Import deferred for startup time improvement Chris@87: import tempfile Chris@87: Chris@87: if isinstance(file, basestring): Chris@87: if not file.endswith('.npz'): Chris@87: file = file + '.npz' Chris@87: Chris@87: namedict = kwds Chris@87: for i, val in enumerate(args): Chris@87: key = 'arr_%d' % i Chris@87: if key in namedict.keys(): Chris@87: raise ValueError( Chris@87: "Cannot use un-named variables and keyword %s" % key) Chris@87: namedict[key] = val Chris@87: Chris@87: if compress: Chris@87: compression = zipfile.ZIP_DEFLATED Chris@87: else: Chris@87: compression = zipfile.ZIP_STORED Chris@87: Chris@87: zipf = zipfile_factory(file, mode="w", compression=compression) Chris@87: Chris@87: # Stage arrays in a temporary file on disk, before writing to zip. Chris@87: fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy') Chris@87: os.close(fd) Chris@87: try: Chris@87: for key, val in namedict.items(): Chris@87: fname = key + '.npy' Chris@87: fid = open(tmpfile, 'wb') Chris@87: try: Chris@87: format.write_array(fid, np.asanyarray(val)) Chris@87: fid.close() Chris@87: fid = None Chris@87: zipf.write(tmpfile, arcname=fname) Chris@87: finally: Chris@87: if fid: Chris@87: fid.close() Chris@87: finally: Chris@87: os.remove(tmpfile) Chris@87: Chris@87: zipf.close() Chris@87: Chris@87: Chris@87: def _getconv(dtype): Chris@87: """ Find the correct dtype converter. 
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a record
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

    """
    # Type conversions for Py3 convenience
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    def flatten_dtype(dt):
        """Unpack a structured data-type, and produce re-packing info.

        NOTE: deliberately shadows the module-level ``flatten_dtype``
        imported from ``._iotools``; this local variant also returns the
        re-packing recipe used by ``pack_items`` below.
        """
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                # dt.fields maps name -> (field dtype, byte offset);
                # renamed from 'bytes' to avoid shadowing the builtin.
                tp, _offset = dt.fields[field]
                flat_dt, flat_packing = flatten_dtype(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    # Remap the converter's column index into the
                    # usecols-filtered column positions.
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            converters[i] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[i] for i in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)
    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X
i.e. Chris@87: # (1, 1, M) for a single row - remove the singleton dimension there Chris@87: if X.ndim == 3 and X.shape[:2] == (1, 1): Chris@87: X.shape = (1, -1) Chris@87: Chris@87: # Verify that the array has at least dimensions `ndmin`. Chris@87: # Check correctness of the values of `ndmin` Chris@87: if ndmin not in [0, 1, 2]: Chris@87: raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) Chris@87: # Tweak the size and shape of the arrays - remove extraneous dimensions Chris@87: if X.ndim > ndmin: Chris@87: X = np.squeeze(X) Chris@87: # and ensure we have the minimum number of dimensions asked for Chris@87: # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0 Chris@87: if X.ndim < ndmin: Chris@87: if ndmin == 1: Chris@87: X = np.atleast_1d(X) Chris@87: elif ndmin == 2: Chris@87: X = np.atleast_2d(X).T Chris@87: Chris@87: if unpack: Chris@87: if len(dtype_types) > 1: Chris@87: # For structured arrays, return an array for each field. Chris@87: return [X[field] for field in dtype.names] Chris@87: else: Chris@87: return X.T Chris@87: else: Chris@87: return X Chris@87: Chris@87: Chris@87: def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', Chris@87: footer='', comments='# '): Chris@87: """ Chris@87: Save an array to a text file. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname : filename or file handle Chris@87: If the filename ends in ``.gz``, the file is automatically saved in Chris@87: compressed gzip format. `loadtxt` understands gzipped files Chris@87: transparently. Chris@87: X : array_like Chris@87: Data to be saved to a text file. Chris@87: fmt : str or sequence of strs, optional Chris@87: A single format (%10.5f), a sequence of formats, or a Chris@87: multi-format string, e.g. 'Iteration %d -- %10.5f', in which Chris@87: case `delimiter` is ignored. 
For complex `X`, the legal options Chris@87: for `fmt` are: Chris@87: a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted Chris@87: like `' (%s+%sj)' % (fmt, fmt)` Chris@87: b) a full string specifying every real and imaginary part, e.g. Chris@87: `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns Chris@87: c) a list of specifiers, one per column - in this case, the real Chris@87: and imaginary part must have separate specifiers, Chris@87: e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns Chris@87: delimiter : str, optional Chris@87: String or character separating columns. Chris@87: newline : str, optional Chris@87: String or character separating lines. Chris@87: Chris@87: .. versionadded:: 1.5.0 Chris@87: header : str, optional Chris@87: String that will be written at the beginning of the file. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: footer : str, optional Chris@87: String that will be written at the end of the file. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: comments : str, optional Chris@87: String that will be prepended to the ``header`` and ``footer`` strings, Chris@87: to mark them as comments. Default: '# ', as expected by e.g. Chris@87: ``numpy.loadtxt``. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: save : Save an array to a binary file in NumPy ``.npy`` format Chris@87: savez : Save several arrays into an uncompressed ``.npz`` archive Chris@87: savez_compressed : Save several arrays into a compressed ``.npz`` archive Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: Further explanation of the `fmt` parameter Chris@87: (``%[flag]width[.precision]specifier``): Chris@87: Chris@87: flags: Chris@87: ``-`` : left justify Chris@87: Chris@87: ``+`` : Forces to precede result with + or -. Chris@87: Chris@87: ``0`` : Left pad the number with zeros instead of space (see width). 
Chris@87: Chris@87: width: Chris@87: Minimum number of characters to be printed. The value is not truncated Chris@87: if it has more characters. Chris@87: Chris@87: precision: Chris@87: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of Chris@87: digits. Chris@87: - For ``e, E`` and ``f`` specifiers, the number of digits to print Chris@87: after the decimal point. Chris@87: - For ``g`` and ``G``, the maximum number of significant digits. Chris@87: - For ``s``, the maximum number of characters. Chris@87: Chris@87: specifiers: Chris@87: ``c`` : character Chris@87: Chris@87: ``d`` or ``i`` : signed decimal integer Chris@87: Chris@87: ``e`` or ``E`` : scientific notation with ``e`` or ``E``. Chris@87: Chris@87: ``f`` : decimal floating point Chris@87: Chris@87: ``g,G`` : use the shorter of ``e,E`` or ``f`` Chris@87: Chris@87: ``o`` : signed octal Chris@87: Chris@87: ``s`` : string of characters Chris@87: Chris@87: ``u`` : unsigned decimal integer Chris@87: Chris@87: ``x,X`` : unsigned hexadecimal integer Chris@87: Chris@87: This explanation of ``fmt`` is not complete, for an exhaustive Chris@87: specification see [1]_. Chris@87: Chris@87: References Chris@87: ---------- Chris@87: .. [1] `Format Specification Mini-Language Chris@87: `_, Python Documentation. 
Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> x = y = z = np.arange(0.0,5.0,1.0) Chris@87: >>> np.savetxt('test.out', x, delimiter=',') # X is an array Chris@87: >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays Chris@87: >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation Chris@87: Chris@87: """ Chris@87: Chris@87: # Py3 conversions first Chris@87: if isinstance(fmt, bytes): Chris@87: fmt = asstr(fmt) Chris@87: delimiter = asstr(delimiter) Chris@87: Chris@87: own_fh = False Chris@87: if _is_string_like(fname): Chris@87: own_fh = True Chris@87: if fname.endswith('.gz'): Chris@87: import gzip Chris@87: fh = gzip.open(fname, 'wb') Chris@87: else: Chris@87: if sys.version_info[0] >= 3: Chris@87: fh = open(fname, 'wb') Chris@87: else: Chris@87: fh = open(fname, 'w') Chris@87: elif hasattr(fname, 'write'): Chris@87: fh = fname Chris@87: else: Chris@87: raise ValueError('fname must be a string or file handle') Chris@87: Chris@87: try: Chris@87: X = np.asarray(X) Chris@87: Chris@87: # Handle 1-dimensional arrays Chris@87: if X.ndim == 1: Chris@87: # Common case -- 1d array of numbers Chris@87: if X.dtype.names is None: Chris@87: X = np.atleast_2d(X).T Chris@87: ncol = 1 Chris@87: Chris@87: # Complex dtype -- each field indicates a separate column Chris@87: else: Chris@87: ncol = len(X.dtype.descr) Chris@87: else: Chris@87: ncol = X.shape[1] Chris@87: Chris@87: iscomplex_X = np.iscomplexobj(X) Chris@87: # `fmt` can be a string with multiple insertion points or a Chris@87: # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') Chris@87: if type(fmt) in (list, tuple): Chris@87: if len(fmt) != ncol: Chris@87: raise AttributeError('fmt has wrong shape. 
%s' % str(fmt)) Chris@87: format = asstr(delimiter).join(map(asstr, fmt)) Chris@87: elif isinstance(fmt, str): Chris@87: n_fmt_chars = fmt.count('%') Chris@87: error = ValueError('fmt has wrong number of %% formats: %s' % fmt) Chris@87: if n_fmt_chars == 1: Chris@87: if iscomplex_X: Chris@87: fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol Chris@87: else: Chris@87: fmt = [fmt, ] * ncol Chris@87: format = delimiter.join(fmt) Chris@87: elif iscomplex_X and n_fmt_chars != (2 * ncol): Chris@87: raise error Chris@87: elif ((not iscomplex_X) and n_fmt_chars != ncol): Chris@87: raise error Chris@87: else: Chris@87: format = fmt Chris@87: else: Chris@87: raise ValueError('invalid fmt: %r' % (fmt,)) Chris@87: Chris@87: if len(header) > 0: Chris@87: header = header.replace('\n', '\n' + comments) Chris@87: fh.write(asbytes(comments + header + newline)) Chris@87: if iscomplex_X: Chris@87: for row in X: Chris@87: row2 = [] Chris@87: for number in row: Chris@87: row2.append(number.real) Chris@87: row2.append(number.imag) Chris@87: fh.write(asbytes(format % tuple(row2) + newline)) Chris@87: else: Chris@87: for row in X: Chris@87: fh.write(asbytes(format % tuple(row) + newline)) Chris@87: if len(footer) > 0: Chris@87: footer = footer.replace('\n', '\n' + comments) Chris@87: fh.write(asbytes(comments + footer + newline)) Chris@87: finally: Chris@87: if own_fh: Chris@87: fh.close() Chris@87: Chris@87: Chris@87: def fromregex(file, regexp, dtype): Chris@87: """ Chris@87: Construct an array from a text file, using regular expression parsing. Chris@87: Chris@87: The returned array is always a structured array, and is constructed from Chris@87: all matches of the regular expression in the file. Groups in the regular Chris@87: expression are converted to fields of the structured array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str or file Chris@87: File name or file object to read. Chris@87: regexp : str or regexp Chris@87: Regular expression used to parse the file. 
Chris@87: Groups in the regular expression correspond to fields in the dtype. Chris@87: dtype : dtype or list of dtypes Chris@87: Dtype for the structured array. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: output : ndarray Chris@87: The output array, containing the part of the content of `file` that Chris@87: was matched by `regexp`. `output` is always a structured array. Chris@87: Chris@87: Raises Chris@87: ------ Chris@87: TypeError Chris@87: When `dtype` is not a valid dtype for a structured array. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: fromstring, loadtxt Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: Dtypes for structured arrays can be specified in several forms, but all Chris@87: forms specify at least the data type and field name. For details see Chris@87: `doc.structured_arrays`. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> f = open('test.dat', 'w') Chris@87: >>> f.write("1312 foo\\n1534 bar\\n444 qux") Chris@87: >>> f.close() Chris@87: Chris@87: >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] Chris@87: >>> output = np.fromregex('test.dat', regexp, Chris@87: ... [('num', np.int64), ('key', 'S3')]) Chris@87: >>> output Chris@87: array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], Chris@87: dtype=[('num', '>> output['num'] Chris@87: array([1312, 1534, 444], dtype=int64) Chris@87: Chris@87: """ Chris@87: own_fh = False Chris@87: if not hasattr(file, "read"): Chris@87: file = open(file, 'rb') Chris@87: own_fh = True Chris@87: Chris@87: try: Chris@87: if not hasattr(regexp, 'match'): Chris@87: regexp = re.compile(asbytes(regexp)) Chris@87: if not isinstance(dtype, np.dtype): Chris@87: dtype = np.dtype(dtype) Chris@87: Chris@87: seq = regexp.findall(file.read()) Chris@87: if seq and not isinstance(seq[0], tuple): Chris@87: # Only one group is in the regexp. Chris@87: # Create the new array as a single data-type and then Chris@87: # re-interpret as a single-field structured array. 
Chris@87: newdtype = np.dtype(dtype[dtype.names[0]]) Chris@87: output = np.array(seq, dtype=newdtype) Chris@87: output.dtype = dtype Chris@87: else: Chris@87: output = np.array(seq, dtype=dtype) Chris@87: Chris@87: return output Chris@87: finally: Chris@87: if own_fh: Chris@87: file.close() Chris@87: Chris@87: Chris@87: #####-------------------------------------------------------------------------- Chris@87: #---- --- ASCII functions --- Chris@87: #####-------------------------------------------------------------------------- Chris@87: Chris@87: Chris@87: def genfromtxt(fname, dtype=float, comments='#', delimiter=None, Chris@87: skiprows=0, skip_header=0, skip_footer=0, converters=None, Chris@87: missing='', missing_values=None, filling_values=None, Chris@87: usecols=None, names=None, Chris@87: excludelist=None, deletechars=None, replace_space='_', Chris@87: autostrip=False, case_sensitive=True, defaultfmt="f%i", Chris@87: unpack=None, usemask=False, loose=True, invalid_raise=True): Chris@87: """ Chris@87: Load data from a text file, with missing values handled as specified. Chris@87: Chris@87: Each line past the first `skip_header` lines is split at the `delimiter` Chris@87: character, and characters following the `comments` character are discarded. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname : file or str Chris@87: File, filename, or generator to read. If the filename extension is Chris@87: `.gz` or `.bz2`, the file is first decompressed. Note that Chris@87: generators must return byte strings in Python 3k. Chris@87: dtype : dtype, optional Chris@87: Data type of the resulting array. Chris@87: If None, the dtypes will be determined by the contents of each Chris@87: column, individually. Chris@87: comments : str, optional Chris@87: The character used to indicate the start of a comment. 
Chris@87: All the characters occurring on a line after a comment are discarded Chris@87: delimiter : str, int, or sequence, optional Chris@87: The string used to separate values. By default, any consecutive Chris@87: whitespaces act as delimiter. An integer or sequence of integers Chris@87: can also be provided as width(s) of each field. Chris@87: skip_rows : int, optional Chris@87: `skip_rows` was deprecated in numpy 1.5, and will be removed in Chris@87: numpy 2.0. Please use `skip_header` instead. Chris@87: skip_header : int, optional Chris@87: The number of lines to skip at the beginning of the file. Chris@87: skip_footer : int, optional Chris@87: The number of lines to skip at the end of the file. Chris@87: converters : variable, optional Chris@87: The set of functions that convert the data of a column to a value. Chris@87: The converters can also be used to provide a default value Chris@87: for missing data: ``converters = {3: lambda s: float(s or 0)}``. Chris@87: missing : variable, optional Chris@87: `missing` was deprecated in numpy 1.5, and will be removed in Chris@87: numpy 2.0. Please use `missing_values` instead. Chris@87: missing_values : variable, optional Chris@87: The set of strings corresponding to missing data. Chris@87: filling_values : variable, optional Chris@87: The set of values to be used as default when the data are missing. Chris@87: usecols : sequence, optional Chris@87: Which columns to read, with 0 being the first. For example, Chris@87: ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. Chris@87: names : {None, True, str, sequence}, optional Chris@87: If `names` is True, the field names are read from the first valid line Chris@87: after the first `skip_header` lines. Chris@87: If `names` is a sequence or a single-string of comma-separated names, Chris@87: the names will be used to define the field names in a structured dtype. Chris@87: If `names` is None, the names of the dtype fields will be used, if any. 
Chris@87: excludelist : sequence, optional Chris@87: A list of names to exclude. This list is appended to the default list Chris@87: ['return','file','print']. Excluded names are appended an underscore: Chris@87: for example, `file` would become `file_`. Chris@87: deletechars : str, optional Chris@87: A string combining invalid characters that must be deleted from the Chris@87: names. Chris@87: defaultfmt : str, optional Chris@87: A format used to define default field names, such as "f%i" or "f_%02i". Chris@87: autostrip : bool, optional Chris@87: Whether to automatically strip white spaces from the variables. Chris@87: replace_space : char, optional Chris@87: Character(s) used in replacement of white spaces in the variables Chris@87: names. By default, use a '_'. Chris@87: case_sensitive : {True, False, 'upper', 'lower'}, optional Chris@87: If True, field names are case sensitive. Chris@87: If False or 'upper', field names are converted to upper case. Chris@87: If 'lower', field names are converted to lower case. Chris@87: unpack : bool, optional Chris@87: If True, the returned array is transposed, so that arguments may be Chris@87: unpacked using ``x, y, z = loadtxt(...)`` Chris@87: usemask : bool, optional Chris@87: If True, return a masked array. Chris@87: If False, return a regular array. Chris@87: loose : bool, optional Chris@87: If True, do not raise errors for invalid values. Chris@87: invalid_raise : bool, optional Chris@87: If True, an exception is raised if an inconsistency is detected in the Chris@87: number of columns. Chris@87: If False, a warning is emitted and the offending lines are skipped. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: out : ndarray Chris@87: Data read from the text file. If `usemask` is True, this is a Chris@87: masked array. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.loadtxt : equivalent function when no data is missing. 
Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: * When spaces are used as delimiters, or when no delimiter has been given Chris@87: as input, there should not be any missing data between two fields. Chris@87: * When the variables are named (either by a flexible dtype or with `names`, Chris@87: there must not be any header in the file (else a ValueError Chris@87: exception is raised). Chris@87: * Individual values are not stripped of spaces by default. Chris@87: When using a custom converter, make sure the function does remove spaces. Chris@87: Chris@87: References Chris@87: ---------- Chris@87: .. [1] Numpy User Guide, section `I/O with Numpy Chris@87: `_. Chris@87: Chris@87: Examples Chris@87: --------- Chris@87: >>> from StringIO import StringIO Chris@87: >>> import numpy as np Chris@87: Chris@87: Comma delimited file with mixed dtype Chris@87: Chris@87: >>> s = StringIO("1,1.3,abcde") Chris@87: >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), Chris@87: ... ('mystring','S5')], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s.seek(0) # needed for StringIO example only Chris@87: >>> data = np.genfromtxt(s, dtype=None, Chris@87: ... names = ['myint','myfloat','mystring'], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s.seek(0) Chris@87: >>> data = np.genfromtxt(s, dtype="i8,f8,S5", Chris@87: ... names=['myint','myfloat','mystring'], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s = StringIO("11.3abcde") Chris@87: >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], Chris@87: ... 
delimiter=[1,3,5]) Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('intvar', ' nbcols): Chris@87: descr = dtype.descr Chris@87: dtype = np.dtype([descr[_] for _ in usecols]) Chris@87: names = list(dtype.names) Chris@87: # If `names` is not None, update the names Chris@87: elif (names is not None) and (len(names) > nbcols): Chris@87: names = [names[_] for _ in usecols] Chris@87: elif (names is not None) and (dtype is not None): Chris@87: names = list(dtype.names) Chris@87: Chris@87: # Process the missing values ............................... Chris@87: # Rename missing_values for convenience Chris@87: user_missing_values = missing_values or () Chris@87: Chris@87: # Define the list of missing_values (one column: one list) Chris@87: missing_values = [list([asbytes('')]) for _ in range(nbcols)] Chris@87: Chris@87: # We have a dictionary: process it field by field Chris@87: if isinstance(user_missing_values, dict): Chris@87: # Loop on the items Chris@87: for (key, val) in user_missing_values.items(): Chris@87: # Is the key a string ? 
Chris@87: if _is_string_like(key): Chris@87: try: Chris@87: # Transform it into an integer Chris@87: key = names.index(key) Chris@87: except ValueError: Chris@87: # We couldn't find it: the name must have been dropped Chris@87: continue Chris@87: # Redefine the key as needed if it's a column number Chris@87: if usecols: Chris@87: try: Chris@87: key = usecols.index(key) Chris@87: except ValueError: Chris@87: pass Chris@87: # Transform the value as a list of string Chris@87: if isinstance(val, (list, tuple)): Chris@87: val = [str(_) for _ in val] Chris@87: else: Chris@87: val = [str(val), ] Chris@87: # Add the value(s) to the current list of missing Chris@87: if key is None: Chris@87: # None acts as default Chris@87: for miss in missing_values: Chris@87: miss.extend(val) Chris@87: else: Chris@87: missing_values[key].extend(val) Chris@87: # We have a sequence : each item matches a column Chris@87: elif isinstance(user_missing_values, (list, tuple)): Chris@87: for (value, entry) in zip(user_missing_values, missing_values): Chris@87: value = str(value) Chris@87: if value not in entry: Chris@87: entry.append(value) Chris@87: # We have a string : apply it to all entries Chris@87: elif isinstance(user_missing_values, bytes): Chris@87: user_value = user_missing_values.split(asbytes(",")) Chris@87: for entry in missing_values: Chris@87: entry.extend(user_value) Chris@87: # We have something else: apply it to all entries Chris@87: else: Chris@87: for entry in missing_values: Chris@87: entry.extend([str(user_missing_values)]) Chris@87: Chris@87: # Process the deprecated `missing` Chris@87: if missing != asbytes(''): Chris@87: warnings.warn( Chris@87: "The use of `missing` is deprecated, it will be removed in " Chris@87: "Numpy 2.0.\nPlease use `missing_values` instead.", Chris@87: DeprecationWarning) Chris@87: values = [str(_) for _ in missing.split(asbytes(","))] Chris@87: for entry in missing_values: Chris@87: entry.extend(values) Chris@87: Chris@87: # Process the 
filling_values ............................... Chris@87: # Rename the input for convenience Chris@87: user_filling_values = filling_values Chris@87: if user_filling_values is None: Chris@87: user_filling_values = [] Chris@87: # Define the default Chris@87: filling_values = [None] * nbcols Chris@87: # We have a dictionary : update each entry individually Chris@87: if isinstance(user_filling_values, dict): Chris@87: for (key, val) in user_filling_values.items(): Chris@87: if _is_string_like(key): Chris@87: try: Chris@87: # Transform it into an integer Chris@87: key = names.index(key) Chris@87: except ValueError: Chris@87: # We couldn't find it: the name must have been dropped, Chris@87: continue Chris@87: # Redefine the key if it's a column number and usecols is defined Chris@87: if usecols: Chris@87: try: Chris@87: key = usecols.index(key) Chris@87: except ValueError: Chris@87: pass Chris@87: # Add the value to the list Chris@87: filling_values[key] = val Chris@87: # We have a sequence : update on a one-to-one basis Chris@87: elif isinstance(user_filling_values, (list, tuple)): Chris@87: n = len(user_filling_values) Chris@87: if (n <= nbcols): Chris@87: filling_values[:n] = user_filling_values Chris@87: else: Chris@87: filling_values = user_filling_values[:nbcols] Chris@87: # We have something else : use it for all entries Chris@87: else: Chris@87: filling_values = [user_filling_values] * nbcols Chris@87: Chris@87: # Initialize the converters ................................ Chris@87: if dtype is None: Chris@87: # Note: we can't use a [...]*nbcols, as we would have 3 times the same Chris@87: # ... converter, instead of 3 different converters. 
Chris@87: converters = [StringConverter(None, missing_values=miss, default=fill) Chris@87: for (miss, fill) in zip(missing_values, filling_values)] Chris@87: else: Chris@87: dtype_flat = flatten_dtype(dtype, flatten_base=True) Chris@87: # Initialize the converters Chris@87: if len(dtype_flat) > 1: Chris@87: # Flexible type : get a converter from each dtype Chris@87: zipit = zip(dtype_flat, missing_values, filling_values) Chris@87: converters = [StringConverter(dt, locked=True, Chris@87: missing_values=miss, default=fill) Chris@87: for (dt, miss, fill) in zipit] Chris@87: else: Chris@87: # Set to a default converter (but w/ different missing values) Chris@87: zipit = zip(missing_values, filling_values) Chris@87: converters = [StringConverter(dtype, locked=True, Chris@87: missing_values=miss, default=fill) Chris@87: for (miss, fill) in zipit] Chris@87: # Update the converters to use the user-defined ones Chris@87: uc_update = [] Chris@87: for (j, conv) in user_converters.items(): Chris@87: # If the converter is specified by column names, use the index instead Chris@87: if _is_string_like(j): Chris@87: try: Chris@87: j = names.index(j) Chris@87: i = j Chris@87: except ValueError: Chris@87: continue Chris@87: elif usecols: Chris@87: try: Chris@87: i = usecols.index(j) Chris@87: except ValueError: Chris@87: # Unused converter specified Chris@87: continue Chris@87: else: Chris@87: i = j Chris@87: # Find the value to test - first_line is not filtered by usecols: Chris@87: if len(first_line): Chris@87: testing_value = first_values[j] Chris@87: else: Chris@87: testing_value = None Chris@87: converters[i].update(conv, locked=True, Chris@87: testing_value=testing_value, Chris@87: default=filling_values[i], Chris@87: missing_values=missing_values[i],) Chris@87: uc_update.append((i, conv)) Chris@87: # Make sure we have the corrected keys in user_converters... 
Chris@87: user_converters.update(uc_update) Chris@87: Chris@87: # Fixme: possible error as following variable never used. Chris@87: #miss_chars = [_.missing_values for _ in converters] Chris@87: Chris@87: # Initialize the output lists ... Chris@87: # ... rows Chris@87: rows = [] Chris@87: append_to_rows = rows.append Chris@87: # ... masks Chris@87: if usemask: Chris@87: masks = [] Chris@87: append_to_masks = masks.append Chris@87: # ... invalid Chris@87: invalid = [] Chris@87: append_to_invalid = invalid.append Chris@87: Chris@87: # Parse each line Chris@87: for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): Chris@87: values = split_line(line) Chris@87: nbvalues = len(values) Chris@87: # Skip an empty line Chris@87: if nbvalues == 0: Chris@87: continue Chris@87: # Select only the columns we need Chris@87: if usecols: Chris@87: try: Chris@87: values = [values[_] for _ in usecols] Chris@87: except IndexError: Chris@87: append_to_invalid((i + skip_header + 1, nbvalues)) Chris@87: continue Chris@87: elif nbvalues != nbcols: Chris@87: append_to_invalid((i + skip_header + 1, nbvalues)) Chris@87: continue Chris@87: # Store the values Chris@87: append_to_rows(tuple(values)) Chris@87: if usemask: Chris@87: append_to_masks(tuple([v.strip() in m Chris@87: for (v, m) in zip(values, missing_values)])) Chris@87: Chris@87: if own_fhd: Chris@87: fhd.close() Chris@87: Chris@87: # Upgrade the converters (if needed) Chris@87: if dtype is None: Chris@87: for (i, converter) in enumerate(converters): Chris@87: current_column = [itemgetter(i)(_m) for _m in rows] Chris@87: try: Chris@87: converter.iterupgrade(current_column) Chris@87: except ConverterLockError: Chris@87: errmsg = "Converter #%i is locked and cannot be upgraded: " % i Chris@87: current_column = map(itemgetter(i), rows) Chris@87: for (j, value) in enumerate(current_column): Chris@87: try: Chris@87: converter.upgrade(value) Chris@87: except (ConverterError, ValueError): Chris@87: errmsg += "(occurred line #%i 
for value '%s')" Chris@87: errmsg %= (j + 1 + skip_header, value) Chris@87: raise ConverterError(errmsg) Chris@87: Chris@87: # Check that we don't have invalid values Chris@87: nbinvalid = len(invalid) Chris@87: if nbinvalid > 0: Chris@87: nbrows = len(rows) + nbinvalid - skip_footer Chris@87: # Construct the error message Chris@87: template = " Line #%%i (got %%i columns instead of %i)" % nbcols Chris@87: if skip_footer > 0: Chris@87: nbinvalid_skipped = len([_ for _ in invalid Chris@87: if _[0] > nbrows + skip_header]) Chris@87: invalid = invalid[:nbinvalid - nbinvalid_skipped] Chris@87: skip_footer -= nbinvalid_skipped Chris@87: # Chris@87: # nbrows -= skip_footer Chris@87: # errmsg = [template % (i, nb) Chris@87: # for (i, nb) in invalid if i < nbrows] Chris@87: # else: Chris@87: errmsg = [template % (i, nb) Chris@87: for (i, nb) in invalid] Chris@87: if len(errmsg): Chris@87: errmsg.insert(0, "Some errors were detected !") Chris@87: errmsg = "\n".join(errmsg) Chris@87: # Raise an exception ? Chris@87: if invalid_raise: Chris@87: raise ValueError(errmsg) Chris@87: # Issue a warning ? Chris@87: else: Chris@87: warnings.warn(errmsg, ConversionWarning) Chris@87: Chris@87: # Strip the last skip_footer data Chris@87: if skip_footer > 0: Chris@87: rows = rows[:-skip_footer] Chris@87: if usemask: Chris@87: masks = masks[:-skip_footer] Chris@87: Chris@87: # Convert each value according to the converter: Chris@87: # We want to modify the list in place to avoid creating a new one... 
Chris@87: if loose: Chris@87: rows = list( Chris@87: zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] Chris@87: for (i, conv) in enumerate(converters)])) Chris@87: else: Chris@87: rows = list( Chris@87: zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] Chris@87: for (i, conv) in enumerate(converters)])) Chris@87: Chris@87: # Reset the dtype Chris@87: data = rows Chris@87: if dtype is None: Chris@87: # Get the dtypes from the types of the converters Chris@87: column_types = [conv.type for conv in converters] Chris@87: # Find the columns with strings... Chris@87: strcolidx = [i for (i, v) in enumerate(column_types) Chris@87: if v in (type('S'), np.string_)] Chris@87: # ... and take the largest number of chars. Chris@87: for i in strcolidx: Chris@87: column_types[i] = "|S%i" % max(len(row[i]) for row in data) Chris@87: # Chris@87: if names is None: Chris@87: # If the dtype is uniform, don't define names, else use '' Chris@87: base = set([c.type for c in converters if c._checked]) Chris@87: if len(base) == 1: Chris@87: (ddtype, mdtype) = (list(base)[0], np.bool) Chris@87: else: Chris@87: ddtype = [(defaultfmt % i, dt) Chris@87: for (i, dt) in enumerate(column_types)] Chris@87: if usemask: Chris@87: mdtype = [(defaultfmt % i, np.bool) Chris@87: for (i, dt) in enumerate(column_types)] Chris@87: else: Chris@87: ddtype = list(zip(names, column_types)) Chris@87: mdtype = list(zip(names, [np.bool] * len(column_types))) Chris@87: output = np.array(data, dtype=ddtype) Chris@87: if usemask: Chris@87: outputmask = np.array(masks, dtype=mdtype) Chris@87: else: Chris@87: # Overwrite the initial dtype names if needed Chris@87: if names and dtype.names: Chris@87: dtype.names = names Chris@87: # Case 1. 
We have a structured type Chris@87: if len(dtype_flat) > 1: Chris@87: # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])] Chris@87: # First, create the array using a flattened dtype: Chris@87: # [('a', int), ('b1', int), ('b2', float)] Chris@87: # Then, view the array using the specified dtype. Chris@87: if 'O' in (_.char for _ in dtype_flat): Chris@87: if has_nested_fields(dtype): Chris@87: raise NotImplementedError( Chris@87: "Nested fields involving objects are not supported...") Chris@87: else: Chris@87: output = np.array(data, dtype=dtype) Chris@87: else: Chris@87: rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) Chris@87: output = rows.view(dtype) Chris@87: # Now, process the rowmasks the same way Chris@87: if usemask: Chris@87: rowmasks = np.array( Chris@87: masks, dtype=np.dtype([('', np.bool) for t in dtype_flat])) Chris@87: # Construct the new dtype Chris@87: mdtype = make_mask_descr(dtype) Chris@87: outputmask = rowmasks.view(mdtype) Chris@87: # Case #2. We have a basic dtype Chris@87: else: Chris@87: # We used some user-defined converters Chris@87: if user_converters: Chris@87: ishomogeneous = True Chris@87: descr = [] Chris@87: for i, ttype in enumerate([conv.type for conv in converters]): Chris@87: # Keep the dtype of the current converter Chris@87: if i in user_converters: Chris@87: ishomogeneous &= (ttype == dtype.type) Chris@87: if ttype == np.string_: Chris@87: ttype = "|S%i" % max(len(row[i]) for row in data) Chris@87: descr.append(('', ttype)) Chris@87: else: Chris@87: descr.append(('', dtype)) Chris@87: # So we changed the dtype ? Chris@87: if not ishomogeneous: Chris@87: # We have more than one field Chris@87: if len(descr) > 1: Chris@87: dtype = np.dtype(descr) Chris@87: # We have only one field: drop the name if not needed. 
Chris@87: else: Chris@87: dtype = np.dtype(ttype) Chris@87: # Chris@87: output = np.array(data, dtype) Chris@87: if usemask: Chris@87: if dtype.names: Chris@87: mdtype = [(_, np.bool) for _ in dtype.names] Chris@87: else: Chris@87: mdtype = np.bool Chris@87: outputmask = np.array(masks, dtype=mdtype) Chris@87: # Try to take care of the missing data we missed Chris@87: names = output.dtype.names Chris@87: if usemask and names: Chris@87: for (name, conv) in zip(names or (), converters): Chris@87: missing_values = [conv(_) for _ in conv.missing_values Chris@87: if _ != asbytes('')] Chris@87: for mval in missing_values: Chris@87: outputmask[name] |= (output[name] == mval) Chris@87: # Construct the final array Chris@87: if usemask: Chris@87: output = output.view(MaskedArray) Chris@87: output._mask = outputmask Chris@87: if unpack: Chris@87: return output.squeeze().T Chris@87: return output.squeeze() Chris@87: Chris@87: Chris@87: def ndfromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a file and return it as a single array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function. Chris@87: Chris@87: """ Chris@87: kwargs['usemask'] = False Chris@87: return genfromtxt(fname, **kwargs) Chris@87: Chris@87: Chris@87: def mafromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a text file and return a masked array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function to load ASCII data. 
Chris@87: Chris@87: """ Chris@87: kwargs['usemask'] = True Chris@87: return genfromtxt(fname, **kwargs) Chris@87: Chris@87: Chris@87: def recfromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data from a file and return it in a record array. Chris@87: Chris@87: If ``usemask=False`` a standard `recarray` is returned, Chris@87: if ``usemask=True`` a MaskedRecords array is returned. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: By default, `dtype` is None, which means that the data-type of the output Chris@87: array will be determined from the data. Chris@87: Chris@87: """ Chris@87: kwargs.setdefault("dtype", None) Chris@87: usemask = kwargs.get('usemask', False) Chris@87: output = genfromtxt(fname, **kwargs) Chris@87: if usemask: Chris@87: from numpy.ma.mrecords import MaskedRecords Chris@87: output = output.view(MaskedRecords) Chris@87: else: Chris@87: output = output.view(np.recarray) Chris@87: return output Chris@87: Chris@87: Chris@87: def recfromcsv(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a comma-separated file. Chris@87: Chris@87: The returned array is a record array (if ``usemask=False``, see Chris@87: `recarray`) or a masked record array (if ``usemask=True``, Chris@87: see `ma.mrecords.MaskedRecords`). Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function to load ASCII data. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: By default, `dtype` is None, which means that the data-type of the output Chris@87: array will be determined from the data. 
Chris@87: Chris@87: """ Chris@87: # Set default kwargs for genfromtxt as relevant to csv import. Chris@87: kwargs.setdefault("case_sensitive", "lower") Chris@87: kwargs.setdefault("names", True) Chris@87: kwargs.setdefault("delimiter", ",") Chris@87: kwargs.setdefault("dtype", None) Chris@87: output = genfromtxt(fname, **kwargs) Chris@87: Chris@87: usemask = kwargs.get("usemask", False) Chris@87: if usemask: Chris@87: from numpy.ma.mrecords import MaskedRecords Chris@87: output = output.view(MaskedRecords) Chris@87: else: Chris@87: output = output.view(np.recarray) Chris@87: return output