Chris@87: from __future__ import division, absolute_import, print_function Chris@87: Chris@87: import sys Chris@87: import os Chris@87: import re Chris@87: import itertools Chris@87: import warnings Chris@87: import weakref Chris@87: from operator import itemgetter Chris@87: Chris@87: import numpy as np Chris@87: from . import format Chris@87: from ._datasource import DataSource Chris@87: from ._compiled_base import packbits, unpackbits Chris@87: from ._iotools import ( Chris@87: LineSplitter, NameValidator, StringConverter, ConverterError, Chris@87: ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields, Chris@87: flatten_dtype, easy_dtype, _bytes_to_name Chris@87: ) Chris@87: Chris@87: from numpy.compat import ( Chris@87: asbytes, asstr, asbytes_nested, bytes, basestring, unicode Chris@87: ) Chris@87: Chris@87: if sys.version_info[0] >= 3: Chris@87: import pickle Chris@87: else: Chris@87: import cPickle as pickle Chris@87: from future_builtins import map Chris@87: Chris@87: loads = pickle.loads Chris@87: Chris@87: __all__ = [ Chris@87: 'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', Chris@87: 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', Chris@87: 'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource' Chris@87: ] Chris@87: Chris@87: Chris@87: def seek_gzip_factory(f): Chris@87: """Use this factory to produce the class so that we can do a lazy Chris@87: import on gzip. 
Chris@87: Chris@87: """ Chris@87: import gzip Chris@87: Chris@87: class GzipFile(gzip.GzipFile): Chris@87: Chris@87: def seek(self, offset, whence=0): Chris@87: # figure out new position (we can only seek forwards) Chris@87: if whence == 1: Chris@87: offset = self.offset + offset Chris@87: Chris@87: if whence not in [0, 1]: Chris@87: raise IOError("Illegal argument") Chris@87: Chris@87: if offset < self.offset: Chris@87: # for negative seek, rewind and do positive seek Chris@87: self.rewind() Chris@87: count = offset - self.offset Chris@87: for i in range(count // 1024): Chris@87: self.read(1024) Chris@87: self.read(count % 1024) Chris@87: Chris@87: def tell(self): Chris@87: return self.offset Chris@87: Chris@87: if isinstance(f, str): Chris@87: f = GzipFile(f) Chris@87: elif isinstance(f, gzip.GzipFile): Chris@87: # cast to our GzipFile if its already a gzip.GzipFile Chris@87: Chris@87: try: Chris@87: name = f.name Chris@87: except AttributeError: Chris@87: # Backward compatibility for <= 2.5 Chris@87: name = f.filename Chris@87: mode = f.mode Chris@87: Chris@87: f = GzipFile(fileobj=f.fileobj, filename=name) Chris@87: f.mode = mode Chris@87: Chris@87: return f Chris@87: Chris@87: Chris@87: class BagObj(object): Chris@87: """ Chris@87: BagObj(obj) Chris@87: Chris@87: Convert attribute look-ups to getitems on the object passed in. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: obj : class instance Chris@87: Object on which attribute look-up is performed. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from numpy.lib.npyio import BagObj as BO Chris@87: >>> class BagDemo(object): Chris@87: ... def __getitem__(self, key): # An instance of BagObj(BagDemo) Chris@87: ... # will call this method when any Chris@87: ... # attribute look-up is required Chris@87: ... result = "Doesn't matter what you want, " Chris@87: ... return result + "you're gonna get this" Chris@87: ... 
def zipfile_factory(*args, **kwargs):
    """Create a ZipFile with ZIP64 extensions always enabled.

    The zipfile import is deferred because zipfile depends on gzip,
    an optional component of the standard library.
    """
    import zipfile
    # allowZip64 lets archives exceed 2 GiB, which .npz files easily can.
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(*args, **kwargs)


class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension,
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute can be performed as an alternative
        to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.io.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        for x in self._files:
            if x.endswith('.npy'):
                # Expose archive members under their stem, without '.npy'.
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = 0
        if key in self._files:
            member = 1
        elif key in self.files:
            member = 1
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            # Peek at the magic prefix to decide between a .npy array
            # and a plain (raw bytes) archive member.
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return files in the archive with a ``.npy`` extension."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)


def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Code to distinguish from NumPy binary files and pickles.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        fid.seek(-N, 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            return NpzFile(fid, own_fid=tmp)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Try a pickle.  Catch Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate to the caller.
            try:
                return pickle.load(fid)
            except Exception:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()
def save(file, arr):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string, a ``.npy``
        extension will be appended to the file name if it does not already
        have one.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Only close the handle if we opened it ourselves; callers keep
    # ownership of file objects they pass in.
    own_fid = False
    if isinstance(file, basestring):
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True
    else:
        fid = file

    try:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr)
    finally:
        if own_fid:
            fid.close()
Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from tempfile import TemporaryFile Chris@87: >>> outfile = TemporaryFile() Chris@87: Chris@87: >>> x = np.arange(10) Chris@87: >>> np.save(outfile, x) Chris@87: Chris@87: >>> outfile.seek(0) # Only needed here to simulate closing & reopening file Chris@87: >>> np.load(outfile) Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: """ Chris@87: own_fid = False Chris@87: if isinstance(file, basestring): Chris@87: if not file.endswith('.npy'): Chris@87: file = file + '.npy' Chris@87: fid = open(file, "wb") Chris@87: own_fid = True Chris@87: else: Chris@87: fid = file Chris@87: Chris@87: try: Chris@87: arr = np.asanyarray(arr) Chris@87: format.write_array(fid, arr) Chris@87: finally: Chris@87: if own_fid: Chris@87: fid.close() Chris@87: Chris@87: Chris@87: def savez(file, *args, **kwds): Chris@87: """ Chris@87: Save several arrays into a single file in uncompressed ``.npz`` format. Chris@87: Chris@87: If arguments are passed in with no keywords, the corresponding variable Chris@87: names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword Chris@87: arguments are given, the corresponding variable names, in the ``.npz`` Chris@87: file will match the keyword names. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str or file Chris@87: Either the file name (string) or an open file (file-like object) Chris@87: where the data will be saved. If file is a string, the ``.npz`` Chris@87: extension will be appended to the file name if it is not already there. Chris@87: args : Arguments, optional Chris@87: Arrays to save to the file. Since it is not possible for Python to Chris@87: know the names of the arrays outside `savez`, the arrays will be saved Chris@87: with names "arr_0", "arr_1", and so on. These arguments can be any Chris@87: expression. Chris@87: kwds : Keyword arguments, optional Chris@87: Arrays to save to the file. 
Arrays will be saved in the file with the Chris@87: keyword names. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: None Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: save : Save a single array to a binary file in NumPy format. Chris@87: savetxt : Save an array to a file as plain text. Chris@87: savez_compressed : Save several arrays into a compressed ``.npz`` archive Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: The ``.npz`` file format is a zipped archive of files named after the Chris@87: variables they contain. The archive is not compressed and each file Chris@87: in the archive contains one variable in ``.npy`` format. For a Chris@87: description of the ``.npy`` format, see `format`. Chris@87: Chris@87: When opening the saved ``.npz`` file with `load` a `NpzFile` object is Chris@87: returned. This is a dictionary-like object which can be queried for Chris@87: its list of arrays (with the ``.files`` attribute), and for the arrays Chris@87: themselves. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> from tempfile import TemporaryFile Chris@87: >>> outfile = TemporaryFile() Chris@87: >>> x = np.arange(10) Chris@87: >>> y = np.sin(x) Chris@87: Chris@87: Using `savez` with \\*args, the arrays are saved with default names. Chris@87: Chris@87: >>> np.savez(outfile, x, y) Chris@87: >>> outfile.seek(0) # Only needed here to simulate closing & reopening file Chris@87: >>> npzfile = np.load(outfile) Chris@87: >>> npzfile.files Chris@87: ['arr_1', 'arr_0'] Chris@87: >>> npzfile['arr_0'] Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: Using `savez` with \\**kwds, the arrays are saved with the keyword names. 
Chris@87: Chris@87: >>> outfile = TemporaryFile() Chris@87: >>> np.savez(outfile, x=x, y=y) Chris@87: >>> outfile.seek(0) Chris@87: >>> npzfile = np.load(outfile) Chris@87: >>> npzfile.files Chris@87: ['y', 'x'] Chris@87: >>> npzfile['x'] Chris@87: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Chris@87: Chris@87: """ Chris@87: _savez(file, args, kwds, False) Chris@87: Chris@87: Chris@87: def savez_compressed(file, *args, **kwds): Chris@87: """ Chris@87: Save several arrays into a single file in compressed ``.npz`` format. Chris@87: Chris@87: If keyword arguments are given, then filenames are taken from the keywords. Chris@87: If arguments are passed in with no keywords, then stored file names are Chris@87: arr_0, arr_1, etc. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str Chris@87: File name of ``.npz`` file. Chris@87: args : Arguments Chris@87: Function arguments. Chris@87: kwds : Keyword arguments Chris@87: Keywords. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.savez : Save several arrays into an uncompressed ``.npz`` file format Chris@87: numpy.load : Load the files created by savez_compressed. Chris@87: Chris@87: """ Chris@87: _savez(file, args, kwds, True) Chris@87: Chris@87: Chris@87: def _savez(file, args, kwds, compress): Chris@87: # Import is postponed to here since zipfile depends on gzip, an optional Chris@87: # component of the so-called standard library. 
Chris@87: import zipfile Chris@87: # Import deferred for startup time improvement Chris@87: import tempfile Chris@87: Chris@87: if isinstance(file, basestring): Chris@87: if not file.endswith('.npz'): Chris@87: file = file + '.npz' Chris@87: Chris@87: namedict = kwds Chris@87: for i, val in enumerate(args): Chris@87: key = 'arr_%d' % i Chris@87: if key in namedict.keys(): Chris@87: raise ValueError( Chris@87: "Cannot use un-named variables and keyword %s" % key) Chris@87: namedict[key] = val Chris@87: Chris@87: if compress: Chris@87: compression = zipfile.ZIP_DEFLATED Chris@87: else: Chris@87: compression = zipfile.ZIP_STORED Chris@87: Chris@87: zipf = zipfile_factory(file, mode="w", compression=compression) Chris@87: Chris@87: # Stage arrays in a temporary file on disk, before writing to zip. Chris@87: fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy') Chris@87: os.close(fd) Chris@87: try: Chris@87: for key, val in namedict.items(): Chris@87: fname = key + '.npy' Chris@87: fid = open(tmpfile, 'wb') Chris@87: try: Chris@87: format.write_array(fid, np.asanyarray(val)) Chris@87: fid.close() Chris@87: fid = None Chris@87: zipf.write(tmpfile, arcname=fname) Chris@87: finally: Chris@87: if fid: Chris@87: fid.close() Chris@87: finally: Chris@87: os.remove(tmpfile) Chris@87: Chris@87: zipf.close() Chris@87: Chris@87: Chris@87: def _getconv(dtype): Chris@87: """ Find the correct dtype converter. 
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a record
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

    """
    # Type conversions for Py3 convenience
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    def flatten_dtype(dt):
        """Unpack a structured data-type, and produce re-packing info.

        NOTE: deliberately shadows the module-level ``flatten_dtype``
        imported from ``._iotools``; this local variant also returns the
        re-packing recipe used by ``pack_items`` below.
        """
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                # dt.fields maps name -> (field dtype, byte offset);
                # renamed from 'bytes' to avoid shadowing the builtin.
                tp, _offset = dt.fields[field]
                flat_dt, flat_packing = flatten_dtype(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    # Remap the converter's column index into the
                    # usecols-filtered column positions.
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            converters[i] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[i] for i in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)
    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X
i.e. Chris@87: # (1, 1, M) for a single row - remove the singleton dimension there Chris@87: if X.ndim == 3 and X.shape[:2] == (1, 1): Chris@87: X.shape = (1, -1) Chris@87: Chris@87: # Verify that the array has at least dimensions `ndmin`. Chris@87: # Check correctness of the values of `ndmin` Chris@87: if ndmin not in [0, 1, 2]: Chris@87: raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) Chris@87: # Tweak the size and shape of the arrays - remove extraneous dimensions Chris@87: if X.ndim > ndmin: Chris@87: X = np.squeeze(X) Chris@87: # and ensure we have the minimum number of dimensions asked for Chris@87: # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0 Chris@87: if X.ndim < ndmin: Chris@87: if ndmin == 1: Chris@87: X = np.atleast_1d(X) Chris@87: elif ndmin == 2: Chris@87: X = np.atleast_2d(X).T Chris@87: Chris@87: if unpack: Chris@87: if len(dtype_types) > 1: Chris@87: # For structured arrays, return an array for each field. Chris@87: return [X[field] for field in dtype.names] Chris@87: else: Chris@87: return X.T Chris@87: else: Chris@87: return X Chris@87: Chris@87: Chris@87: def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', Chris@87: footer='', comments='# '): Chris@87: """ Chris@87: Save an array to a text file. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname : filename or file handle Chris@87: If the filename ends in ``.gz``, the file is automatically saved in Chris@87: compressed gzip format. `loadtxt` understands gzipped files Chris@87: transparently. Chris@87: X : array_like Chris@87: Data to be saved to a text file. Chris@87: fmt : str or sequence of strs, optional Chris@87: A single format (%10.5f), a sequence of formats, or a Chris@87: multi-format string, e.g. 'Iteration %d -- %10.5f', in which Chris@87: case `delimiter` is ignored. 
For complex `X`, the legal options Chris@87: for `fmt` are: Chris@87: a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted Chris@87: like `' (%s+%sj)' % (fmt, fmt)` Chris@87: b) a full string specifying every real and imaginary part, e.g. Chris@87: `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns Chris@87: c) a list of specifiers, one per column - in this case, the real Chris@87: and imaginary part must have separate specifiers, Chris@87: e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns Chris@87: delimiter : str, optional Chris@87: String or character separating columns. Chris@87: newline : str, optional Chris@87: String or character separating lines. Chris@87: Chris@87: .. versionadded:: 1.5.0 Chris@87: header : str, optional Chris@87: String that will be written at the beginning of the file. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: footer : str, optional Chris@87: String that will be written at the end of the file. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: comments : str, optional Chris@87: String that will be prepended to the ``header`` and ``footer`` strings, Chris@87: to mark them as comments. Default: '# ', as expected by e.g. Chris@87: ``numpy.loadtxt``. Chris@87: Chris@87: .. versionadded:: 1.7.0 Chris@87: Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: save : Save an array to a binary file in NumPy ``.npy`` format Chris@87: savez : Save several arrays into an uncompressed ``.npz`` archive Chris@87: savez_compressed : Save several arrays into a compressed ``.npz`` archive Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: Further explanation of the `fmt` parameter Chris@87: (``%[flag]width[.precision]specifier``): Chris@87: Chris@87: flags: Chris@87: ``-`` : left justify Chris@87: Chris@87: ``+`` : Forces to precede result with + or -. Chris@87: Chris@87: ``0`` : Left pad the number with zeros instead of space (see width). 
Chris@87: Chris@87: width: Chris@87: Minimum number of characters to be printed. The value is not truncated Chris@87: if it has more characters. Chris@87: Chris@87: precision: Chris@87: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of Chris@87: digits. Chris@87: - For ``e, E`` and ``f`` specifiers, the number of digits to print Chris@87: after the decimal point. Chris@87: - For ``g`` and ``G``, the maximum number of significant digits. Chris@87: - For ``s``, the maximum number of characters. Chris@87: Chris@87: specifiers: Chris@87: ``c`` : character Chris@87: Chris@87: ``d`` or ``i`` : signed decimal integer Chris@87: Chris@87: ``e`` or ``E`` : scientific notation with ``e`` or ``E``. Chris@87: Chris@87: ``f`` : decimal floating point Chris@87: Chris@87: ``g,G`` : use the shorter of ``e,E`` or ``f`` Chris@87: Chris@87: ``o`` : signed octal Chris@87: Chris@87: ``s`` : string of characters Chris@87: Chris@87: ``u`` : unsigned decimal integer Chris@87: Chris@87: ``x,X`` : unsigned hexadecimal integer Chris@87: Chris@87: This explanation of ``fmt`` is not complete, for an exhaustive Chris@87: specification see [1]_. Chris@87: Chris@87: References Chris@87: ---------- Chris@87: .. [1] `Format Specification Mini-Language Chris@87: `_, Python Documentation. 
Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> x = y = z = np.arange(0.0,5.0,1.0) Chris@87: >>> np.savetxt('test.out', x, delimiter=',') # X is an array Chris@87: >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays Chris@87: >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation Chris@87: Chris@87: """ Chris@87: Chris@87: # Py3 conversions first Chris@87: if isinstance(fmt, bytes): Chris@87: fmt = asstr(fmt) Chris@87: delimiter = asstr(delimiter) Chris@87: Chris@87: own_fh = False Chris@87: if _is_string_like(fname): Chris@87: own_fh = True Chris@87: if fname.endswith('.gz'): Chris@87: import gzip Chris@87: fh = gzip.open(fname, 'wb') Chris@87: else: Chris@87: if sys.version_info[0] >= 3: Chris@87: fh = open(fname, 'wb') Chris@87: else: Chris@87: fh = open(fname, 'w') Chris@87: elif hasattr(fname, 'write'): Chris@87: fh = fname Chris@87: else: Chris@87: raise ValueError('fname must be a string or file handle') Chris@87: Chris@87: try: Chris@87: X = np.asarray(X) Chris@87: Chris@87: # Handle 1-dimensional arrays Chris@87: if X.ndim == 1: Chris@87: # Common case -- 1d array of numbers Chris@87: if X.dtype.names is None: Chris@87: X = np.atleast_2d(X).T Chris@87: ncol = 1 Chris@87: Chris@87: # Complex dtype -- each field indicates a separate column Chris@87: else: Chris@87: ncol = len(X.dtype.descr) Chris@87: else: Chris@87: ncol = X.shape[1] Chris@87: Chris@87: iscomplex_X = np.iscomplexobj(X) Chris@87: # `fmt` can be a string with multiple insertion points or a Chris@87: # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') Chris@87: if type(fmt) in (list, tuple): Chris@87: if len(fmt) != ncol: Chris@87: raise AttributeError('fmt has wrong shape. 
%s' % str(fmt)) Chris@87: format = asstr(delimiter).join(map(asstr, fmt)) Chris@87: elif isinstance(fmt, str): Chris@87: n_fmt_chars = fmt.count('%') Chris@87: error = ValueError('fmt has wrong number of %% formats: %s' % fmt) Chris@87: if n_fmt_chars == 1: Chris@87: if iscomplex_X: Chris@87: fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol Chris@87: else: Chris@87: fmt = [fmt, ] * ncol Chris@87: format = delimiter.join(fmt) Chris@87: elif iscomplex_X and n_fmt_chars != (2 * ncol): Chris@87: raise error Chris@87: elif ((not iscomplex_X) and n_fmt_chars != ncol): Chris@87: raise error Chris@87: else: Chris@87: format = fmt Chris@87: else: Chris@87: raise ValueError('invalid fmt: %r' % (fmt,)) Chris@87: Chris@87: if len(header) > 0: Chris@87: header = header.replace('\n', '\n' + comments) Chris@87: fh.write(asbytes(comments + header + newline)) Chris@87: if iscomplex_X: Chris@87: for row in X: Chris@87: row2 = [] Chris@87: for number in row: Chris@87: row2.append(number.real) Chris@87: row2.append(number.imag) Chris@87: fh.write(asbytes(format % tuple(row2) + newline)) Chris@87: else: Chris@87: for row in X: Chris@87: fh.write(asbytes(format % tuple(row) + newline)) Chris@87: if len(footer) > 0: Chris@87: footer = footer.replace('\n', '\n' + comments) Chris@87: fh.write(asbytes(comments + footer + newline)) Chris@87: finally: Chris@87: if own_fh: Chris@87: fh.close() Chris@87: Chris@87: Chris@87: def fromregex(file, regexp, dtype): Chris@87: """ Chris@87: Construct an array from a text file, using regular expression parsing. Chris@87: Chris@87: The returned array is always a structured array, and is constructed from Chris@87: all matches of the regular expression in the file. Groups in the regular Chris@87: expression are converted to fields of the structured array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: file : str or file Chris@87: File name or file object to read. Chris@87: regexp : str or regexp Chris@87: Regular expression used to parse the file. 
Chris@87: Groups in the regular expression correspond to fields in the dtype. Chris@87: dtype : dtype or list of dtypes Chris@87: Dtype for the structured array. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: output : ndarray Chris@87: The output array, containing the part of the content of `file` that Chris@87: was matched by `regexp`. `output` is always a structured array. Chris@87: Chris@87: Raises Chris@87: ------ Chris@87: TypeError Chris@87: When `dtype` is not a valid dtype for a structured array. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: fromstring, loadtxt Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: Dtypes for structured arrays can be specified in several forms, but all Chris@87: forms specify at least the data type and field name. For details see Chris@87: `doc.structured_arrays`. Chris@87: Chris@87: Examples Chris@87: -------- Chris@87: >>> f = open('test.dat', 'w') Chris@87: >>> f.write("1312 foo\\n1534 bar\\n444 qux") Chris@87: >>> f.close() Chris@87: Chris@87: >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] Chris@87: >>> output = np.fromregex('test.dat', regexp, Chris@87: ... [('num', np.int64), ('key', 'S3')]) Chris@87: >>> output Chris@87: array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], Chris@87: dtype=[('num', '>> output['num'] Chris@87: array([1312, 1534, 444], dtype=int64) Chris@87: Chris@87: """ Chris@87: own_fh = False Chris@87: if not hasattr(file, "read"): Chris@87: file = open(file, 'rb') Chris@87: own_fh = True Chris@87: Chris@87: try: Chris@87: if not hasattr(regexp, 'match'): Chris@87: regexp = re.compile(asbytes(regexp)) Chris@87: if not isinstance(dtype, np.dtype): Chris@87: dtype = np.dtype(dtype) Chris@87: Chris@87: seq = regexp.findall(file.read()) Chris@87: if seq and not isinstance(seq[0], tuple): Chris@87: # Only one group is in the regexp. Chris@87: # Create the new array as a single data-type and then Chris@87: # re-interpret as a single-field structured array. 
Chris@87: newdtype = np.dtype(dtype[dtype.names[0]]) Chris@87: output = np.array(seq, dtype=newdtype) Chris@87: output.dtype = dtype Chris@87: else: Chris@87: output = np.array(seq, dtype=dtype) Chris@87: Chris@87: return output Chris@87: finally: Chris@87: if own_fh: Chris@87: file.close() Chris@87: Chris@87: Chris@87: #####-------------------------------------------------------------------------- Chris@87: #---- --- ASCII functions --- Chris@87: #####-------------------------------------------------------------------------- Chris@87: Chris@87: Chris@87: def genfromtxt(fname, dtype=float, comments='#', delimiter=None, Chris@87: skiprows=0, skip_header=0, skip_footer=0, converters=None, Chris@87: missing='', missing_values=None, filling_values=None, Chris@87: usecols=None, names=None, Chris@87: excludelist=None, deletechars=None, replace_space='_', Chris@87: autostrip=False, case_sensitive=True, defaultfmt="f%i", Chris@87: unpack=None, usemask=False, loose=True, invalid_raise=True): Chris@87: """ Chris@87: Load data from a text file, with missing values handled as specified. Chris@87: Chris@87: Each line past the first `skip_header` lines is split at the `delimiter` Chris@87: character, and characters following the `comments` character are discarded. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname : file or str Chris@87: File, filename, or generator to read. If the filename extension is Chris@87: `.gz` or `.bz2`, the file is first decompressed. Note that Chris@87: generators must return byte strings in Python 3k. Chris@87: dtype : dtype, optional Chris@87: Data type of the resulting array. Chris@87: If None, the dtypes will be determined by the contents of each Chris@87: column, individually. Chris@87: comments : str, optional Chris@87: The character used to indicate the start of a comment. 
Chris@87: All the characters occurring on a line after a comment are discarded Chris@87: delimiter : str, int, or sequence, optional Chris@87: The string used to separate values. By default, any consecutive Chris@87: whitespaces act as delimiter. An integer or sequence of integers Chris@87: can also be provided as width(s) of each field. Chris@87: skip_rows : int, optional Chris@87: `skip_rows` was deprecated in numpy 1.5, and will be removed in Chris@87: numpy 2.0. Please use `skip_header` instead. Chris@87: skip_header : int, optional Chris@87: The number of lines to skip at the beginning of the file. Chris@87: skip_footer : int, optional Chris@87: The number of lines to skip at the end of the file. Chris@87: converters : variable, optional Chris@87: The set of functions that convert the data of a column to a value. Chris@87: The converters can also be used to provide a default value Chris@87: for missing data: ``converters = {3: lambda s: float(s or 0)}``. Chris@87: missing : variable, optional Chris@87: `missing` was deprecated in numpy 1.5, and will be removed in Chris@87: numpy 2.0. Please use `missing_values` instead. Chris@87: missing_values : variable, optional Chris@87: The set of strings corresponding to missing data. Chris@87: filling_values : variable, optional Chris@87: The set of values to be used as default when the data are missing. Chris@87: usecols : sequence, optional Chris@87: Which columns to read, with 0 being the first. For example, Chris@87: ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. Chris@87: names : {None, True, str, sequence}, optional Chris@87: If `names` is True, the field names are read from the first valid line Chris@87: after the first `skip_header` lines. Chris@87: If `names` is a sequence or a single-string of comma-separated names, Chris@87: the names will be used to define the field names in a structured dtype. Chris@87: If `names` is None, the names of the dtype fields will be used, if any. 
Chris@87: excludelist : sequence, optional Chris@87: A list of names to exclude. This list is appended to the default list Chris@87: ['return','file','print']. Excluded names are appended an underscore: Chris@87: for example, `file` would become `file_`. Chris@87: deletechars : str, optional Chris@87: A string combining invalid characters that must be deleted from the Chris@87: names. Chris@87: defaultfmt : str, optional Chris@87: A format used to define default field names, such as "f%i" or "f_%02i". Chris@87: autostrip : bool, optional Chris@87: Whether to automatically strip white spaces from the variables. Chris@87: replace_space : char, optional Chris@87: Character(s) used in replacement of white spaces in the variables Chris@87: names. By default, use a '_'. Chris@87: case_sensitive : {True, False, 'upper', 'lower'}, optional Chris@87: If True, field names are case sensitive. Chris@87: If False or 'upper', field names are converted to upper case. Chris@87: If 'lower', field names are converted to lower case. Chris@87: unpack : bool, optional Chris@87: If True, the returned array is transposed, so that arguments may be Chris@87: unpacked using ``x, y, z = loadtxt(...)`` Chris@87: usemask : bool, optional Chris@87: If True, return a masked array. Chris@87: If False, return a regular array. Chris@87: loose : bool, optional Chris@87: If True, do not raise errors for invalid values. Chris@87: invalid_raise : bool, optional Chris@87: If True, an exception is raised if an inconsistency is detected in the Chris@87: number of columns. Chris@87: If False, a warning is emitted and the offending lines are skipped. Chris@87: Chris@87: Returns Chris@87: ------- Chris@87: out : ndarray Chris@87: Data read from the text file. If `usemask` is True, this is a Chris@87: masked array. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.loadtxt : equivalent function when no data is missing. 
Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: * When spaces are used as delimiters, or when no delimiter has been given Chris@87: as input, there should not be any missing data between two fields. Chris@87: * When the variables are named (either by a flexible dtype or with `names`, Chris@87: there must not be any header in the file (else a ValueError Chris@87: exception is raised). Chris@87: * Individual values are not stripped of spaces by default. Chris@87: When using a custom converter, make sure the function does remove spaces. Chris@87: Chris@87: References Chris@87: ---------- Chris@87: .. [1] Numpy User Guide, section `I/O with Numpy Chris@87: `_. Chris@87: Chris@87: Examples Chris@87: --------- Chris@87: >>> from StringIO import StringIO Chris@87: >>> import numpy as np Chris@87: Chris@87: Comma delimited file with mixed dtype Chris@87: Chris@87: >>> s = StringIO("1,1.3,abcde") Chris@87: >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), Chris@87: ... ('mystring','S5')], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s.seek(0) # needed for StringIO example only Chris@87: >>> data = np.genfromtxt(s, dtype=None, Chris@87: ... names = ['myint','myfloat','mystring'], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s.seek(0) Chris@87: >>> data = np.genfromtxt(s, dtype="i8,f8,S5", Chris@87: ... names=['myint','myfloat','mystring'], delimiter=",") Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('myint', '>> s = StringIO("11.3abcde") Chris@87: >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], Chris@87: ... 
delimiter=[1,3,5]) Chris@87: >>> data Chris@87: array((1, 1.3, 'abcde'), Chris@87: dtype=[('intvar', ' nbcols): Chris@87: descr = dtype.descr Chris@87: dtype = np.dtype([descr[_] for _ in usecols]) Chris@87: names = list(dtype.names) Chris@87: # If `names` is not None, update the names Chris@87: elif (names is not None) and (len(names) > nbcols): Chris@87: names = [names[_] for _ in usecols] Chris@87: elif (names is not None) and (dtype is not None): Chris@87: names = list(dtype.names) Chris@87: Chris@87: # Process the missing values ............................... Chris@87: # Rename missing_values for convenience Chris@87: user_missing_values = missing_values or () Chris@87: Chris@87: # Define the list of missing_values (one column: one list) Chris@87: missing_values = [list([asbytes('')]) for _ in range(nbcols)] Chris@87: Chris@87: # We have a dictionary: process it field by field Chris@87: if isinstance(user_missing_values, dict): Chris@87: # Loop on the items Chris@87: for (key, val) in user_missing_values.items(): Chris@87: # Is the key a string ? 
Chris@87: if _is_string_like(key): Chris@87: try: Chris@87: # Transform it into an integer Chris@87: key = names.index(key) Chris@87: except ValueError: Chris@87: # We couldn't find it: the name must have been dropped Chris@87: continue Chris@87: # Redefine the key as needed if it's a column number Chris@87: if usecols: Chris@87: try: Chris@87: key = usecols.index(key) Chris@87: except ValueError: Chris@87: pass Chris@87: # Transform the value as a list of string Chris@87: if isinstance(val, (list, tuple)): Chris@87: val = [str(_) for _ in val] Chris@87: else: Chris@87: val = [str(val), ] Chris@87: # Add the value(s) to the current list of missing Chris@87: if key is None: Chris@87: # None acts as default Chris@87: for miss in missing_values: Chris@87: miss.extend(val) Chris@87: else: Chris@87: missing_values[key].extend(val) Chris@87: # We have a sequence : each item matches a column Chris@87: elif isinstance(user_missing_values, (list, tuple)): Chris@87: for (value, entry) in zip(user_missing_values, missing_values): Chris@87: value = str(value) Chris@87: if value not in entry: Chris@87: entry.append(value) Chris@87: # We have a string : apply it to all entries Chris@87: elif isinstance(user_missing_values, bytes): Chris@87: user_value = user_missing_values.split(asbytes(",")) Chris@87: for entry in missing_values: Chris@87: entry.extend(user_value) Chris@87: # We have something else: apply it to all entries Chris@87: else: Chris@87: for entry in missing_values: Chris@87: entry.extend([str(user_missing_values)]) Chris@87: Chris@87: # Process the deprecated `missing` Chris@87: if missing != asbytes(''): Chris@87: warnings.warn( Chris@87: "The use of `missing` is deprecated, it will be removed in " Chris@87: "Numpy 2.0.\nPlease use `missing_values` instead.", Chris@87: DeprecationWarning) Chris@87: values = [str(_) for _ in missing.split(asbytes(","))] Chris@87: for entry in missing_values: Chris@87: entry.extend(values) Chris@87: Chris@87: # Process the 
filling_values ............................... Chris@87: # Rename the input for convenience Chris@87: user_filling_values = filling_values Chris@87: if user_filling_values is None: Chris@87: user_filling_values = [] Chris@87: # Define the default Chris@87: filling_values = [None] * nbcols Chris@87: # We have a dictionary : update each entry individually Chris@87: if isinstance(user_filling_values, dict): Chris@87: for (key, val) in user_filling_values.items(): Chris@87: if _is_string_like(key): Chris@87: try: Chris@87: # Transform it into an integer Chris@87: key = names.index(key) Chris@87: except ValueError: Chris@87: # We couldn't find it: the name must have been dropped, Chris@87: continue Chris@87: # Redefine the key if it's a column number and usecols is defined Chris@87: if usecols: Chris@87: try: Chris@87: key = usecols.index(key) Chris@87: except ValueError: Chris@87: pass Chris@87: # Add the value to the list Chris@87: filling_values[key] = val Chris@87: # We have a sequence : update on a one-to-one basis Chris@87: elif isinstance(user_filling_values, (list, tuple)): Chris@87: n = len(user_filling_values) Chris@87: if (n <= nbcols): Chris@87: filling_values[:n] = user_filling_values Chris@87: else: Chris@87: filling_values = user_filling_values[:nbcols] Chris@87: # We have something else : use it for all entries Chris@87: else: Chris@87: filling_values = [user_filling_values] * nbcols Chris@87: Chris@87: # Initialize the converters ................................ Chris@87: if dtype is None: Chris@87: # Note: we can't use a [...]*nbcols, as we would have 3 times the same Chris@87: # ... converter, instead of 3 different converters. 
Chris@87: converters = [StringConverter(None, missing_values=miss, default=fill) Chris@87: for (miss, fill) in zip(missing_values, filling_values)] Chris@87: else: Chris@87: dtype_flat = flatten_dtype(dtype, flatten_base=True) Chris@87: # Initialize the converters Chris@87: if len(dtype_flat) > 1: Chris@87: # Flexible type : get a converter from each dtype Chris@87: zipit = zip(dtype_flat, missing_values, filling_values) Chris@87: converters = [StringConverter(dt, locked=True, Chris@87: missing_values=miss, default=fill) Chris@87: for (dt, miss, fill) in zipit] Chris@87: else: Chris@87: # Set to a default converter (but w/ different missing values) Chris@87: zipit = zip(missing_values, filling_values) Chris@87: converters = [StringConverter(dtype, locked=True, Chris@87: missing_values=miss, default=fill) Chris@87: for (miss, fill) in zipit] Chris@87: # Update the converters to use the user-defined ones Chris@87: uc_update = [] Chris@87: for (j, conv) in user_converters.items(): Chris@87: # If the converter is specified by column names, use the index instead Chris@87: if _is_string_like(j): Chris@87: try: Chris@87: j = names.index(j) Chris@87: i = j Chris@87: except ValueError: Chris@87: continue Chris@87: elif usecols: Chris@87: try: Chris@87: i = usecols.index(j) Chris@87: except ValueError: Chris@87: # Unused converter specified Chris@87: continue Chris@87: else: Chris@87: i = j Chris@87: # Find the value to test - first_line is not filtered by usecols: Chris@87: if len(first_line): Chris@87: testing_value = first_values[j] Chris@87: else: Chris@87: testing_value = None Chris@87: converters[i].update(conv, locked=True, Chris@87: testing_value=testing_value, Chris@87: default=filling_values[i], Chris@87: missing_values=missing_values[i],) Chris@87: uc_update.append((i, conv)) Chris@87: # Make sure we have the corrected keys in user_converters... 
Chris@87: user_converters.update(uc_update) Chris@87: Chris@87: # Fixme: possible error as following variable never used. Chris@87: #miss_chars = [_.missing_values for _ in converters] Chris@87: Chris@87: # Initialize the output lists ... Chris@87: # ... rows Chris@87: rows = [] Chris@87: append_to_rows = rows.append Chris@87: # ... masks Chris@87: if usemask: Chris@87: masks = [] Chris@87: append_to_masks = masks.append Chris@87: # ... invalid Chris@87: invalid = [] Chris@87: append_to_invalid = invalid.append Chris@87: Chris@87: # Parse each line Chris@87: for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): Chris@87: values = split_line(line) Chris@87: nbvalues = len(values) Chris@87: # Skip an empty line Chris@87: if nbvalues == 0: Chris@87: continue Chris@87: # Select only the columns we need Chris@87: if usecols: Chris@87: try: Chris@87: values = [values[_] for _ in usecols] Chris@87: except IndexError: Chris@87: append_to_invalid((i + skip_header + 1, nbvalues)) Chris@87: continue Chris@87: elif nbvalues != nbcols: Chris@87: append_to_invalid((i + skip_header + 1, nbvalues)) Chris@87: continue Chris@87: # Store the values Chris@87: append_to_rows(tuple(values)) Chris@87: if usemask: Chris@87: append_to_masks(tuple([v.strip() in m Chris@87: for (v, m) in zip(values, missing_values)])) Chris@87: Chris@87: if own_fhd: Chris@87: fhd.close() Chris@87: Chris@87: # Upgrade the converters (if needed) Chris@87: if dtype is None: Chris@87: for (i, converter) in enumerate(converters): Chris@87: current_column = [itemgetter(i)(_m) for _m in rows] Chris@87: try: Chris@87: converter.iterupgrade(current_column) Chris@87: except ConverterLockError: Chris@87: errmsg = "Converter #%i is locked and cannot be upgraded: " % i Chris@87: current_column = map(itemgetter(i), rows) Chris@87: for (j, value) in enumerate(current_column): Chris@87: try: Chris@87: converter.upgrade(value) Chris@87: except (ConverterError, ValueError): Chris@87: errmsg += "(occurred line #%i 
for value '%s')" Chris@87: errmsg %= (j + 1 + skip_header, value) Chris@87: raise ConverterError(errmsg) Chris@87: Chris@87: # Check that we don't have invalid values Chris@87: nbinvalid = len(invalid) Chris@87: if nbinvalid > 0: Chris@87: nbrows = len(rows) + nbinvalid - skip_footer Chris@87: # Construct the error message Chris@87: template = " Line #%%i (got %%i columns instead of %i)" % nbcols Chris@87: if skip_footer > 0: Chris@87: nbinvalid_skipped = len([_ for _ in invalid Chris@87: if _[0] > nbrows + skip_header]) Chris@87: invalid = invalid[:nbinvalid - nbinvalid_skipped] Chris@87: skip_footer -= nbinvalid_skipped Chris@87: # Chris@87: # nbrows -= skip_footer Chris@87: # errmsg = [template % (i, nb) Chris@87: # for (i, nb) in invalid if i < nbrows] Chris@87: # else: Chris@87: errmsg = [template % (i, nb) Chris@87: for (i, nb) in invalid] Chris@87: if len(errmsg): Chris@87: errmsg.insert(0, "Some errors were detected !") Chris@87: errmsg = "\n".join(errmsg) Chris@87: # Raise an exception ? Chris@87: if invalid_raise: Chris@87: raise ValueError(errmsg) Chris@87: # Issue a warning ? Chris@87: else: Chris@87: warnings.warn(errmsg, ConversionWarning) Chris@87: Chris@87: # Strip the last skip_footer data Chris@87: if skip_footer > 0: Chris@87: rows = rows[:-skip_footer] Chris@87: if usemask: Chris@87: masks = masks[:-skip_footer] Chris@87: Chris@87: # Convert each value according to the converter: Chris@87: # We want to modify the list in place to avoid creating a new one... 
Chris@87: if loose: Chris@87: rows = list( Chris@87: zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] Chris@87: for (i, conv) in enumerate(converters)])) Chris@87: else: Chris@87: rows = list( Chris@87: zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] Chris@87: for (i, conv) in enumerate(converters)])) Chris@87: Chris@87: # Reset the dtype Chris@87: data = rows Chris@87: if dtype is None: Chris@87: # Get the dtypes from the types of the converters Chris@87: column_types = [conv.type for conv in converters] Chris@87: # Find the columns with strings... Chris@87: strcolidx = [i for (i, v) in enumerate(column_types) Chris@87: if v in (type('S'), np.string_)] Chris@87: # ... and take the largest number of chars. Chris@87: for i in strcolidx: Chris@87: column_types[i] = "|S%i" % max(len(row[i]) for row in data) Chris@87: # Chris@87: if names is None: Chris@87: # If the dtype is uniform, don't define names, else use '' Chris@87: base = set([c.type for c in converters if c._checked]) Chris@87: if len(base) == 1: Chris@87: (ddtype, mdtype) = (list(base)[0], np.bool) Chris@87: else: Chris@87: ddtype = [(defaultfmt % i, dt) Chris@87: for (i, dt) in enumerate(column_types)] Chris@87: if usemask: Chris@87: mdtype = [(defaultfmt % i, np.bool) Chris@87: for (i, dt) in enumerate(column_types)] Chris@87: else: Chris@87: ddtype = list(zip(names, column_types)) Chris@87: mdtype = list(zip(names, [np.bool] * len(column_types))) Chris@87: output = np.array(data, dtype=ddtype) Chris@87: if usemask: Chris@87: outputmask = np.array(masks, dtype=mdtype) Chris@87: else: Chris@87: # Overwrite the initial dtype names if needed Chris@87: if names and dtype.names: Chris@87: dtype.names = names Chris@87: # Case 1. 
We have a structured type Chris@87: if len(dtype_flat) > 1: Chris@87: # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])] Chris@87: # First, create the array using a flattened dtype: Chris@87: # [('a', int), ('b1', int), ('b2', float)] Chris@87: # Then, view the array using the specified dtype. Chris@87: if 'O' in (_.char for _ in dtype_flat): Chris@87: if has_nested_fields(dtype): Chris@87: raise NotImplementedError( Chris@87: "Nested fields involving objects are not supported...") Chris@87: else: Chris@87: output = np.array(data, dtype=dtype) Chris@87: else: Chris@87: rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) Chris@87: output = rows.view(dtype) Chris@87: # Now, process the rowmasks the same way Chris@87: if usemask: Chris@87: rowmasks = np.array( Chris@87: masks, dtype=np.dtype([('', np.bool) for t in dtype_flat])) Chris@87: # Construct the new dtype Chris@87: mdtype = make_mask_descr(dtype) Chris@87: outputmask = rowmasks.view(mdtype) Chris@87: # Case #2. We have a basic dtype Chris@87: else: Chris@87: # We used some user-defined converters Chris@87: if user_converters: Chris@87: ishomogeneous = True Chris@87: descr = [] Chris@87: for i, ttype in enumerate([conv.type for conv in converters]): Chris@87: # Keep the dtype of the current converter Chris@87: if i in user_converters: Chris@87: ishomogeneous &= (ttype == dtype.type) Chris@87: if ttype == np.string_: Chris@87: ttype = "|S%i" % max(len(row[i]) for row in data) Chris@87: descr.append(('', ttype)) Chris@87: else: Chris@87: descr.append(('', dtype)) Chris@87: # So we changed the dtype ? Chris@87: if not ishomogeneous: Chris@87: # We have more than one field Chris@87: if len(descr) > 1: Chris@87: dtype = np.dtype(descr) Chris@87: # We have only one field: drop the name if not needed. 
Chris@87: else: Chris@87: dtype = np.dtype(ttype) Chris@87: # Chris@87: output = np.array(data, dtype) Chris@87: if usemask: Chris@87: if dtype.names: Chris@87: mdtype = [(_, np.bool) for _ in dtype.names] Chris@87: else: Chris@87: mdtype = np.bool Chris@87: outputmask = np.array(masks, dtype=mdtype) Chris@87: # Try to take care of the missing data we missed Chris@87: names = output.dtype.names Chris@87: if usemask and names: Chris@87: for (name, conv) in zip(names or (), converters): Chris@87: missing_values = [conv(_) for _ in conv.missing_values Chris@87: if _ != asbytes('')] Chris@87: for mval in missing_values: Chris@87: outputmask[name] |= (output[name] == mval) Chris@87: # Construct the final array Chris@87: if usemask: Chris@87: output = output.view(MaskedArray) Chris@87: output._mask = outputmask Chris@87: if unpack: Chris@87: return output.squeeze().T Chris@87: return output.squeeze() Chris@87: Chris@87: Chris@87: def ndfromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a file and return it as a single array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function. Chris@87: Chris@87: """ Chris@87: kwargs['usemask'] = False Chris@87: return genfromtxt(fname, **kwargs) Chris@87: Chris@87: Chris@87: def mafromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a text file and return a masked array. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function to load ASCII data. 
Chris@87: Chris@87: """ Chris@87: kwargs['usemask'] = True Chris@87: return genfromtxt(fname, **kwargs) Chris@87: Chris@87: Chris@87: def recfromtxt(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data from a file and return it in a record array. Chris@87: Chris@87: If ``usemask=False`` a standard `recarray` is returned, Chris@87: if ``usemask=True`` a MaskedRecords array is returned. Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: By default, `dtype` is None, which means that the data-type of the output Chris@87: array will be determined from the data. Chris@87: Chris@87: """ Chris@87: kwargs.setdefault("dtype", None) Chris@87: usemask = kwargs.get('usemask', False) Chris@87: output = genfromtxt(fname, **kwargs) Chris@87: if usemask: Chris@87: from numpy.ma.mrecords import MaskedRecords Chris@87: output = output.view(MaskedRecords) Chris@87: else: Chris@87: output = output.view(np.recarray) Chris@87: return output Chris@87: Chris@87: Chris@87: def recfromcsv(fname, **kwargs): Chris@87: """ Chris@87: Load ASCII data stored in a comma-separated file. Chris@87: Chris@87: The returned array is a record array (if ``usemask=False``, see Chris@87: `recarray`) or a masked record array (if ``usemask=True``, Chris@87: see `ma.mrecords.MaskedRecords`). Chris@87: Chris@87: Parameters Chris@87: ---------- Chris@87: fname, kwargs : For a description of input parameters, see `genfromtxt`. Chris@87: Chris@87: See Also Chris@87: -------- Chris@87: numpy.genfromtxt : generic function to load ASCII data. Chris@87: Chris@87: Notes Chris@87: ----- Chris@87: By default, `dtype` is None, which means that the data-type of the output Chris@87: array will be determined from the data. 
Chris@87: Chris@87: """ Chris@87: # Set default kwargs for genfromtxt as relevant to csv import. Chris@87: kwargs.setdefault("case_sensitive", "lower") Chris@87: kwargs.setdefault("names", True) Chris@87: kwargs.setdefault("delimiter", ",") Chris@87: kwargs.setdefault("dtype", None) Chris@87: output = genfromtxt(fname, **kwargs) Chris@87: Chris@87: usemask = kwargs.get("usemask", False) Chris@87: if usemask: Chris@87: from numpy.ma.mrecords import MaskedRecords Chris@87: output = output.view(MaskedRecords) Chris@87: else: Chris@87: output = output.view(np.recarray) Chris@87: return output