from __future__ import division, absolute_import, print_function

import sys
import os
import re
import itertools
import warnings
import weakref
from operator import itemgetter

import numpy as np
from . import format
from ._datasource import DataSource
from ._compiled_base import packbits, unpackbits
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields,
    flatten_dtype, easy_dtype, _bytes_to_name
    )

from numpy.compat import (
    asbytes, asstr, asbytes_nested, bytes, basestring, unicode
    )

if sys.version_info[0] >= 3:
    import pickle
else:
    import cPickle as pickle
    from future_builtins import map

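# Module-level alias for pickle.loads, re-exported through __all__ below.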
loads = pickle.loads

__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]


def seek_gzip_factory(f):
    """Use this factory to produce the class so that we can do a lazy
    import on gzip.

    """
    import gzip

    class GzipFile(gzip.GzipFile):

        def seek(self, offset, whence=0):
            # figure out new position (we can only seek forwards)
            if whence == 1:
                offset = self.offset + offset

            if whence not in [0, 1]:
                raise IOError("Illegal argument")

            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        def tell(self):
            return self.offset

    if isinstance(f, str):
        f = GzipFile(f)
    elif isinstance(f, gzip.GzipFile):
        # cast to our GzipFile if it's already a gzip.GzipFile

        try:
            name = f.name
        except AttributeError:
            # Backward compatibility for <= 2.5
            name = f.filename
        mode = f.mode

        f = GzipFile(fileobj=f.fileobj, filename=name)
        f.mode = mode

    return f


class BagObj(object):
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo(object):
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
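        # Look up _obj via object.__getattribute__ so that this override
        # does not recurse into itself.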
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key)


def zipfile_factory(*args, **kwargs):
    import zipfile
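    # Enable ZIP64 extensions so that archives larger than 2 GiB can be
    # written and read.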
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(*args, **kwargs)


class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.io.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
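        # A key may refer to an archive member with or without the '.npy'
        # extension; check both spellings before giving up.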
        member = 0
        if key in self._files:
            member = 1
        elif key in self.files:
            member = 1
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return files in the archive with a ``.npy`` extension."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)

def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        fid.seek(-N, 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            return NpzFile(fid, own_fid=tmp)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Try a pickle
            try:
                return pickle.load(fid)
            except:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()

def save(file, arr):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string, a ``.npy``
        extension will be appended to the file name if it does not already
        have one.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    own_fid = False
    if isinstance(file, basestring):
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True
    else:
        fid = file

    try:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr)
    finally:
        if own_fid:
            fid.close()

def savez(file, *args, **kwds):
    """
    Save several arrays into a single file in uncompressed ``.npz`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword
    arguments are given, the corresponding variable names, in the ``.npz``
    file, will match the keyword names.

    Parameters
    ----------
    file : str or file
        Either the file name (string) or an open file (file-like object)
        where the data will be saved. If file is a string, the ``.npz``
        extension will be appended to the file name if it is not already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain. The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see `format`.

    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_1', 'arr_0']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['y', 'x']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False)

def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    If keyword arguments are given, then filenames are taken from the keywords.
    If arguments are passed in with no keywords, then stored file names are
    arr_0, arr_1, etc.

    Parameters
    ----------
    file : str
        File name of ``.npz`` file.
    args : Arguments
        Function arguments.
    kwds : Keyword arguments
        Keywords.

    See Also
    --------
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.
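
    Examples
    --------
    A minimal usage sketch: arrays are saved and loaded back exactly as with
    `savez`, only the archive itself is compressed.

    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> np.savez_compressed(outfile, x=x)
    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])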

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile
    # Import deferred for startup time improvement
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    # Stage arrays in a temporary file on disk, before writing to zip.
    fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
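    # mkstemp returns an already-open OS-level descriptor; close it, since the
    # staging file is reopened below with a regular file object for each array.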
    os.close(fd)
    try:
        for key, val in namedict.items():
            fname = key + '.npy'
            fid = open(tmpfile, 'wb')
            try:
                format.write_array(fid, np.asanyarray(val))
                fid.close()
                fid = None
                zipf.write(tmpfile, arcname=fname)
            finally:
                if fid:
                    fid.close()
    finally:
        os.remove(tmpfile)

    zipf.close()


def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """
    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.floating):
        return float
    elif issubclass(typ, np.complex):
        return complex
    elif issubclass(typ, np.bytes_):
        return bytes
    else:
        return str

def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read. If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float. If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array. In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values. By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float. E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``. Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``. When used with a record
        data-type, arrays are returned for each field. Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files. The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

    """
    # Type conversions for Py3 convenience
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    def flatten_dtype(dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
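                        # Fold in each outer dimension: the flat element
                        # count grows by `dim` and the previous packing is
                        # repeated `dim` times.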
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = flatten_dtype(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            converters[i] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[i] for i in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)
    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X

def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# '):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format. `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
                like `' (%s+%sj)' % (fmt, fmt)`
            b) a full string specifying every real and imaginary part, e.g.
                `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns
            c) a list of specifiers, one per column - in this case, the real
                and imaginary part must have separate specifiers,
                e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape. %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
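        # Complex values are written as two columns each (real part followed
        # by imaginary part), matching the doubled format string built above.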
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(format % tuple(row2) + newline))
        else:
            for row in X:
                fh.write(asbytes(format % tuple(row) + newline))
        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        if own_fh:
            fh.close()

def fromregex(file, regexp, dtype):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        File name or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> f.write("1312 foo\\n1534  bar\\n444   qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
          dtype=[('num', '<i8'), ('key', '|S3')])
    >>> output['num']
    array([1312, 1534,  444], dtype=int64)

    """
    own_fh = False
    if not hasattr(file, "read"):
        file = open(file, 'rb')
        own_fh = True

    try:
        if not hasattr(regexp, 'match'):
            regexp = re.compile(asbytes(regexp))
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        seq = regexp.findall(file.read())
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            # re-interpret as a single-field structured array.
            newdtype = np.dtype(dtype[dtype.names[0]])
            output = np.array(seq, dtype=newdtype)
            output.dtype = dtype
        else:
            output = np.array(seq, dtype=dtype)

        return output
    finally:
        if own_fh:
            file.close()


#####--------------------------------------------------------------------------
#---- --- ASCII functions ---
#####--------------------------------------------------------------------------

Chris@87
|
1180 def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
|
Chris@87
|
1181 skiprows=0, skip_header=0, skip_footer=0, converters=None,
|
Chris@87
|
1182 missing='', missing_values=None, filling_values=None,
|
Chris@87
|
1183 usecols=None, names=None,
|
Chris@87
|
1184 excludelist=None, deletechars=None, replace_space='_',
|
Chris@87
|
1185 autostrip=False, case_sensitive=True, defaultfmt="f%i",
|
Chris@87
|
1186 unpack=None, usemask=False, loose=True, invalid_raise=True):
|
Chris@87
|
1187 """
|
Chris@87
|
1188 Load data from a text file, with missing values handled as specified.
|
Chris@87
|
1189
|
Chris@87
|
1190 Each line past the first `skip_header` lines is split at the `delimiter`
|
Chris@87
|
1191 character, and characters following the `comments` character are discarded.
|
Chris@87
|
1192
|
Chris@87
|
1193 Parameters
|
Chris@87
|
1194 ----------
|
Chris@87
|
1195 fname : file or str
|
Chris@87
|
1196 File, filename, or generator to read. If the filename extension is
|
Chris@87
|
1197 `.gz` or `.bz2`, the file is first decompressed. Note that
|
Chris@87
|
1198 generators must return byte strings in Python 3k.
|
Chris@87
|
1199 dtype : dtype, optional
|
Chris@87
|
1200 Data type of the resulting array.
|
Chris@87
|
1201 If None, the dtypes will be determined by the contents of each
|
Chris@87
|
1202 column, individually.
|
Chris@87
|
1203 comments : str, optional
|
Chris@87
|
1204 The character used to indicate the start of a comment.
|
Chris@87
|
1205 All the characters occurring on a line after a comment are discarded
|
Chris@87
|
1206 delimiter : str, int, or sequence, optional
|
Chris@87
|
1207 The string used to separate values. By default, any consecutive
|
Chris@87
|
1208 whitespaces act as delimiter. An integer or sequence of integers
|
Chris@87
|
1209 can also be provided as width(s) of each field.
|
Chris@87
|
1210 skip_rows : int, optional
|
Chris@87
|
1211 `skip_rows` was deprecated in numpy 1.5, and will be removed in
|
Chris@87
|
1212 numpy 2.0. Please use `skip_header` instead.
|
Chris@87
|
1213 skip_header : int, optional
|
Chris@87
|
1214 The number of lines to skip at the beginning of the file.
|
Chris@87
|
1215 skip_footer : int, optional
|
Chris@87
|
1216 The number of lines to skip at the end of the file.
|
Chris@87
|
1217 converters : variable, optional
|
Chris@87
|
1218 The set of functions that convert the data of a column to a value.
|
Chris@87
|
1219 The converters can also be used to provide a default value
|
Chris@87
|
1220 for missing data: ``converters = {3: lambda s: float(s or 0)}``.
|
Chris@87
|
1221 missing : variable, optional
|
Chris@87
|
1222 `missing` was deprecated in numpy 1.5, and will be removed in
|
Chris@87
|
1223 numpy 2.0. Please use `missing_values` instead.
|
Chris@87
|
1224 missing_values : variable, optional
|
Chris@87
|
1225 The set of strings corresponding to missing data.
|
Chris@87
|
1226 filling_values : variable, optional
|
Chris@87
|
1227 The set of values to be used as default when the data are missing.
|
Chris@87
|
    usecols : sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid line
        after the first `skip_header` lines.
        If `names` is a sequence or a single string of comma-separated names,
        the names will be used to define the field names in a structured dtype.
        If `names` is None, the names of the dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return', 'file', 'print']. Excluded names have an underscore
        appended: for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used to replace white spaces in the variable names.
        By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = genfromtxt(...)``
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.

    References
    ----------
    .. [1] Numpy User Guide, section `I/O with Numpy
           <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    --------
    >>> from StringIO import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Using dtype = None

    >>> s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Specifying dtype and names

    >>> s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
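
    An illustrative sketch of filling missing entries with `filling_values`
    (the exact repr may differ slightly between numpy versions)

    >>> s = StringIO("1,,3")
    >>> np.genfromtxt(s, delimiter=",", filling_values=-999)
    array([   1., -999.,    3.])
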
    """
    # Py3 data conversions to bytes, for convenience
    if comments is not None:
        comments = asbytes(comments)
    if isinstance(delimiter, unicode):
        delimiter = asbytes(delimiter)
    if isinstance(missing, unicode):
        missing = asbytes(missing)
    if isinstance(missing_values, (unicode, list, tuple)):
        missing_values = asbytes_nested(missing_values)

    #
    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converters' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

    # Initialize the filehandle, the LineSplitter and the NameValidator
    own_fhd = False
    try:
        if isinstance(fname, basestring):
            if sys.version_info[0] == 2:
                fhd = iter(np.lib._datasource.open(fname, 'rbU'))
            else:
                fhd = iter(np.lib._datasource.open(fname, 'rb'))
            own_fhd = True
        else:
            fhd = iter(fname)
    except TypeError:
        raise TypeError(
            "fname must be a string, filehandle, or generator. "
            "(got %s instead)" % type(fname))

    split_line = LineSplitter(delimiter=delimiter, comments=comments,
                              autostrip=autostrip)._handyman
    validate_names = NameValidator(excludelist=excludelist,
                                   deletechars=deletechars,
                                   case_sensitive=case_sensitive,
                                   replace_space=replace_space)
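    # Illustrative note: with the defaults, validate_names(['max speed',
    # 'return']) would yield ['max_speed', 'return_'] -- spaces are replaced
    # by `replace_space` and names from `excludelist` get a trailing
    # underscore.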

    # Get the first valid lines after the first skiprows ones ..
    if skiprows:
        warnings.warn(
            "The use of `skiprows` is deprecated, it will be removed in "
            "numpy 2.0.\nPlease use `skip_header` instead.",
            DeprecationWarning)
        skip_header = skiprows
    # Skip the first `skip_header` rows
    for i in range(skip_header):
        next(fhd)

    # Keep on until we find the first valid values
    first_values = None
    try:
        while not first_values:
            first_line = next(fhd)
            if names is True:
                if comments in first_line:
                    first_line = (
                        asbytes('').join(first_line.split(comments)[1:]))
            first_values = split_line(first_line)
    except StopIteration:
        # return an empty array if the datafile is empty
        first_line = asbytes('')
        first_values = []
        warnings.warn('genfromtxt: Empty input file: "%s"' % fname)

    # Should we take the first values as names?
    if names is True:
        fval = first_values[0].strip()
        if fval in comments:
            del first_values[0]

    # Check the columns to use: make sure `usecols` is a list
    if usecols is not None:
        try:
            usecols = [_.strip() for _ in usecols.split(",")]
        except AttributeError:
            try:
                usecols = list(usecols)
            except TypeError:
                usecols = [usecols, ]
    nbcols = len(usecols or first_values)
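    # Illustrative note: at this point usecols="a, c" has become ['a', 'c'],
    # usecols=(1, 3) has become [1, 3] and a scalar usecols=2 has become [2];
    # string entries are matched against `names` and negative indices are
    # wrapped further below.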

    # Check the names and overwrite the dtype.names if needed
    if names is True:
        names = validate_names([_bytes_to_name(_.strip())
                                for _ in first_values])
        first_line = asbytes('')
    elif _is_string_like(names):
        names = validate_names([_.strip() for _ in names.split(',')])
    elif names:
        names = validate_names(names)
    # Get the dtype
    if dtype is not None:
        dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names)
    # Make sure the names is a list (for 2.5)
    if names is not None:
        names = list(names)

    if usecols:
        for (i, current) in enumerate(usecols):
            # if usecols is a list of names, convert to a list of indices
            if _is_string_like(current):
                usecols[i] = names.index(current)
            elif current < 0:
                usecols[i] = current + len(first_values)
        # If the dtype is not None, make sure we update it
        if (dtype is not None) and (len(dtype) > nbcols):
            descr = dtype.descr
            dtype = np.dtype([descr[_] for _ in usecols])
            names = list(dtype.names)
        # If `names` is not None, update the names
        elif (names is not None) and (len(names) > nbcols):
            names = [names[_] for _ in usecols]
    elif (names is not None) and (dtype is not None):
        names = list(dtype.names)

    # Process the missing values ...............................
    # Rename missing_values for convenience
    user_missing_values = missing_values or ()

    # Define the list of missing_values (one column: one list)
    missing_values = [list([asbytes('')]) for _ in range(nbcols)]

    # We have a dictionary: process it field by field
    if isinstance(user_missing_values, dict):
        # Loop on the items
        for (key, val) in user_missing_values.items():
            # Is the key a string?
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped
                    continue
            # Redefine the key as needed if it's a column number
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Transform the value into a list of strings
            if isinstance(val, (list, tuple)):
                val = [str(_) for _ in val]
            else:
                val = [str(val), ]
            # Add the value(s) to the current list of missing
            if key is None:
                # None acts as default
                for miss in missing_values:
                    miss.extend(val)
            else:
                missing_values[key].extend(val)
    # We have a sequence: each item matches a column
    elif isinstance(user_missing_values, (list, tuple)):
        for (value, entry) in zip(user_missing_values, missing_values):
            value = str(value)
            if value not in entry:
                entry.append(value)
    # We have a string: apply it to all entries
    elif isinstance(user_missing_values, bytes):
        user_value = user_missing_values.split(asbytes(","))
        for entry in missing_values:
            entry.extend(user_value)
    # We have something else: apply it to all entries
    else:
        for entry in missing_values:
            entry.extend([str(user_missing_values)])
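    # Illustrative summary of the forms accepted above:
    #   missing_values={'a': "N/A", None: "???"}  -> per column, None = all
    #   missing_values=("N/A", "???")             -> one entry per column
    #   missing_values="N/A,???"                  -> applied to every column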

    # Process the deprecated `missing`
    if missing != asbytes(''):
        warnings.warn(
            "The use of `missing` is deprecated, it will be removed in "
            "Numpy 2.0.\nPlease use `missing_values` instead.",
            DeprecationWarning)
        values = [str(_) for _ in missing.split(asbytes(","))]
        for entry in missing_values:
            entry.extend(values)

    # Process the filling_values ...............................
    # Rename the input for convenience
    user_filling_values = filling_values
    if user_filling_values is None:
        user_filling_values = []
    # Define the default
    filling_values = [None] * nbcols
    # We have a dictionary: update each entry individually
    if isinstance(user_filling_values, dict):
        for (key, val) in user_filling_values.items():
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped
                    continue
            # Redefine the key if it's a column number and usecols is defined
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Add the value to the list
            filling_values[key] = val
    # We have a sequence: update on a one-to-one basis
    elif isinstance(user_filling_values, (list, tuple)):
        n = len(user_filling_values)
        if (n <= nbcols):
            filling_values[:n] = user_filling_values
        else:
            filling_values = user_filling_values[:nbcols]
    # We have something else: use it for all entries
    else:
        filling_values = [user_filling_values] * nbcols
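    # Illustrative summary of the forms accepted above:
    #   filling_values={'a': 0}      -> fill for a single named column
    #   filling_values=(0, 0.0, "")  -> one fill value per column
    #   filling_values=0             -> the same fill for every column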

    # Initialize the converters ................................
    if dtype is None:
        # Note: we can't use a [...]*nbcols, as we would have 3 times the same
        # ... converter, instead of 3 different converters.
        converters = [StringConverter(None, missing_values=miss, default=fill)
                      for (miss, fill) in zip(missing_values, filling_values)]
    else:
        dtype_flat = flatten_dtype(dtype, flatten_base=True)
        # Initialize the converters
        if len(dtype_flat) > 1:
            # Flexible type: get a converter from each dtype
            zipit = zip(dtype_flat, missing_values, filling_values)
            converters = [StringConverter(dt, locked=True,
                                          missing_values=miss, default=fill)
                          for (dt, miss, fill) in zipit]
        else:
            # Set to a default converter (but w/ different missing values)
            zipit = zip(missing_values, filling_values)
            converters = [StringConverter(dtype, locked=True,
                                          missing_values=miss, default=fill)
                          for (miss, fill) in zipit]
    # Update the converters to use the user-defined ones
    uc_update = []
    for (j, conv) in user_converters.items():
        # If the converter is specified by column names, use the index instead
        if _is_string_like(j):
            try:
                j = names.index(j)
                i = j
            except ValueError:
                continue
        elif usecols:
            try:
                i = usecols.index(j)
            except ValueError:
                # Unused converter specified
                continue
        else:
            i = j
        # Find the value to test - first_line is not filtered by usecols:
        if len(first_line):
            testing_value = first_values[j]
        else:
            testing_value = None
        converters[i].update(conv, locked=True,
                             testing_value=testing_value,
                             default=filling_values[i],
                             missing_values=missing_values[i],)
        uc_update.append((i, conv))
    # Make sure we have the corrected keys in user_converters...
    user_converters.update(uc_update)

    # Fixme: possible error as following variable never used.
    #miss_chars = [_.missing_values for _ in converters]

    # Initialize the output lists ...
    # ... rows
    rows = []
    append_to_rows = rows.append
    # ... masks
    if usemask:
        masks = []
        append_to_masks = masks.append
    # ... invalid
    invalid = []
    append_to_invalid = invalid.append

    # Parse each line
    for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
        values = split_line(line)
        nbvalues = len(values)
        # Skip an empty line
        if nbvalues == 0:
            continue
        # Select only the columns we need
        if usecols:
            try:
                values = [values[_] for _ in usecols]
            except IndexError:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue
        elif nbvalues != nbcols:
            append_to_invalid((i + skip_header + 1, nbvalues))
            continue
        # Store the values
        append_to_rows(tuple(values))
        if usemask:
            append_to_masks(tuple([v.strip() in m
                                   for (v, m) in zip(values, missing_values)]))

    if own_fhd:
        fhd.close()

    # Upgrade the converters (if needed)
    if dtype is None:
        for (i, converter) in enumerate(converters):
            current_column = [itemgetter(i)(_m) for _m in rows]
            try:
                converter.iterupgrade(current_column)
            except ConverterLockError:
                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                current_column = map(itemgetter(i), rows)
                for (j, value) in enumerate(current_column):
                    try:
                        converter.upgrade(value)
                    except (ConverterError, ValueError):
                        errmsg += "(occurred line #%i for value '%s')"
                        errmsg %= (j + 1 + skip_header, value)
                        raise ConverterError(errmsg)

    # Check that we don't have invalid values
    nbinvalid = len(invalid)
    if nbinvalid > 0:
        nbrows = len(rows) + nbinvalid - skip_footer
        # Construct the error message
        template = "    Line #%%i (got %%i columns instead of %i)" % nbcols
        if skip_footer > 0:
            nbinvalid_skipped = len([_ for _ in invalid
                                     if _[0] > nbrows + skip_header])
            invalid = invalid[:nbinvalid - nbinvalid_skipped]
            skip_footer -= nbinvalid_skipped
#
#            nbrows -= skip_footer
#            errmsg = [template % (i, nb)
#                      for (i, nb) in invalid if i < nbrows]
#        else:
        errmsg = [template % (i, nb)
                  for (i, nb) in invalid]
        if len(errmsg):
            errmsg.insert(0, "Some errors were detected !")
            errmsg = "\n".join(errmsg)
            # Raise an exception?
            if invalid_raise:
                raise ValueError(errmsg)
            # Issue a warning?
            else:
                warnings.warn(errmsg, ConversionWarning)

    # Strip the last skip_footer data
    if skip_footer > 0:
        rows = rows[:-skip_footer]
        if usemask:
            masks = masks[:-skip_footer]

    # Convert each value according to the converter:
    # We want to modify the list in place to avoid creating a new one...
    if loose:
        rows = list(
            zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))
    else:
        rows = list(
            zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))

    # Reset the dtype
    data = rows
    if dtype is None:
        # Get the dtypes from the types of the converters
        column_types = [conv.type for conv in converters]
        # Find the columns with strings...
        strcolidx = [i for (i, v) in enumerate(column_types)
                     if v in (type('S'), np.string_)]
        # ... and take the largest number of chars.
        for i in strcolidx:
            column_types[i] = "|S%i" % max(len(row[i]) for row in data)
        #
        if names is None:
            # If the dtype is uniform, don't define names, else use ''
            base = set([c.type for c in converters if c._checked])
            if len(base) == 1:
                (ddtype, mdtype) = (list(base)[0], np.bool)
            else:
                ddtype = [(defaultfmt % i, dt)
                          for (i, dt) in enumerate(column_types)]
                if usemask:
                    mdtype = [(defaultfmt % i, np.bool)
                              for (i, dt) in enumerate(column_types)]
        else:
            ddtype = list(zip(names, column_types))
            mdtype = list(zip(names, [np.bool] * len(column_types)))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed
        if names and dtype.names:
            dtype.names = names
        # Case 1. We have a structured type
        if len(dtype_flat) > 1:
            # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
            # First, create the array using a flattened dtype:
            # [('a', int), ('b1', int), ('b2', float)]
            # Then, view the array using the specified dtype.
            if 'O' in (_.char for _ in dtype_flat):
                if has_nested_fields(dtype):
                    raise NotImplementedError(
                        "Nested fields involving objects are not supported...")
                else:
                    output = np.array(data, dtype=dtype)
            else:
                rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                output = rows.view(dtype)
            # Now, process the rowmasks the same way
            if usemask:
                rowmasks = np.array(
                    masks, dtype=np.dtype([('', np.bool) for t in dtype_flat]))
                # Construct the new dtype
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)
        # Case 2. We have a basic dtype
        else:
            # We used some user-defined converters
            if user_converters:
                ishomogeneous = True
                descr = []
                for i, ttype in enumerate([conv.type for conv in converters]):
                    # Keep the dtype of the current converter
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if ttype == np.string_:
                            ttype = "|S%i" % max(len(row[i]) for row in data)
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))
                # So we changed the dtype?
                if not ishomogeneous:
                    # We have more than one field
                    if len(descr) > 1:
                        dtype = np.dtype(descr)
                    # We have only one field: drop the name if not needed.
                    else:
                        dtype = np.dtype(ttype)
            #
            output = np.array(data, dtype)
            if usemask:
                if dtype.names:
                    mdtype = [(_, np.bool) for _ in dtype.names]
                else:
                    mdtype = np.bool
                outputmask = np.array(masks, dtype=mdtype)
    # Try to take care of the missing data we missed
    names = output.dtype.names
    if usemask and names:
        for (name, conv) in zip(names or (), converters):
            missing_values = [conv(_) for _ in conv.missing_values
                              if _ != asbytes('')]
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)
    # Construct the final array
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    if unpack:
        return output.squeeze().T
    return output.squeeze()


def ndfromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a file and return it as a single array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function.

    """
    kwargs['usemask'] = False
    return genfromtxt(fname, **kwargs)


def mafromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a text file and return a masked array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

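    Examples
    --------
    A minimal sketch: a list of strings stands in for a file (on Python 3
    the lines would need to be bytes), the empty second field of the second
    row is masked, and the exact repr may vary between numpy versions.

    >>> arr = np.mafromtxt(["1,2", ",4"], delimiter=",")
    >>> arr.mask
    array([[False, False],
           [ True, False]], dtype=bool)
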
    """
    kwargs['usemask'] = True
    return genfromtxt(fname, **kwargs)


def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned,
    if ``usemask=True`` a MaskedRecords array is returned.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

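    Examples
    --------
    A minimal sketch: a list of strings stands in for a file (on Python 3
    the lines would need to be bytes), and the exact repr may vary between
    numpy versions.

    >>> r = np.recfromtxt(["a b", "1 10", "2 20"], names=True)
    >>> r.b
    array([10, 20])
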
    """
    kwargs.setdefault("dtype", None)
    usemask = kwargs.get('usemask', False)
    output = genfromtxt(fname, **kwargs)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output


def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

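    Examples
    --------
    A minimal sketch: a list of strings stands in for a CSV file (on
    Python 3 the lines would need to be bytes), and the lower-case field
    names come from the default ``case_sensitive='lower'`` setting.

    >>> data = np.recfromcsv(["A,B", "1,2", "3,4"])
    >>> data.a
    array([1, 3])
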
    """
    # Set default kwargs for genfromtxt as relevant to csv import.
    kwargs.setdefault("case_sensitive", "lower")
    kwargs.setdefault("names", True)
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("dtype", None)
    output = genfromtxt(fname, **kwargs)

    usemask = kwargs.get("usemask", False)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output