Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/npyio.py @ 87:2a2c65a20a8b
Add Python libs and headers
author | Chris Cannam |
---|---|
date | Wed, 25 Feb 2015 14:05:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
86:413a9d26189e | 87:2a2c65a20a8b |
---|---|
1 from __future__ import division, absolute_import, print_function | |
2 | |
3 import sys | |
4 import os | |
5 import re | |
6 import itertools | |
7 import warnings | |
8 import weakref | |
9 from operator import itemgetter | |
10 | |
11 import numpy as np | |
12 from . import format | |
13 from ._datasource import DataSource | |
14 from ._compiled_base import packbits, unpackbits | |
15 from ._iotools import ( | |
16 LineSplitter, NameValidator, StringConverter, ConverterError, | |
17 ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields, | |
18 flatten_dtype, easy_dtype, _bytes_to_name | |
19 ) | |
20 | |
21 from numpy.compat import ( | |
22 asbytes, asstr, asbytes_nested, bytes, basestring, unicode | |
23 ) | |
24 | |
25 if sys.version_info[0] >= 3: | |
26 import pickle | |
27 else: | |
28 import cPickle as pickle | |
29 from future_builtins import map | |
30 | |
31 loads = pickle.loads | |
32 | |
# Public names exported by this module; presumably re-exported via
# numpy.lib (TODO confirm against numpy/lib/__init__.py). Note that
# 'genfromtxt' and friends are listed here but defined later in the
# file / not visible in this chunk.
__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]
38 | |
39 | |
def seek_gzip_factory(f):
    """Use this factory to produce the class so that we can do a lazy
    import on gzip.

    Returns a GzipFile-like object wrapping `f` (a filename string or an
    existing ``gzip.GzipFile``) whose ``seek``/``tell`` work in terms of
    the *uncompressed* stream position.
    """
    import gzip

    class GzipFile(gzip.GzipFile):
        # NOTE(review): this subclass reads ``self.offset``, which looks
        # like an internal attribute of gzip.GzipFile tracking the
        # uncompressed position — confirm against the targeted Python
        # versions before touching this code.

        def seek(self, offset, whence=0):
            # figure out new position (we can only seek forwards)
            if whence == 1:
                # whence == 1: offset is relative to current position
                offset = self.offset + offset

            # Only absolute (0) and relative (1) seeks are supported;
            # seeking from the end would require decompressing everything.
            if whence not in [0, 1]:
                raise IOError("Illegal argument")

            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            # Advance by decompressing and discarding data in 1 KiB
            # chunks until the target offset is reached.
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        def tell(self):
            # Report position in the uncompressed stream.
            return self.offset

    if isinstance(f, str):
        # A filename: open it directly with the seekable subclass.
        f = GzipFile(f)
    elif isinstance(f, gzip.GzipFile):
        # cast to our GzipFile if its already a gzip.GzipFile

        try:
            name = f.name
        except AttributeError:
            # Backward compatibility for <= 2.5
            name = f.filename
        mode = f.mode

        # Re-wrap the underlying raw file object so the seekable
        # subclass takes over, preserving the original name and mode.
        f = GzipFile(fileobj=f.fileobj, filename=name)
        f.mode = mode

    return f
84 | |
85 | |
class BagObj(object):
    """
    BagObj(obj)

    Expose the ``__getitem__`` interface of ``obj`` through attribute
    access.

    Any attribute looked up on a `BagObj` instance is forwarded as an
    item access on the wrapped object, so ``bag.key`` behaves like
    ``obj['key']``. A ``KeyError`` raised by the wrapped object is
    reported to the caller as an ``AttributeError``, matching normal
    attribute semantics.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo(object):
    ...     def __getitem__(self, key):
    ...         return "Doesn't matter what you want, you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Hold only a weak proxy so that wrapping an object (e.g. an
        # NpzFile, which stores a BagObj back on itself) does not create
        # a reference cycle that defeats refcount-based collection.
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        # Reach our own '_obj' through object.__getattribute__ to avoid
        # recursing into this method, then delegate the lookup as an
        # item access on the target.
        target = object.__getattribute__(self, '_obj')
        try:
            return target[key]
        except KeyError:
            raise AttributeError(key)
125 | |
126 | |
def zipfile_factory(*args, **kwargs):
    """Create a ``zipfile.ZipFile`` with Zip64 extensions enabled.

    All positional and keyword arguments are forwarded to
    ``zipfile.ZipFile``; ``allowZip64`` is always forced to True so that
    archives larger than 2 GiB can be written. ``zipfile`` is imported
    lazily to keep module import cheap.
    """
    import zipfile

    # Unconditionally enable Zip64, overriding any caller-supplied value.
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(*args, **kwargs)
131 | |
132 | |
class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension,
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute can be performed as an alternative
        to getitem access on the `NpzFile` instance itself.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.io.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False):
        # zipfile is imported lazily (inside zipfile_factory) since it
        # depends on gzip, an optional component of the so-called
        # standard library.
        _zip = zipfile_factory(fid)
        # _files holds the raw archive member names; files holds the
        # user-facing names with any '.npy' suffix stripped.
        self._files = _zip.namelist()
        self.files = []
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        # BagObj enables attribute-style access: npz.f.key == npz['key'].
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        # Support the context-manager protocol: `with np.load(...) as npz:`.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        """Return the array (or raw bytes) stored under `key`."""
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        #
        # `key` may be given with or without the '.npy' suffix; map the
        # suffix-less form back to the real archive member name.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            # Peek at the magic prefix to decide whether the member is a
            # .npy array or an arbitrary (opaque) file.
            # NOTE: local renamed from `bytes` to avoid shadowing the
            # builtin / numpy.compat import of the same name.
            stream = self.zip.open(key)
            magic = stream.read(len(format.MAGIC_PREFIX))
            stream.close()
            if magic == format.MAGIC_PREFIX:
                # Reopen from the start so read_array sees the header too.
                stream = self.zip.open(key)
                return format.read_array(stream)
            else:
                # Not a .npy file: return the raw bytes.
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return files in the archive with a ``.npy`` extension."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)
282 | |
283 | |
def load(file, mmap_mode=None):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object or string
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    import gzip

    own_fid = False
    if isinstance(file, basestring):
        # A path: we open it ourselves, so we are responsible for closing.
        fid = open(file, "rb")
        own_fid = True
    elif isinstance(file, gzip.GzipFile):
        # Wrap in a seekable GzipFile so the magic-sniffing below works.
        fid = seek_gzip_factory(file)
    else:
        fid = file

    try:
        # Code to distinguish from NumPy binary files and pickles.
        _ZIP_PREFIX = asbytes('PK\x03\x04')
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        fid.seek(-N, 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            return NpzFile(fid, own_fid=tmp)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid)
        else:
            # Try a pickle
            try:
                return pickle.load(fid)
            # Narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are not swallowed and misreported as a
            # pickle-interpretation failure.
            except Exception:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))
    finally:
        if own_fid:
            fid.close()
404 | |
405 | |
def save(file, arr):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file or str
        File or filename to which the data is saved. A string filename
        gets a ``.npy`` extension appended if it does not already have
        one; a file object is used (and left open) as-is.
    arr : array_like
        Array data to be saved.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see `format`.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    if isinstance(file, basestring):
        # A path was given: normalise the extension and open the file
        # ourselves, which makes us responsible for closing it.
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True
    else:
        # Caller-supplied file object; the caller keeps ownership.
        fid = file
        own_fid = False

    try:
        format.write_array(fid, np.asanyarray(arr))
    finally:
        if own_fid:
            fid.close()
457 | |
458 | |
def savez(file, *args, **kwds):
    """
    Save several arrays into a single file in uncompressed ``.npz`` format.

    Arrays passed positionally are stored under the names 'arr_0',
    'arr_1', and so on; arrays passed as keyword arguments are stored
    under the corresponding keyword names.

    Parameters
    ----------
    file : str or file
        Either the file name (string) or an open file (file-like object)
        where the data will be saved. If file is a string, the ``.npz``
        extension will be appended to the file name if it is not already
        there.
    args : Arguments, optional
        Arrays to save to the file, stored as "arr_0", "arr_1", etc.
        since their names are not known to `savez`.
    kwds : Keyword arguments, optional
        Arrays to save to the file under the keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain; the archive is not compressed, and each member
    holds one variable in ``.npy`` format (see `format`). Opening the
    result with `load` returns a dictionary-like `NpzFile` whose keys can
    be listed through its ``.files`` attribute.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['y', 'x']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Delegate to the shared implementation with compression disabled.
    return _savez(file, args, kwds, False)
535 | |
536 | |
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Arrays passed as keyword arguments are stored under the keyword
    names; arrays passed positionally are stored as arr_0, arr_1, etc.

    Parameters
    ----------
    file : str
        File name of ``.npz`` file.
    args : Arguments
        Function arguments.
    kwds : Keyword arguments
        Keywords.

    See Also
    --------
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    """
    # Delegate to the shared implementation with compression enabled.
    return _savez(file, args, kwds, True)
561 | |
562 | |
def _savez(file, args, kwds, compress):
    """Shared implementation for `savez` and `savez_compressed`.

    Writes each array in `args`/`kwds` as a ``.npy`` member of a zip
    archive at `file`. Positional arrays are named ``arr_0``, ``arr_1``,
    ...; a clash between such a generated name and an explicit keyword
    raises ValueError.
    """
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile
    # Import deferred for startup time improvement
    import tempfile

    if isinstance(file, basestring):
        if not file.endswith('.npz'):
            file = file + '.npz'

    # Merge positional arrays into the keyword dict under generated names.
    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    # Stage arrays in a temporary file on disk, before writing to zip.
    fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy')
    os.close(fd)
    try:
        for key, val in namedict.items():
            fname = key + '.npy'
            fid = open(tmpfile, 'wb')
            try:
                format.write_array(fid, np.asanyarray(val))
                fid.close()
                fid = None
                zipf.write(tmpfile, arcname=fname)
            finally:
                if fid:
                    fid.close()
    finally:
        os.remove(tmpfile)
        # Bug fix: previously the archive handle was only closed on the
        # success path, leaking the file descriptor (and potentially a
        # truncated archive) when write_array or zipf.write raised.
        zipf.close()
608 | |
609 | |
610 def _getconv(dtype): | |
611 """ Find the correct dtype converter. Adapted from matplotlib """ | |
612 typ = dtype.type | |
613 if issubclass(typ, np.bool_): | |
614 return lambda x: bool(int(x)) | |
615 if issubclass(typ, np.uint64): | |
616 return np.uint64 | |
617 if issubclass(typ, np.int64): | |
618 return np.int64 | |
619 if issubclass(typ, np.integer): | |
620 return lambda x: int(float(x)) | |
621 elif issubclass(typ, np.floating): | |
622 return float | |
623 elif issubclass(typ, np.complex): | |
624 return complex | |
625 elif issubclass(typ, np.bytes_): | |
626 return bytes | |
627 else: | |
628 return str | |
629 | |
630 | |
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0):
    """
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        record data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str, optional
        The character used to indicate the start of a comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a record
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Examples
    --------
    >>> from StringIO import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO("M 21 72\\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

    """
    # Type conversions for Py3 convenience
    comments = asbytes(comments)
    user_converters = converters
    if delimiter is not None:
        delimiter = asbytes(delimiter)
    if usecols is not None:
        usecols = list(usecols)

    # fown tracks whether *we* opened the file handle (and so must close it).
    fown = False
    try:
        if _is_string_like(fname):
            fown = True
            if fname.endswith('.gz'):
                fh = iter(seek_gzip_factory(fname))
            elif fname.endswith('.bz2'):
                import bz2
                fh = iter(bz2.BZ2File(fname))
            elif sys.version_info[0] == 2:
                fh = iter(open(fname, 'U'))
            else:
                fh = iter(open(fname))
        else:
            # Any iterable of lines (file object, generator, list) works.
            fh = iter(fname)
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')
    X = []

    # NOTE(review): this inner helper shadows the module-level
    # `flatten_dtype` imported from ._iotools; only this local version is
    # used inside loadtxt.
    def flatten_dtype(dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            # Structured dtype: recurse into each field, flattening nested
            # types while recording how to re-nest the parsed values.
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = flatten_dtype(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if len(tp.shape) > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    def pack_items(items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            # Scalar row: a single converted value.
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            # packing is a list of (length, subpacking) pairs: slice the
            # flat items accordingly and recurse.
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(pack_items(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
        if line:
            return line.split(delimiter)
        else:
            return []

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    # Map the user's absolute column index onto the
                    # position within the selected columns.
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            converters[i] = conv

        # Parse each line, including the first
        for i, line in enumerate(itertools.chain([first_line], fh)):
            vals = split_line(line)
            if len(vals) == 0:
                # Blank or comment-only line: skip it.
                continue
            if usecols:
                vals = [vals[i] for i in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]
            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
    finally:
        if fown:
            fh.close()

    X = np.array(X, dtype)
    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X
893 | |
894 | |
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# '):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
                like `' (%s+%sj)' % (fmt, fmt)`
            b) a full string specifying every real and imaginary part, e.g.
                `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns
            c) a list of specifiers, one per column - in this case, the real
                and imaginary part must have separate specifiers,
                e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ',  as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    # Py3 conversions first: `fmt`/`delimiter` may arrive as bytes but all
    # the string assembly below works on native str.
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    # `own_fh` records whether we opened the handle ourselves; only then is
    # it our responsibility to close it in the `finally` block below.
    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            # Lazy import: gzip is only needed for .gz targets.
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            # Python 3 requires binary mode here because the writes below
            # always go through asbytes(); Python 2 text mode is equivalent.
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        # Caller passed an open file-like object; do not close it on exit.
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        if type(fmt) in (list, tuple):
            # One specifier per column is required for sequence `fmt`.
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            # Count '%' signs to decide whether `fmt` is a single specifier
            # (to be replicated per column) or a full multi-column template.
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    # Complex values consume two specifiers: real and imag.
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                # A full row template: `delimiter` is ignored by design.
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            # Prefix every header line with the comment marker.
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
        if iscomplex_X:
            for row in X:
                # Interleave real/imag parts to match the doubled specifiers.
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(format % tuple(row2) + newline))
        else:
            for row in X:
                fh.write(asbytes(format % tuple(row) + newline))
        if len(footer) > 0:
            # Prefix every footer line with the comment marker.
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        # Close only handles this function opened itself.
        if own_fh:
            fh.close()
1090 | |
1091 | |
def fromregex(file, regexp, dtype):
    """
    Construct an array from a text file, using regular expression parsing.

    Every match of `regexp` in the file contributes one record to the
    returned array; each regexp group becomes one field of the structured
    result.

    Parameters
    ----------
    file : str or file
        File name or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> f.write("1312 foo\\n1534  bar\\n444   qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
          dtype=[('num', '<i8'), ('key', '|S3')])
    >>> output['num']
    array([1312, 1534,  444], dtype=int64)

    """
    # Open the file ourselves only when given a path; remember that fact so
    # we close exactly what we opened.
    opened_here = False
    if not hasattr(file, "read"):
        file = open(file, 'rb')
        opened_here = True

    try:
        # Normalize the pattern and dtype arguments up front.
        if not hasattr(regexp, 'match'):
            regexp = re.compile(asbytes(regexp))
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        matches = regexp.findall(file.read())
        if matches and not isinstance(matches[0], tuple):
            # The pattern had a single group, so findall() produced flat
            # strings rather than tuples.  Build the array from the lone
            # field's base dtype, then reinterpret it in place as the
            # requested single-field structured dtype.
            base_dtype = np.dtype(dtype[dtype.names[0]])
            result = np.array(matches, dtype=base_dtype)
            result.dtype = dtype
        else:
            result = np.array(matches, dtype=dtype)

        return result
    finally:
        if opened_here:
            file.close()
1173 | |
1174 | |
1175 #####-------------------------------------------------------------------------- | |
1176 #---- --- ASCII functions --- | |
1177 #####-------------------------------------------------------------------------- | |
1178 | |
1179 | |
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
               skiprows=0, skip_header=0, skip_footer=0, converters=None,
               missing='', missing_values=None, filling_values=None,
               usecols=None, names=None,
               excludelist=None, deletechars=None, replace_space='_',
               autostrip=False, case_sensitive=True, defaultfmt="f%i",
               unpack=None, usemask=False, loose=True, invalid_raise=True):
    """
    Load data from a text file, with missing values handled as specified.

    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.

    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        `.gz` or `.bz2`, the file is first decompressed. Note that
        generators must return byte strings in Python 3k.
    dtype : dtype, optional
        Data type of the resulting array.
        If None, the dtypes will be determined by the contents of each
        column, individually.
    comments : str, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are discarded
    delimiter : str, int, or sequence, optional
        The string used to separate values.  By default, any consecutive
        whitespaces act as delimiter.  An integer or sequence of integers
        can also be provided as width(s) of each field.
    skip_rows : int, optional
        `skip_rows` was deprecated in numpy 1.5, and will be removed in
        numpy 2.0. Please use `skip_header` instead.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    converters : variable, optional
        The set of functions that convert the data of a column to a value.
        The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
    missing : variable, optional
        `missing` was deprecated in numpy 1.5, and will be removed in
        numpy 2.0. Please use `missing_values` instead.
    missing_values : variable, optional
        The set of strings corresponding to missing data.
    filling_values : variable, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid line
        after the first `skip_header` lines.
        If `names` is a sequence or a single-string of comma-separated names,
        the names will be used to define the field names in a structured dtype.
        If `names` is None, the names of the dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return','file','print']. Excluded names are appended an underscore:
        for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used in replacement of white spaces in the variables
        names. By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with `names`,
      there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.

    References
    ----------
    .. [1] Numpy User Guide, section `I/O with Numpy
           <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    ---------
    >>> from StringIO import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Using dtype = None

    >>> s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    Specifying dtype and names

    >>> s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])

    """
    # Py3 data conversions to bytes, for convenience.
    # All parsing below operates on byte strings, so the text-typed
    # parameters are normalized once here.
    if comments is not None:
        comments = asbytes(comments)
    if isinstance(delimiter, unicode):
        delimiter = asbytes(delimiter)
    if isinstance(missing, unicode):
        missing = asbytes(missing)
    if isinstance(missing_values, (unicode, list, tuple)):
        missing_values = asbytes_nested(missing_values)

    # Import the masked-array machinery only when a masked result is wanted.
    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converter' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

    # Initialize the filehandle, the LineSplitter and the NameValidator
    own_fhd = False
    try:
        if isinstance(fname, basestring):
            # 'U' (universal newlines) is Python-2 only; DataSource handles
            # transparent .gz/.bz2 decompression for filename input.
            if sys.version_info[0] == 2:
                fhd = iter(np.lib._datasource.open(fname, 'rbU'))
            else:
                fhd = iter(np.lib._datasource.open(fname, 'rb'))
            own_fhd = True
        else:
            fhd = iter(fname)
    except TypeError:
        raise TypeError(
            "fname must be a string, filehandle, or generator. "
            "(got %s instead)" % type(fname))

    # split_line: callable turning a raw byte line into a list of fields.
    split_line = LineSplitter(delimiter=delimiter, comments=comments,
                              autostrip=autostrip)._handyman
    # validate_names: cleans/uniquifies candidate field names.
    validate_names = NameValidator(excludelist=excludelist,
                                   deletechars=deletechars,
                                   case_sensitive=case_sensitive,
                                   replace_space=replace_space)

    # Get the first valid lines after the first skiprows ones ..
    if skiprows:
        warnings.warn(
            "The use of `skiprows` is deprecated, it will be removed in "
            "numpy 2.0.\nPlease use `skip_header` instead.",
            DeprecationWarning)
        skip_header = skiprows
    # Skip the first `skip_header` rows
    for i in range(skip_header):
        next(fhd)

    # Keep on until we find the first valid values
    first_values = None
    try:
        while not first_values:
            first_line = next(fhd)
            if names is True:
                # A commented header line may still carry the names:
                # strip the comment marker before splitting.
                if comments in first_line:
                    first_line = (
                        asbytes('').join(first_line.split(comments)[1:]))
            first_values = split_line(first_line)
    except StopIteration:
        # return an empty array if the datafile is empty
        first_line = asbytes('')
        first_values = []
        warnings.warn('genfromtxt: Empty input file: "%s"' % fname)

    # Should we take the first values as names ?
    if names is True:
        fval = first_values[0].strip()
        if fval in comments:
            del first_values[0]

    # Check the columns to use: make sure `usecols` is a list
    if usecols is not None:
        try:
            # A comma-separated string of column names.
            usecols = [_.strip() for _ in usecols.split(",")]
        except AttributeError:
            try:
                usecols = list(usecols)
            except TypeError:
                # A single scalar index.
                usecols = [usecols, ]
    nbcols = len(usecols or first_values)

    # Check the names and overwrite the dtype.names if needed
    if names is True:
        names = validate_names([_bytes_to_name(_.strip())
                                for _ in first_values])
        # The header line has been consumed as names; blank it so the
        # parsing loop below does not treat it as data.
        first_line = asbytes('')
    elif _is_string_like(names):
        names = validate_names([_.strip() for _ in names.split(',')])
    elif names:
        names = validate_names(names)
    # Get the dtype
    if dtype is not None:
        dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names)
    # Make sure the names is a list (for 2.5)
    if names is not None:
        names = list(names)

    if usecols:
        for (i, current) in enumerate(usecols):
            # if usecols is a list of names, convert to a list of indices
            if _is_string_like(current):
                usecols[i] = names.index(current)
            elif current < 0:
                # Negative indices count from the end of the row.
                usecols[i] = current + len(first_values)
        # If the dtype is not None, make sure we update it
        if (dtype is not None) and (len(dtype) > nbcols):
            descr = dtype.descr
            dtype = np.dtype([descr[_] for _ in usecols])
            names = list(dtype.names)
        # If `names` is not None, update the names
        elif (names is not None) and (len(names) > nbcols):
            names = [names[_] for _ in usecols]
    elif (names is not None) and (dtype is not None):
        names = list(dtype.names)

    # Process the missing values ...............................
    # Rename missing_values for convenience
    user_missing_values = missing_values or ()

    # Define the list of missing_values (one column: one list)
    # Every column always treats the empty string as missing.
    missing_values = [list([asbytes('')]) for _ in range(nbcols)]

    # We have a dictionary: process it field by field
    if isinstance(user_missing_values, dict):
        # Loop on the items
        for (key, val) in user_missing_values.items():
            # Is the key a string ?
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped
                    continue
            # Redefine the key as needed if it's a column number
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Transform the value as a list of string
            if isinstance(val, (list, tuple)):
                val = [str(_) for _ in val]
            else:
                val = [str(val), ]
            # Add the value(s) to the current list of missing
            if key is None:
                # None acts as default
                for miss in missing_values:
                    miss.extend(val)
            else:
                missing_values[key].extend(val)
    # We have a sequence : each item matches a column
    elif isinstance(user_missing_values, (list, tuple)):
        for (value, entry) in zip(user_missing_values, missing_values):
            value = str(value)
            if value not in entry:
                entry.append(value)
    # We have a string : apply it to all entries
    elif isinstance(user_missing_values, bytes):
        user_value = user_missing_values.split(asbytes(","))
        for entry in missing_values:
            entry.extend(user_value)
    # We have something else: apply it to all entries
    else:
        for entry in missing_values:
            entry.extend([str(user_missing_values)])

    # Process the deprecated `missing`
    if missing != asbytes(''):
        warnings.warn(
            "The use of `missing` is deprecated, it will be removed in "
            "Numpy 2.0.\nPlease use `missing_values` instead.",
            DeprecationWarning)
        values = [str(_) for _ in missing.split(asbytes(","))]
        for entry in missing_values:
            entry.extend(values)

    # Process the filling_values ...............................
    # Rename the input for convenience
    user_filling_values = filling_values
    if user_filling_values is None:
        user_filling_values = []
    # Define the default
    filling_values = [None] * nbcols
    # We have a dictionary : update each entry individually
    if isinstance(user_filling_values, dict):
        for (key, val) in user_filling_values.items():
            if _is_string_like(key):
                try:
                    # Transform it into an integer
                    key = names.index(key)
                except ValueError:
                    # We couldn't find it: the name must have been dropped,
                    continue
            # Redefine the key if it's a column number and usecols is defined
            if usecols:
                try:
                    key = usecols.index(key)
                except ValueError:
                    pass
            # Add the value to the list
            filling_values[key] = val
    # We have a sequence : update on a one-to-one basis
    elif isinstance(user_filling_values, (list, tuple)):
        n = len(user_filling_values)
        if (n <= nbcols):
            filling_values[:n] = user_filling_values
        else:
            filling_values = user_filling_values[:nbcols]
    # We have something else : use it for all entries
    else:
        filling_values = [user_filling_values] * nbcols

    # Initialize the converters ................................
    if dtype is None:
        # Note: we can't use a [...]*nbcols, as we would have 3 times the same
        # ... converter, instead of 3 different converters.
        converters = [StringConverter(None, missing_values=miss, default=fill)
                      for (miss, fill) in zip(missing_values, filling_values)]
    else:
        dtype_flat = flatten_dtype(dtype, flatten_base=True)
        # Initialize the converters
        if len(dtype_flat) > 1:
            # Flexible type : get a converter from each dtype
            zipit = zip(dtype_flat, missing_values, filling_values)
            converters = [StringConverter(dt, locked=True,
                                          missing_values=miss, default=fill)
                          for (dt, miss, fill) in zipit]
        else:
            # Set to a default converter (but w/ different missing values)
            zipit = zip(missing_values, filling_values)
            converters = [StringConverter(dtype, locked=True,
                                          missing_values=miss, default=fill)
                          for (miss, fill) in zipit]
    # Update the converters to use the user-defined ones
    uc_update = []
    for (j, conv) in user_converters.items():
        # If the converter is specified by column names, use the index instead
        if _is_string_like(j):
            try:
                j = names.index(j)
                i = j
            except ValueError:
                continue
        elif usecols:
            try:
                # `j` is a file-column index; `i` is its position among
                # the selected columns.
                i = usecols.index(j)
            except ValueError:
                # Unused converter specified
                continue
        else:
            i = j
        # Find the value to test - first_line is not filtered by usecols:
        if len(first_line):
            testing_value = first_values[j]
        else:
            testing_value = None
        converters[i].update(conv, locked=True,
                             testing_value=testing_value,
                             default=filling_values[i],
                             missing_values=missing_values[i],)
        uc_update.append((i, conv))
    # Make sure we have the corrected keys in user_converters...
    user_converters.update(uc_update)

    # Fixme: possible error as following variable never used.
    #miss_chars = [_.missing_values for _ in converters]

    # Initialize the output lists ...
    # ... rows
    rows = []
    append_to_rows = rows.append
    # ... masks
    if usemask:
        masks = []
        append_to_masks = masks.append
    # ... invalid
    invalid = []
    append_to_invalid = invalid.append

    # Parse each line
    # (the header candidate `first_line` is re-examined here; it is empty
    # when it was consumed as names or when the file was empty)
    for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
        values = split_line(line)
        nbvalues = len(values)
        # Skip an empty line
        if nbvalues == 0:
            continue
        # Select only the columns we need
        if usecols:
            try:
                values = [values[_] for _ in usecols]
            except IndexError:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue
        elif nbvalues != nbcols:
            append_to_invalid((i + skip_header + 1, nbvalues))
            continue
        # Store the values
        append_to_rows(tuple(values))
        if usemask:
            append_to_masks(tuple([v.strip() in m
                                   for (v, m) in zip(values, missing_values)]))

    if own_fhd:
        fhd.close()

    # Upgrade the converters (if needed)
    if dtype is None:
        for (i, converter) in enumerate(converters):
            current_column = [itemgetter(i)(_m) for _m in rows]
            try:
                converter.iterupgrade(current_column)
            except ConverterLockError:
                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                current_column = map(itemgetter(i), rows)
                # Re-run value by value just to pinpoint the offending line
                # for the error message.
                for (j, value) in enumerate(current_column):
                    try:
                        converter.upgrade(value)
                    except (ConverterError, ValueError):
                        errmsg += "(occurred line #%i for value '%s')"
                        errmsg %= (j + 1 + skip_header, value)
                        raise ConverterError(errmsg)

    # Check that we don't have invalid values
    nbinvalid = len(invalid)
    if nbinvalid > 0:
        nbrows = len(rows) + nbinvalid - skip_footer
        # Construct the error message
        template = "    Line #%%i (got %%i columns instead of %i)" % nbcols
        if skip_footer > 0:
            # Invalid lines that fall inside the skipped footer are not
            # reported as errors.
            nbinvalid_skipped = len([_ for _ in invalid
                                     if _[0] > nbrows + skip_header])
            invalid = invalid[:nbinvalid - nbinvalid_skipped]
            skip_footer -= nbinvalid_skipped
#
#            nbrows -= skip_footer
#            errmsg = [template % (i, nb)
#                      for (i, nb) in invalid if i < nbrows]
#        else:
        errmsg = [template % (i, nb)
                  for (i, nb) in invalid]
        if len(errmsg):
            errmsg.insert(0, "Some errors were detected !")
            errmsg = "\n".join(errmsg)
            # Raise an exception ?
            if invalid_raise:
                raise ValueError(errmsg)
            # Issue a warning ?
            else:
                warnings.warn(errmsg, ConversionWarning)

    # Strip the last skip_footer data
    if skip_footer > 0:
        rows = rows[:-skip_footer]
        if usemask:
            masks = masks[:-skip_footer]

    # Convert each value according to the converter:
    # We want to modify the list in place to avoid creating a new one...
    if loose:
        rows = list(
            zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))
    else:
        rows = list(
            zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))

    # Reset the dtype
    data = rows
    if dtype is None:
        # Get the dtypes from the types of the converters
        column_types = [conv.type for conv in converters]
        # Find the columns with strings...
        strcolidx = [i for (i, v) in enumerate(column_types)
                     if v in (type('S'), np.string_)]
        # ... and take the largest number of chars.
        for i in strcolidx:
            column_types[i] = "|S%i" % max(len(row[i]) for row in data)
        #
        if names is None:
            # If the dtype is uniform, don't define names, else use ''
            base = set([c.type for c in converters if c._checked])
            if len(base) == 1:
                (ddtype, mdtype) = (list(base)[0], np.bool)
            else:
                ddtype = [(defaultfmt % i, dt)
                          for (i, dt) in enumerate(column_types)]
                if usemask:
                    mdtype = [(defaultfmt % i, np.bool)
                              for (i, dt) in enumerate(column_types)]
        else:
            ddtype = list(zip(names, column_types))
            mdtype = list(zip(names, [np.bool] * len(column_types)))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed
        if names and dtype.names:
            dtype.names = names
        # Case 1. We have a structured type
        if len(dtype_flat) > 1:
            # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
            # First, create the array using a flattened dtype:
            # [('a', int), ('b1', int), ('b2', float)]
            # Then, view the array using the specified dtype.
            if 'O' in (_.char for _ in dtype_flat):
                if has_nested_fields(dtype):
                    raise NotImplementedError(
                        "Nested fields involving objects are not supported...")
                else:
                    output = np.array(data, dtype=dtype)
            else:
                rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                output = rows.view(dtype)
            # Now, process the rowmasks the same way
            if usemask:
                rowmasks = np.array(
                    masks, dtype=np.dtype([('', np.bool) for t in dtype_flat]))
                # Construct the new dtype
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)
        # Case #2. We have a basic dtype
        else:
            # We used some user-defined converters
            if user_converters:
                ishomogeneous = True
                descr = []
                for i, ttype in enumerate([conv.type for conv in converters]):
                    # Keep the dtype of the current converter
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if ttype == np.string_:
                            ttype = "|S%i" % max(len(row[i]) for row in data)
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))
                # So we changed the dtype ?
                if not ishomogeneous:
                    # We have more than one field
                    if len(descr) > 1:
                        dtype = np.dtype(descr)
                    # We have only one field: drop the name if not needed.
                    else:
                        dtype = np.dtype(ttype)
            #
            output = np.array(data, dtype)
            if usemask:
                if dtype.names:
                    mdtype = [(_, np.bool) for _ in dtype.names]
                else:
                    mdtype = np.bool
                outputmask = np.array(masks, dtype=mdtype)
    # Try to take care of the missing data we missed
    names = output.dtype.names
    if usemask and names:
        for (name, conv) in zip(names or (), converters):
            missing_values = [conv(_) for _ in conv.missing_values
                              if _ != asbytes('')]
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)
    # Construct the final array
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    if unpack:
        return output.squeeze().T
    return output.squeeze()
1811 | |
1812 | |
def ndfromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a file and return it as a single array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function.

    """
    # Delegate to genfromtxt with masking forcibly disabled, overriding
    # any ``usemask`` value the caller may have passed in.
    return genfromtxt(fname, **dict(kwargs, usemask=False))
1828 | |
1829 | |
def mafromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a text file and return a masked array.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    """
    # Delegate to genfromtxt with masking forcibly enabled, overriding
    # any ``usemask`` value the caller may have passed in.
    return genfromtxt(fname, **dict(kwargs, usemask=True))
1845 | |
1846 | |
def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned,
    if ``usemask=True`` a MaskedRecords array is returned.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    """
    # Let the data determine the dtype unless the caller chose one.
    kwargs.setdefault("dtype", None)
    wants_mask = kwargs.get('usemask', False)
    arr = genfromtxt(fname, **kwargs)
    # Re-view the result as a record-like array of the appropriate flavor.
    if not wants_mask:
        return arr.view(np.recarray)
    # Deferred import: numpy.ma.mrecords is only needed on the masked path.
    from numpy.ma.mrecords import MaskedRecords
    return arr.view(MaskedRecords)
1877 | |
1878 | |
def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.

    """
    # Fill in CSV-oriented defaults without clobbering caller choices.
    csv_defaults = (
        ("case_sensitive", "lower"),
        ("names", True),
        ("delimiter", ","),
        ("dtype", None),
    )
    for key, value in csv_defaults:
        kwargs.setdefault(key, value)

    arr = genfromtxt(fname, **kwargs)

    # Re-view the result as a record-like array of the appropriate flavor.
    if not kwargs.get("usemask", False):
        return arr.view(np.recarray)
    # Deferred import: numpy.ma.mrecords is only needed on the masked path.
    from numpy.ma.mrecords import MaskedRecords
    return arr.view(MaskedRecords)