comparison DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/arrayterator.py @ 87:2a2c65a20a8b

Add Python libs and headers
author Chris Cannam
date Wed, 25 Feb 2015 14:05:22 +0000
parents
children
comparison
equal deleted inserted replaced
86:413a9d26189e 87:2a2c65a20a8b
1 """
2 A buffered iterator for big arrays.
3
4 This module solves the problem of iterating over a big file-based array
5 without having to read it into memory. The `Arrayterator` class wraps
6 an array object, and when iterated it will return sub-arrays with at most
7 a user-specified number of elements.
8
9 """
10 from __future__ import division, absolute_import, print_function
11
12 from operator import mul
13 from functools import reduce
14
15 from numpy.compat import long
16
17 __all__ = ['Arrayterator']
18
19
class Arrayterator(object):
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over. It must expose a ``shape`` attribute and
        accept a tuple of slices as an index.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    ndenumerate : Multidimensional array iterator.
    flatiter : Flat array iterator.
    memmap : Create a memory-map to an array stored in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape)
    ...
    [[[[0 1]]]] (1, 1, 1, 2)

    """

    def __init__(self, var, buf_size=None):
        self.var = var
        self.buf_size = buf_size

        # Per-dimension selection, initially covering the whole of `var`.
        ndim = len(var.shape)
        self.start = [0] * ndim
        self.stop = list(var.shape)
        self.step = [1] * ndim

    def __getattr__(self, attr):
        """
        Delegate attributes not found on the iterator to the wrapped object.

        """
        # __getattr__ only runs after normal lookup has failed.  If `var`
        # itself is missing (an instance created without __init__, e.g.
        # while being copied or unpickled), delegating through ``self.var``
        # would re-enter this method forever.
        if attr == 'var':
            raise AttributeError(attr)
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator restricted to `index`.

        `index` may be an integer, a slice, ``Ellipsis`` or a tuple of
        those.  Integers select a length-one range, so the number of
        dimensions is preserved.  Slice offsets are interpreted relative to
        the current selection (they are scaled by the current step).
        Negative offsets are not translated to "from the end" positions.

        """
        from numbers import Integral

        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), len(self.shape)
        for item in index:
            if item is Ellipsis:
                fixed.extend([slice(None)] * (dims - length + 1))
                length = len(fixed)
            elif isinstance(item, Integral):
                # Covers int, long (Python 2), bool and NumPy integers.
                pos = int(item)
                fixed.append(slice(pos, pos + 1, 1))
            else:
                fixed.append(item)
        index = tuple(fixed)
        if len(index) < dims:
            index += (slice(None),) * (dims - len(index))

        # Return a new arrayterator object.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, item) in enumerate(
                zip(self.start, self.stop, self.step, index)):
            # Element k of the current selection lives at start + k*step in
            # `var`, so offsets given by the caller must be scaled by step.
            out.start[i] = start + (item.start or 0) * step
            out.step[i] = step * (item.step or 1)
            if item.stop is None:
                out.stop[i] = stop
            else:
                # An explicit stop of 0 is a real (empty) bound, so test
                # against None rather than relying on truthiness.
                out.stop[i] = min(stop, start + item.stop * step)
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        `dtype` and `copy` are accepted because NumPy's array protocol may
        pass them; they are not applied here and the raw result of indexing
        `var` is returned.

        """
        selection = tuple(slice(*bounds) for bounds in zip(
            self.start, self.stop, self.step))
        return self.var[selection]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `Arrayterator` one by one. It is similar to `flatiter`.

        See Also
        --------
        `Arrayterator`
        flatiter

        Examples
        --------
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2)

        The exact integer type printed below depends on the platform.

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <class 'numpy.int64'>

        """
        for block in self:
            for value in block.flat:
                yield value

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        For an example, see `Arrayterator`.

        """
        return tuple(((stop - start - 1) // step + 1)
                     for start, stop, step in
                     zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield successive blocks of `var` holding at most `buf_size` elements.

        """
        shape = self.shape

        # Skip arrays with degenerate dimensions.  A plain ``return`` ends
        # the generator; raising StopIteration inside a generator is turned
        # into RuntimeError by PEP 479.
        if any(dim <= 0 for dim in shape):
            return

        start = self.start[:]
        stop = self.stop[:]
        step = self.step[:]
        ndims = len(self.var.shape)
        max_count = self.buf_size or reduce(mul, shape)

        while True:
            count = max_count

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims - 1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i] + 1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count * step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = self.stop[i]
                stop[i] = min(self.stop[i], stop[i])
                count = count // shape[i]

            # yield a block
            block_index = tuple(slice(*bounds)
                                for bounds in zip(start, stop, step))
            yield self.var[block_index]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims - 1, 0, -1):
                if start[i] >= self.stop[i]:
                    start[i] = self.start[i]
                    start[i - 1] += self.step[i - 1]
            if start[0] >= self.stop[0]:
                return