Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/mingw32/Python27/Lib/site-packages/numpy/lib/arrayterator.py @ 87:2a2c65a20a8b
Add Python libs and headers
author | Chris Cannam |
---|---|
date | Wed, 25 Feb 2015 14:05:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
86:413a9d26189e | 87:2a2c65a20a8b |
---|---|
1 """ | |
2 A buffered iterator for big arrays. | |
3 | |
4 This module solves the problem of iterating over a big file-based array | |
5 without having to read it into memory. The `Arrayterator` class wraps | |
6 an array object, and when iterated it will return sub-arrays with at most | |
7 a user-specified number of elements. | |
8 | |
9 """ | |
10 from __future__ import division, absolute_import, print_function | |
11 | |
12 from operator import mul | |
13 from functools import reduce | |
14 | |
15 from numpy.compat import long | |
16 | |
17 __all__ = ['Arrayterator'] | |
18 | |
19 | |
class Arrayterator(object):
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    ndenumerate : Multidimensional array iterator.
    flatiter : Flat array iterator.
    memmap : Create a memory-map to an array stored in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape)
    ...
    [[[[0 1]]]] (1, 1, 1, 2)

    """

    def __init__(self, var, buf_size=None):
        self.var = var
        self.buf_size = buf_size

        # Per-dimension selection window; initially the whole of `var`.
        self.start = [0 for dim in var.shape]
        self.stop = [dim for dim in var.shape]
        self.step = [1 for dim in var.shape]

    def __getattr__(self, attr):
        """
        Delegate unknown attribute lookups to the wrapped object.

        Raises
        ------
        AttributeError
            If `attr` is ``'var'`` (the wrapped object has not been set on
            this instance) or the wrapped object lacks the attribute.

        """
        # __getattr__ only runs when normal lookup fails, so a request for
        # 'var' means the instance has no wrapped object yet.  Reading
        # self.var below would then re-enter this method without bound.
        if attr == 'var':
            raise AttributeError(attr)
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator restricted to `index`.

        Parameters
        ----------
        index : int, slice, Ellipsis or tuple of these
            Selection relative to the current window.  Missing trailing
            dimensions are selected in full.

        Returns
        -------
        out : Arrayterator
            A new object of the same class wrapping the same `var` with the
            same `buf_size`; no data is read.

        """
        from numbers import Integral

        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), len(self.shape)
        for slice_ in index:
            if slice_ is Ellipsis:
                fixed.extend([slice(None)] * (dims-length+1))
                length = len(fixed)
            elif isinstance(slice_, Integral):
                # An integer selects a length-one window, so the number of
                # dimensions is preserved.
                position = int(slice_)
                fixed.append(slice(position, position+1, 1))
            else:
                fixed.append(slice_)
        index = tuple(fixed)
        if len(index) < dims:
            index += (slice(None),) * (dims-len(index))

        # Return a new arrayterator object.
        # NOTE: slice bounds are added to the current start as given;
        # negative values are not normalised here.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, slice_) in enumerate(
                zip(self.start, self.stop, self.step, index)):
            out.start[i] = start + (slice_.start or 0)
            out.step[i] = step * (slice_.step or 1)
            # Compare against None explicitly: an explicit stop of 0 is a
            # valid (empty) selection and must not mean "up to the end".
            if slice_.stop is None:
                new_stop = stop
            else:
                new_stop = start + slice_.stop
            out.stop[i] = min(stop, new_stop)
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        Parameters
        ----------
        dtype, copy : optional
            Accepted for compatibility with callers that pass the optional
            arguments of the ``__array__`` protocol; not used here.

        Returns
        -------
        The result of indexing `var` with the current window.

        """
        slice_ = tuple(slice(*t) for t in zip(
                self.start, self.stop, self.step))
        return self.var[slice_]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `Arrayterator` one by one. It is similar to `flatiter`.

        See Also
        --------
        `Arrayterator`
        flatiter

        Examples
        --------
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2)

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <type 'numpy.int32'>

        """
        for block in self:
            for value in block.flat:
                yield value

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        For an example, see `Arrayterator`.

        """
        return tuple(((stop-start-1)//step+1) for start, stop, step in
                     zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield successive blocks of `var` covering the current window.

        Each block is obtained by indexing `var` with a tuple of slices.
        When `buf_size` is None (or zero) the whole window is requested as
        a single block.

        """
        # `shape` is a computed property and the window is not modified
        # while iterating, so evaluate it once.
        shape = self.shape

        # Skip arrays with degenerate dimensions.  A plain return ends the
        # generator; raising StopIteration inside a generator is turned
        # into RuntimeError by PEP 479.
        if any(dim <= 0 for dim in shape):
            return

        start = self.start[:]
        stop = self.stop[:]
        step = self.step[:]
        ndims = len(self.var.shape)

        while True:
            count = self.buf_size or reduce(mul, shape)

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims-1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i]+1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count*step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = self.stop[i]
                stop[i] = min(self.stop[i], stop[i])
                count = count//shape[i]

            # yield a block
            slice_ = tuple(slice(*t) for t in zip(start, stop, step))
            yield self.var[slice_]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims-1, 0, -1):
                if start[i] >= self.stop[i]:
                    start[i] = self.start[i]
                    start[i-1] += self.step[i-1]
            if start[0] >= self.stop[0]:
                return