Chris@87
|
1 """
|
Chris@87
|
2 Functions that ignore NaN.
|
Chris@87
|
3
|
Chris@87
|
4 Functions
|
Chris@87
|
5 ---------
|
Chris@87
|
6
|
Chris@87
|
7 - `nanmin` -- minimum non-NaN value
|
Chris@87
|
8 - `nanmax` -- maximum non-NaN value
|
Chris@87
|
9 - `nanargmin` -- index of minimum non-NaN value
|
Chris@87
|
10 - `nanargmax` -- index of maximum non-NaN value
|
Chris@87
|
11 - `nansum` -- sum of non-NaN values
|
Chris@87
|
12 - `nanmean` -- mean of non-NaN values
|
Chris@87
|
13 - `nanvar` -- variance of non-NaN values
|
Chris@87
|
14 - `nanstd` -- standard deviation of non-NaN values
|
Chris@87
|
15
|
Chris@87
|
16 """
|
Chris@87
|
17 from __future__ import division, absolute_import, print_function
|
Chris@87
|
18
|
Chris@87
|
19 import warnings
|
Chris@87
|
20 import numpy as np
|
Chris@87
|
21 from numpy.lib.function_base import _ureduce as _ureduce
|
Chris@87
|
22
|
Chris@87
|
23 __all__ = [
|
Chris@87
|
24 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
|
Chris@87
|
25 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd'
|
Chris@87
|
26 ]
|
Chris@87
|
27
|
Chris@87
|
28
|
Chris@87
|
def _replace_nan(a, val):
    """
    Return a NaN-free version of `a` together with the NaN location mask.

    For inputs of inexact (floating or complex) dtype the result is a
    fresh array in which every NaN has been overwritten with `val`; the
    caller's array is never modified. Inputs of any other dtype cannot
    hold NaN, so they are handed back untouched with a mask of None.

    Parameters
    ----------
    a : array-like
        Input data. Anything that is not already an ndarray is converted
        with ``np.array``.
    val : float
        Fill value written over each NaN.

    Returns
    -------
    y : ndarray
        For inexact dtypes, a copy of `a` with NaNs replaced by `val`
        (ndarray subclasses are preserved); otherwise `a` itself, after
        conversion to an ndarray if that was needed.
    mask : {ndarray of bool, None}
        For inexact dtypes, True where `a` held a NaN; otherwise None.

    """
    if isinstance(a, np.ndarray):
        # The caller owns this array, so it must be copied before filling.
        owned_by_caller = True
    else:
        # np.array already produced a private array; no extra copy needed.
        a = np.array(a)
        owned_by_caller = False

    if not issubclass(a.dtype.type, np.inexact):
        return a, None

    if owned_by_caller:
        a = np.array(a, subok=True)

    nan_mask = np.isnan(a)
    np.copyto(a, val, where=nan_mask)
    return a, nan_mask
|
Chris@87
|
65
|
Chris@87
|
66
|
Chris@87
|
def _copyto(a, val, mask):
    """
    Write `val` into `a` wherever `mask` is True.

    Unlike ``np.copyto`` this also accepts a numpy scalar for `a`. In
    that case `mask` is not consulted: a new scalar of the same type as
    `a`, holding `val`, is returned unconditionally.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array (modified in place) or numpy scalar whose values are to be
        replaced by `val`.
    val : numpy scalar
        Replacement value.
    mask : ndarray, scalar
        Boolean selector, broadcast against `a`. Elements of `a` where
        it is True are overwritten. Only used when `a` is an ndarray.

    Returns
    -------
    res : ndarray, scalar
        `a` itself after the in-place replacement, or, for scalar input,
        `val` converted to the scalar type of `a`.

    """
    if not isinstance(a, np.ndarray):
        # Numpy scalars are immutable, so build a replacement instead.
        return a.dtype.type(val)
    # 'unsafe' casting lets e.g. NaN be stored whatever the dtype of `a`.
    np.copyto(a, val, where=mask, casting='unsafe')
    return a
|
Chris@87
|
94
|
Chris@87
|
95
|
Chris@87
|
def _divide_by_count(a, b, out=None):
    """
    Compute ``a / b`` with "invalid value" floating point errors silenced.

    When `out` is given the quotient is stored there. Otherwise an
    ndarray `a` is divided in place, and a numpy scalar `a` yields a new
    scalar of the same type as `a`. The type of `a` is not checked; the
    original documentation notes this is only called with `a` of inexact
    type.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator. Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The quotient ``a / b``: `out` when it was supplied, `a` itself
        (divided in place) when `a` is an ndarray, or else a scalar with
        the type of `a`.

    """
    with np.errstate(invalid='ignore'):
        if out is None:
            if not isinstance(a, np.ndarray):
                # Scalar numerator: keep its type in the result.
                return a.dtype.type(a / b)
            # Array numerator without explicit output: divide in place.
            out = a
        # Also reached for a scalar numerator with an explicit `out`; the
        # original code remarks that this is questionable, but a numpy
        # scalar can currently be output to a zero dimensional array.
        return np.divide(a, b, out=out, casting='unsafe')
|
Chris@87
|
135
|
Chris@87
|
136
|
Chris@87
|
def nanmin(a, axis=None, out=None, keepdims=False):
    """
    Return the minimum of an array, or the minima along an axis, skipping
    NaNs. When a slice holds nothing but NaNs a ``RuntimeWarning`` is
    issued and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose minimum is desired. If `a` is not
        an array, a conversion is attempted.
    axis : int, optional
        Axis along which the minimum is computed. The default is to
        compute the minimum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        `doc.ufuncs` for details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original `a`.

        .. versionadded:: 1.8.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any
        NaNs.
    amin :
        The minimum value of an array along a given axis, propagating
        any NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to
    infinity. Positive infinity is treated as a very large number and
    negative infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to
    np.min.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([ 1.,  2.])
    >>> np.nanmin(a, axis=1)
    array([ 1.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    if isinstance(a, np.ndarray) and type(a) is not np.ndarray:
        # ndarray subclass: slower, but safe. NaNs are replaced by +inf so
        # that they can never win the minimum.
        filled, nan_mask = _replace_nan(a, +np.inf)
        res = np.amin(filled, axis=axis, out=out, keepdims=keepdims)
        if nan_mask is None:
            # Not an inexact dtype, so there were no NaNs to begin with.
            return res

        # Slices made up entirely of NaNs must report NaN, not +inf.
        all_nan = np.all(nan_mask, axis=axis, keepdims=keepdims)
        if np.any(all_nan):
            res = _copyto(res, np.nan, all_nan)
            warnings.warn("All-NaN axis encountered", RuntimeWarning)
        return res

    # Plain ndarray or non-array input: fast path. fmin ignores NaNs, so
    # a NaN in the result can only come from an all-NaN slice.
    res = np.fmin.reduce(a, axis=axis, out=out, keepdims=keepdims)
    if np.isnan(res).any():
        warnings.warn("All-NaN axis encountered", RuntimeWarning)
    return res
|
Chris@87
|
234
|
Chris@87
|
235
|
Chris@87
|
def nanmax(a, axis=None, out=None, keepdims=False):
    """
    Return the maximum of an array, or the maxima along an axis, skipping
    NaNs. When a slice holds nothing but NaNs a ``RuntimeWarning`` is
    issued and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose maximum is desired. If `a` is not
        an array, a conversion is attempted.
    axis : int, optional
        Axis along which the maximum is computed. The default is to
        compute the maximum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        `doc.ufuncs` for details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original `a`.

        .. versionadded:: 1.8.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any
        NaNs.
    amax :
        The maximum value of an array along a given axis, propagating
        any NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to
    infinity. Positive infinity is treated as a very large number and
    negative infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to
    np.max.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([ 3.,  2.])
    >>> np.nanmax(a, axis=1)
    array([ 2.,  3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, -np.inf])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    if isinstance(a, np.ndarray) and type(a) is not np.ndarray:
        # ndarray subclass: slower, but safe. NaNs are replaced by -inf so
        # that they can never win the maximum.
        filled, nan_mask = _replace_nan(a, -np.inf)
        res = np.amax(filled, axis=axis, out=out, keepdims=keepdims)
        if nan_mask is None:
            # Not an inexact dtype, so there were no NaNs to begin with.
            return res

        # Slices made up entirely of NaNs must report NaN, not -inf.
        all_nan = np.all(nan_mask, axis=axis, keepdims=keepdims)
        if np.any(all_nan):
            res = _copyto(res, np.nan, all_nan)
            warnings.warn("All-NaN axis encountered", RuntimeWarning)
        return res

    # Plain ndarray or non-array input: fast path. fmax ignores NaNs, so
    # a NaN in the result can only come from an all-NaN slice.
    res = np.fmax.reduce(a, axis=axis, out=out, keepdims=keepdims)
    if np.isnan(res).any():
        warnings.warn("All-NaN slice encountered", RuntimeWarning)
    return res
|
Chris@87
|
333
|
Chris@87
|
334
|
Chris@87
|
def nanargmin(a, axis=None):
    """
    Return the indices of the minimum values along an axis, skipping
    NaNs. A slice holding nothing but NaNs raises ``ValueError``.

    Warning: NaNs are replaced by +inf before the search, so the result
    cannot be trusted for a slice that contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    filled, nan_mask = _replace_nan(a, np.inf)
    indices = np.argmin(filled, axis=axis)
    if nan_mask is None:
        # Not an inexact dtype: no NaNs are possible, nothing to verify.
        return indices
    if np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")
    return indices
|
Chris@87
|
377
|
Chris@87
|
378
|
Chris@87
|
def nanargmax(a, axis=None):
    """
    Return the indices of the maximum values along an axis, skipping
    NaNs. A slice holding nothing but NaNs raises ``ValueError``.

    Warning: NaNs are replaced by -inf before the search, so the result
    cannot be trusted for a slice that contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists solely of NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    filled, nan_mask = _replace_nan(a, -np.inf)
    indices = np.argmax(filled, axis=axis)
    if nan_mask is None:
        # Not an inexact dtype: no NaNs are possible, nothing to verify.
        return indices
    if np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")
    return indices
|
Chris@87
|
422
|
Chris@87
|
423
|
Chris@87
|
def nansum(a, axis=None, dtype=None, out=None, keepdims=0):
    """
    Return the sum of array elements over a given axis, counting Not a
    Numbers (NaNs) as zero.

    In Numpy versions <= 1.8 NaN is returned for slices that are all-NaN
    or empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the sum is computed. The default is to compute
        the sum of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which
        the elements are summed. By default, the dtype of `a` is used.
        An exception is when `a` has an integer type with less precision
        than the platform (u)intp. In that case, the default will be
        either (u)int32 or (u)int64 depending on whether the platform is
        32 or 64 bits.

        .. versionadded:: 1.8.0
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        `doc.ufuncs` for details. The casting of NaN to integer can
        yield unexpected results.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one. With this option, the result will
        broadcast correctly against the original `arr`.

        .. versionadded:: 1.8.0

    Returns
    -------
    y : ndarray or numpy scalar

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite: Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be
    Not A Number (NaN).

    Numpy integer arithmetic is modular. If the size of a sum exceeds the
    size of an integer accumulator, its value will wrap around and the
    result will be incorrect. Specifying ``dtype=double`` can alleviate
    that problem.

    Examples
    --------
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([ 2.,  1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    nan

    """
    # Only the zero-filled data is needed; the NaN mask is irrelevant here.
    zero_filled = _replace_nan(a, 0)[0]
    return np.sum(zero_filled, axis=axis, dtype=dtype, out=out,
                  keepdims=keepdims)
|
Chris@87
|
507
|
Chris@87
|
508
|
Chris@87
|
def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the array elements. The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is issued.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to
        compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean. For integer inputs, the
        default is `float64`; for inexact inputs, it is the same as the
        input dtype.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        `doc.ufuncs` for details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original `arr`.

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. NaN is
        returned for slices that contain only NaNs.

    Raises
    ------
    TypeError
        If `a` is of inexact type and either `dtype` or the dtype of
        `out` is not inexact.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the
    same precision the input has. Depending on the input data, this can
    cause the results to be inaccurate, especially for `float32`.
    Specifying a higher-precision accumulator using the `dtype` keyword
    can alleviate this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([ 2.,  4.])
    >>> np.nanmean(a, axis=1)
    array([ 1.,  3.5])

    """
    zero_filled, nan_mask = _replace_nan(a, 0)
    if nan_mask is None:
        # Not an inexact dtype: no NaNs possible, defer to the plain mean.
        return np.mean(zero_filled, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims)

    # A non-inexact accumulator or output cannot represent the NaN that
    # an all-NaN slice must produce, so both are rejected up front.
    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Warnings raised while reducing and dividing are suppressed here; the
    # original author notes that the warning context speeds things up.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        valid_count = np.sum(~nan_mask, axis=axis, dtype=np.intp,
                             keepdims=keepdims)
        total = np.sum(zero_filled, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims)
        mean = _divide_by_count(total, valid_count, out=out)

    if (valid_count == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning)
        # NaN is the only possible bad value, so no further
        # action is needed to handle bad results.
    return mean
|
Chris@87
|
602
|
Chris@87
|
603
|
Chris@87
|
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Private function for rank 1 arrays. Compute the median ignoring NaNs.

    An input consisting solely of NaNs (including an empty input) issues
    a ``RuntimeWarning`` and yields NaN. See nanmedian for parameter
    usage.
    """
    nan_mask = np.isnan(arr1d)
    nan_idx = np.where(nan_mask)[0]
    n_nan = nan_idx.size

    if n_nan == arr1d.size:
        warnings.warn("All-NaN slice encountered", RuntimeWarning)
        return np.nan
    if n_nan == 0:
        # Nothing to skip: the ordinary median applies directly.
        return np.median(arr1d, overwrite_input=overwrite_input)

    # Work on the caller's buffer only when explicitly permitted.
    work = arr1d if overwrite_input else arr1d.copy()
    # Valid (non-NaN) values found in the last `n_nan` positions ...
    tail_valid = arr1d[-n_nan:][~nan_mask[-n_nan:]]
    # ... are moved into the earliest NaN positions, so that the first
    # ``size - n_nan`` entries hold every valid value.
    work[nan_idx[:tail_valid.size]] = tail_valid
    # The trailing `n_nan` entries are now redundant and are sliced away.
    return np.median(work[:-n_nan], overwrite_input=True)
|
Chris@87
|
627
|
Chris@87
|
628
|
Chris@87
|
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce.
    See nanmedian for parameter usage.

    Whenever `out` is supplied, the result is stored in it and `out`
    itself is returned, whichever code path computes the median.

    """
    if axis is None or a.ndim == 1:
        # Flattened (or already one-dimensional) input: a single median.
        med = _nanmedian1d(a.ravel(), overwrite_input)
        if out is None:
            return med
        out[...] = med
        return out

    # for small medians use sort + indexing which is still faster than
    # apply_along_axis
    if a.shape[axis] < 400:
        return _nanmedian_small(a, axis, out, overwrite_input)

    result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
    if out is None:
        return result
    # Return `out` rather than the temporary, consistent with the other
    # branches of this function.
    out[...] = result
    return out
|
Chris@87
|
652
|
Chris@87
|
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
    """
    Median via masked-array sort + indexing; faster for small medians
    along multiple dimensions due to the high overhead of
    apply_along_axis.

    One ``RuntimeWarning`` is issued per all-NaN slice, and NaN is
    stored for each such slice. See nanmedian for parameter usage.
    """
    masked = np.ma.masked_array(a, np.isnan(a))
    med = np.ma.median(masked, axis=axis, overwrite_input=overwrite_input)

    # Entries still masked in the result come from slices with no valid
    # data; warn once for each of them.
    n_all_nan = np.count_nonzero(med.mask.ravel())
    for _ in range(n_all_nan):
        warnings.warn("All-NaN slice encountered", RuntimeWarning)

    filled = med.filled(np.nan)
    if out is None:
        return filled
    out[...] = filled
    return out
|
Chris@87
|
667
|
Chris@87
|
668 def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
|
Chris@87
|
669 """
|
Chris@87
|
670 Compute the median along the specified axis, while ignoring NaNs.
|
Chris@87
|
671
|
Chris@87
|
672 Returns the median of the array elements.
|
Chris@87
|
673
|
Chris@87
|
674 .. versionadded:: 1.9.0
|
Chris@87
|
675
|
Chris@87
|
676 Parameters
|
Chris@87
|
677 ----------
|
Chris@87
|
678 a : array_like
|
Chris@87
|
679 Input array or object that can be converted to an array.
|
Chris@87
|
680 axis : int, optional
|
Chris@87
|
681 Axis along which the medians are computed. The default (axis=None)
|
Chris@87
|
682 is to compute the median along a flattened version of the array.
|
Chris@87
|
683 A sequence of axes is supported since version 1.9.0.
|
Chris@87
|
684 out : ndarray, optional
|
Chris@87
|
685 Alternative output array in which to place the result. It must have
|
Chris@87
|
686 the same shape and buffer length as the expected output, but the
|
Chris@87
|
687 type (of the output) will be cast if necessary.
|
Chris@87
|
688 overwrite_input : bool, optional
|
Chris@87
|
689 If True, then allow use of memory of input array (a) for
|
Chris@87
|
690 calculations. The input array will be modified by the call to
|
Chris@87
|
691 median. This will save memory when you do not need to preserve
|
Chris@87
|
692 the contents of the input array. Treat the input as undefined,
|
Chris@87
|
693 but it will probably be fully or partially sorted. Default is
|
Chris@87
|
694 False. Note that, if `overwrite_input` is True and the input
|
Chris@87
|
695 is not already an ndarray, an error will be raised.
|
Chris@87
|
696 keepdims : bool, optional
|
Chris@87
|
697 If this is set to True, the axes which are reduced are left
|
Chris@87
|
698 in the result as dimensions with size one. With this option,
|
Chris@87
|
699 the result will broadcast correctly against the original `arr`.
|
Chris@87
|
700
|
Chris@87
|
701
|
Chris@87
|
702
|
Chris@87
|
703 Returns
|
Chris@87
|
704 -------
|
Chris@87
|
705 median : ndarray
|
Chris@87
|
706 A new array holding the result. If the input contains integers, or
|
Chris@87
|
707 floats of smaller precision than 64, then the output data-type is
|
Chris@87
|
708 float64. Otherwise, the output data-type is the same as that of the
|
Chris@87
|
709 input.
|
Chris@87
|
710
|
Chris@87
|
711 See Also
|
Chris@87
|
712 --------
|
Chris@87
|
713 mean, median, percentile
|
Chris@87
|
714
|
Chris@87
|
715 Notes
|
Chris@87
|
716 -----
|
Chris@87
|
717 Given a vector V of length N, the median of V is the middle value of
|
Chris@87
|
718 a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is
|
Chris@87
|
719 odd. When N is even, it is the average of the two middle values of
|
Chris@87
|
720 ``V_sorted``.
|
Chris@87
|
721
|
Chris@87
|
722 Examples
|
Chris@87
|
723 --------
|
Chris@87
|
724 >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
|
Chris@87
|
725 >>> a[0, 1] = np.nan
|
Chris@87
|
726 >>> a
|
Chris@87
|
727 array([[ 10., nan, 4.],
|
Chris@87
|
728 [ 3., 2., 1.]])
|
Chris@87
|
729 >>> np.median(a)
|
Chris@87
|
730 nan
|
Chris@87
|
731 >>> np.nanmedian(a)
|
Chris@87
|
732 3.0
|
Chris@87
|
733 >>> np.nanmedian(a, axis=0)
|
Chris@87
|
734 array([ 6.5, 2., 2.5])
|
Chris@87
|
735 >>> np.median(a, axis=1)
|
Chris@87
|
736 array([ 7., 2.])
|
Chris@87
|
737 >>> b = a.copy()
|
Chris@87
|
738 >>> np.nanmedian(b, axis=1, overwrite_input=True)
|
Chris@87
|
739 array([ 7., 2.])
|
Chris@87
|
740 >>> assert not np.all(a==b)
|
Chris@87
|
741 >>> b = a.copy()
|
Chris@87
|
742 >>> np.nanmedian(b, axis=None, overwrite_input=True)
|
Chris@87
|
743 3.0
|
Chris@87
|
744 >>> assert not np.all(a==b)
|
Chris@87
|
745
|
Chris@87
|
746 """
|
Chris@87
|
747 a = np.asanyarray(a)
|
Chris@87
|
748 # apply_along_axis in _nanmedian doesn't handle empty arrays well,
|
Chris@87
|
749 # so deal them upfront
|
Chris@87
|
750 if a.size == 0:
|
Chris@87
|
751 return np.nanmean(a, axis, out=out, keepdims=keepdims)
|
Chris@87
|
752
|
Chris@87
|
753 r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
|
Chris@87
|
754 overwrite_input=overwrite_input)
|
Chris@87
|
755 if keepdims:
|
Chris@87
|
756 return r.reshape(k)
|
Chris@87
|
757 else:
|
Chris@87
|
758 return r
|
Chris@87
|
759
|
Chris@87
|
760
|
Chris@87
|
def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                  interpolation='linear', keepdims=False):
    """
    Compute the qth percentile of the data along the specified axis, while
    ignoring nan values.

    Returns the qth percentile of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    q : float in range of [0,100] (or sequence of floats)
        Percentile to compute which must be between 0 and 100 inclusive.
    axis : int or sequence of int, optional
        Axis along which the percentiles are computed. The default (None)
        is to compute the percentiles along a flattened version of the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        percentile. This will save memory when you do not need to preserve
        the contents of the input array. In this case you should not make
        any assumptions about the content of the passed in array `a` after
        this function completes -- treat it as undefined. Default is False.
        Note that, if the `a` input is not already an array this parameter
        will have no effect, `a` will be converted to an array internally
        regardless of the value of this parameter.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to use,
        when the desired quantile lies between two data points `i` and `j`:

            * linear: `i + (j - i) * fraction`, where `fraction` is the
              fractional part of the index surrounded by `i` and `j`.
            * lower: `i`.
            * higher: `j`.
            * nearest: `i` or `j` whichever is nearest.
            * midpoint: (`i` + `j`) / 2.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    nanpercentile : scalar or ndarray
        If a single percentile `q` is given and axis=None a scalar is
        returned. If multiple percentiles `q` are given an array holding
        the result is returned. The results are listed in the first axis.
        If `out` is specified, the result is also stored in that array.
        If the input contains integers, or floats of smaller
        precision than 64, then the output data-type is float64. Otherwise,
        the output data-type is the same as that of the input.

    See Also
    --------
    nanmean, nanmedian, percentile, median, mean

    Notes
    -----
    Given a vector V of length N, the q-th percentile of V is the q-th ranked
    value in a sorted copy of V. The values and distances of the two
    nearest neighbors as well as the `interpolation` parameter will
    determine the percentile if the normalized ranking does not match q
    exactly. This function is the same as the median if ``q=50``, the same
    as the minimum if ``q=0`` and the same as the maximum if ``q=100``.

    For an empty input the call is forwarded to `nanmean`, and its result
    is returned.

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[ 10.,  nan,   4.],
           [  3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    nan
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([ 6.5,  2. ,  2.5])
    >>> np.nanpercentile(a, 50, axis=1)
    array([ 7.,  2.])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([ 6.5,  2. ,  2.5])
    >>> out
    array([ 6.5,  2. ,  2.5])
    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([ 7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=None, overwrite_input=True)
    3.0

    """

    a = np.asanyarray(a)
    q = np.asanyarray(q)
    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
    # so deal with them upfront
    if a.size == 0:
        return np.nanmean(a, axis, out=out, keepdims=keepdims)

    # _ureduce adds support for a sequence of axes on top of _nanpercentile;
    # `k` is the shape the reduced result must take when keepdims is set.
    r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out,
                    overwrite_input=overwrite_input,
                    interpolation=interpolation)
    if not keepdims:
        return r
    if q.ndim == 0:
        return r.reshape(k)
    # With several percentiles the percentile axis comes first.
    return r.reshape([len(q)] + k)
|
Chris@87
|
883
|
Chris@87
|
884
|
Chris@87
|
def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                   interpolation='linear', keepdims=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanpercentile for parameter usage

    `keepdims` is accepted only for signature compatibility; it is not
    used here.

    When `q` is a sequence, the percentiles are placed along the first
    axis of the result, which is what `nanpercentile` expects when it
    reshapes for ``keepdims=True``.

    """
    if axis is None:
        part = a.ravel()
        result = _nanpercentile1d(part, q, overwrite_input, interpolation)
    else:
        result = np.apply_along_axis(_nanpercentile1d, axis, a, q,
                                     overwrite_input, interpolation)
        # apply_along_axis stores the per-slice results in the collapsed
        # axis. Move that axis to the front so that the layout matches
        # the "results are listed in the first axis" convention.
        if np.ndim(q) != 0:
            result = np.rollaxis(result, axis)

    if out is not None:
        out[...] = result
    return result


def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'):
    """
    Private function for rank 1 arrays. Compute percentile ignoring NaNs.
    See nanpercentile for parameter usage

    For an all-NaN input a RuntimeWarning is issued and NaN is returned:
    a scalar for scalar `q`, otherwise an array of NaNs shaped like `q`,
    so that every slice yields a result of the same shape.

    """
    c = np.isnan(arr1d)
    s = np.where(c)[0]
    if s.size == arr1d.size:
        warnings.warn("All-NaN slice encountered", RuntimeWarning)
        if np.ndim(q) == 0:
            return np.nan
        # Keep the result shape consistent with the non-NaN branches.
        return np.full(np.shape(q), np.nan)
    elif s.size == 0:
        return np.percentile(arr1d, q, overwrite_input=overwrite_input,
                             interpolation=interpolation)
    else:
        if overwrite_input:
            x = arr1d
        else:
            x = arr1d.copy()
        # select non-nans at end of array
        enonan = arr1d[-s.size:][~c[-s.size:]]
        # fill nans in beginning of array with non-nans of end
        x[s[:enonan.size]] = enonan
        # slice nans away
        return np.percentile(x[:-s.size], q, overwrite_input=True,
                             interpolation=interpolation)
|
Chris@87
|
931
|
Chris@87
|
932
|
Chris@87
|
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution.  The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired.  If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the variance is computed.  The default is to compute
        the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance.  For arrays of integer type
        the default is `float64`; for arrays of float types it is the same as
        the array type.
    out : ndarray, optional
        Alternate output array in which to place the result.  It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : int, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    Raises
    ------
    TypeError
        If `a` is of inexact type and `dtype` or the dtype of `out` is not.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    numpy.doc.ufuncs : Section "Output arguments"

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
    instead.  In standard statistical practice, ``ddof=1`` provides an
    unbiased estimator of the variance of a hypothetical infinite
    population.  ``ddof=0`` provides a maximum likelihood estimate of the
    variance for normally distributed variables.

    Note that for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-accuracy accumulator using the ``dtype`` keyword can alleviate
    this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([ 1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([ 0.,  0.25])

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No NaN handling needed for this input: defer to the plain variance.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims)

    if dtype is not None:
        dtype = np.dtype(dtype)
    if dtype is not None and not issubclass(dtype.type, np.inexact):
        raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        # Compute mean
        cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=True)
        avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=True)
        avg = _divide_by_count(avg, cnt)

        # Compute squared deviation from mean.
        # `avg` may have been accumulated in a different `dtype` than
        # `arr`; an augmented ``arr -= avg`` would be rejected by same-kind
        # casting in that case, so request the cast back explicitly.
        np.subtract(arr, avg, out=arr, casting='unsafe')
        arr = _copyto(arr, 0, mask)
        if issubclass(arr.dtype.type, np.complexfloating):
            sqr = np.multiply(arr, arr.conj(), out=arr).real
        else:
            sqr = np.multiply(arr, arr, out=arr)

        # Compute variance.
        var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
        if var.ndim < cnt.ndim:
            # Subclasses of ndarray may ignore keepdims, so check here.
            cnt = cnt.squeeze(axis)
        dof = cnt - ddof
        var = _divide_by_count(var, dof)

    # This check sits outside the catch_warnings block so that the
    # warning below is not suppressed.
    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
|
Chris@87
|
1062
|
Chris@87
|
1063
|
Chris@87
|
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Compute the standard deviation along the specified axis, while
    ignoring NaNs.

    The standard deviation measures the spread of a distribution. Here it
    is taken over the non-NaN array elements only, for the flattened array
    by default and otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Values whose standard deviation (ignoring NaNs) is wanted.
    axis : int, optional
        Axis along which the standard deviation is computed. When omitted,
        the flattened array is used.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it
        is the same as the array type.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape as the expected output but the type (of the
        calculated values) will be cast if necessary.
    ddof : int, optional
        Delta Degrees of Freedom. The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        input.

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        A new array holding the standard deviation when `out` is None,
        otherwise a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    numpy.doc.ufuncs : Section "Output arguments"

    Notes
    -----
    The standard deviation is the square root of the average of the squared
    deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is
    specified, the divisor ``N - ddof`` is used instead. In standard
    statistical practice, ``ddof=1`` provides an unbiased estimator of the
    variance of the infinite population. ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed variables.
    The standard deviation computed in this function is the square root of
    the estimated variance, so even with ``ddof=1``, it will not be an
    unbiased estimate of the standard deviation per se.

    Note that, for complex numbers, `std` takes the absolute value before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the *std* is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for float32. Specifying a
    higher-accuracy accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([ 1.,  0.])
    >>> np.nanstd(a, axis=1)
    array([ 0.,  0.5])

    """
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims)
    if not isinstance(variance, np.ndarray):
        # Scalar result: take the root, then cast back to the scalar type
        # that the variance was returned in.
        scalar_type = variance.dtype.type
        return scalar_type(np.sqrt(variance))
    # Array result: take the root in place, reusing the variance buffer.
    return np.sqrt(variance, out=variance)
|