Chris@87
|
1 """
|
Chris@87
|
2 Collection of utilities to manipulate structured arrays.
|
Chris@87
|
3
|
Chris@87
|
4 Most of these functions were initially implemented by John Hunter for
|
Chris@87
|
5 matplotlib. They have been rewritten and extended for convenience.
|
Chris@87
|
6
|
Chris@87
|
7 """
|
Chris@87
|
8 from __future__ import division, absolute_import, print_function
|
Chris@87
|
9
|
Chris@87
|
10 import sys
|
Chris@87
|
11 import itertools
|
Chris@87
|
12 import numpy as np
|
Chris@87
|
13 import numpy.ma as ma
|
Chris@87
|
14 from numpy import ndarray, recarray
|
Chris@87
|
15 from numpy.ma import MaskedArray
|
Chris@87
|
16 from numpy.ma.mrecords import MaskedRecords
|
Chris@87
|
17 from numpy.lib._iotools import _is_string_like
|
Chris@87
|
18 from numpy.compat import basestring
|
Chris@87
|
19
|
Chris@87
|
20 if sys.version_info[0] < 3:
|
Chris@87
|
21 from future_builtins import zip
|
Chris@87
|
22
|
Chris@87
|
23 _check_fill_value = np.ma.core._check_fill_value
|
Chris@87
|
24
|
Chris@87
|
25
|
Chris@87
|
26 __all__ = [
|
Chris@87
|
27 'append_fields', 'drop_fields', 'find_duplicates',
|
Chris@87
|
28 'get_fieldstructure', 'join_by', 'merge_arrays',
|
Chris@87
|
29 'rec_append_fields', 'rec_drop_fields', 'rec_join',
|
Chris@87
|
30 'recursive_fill_fields', 'rename_fields', 'stack_arrays',
|
Chris@87
|
31 ]
|
Chris@87
|
32
|
Chris@87
|
33
|
Chris@87
|
def recursive_fill_fields(input, output):
    """
    Copy the fields of `input` into the same-named fields of `output`.

    Structured (nested) fields are handled by recursing into them.

    Parameters
    ----------
    input : ndarray
        Source array.
    output : ndarray
        Destination array; it is filled in place.

    Returns
    -------
    output : ndarray
        The very `output` object that was passed in.

    Notes
    -----
    * `output` should be at least the same size as `input`.
    * A field of `output` is skipped (left untouched) when looking it up
      in `input` raises ``ValueError``.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)])
    >>> b = np.zeros((3,), dtype=a.dtype)
    >>> rfn.recursive_fill_fields(a, b)
    array([(1, 10.0), (2, 20.0), (0, 0.0)],
          dtype=[('A', '<i4'), ('B', '<f8')])

    """
    for name in output.dtype.names:
        try:
            source = input[name]
        except ValueError:
            # The lookup failed: nothing to copy for this field.
            continue
        if not source.dtype.names:
            # Plain field: copy as many items as the source holds.
            output[name][:len(source)] = source
            continue
        # Structured field: fill its sub-fields one by one.
        recursive_fill_fields(source, output[name])
    return output
|
Chris@87
|
71
|
Chris@87
|
72
|
Chris@87
|
def get_names(adtype):
    """
    Returns the field names of the input datatype as a tuple.

    A nested field is reported as a ``(name, (subnames, ...))`` pair.

    Parameters
    ----------
    adtype : dtype or ndarray
        Input datatype. An array is accepted as well, in which case its
        ``dtype`` is used.

    Returns
    -------
    names : tuple or None
        The (possibly nested) tuple of field names, or None when the
        datatype has no named fields.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.get_names(np.empty((1,), dtype=int)) is None
    True
    >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)]))
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names(adtype)
    ('a', ('b', ('ba', 'bb')))
    """
    # The documented examples pass arrays: use their dtype.
    if isinstance(adtype, np.ndarray):
        adtype = adtype.dtype
    listnames = []
    # `names` is None for an unstructured dtype: treat it as "no fields".
    for name in adtype.names or ():
        current = adtype[name]
        if current.names:
            listnames.append((name, tuple(get_names(current))))
        else:
            listnames.append(name)
    return tuple(listnames) or None
|
Chris@87
|
102
|
Chris@87
|
103
|
Chris@87
|
def get_names_flat(adtype):
    """
    Returns the field names of the input datatype as a flat tuple.

    Nested structures are flattened: the name of a structured field is
    immediately followed by the names of its sub-fields.

    Parameters
    ----------
    adtype : dtype or ndarray
        Input datatype. An array is accepted as well, in which case its
        ``dtype`` is used.

    Returns
    -------
    names : tuple or None
        The flat tuple of field names, or None when the datatype has no
        named fields.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None
    True
    >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)]))
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names_flat(adtype)
    ('a', 'b', 'ba', 'bb')
    """
    # The documented examples pass arrays: use their dtype.
    if isinstance(adtype, np.ndarray):
        adtype = adtype.dtype
    listnames = []
    # `names` is None for an unstructured dtype: treat it as "no fields".
    for name in adtype.names or ():
        listnames.append(name)
        current = adtype[name]
        if current.names:
            listnames.extend(get_names_flat(current))
    return tuple(listnames) or None
|
Chris@87
|
133
|
Chris@87
|
134
|
Chris@87
|
def flatten_descr(ndtype):
    """
    Flatten a structured data-type description.

    Parameters
    ----------
    ndtype : dtype
        Input datatype.

    Returns
    -------
    descr : tuple or list
        For a structured datatype, a tuple of ``(name, dtype)`` pairs in
        which nested structures have been replaced by their own fields.
        For a datatype without named fields, ``ndtype.descr`` as is.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = np.dtype([('a', '<i4'), ('b', [('ba', '<f8'), ('bb', '<i4')])])
    >>> rfn.flatten_descr(ndtype)
    (('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32')))

    """
    names = ndtype.names
    if names is None:
        return ndtype.descr
    descr = []
    for field in names:
        # An entry of `fields` is (dtype, offset) or, for a titled field,
        # (dtype, offset, title): only the leading dtype is needed.
        typ = ndtype.fields[field][0]
        if typ.names:
            descr.extend(flatten_descr(typ))
        else:
            descr.append((field, typ))
    return tuple(descr)
|
Chris@87
|
159
|
Chris@87
|
160
|
Chris@87
|
def zip_descr(seqarrays, flatten=False):
    """
    Combine the dtype description of a series of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays
    flatten : {boolean}, optional
        Whether to collapse nested descriptions.

    Returns
    -------
    descr : list
        The ``descr`` of the datatype built from the combined descriptions.
    """
    combined = []
    for arr in seqarrays:
        adtype = arr.dtype
        if flatten:
            combined.extend(flatten_descr(adtype))
        elif len(adtype.names or ()) > 1:
            # Several fields: keep them grouped as one (unnamed) nested field.
            combined.append(('', adtype.descr))
        else:
            combined.extend(adtype.descr)
    return np.dtype(combined).descr
|
Chris@87
|
185
|
Chris@87
|
186
|
Chris@87
|
def get_fieldstructure(adtype, lastname=None, parents=None,):
    """
    Returns a dictionary with fields indexing lists of their parent fields.

    This function is used to simplify access to fields nested in other fields.

    Parameters
    ----------
    adtype : np.dtype
        Input datatype
    lastname : optional
        Last processed field name (used internally during recursion).
    parents : dictionary
        Dictionary of parent fields (used internally during recursion).

    Returns
    -------
    parents : dict or None
        Maps each field name to the list of the names of its enclosing
        fields, outermost first. None if `adtype` has no named fields.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype =  np.dtype([('A', int),
    ...                     ('B', [('BA', int),
    ...                            ('BB', [('BBA', int), ('BBB', int)])])])
    >>> rfn.get_fieldstructure(ndtype)
    ... # XXX: possible regression, order of BBA and BBB is swapped
    {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}

    """
    if parents is None:
        parents = {}
    # Every field at this level shares the same lineage: the ancestors of the
    # enclosing field followed by the enclosing field itself.
    if lastname:
        lineage = list(parents.get(lastname) or []) + [lastname]
    else:
        lineage = []
    # `names` is None for an unstructured dtype: treat it as "no fields".
    for name in adtype.names or ():
        current = adtype[name]
        # Each field gets its own copy so that the lists are independent.
        parents[name] = list(lineage)
        if current.names:
            # The recursive call registers the sub-fields in `parents`.
            get_fieldstructure(current, name, parents)
    return parents or None
|
Chris@87
|
232
|
Chris@87
|
233
|
Chris@87
|
234 def _izip_fields_flat(iterable):
|
Chris@87
|
235 """
|
Chris@87
|
236 Returns an iterator of concatenated fields from a sequence of arrays,
|
Chris@87
|
237 collapsing any nested structure.
|
Chris@87
|
238
|
Chris@87
|
239 """
|
Chris@87
|
240 for element in iterable:
|
Chris@87
|
241 if isinstance(element, np.void):
|
Chris@87
|
242 for f in _izip_fields_flat(tuple(element)):
|
Chris@87
|
243 yield f
|
Chris@87
|
244 else:
|
Chris@87
|
245 yield element
|
Chris@87
|
246
|
Chris@87
|
247
|
Chris@87
|
248 def _izip_fields(iterable):
|
Chris@87
|
249 """
|
Chris@87
|
250 Returns an iterator of concatenated fields from a sequence of arrays.
|
Chris@87
|
251
|
Chris@87
|
252 """
|
Chris@87
|
253 for element in iterable:
|
Chris@87
|
254 if (hasattr(element, '__iter__') and
|
Chris@87
|
255 not isinstance(element, basestring)):
|
Chris@87
|
256 for f in _izip_fields(element):
|
Chris@87
|
257 yield f
|
Chris@87
|
258 elif isinstance(element, np.void) and len(tuple(element)) == 1:
|
Chris@87
|
259 for f in _izip_fields(element):
|
Chris@87
|
260 yield f
|
Chris@87
|
261 else:
|
Chris@87
|
262 yield element
|
Chris@87
|
263
|
Chris@87
|
264
|
Chris@87
|
def izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False},
        Whether to collapse nested records when concatenating the items
        (`_izip_fields_flat` is used if True, `_izip_fields` otherwise).
    """
    # Same scheme as Python 2.6's itertools.izip_longest: each input is
    # followed by a one-shot end marker, then by an endless run of
    # `fill_value`. The markers share a stock of len(seqarrays) - 1 tokens,
    # so the marker of the last input to run out finds the stock empty and
    # raises the IndexError that ends the iteration.
    tokens = [fill_value] * (len(seqarrays) - 1)

    def _end_marker():
        "Yields the fill_value or raises IndexError"
        yield tokens.pop()

    padding = itertools.repeat(fill_value)
    padded = [itertools.chain(seq, _end_marker(), padding)
              for seq in seqarrays]
    # Should we flatten the items, or just use a nested approach
    collapse = _izip_fields_flat if flatten else _izip_fields
    try:
        for row in zip(*padded):
            yield tuple(collapse(row))
    except IndexError:
        pass
|
Chris@87
|
296
|
Chris@87
|
297
|
Chris@87
|
298 def _fix_output(output, usemask=True, asrecarray=False):
|
Chris@87
|
299 """
|
Chris@87
|
300 Private function: return a recarray, a ndarray, a MaskedArray
|
Chris@87
|
301 or a MaskedRecords depending on the input parameters
|
Chris@87
|
302 """
|
Chris@87
|
303 if not isinstance(output, MaskedArray):
|
Chris@87
|
304 usemask = False
|
Chris@87
|
305 if usemask:
|
Chris@87
|
306 if asrecarray:
|
Chris@87
|
307 output = output.view(MaskedRecords)
|
Chris@87
|
308 else:
|
Chris@87
|
309 output = ma.filled(output)
|
Chris@87
|
310 if asrecarray:
|
Chris@87
|
311 output = output.view(recarray)
|
Chris@87
|
312 return output
|
Chris@87
|
313
|
Chris@87
|
314
|
Chris@87
|
315 def _fix_defaults(output, defaults=None):
|
Chris@87
|
316 """
|
Chris@87
|
317 Update the fill_value and masked data of `output`
|
Chris@87
|
318 from the default given in a dictionary defaults.
|
Chris@87
|
319 """
|
Chris@87
|
320 names = output.dtype.names
|
Chris@87
|
321 (data, mask, fill_value) = (output.data, output.mask, output.fill_value)
|
Chris@87
|
322 for (k, v) in (defaults or {}).items():
|
Chris@87
|
323 if k in names:
|
Chris@87
|
324 fill_value[k] = v
|
Chris@87
|
325 data[k][mask[k]] = v
|
Chris@87
|
326 return output
|
Chris@87
|
327
|
Chris@87
|
328
|
Chris@87
|
def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                 usemask=False, asrecarray=False):
    """
    Merge arrays field by field.

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
    ...              usemask=False)
    array([(1, 10.0), (2, 20.0), (-1, 30.0)],
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]),
    ...               np.array([10., 20., 30.])),
    ...              usemask=False, asrecarray=True)
    rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)],
              dtype=[('a', '<i4'), ('f1', '<f8')])

    With ``usemask=True`` the padded entries are masked instead of being
    filled.

    Notes
    -----
    * A single array (or a sequence holding one item) that needs no
      flattening is returned as a view of the requested type.
    * Without a mask, the missing value will be filled with something,
      depending on what its corresponding type (values obtained
      empirically by the original author):
            -1      for integers
            -1.0    for floating point numbers
            '-'     for characters
            '-1'    for strings
            True    for boolean values
    """
    # A sequence holding a single item is reduced to that item
    if len(seqarrays) == 1:
        seqarrays = np.asanyarray(seqarrays[0])
    if not isinstance(seqarrays, (ndarray, np.void)):
        # Make sure every item of the input sequence is an array
        seqarrays = [np.asanyarray(item) for item in seqarrays]
    else:
        single_dtype = seqarrays.dtype
        if (not flatten) or \
                (zip_descr((seqarrays,), flatten=True) == single_dtype.descr):
            # One array, nothing to flatten: a view is all that is needed
            flat = seqarrays.ravel()
            # The output must have named fields
            if not single_dtype.names:
                single_dtype = [('', single_dtype)]
            # Pick the class of the returned array
            if usemask:
                klass = MaskedRecords if asrecarray else MaskedArray
            else:
                klass = recarray if asrecarray else ndarray
            return flat.view(dtype=single_dtype, type=klass)
        seqarrays = (seqarrays,)
    # Sizes of the inputs, and the length every input is padded to
    sizes = tuple(a.size for a in seqarrays)
    maxlength = max(sizes)
    # Datatype of the output (flattened if requested)
    newdtype = zip_descr(seqarrays, flatten=flatten)
    seqdata = []
    if not usemask:
        for (arr, size) in zip(seqarrays, sizes):
            npad = (maxlength - size)
            values = arr.ravel().__array__()
            if npad:
                # Filling value matching the datatype of this input
                filler = _check_fill_value(fill_value, arr.dtype)
                if isinstance(filler, (ndarray, np.void)):
                    if len(filler.dtype) == 1:
                        filler = filler.item()[0]
                    else:
                        filler = np.array(filler, dtype=arr.dtype, ndmin=1)
            else:
                filler = None
            # Iterator padding the input to the expected length
            seqdata.append(itertools.chain(values, [filler] * npad))
        output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)),
                             dtype=newdtype, count=maxlength)
        if asrecarray:
            output = output.view(recarray)
        return output
    # Masked output: the data and the mask are padded side by side
    seqmask = []
    for (arr, size) in zip(seqarrays, sizes):
        npad = (maxlength - size)
        values = arr.ravel().__array__()
        flags = ma.getmaskarray(arr).ravel()
        if npad:
            # Filling value matching the datatype of this input
            filler = _check_fill_value(fill_value, arr.dtype)
            if isinstance(filler, (ndarray, np.void)):
                if len(filler.dtype) == 1:
                    filler = filler.item()[0]
                    fillmask = True
                else:
                    filler = np.array(filler, dtype=arr.dtype, ndmin=1)
                    fillmask = np.ones((1,), dtype=flags.dtype)
        else:
            filler = None
            fillmask = True
        # Iterators padding the input to the expected length
        seqdata.append(itertools.chain(values, [filler] * npad))
        seqmask.append(itertools.chain(flags, [fillmask] * npad))
    records = tuple(izip_records(seqdata, flatten=flatten))
    output = ma.array(np.fromiter(records, dtype=newdtype, count=maxlength),
                      mask=list(izip_records(seqmask, flatten=flatten)))
    if asrecarray:
        output = output.view(MaskedRecords)
    return output
|
Chris@87
|
464
|
Chris@87
|
465
|
Chris@87
|
def drop_fields(base, drop_names, usemask=True, asrecarray=False):
    """
    Return a new array with fields in `drop_names` dropped.

    Nested fields are supported.

    Parameters
    ----------
    base : array
        Input array
    drop_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to drop.
    usemask : {True, False}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype. The default
        is False.

    Returns
    -------
    output : array or None
        The new array, or None if no field is left.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
    ...   dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
    >>> rfn.drop_fields(a, 'a')
    array([((2.0, 3),), ((5.0, 6),)],
          dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])])
    >>> rfn.drop_fields(a, 'ba')
    array([(1, (3,)), (4, (6,))],
          dtype=[('a', '<i4'), ('b', [('bb', '<i4')])])
    >>> rfn.drop_fields(a, ['ba', 'bb'])
    array([(1,), (4,)],
          dtype=[('a', '<i4')])
    """
    if _is_string_like(drop_names):
        unwanted = [drop_names, ]
    else:
        unwanted = set(drop_names)

    def _remaining(ndtype):
        # Description of `ndtype` without the unwanted fields
        kept = []
        for name in ndtype.names:
            if name in unwanted:
                continue
            sub = ndtype[name]
            if not sub.names:
                kept.append((name, sub))
                continue
            # A nested field is kept only if some of its fields remain
            subdescr = _remaining(sub)
            if subdescr:
                kept.append((name, subdescr))
        return kept

    newdtype = _remaining(base.dtype)
    if not newdtype:
        return None

    output = recursive_fill_fields(base, np.empty(base.shape, dtype=newdtype))
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
|
Chris@87
|
528
|
Chris@87
|
529
|
Chris@87
|
def rec_drop_fields(base, drop_names):
    """
    Returns a new numpy.recarray with fields in `drop_names` dropped.

    Shortcut for `drop_fields` called with ``usemask=False`` and
    ``asrecarray=True``.
    """
    options = dict(usemask=False, asrecarray=True)
    return drop_fields(base, drop_names, **options)
|
Chris@87
|
535
|
Chris@87
|
536
|
Chris@87
|
def rename_fields(base, namemapper):
    """
    Rename the fields from a flexible-datatype ndarray or recarray.

    Nested fields are supported.

    Parameters
    ----------
    base : ndarray
        Input array whose fields must be modified.
    namemapper : dictionary
        Dictionary mapping old field names to their new version.

    Returns
    -------
    renamed : ndarray
        A view of `base` with the renamed datatype.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
    ...   dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])])
    >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'})
    array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))],
          dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])])

    """
    def _renamed(ndtype):
        # Description of `ndtype` with the names mapped; names missing from
        # `namemapper` are kept, nested fields are processed recursively.
        return [(namemapper.get(name, name),
                 _renamed(ndtype[name]) if ndtype[name].names
                 else ndtype[name])
                for name in ndtype.names]

    return base.view(_renamed(base.dtype))
|
Chris@87
|
574
|
Chris@87
|
575
|
Chris@87
|
def append_fields(base, names, data, dtypes=None,
                  fill_value=-1, usemask=True, asrecarray=False):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Raises
    ------
    ValueError
        If `names` is a list or tuple whose length differs from the length
        of `data`, or if the number of `dtypes` is neither one nor the
        number of new fields.

    """
    # Check the names
    if isinstance(names, (tuple, list)):
        if len(names) != len(data):
            msg = "The number of arrays does not match the number of names"
            raise ValueError(msg)
    elif isinstance(names, basestring):
        names = [names, ]
        data = [data, ]
    #
    # np.asanyarray(a) is np.array(a, copy=False, subok=True) spelled in a
    # way that does not raise when a copy turns out to be necessary.
    if dtypes is None:
        data = [np.asanyarray(a) for a in data]
        data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
    else:
        if not isinstance(dtypes, (tuple, list)):
            dtypes = [dtypes, ]
        if len(data) != len(dtypes):
            if len(dtypes) == 1:
                # A single datatype applies to every new field
                dtypes = dtypes * len(data)
            else:
                msg = "The dtypes argument must be None, a dtype, or a list."
                raise ValueError(msg)
        data = [np.asanyarray(a, dtype=d).view([(n, d)])
                for (a, n, d) in zip(data, names, dtypes)]
    #
    base = merge_arrays(base, usemask=usemask, fill_value=fill_value)
    if len(data) > 1:
        data = merge_arrays(data, flatten=True, usemask=usemask,
                            fill_value=fill_value)
    else:
        data = data.pop()
    #
    # Start fully masked, so that whatever is not filled below stays masked
    output = ma.masked_all(max(len(base), len(data)),
                           dtype=base.dtype.descr + data.dtype.descr)
    output = recursive_fill_fields(base, output)
    output = recursive_fill_fields(data, output)
    #
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
|
Chris@87
|
643
|
Chris@87
|
644
|
Chris@87
|
def rec_append_fields(base, names, data, dtypes=None):
    """
    Add new fields to an existing array.

    Shortcut for `append_fields` called with ``asrecarray=True`` and
    ``usemask=False``.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.

    See Also
    --------
    append_fields

    Returns
    -------
    appended_array : np.recarray
    """
    options = dict(data=data, dtypes=dtypes, asrecarray=True, usemask=False)
    return append_fields(base, names, **options)
|
Chris@87
|
677
|
Chris@87
|
678
|
Chris@87
|
def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                 autoconvert=False):
    """
    Superposes arrays fields by fields

    Parameters
    ----------
    arrays : array or sequence
        Sequence of input arrays. A single ndarray, or the only item of a
        one-item sequence, is returned as is.
    defaults : dictionary, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords is
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.
    autoconvert : {False, True}, optional
        Whether automatically cast the type of the field to the maximum.

    Raises
    ------
    TypeError
        If a field shared by several inputs has different descriptions and
        `autoconvert` is False.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> x = np.array([1, 2,])
    >>> rfn.stack_arrays(x) is x
    True
    >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)])
    >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
    ...   dtype=[('A', '|S3'), ('B', float), ('C', float)])
    >>> test = rfn.stack_arrays((z,zz))
    >>> test
    masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0)
     ('c', 30.0, 300.0)],
                 mask = [(False, False, True) (False, False, True) (False, False, False)
     (False, False, False) (False, False, False)],
           fill_value = ('N/A', 1e+20, 1e+20),
                dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')])

    """
    if isinstance(arrays, ndarray):
        return arrays
    elif len(arrays) == 1:
        return arrays[0]
    seqarrays = [np.asanyarray(a).ravel() for a in arrays]
    nrecords = [len(a) for a in seqarrays]
    ndtype = [a.dtype for a in seqarrays]
    fldnames = [d.names for d in ndtype]
    #
    dtype_l = ndtype[0]
    newdescr = dtype_l.descr
    names = [_[0] for _ in newdescr]
    for dtype_n in ndtype[1:]:
        for descr in dtype_n.descr:
            name = descr[0] or ''
            if name not in names:
                newdescr.append(descr)
                names.append(name)
                continue
            nameidx = names.index(name)
            current_descr = newdescr[nameidx]
            # A descr entry is (name, type) or (name, type, shape): the type
            # always sits at index 1, the last item may be a shape.
            if autoconvert:
                if np.dtype(descr[1]) > np.dtype(current_descr[1]):
                    current_descr = list(current_descr)
                    current_descr[1] = descr[1]
                    newdescr[nameidx] = tuple(current_descr)
            elif tuple(descr[1:]) != tuple(current_descr[1:]):
                # Report the bare type for plain fields, and the type with
                # its shape for sub-array fields.
                if len(current_descr) == 2:
                    known = current_descr[1]
                else:
                    known = tuple(current_descr[1:])
                if len(descr) == 2:
                    found = descr[1]
                else:
                    found = tuple(descr[1:])
                raise TypeError("Incompatible type '%s' <> '%s'" %
                                (known, found))
    # Only one field: use concatenate
    if len(newdescr) == 1:
        output = ma.concatenate(seqarrays)
    else:
        #
        output = ma.masked_all((np.sum(nrecords),), newdescr)
        offset = np.cumsum(np.r_[0, nrecords])
        seen = []
        for (a, n, i, j) in zip(seqarrays, fldnames, offset[:-1], offset[1:]):
            names = a.dtype.names
            if names is None:
                # Unnamed input: its field is named after its position
                # behind the fields seen so far.
                output['f%i' % len(seen)][i:j] = a
            else:
                for name in n:
                    output[name][i:j] = a[name]
                    if name not in seen:
                        seen.append(name)
    #
    return _fix_output(_fix_defaults(output, defaults),
                       usemask=usemask, asrecarray=asrecarray)
|
Chris@87
|
767
|
Chris@87
|
768
|
Chris@87
|
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Return the records of a structured array that occur more than once.

    Parameters
    ----------
    a : array-like
        Input array. It is flattened before being searched.
    key : {string, None}, optional
        Name of the field on which duplicates are looked for.
        If None (or empty), whole records are compared.
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to also return the indices of the duplicated values.

    Returns
    -------
    duplicates : array
        The duplicated entries of the flattened input, in sorted order.
    index : array
        Positions of these entries in the flattened input. Only returned
        (as the second item of a tuple) if `return_index` is True.

    Notes
    -----
    `filled` and `recordmask` are called on the sorted data, so the input
    is presumably expected to be a masked array (to be confirmed).

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = [('a', int)]
    >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
    ...         mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
    ... # XXX: judging by the output, the ignoremask flag has no effect
    """
    records = np.asanyarray(a).ravel()
    # Map every field name to the list of its parent fields
    parents = get_fieldstructure(records.dtype)
    # Walk down to the (possibly nested) field used for the comparison
    target = records
    if key:
        for parent in parents[key]:
            target = target[parent]
        target = target[key]
    # Sort, so that equal entries end up next to each other
    order = target.argsort()
    ordered = target[order]
    values = ordered.filled()
    # True where an entry equals its left neighbour
    same_as_previous = (values[:-1] == values[1:])
    if ignoremask:
        # A masked entry must not be reported as a copy of its neighbour
        masked = ordered.recordmask
        same_as_previous[masked[1:]] = False
    # Realign the flags with the sorted entries (the first one has no
    # left neighbour)...
    is_dup = np.concatenate(([False], same_as_previous))
    # ... and also flag the left-hand member of each equal pair
    is_dup[:-1] = is_dup[:-1] + is_dup[1:]
    found = records[order][is_dup]
    if return_index:
        return (found, order[is_dup])
    return found
|
Chris@87
|
821
|
Chris@87
|
822
|
Chris@87
|
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
            defaults=None, usemask=True, asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of string corresponding
    to the fields used to join the array.  An exception is raised if the
    `key` field cannot be found in the two input arrays.  Neither `r1` nor
    `r2` should have any duplicates along `key`: the presence of duplicates
    will make the output quite unreliable. Note that duplicates are not
    looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present
        in r2 but absent of the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present
        in r1 but absent of the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords is
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.

    Raises
    ------
    ValueError
        If `jointype` is not one of the accepted values, if a key field is
        missing from `r1` or `r2`, or if `r1` and `r2` share non-key field
        names while both `r1postfix` and `r2postfix` are empty.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for
      the two arrays and concatenating the result. This array is then
      sorted, and the common entries selected. The output is constructed by
      filling the fields with the selected entries. Matching is not
      preserved if there are some duplicates...
    * When a key field is declared with different types in `r1` and `r2`,
      the output field uses the larger of the two according to the
      ordering of `numpy.dtype` objects.

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError(
            "The 'jointype' argument should be in 'inner', "
            "'outer' or 'leftouter' (got '%s' instead)" % jointype
        )
    # If we have a single key, put it in a tuple
    if isinstance(key, basestring):
        key = (key,)

    # Check the keys
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %s' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %s' % name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    nb1 = len(r1)
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Check the names for collision
    if (set.intersection(set(r1names), set(r2names)).difference(key) and
            not (r1postfix or r2postfix)):
        msg = "r1 and r2 contain common names, r1postfix and r2postfix "
        msg += "can't be empty"
        raise ValueError(msg)

    # Make temporary arrays of just the keys
    r1k = drop_fields(r1, [n for n in r1names if n not in key])
    r2k = drop_fields(r2, [n for n in r2names if n not in key])

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys: flag every entry equal to one of its neighbours
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    # Indices below nb1 point into r1, the others (shifted by nb1) into r2
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = [list(_) for _ in r1k.dtype.descr]
    # Add the other fields
    ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key)
    for desc in r2.dtype.descr:
        desc = list(desc)
        name = desc[0]
        # Rebuild the list of names on every pass: previous iterations may
        # have renamed an entry or inserted a new one, so a list computed
        # once would no longer line up with `ndtype`.
        names = [_[0] for _ in ndtype]
        # Have we seen the current name already ?
        if name in names:
            # Look the entry up by name: the types declared by r1 and r2
            # may differ, so the full description cannot be searched for.
            nameidx = names.index(name)
            current = ndtype[nameidx]
            # The current field is part of the key: take the largest dtype
            if name in key:
                if desc[1] != current[1]:
                    # Compare dtype objects, not their string codes
                    current[1] = max(np.dtype(desc[1]), np.dtype(current[1]))
            # The current field is not part of the key: add the suffixes
            else:
                current[0] += r1postfix
                desc[0] += r2postfix
                ndtype.insert(nameidx + 1, desc)
        #... we haven't: just add the description to the current list
        else:
            ndtype.append(desc)
    # Revert the elements to tuples
    ndtype = [tuple(_) for _ in ndtype]
    # Find the largest nb of common fields :
    # r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype)
    names = output.dtype.names
    for f in r1names:
        selected = s1[f]
        if f not in names or (f in r2names and not r2postfix and f not in key):
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn + r1spc] = selected[r1cmn:]
    for f in r2names:
        selected = s2[f]
        if f not in names or (f in r1names and not r1postfix and f not in key):
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)
|
Chris@87
|
989
|
Chris@87
|
990
|
Chris@87
|
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
             defaults=None):
    """
    Join arrays `r1` and `r2` on keys.

    Convenience wrapper around `join_by` that forwards every argument
    unchanged and forces `usemask=False` and `asrecarray=True`, so that
    the result is always a np.recarray.

    See Also
    --------
    join_by : equivalent function
    """
    return join_by(key, r1, r2,
                   jointype=jointype,
                   r1postfix=r1postfix,
                   r2postfix=r2postfix,
                   defaults=defaults,
                   usemask=False,
                   asrecarray=True)
|