Chris@87
|
1 from __future__ import division, absolute_import, print_function
|
Chris@87
|
2
|
Chris@87
|
3 import sys
|
Chris@87
|
4 import time
|
Chris@87
|
5 from datetime import date
|
Chris@87
|
6
|
Chris@87
|
7 import numpy as np
|
Chris@87
|
8 from numpy.compat import asbytes, asbytes_nested
|
Chris@87
|
9 from numpy.testing import (
|
Chris@87
|
10 run_module_suite, TestCase, assert_, assert_equal
|
Chris@87
|
11 )
|
Chris@87
|
12 from numpy.lib._iotools import (
|
Chris@87
|
13 LineSplitter, NameValidator, StringConverter,
|
Chris@87
|
14 has_nested_fields, easy_dtype, flatten_dtype
|
Chris@87
|
15 )
|
Chris@87
|
16
|
Chris@87
|
17
|
Chris@87
|
class TestLineSplitter(TestCase):
    """Tests the LineSplitter class.

    Each test builds a splitter from one kind of delimiter (none, a
    character, a single field width, a sequence of field widths), applies
    it to a bytes line and compares the fields that come back.  Every
    expected field is written without surrounding blanks and without the
    trailing ``# test`` comment.
    """

    def test_no_delimiter(self):
        """Test LineSplitter w/o delimiter (both ``None`` and ``''``)."""
        strg = asbytes(" 1 2 3 4 5 # test")
        test = LineSplitter()(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))
        test = LineSplitter('')(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5']))

    def test_space_delimiter(self):
        """Test one-space and two-space delimiters on the same line."""
        # The two blanks between 4 and 5 are deliberate: they give an empty
        # field for a one-space delimiter and the only split point for a
        # two-space delimiter.
        strg = asbytes(" 1 2 3 4  5 # test")
        test = LineSplitter(asbytes(' '))(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
        test = LineSplitter(asbytes('  '))(strg)
        assert_equal(test, asbytes_nested(['1 2 3 4', '5']))

    def test_tab_delimiter(self):
        """Test tab delimiter; blanks inside a field must be preserved."""
        strg = asbytes(" 1\t 2\t 3\t 4\t 5 6")
        test = LineSplitter(asbytes('\t'))(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5 6']))
        strg = asbytes(" 1 2\t 3 4\t 5 6")
        test = LineSplitter(asbytes('\t'))(strg)
        assert_equal(test, asbytes_nested(['1 2', '3 4', '5 6']))

    def test_other_delimiter(self):
        """Test LineSplitter on a comma delimiter, w/ and w/o a comment."""
        strg = asbytes("1,2,3,4,,5")
        test = LineSplitter(asbytes(','))(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
        #
        strg = asbytes(" 1,2,3,4,,5 # test")
        test = LineSplitter(asbytes(','))(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))

    def test_constant_fixed_width(self):
        """Test LineSplitter w/ fixed-width fields (a single integer)."""
        # Seven columns of three characters each; the 5th and 7th are blank.
        strg = asbytes("  1  2  3  4     5   # test")
        test = LineSplitter(3)(strg)
        assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5', '']))
        # Width larger than the 18 characters of data: one single field.
        strg = asbytes("  1     3  4  5  6# test")
        test = LineSplitter(20)(strg)
        assert_equal(test, asbytes_nested(['1     3  4  5  6']))
        #
        strg = asbytes("  1     3  4  5  6# test")
        test = LineSplitter(30)(strg)
        assert_equal(test, asbytes_nested(['1     3  4  5  6']))

    def test_variable_fixed_width(self):
        """Test LineSplitter w/ a sequence of field widths."""
        # 18 characters of data, cut as 3 + 6 + 6 + 3.
        strg = asbytes("  1     3  4  5  6# test")
        test = LineSplitter((3, 6, 6, 3))(strg)
        assert_equal(test, asbytes_nested(['1', '3', '4  5', '6']))
        # Same data cut as 6 + 6 + 9; the last width overshoots the data.
        strg = asbytes("  1     3  4  5  6# test")
        test = LineSplitter((6, 6, 9))(strg)
        assert_equal(test, asbytes_nested(['1', '3  4', '5  6']))
|
Chris@87
|
78
|
Chris@87
|
79 #-------------------------------------------------------------------------------
|
Chris@87
|
80
|
Chris@87
|
81
|
Chris@87
|
class TestNameValidator(TestCase):
    """Tests the NameValidator class."""

    def test_case_sensitivity(self):
        """Check each accepted value of the ``case_sensitive`` option."""
        raw_names = ['A', 'a', 'b', 'c']
        # Default validator: names come back unchanged
        assert_equal(NameValidator().validate(raw_names),
                     ['A', 'a', 'b', 'c'])
        # (option value, names expected back), in the order they are checked
        expectations = (
            (False, ['A', 'A_1', 'B', 'C']),
            ('upper', ['A', 'A_1', 'B', 'C']),
            ('lower', ['a', 'a_1', 'b', 'c']),
        )
        for option, wanted in expectations:
            checker = NameValidator(case_sensitive=option)
            assert_equal(checker.validate(raw_names), wanted)

    def test_excludelist(self):
        """Names found in ``excludelist`` come back with a trailing '_'."""
        checker = NameValidator(excludelist=['dates', 'data', 'mask'])
        cleaned = checker.validate(['dates', 'data', 'Other Data', 'mask'])
        assert_equal(cleaned, ['dates_', 'data_', 'Other_Data', 'mask_'])

    def test_missing_names(self):
        """Empty names are replaced with generated 'f<n>' names."""
        checker = NameValidator()
        # (input names, names expected back); one validator is reused
        expectations = (
            (('a', 'b', 'c'), ['a', 'b', 'c']),
            (('', 'b', 'c'), ['f0', 'b', 'c']),
            (('a', 'b', ''), ['a', 'b', 'f0']),
            (('', 'f0', ''), ['f1', 'f0', 'f2']),
        )
        for given, wanted in expectations:
            assert_equal(checker(given), wanted)

    def test_validate_nb_names(self):
        """``nbfields`` truncates or extends the list of names."""
        checker = NameValidator()
        given = ('a', 'b', 'c')
        truncated = checker(given, nbfields=1)
        assert_equal(truncated, ('a',))
        extended = checker(given, nbfields=5, defaultfmt="g%i")
        assert_equal(extended, ['a', 'b', 'c', 'g0', 'g1'])

    def test_validate_wo_names(self):
        """With no names: None by default, generated names w/ ``nbfields``."""
        checker = NameValidator()
        assert_(checker(None) is None)
        assert_equal(checker(None, nbfields=3), ['f0', 'f1', 'f2'])
|
Chris@87
|
129
|
Chris@87
|
130 #-------------------------------------------------------------------------------
|
Chris@87
|
131
|
Chris@87
|
132
|
Chris@87
|
133 def _bytes_to_date(s):
|
Chris@87
|
134 if sys.version_info[0] >= 3:
|
Chris@87
|
135 return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3])
|
Chris@87
|
136 else:
|
Chris@87
|
137 return date(*time.strptime(s, "%Y-%m-%d")[:3])
|
Chris@87
|
138
|
Chris@87
|
139
|
Chris@87
|
class TestStringConverter(TestCase):
    """Test StringConverter: creation, upgrades, missing values, defaults."""

    def test_creation(self):
        """Test creation of a StringConverter from a type and a default."""
        converter = StringConverter(int, -99999)
        assert_equal(converter._status, 1)
        assert_equal(converter.default, -99999)

    def test_upgrade(self):
        """Tests the upgrade method.

        The status must step up each time the converter is shown a value
        its current level cannot hold, ending on the last mapper entry.
        """
        converter = StringConverter()
        assert_equal(converter._status, 0)
        converter.upgrade(asbytes('0'))
        assert_equal(converter._status, 1)
        converter.upgrade(asbytes('0.'))
        assert_equal(converter._status, 2)
        converter.upgrade(asbytes('0j'))
        assert_equal(converter._status, 3)
        converter.upgrade(asbytes('a'))
        assert_equal(converter._status, len(converter._mapper) - 1)

    def test_missing(self):
        """Tests the use of missing values."""
        converter = StringConverter(missing_values=(asbytes('missing'),
                                                    asbytes('missed')))
        converter.upgrade(asbytes('0'))
        assert_equal(converter(asbytes('0')), 0)
        assert_equal(converter(asbytes('')), converter.default)
        assert_equal(converter(asbytes('missing')), converter.default)
        assert_equal(converter(asbytes('missed')), converter.default)
        # A token that is neither a number nor a declared missing value has
        # to be rejected; assertRaises fails the test if nothing is raised.
        self.assertRaises(ValueError, converter, 'miss')

    def test_upgrademapper(self):
        """Tests upgrade_mapper with a date parser."""
        dateparser = _bytes_to_date
        # upgrade_mapper is called on the class, so what it registers would
        # outlive this test and shift the entries other tests index (e.g.
        # ``_mapper[-2]``).  Keep a copy and put it back afterwards.
        saved_mapper = list(StringConverter._mapper)
        try:
            StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1))
            convert = StringConverter(dateparser, date(2000, 1, 1))
            test = convert(asbytes('2001-01-01'))
            assert_equal(test, date(2001, 1, 1))
            test = convert(asbytes('2009-01-01'))
            assert_equal(test, date(2009, 1, 1))
            # An empty field falls back on the default date
            test = convert(asbytes(''))
            assert_equal(test, date(2000, 1, 1))
        finally:
            StringConverter._mapper = saved_mapper

    def test_string_to_object(self):
        """Make sure that string-to-object functions are properly recognized"""
        conv = StringConverter(_bytes_to_date)
        # The next-to-last mapper entry must still map 0 to 0j
        assert_equal(conv._mapper[-2][0](0), 0j)
        assert_(hasattr(conv, 'default'))

    def test_keep_default(self):
        """Make sure we don't lose an explicit default"""
        converter = StringConverter(None, missing_values=asbytes(''),
                                    default=-999)
        converter.upgrade(asbytes('3.14159265'))
        assert_equal(converter.default, -999)
        assert_equal(converter.type, np.dtype(float))
        #
        converter = StringConverter(
            None, missing_values=asbytes(''), default=0)
        converter.upgrade(asbytes('3.14159265'))
        assert_equal(converter.default, 0)
        assert_equal(converter.type, np.dtype(float))

    def test_keep_default_zero(self):
        """Check that we don't lose a default of 0"""
        converter = StringConverter(int, default=0,
                                    missing_values=asbytes("N/A"))
        assert_equal(converter.default, 0)

    def test_keep_missing_values(self):
        """Check that we're not losing missing values"""
        converter = StringConverter(int, default=0,
                                    missing_values=asbytes("N/A"))
        # The empty string is expected alongside the value that was passed
        assert_equal(
            converter.missing_values, set(asbytes_nested(['', 'N/A'])))

    def test_int64_dtype(self):
        """Check that int64 integer types can be specified"""
        converter = StringConverter(np.int64, default=0)
        val = asbytes("-9223372036854775807")
        assert_(converter(val) == -9223372036854775807)
        val = asbytes("9223372036854775807")
        assert_(converter(val) == 9223372036854775807)

    def test_uint64_dtype(self):
        """Check that uint64 integer types can be specified"""
        converter = StringConverter(np.uint64, default=0)
        val = asbytes("9223372043271415339")
        assert_(converter(val) == 9223372043271415339)
|
Chris@87
|
234
|
Chris@87
|
235
|
Chris@87
|
class TestMiscFunctions(TestCase):
    """Tests for has_nested_fields, easy_dtype and flatten_dtype."""

    def test_has_nested_dtype(self):
        """Test has_nested_fields on plain, flat and nested dtypes."""
        # Builtin float, as everywhere else in this class: the np.float
        # alias is deprecated and absent from recent NumPy releases.
        ndtype = np.dtype(float)
        assert_equal(has_nested_fields(ndtype), False)
        ndtype = np.dtype([('A', '|S3'), ('B', float)])
        assert_equal(has_nested_fields(ndtype), False)
        ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])])
        assert_equal(has_nested_fields(ndtype), True)

    def test_easy_dtype(self):
        """Test easy_dtype on each supported way of describing a dtype."""
        # Simple case
        ndtype = float
        assert_equal(easy_dtype(ndtype), np.dtype(float))
        # As string w/o names
        ndtype = "i4, f8"
        assert_equal(easy_dtype(ndtype),
                     np.dtype([('f0', "i4"), ('f1', "f8")]))
        # As string w/o names but different default format
        assert_equal(easy_dtype(ndtype, defaultfmt="field_%03i"),
                     np.dtype([('field_000', "i4"), ('field_001', "f8")]))
        # As string w/ names
        ndtype = "i4, f8"
        assert_equal(easy_dtype(ndtype, names="a, b"),
                     np.dtype([('a', "i4"), ('b', "f8")]))
        # As string w/ names (too many)
        ndtype = "i4, f8"
        assert_equal(easy_dtype(ndtype, names="a, b, c"),
                     np.dtype([('a', "i4"), ('b', "f8")]))
        # As string w/ names (not enough)
        ndtype = "i4, f8"
        assert_equal(easy_dtype(ndtype, names=", b"),
                     np.dtype([('f0', "i4"), ('b', "f8")]))
        # ... (with different default format)
        assert_equal(easy_dtype(ndtype, names="a", defaultfmt="f%02i"),
                     np.dtype([('a', "i4"), ('f00', "f8")]))
        # As list of tuples w/o names
        ndtype = [('A', int), ('B', float)]
        assert_equal(easy_dtype(ndtype), np.dtype([('A', int), ('B', float)]))
        # As list of tuples w/ names
        assert_equal(easy_dtype(ndtype, names="a,b"),
                     np.dtype([('a', int), ('b', float)]))
        # As list of tuples w/ not enough names
        assert_equal(easy_dtype(ndtype, names="a"),
                     np.dtype([('a', int), ('f0', float)]))
        # As list of tuples w/ too many names
        assert_equal(easy_dtype(ndtype, names="a,b,c"),
                     np.dtype([('a', int), ('b', float)]))
        # As list of types w/o names
        ndtype = (int, float, float)
        assert_equal(easy_dtype(ndtype),
                     np.dtype([('f0', int), ('f1', float), ('f2', float)]))
        # As list of types w names
        ndtype = (int, float, float)
        assert_equal(easy_dtype(ndtype, names="a, b, c"),
                     np.dtype([('a', int), ('b', float), ('c', float)]))
        # As simple dtype w/ names
        ndtype = np.dtype(float)
        assert_equal(easy_dtype(ndtype, names="a, b, c"),
                     np.dtype([(_, float) for _ in ('a', 'b', 'c')]))
        # As simple dtype w/o names (but multiple fields)
        ndtype = np.dtype(float)
        assert_equal(
            easy_dtype(ndtype, names=['', '', ''], defaultfmt="f%02i"),
            np.dtype([(_, float) for _ in ('f00', 'f01', 'f02')]))

    def test_flatten_dtype(self):
        """Testing flatten_dtype on flat, nested, shaped and titled dtypes."""
        # Standard dtype
        dt = np.dtype([("a", "f8"), ("b", "f8")])
        dt_flat = flatten_dtype(dt)
        assert_equal(dt_flat, [float, float])
        # Recursive dtype
        dt = np.dtype([("a", [("aa", '|S1'), ("ab", '|S2')]), ("b", int)])
        dt_flat = flatten_dtype(dt)
        assert_equal(dt_flat, [np.dtype('|S1'), np.dtype('|S2'), int])
        # dtype with shaped fields
        dt = np.dtype([("a", (float, 2)), ("b", (int, 3))])
        dt_flat = flatten_dtype(dt)
        assert_equal(dt_flat, [float, int])
        # With the second argument set, each shaped field is repeated
        # once per element
        dt_flat = flatten_dtype(dt, True)
        assert_equal(dt_flat, [float] * 2 + [int] * 3)
        # dtype w/ titles
        dt = np.dtype([(("a", "A"), "f8"), (("b", "B"), "f8")])
        dt_flat = flatten_dtype(dt)
        assert_equal(dt_flat, [float, float])
|
Chris@87
|
324
|
Chris@87
|
# When the file is executed as a script (rather than imported), hand over
# to numpy.testing's run_module_suite().
if __name__ == "__main__":
    run_module_suite()
|