Chris@87: from __future__ import division, absolute_import, print_function Chris@87: Chris@87: import sys Chris@87: import time Chris@87: from datetime import date Chris@87: Chris@87: import numpy as np Chris@87: from numpy.compat import asbytes, asbytes_nested Chris@87: from numpy.testing import ( Chris@87: run_module_suite, TestCase, assert_, assert_equal Chris@87: ) Chris@87: from numpy.lib._iotools import ( Chris@87: LineSplitter, NameValidator, StringConverter, Chris@87: has_nested_fields, easy_dtype, flatten_dtype Chris@87: ) Chris@87: Chris@87: Chris@87: class TestLineSplitter(TestCase): Chris@87: "Tests the LineSplitter class." Chris@87: Chris@87: def test_no_delimiter(self): Chris@87: "Test LineSplitter w/o delimiter" Chris@87: strg = asbytes(" 1 2 3 4 5 # test") Chris@87: test = LineSplitter()(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5'])) Chris@87: test = LineSplitter('')(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5'])) Chris@87: Chris@87: def test_space_delimiter(self): Chris@87: "Test space delimiter" Chris@87: strg = asbytes(" 1 2 3 4 5 # test") Chris@87: test = LineSplitter(asbytes(' '))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5'])) Chris@87: test = LineSplitter(asbytes(' '))(strg) Chris@87: assert_equal(test, asbytes_nested(['1 2 3 4', '5'])) Chris@87: Chris@87: def test_tab_delimiter(self): Chris@87: "Test tab delimiter" Chris@87: strg = asbytes(" 1\t 2\t 3\t 4\t 5 6") Chris@87: test = LineSplitter(asbytes('\t'))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '5 6'])) Chris@87: strg = asbytes(" 1 2\t 3 4\t 5 6") Chris@87: test = LineSplitter(asbytes('\t'))(strg) Chris@87: assert_equal(test, asbytes_nested(['1 2', '3 4', '5 6'])) Chris@87: Chris@87: def test_other_delimiter(self): Chris@87: "Test LineSplitter on delimiter" Chris@87: strg = asbytes("1,2,3,4,,5") Chris@87: test = LineSplitter(asbytes(','))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5'])) Chris@87: # Chris@87: strg = asbytes(" 1,2,3,4,,5 # test") Chris@87: test = LineSplitter(asbytes(','))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5'])) Chris@87: Chris@87: def test_constant_fixed_width(self): Chris@87: "Test LineSplitter w/ fixed-width fields" Chris@87: strg = asbytes(" 1 2 3 4 5 # test") Chris@87: test = LineSplitter(3)(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5', ''])) Chris@87: # Chris@87: strg = asbytes(" 1 3 4 5 6# test") Chris@87: test = LineSplitter(20)(strg) Chris@87: assert_equal(test, asbytes_nested(['1 3 4 5 6'])) Chris@87: # Chris@87: strg = asbytes(" 1 3 4 5 6# test") Chris@87: test = LineSplitter(30)(strg) Chris@87: assert_equal(test, asbytes_nested(['1 3 4 5 6'])) Chris@87: Chris@87: def test_variable_fixed_width(self): Chris@87: strg = asbytes(" 1 3 4 5 6# test") Chris@87: test = LineSplitter((3, 6, 6, 3))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '3', '4 5', '6'])) Chris@87: # Chris@87: strg = asbytes(" 1 3 4 5 6# test") Chris@87: test = LineSplitter((6, 6, 9))(strg) Chris@87: assert_equal(test, asbytes_nested(['1', '3 4', '5 6'])) Chris@87: Chris@87: #------------------------------------------------------------------------------- Chris@87: Chris@87: Chris@87: class TestNameValidator(TestCase): Chris@87: Chris@87: def test_case_sensitivity(self): Chris@87: "Test case sensitivity" Chris@87: names = ['A', 'a', 'b', 'c'] Chris@87: test = NameValidator().validate(names) Chris@87: assert_equal(test, ['A', 'a', 'b', 'c']) Chris@87: test = NameValidator(case_sensitive=False).validate(names) Chris@87: assert_equal(test, ['A', 'A_1', 'B', 'C']) Chris@87: test = NameValidator(case_sensitive='upper').validate(names) Chris@87: assert_equal(test, ['A', 'A_1', 'B', 'C']) Chris@87: test = NameValidator(case_sensitive='lower').validate(names) Chris@87: assert_equal(test, ['a', 'a_1', 'b', 'c']) Chris@87: Chris@87: def test_excludelist(self): Chris@87: "Test excludelist" Chris@87: names = ['dates', 'data', 'Other Data', 'mask'] Chris@87: validator = NameValidator(excludelist=['dates', 'data', 'mask']) Chris@87: test = validator.validate(names) Chris@87: assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_']) Chris@87: Chris@87: def test_missing_names(self): Chris@87: "Test validate missing names" Chris@87: namelist = ('a', 'b', 'c') Chris@87: validator = NameValidator() Chris@87: assert_equal(validator(namelist), ['a', 'b', 'c']) Chris@87: namelist = ('', 'b', 'c') Chris@87: assert_equal(validator(namelist), ['f0', 'b', 'c']) Chris@87: namelist = ('a', 'b', '') Chris@87: assert_equal(validator(namelist), ['a', 'b', 'f0']) Chris@87: namelist = ('', 'f0', '') Chris@87: assert_equal(validator(namelist), ['f1', 'f0', 'f2']) Chris@87: Chris@87: def test_validate_nb_names(self): Chris@87: "Test validate nb names" Chris@87: namelist = ('a', 'b', 'c') Chris@87: validator = NameValidator() Chris@87: assert_equal(validator(namelist, nbfields=1), ('a',)) Chris@87: assert_equal(validator(namelist, nbfields=5, defaultfmt="g%i"), Chris@87: ['a', 'b', 'c', 'g0', 'g1']) Chris@87: Chris@87: def test_validate_wo_names(self): Chris@87: "Test validate no names" Chris@87: namelist = None Chris@87: validator = NameValidator() Chris@87: assert_(validator(namelist) is None) Chris@87: assert_equal(validator(namelist, nbfields=3), ['f0', 'f1', 'f2']) Chris@87: Chris@87: #------------------------------------------------------------------------------- Chris@87: Chris@87: Chris@87: def _bytes_to_date(s): Chris@87: if sys.version_info[0] >= 3: Chris@87: return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3]) Chris@87: else: Chris@87: return date(*time.strptime(s, "%Y-%m-%d")[:3]) Chris@87: Chris@87: Chris@87: class TestStringConverter(TestCase): Chris@87: "Test StringConverter" Chris@87: Chris@87: def test_creation(self): Chris@87: "Test creation of a StringConverter" Chris@87: converter = StringConverter(int, -99999) Chris@87: assert_equal(converter._status, 1) Chris@87: assert_equal(converter.default, -99999) Chris@87: Chris@87: def test_upgrade(self): Chris@87: "Tests the upgrade method." Chris@87: converter = StringConverter() Chris@87: assert_equal(converter._status, 0) Chris@87: converter.upgrade(asbytes('0')) Chris@87: assert_equal(converter._status, 1) Chris@87: converter.upgrade(asbytes('0.')) Chris@87: assert_equal(converter._status, 2) Chris@87: converter.upgrade(asbytes('0j')) Chris@87: assert_equal(converter._status, 3) Chris@87: converter.upgrade(asbytes('a')) Chris@87: assert_equal(converter._status, len(converter._mapper) - 1) Chris@87: Chris@87: def test_missing(self): Chris@87: "Tests the use of missing values." Chris@87: converter = StringConverter(missing_values=(asbytes('missing'), Chris@87: asbytes('missed'))) Chris@87: converter.upgrade(asbytes('0')) Chris@87: assert_equal(converter(asbytes('0')), 0) Chris@87: assert_equal(converter(asbytes('')), converter.default) Chris@87: assert_equal(converter(asbytes('missing')), converter.default) Chris@87: assert_equal(converter(asbytes('missed')), converter.default) Chris@87: try: Chris@87: converter('miss') Chris@87: except ValueError: Chris@87: pass Chris@87: Chris@87: def test_upgrademapper(self): Chris@87: "Tests updatemapper" Chris@87: dateparser = _bytes_to_date Chris@87: StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) Chris@87: convert = StringConverter(dateparser, date(2000, 1, 1)) Chris@87: test = convert(asbytes('2001-01-01')) Chris@87: assert_equal(test, date(2001, 1, 1)) Chris@87: test = convert(asbytes('2009-01-01')) Chris@87: assert_equal(test, date(2009, 1, 1)) Chris@87: test = convert(asbytes('')) Chris@87: assert_equal(test, date(2000, 1, 1)) Chris@87: Chris@87: def test_string_to_object(self): Chris@87: "Make sure that string-to-object functions are properly recognized" Chris@87: conv = StringConverter(_bytes_to_date) Chris@87: assert_equal(conv._mapper[-2][0](0), 0j) Chris@87: assert_(hasattr(conv, 'default')) Chris@87: Chris@87: def test_keep_default(self): Chris@87: "Make sure we don't lose an explicit default" Chris@87: converter = StringConverter(None, missing_values=asbytes(''), Chris@87: default=-999) Chris@87: converter.upgrade(asbytes('3.14159265')) Chris@87: assert_equal(converter.default, -999) Chris@87: assert_equal(converter.type, np.dtype(float)) Chris@87: # Chris@87: converter = StringConverter( Chris@87: None, missing_values=asbytes(''), default=0) Chris@87: converter.upgrade(asbytes('3.14159265')) Chris@87: assert_equal(converter.default, 0) Chris@87: assert_equal(converter.type, np.dtype(float)) Chris@87: Chris@87: def test_keep_default_zero(self): Chris@87: "Check that we don't lose a default of 0" Chris@87: converter = StringConverter(int, default=0, Chris@87: missing_values=asbytes("N/A")) Chris@87: assert_equal(converter.default, 0) Chris@87: Chris@87: def test_keep_missing_values(self): Chris@87: "Check that we're not losing missing values" Chris@87: converter = StringConverter(int, default=0, Chris@87: missing_values=asbytes("N/A")) Chris@87: assert_equal( Chris@87: converter.missing_values, set(asbytes_nested(['', 'N/A']))) Chris@87: Chris@87: def test_int64_dtype(self): Chris@87: "Check that int64 integer types can be specified" Chris@87: converter = StringConverter(np.int64, default=0) Chris@87: val = asbytes("-9223372036854775807") Chris@87: assert_(converter(val) == -9223372036854775807) Chris@87: val = asbytes("9223372036854775807") Chris@87: assert_(converter(val) == 9223372036854775807) Chris@87: Chris@87: def test_uint64_dtype(self): Chris@87: "Check that uint64 integer types can be specified" Chris@87: converter = StringConverter(np.uint64, default=0) Chris@87: val = asbytes("9223372043271415339") Chris@87: assert_(converter(val) == 9223372043271415339) Chris@87: Chris@87: Chris@87: class TestMiscFunctions(TestCase): Chris@87: Chris@87: def test_has_nested_dtype(self): Chris@87: "Test has_nested_dtype" Chris@87: ndtype = np.dtype(np.float) Chris@87: assert_equal(has_nested_fields(ndtype), False) Chris@87: ndtype = np.dtype([('A', '|S3'), ('B', float)]) Chris@87: assert_equal(has_nested_fields(ndtype), False) Chris@87: ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])]) Chris@87: assert_equal(has_nested_fields(ndtype), True) Chris@87: Chris@87: def test_easy_dtype(self): Chris@87: "Test ndtype on dtypes" Chris@87: # Simple case Chris@87: ndtype = float Chris@87: assert_equal(easy_dtype(ndtype), np.dtype(float)) Chris@87: # As string w/o names Chris@87: ndtype = "i4, f8" Chris@87: assert_equal(easy_dtype(ndtype), Chris@87: np.dtype([('f0', "i4"), ('f1', "f8")])) Chris@87: # As string w/o names but different default format Chris@87: assert_equal(easy_dtype(ndtype, defaultfmt="field_%03i"), Chris@87: np.dtype([('field_000', "i4"), ('field_001', "f8")])) Chris@87: # As string w/ names Chris@87: ndtype = "i4, f8" Chris@87: assert_equal(easy_dtype(ndtype, names="a, b"), Chris@87: np.dtype([('a', "i4"), ('b', "f8")])) Chris@87: # As string w/ names (too many) Chris@87: ndtype = "i4, f8" Chris@87: assert_equal(easy_dtype(ndtype, names="a, b, c"), Chris@87: np.dtype([('a', "i4"), ('b', "f8")])) Chris@87: # As string w/ names (not enough) Chris@87: ndtype = "i4, f8" Chris@87: assert_equal(easy_dtype(ndtype, names=", b"), Chris@87: np.dtype([('f0', "i4"), ('b', "f8")])) Chris@87: # ... (with different default format) Chris@87: assert_equal(easy_dtype(ndtype, names="a", defaultfmt="f%02i"), Chris@87: np.dtype([('a', "i4"), ('f00', "f8")])) Chris@87: # As list of tuples w/o names Chris@87: ndtype = [('A', int), ('B', float)] Chris@87: assert_equal(easy_dtype(ndtype), np.dtype([('A', int), ('B', float)])) Chris@87: # As list of tuples w/ names Chris@87: assert_equal(easy_dtype(ndtype, names="a,b"), Chris@87: np.dtype([('a', int), ('b', float)])) Chris@87: # As list of tuples w/ not enough names Chris@87: assert_equal(easy_dtype(ndtype, names="a"), Chris@87: np.dtype([('a', int), ('f0', float)])) Chris@87: # As list of tuples w/ too many names Chris@87: assert_equal(easy_dtype(ndtype, names="a,b,c"), Chris@87: np.dtype([('a', int), ('b', float)])) Chris@87: # As list of types w/o names Chris@87: ndtype = (int, float, float) Chris@87: assert_equal(easy_dtype(ndtype), Chris@87: np.dtype([('f0', int), ('f1', float), ('f2', float)])) Chris@87: # As list of types w names Chris@87: ndtype = (int, float, float) Chris@87: assert_equal(easy_dtype(ndtype, names="a, b, c"), Chris@87: np.dtype([('a', int), ('b', float), ('c', float)])) Chris@87: # As simple dtype w/ names Chris@87: ndtype = np.dtype(float) Chris@87: assert_equal(easy_dtype(ndtype, names="a, b, c"), Chris@87: np.dtype([(_, float) for _ in ('a', 'b', 'c')])) Chris@87: # As simple dtype w/o names (but multiple fields) Chris@87: ndtype = np.dtype(float) Chris@87: assert_equal( Chris@87: easy_dtype(ndtype, names=['', '', ''], defaultfmt="f%02i"), Chris@87: np.dtype([(_, float) for _ in ('f00', 'f01', 'f02')])) Chris@87: Chris@87: def test_flatten_dtype(self): Chris@87: "Testing flatten_dtype" Chris@87: # Standard dtype Chris@87: dt = np.dtype([("a", "f8"), ("b", "f8")]) Chris@87: dt_flat = flatten_dtype(dt) Chris@87: assert_equal(dt_flat, [float, float]) Chris@87: # Recursive dtype Chris@87: dt = np.dtype([("a", [("aa", '|S1'), ("ab", '|S2')]), ("b", int)]) Chris@87: dt_flat = flatten_dtype(dt) Chris@87: assert_equal(dt_flat, [np.dtype('|S1'), np.dtype('|S2'), int]) Chris@87: # dtype with shaped fields Chris@87: dt = np.dtype([("a", (float, 2)), ("b", (int, 3))]) Chris@87: dt_flat = flatten_dtype(dt) Chris@87: assert_equal(dt_flat, [float, int]) Chris@87: dt_flat = flatten_dtype(dt, True) Chris@87: assert_equal(dt_flat, [float] * 2 + [int] * 3) Chris@87: # dtype w/ titles Chris@87: dt = np.dtype([(("a", "A"), "f8"), (("b", "B"), "f8")]) Chris@87: dt_flat = flatten_dtype(dt) Chris@87: assert_equal(dt_flat, [float, float]) Chris@87: Chris@87: if __name__ == "__main__": Chris@87: run_module_suite()