// Copyright: (2012-2015) Ben Strasser
// License: BSD-3
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#ifndef CSV_H
#define CSV_H

#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <utility>
#include <cstdio>
#include <exception>
#ifndef CSV_IO_NO_THREAD
#include <mutex>
#include <thread>
#include <condition_variable>
#endif
#include <memory>
#include <cassert>
#include <cerrno>
#include <istream>
#include <iterator>
#include <limits>

// NOTE(review): this header was recovered from a dump in which everything
// between '<' and '>' had been stripped (include targets, template parameter
// lists and a few loop bodies). Spots that could not be recovered from the
// surrounding code alone carry their own NOTE(review) marker below.

namespace io{
    ////////////////////////////////////////////////////////////////////////////
    //                                 LineReader                             //
    ////////////////////////////////////////////////////////////////////////////

    namespace error{
        // Common base of all exceptions thrown by this header. The message
        // is formatted lazily, on the first call to what(), into a fixed
        // buffer so that no allocation happens while reporting an error.
        struct base : std::exception{
            virtual void format_error_message()const = 0;

            const char*what()const noexcept override{
                format_error_message();
                return error_message_buffer;
            }

            mutable char error_message_buffer[256];
        };

        const int max_file_name_length = 255;

        // Mixin: carries a (possibly truncated) file name.
        struct with_file_name{
            with_file_name(){
                std::memset(file_name, 0, max_file_name_length+1);
            }

            void set_file_name(const char*file_name){
                std::strncpy(this->file_name, file_name, max_file_name_length);
                this->file_name[max_file_name_length] = '\0';
            }

            char file_name[max_file_name_length+1];
        };

        // Mixin: carries a line number; -1 means "not set".
        struct with_file_line{
            with_file_line(){
                file_line = -1;
            }

            void set_file_line(int file_line){
                this->file_line = file_line;
            }

            int file_line;
        };

        // Mixin: carries an errno value; 0 means "not set".
        struct with_errno{
            with_errno(){
                errno_value = 0;
            }

            void set_errno(int errno_value){
                this->errno_value = errno_value;
            }

            int errno_value;
        };

        struct can_not_open_file :
            base,
            with_file_name,
            with_errno{
            void format_error_message()const override{
                if(errno_value != 0)
                    std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                        "Can not open file \"%s\" because \"%s\"."
                        , file_name, std::strerror(errno_value));
                else
                    std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                        "Can not open file \"%s\"."
                        , file_name);
            }
        };

        struct line_length_limit_exceeded :
            base,
            with_file_name,
            with_file_line{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
                    , file_line, file_name);
            }
        };
    }

    // Abstract source of raw bytes. read() stores up to `size` bytes into
    // `buffer` and returns how many were stored.
    class ByteSourceBase{
    public:
        virtual int read(char*buffer, int size)=0;
        virtual ~ByteSourceBase(){}
    };

    namespace detail{

        // Reads from a FILE* and closes it on destruction.
        class OwningStdIOByteSourceBase : public ByteSourceBase{
        public:
            explicit OwningStdIOByteSourceBase(FILE*file):file(file){
                // Tell the std library that we want to do the buffering ourself.
                std::setvbuf(file, nullptr, _IONBF, 0);
            }

            int read(char*buffer, int size) override{
                return std::fread(buffer, 1, size, file);
            }

            ~OwningStdIOByteSourceBase(){
                std::fclose(file);
            }

        private:
            FILE*file;
        };

        // Reads from a std::istream that is owned by somebody else.
        class NonOwningIStreamByteSource : public ByteSourceBase{
        public:
            explicit NonOwningIStreamByteSource(std::istream&in):in(in){}

            int read(char*buffer, int size) override{
                in.read(buffer, size);
                return in.gcount();
            }

            ~NonOwningIStreamByteSource(){}

        private:
            std::istream&in;
        };

        // Reads from a memory range that is owned by somebody else.
        class NonOwningStringByteSource : public ByteSourceBase{
        public:
            NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}

            int read(char*buffer, int desired_byte_count) override{
                int to_copy_byte_count = desired_byte_count;
                if(remaining_byte_count < to_copy_byte_count)
                    to_copy_byte_count = remaining_byte_count;
                std::memcpy(buffer, str, to_copy_byte_count);
                remaining_byte_count -= to_copy_byte_count;
                str += to_copy_byte_count;
                return to_copy_byte_count;
            }

            ~NonOwningStringByteSource(){}

        private:
            const char*str;
            long long remaining_byte_count;
        };

        #ifndef CSV_IO_NO_THREAD
        // Performs the read requested by start_read() on a worker thread;
        // finish_read() blocks until that read is done and returns its
        // byte count, or rethrows the exception the read raised.
        class AsynchronousReader{
        public:
            void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
                std::unique_lock<std::mutex>guard(lock);
                byte_source = std::move(arg_byte_source);
                desired_byte_count = -1;
                termination_requested = false;
                worker = std::thread(
                    [&]{
                        std::unique_lock<std::mutex>guard(lock);
                        try{
                            for(;;){
                                read_requested_condition.wait(
                                    guard,
                                    [&]{
                                        return desired_byte_count != -1 || termination_requested;
                                    }
                                );
                                if(termination_requested)
                                    return;

                                read_byte_count = byte_source->read(buffer, desired_byte_count);
                                desired_byte_count = -1;
                                // A read of 0 bytes ends the worker loop;
                                // the waiter is notified below.
                                if(read_byte_count == 0)
                                    break;
                                read_finished_condition.notify_one();
                            }
                        }catch(...){
                            read_error = std::current_exception();
                        }
                        read_finished_condition.notify_one();
                    }
                );
            }

            bool is_valid()const{
                return byte_source != nullptr;
            }

            void start_read(char*arg_buffer, int arg_desired_byte_count){
                std::unique_lock<std::mutex>guard(lock);
                buffer = arg_buffer;
                desired_byte_count = arg_desired_byte_count;
                read_byte_count = -1;
                read_requested_condition.notify_one();
            }

            int finish_read(){
                std::unique_lock<std::mutex>guard(lock);
                read_finished_condition.wait(
                    guard,
                    [&]{
                        return read_byte_count != -1 || read_error;
                    }
                );
                if(read_error)
                    std::rethrow_exception(read_error);
                else
                    return read_byte_count;
            }

            ~AsynchronousReader(){
                // The worker only exists once init() has installed a source.
                if(byte_source != nullptr){
                    {
                        std::unique_lock<std::mutex>guard(lock);
                        termination_requested = true;
                    }
                    read_requested_condition.notify_one();
                    worker.join();
                }
            }

        private:
            std::unique_ptr<ByteSourceBase>byte_source;

            std::thread worker;

            bool termination_requested = false;
            std::exception_ptr read_error;
            char*buffer = nullptr;
            int desired_byte_count = -1;
            int read_byte_count = -1;

            std::mutex lock;
            std::condition_variable read_finished_condition;
            std::condition_variable read_requested_condition;
        };
        #endif

        // Same interface as AsynchronousReader, but the read is performed
        // inside finish_read() on the calling thread.
        class SynchronousReader{
        public:
            void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
                byte_source = std::move(arg_byte_source);
            }

            bool is_valid()const{
                return byte_source != nullptr;
            }

            void start_read(char*arg_buffer, int arg_desired_byte_count){
                buffer = arg_buffer;
                desired_byte_count = arg_desired_byte_count;
            }

            int finish_read(){
                return byte_source->read(buffer, desired_byte_count);
            }
        private:
            std::unique_ptr<ByteSourceBase>byte_source;
            char*buffer = nullptr;
            int desired_byte_count = 0;
        };
    }

    // Splits a byte source into lines. next_line() returns a pointer into an
    // internal buffer; the line is NUL-terminated in place and a trailing
    // "\r" is removed. Lines must be shorter than block_len bytes.
    class LineReader{
    private:
        static const int block_len = 1<<24;

        // Holds 3*block_len bytes: two blocks being scanned plus one block
        // that the reader fills in the background. Declared BEFORE `reader`
        // so that it is destroyed AFTER it: the reader's destructor joins
        // the worker thread, which may still be writing into this buffer.
        std::unique_ptr<char[]>buffer;
        #ifdef CSV_IO_NO_THREAD
        detail::SynchronousReader reader;
        #else
        detail::AsynchronousReader reader;
        #endif
        int data_begin;
        int data_end;

        char file_name[error::max_file_name_length+1];
        unsigned file_line;

        static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
            // We open the file in binary mode as it makes no difference under *nix
            // and under Windows we handle \r\n newlines ourself.
            FILE*file = std::fopen(file_name, "rb");
            if(file == nullptr){
                int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
                error::can_not_open_file err;
                err.set_errno(x);
                err.set_file_name(file_name);
                throw err;
            }
            return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
        }

        void init(std::unique_ptr<ByteSourceBase>byte_source){
            file_line = 0;

            buffer = std::unique_ptr<char[]>(new char[3*block_len]);
            data_begin = 0;
            data_end = byte_source->read(buffer.get(), 2*block_len);

            // Ignore UTF-8 BOM
            if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
                data_begin = 3;

            // Only when the first read filled both blocks can there be more
            // data; otherwise the reader is never initialised.
            if(data_end == 2*block_len){
                reader.init(std::move(byte_source));
                reader.start_read(buffer.get() + 2*block_len, block_len);
            }
        }

    public:
        LineReader() = delete;
        LineReader(const LineReader&) = delete;
        LineReader&operator=(const LineReader&) = delete;

        explicit LineReader(const char*file_name){
            set_file_name(file_name);
            init(open_file(file_name));
        }

        explicit LineReader(const std::string&file_name){
            set_file_name(file_name.c_str());
            init(open_file(file_name.c_str()));
        }

        LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
            set_file_name(file_name);
            init(std::move(byte_source));
        }

        LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
            set_file_name(file_name.c_str());
            init(std::move(byte_source));
        }

        LineReader(const char*file_name, const char*data_begin, const char*data_end){
            set_file_name(file_name);
            init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
        }

        LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
            set_file_name(file_name.c_str());
            init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
        }

        LineReader(const char*file_name, FILE*file){
            set_file_name(file_name);
            init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
        }

        LineReader(const std::string&file_name, FILE*file){
            set_file_name(file_name.c_str());
            init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
        }

        LineReader(const char*file_name, std::istream&in){
            set_file_name(file_name);
            init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
        }

        LineReader(const std::string&file_name, std::istream&in){
            set_file_name(file_name.c_str());
            init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
        }

        void set_file_name(const std::string&file_name){
            set_file_name(file_name.c_str());
        }

        // Stores at most error::max_file_name_length characters of the name;
        // it is only used when filling in error messages.
        void set_file_name(const char*file_name){
            std::strncpy(this->file_name, file_name, error::max_file_name_length);
            this->file_name[error::max_file_name_length] = '\0';
        }

        const char*get_truncated_file_name()const{
            return file_name;
        }

        void set_file_line(unsigned file_line){
            this->file_line = file_line;
        }

        unsigned get_file_line()const{
            return file_line;
        }

        // Returns the next line, or a null pointer once all data has been
        // consumed. The returned pointer refers to the internal buffer.
        // Throws error::line_length_limit_exceeded for over-long lines.
        char*next_line(){
            if(data_begin == data_end)
                return nullptr;

            ++file_line;

            assert(data_begin < data_end);
            assert(data_end <= block_len*2);

            if(data_begin >= block_len){
                // The first block is fully consumed: slide the second block
                // to the front and append what the reader fetched meanwhile.
                std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
                data_begin -= block_len;
                data_end -= block_len;
                if(reader.is_valid())
                {
                    data_end += reader.finish_read();
                    std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
                    reader.start_read(buffer.get() + 2*block_len, block_len);
                }
            }

            // Check the bound first: bytes at and beyond data_end are not
            // valid data and must not be inspected.
            int line_end = data_begin;
            while(line_end != data_end && buffer[line_end] != '\n'){
                ++line_end;
            }

            if(line_end - data_begin + 1 > block_len){
                error::line_length_limit_exceeded err;
                err.set_file_name(file_name);
                err.set_file_line(file_line);
                throw err;
            }

            if(line_end != data_end && buffer[line_end] == '\n'){
                buffer[line_end] = '\0';
            }else{
                // some files are missing the newline at the end of the
                // last line
                ++data_end;
                buffer[line_end] = '\0';
            }

            // handle windows \r\n-line breaks
            if(line_end != data_begin && buffer[line_end-1] == '\r')
                buffer[line_end-1] = '\0';

            char*ret = buffer.get() + data_begin;
            data_begin = line_end+1;
            return ret;
        }
    };


    ////////////////////////////////////////////////////////////////////////////
    //                                 CSV                                    //
    ////////////////////////////////////////////////////////////////////////////

    namespace error{
        const int max_column_name_length = 63;

        // Mixin: carries a (possibly truncated) column name.
        struct with_column_name{
            with_column_name(){
                std::memset(column_name, 0, max_column_name_length+1);
            }

            void set_column_name(const char*column_name){
                std::strncpy(this->column_name, column_name, max_column_name_length);
                this->column_name[max_column_name_length] = '\0';
            }

            char column_name[max_column_name_length+1];
        };


        const int max_column_content_length = 63;

        // Mixin: carries the (possibly truncated) offending cell content.
        struct with_column_content{
            with_column_content(){
                std::memset(column_content, 0, max_column_content_length+1);
            }

            void set_column_content(const char*column_content){
                std::strncpy(this->column_content, column_content, max_column_content_length);
                this->column_content[max_column_content_length] = '\0';
            }

            char column_content[max_column_content_length+1];
        };


        struct extra_column_in_header :
            base,
            with_file_name,
            with_column_name{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Extra column \"%s\" in header of file \"%s\"."
                    , column_name, file_name);
            }
        };

        struct missing_column_in_header :
            base,
            with_file_name,
            with_column_name{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Missing column \"%s\" in header of file \"%s\"."
                    , column_name, file_name);
            }
        };

        struct duplicated_column_in_header :
            base,
            with_file_name,
            with_column_name{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Duplicated column \"%s\" in header of file \"%s\"."
                    , column_name, file_name);
            }
        };

        struct header_missing :
            base,
            with_file_name{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Header missing in file \"%s\"."
                    , file_name);
            }
        };

        struct too_few_columns :
            base,
            with_file_name,
            with_file_line{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Too few columns in line %d in file \"%s\"."
                    , file_line, file_name);
            }
        };

        struct too_many_columns :
            base,
            with_file_name,
            with_file_line{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Too many columns in line %d in file \"%s\"."
                    , file_line, file_name);
            }
        };

        struct escaped_string_not_closed :
            base,
            with_file_name,
            with_file_line{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Escaped string was not closed in line %d in file \"%s\"."
                    , file_line, file_name);
            }
        };

        struct integer_must_be_positive :
            base,
            with_file_name,
            with_file_line,
            with_column_name,
            with_column_content{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"."
                    , column_content, column_name, file_name, file_line);
            }
        };

        struct no_digit :
            base,
            with_file_name,
            with_file_line,
            with_column_name,
            with_column_content{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"."
                    , column_content, column_name, file_name, file_line);
            }
        };

        struct integer_overflow :
            base,
            with_file_name,
            with_file_line,
            with_column_name,
            with_column_content{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"."
                    , column_content, column_name, file_name, file_line);
            }
        };

        struct integer_underflow :
            base,
            with_file_name,
            with_file_line,
            with_column_name,
            with_column_content{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"."
                    , column_content, column_name, file_name, file_line);
            }
        };

        struct invalid_single_character :
            base,
            with_file_name,
            with_file_line,
            with_column_name,
            with_column_content{
            void format_error_message()const override{
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character."
                    , column_content, column_name, file_name, file_line);
            }
        };
    }

    // Bit flags controlling how read_header() treats header mismatches.
    typedef unsigned ignore_column;
    static const ignore_column ignore_no_column = 0;
    static const ignore_column ignore_extra_column = 1;
    static const ignore_column ignore_missing_column = 2;

    // Trim policy: strips any of the listed characters from both ends of a
    // column and NUL-terminates the result in place.
    template<char ... trim_char_list>
    struct trim_chars{
    private:
        constexpr static bool is_trim_char(char){
            return false;
        }

        template<class ...OtherTrimChars>
        constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
            return c == trim_char || is_trim_char(c, other_trim_chars...);
        }

    public:
        static void trim(char*&str_begin, char*&str_end){
            // The range check comes first so that an empty range never
            // dereferences str_begin or the byte before it.
            while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
                ++str_begin;
            while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
                --str_end;
            *str_end = '\0';
        }
    };


    // Comment policy: no line is a comment.
    struct no_comment{
        static bool is_comment(const char*){
            return false;
        }
    };

    // Comment policy: a line is a comment if its first character is one of
    // the listed characters.
    template<char ... comment_start_char_list>
    struct single_line_comment{
    private:
        constexpr static bool is_comment_start_char(char){
            return false;
        }

        template<class ...OtherCommentStartChars>
        constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
            return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
        }

    public:

        static bool is_comment(const char*line){
            return is_comment_start_char(*line, comment_start_char_list...);
        }
    };

    // Comment policy: lines that are empty or consist only of spaces and
    // tabs are comments.
    struct empty_line_comment{
        static bool is_comment(const char*line){
            if(*line == '\0')
                return true;
            while(*line == ' ' || *line == '\t'){
                ++line;
                if(*line == 0)
                    return true;
            }
            return false;
        }
    };

    // Comment policy: union of single_line_comment and empty_line_comment.
    template<char ... comment_start_char_list>
    struct single_and_empty_line_comment{
        static bool is_comment(const char*line){
            return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
        }
    };

    // Quote policy: columns are separated by `sep`, no quoting at all.
    template<char sep>
    struct no_quote_escape{
        static const char*find_next_column_end(const char*col_begin){
            while(*col_begin != sep && *col_begin != '\0')
                ++col_begin;
            return col_begin;
        }

        static void unescape(char*&, char*&){

        }
    };

    // Quote policy: a column may be wrapped in `quote` characters, inside
    // which `sep` is literal and a doubled quote stands for a single one.
    template<char sep, char quote>
    struct double_quote_escape{
        static const char*find_next_column_end(const char*col_begin){
            while(*col_begin != sep && *col_begin != '\0')
                if(*col_begin != quote)
                    ++col_begin;
                else{
                    do{
                        ++col_begin;
                        while(*col_begin != quote){
                            if(*col_begin == '\0')
                                throw error::escaped_string_not_closed();
                            ++col_begin;
                        }
                        ++col_begin;
                    }while(*col_begin == quote);
                }
            return col_begin;
        }

        static void unescape(char*&col_begin, char*&col_end){
            if(col_end - col_begin >= 2){
                if(*col_begin == quote && *(col_end-1) == quote){
                    ++col_begin;
                    --col_end;
                    // Compact in place, collapsing each doubled quote.
                    char*out = col_begin;
                    for(char*in = col_begin; in!=col_end; ++in){
                        if(*in == quote && (in+1) != col_end && *(in+1) == quote){
                            ++in;
                        }
                        *out = *in;
                        ++out;
                    }
                    col_end = out;
                    *col_end = '\0';
                }
            }

        }
    };

    // Overflow policy: throw on integer overflow / underflow.
    struct throw_on_overflow{
        template<class T>
        static void on_overflow(T&){
            throw error::integer_overflow();
        }

        template<class T>
        static void on_underflow(T&){
            throw error::integer_underflow();
        }
    };

    // Overflow policy: stop parsing and keep the value accumulated so far.
    struct ignore_overflow{
        template<class T>
        static void on_overflow(T&){}

        template<class T>
        static void on_underflow(T&){}
    };

    // Overflow policy: saturate to the limits of the target type.
    struct set_to_max_on_overflow{
        template<class T>
        static void on_overflow(T&x){
            x = std::numeric_limits<T>::max();
        }

        template<class T>
        static void on_underflow(T&x){
            x = std::numeric_limits<T>::min();
        }
    };


    namespace detail{
        // Cuts the first column off `line`: [col_begin, col_end) becomes the
        // NUL-terminated column and `line` advances to the next column, or
        // becomes null if this was the last one.
        template<class quote_policy>
        void chop_next_column(
            char*&line, char*&col_begin, char*&col_end
        ){
            assert(line != nullptr);

            col_begin = line;
            // the col_begin + (... - col_begin) removes the constness
            col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);

            if(*col_end == '\0'){
                line = nullptr;
            }else{
                *col_end = '\0';
                line = col_end + 1;
            }
        }

        // Splits a data line into columns and stores a pointer to each
        // wanted column in sorted_col; col_order[i] is the slot for the
        // i-th column of the file, or -1 if that column is ignored.
        template<class trim_policy, class quote_policy>
        void parse_line(
            char*line,
            char**sorted_col,
            const std::vector<int>&col_order
        ){
            for(std::size_t i=0; i<col_order.size(); ++i){
                // NOTE(review): the head of this loop body (the check that
                // throws too_few_columns and the col_begin/col_end
                // declaration) was lost in the garbled source and has been
                // reconstructed - confirm.
                if(line == nullptr)
                    throw ::io::error::too_few_columns();
                char*col_begin, *col_end;
                chop_next_column<quote_policy>(line, col_begin, col_end);

                if(col_order[i] != -1){
                    trim_policy::trim(col_begin, col_end);
                    quote_policy::unescape(col_begin, col_end);

                    sorted_col[col_order[i]] = col_begin;
                }
            }
            if(line != nullptr)
                throw ::io::error::too_many_columns();
        }

        // Parses the header line and fills col_order with, for every column
        // of the file, the index of the matching name in col_name (or -1
        // for an ignored extra column).
        template<unsigned column_count, class trim_policy, class quote_policy>
        void parse_header_line(
            char*line,
            std::vector<int>&col_order,
            const std::string*col_name,
            ignore_column ignore_policy
        ){
            col_order.clear();

            bool found[column_count];
            std::fill(found, found + column_count, false);
            while(line){
                char*col_begin,*col_end;
                chop_next_column<quote_policy>(line, col_begin, col_end);

                trim_policy::trim(col_begin, col_end);
                quote_policy::unescape(col_begin, col_end);

                // NOTE(review): everything from here to the end of this
                // function was lost in the garbled source and has been
                // reconstructed from the error types and the ignore_* flags
                // declared above - confirm.
                for(unsigned i=0; i<column_count; ++i)
                    if(col_begin == col_name[i]){
                        if(found[i]){
                            error::duplicated_column_in_header err;
                            err.set_column_name(col_begin);
                            throw err;
                        }
                        found[i] = true;
                        col_order.push_back(i);
                        col_begin = nullptr;
                        break;
                    }
                if(col_begin){
                    if(ignore_policy & ::io::ignore_extra_column)
                        col_order.push_back(-1);
                    else{
                        error::extra_column_in_header err;
                        err.set_column_name(col_begin);
                        throw err;
                    }
                }
            }
            if(!(ignore_policy & ::io::ignore_missing_column)){
                for(unsigned i=0; i<column_count; ++i){
                    if(!found[i]){
                        error::missing_column_in_header err;
                        err.set_column_name(col_name[i].c_str());
                        throw err;
                    }
                }
            }
        }

        // A char column must contain exactly one character.
        template<class overflow_policy>
        void parse(char*col, char &x){
            if(!*col)
                throw error::invalid_single_character();
            x = *col;
            ++col;
            if(*col)
                throw error::invalid_single_character();
        }

        template<class overflow_policy>
        void parse(char*col, std::string&x){
            x = col;
        }

        // The pointer variants alias the reader's internal line buffer.
        template<class overflow_policy>
        void parse(char*col, const char*&x){
            x = col;
        }

        template<class overflow_policy>
        void parse(char*col, char*&x){
            x = col;
        }

        // Parses a run of decimal digits. Throws error::no_digit on any
        // other character; calls overflow_policy::on_overflow and stops
        // when the value does not fit into T.
        template<class overflow_policy, class T>
        void parse_unsigned_integer(const char*col, T&x){
            x = 0;
            while(*col != '\0'){
                if('0' <= *col && *col <= '9'){
                    T y = *col - '0';
                    if(x > (std::numeric_limits<T>::max()-y)/10){
                        overflow_policy::on_overflow(x);
                        return;
                    }
                    x = 10*x+y;
                }else
                    throw error::no_digit();
                ++col;
            }
        }

        template<class overflow_policy>void parse(char*col, unsigned char &x)
            {parse_unsigned_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, unsigned short &x)
            {parse_unsigned_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, unsigned int &x)
            {parse_unsigned_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, unsigned long &x)
            {parse_unsigned_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, unsigned long long &x)
            {parse_unsigned_integer<overflow_policy>(col, x);}

        // Like parse_unsigned_integer, with an optional leading '+' or '-'.
        // Negative values are accumulated downwards so that the minimum of
        // T can be represented.
        template<class overflow_policy, class T>
        void parse_signed_integer(const char*col, T&x){
            if(*col == '-'){
                ++col;

                x = 0;
                while(*col != '\0'){
                    if('0' <= *col && *col <= '9'){
                        T y = *col - '0';
                        if(x < (std::numeric_limits<T>::min()+y)/10){
                            overflow_policy::on_underflow(x);
                            return;
                        }
                        x = 10*x-y;
                    }else
                        throw error::no_digit();
                    ++col;
                }
                return;
            }else if(*col == '+')
                ++col;
            parse_unsigned_integer<overflow_policy>(col, x);
        }

        template<class overflow_policy>void parse(char*col, signed char &x)
            {parse_signed_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, signed short &x)
            {parse_signed_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, signed int &x)
            {parse_signed_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, signed long &x)
            {parse_signed_integer<overflow_policy>(col, x);}
        template<class overflow_policy>void parse(char*col, signed long long &x)
            {parse_signed_integer<overflow_policy>(col, x);}

        // Parses [+-]digits[(.|,)digits][(e|E)[+-]digits]. Throws
        // error::no_digit on trailing garbage.
        template<class T>
        void parse_float(const char*col, T&x){
            bool is_neg = false;
            if(*col == '-'){
                is_neg = true;
                ++col;
            }else if(*col == '+')
                ++col;

            x = 0;
            while('0' <= *col && *col <= '9'){
                int y = *col - '0';
                x *= 10;
                x += y;
                ++col;
            }

            if(*col == '.'|| *col == ','){
                ++col;
                T pos = 1;
                while('0' <= *col && *col <= '9'){
                    pos /= 10;
                    int y = *col - '0';
                    ++col;
                    x += y*pos;
                }
            }

            if(*col == 'e' || *col == 'E'){
                ++col;
                int e;

                // NOTE(review): the policy argument of this call was lost in
                // the garbled source; set_to_max_on_overflow is assumed -
                // confirm.
                parse_signed_integer<set_to_max_on_overflow>(col, e);

                if(e != 0){
                    T base;
                    if(e < 0){
                        base = 0.1;
                        e = -e;
                    }else{
                        base = 10;
                    }

                    // Exponentiation by squaring: x *= base^e.
                    while(e != 1){
                        if((e & 1) == 0){
                            base = base*base;
                            e >>= 1;
                        }else{
                            x *= base;
                            --e;
                        }
                    }
                    x *= base;
                }
            }else{
                if(*col != '\0')
                    throw error::no_digit();
            }

            if(is_neg)
                x = -x;
        }

        template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
        template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
        template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }

        template<class overflow_policy, class T>
        void parse(char*col, T&x){
            // GCC evalutes "false" when reading the template and
            // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
            // this strange construct is used.
            static_assert(sizeof(T)!=sizeof(T),
                "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
        }

    }

    // Reads CSV rows with a fixed number of wanted columns. The policies
    // select trimming, quoting, integer overflow handling and which lines
    // are skipped as comments. Constructor arguments are forwarded to
    // LineReader.
    //
    // NOTE(review): the first template parameter and the default of
    // trim_policy were lost in the garbled source; `unsigned column_count`
    // and trim_chars<' ', '\t'> are assumed - confirm.
    template<unsigned column_count,
        class trim_policy = trim_chars<' ', '\t'>,
        class quote_policy = no_quote_escape<','>,
        class overflow_policy = throw_on_overflow,
        class comment_policy = no_comment
    >
    class CSVReader{
    private:
        LineReader in;

        char*(row[column_count]);
        std::string column_names[column_count];

        std::vector<int>col_order;

        template<class ...ColNames>
        void set_column_names(std::string s, ColNames...cols){
            column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
            set_column_names(std::forward<ColNames>(cols)...);
        }

        void set_column_names(){}


    public:
        CSVReader() = delete;
        CSVReader(const CSVReader&) = delete;
        CSVReader&operator=(const CSVReader&) = delete;

        template<class ...Args>
        explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
            std::fill(row, row+column_count, nullptr);
            col_order.resize(column_count);
            // NOTE(review): the rest of this constructor and the
            // next_line() member below were lost in the garbled source and
            // have been reconstructed - confirm.
            for(unsigned i=0; i<column_count; ++i)
                col_order[i] = i;
            for(unsigned i=1; i<=column_count; ++i)
                column_names[i-1] = "col"+std::to_string(i);
        }

        char*next_line(){
            return in.next_line();
        }

        // Reads the header line (skipping comment lines) and maps the
        // file's columns onto the given names. Throws error::header_missing
        // if there is no such line.
        template<class ...ColNames>
        void read_header(ignore_column ignore_policy, ColNames...cols){
            static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
            static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
            try{
                set_column_names(std::forward<ColNames>(cols)...);

                char*line;
                do{
                    line = in.next_line();
                    if(!line)
                        throw error::header_missing();
                }while(comment_policy::is_comment(line));

                detail::parse_header_line
                    <column_count, trim_policy, quote_policy>
                    (line, col_order, column_names, ignore_policy);
            }catch(error::with_file_name&err){
                err.set_file_name(in.get_truncated_file_name());
                throw;
            }
        }

        // Sets the column names without reading a header line; columns are
        // taken in file order.
        template<class ...ColNames>
        void set_header(ColNames...cols){
            static_assert(sizeof...(ColNames)>=column_count,
                "not enough column names specified");
            static_assert(sizeof...(ColNames)<=column_count,
                "too many column names specified");
            set_column_names(std::forward<ColNames>(cols)...);
            std::fill(row, row+column_count, nullptr);
            col_order.resize(column_count);
            for(unsigned i=0; i<column_count; ++i)
                col_order[i] = i;
        }

        // NOTE(review): the members from here down to the terminating
        // parse_helper overload were lost in the garbled source and have
        // been reconstructed as thin forwards to LineReader - confirm.
        bool has_column(const std::string&name) const {
            return col_order.end() != std::find(
                col_order.begin(), col_order.end(),
                std::find(std::begin(column_names), std::end(column_names), name)
                    - std::begin(column_names));
        }

        void set_file_name(const std::string&file_name){
            in.set_file_name(file_name);
        }

        void set_file_name(const char*file_name){
            in.set_file_name(file_name);
        }

        const char*get_truncated_file_name()const{
            return in.get_truncated_file_name();
        }

        void set_file_line(unsigned file_line){
            in.set_file_line(file_line);
        }

        unsigned get_file_line()const{
            return in.get_file_line();
        }

    private:
        void parse_helper(std::size_t){}

        // Parses column r into t (if that column is present in the file)
        // and recurses over the remaining output variables.
        template<class T, class ...ColType>
        void parse_helper(std::size_t r, T&t, ColType&...cols){
            if(row[r]){
                try{
                    try{
                        ::io::detail::parse<overflow_policy>(row[r], t);
                    }catch(error::with_column_content&err){
                        err.set_column_content(row[r]);
                        throw;
                    }
                }catch(error::with_column_name&err){
                    err.set_column_name(column_names[r].c_str());
                    throw;
                }
            }
            parse_helper(r+1, cols...);
        }


    public:
        // Reads the next non-comment line into cols. Returns false at the
        // end of the input. File name and line number are filled into any
        // exception that passes through.
        template<class ...ColType>
        bool read_row(ColType& ...cols){
            static_assert(sizeof...(ColType)>=column_count,
                "not enough columns specified");
            static_assert(sizeof...(ColType)<=column_count,
                "too many columns specified");
            try{
                try{

                    char*line;
                    do{
                        line = in.next_line();
                        if(!line)
                            return false;
                    }while(comment_policy::is_comment(line));

                    detail::parse_line<trim_policy, quote_policy>
                        (line, row, col_order);

                    parse_helper(0, cols...);
                }catch(error::with_file_name&err){
                    err.set_file_name(in.get_truncated_file_name());
                    throw;
                }
            }catch(error::with_file_line&err){
                err.set_file_line(in.get_file_line());
                throw;
            }

            return true;
        }
    };
}
#endif