e@1
|
1 // Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
|
e@1
|
2 // License: BSD-3
|
e@1
|
3 //
|
e@1
|
4 // All rights reserved.
|
e@1
|
5 //
|
e@1
|
6 // Redistribution and use in source and binary forms, with or without
|
e@1
|
7 // modification, are permitted provided that the following conditions are met:
|
e@1
|
8 //
|
e@1
|
9 // 1. Redistributions of source code must retain the above copyright notice,
|
e@1
|
10 // this list of conditions and the following disclaimer.
|
e@1
|
11 //
|
e@1
|
12 //2. Redistributions in binary form must reproduce the above copyright notice,
|
e@1
|
13 // this list of conditions and the following disclaimer in the documentation
|
e@1
|
14 // and/or other materials provided with the distribution.
|
e@1
|
15 //
|
e@1
|
16 //3. Neither the name of the copyright holder nor the names of its contributors
|
e@1
|
17 // may be used to endorse or promote products derived from this software
|
e@1
|
18 // without specific prior written permission.
|
e@1
|
19 //
|
e@1
|
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
e@1
|
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
e@1
|
22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
e@1
|
23 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
e@1
|
24 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
e@1
|
25 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
e@1
|
26 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
e@1
|
27 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
e@1
|
28 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
e@1
|
29 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
e@1
|
30 // POSSIBILITY OF SUCH DAMAGE.
|
e@1
|
31
|
e@1
|
32 #ifndef CSV_H
|
e@1
|
33 #define CSV_H
|
e@1
|
34
|
e@1
|
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <exception>
#include <istream>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifndef CSV_IO_NO_THREAD
#include <condition_variable>
#include <mutex>
#include <thread>
#endif
|
e@1
|
51
|
e@1
|
52 namespace io{
|
e@1
|
53 ////////////////////////////////////////////////////////////////////////////
|
e@1
|
54 // LineReader //
|
e@1
|
55 ////////////////////////////////////////////////////////////////////////////
|
e@1
|
56
|
e@1
|
57 namespace error{
|
e@1
|
// Common base class of the error types in this file.
//
// Derived types implement format_error_message() to render their message
// into error_message_buffer; what() runs that formatting on demand and
// returns the buffer.
struct base : std::exception{
    // Renders the human readable message into error_message_buffer.
    virtual void format_error_message()const = 0;

    // Formats the message and returns a pointer into this object, so the
    // returned pointer is only valid while the exception object is alive.
    // Uses noexcept instead of the dynamic exception specification throw()
    // (removed in C++20); override makes a signature mismatch with
    // std::exception::what() a compile error.
    const char*what()const noexcept override{
        format_error_message();
        return error_message_buffer;
    }

    // mutable because the const what() has to fill it.
    mutable char error_message_buffer[256];
};
|
e@1
|
68
|
e@1
|
// Number of file name characters kept in an error object; longer names are
// truncated to this length.
const int max_file_name_length = 255;

// Mixin that stores a (possibly truncated) file name inside an error object.
struct with_file_name{
    // Starts out with an empty name; the whole buffer is zero-filled.
    with_file_name() : file_name() {}

    // Copies at most max_file_name_length characters of the given name and
    // always leaves the stored name NUL-terminated.
    void set_file_name(const char*file_name){
        char*const stored = this->file_name;
        std::strncpy(stored, file_name, max_file_name_length);
        stored[max_file_name_length] = '\0';
    }

    // One extra byte for the terminating NUL.
    char file_name[max_file_name_length+1];
};
|
e@1
|
83
|
e@1
|
// Mixin that stores the line number an error refers to.
struct with_file_line{
    // -1 is the initial value until set_file_line() is called.
    with_file_line() : file_line(-1) {}

    // Records the line number to report.
    void set_file_line(int file_line){
        this->file_line = file_line;
    }

    int file_line;
};
|
e@1
|
95
|
e@1
|
// Mixin that stores an errno value captured when an operation failed.
struct with_errno{
    // 0 is the initial value until set_errno() is called.
    with_errno() : errno_value(0) {}

    // Records the errno value to report.
    void set_errno(int errno_value){
        this->errno_value = errno_value;
    }

    int errno_value;
};
|
e@1
|
107
|
e@1
|
108 struct can_not_open_file :
|
e@1
|
109 base,
|
e@1
|
110 with_file_name,
|
e@1
|
111 with_errno{
|
e@1
|
112 void format_error_message()const{
|
e@1
|
113 if(errno_value != 0)
|
e@1
|
114 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
115 "Can not open file \"%s\" because \"%s\"."
|
e@1
|
116 , file_name, std::strerror(errno_value));
|
e@1
|
117 else
|
e@1
|
118 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
119 "Can not open file \"%s\"."
|
e@1
|
120 , file_name);
|
e@1
|
121 }
|
e@1
|
122 };
|
e@1
|
123
|
e@1
|
124 struct line_length_limit_exceeded :
|
e@1
|
125 base,
|
e@1
|
126 with_file_name,
|
e@1
|
127 with_file_line{
|
e@1
|
128 void format_error_message()const{
|
e@1
|
129 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
130 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
|
e@1
|
131 , file_line, file_name);
|
e@1
|
132 }
|
e@1
|
133 };
|
e@1
|
134 }
|
e@1
|
135
|
e@1
|
// Abstract source of raw bytes.
class ByteSourceBase{
public:
    // Reads up to `size` bytes into `buffer` and returns the number of bytes
    // produced. The readers in this file treat a return value of 0 as the
    // end of the input.
    virtual int read(char*buffer, int size) = 0;

    // Virtual so that sources can be destroyed through a base pointer.
    virtual ~ByteSourceBase() {}
};
|
e@1
|
141
|
e@1
|
142 namespace detail{
|
e@1
|
143
|
e@1
|
144 class OwningStdIOByteSourceBase : public ByteSourceBase{
|
e@1
|
145 public:
|
e@1
|
146 explicit OwningStdIOByteSourceBase(FILE*file):file(file){
|
e@1
|
147 // Tell the std library that we want to do the buffering ourself.
|
e@1
|
148 std::setvbuf(file, 0, _IONBF, 0);
|
e@1
|
149 }
|
e@1
|
150
|
e@1
|
151 int read(char*buffer, int size){
|
e@1
|
152 return std::fread(buffer, 1, size, file);
|
e@1
|
153 }
|
e@1
|
154
|
e@1
|
155 ~OwningStdIOByteSourceBase(){
|
e@1
|
156 std::fclose(file);
|
e@1
|
157 }
|
e@1
|
158
|
e@1
|
159 private:
|
e@1
|
160 FILE*file;
|
e@1
|
161 };
|
e@1
|
162
|
e@1
|
163 class NonOwningIStreamByteSource : public ByteSourceBase{
|
e@1
|
164 public:
|
e@1
|
165 explicit NonOwningIStreamByteSource(std::istream&in):in(in){}
|
e@1
|
166
|
e@1
|
167 int read(char*buffer, int size){
|
e@1
|
168 in.read(buffer, size);
|
e@1
|
169 return in.gcount();
|
e@1
|
170 }
|
e@1
|
171
|
e@1
|
172 ~NonOwningIStreamByteSource(){}
|
e@1
|
173
|
e@1
|
174 private:
|
e@1
|
175 std::istream∈
|
e@1
|
176 };
|
e@1
|
177
|
e@1
|
178 class NonOwningStringByteSource : public ByteSourceBase{
|
e@1
|
179 public:
|
e@1
|
180 NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
|
e@1
|
181
|
e@1
|
182 int read(char*buffer, int desired_byte_count){
|
e@1
|
183 int to_copy_byte_count = desired_byte_count;
|
e@1
|
184 if(remaining_byte_count < to_copy_byte_count)
|
e@1
|
185 to_copy_byte_count = remaining_byte_count;
|
e@1
|
186 std::memcpy(buffer, str, to_copy_byte_count);
|
e@1
|
187 remaining_byte_count -= to_copy_byte_count;
|
e@1
|
188 str += to_copy_byte_count;
|
e@1
|
189 return to_copy_byte_count;
|
e@1
|
190 }
|
e@1
|
191
|
e@1
|
192 ~NonOwningStringByteSource(){}
|
e@1
|
193
|
e@1
|
194 private:
|
e@1
|
195 const char*str;
|
e@1
|
196 long long remaining_byte_count;
|
e@1
|
197 };
|
e@1
|
198
|
e@1
|
199 #ifndef CSV_IO_NO_THREAD
|
e@1
|
200 class AsynchronousReader{
|
e@1
|
201 public:
|
e@1
|
202 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
|
e@1
|
203 std::unique_lock<std::mutex>guard(lock);
|
e@1
|
204 byte_source = std::move(arg_byte_source);
|
e@1
|
205 desired_byte_count = -1;
|
e@1
|
206 termination_requested = false;
|
e@1
|
207 worker = std::thread(
|
e@1
|
208 [&]{
|
e@1
|
209 std::unique_lock<std::mutex>guard(lock);
|
e@1
|
210 try{
|
e@1
|
211 for(;;){
|
e@1
|
212 read_requested_condition.wait(
|
e@1
|
213 guard,
|
e@1
|
214 [&]{
|
e@1
|
215 return desired_byte_count != -1 || termination_requested;
|
e@1
|
216 }
|
e@1
|
217 );
|
e@1
|
218 if(termination_requested)
|
e@1
|
219 return;
|
e@1
|
220
|
e@1
|
221 read_byte_count = byte_source->read(buffer, desired_byte_count);
|
e@1
|
222 desired_byte_count = -1;
|
e@1
|
223 if(read_byte_count == 0)
|
e@1
|
224 break;
|
e@1
|
225 read_finished_condition.notify_one();
|
e@1
|
226 }
|
e@1
|
227 }catch(...){
|
e@1
|
228 read_error = std::current_exception();
|
e@1
|
229 }
|
e@1
|
230 read_finished_condition.notify_one();
|
e@1
|
231 }
|
e@1
|
232 );
|
e@1
|
233 }
|
e@1
|
234
|
e@1
|
235 bool is_valid()const{
|
e@1
|
236 return byte_source != 0;
|
e@1
|
237 }
|
e@1
|
238
|
e@1
|
239 void start_read(char*arg_buffer, int arg_desired_byte_count){
|
e@1
|
240 std::unique_lock<std::mutex>guard(lock);
|
e@1
|
241 buffer = arg_buffer;
|
e@1
|
242 desired_byte_count = arg_desired_byte_count;
|
e@1
|
243 read_byte_count = -1;
|
e@1
|
244 read_requested_condition.notify_one();
|
e@1
|
245 }
|
e@1
|
246
|
e@1
|
247 int finish_read(){
|
e@1
|
248 std::unique_lock<std::mutex>guard(lock);
|
e@1
|
249 read_finished_condition.wait(
|
e@1
|
250 guard,
|
e@1
|
251 [&]{
|
e@1
|
252 return read_byte_count != -1 || read_error;
|
e@1
|
253 }
|
e@1
|
254 );
|
e@1
|
255 if(read_error)
|
e@1
|
256 std::rethrow_exception(read_error);
|
e@1
|
257 else
|
e@1
|
258 return read_byte_count;
|
e@1
|
259 }
|
e@1
|
260
|
e@1
|
261 ~AsynchronousReader(){
|
e@1
|
262 if(byte_source != 0){
|
e@1
|
263 {
|
e@1
|
264 std::unique_lock<std::mutex>guard(lock);
|
e@1
|
265 termination_requested = true;
|
e@1
|
266 }
|
e@1
|
267 read_requested_condition.notify_one();
|
e@1
|
268 worker.join();
|
e@1
|
269 }
|
e@1
|
270 }
|
e@1
|
271
|
e@1
|
272 private:
|
e@1
|
273 std::unique_ptr<ByteSourceBase>byte_source;
|
e@1
|
274
|
e@1
|
275 std::thread worker;
|
e@1
|
276
|
e@1
|
277 bool termination_requested;
|
e@1
|
278 std::exception_ptr read_error;
|
e@1
|
279 char*buffer;
|
e@1
|
280 int desired_byte_count;
|
e@1
|
281 int read_byte_count;
|
e@1
|
282
|
e@1
|
283 std::mutex lock;
|
e@1
|
284 std::condition_variable read_finished_condition;
|
e@1
|
285 std::condition_variable read_requested_condition;
|
e@1
|
286 };
|
e@1
|
287 #endif
|
e@1
|
288
|
e@1
|
289 class SynchronousReader{
|
e@1
|
290 public:
|
e@1
|
291 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
|
e@1
|
292 byte_source = std::move(arg_byte_source);
|
e@1
|
293 }
|
e@1
|
294
|
e@1
|
295 bool is_valid()const{
|
e@1
|
296 return byte_source != 0;
|
e@1
|
297 }
|
e@1
|
298
|
e@1
|
299 void start_read(char*arg_buffer, int arg_desired_byte_count){
|
e@1
|
300 buffer = arg_buffer;
|
e@1
|
301 desired_byte_count = arg_desired_byte_count;
|
e@1
|
302 }
|
e@1
|
303
|
e@1
|
304 int finish_read(){
|
e@1
|
305 return byte_source->read(buffer, desired_byte_count);
|
e@1
|
306 }
|
e@1
|
307 private:
|
e@1
|
308 std::unique_ptr<ByteSourceBase>byte_source;
|
e@1
|
309 char*buffer;
|
e@1
|
310 int desired_byte_count;
|
e@1
|
311 };
|
e@1
|
312 }
|
e@1
|
313
|
e@1
|
314 class LineReader{
|
e@1
|
315 private:
|
e@1
|
316 static const int block_len = 1<<24;
|
e@1
|
317 #ifdef CSV_IO_NO_THREAD
|
e@1
|
318 detail::SynchronousReader reader;
|
e@1
|
319 #else
|
e@1
|
320 detail::AsynchronousReader reader;
|
e@1
|
321 #endif
|
e@1
|
322 char*buffer;
|
e@1
|
323 int data_begin;
|
e@1
|
324 int data_end;
|
e@1
|
325
|
e@1
|
326 char file_name[error::max_file_name_length+1];
|
e@1
|
327 unsigned file_line;
|
e@1
|
328
|
e@1
|
329 static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
|
e@1
|
330 // We open the file in binary mode as it makes no difference under *nix
|
e@1
|
331 // and under Windows we handle \r\n newlines ourself.
|
e@1
|
332 FILE*file = std::fopen(file_name, "rb");
|
e@1
|
333 if(file == 0){
|
e@1
|
334 int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
|
e@1
|
335 error::can_not_open_file err;
|
e@1
|
336 err.set_errno(x);
|
e@1
|
337 err.set_file_name(file_name);
|
e@1
|
338 throw err;
|
e@1
|
339 }
|
e@1
|
340 return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
|
e@1
|
341 }
|
e@1
|
342
|
e@1
|
343 void init(std::unique_ptr<ByteSourceBase>byte_source){
|
e@1
|
344 file_line = 0;
|
e@1
|
345
|
e@1
|
346 buffer = new char[3*block_len];
|
e@1
|
347 try{
|
e@1
|
348 data_begin = 0;
|
e@1
|
349 data_end = byte_source->read(buffer, 2*block_len);
|
e@1
|
350
|
e@1
|
351 // Ignore UTF-8 BOM
|
e@1
|
352 if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
|
e@1
|
353 data_begin = 3;
|
e@1
|
354
|
e@1
|
355 if(data_end == 2*block_len){
|
e@1
|
356 reader.init(std::move(byte_source));
|
e@1
|
357 reader.start_read(buffer + 2*block_len, block_len);
|
e@1
|
358 }
|
e@1
|
359 }catch(...){
|
e@1
|
360 delete[]buffer;
|
e@1
|
361 throw;
|
e@1
|
362 }
|
e@1
|
363 }
|
e@1
|
364
|
e@1
|
365 public:
|
e@1
|
366 LineReader() = delete;
|
e@1
|
367 LineReader(const LineReader&) = delete;
|
e@1
|
368 LineReader&operator=(const LineReader&) = delete;
|
e@1
|
369
|
e@1
|
370 explicit LineReader(const char*file_name){
|
e@1
|
371 set_file_name(file_name);
|
e@1
|
372 init(open_file(file_name));
|
e@1
|
373 }
|
e@1
|
374
|
e@1
|
375 explicit LineReader(const std::string&file_name){
|
e@1
|
376 set_file_name(file_name.c_str());
|
e@1
|
377 init(open_file(file_name.c_str()));
|
e@1
|
378 }
|
e@1
|
379
|
e@1
|
380 LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
|
e@1
|
381 set_file_name(file_name);
|
e@1
|
382 init(std::move(byte_source));
|
e@1
|
383 }
|
e@1
|
384
|
e@1
|
385 LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
|
e@1
|
386 set_file_name(file_name.c_str());
|
e@1
|
387 init(std::move(byte_source));
|
e@1
|
388 }
|
e@1
|
389
|
e@1
|
390 LineReader(const char*file_name, const char*data_begin, const char*data_end){
|
e@1
|
391 set_file_name(file_name);
|
e@1
|
392 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
|
e@1
|
393 }
|
e@1
|
394
|
e@1
|
395 LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
|
e@1
|
396 set_file_name(file_name.c_str());
|
e@1
|
397 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
|
e@1
|
398 }
|
e@1
|
399
|
e@1
|
400 LineReader(const char*file_name, FILE*file){
|
e@1
|
401 set_file_name(file_name);
|
e@1
|
402 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
|
e@1
|
403 }
|
e@1
|
404
|
e@1
|
405 LineReader(const std::string&file_name, FILE*file){
|
e@1
|
406 set_file_name(file_name.c_str());
|
e@1
|
407 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
|
e@1
|
408 }
|
e@1
|
409
|
e@1
|
410 LineReader(const char*file_name, std::istream&in){
|
e@1
|
411 set_file_name(file_name);
|
e@1
|
412 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
|
e@1
|
413 }
|
e@1
|
414
|
e@1
|
415 LineReader(const std::string&file_name, std::istream&in){
|
e@1
|
416 set_file_name(file_name.c_str());
|
e@1
|
417 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
|
e@1
|
418 }
|
e@1
|
419
|
e@1
|
420 void set_file_name(const std::string&file_name){
|
e@1
|
421 set_file_name(file_name.c_str());
|
e@1
|
422 }
|
e@1
|
423
|
e@1
|
424 void set_file_name(const char*file_name){
|
e@1
|
425 strncpy(this->file_name, file_name, error::max_file_name_length);
|
e@1
|
426 this->file_name[error::max_file_name_length] = '\0';
|
e@1
|
427 }
|
e@1
|
428
|
e@1
|
429 const char*get_truncated_file_name()const{
|
e@1
|
430 return file_name;
|
e@1
|
431 }
|
e@1
|
432
|
e@1
|
433 void set_file_line(unsigned file_line){
|
e@1
|
434 this->file_line = file_line;
|
e@1
|
435 }
|
e@1
|
436
|
e@1
|
437 unsigned get_file_line()const{
|
e@1
|
438 return file_line;
|
e@1
|
439 }
|
e@1
|
440
|
e@1
|
441 char*next_line(){
|
e@1
|
442 if(data_begin == data_end)
|
e@1
|
443 return 0;
|
e@1
|
444
|
e@1
|
445 ++file_line;
|
e@1
|
446
|
e@1
|
447 assert(data_begin < data_end);
|
e@1
|
448 assert(data_end <= block_len*2);
|
e@1
|
449
|
e@1
|
450 if(data_begin >= block_len){
|
e@1
|
451 std::memcpy(buffer, buffer+block_len, block_len);
|
e@1
|
452 data_begin -= block_len;
|
e@1
|
453 data_end -= block_len;
|
e@1
|
454 if(reader.is_valid())
|
e@1
|
455 {
|
e@1
|
456 data_end += reader.finish_read();
|
e@1
|
457 std::memcpy(buffer+block_len, buffer+2*block_len, block_len);
|
e@1
|
458 reader.start_read(buffer + 2*block_len, block_len);
|
e@1
|
459 }
|
e@1
|
460 }
|
e@1
|
461
|
e@1
|
462 int line_end = data_begin;
|
e@1
|
463 while(buffer[line_end] != '\n' && line_end != data_end){
|
e@1
|
464 ++line_end;
|
e@1
|
465 }
|
e@1
|
466
|
e@1
|
467 if(line_end - data_begin + 1 > block_len){
|
e@1
|
468 error::line_length_limit_exceeded err;
|
e@1
|
469 err.set_file_name(file_name);
|
e@1
|
470 err.set_file_line(file_line);
|
e@1
|
471 throw err;
|
e@1
|
472 }
|
e@1
|
473
|
e@1
|
474 if(buffer[line_end] == '\n'){
|
e@1
|
475 buffer[line_end] = '\0';
|
e@1
|
476 }else{
|
e@1
|
477 // some files are missing the newline at the end of the
|
e@1
|
478 // last line
|
e@1
|
479 ++data_end;
|
e@1
|
480 buffer[line_end] = '\0';
|
e@1
|
481 }
|
e@1
|
482
|
e@1
|
483 // handle windows \r\n-line breaks
|
e@1
|
484 if(line_end != data_begin && buffer[line_end-1] == '\r')
|
e@1
|
485 buffer[line_end-1] = '\0';
|
e@1
|
486
|
e@1
|
487 char*ret = buffer + data_begin;
|
e@1
|
488 data_begin = line_end+1;
|
e@1
|
489 return ret;
|
e@1
|
490 }
|
e@1
|
491
|
e@1
|
492 ~LineReader(){
|
e@1
|
493 delete[] buffer;
|
e@1
|
494 }
|
e@1
|
495 };
|
e@1
|
496
|
e@1
|
497
|
e@1
|
498 ////////////////////////////////////////////////////////////////////////////
|
e@1
|
499 // CSV //
|
e@1
|
500 ////////////////////////////////////////////////////////////////////////////
|
e@1
|
501
|
e@1
|
502 namespace error{
|
e@1
|
// Number of column name characters kept in an error object; longer names are
// truncated to this length.
const int max_column_name_length = 63;

// Mixin that stores a (possibly truncated) column name inside an error object.
struct with_column_name{
    // Starts out with an empty name; the whole buffer is zero-filled.
    with_column_name() : column_name() {}

    // Copies at most max_column_name_length characters and always leaves the
    // stored name NUL-terminated.
    void set_column_name(const char*column_name){
        char*const stored = this->column_name;
        std::strncpy(stored, column_name, max_column_name_length);
        stored[max_column_name_length] = '\0';
    }

    // One extra byte for the terminating NUL.
    char column_name[max_column_name_length+1];
};
|
e@1
|
516
|
e@1
|
517
|
e@1
|
// Number of cell content characters kept in an error object; longer content
// is truncated to this length.
const int max_column_content_length = 63;

// Mixin that stores the (possibly truncated) content of the offending cell.
struct with_column_content{
    // Starts out with empty content; the whole buffer is zero-filled.
    with_column_content() : column_content() {}

    // Copies at most max_column_content_length characters and always leaves
    // the stored content NUL-terminated.
    void set_column_content(const char*column_content){
        char*const stored = this->column_content;
        std::strncpy(stored, column_content, max_column_content_length);
        stored[max_column_content_length] = '\0';
    }

    // One extra byte for the terminating NUL.
    char column_content[max_column_content_length+1];
};
|
e@1
|
532
|
e@1
|
533
|
e@1
|
534 struct extra_column_in_header :
|
e@1
|
535 base,
|
e@1
|
536 with_file_name,
|
e@1
|
537 with_column_name{
|
e@1
|
538 void format_error_message()const{
|
e@1
|
539 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
540 "Extra column \"%s\" in header of file \"%s\"."
|
e@1
|
541 , column_name, file_name);
|
e@1
|
542 }
|
e@1
|
543 };
|
e@1
|
544
|
e@1
|
545 struct missing_column_in_header :
|
e@1
|
546 base,
|
e@1
|
547 with_file_name,
|
e@1
|
548 with_column_name{
|
e@1
|
549 void format_error_message()const{
|
e@1
|
550 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
551 "Missing column \"%s\" in header of file \"%s\"."
|
e@1
|
552 , column_name, file_name);
|
e@1
|
553 }
|
e@1
|
554 };
|
e@1
|
555
|
e@1
|
556 struct duplicated_column_in_header :
|
e@1
|
557 base,
|
e@1
|
558 with_file_name,
|
e@1
|
559 with_column_name{
|
e@1
|
560 void format_error_message()const{
|
e@1
|
561 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
562 "Duplicated column \"%s\" in header of file \"%s\"."
|
e@1
|
563 , column_name, file_name);
|
e@1
|
564 }
|
e@1
|
565 };
|
e@1
|
566
|
e@1
|
567 struct header_missing :
|
e@1
|
568 base,
|
e@1
|
569 with_file_name{
|
e@1
|
570 void format_error_message()const{
|
e@1
|
571 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
572 "Header missing in file \"%s\"."
|
e@1
|
573 , file_name);
|
e@1
|
574 }
|
e@1
|
575 };
|
e@1
|
576
|
e@1
|
577 struct too_few_columns :
|
e@1
|
578 base,
|
e@1
|
579 with_file_name,
|
e@1
|
580 with_file_line{
|
e@1
|
581 void format_error_message()const{
|
e@1
|
582 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
583 "Too few columns in line %d in file \"%s\"."
|
e@1
|
584 , file_line, file_name);
|
e@1
|
585 }
|
e@1
|
586 };
|
e@1
|
587
|
e@1
|
588 struct too_many_columns :
|
e@1
|
589 base,
|
e@1
|
590 with_file_name,
|
e@1
|
591 with_file_line{
|
e@1
|
592 void format_error_message()const{
|
e@1
|
593 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
594 "Too many columns in line %d in file \"%s\"."
|
e@1
|
595 , file_line, file_name);
|
e@1
|
596 }
|
e@1
|
597 };
|
e@1
|
598
|
e@1
|
599 struct escaped_string_not_closed :
|
e@1
|
600 base,
|
e@1
|
601 with_file_name,
|
e@1
|
602 with_file_line{
|
e@1
|
603 void format_error_message()const{
|
e@1
|
604 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
605 "Escaped string was not closed in line %d in file \"%s\"."
|
e@1
|
606 , file_line, file_name);
|
e@1
|
607 }
|
e@1
|
608 };
|
e@1
|
609
|
e@1
|
610 struct integer_must_be_positive :
|
e@1
|
611 base,
|
e@1
|
612 with_file_name,
|
e@1
|
613 with_file_line,
|
e@1
|
614 with_column_name,
|
e@1
|
615 with_column_content{
|
e@1
|
616 void format_error_message()const{
|
e@1
|
617 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
618 "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"."
|
e@1
|
619 , column_content, column_name, file_name, file_line);
|
e@1
|
620 }
|
e@1
|
621 };
|
e@1
|
622
|
e@1
|
623 struct no_digit :
|
e@1
|
624 base,
|
e@1
|
625 with_file_name,
|
e@1
|
626 with_file_line,
|
e@1
|
627 with_column_name,
|
e@1
|
628 with_column_content{
|
e@1
|
629 void format_error_message()const{
|
e@1
|
630 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
631 "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"."
|
e@1
|
632 , column_content, column_name, file_name, file_line);
|
e@1
|
633 }
|
e@1
|
634 };
|
e@1
|
635
|
e@1
|
636 struct integer_overflow :
|
e@1
|
637 base,
|
e@1
|
638 with_file_name,
|
e@1
|
639 with_file_line,
|
e@1
|
640 with_column_name,
|
e@1
|
641 with_column_content{
|
e@1
|
642 void format_error_message()const{
|
e@1
|
643 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
644 "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"."
|
e@1
|
645 , column_content, column_name, file_name, file_line);
|
e@1
|
646 }
|
e@1
|
647 };
|
e@1
|
648
|
e@1
|
649 struct integer_underflow :
|
e@1
|
650 base,
|
e@1
|
651 with_file_name,
|
e@1
|
652 with_file_line,
|
e@1
|
653 with_column_name,
|
e@1
|
654 with_column_content{
|
e@1
|
655 void format_error_message()const{
|
e@1
|
656 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
657 "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"."
|
e@1
|
658 , column_content, column_name, file_name, file_line);
|
e@1
|
659 }
|
e@1
|
660 };
|
e@1
|
661
|
e@1
|
662 struct invalid_single_character :
|
e@1
|
663 base,
|
e@1
|
664 with_file_name,
|
e@1
|
665 with_file_line,
|
e@1
|
666 with_column_name,
|
e@1
|
667 with_column_content{
|
e@1
|
668 void format_error_message()const{
|
e@1
|
669 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
|
e@1
|
670 "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character."
|
e@1
|
671 , column_content, column_name, file_name, file_line);
|
e@1
|
672 }
|
e@1
|
673 };
|
e@1
|
674 }
|
e@1
|
675
|
e@1
|
// Bit flags selecting which header mismatches are tolerated; the values can
// be combined with |.
using ignore_column = unsigned;
// Neither extra nor missing columns are tolerated.
static const ignore_column ignore_no_column = 0;
// Header columns that were not requested are tolerated.
static const ignore_column ignore_extra_column = 1;
// Requested columns that are absent from the header are tolerated.
static const ignore_column ignore_missing_column = 2;
|
e@1
|
680
|
e@1
|
// Trim policy that strips any of the characters in trim_char_list from both
// ends of a column. With an empty list nothing is stripped.
template<char ... trim_char_list>
struct trim_chars{
private:
    // End of the recursion: no candidates left, so c is not a trim character.
    constexpr static bool is_trim_char(char){
        return false;
    }

    // True if c equals trim_char or any of the remaining candidates.
    template<class ...OtherTrimChars>
    constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
        return c == trim_char || is_trim_char(c, other_trim_chars...);
    }

public:
    // Narrows [str_begin, str_end) so that it neither starts nor ends with a
    // trim character and writes a terminating NUL at the new str_end.
    // The emptiness check is done before every dereference so that an empty
    // range never reads the byte in front of str_begin.
    static void trim(char*&str_begin, char*&str_end){
        while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
            ++str_begin;
        while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
            --str_end;
        *str_end = '\0';
    }
};
|
e@1
|
702
|
e@1
|
703
|
e@1
|
// Comment policy that never treats a line as a comment.
struct no_comment{
    static bool is_comment(const char*){
        return false;
    }
};
|
e@1
|
709
|
e@1
|
// Comment policy: a line is a comment if its first character is one of
// comment_start_char_list.
template<char ... comment_start_char_list>
struct single_line_comment{
private:
    // End of the recursion: no candidates left.
    constexpr static bool is_comment_start_char(char){
        return false;
    }

    // True if c equals comment_start_char or any of the remaining candidates.
    template<class ...OtherCommentStartChars>
    constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
        return c != comment_start_char
            ? is_comment_start_char(c, other_comment_start_chars...)
            : true;
    }

public:
    // Only the first character of the line is inspected.
    static bool is_comment(const char*line){
        return is_comment_start_char(line[0], comment_start_char_list...);
    }
};
|
e@1
|
728
|
e@1
|
// Comment policy: a line is a comment if it is empty or consists only of
// spaces and tabs.
struct empty_line_comment{
    static bool is_comment(const char*line){
        // Skip leading blanks; the line is "empty" exactly when nothing but
        // the terminator follows them.
        const char*cursor = line;
        while(*cursor == ' ' || *cursor == '\t')
            ++cursor;
        return *cursor == '\0';
    }
};
|
e@1
|
741
|
e@1
|
742 template<char ... comment_start_char_list>
|
e@1
|
743 struct single_and_empty_line_comment{
|
e@1
|
744 static bool is_comment(const char*line){
|
e@1
|
745 return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
|
e@1
|
746 }
|
e@1
|
747 };
|
e@1
|
748
|
e@1
|
// Quote policy without quoting: columns are separated by `sep` and nothing
// is ever unescaped.
template<char sep>
struct no_quote_escape{
    // Returns a pointer to the first separator at or after col_begin, or to
    // the terminating NUL if there is none.
    static const char*find_next_column_end(const char*col_begin){
        const char*cursor = col_begin;
        for(; *cursor != '\0' && *cursor != sep; ++cursor){}
        return cursor;
    }

    // Nothing to unescape.
    static void unescape(char*&, char*&){}
};
|
e@1
|
761
|
e@1
|
762 template<char sep, char quote>
|
e@1
|
763 struct double_quote_escape{
|
e@1
|
764 static const char*find_next_column_end(const char*col_begin){
|
e@1
|
765 while(*col_begin != sep && *col_begin != '\0')
|
e@1
|
766 if(*col_begin != quote)
|
e@1
|
767 ++col_begin;
|
e@1
|
768 else{
|
e@1
|
769 do{
|
e@1
|
770 ++col_begin;
|
e@1
|
771 while(*col_begin != quote){
|
e@1
|
772 if(*col_begin == '\0')
|
e@1
|
773 throw error::escaped_string_not_closed();
|
e@1
|
774 ++col_begin;
|
e@1
|
775 }
|
e@1
|
776 ++col_begin;
|
e@1
|
777 }while(*col_begin == quote);
|
e@1
|
778 }
|
e@1
|
779 return col_begin;
|
e@1
|
780 }
|
e@1
|
781
|
e@1
|
782 static void unescape(char*&col_begin, char*&col_end){
|
e@1
|
783 if(col_end - col_begin >= 2){
|
e@1
|
784 if(*col_begin == quote && *(col_end-1) == quote){
|
e@1
|
785 ++col_begin;
|
e@1
|
786 --col_end;
|
e@1
|
787 char*out = col_begin;
|
e@1
|
788 for(char*in = col_begin; in!=col_end; ++in){
|
e@1
|
789 if(*in == quote && (in+1) != col_end && *(in+1) == quote){
|
e@1
|
790 ++in;
|
e@1
|
791 }
|
e@1
|
792 *out = *in;
|
e@1
|
793 ++out;
|
e@1
|
794 }
|
e@1
|
795 col_end = out;
|
e@1
|
796 *col_end = '\0';
|
e@1
|
797 }
|
e@1
|
798 }
|
e@1
|
799
|
e@1
|
800 }
|
e@1
|
801 };
|
e@1
|
802
|
e@1
|
803 struct throw_on_overflow{
|
e@1
|
804 template<class T>
|
e@1
|
805 static void on_overflow(T&){
|
e@1
|
806 throw error::integer_overflow();
|
e@1
|
807 }
|
e@1
|
808
|
e@1
|
809 template<class T>
|
e@1
|
810 static void on_underflow(T&){
|
e@1
|
811 throw error::integer_underflow();
|
e@1
|
812 }
|
e@1
|
813 };
|
e@1
|
814
|
e@1
|
// Overflow policy that does nothing: the value is left exactly as the parser
// had it when the overflow was detected.
struct ignore_overflow{
    template<class T>
    static void on_overflow(T&) {}

    template<class T>
    static void on_underflow(T&) {}
};
|
e@1
|
822
|
e@1
|
// Overflow policy that saturates: the value is replaced by the largest or
// smallest value representable in T.
struct set_to_max_on_overflow{
    // Sets x to std::numeric_limits<T>::max().
    template<class T>
    static void on_overflow(T&x){
        typedef std::numeric_limits<T> limits;
        x = limits::max();
    }

    // Sets x to std::numeric_limits<T>::min().
    template<class T>
    static void on_underflow(T&x){
        typedef std::numeric_limits<T> limits;
        x = limits::min();
    }
};
|
e@1
|
834
|
e@1
|
835
|
e@1
|
836 namespace detail{
|
e@1
|
// Cuts the first column off `line`.
//
// On return [col_begin, col_end) is the column and *col_end is NUL. `line`
// is advanced to the character after the separator, or set to nullptr if the
// column was the last one of the line.
template<class quote_policy>
void chop_next_column(
    char*&line, char*&col_begin, char*&col_end
){
    assert(line != nullptr);

    col_begin = line;
    // The policy works on const char*; adding the offset to the non-const
    // col_begin yields a writable pointer to the same position.
    const char*const found_end = quote_policy::find_next_column_end(col_begin);
    col_end = col_begin + (found_end - col_begin);

    if(*col_end != '\0'){
        // Replace the separator by a terminator and continue behind it.
        *col_end = '\0';
        line = col_end + 1;
    }else{
        line = nullptr;
    }
}
|
e@1
|
854
|
e@1
|
855 template<class trim_policy, class quote_policy>
|
e@1
|
856 void parse_line(
|
e@1
|
857 char*line,
|
e@1
|
858 char**sorted_col,
|
e@1
|
859 const std::vector<int>&col_order
|
e@1
|
860 ){
|
e@1
|
861 for(std::size_t i=0; i<col_order.size(); ++i){
|
e@1
|
862 if(line == nullptr)
|
e@1
|
863 throw ::io::error::too_few_columns();
|
e@1
|
864 char*col_begin, *col_end;
|
e@1
|
865 chop_next_column<quote_policy>(line, col_begin, col_end);
|
e@1
|
866
|
e@1
|
867 if(col_order[i] != -1){
|
e@1
|
868 trim_policy::trim(col_begin, col_end);
|
e@1
|
869 quote_policy::unescape(col_begin, col_end);
|
e@1
|
870
|
e@1
|
871 sorted_col[col_order[i]] = col_begin;
|
e@1
|
872 }
|
e@1
|
873 }
|
e@1
|
874 if(line != nullptr)
|
e@1
|
875 throw ::io::error::too_many_columns();
|
e@1
|
876 }
|
e@1
|
877
|
e@1
|
878 template<unsigned column_count, class trim_policy, class quote_policy>
|
e@1
|
879 void parse_header_line(
|
e@1
|
880 char*line,
|
e@1
|
881 std::vector<int>&col_order,
|
e@1
|
882 const std::string*col_name,
|
e@1
|
883 ignore_column ignore_policy
|
e@1
|
884 ){
|
e@1
|
885 col_order.clear();
|
e@1
|
886
|
e@1
|
887 bool found[column_count];
|
e@1
|
888 std::fill(found, found + column_count, false);
|
e@1
|
889 while(line){
|
e@1
|
890 char*col_begin,*col_end;
|
e@1
|
891 chop_next_column<quote_policy>(line, col_begin, col_end);
|
e@1
|
892
|
e@1
|
893 trim_policy::trim(col_begin, col_end);
|
e@1
|
894 quote_policy::unescape(col_begin, col_end);
|
e@1
|
895
|
e@1
|
896 for(unsigned i=0; i<column_count; ++i)
|
e@1
|
897 if(col_begin == col_name[i]){
|
e@1
|
898 if(found[i]){
|
e@1
|
899 error::duplicated_column_in_header err;
|
e@1
|
900 err.set_column_name(col_begin);
|
e@1
|
901 throw err;
|
e@1
|
902 }
|
e@1
|
903 found[i] = true;
|
e@1
|
904 col_order.push_back(i);
|
e@1
|
905 col_begin = 0;
|
e@1
|
906 break;
|
e@1
|
907 }
|
e@1
|
908 if(col_begin){
|
e@1
|
909 if(ignore_policy & ::io::ignore_extra_column)
|
e@1
|
910 col_order.push_back(-1);
|
e@1
|
911 else{
|
e@1
|
912 error::extra_column_in_header err;
|
e@1
|
913 err.set_column_name(col_begin);
|
e@1
|
914 throw err;
|
e@1
|
915 }
|
e@1
|
916 }
|
e@1
|
917 }
|
e@1
|
918 if(!(ignore_policy & ::io::ignore_missing_column)){
|
e@1
|
919 for(unsigned i=0; i<column_count; ++i){
|
e@1
|
920 if(!found[i]){
|
e@1
|
921 error::missing_column_in_header err;
|
e@1
|
922 err.set_column_name(col_name[i].c_str());
|
e@1
|
923 throw err;
|
e@1
|
924 }
|
e@1
|
925 }
|
e@1
|
926 }
|
e@1
|
927 }
|
e@1
|
928
|
e@1
|
929 template<class overflow_policy>
|
e@1
|
930 void parse(char*col, char &x){
|
e@1
|
931 if(!*col)
|
e@1
|
932 throw error::invalid_single_character();
|
e@1
|
933 x = *col;
|
e@1
|
934 ++col;
|
e@1
|
935 if(*col)
|
e@1
|
936 throw error::invalid_single_character();
|
e@1
|
937 }
|
e@1
|
938
|
e@1
|
// Copies the NUL-terminated cell text into a std::string.
// overflow_policy is not used by this overload.
template<class overflow_policy>
void parse(char*col, std::string&x){
    x.assign(col);
}
|
e@1
|
943
|
e@1
|
// Hands out the cell text without copying: x is set to point at col itself,
// so it is only valid for as long as the buffer behind col is.
// overflow_policy is not used by this overload.
template<class overflow_policy>
void parse(char*col, const char*&x){
    const char*const cell = col;
    x = cell;
}
|
e@1
|
948
|
e@1
|
// Hands out the mutable cell text without copying: x is set to point at col
// itself, so it is only valid for as long as the buffer behind col is.
// overflow_policy is not used by this overload.
template<class overflow_policy>
void parse(
    char*col,
    char*&x
){
    x = col;
}
|
e@1
|
953
|
e@1
|
954 template<class overflow_policy, class T>
|
e@1
|
955 void parse_unsigned_integer(const char*col, T&x){
|
e@1
|
956 x = 0;
|
e@1
|
957 while(*col != '\0'){
|
e@1
|
958 if('0' <= *col && *col <= '9'){
|
e@1
|
959 T y = *col - '0';
|
e@1
|
960 if(x > (std::numeric_limits<T>::max()-y)/10){
|
e@1
|
961 overflow_policy::on_overflow(x);
|
e@1
|
962 return;
|
e@1
|
963 }
|
e@1
|
964 x = 10*x+y;
|
e@1
|
965 }else
|
e@1
|
966 throw error::no_digit();
|
e@1
|
967 ++col;
|
e@1
|
968 }
|
e@1
|
969 }
|
e@1
|
970
|
e@1
|
971 template<class overflow_policy>void parse(char*col, unsigned char &x)
|
e@1
|
972 {parse_unsigned_integer<overflow_policy>(col, x);}
|
e@1
|
973 template<class overflow_policy>void parse(char*col, unsigned short &x)
|
e@1
|
974 {parse_unsigned_integer<overflow_policy>(col, x);}
|
e@1
|
975 template<class overflow_policy>void parse(char*col, unsigned int &x)
|
e@1
|
976 {parse_unsigned_integer<overflow_policy>(col, x);}
|
e@1
|
977 template<class overflow_policy>void parse(char*col, unsigned long &x)
|
e@1
|
978 {parse_unsigned_integer<overflow_policy>(col, x);}
|
e@1
|
979 template<class overflow_policy>void parse(char*col, unsigned long long &x)
|
e@1
|
980 {parse_unsigned_integer<overflow_policy>(col, x);}
|
e@1
|
981
|
e@1
|
982 template<class overflow_policy, class T>
|
e@1
|
983 void parse_signed_integer(const char*col, T&x){
|
e@1
|
984 if(*col == '-'){
|
e@1
|
985 ++col;
|
e@1
|
986
|
e@1
|
987 x = 0;
|
e@1
|
988 while(*col != '\0'){
|
e@1
|
989 if('0' <= *col && *col <= '9'){
|
e@1
|
990 T y = *col - '0';
|
e@1
|
991 if(x < (std::numeric_limits<T>::min()+y)/10){
|
e@1
|
992 overflow_policy::on_underflow(x);
|
e@1
|
993 return;
|
e@1
|
994 }
|
e@1
|
995 x = 10*x-y;
|
e@1
|
996 }else
|
e@1
|
997 throw error::no_digit();
|
e@1
|
998 ++col;
|
e@1
|
999 }
|
e@1
|
1000 return;
|
e@1
|
1001 }else if(*col == '+')
|
e@1
|
1002 ++col;
|
e@1
|
1003 parse_unsigned_integer<overflow_policy>(col, x);
|
e@1
|
1004 }
|
e@1
|
1005
|
e@1
|
1006 template<class overflow_policy>void parse(char*col, signed char &x)
|
e@1
|
1007 {parse_signed_integer<overflow_policy>(col, x);}
|
e@1
|
1008 template<class overflow_policy>void parse(char*col, signed short &x)
|
e@1
|
1009 {parse_signed_integer<overflow_policy>(col, x);}
|
e@1
|
1010 template<class overflow_policy>void parse(char*col, signed int &x)
|
e@1
|
1011 {parse_signed_integer<overflow_policy>(col, x);}
|
e@1
|
1012 template<class overflow_policy>void parse(char*col, signed long &x)
|
e@1
|
1013 {parse_signed_integer<overflow_policy>(col, x);}
|
e@1
|
1014 template<class overflow_policy>void parse(char*col, signed long long &x)
|
e@1
|
1015 {parse_signed_integer<overflow_policy>(col, x);}
|
e@1
|
1016
|
e@1
|
1017 template<class T>
|
e@1
|
1018 void parse_float(const char*col, T&x){
|
e@1
|
1019 bool is_neg = false;
|
e@1
|
1020 if(*col == '-'){
|
e@1
|
1021 is_neg = true;
|
e@1
|
1022 ++col;
|
e@1
|
1023 }else if(*col == '+')
|
e@1
|
1024 ++col;
|
e@1
|
1025
|
e@1
|
1026 x = 0;
|
e@1
|
1027 while('0' <= *col && *col <= '9'){
|
e@1
|
1028 int y = *col - '0';
|
e@1
|
1029 x *= 10;
|
e@1
|
1030 x += y;
|
e@1
|
1031 ++col;
|
e@1
|
1032 }
|
e@1
|
1033
|
e@1
|
1034 if(*col == '.'|| *col == ','){
|
e@1
|
1035 ++col;
|
e@1
|
1036 T pos = 1;
|
e@1
|
1037 while('0' <= *col && *col <= '9'){
|
e@1
|
1038 pos /= 10;
|
e@1
|
1039 int y = *col - '0';
|
e@1
|
1040 ++col;
|
e@1
|
1041 x += y*pos;
|
e@1
|
1042 }
|
e@1
|
1043 }
|
e@1
|
1044
|
e@1
|
1045 if(*col == 'e' || *col == 'E'){
|
e@1
|
1046 ++col;
|
e@1
|
1047 int e;
|
e@1
|
1048
|
e@1
|
1049 parse_signed_integer<set_to_max_on_overflow>(col, e);
|
e@1
|
1050
|
e@1
|
1051 if(e != 0){
|
e@1
|
1052 T base;
|
e@1
|
1053 if(e < 0){
|
e@1
|
1054 base = 0.1;
|
e@1
|
1055 e = -e;
|
e@1
|
1056 }else{
|
e@1
|
1057 base = 10;
|
e@1
|
1058 }
|
e@1
|
1059
|
e@1
|
1060 while(e != 1){
|
e@1
|
1061 if((e & 1) == 0){
|
e@1
|
1062 base = base*base;
|
e@1
|
1063 e >>= 1;
|
e@1
|
1064 }else{
|
e@1
|
1065 x *= base;
|
e@1
|
1066 --e;
|
e@1
|
1067 }
|
e@1
|
1068 }
|
e@1
|
1069 x *= base;
|
e@1
|
1070 }
|
e@1
|
1071 }else{
|
e@1
|
1072 if(*col != '\0')
|
e@1
|
1073 throw error::no_digit();
|
e@1
|
1074 }
|
e@1
|
1075
|
e@1
|
1076 if(is_neg)
|
e@1
|
1077 x = -x;
|
e@1
|
1078 }
|
e@1
|
1079
|
e@1
|
1080 template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
|
e@1
|
1081 template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
|
e@1
|
1082 template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
|
e@1
|
1083
|
e@1
|
// Fallback overload selected for every type that has no dedicated parse()
// overload. Instantiating it is always a compile-time error.
template<class overflow_policy, class T>
void parse(char*col, T&x){
    // The condition has to depend on T: GCC evaluates a plain "false" as
    // soon as it reads the template, whereas "sizeof(T)!=sizeof(T)" is only
    // evaluated when the template is instantiated.
    static_assert(
        sizeof(T)!=sizeof(T),
        "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported"
    );
}
|
e@1
|
1092
|
e@1
|
1093 }
|
e@1
|
1094
|
e@1
|
1095 template<unsigned column_count,
|
e@1
|
1096 class trim_policy = trim_chars<' ', '\t'>,
|
e@1
|
1097 class quote_policy = no_quote_escape<','>,
|
e@1
|
1098 class overflow_policy = throw_on_overflow,
|
e@1
|
1099 class comment_policy = no_comment
|
e@1
|
1100 >
|
e@1
|
1101 class CSVReader{
|
e@1
|
1102 private:
|
e@1
|
1103 LineReader in;
|
e@1
|
1104
|
e@1
|
1105 char*(row[column_count]);
|
e@1
|
1106 std::string column_names[column_count];
|
e@1
|
1107
|
e@1
|
1108 std::vector<int>col_order;
|
e@1
|
1109
|
e@1
|
1110 template<class ...ColNames>
|
e@1
|
1111 void set_column_names(std::string s, ColNames...cols){
|
e@1
|
1112 column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
|
e@1
|
1113 set_column_names(std::forward<ColNames>(cols)...);
|
e@1
|
1114 }
|
e@1
|
1115
|
e@1
|
1116 void set_column_names(){}
|
e@1
|
1117
|
e@1
|
1118
|
e@1
|
1119 public:
|
e@1
|
1120 CSVReader() = delete;
|
e@1
|
1121 CSVReader(const CSVReader&) = delete;
|
e@1
|
1122 CSVReader&operator=(const CSVReader&);
|
e@1
|
1123
|
e@1
|
1124 template<class ...Args>
|
e@1
|
1125 explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
|
e@1
|
1126 std::fill(row, row+column_count, nullptr);
|
e@1
|
1127 col_order.resize(column_count);
|
e@1
|
1128 for(unsigned i=0; i<column_count; ++i)
|
e@1
|
1129 col_order[i] = i;
|
e@1
|
1130 for(unsigned i=1; i<=column_count; ++i)
|
e@1
|
1131 column_names[i-1] = "col"+std::to_string(i);
|
e@1
|
1132 }
|
e@1
|
1133
|
e@1
|
1134 char*next_line(){
|
e@1
|
1135 return in.next_line();
|
e@1
|
1136 }
|
e@1
|
1137
|
e@1
|
1138 template<class ...ColNames>
|
e@1
|
1139 void read_header(ignore_column ignore_policy, ColNames...cols){
|
e@1
|
1140 static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
|
e@1
|
1141 static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
|
e@1
|
1142 try{
|
e@1
|
1143 set_column_names(std::forward<ColNames>(cols)...);
|
e@1
|
1144
|
e@1
|
1145 char*line;
|
e@1
|
1146 do{
|
e@1
|
1147 line = in.next_line();
|
e@1
|
1148 if(!line)
|
e@1
|
1149 throw error::header_missing();
|
e@1
|
1150 }while(comment_policy::is_comment(line));
|
e@1
|
1151
|
e@1
|
1152 detail::parse_header_line
|
e@1
|
1153 <column_count, trim_policy, quote_policy>
|
e@1
|
1154 (line, col_order, column_names, ignore_policy);
|
e@1
|
1155 }catch(error::with_file_name&err){
|
e@1
|
1156 err.set_file_name(in.get_truncated_file_name());
|
e@1
|
1157 throw;
|
e@1
|
1158 }
|
e@1
|
1159 }
|
e@1
|
1160
|
e@1
|
1161 template<class ...ColNames>
|
e@1
|
1162 void set_header(ColNames...cols){
|
e@1
|
1163 static_assert(sizeof...(ColNames)>=column_count,
|
e@1
|
1164 "not enough column names specified");
|
e@1
|
1165 static_assert(sizeof...(ColNames)<=column_count,
|
e@1
|
1166 "too many column names specified");
|
e@1
|
1167 set_column_names(std::forward<ColNames>(cols)...);
|
e@1
|
1168 std::fill(row, row+column_count, nullptr);
|
e@1
|
1169 col_order.resize(column_count);
|
e@1
|
1170 for(unsigned i=0; i<column_count; ++i)
|
e@1
|
1171 col_order[i] = i;
|
e@1
|
1172 }
|
e@1
|
1173
|
e@1
|
1174 bool has_column(const std::string&name) const {
|
e@1
|
1175 return col_order.end() != std::find(
|
e@1
|
1176 col_order.begin(), col_order.end(),
|
e@1
|
1177 std::find(std::begin(column_names), std::end(column_names), name)
|
e@1
|
1178 - std::begin(column_names));
|
e@1
|
1179 }
|
e@1
|
1180
|
e@1
|
1181 void set_file_name(const std::string&file_name){
|
e@1
|
1182 in.set_file_name(file_name);
|
e@1
|
1183 }
|
e@1
|
1184
|
e@1
|
1185 void set_file_name(const char*file_name){
|
e@1
|
1186 in.set_file_name(file_name);
|
e@1
|
1187 }
|
e@1
|
1188
|
e@1
|
1189 const char*get_truncated_file_name()const{
|
e@1
|
1190 return in.get_truncated_file_name();
|
e@1
|
1191 }
|
e@1
|
1192
|
e@1
|
1193 void set_file_line(unsigned file_line){
|
e@1
|
1194 in.set_file_line(file_line);
|
e@1
|
1195 }
|
e@1
|
1196
|
e@1
|
1197 unsigned get_file_line()const{
|
e@1
|
1198 return in.get_file_line();
|
e@1
|
1199 }
|
e@1
|
1200
|
e@1
|
1201 private:
|
e@1
|
1202 void parse_helper(std::size_t){}
|
e@1
|
1203
|
e@1
|
1204 template<class T, class ...ColType>
|
e@1
|
1205 void parse_helper(std::size_t r, T&t, ColType&...cols){
|
e@1
|
1206 if(row[r]){
|
e@1
|
1207 try{
|
e@1
|
1208 try{
|
e@1
|
1209 ::io::detail::parse<overflow_policy>(row[r], t);
|
e@1
|
1210 }catch(error::with_column_content&err){
|
e@1
|
1211 err.set_column_content(row[r]);
|
e@1
|
1212 throw;
|
e@1
|
1213 }
|
e@1
|
1214 }catch(error::with_column_name&err){
|
e@1
|
1215 err.set_column_name(column_names[r].c_str());
|
e@1
|
1216 throw;
|
e@1
|
1217 }
|
e@1
|
1218 }
|
e@1
|
1219 parse_helper(r+1, cols...);
|
e@1
|
1220 }
|
e@1
|
1221
|
e@1
|
1222
|
e@1
|
1223 public:
|
e@1
|
1224 template<class ...ColType>
|
e@1
|
1225 bool read_row(ColType& ...cols){
|
e@1
|
1226 static_assert(sizeof...(ColType)>=column_count,
|
e@1
|
1227 "not enough columns specified");
|
e@1
|
1228 static_assert(sizeof...(ColType)<=column_count,
|
e@1
|
1229 "too many columns specified");
|
e@1
|
1230 try{
|
e@1
|
1231 try{
|
e@1
|
1232
|
e@1
|
1233 char*line;
|
e@1
|
1234 do{
|
e@1
|
1235 line = in.next_line();
|
e@1
|
1236 if(!line)
|
e@1
|
1237 return false;
|
e@1
|
1238 }while(comment_policy::is_comment(line));
|
e@1
|
1239
|
e@1
|
1240 detail::parse_line<trim_policy, quote_policy>
|
e@1
|
1241 (line, row, col_order);
|
e@1
|
1242
|
e@1
|
1243 parse_helper(0, cols...);
|
e@1
|
1244 }catch(error::with_file_name&err){
|
e@1
|
1245 err.set_file_name(in.get_truncated_file_name());
|
e@1
|
1246 throw;
|
e@1
|
1247 }
|
e@1
|
1248 }catch(error::with_file_line&err){
|
e@1
|
1249 err.set_file_line(in.get_file_line());
|
e@1
|
1250 throw;
|
e@1
|
1251 }
|
e@1
|
1252
|
e@1
|
1253 return true;
|
e@1
|
1254 }
|
e@1
|
1255 };
|
e@1
|
1256 }
|
e@1
|
1257 #endif
|
e@1
|
1258
|