Chris@16
|
1 #ifndef DATE_TIME_TZ_DB_BASE_HPP__
|
Chris@16
|
2 #define DATE_TIME_TZ_DB_BASE_HPP__
|
Chris@16
|
3
|
Chris@16
|
4 /* Copyright (c) 2003-2005 CrystalClear Software, Inc.
|
Chris@16
|
5 * Subject to the Boost Software License, Version 1.0.
|
Chris@16
|
6 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7 * Author: Jeff Garland, Bart Garst
|
Chris@101
|
8 * $Date$
|
Chris@16
|
9 */
|
Chris@16
|
10
|
Chris@16
|
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>
|
Chris@16
|
24
|
Chris@16
|
25 namespace boost {
|
Chris@16
|
26 namespace date_time {
|
Chris@16
|
27
|
Chris@16
|
//! Raised when the tz database is unable to find or open its data file
/*! The what() text is a fixed message; the filespec overload appends the
 *  offending file specification to that message. */
class data_not_accessible : public std::logic_error
{
  typedef std::logic_error base_type;

public:
  //! Builds the exception carrying only the generic message
  data_not_accessible()
    : base_type(std::string("Unable to locate or access the required datafile."))
  {}

  //! Builds the exception carrying the generic message followed by \c filespec
  data_not_accessible(const std::string& filespec)
    : base_type(std::string("Unable to locate or access the required datafile. Filespec: ").append(filespec))
  {}
};
|
Chris@16
|
39
|
Chris@16
|
//! Raised when a record in the tz data file has an unexpected number of fields
/*! The message passed to the constructor is forwarded unchanged to
 *  std::out_of_range and is what what() reports. */
class bad_field_count : public std::out_of_range
{
  typedef std::out_of_range base_type;

public:
  //! Builds the exception with \c s as its what() text
  bad_field_count(const std::string& s)
    : base_type(s)
  {}
};
|
Chris@16
|
48
|
Chris@16
|
49 //! Creates a database of time_zones from csv datafile
|
Chris@16
|
50 /*! The csv file containing the zone_specs used by the
|
Chris@16
|
51 * tz_db_base is intended to be customized by the
|
Chris@16
|
52 * library user. When customizing this file (or creating your own) the
|
Chris@16
|
53 * file must follow a specific format.
|
Chris@16
|
54 *
|
Chris@16
|
55 * The first line is expected to contain column headings and is therefore
|
Chris@16
|
56 * skipped by load_from_file(); load_from_stream() processes every line.
|
Chris@16
|
57 *
|
Chris@16
|
58 * Each record (line) must have eleven fields. Some of those fields can
|
Chris@16
|
59 * be empty. Every field (even empty ones) must be enclosed in
|
Chris@16
|
60 * double-quotes.
|
Chris@16
|
61 * Ex:
|
Chris@16
|
62 * @code
|
Chris@16
|
63 * "America/Phoenix" <- string enclosed in quotes
|
Chris@16
|
64 * "" <- empty field
|
Chris@16
|
65 * @endcode
|
Chris@16
|
66 *
|
Chris@16
|
67 * Some fields represent a length of time. The format of these fields
|
Chris@16
|
68 * must be:
|
Chris@16
|
69 * @code
|
Chris@16
|
70 * "{+|-}hh:mm[:ss]" <- length-of-time format
|
Chris@16
|
71 * @endcode
|
Chris@16
|
72 * Where the plus or minus is mandatory and the seconds are optional.
|
Chris@16
|
73 *
|
Chris@16
|
74 * Since some time zones do not use daylight savings it is not always
|
Chris@16
|
75 * necessary for every field in a zone_spec to contain a value. All
|
Chris@16
|
76 * zone_specs must have at least ID and GMT offset. Zones that use
|
Chris@16
|
77 * daylight savings must have all fields filled except:
|
Chris@16
|
78 * STD ABBR, STD NAME, DST NAME. You should take note
|
Chris@16
|
79 * that DST ABBR is mandatory for zones that use daylight savings
|
Chris@16
|
80 * (see field descriptions for further details).
|
Chris@16
|
81 *
|
Chris@16
|
82 * ******* Fields and their description/details *********
|
Chris@16
|
83 *
|
Chris@16
|
84 * ID:
|
Chris@16
|
85 * Contains the identifying string for the zone_spec. Any string will
|
Chris@16
|
86 * do as long as it's unique. No two ID's can be the same.
|
Chris@16
|
87 *
|
Chris@16
|
88 * STD ABBR:
|
Chris@16
|
89 * STD NAME:
|
Chris@16
|
90 * DST ABBR:
|
Chris@16
|
91 * DST NAME:
|
Chris@16
|
92 * These four are all the names and abbreviations used by the time
|
Chris@16
|
93 * zone being described. While any string will do in these fields,
|
Chris@16
|
94 * care should be taken. These fields hold the strings that will be
|
Chris@16
|
95 * used in the output of many of the local_time classes.
|
Chris@16
|
96 * Ex:
|
Chris@16
|
97 * @code
|
Chris@16
|
98 * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
|
Chris@16
|
99 * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
|
Chris@16
|
100 * cout << ny_time.to_long_string() << endl;
|
Chris@16
|
101 * // 2004-Aug-30 00:00:00 Eastern Daylight Time
|
Chris@16
|
102 * cout << ny_time.to_short_string() << endl;
|
Chris@16
|
103 * // 2004-Aug-30 00:00:00 EDT
|
Chris@16
|
104 * @endcode
|
Chris@16
|
105 *
|
Chris@16
|
106 * NOTE: The exact format/function names may vary - see local_time
|
Chris@16
|
107 * documentation for further details.
|
Chris@16
|
108 *
|
Chris@16
|
109 * GMT offset:
|
Chris@16
|
110 * This is the number of hours added to utc to get the local time
|
Chris@16
|
111 * before any daylight savings adjustments are made. Some examples
|
Chris@16
|
112 * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
|
Chris@16
|
113 * The format must follow the length-of-time format described above.
|
Chris@16
|
114 *
|
Chris@16
|
115 * DST adjustment:
|
Chris@16
|
116 * The amount of time added to gmt_offset when daylight savings is in
|
Chris@16
|
117 * effect. The format must follow the length-of-time format described
|
Chris@16
|
118 * above.
|
Chris@16
|
119 *
|
Chris@16
|
120 * DST Start Date rule:
|
Chris@16
|
121 * This is a specially formatted string that describes the day of year
|
Chris@16
|
122 * in which the transition takes place. It holds three fields of its own,
|
Chris@16
|
123 * separated by semicolons.
|
Chris@16
|
124 * The first field indicates the "nth" weekday of the month. The possible
|
Chris@16
|
125 * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
|
Chris@16
|
126 * and -1 (last).
|
Chris@16
|
127 * The second field indicates the day-of-week from 0-6 (Sun=0).
|
Chris@16
|
128 * The third field indicates the month from 1-12 (Jan=1).
|
Chris@16
|
129 *
|
Chris@16
|
130 * Examples are: "-1;5;9"="Last Friday of September",
|
Chris@16
|
131 * "2;1;3"="Second Monday of March"
|
Chris@16
|
132 *
|
Chris@16
|
133 * Start time:
|
Chris@16
|
134 * Start time is the number of hours past midnight, on the day of the
|
Chris@16
|
135 * start transition, the transition takes place. More simply put, the
|
Chris@16
|
136 * time of day the transition is made (in 24 hours format). The format
|
Chris@16
|
137 * must follow the length-of-time format described above with the
|
Chris@16
|
138 * exception that it must always be positive.
|
Chris@16
|
139 *
|
Chris@16
|
140 * DST End date rule:
|
Chris@16
|
141 * See DST Start date rule. The difference here is this is the day
|
Chris@16
|
142 * daylight savings ends (transition to STD).
|
Chris@16
|
143 *
|
Chris@16
|
144 * End time:
|
Chris@16
|
145 * Same as Start time.
|
Chris@16
|
146 */
|
Chris@16
|
147 template<class time_zone_type, class rule_type>
|
Chris@16
|
148 class tz_db_base {
|
Chris@16
|
149 public:
|
Chris@16
|
150 /* Having CharT as a template parameter created problems
|
Chris@16
|
151 * with posix_time::duration_from_string. Templatizing
|
Chris@16
|
152 * duration_from_string was not possible at this time, however,
|
Chris@16
|
153 * it should be possible in the future (when poor compilers get
|
Chris@16
|
154 * fixed or stop being used).
|
Chris@16
|
155 * Since this class was designed to use CharT as a parameter it
|
Chris@16
|
156 * is simply typedef'd here to ease converting in back to a
|
Chris@16
|
157 * parameter the future */
|
Chris@16
|
158 typedef char char_type;
|
Chris@16
|
159
|
Chris@16
|
160 typedef typename time_zone_type::base_type time_zone_base_type;
|
Chris@16
|
161 typedef typename time_zone_type::time_duration_type time_duration_type;
|
Chris@16
|
162 typedef time_zone_names_base<char_type> time_zone_names;
|
Chris@16
|
163 typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
|
Chris@16
|
164 typedef std::basic_string<char_type> string_type;
|
Chris@16
|
165
|
Chris@16
|
166 //! Constructs an empty database
|
Chris@16
|
167 tz_db_base() {}
|
Chris@16
|
168
|
Chris@16
|
169 //! Process csv data file, may throw exceptions
|
Chris@16
|
170 /*! May throw bad_field_count exceptions */
|
Chris@16
|
171 void load_from_stream(std::istream &in)
|
Chris@16
|
172 {
|
Chris@16
|
173 std::string buff;
|
Chris@16
|
174 while( std::getline(in, buff)) {
|
Chris@16
|
175 parse_string(buff);
|
Chris@16
|
176 }
|
Chris@16
|
177 }
|
Chris@16
|
178
|
Chris@16
|
179 //! Process csv data file, may throw exceptions
|
Chris@16
|
180 /*! May throw data_not_accessible, or bad_field_count exceptions */
|
Chris@16
|
181 void load_from_file(const std::string& pathspec)
|
Chris@16
|
182 {
|
Chris@16
|
183 std::string buff;
|
Chris@16
|
184
|
Chris@16
|
185 std::ifstream ifs(pathspec.c_str());
|
Chris@16
|
186 if(!ifs){
|
Chris@16
|
187 boost::throw_exception(data_not_accessible(pathspec));
|
Chris@16
|
188 }
|
Chris@16
|
189 std::getline(ifs, buff); // first line is column headings
|
Chris@16
|
190 this->load_from_stream(ifs);
|
Chris@16
|
191 }
|
Chris@16
|
192
|
Chris@16
|
193 //! returns true if record successfully added to map
|
Chris@16
|
194 /*! Takes a region name in the form of "America/Phoenix", and a
|
Chris@16
|
195 * time_zone object for that region. The id string must be a unique
|
Chris@16
|
196 * name that does not already exist in the database. */
|
Chris@16
|
197 bool add_record(const string_type& region,
|
Chris@16
|
198 boost::shared_ptr<time_zone_base_type> tz)
|
Chris@16
|
199 {
|
Chris@16
|
200 typename map_type::value_type p(region, tz);
|
Chris@16
|
201 return (m_zone_map.insert(p)).second;
|
Chris@16
|
202 }
|
Chris@16
|
203
|
Chris@16
|
204 //! Returns a time_zone object built from the specs for the given region
|
Chris@16
|
205 /*! Returns a time_zone object built from the specs for the given
|
Chris@16
|
206 * region. If region does not exist a local_time::record_not_found
|
Chris@16
|
207 * exception will be thrown */
|
Chris@16
|
208 boost::shared_ptr<time_zone_base_type>
|
Chris@16
|
209 time_zone_from_region(const string_type& region) const
|
Chris@16
|
210 {
|
Chris@16
|
211 // get the record
|
Chris@16
|
212 typename map_type::const_iterator record = m_zone_map.find(region);
|
Chris@16
|
213 if(record == m_zone_map.end()){
|
Chris@16
|
214 return boost::shared_ptr<time_zone_base_type>(); //null pointer
|
Chris@16
|
215 }
|
Chris@16
|
216 return record->second;
|
Chris@16
|
217 }
|
Chris@16
|
218
|
Chris@16
|
219 //! Returns a vector of strings holding the time zone regions in the database
|
Chris@16
|
220 std::vector<std::string> region_list() const
|
Chris@16
|
221 {
|
Chris@16
|
222 typedef std::vector<std::string> vector_type;
|
Chris@16
|
223 vector_type regions;
|
Chris@16
|
224 typename map_type::const_iterator itr = m_zone_map.begin();
|
Chris@16
|
225 while(itr != m_zone_map.end()) {
|
Chris@16
|
226 regions.push_back(itr->first);
|
Chris@16
|
227 ++itr;
|
Chris@16
|
228 }
|
Chris@16
|
229 return regions;
|
Chris@16
|
230 }
|
Chris@16
|
231
|
Chris@16
|
232 private:
|
Chris@16
|
233 typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
|
Chris@16
|
234 map_type m_zone_map;
|
Chris@16
|
235
|
Chris@16
|
236 // start and end rule are of the same type
|
Chris@16
|
237 typedef typename rule_type::start_rule::week_num week_num;
|
Chris@16
|
238
|
Chris@16
|
239 /* TODO: mechanisms need to be put in place to handle different
|
Chris@16
|
240 * types of rule specs. parse_rules() only handles nth_kday
|
Chris@16
|
241 * rule types. */
|
Chris@16
|
242
|
Chris@16
|
243 //! parses rule specs for transition day rules
|
Chris@16
|
244 rule_type* parse_rules(const string_type& sr, const string_type& er) const
|
Chris@16
|
245 {
|
Chris@16
|
246 using namespace gregorian;
|
Chris@16
|
247 // start and end rule are of the same type,
|
Chris@16
|
248 // both are included here for readability
|
Chris@16
|
249 typedef typename rule_type::start_rule start_rule;
|
Chris@16
|
250 typedef typename rule_type::end_rule end_rule;
|
Chris@16
|
251
|
Chris@16
|
252 // these are: [start|end] nth, day, month
|
Chris@16
|
253 int s_nth = 0, s_d = 0, s_m = 0;
|
Chris@16
|
254 int e_nth = 0, e_d = 0, e_m = 0;
|
Chris@16
|
255 split_rule_spec(s_nth, s_d, s_m, sr);
|
Chris@16
|
256 split_rule_spec(e_nth, e_d, e_m, er);
|
Chris@16
|
257
|
Chris@16
|
258 typename start_rule::week_num s_wn, e_wn;
|
Chris@16
|
259 s_wn = get_week_num(s_nth);
|
Chris@16
|
260 e_wn = get_week_num(e_nth);
|
Chris@16
|
261
|
Chris@16
|
262
|
Chris@101
|
263 return new rule_type(start_rule(s_wn,
|
Chris@101
|
264 static_cast<unsigned short>(s_d),
|
Chris@101
|
265 static_cast<unsigned short>(s_m)),
|
Chris@101
|
266 end_rule(e_wn,
|
Chris@101
|
267 static_cast<unsigned short>(e_d),
|
Chris@101
|
268 static_cast<unsigned short>(e_m)));
|
Chris@16
|
269 }
|
Chris@16
|
270 //! helper function for parse_rules()
|
Chris@16
|
271 week_num get_week_num(int nth) const
|
Chris@16
|
272 {
|
Chris@16
|
273 typedef typename rule_type::start_rule start_rule;
|
Chris@16
|
274 switch(nth){
|
Chris@16
|
275 case 1:
|
Chris@16
|
276 return start_rule::first;
|
Chris@16
|
277 case 2:
|
Chris@16
|
278 return start_rule::second;
|
Chris@16
|
279 case 3:
|
Chris@16
|
280 return start_rule::third;
|
Chris@16
|
281 case 4:
|
Chris@16
|
282 return start_rule::fourth;
|
Chris@16
|
283 case 5:
|
Chris@16
|
284 case -1:
|
Chris@16
|
285 return start_rule::fifth;
|
Chris@16
|
286 default:
|
Chris@16
|
287 // shouldn't get here - add error handling later
|
Chris@16
|
288 break;
|
Chris@16
|
289 }
|
Chris@16
|
290 return start_rule::fifth; // silence warnings
|
Chris@16
|
291 }
|
Chris@16
|
292
|
Chris@16
|
293 //! splits the [start|end]_date_rule string into 3 ints
|
Chris@16
|
294 void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
|
Chris@16
|
295 {
|
Chris@16
|
296 typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
|
Chris@16
|
297 typedef boost::tokenizer<char_separator_type,
|
Chris@16
|
298 std::basic_string<char_type>::const_iterator,
|
Chris@16
|
299 std::basic_string<char_type> > tokenizer;
|
Chris@16
|
300 typedef boost::tokenizer<char_separator_type,
|
Chris@16
|
301 std::basic_string<char_type>::const_iterator,
|
Chris@16
|
302 std::basic_string<char_type> >::iterator tokenizer_iterator;
|
Chris@16
|
303
|
Chris@16
|
304 const char_type sep_char[] = { ';', '\0'};
|
Chris@16
|
305 char_separator_type sep(sep_char);
|
Chris@16
|
306 tokenizer tokens(rule, sep); // 3 fields
|
Chris@16
|
307
|
Chris@16
|
308 if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) {
|
Chris@16
|
309 std::ostringstream msg;
|
Chris@16
|
310 msg << "Expecting 3 fields, got "
|
Chris@16
|
311 << std::distance ( tokens.begin(), tokens.end ())
|
Chris@16
|
312 << " fields in line: " << rule;
|
Chris@16
|
313 boost::throw_exception(bad_field_count(msg.str()));
|
Chris@16
|
314 }
|
Chris@16
|
315
|
Chris@16
|
316 tokenizer_iterator tok_iter = tokens.begin();
|
Chris@16
|
317 nth = std::atoi(tok_iter->c_str()); ++tok_iter;
|
Chris@16
|
318 d = std::atoi(tok_iter->c_str()); ++tok_iter;
|
Chris@16
|
319 m = std::atoi(tok_iter->c_str());
|
Chris@16
|
320 }
|
Chris@16
|
321
|
Chris@16
|
322
|
Chris@16
|
323 //! Take a line from the csv, turn it into a time_zone_type.
|
Chris@16
|
324 /*! Take a line from the csv, turn it into a time_zone_type,
|
Chris@16
|
325 * and add it to the map. Zone_specs in csv file are expected to
|
Chris@16
|
326 * have eleven fields that describe the time zone. Returns true if
|
Chris@16
|
327 * zone_spec successfully added to database */
|
Chris@16
|
328 bool parse_string(string_type& s)
|
Chris@16
|
329 {
|
Chris@16
|
330 std::vector<string_type> result;
|
Chris@16
|
331 typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
|
Chris@16
|
332
|
Chris@16
|
333 token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());
|
Chris@16
|
334
|
Chris@16
|
335 token_iter_type end;
|
Chris@16
|
336 while (i != end) {
|
Chris@16
|
337 result.push_back(*i);
|
Chris@16
|
338 i++;
|
Chris@16
|
339 }
|
Chris@16
|
340
|
Chris@16
|
341 enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
|
Chris@16
|
342 DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
|
Chris@16
|
343 END_TIME, FIELD_COUNT };
|
Chris@16
|
344
|
Chris@16
|
345 //take a shot at fixing gcc 4.x error
|
Chris@16
|
346 const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
|
Chris@16
|
347 if (result.size() != expected_fields) {
|
Chris@16
|
348 std::ostringstream msg;
|
Chris@16
|
349 msg << "Expecting " << FIELD_COUNT << " fields, got "
|
Chris@16
|
350 << result.size() << " fields in line: " << s;
|
Chris@16
|
351 boost::throw_exception(bad_field_count(msg.str()));
|
Chris@16
|
352 BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
|
Chris@16
|
353 }
|
Chris@16
|
354
|
Chris@16
|
355 // initializations
|
Chris@16
|
356 bool has_dst = true;
|
Chris@16
|
357 if(result[DSTABBR] == std::string()){
|
Chris@16
|
358 has_dst = false;
|
Chris@16
|
359 }
|
Chris@16
|
360
|
Chris@16
|
361
|
Chris@16
|
362 // start building components of a time_zone
|
Chris@16
|
363 time_zone_names names(result[STDNAME], result[STDABBR],
|
Chris@16
|
364 result[DSTNAME], result[DSTABBR]);
|
Chris@16
|
365
|
Chris@16
|
366 time_duration_type utc_offset =
|
Chris@16
|
367 str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
|
Chris@16
|
368
|
Chris@16
|
369 dst_adjustment_offsets adjust(time_duration_type(0,0,0),
|
Chris@16
|
370 time_duration_type(0,0,0),
|
Chris@16
|
371 time_duration_type(0,0,0));
|
Chris@16
|
372
|
Chris@16
|
373 boost::shared_ptr<rule_type> rules;
|
Chris@16
|
374
|
Chris@16
|
375 if(has_dst){
|
Chris@16
|
376 adjust = dst_adjustment_offsets(
|
Chris@16
|
377 str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
|
Chris@16
|
378 str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
|
Chris@16
|
379 str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
|
Chris@16
|
380 );
|
Chris@16
|
381
|
Chris@16
|
382 rules =
|
Chris@16
|
383 boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
|
Chris@16
|
384 result[END_DATE_RULE]));
|
Chris@16
|
385 }
|
Chris@16
|
386 string_type id(result[ID]);
|
Chris@16
|
387 boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
|
Chris@16
|
388 return (add_record(id, zone));
|
Chris@16
|
389
|
Chris@16
|
390 }
|
Chris@16
|
391
|
Chris@16
|
392 };
|
Chris@16
|
393
|
Chris@16
|
394 } } // namespace
|
Chris@16
|
395
|
Chris@16
|
396 #endif // DATE_TIME_TZ_DB_BASE_HPP__
|