cannam@150: /**
cannam@150:  * base-n, 1.0
cannam@150:  * Copyright (C) 2012 Andrzej Zawadzki (azawadzki@gmail.com)
cannam@150:  * 
cannam@150:  * Permission is hereby granted, free of charge, to any person obtaining a copy
cannam@150:  * of this software and associated documentation files (the "Software"), to deal
cannam@150:  * in the Software without restriction, including without limitation the rights
cannam@150:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
cannam@150:  * copies of the Software, and to permit persons to whom the Software is
cannam@150:  * furnished to do so, subject to the following conditions:
cannam@150:  * 
cannam@150:  * The above copyright notice and this permission notice shall be included in
cannam@150:  * all copies or substantial portions of the Software.
cannam@150:  * 
cannam@150:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
cannam@150:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
cannam@150:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
cannam@150:  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
cannam@150:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
cannam@150:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
cannam@150:  * SOFTWARE.
cannam@150: **/
cannam@150: #ifndef BASEN_HPP
cannam@150: #define BASEN_HPP
cannam@150: 
cannam@150: #include <algorithm>
cannam@150: #include <cctype>
cannam@150: #include <cassert>
cannam@150: #include <cstring>
cannam@150: 
cannam@150: namespace bn
cannam@150: {
cannam@150: 
/**
 * Encodes the bytes in [start, end) as base16 text and writes one character
 * per 4 input bits to `out`, using the alphabet "0123456789ABCDEF".
 */
template<class Iter1, class Iter2>
void encode_b16(Iter1 start, Iter1 end, Iter2 out);

/**
 * Encodes the bytes in [start, end) as base32 text and writes one character
 * per 5 input bits to `out`, using the alphabet "A-Z2-7". A final partial
 * group is filled with zero bits; no padding characters are written.
 */
template<class Iter1, class Iter2>
void encode_b32(Iter1 start, Iter1 end, Iter2 out);

/**
 * Encodes the bytes in [start, end) as base64 text and writes one character
 * per 6 input bits to `out`, using the alphabet "A-Za-z0-9+/". A final
 * partial group is filled with zero bits; no padding characters are written.
 */
template<class Iter1, class Iter2>
void encode_b64(Iter1 start, Iter1 end, Iter2 out);

/**
 * Decodes base16 text in [start, end) and writes the resulting bytes to
 * `out`. Whitespace and characters outside "0-9A-F" are skipped.
 */
template<class Iter1, class Iter2>
void decode_b16(Iter1 start, Iter1 end, Iter2 out);

/**
 * Decodes base32 text in [start, end) and writes the resulting bytes to
 * `out`. Whitespace and characters outside "A-Z2-7" are skipped; trailing
 * bits that do not complete a byte are discarded.
 */
template<class Iter1, class Iter2>
void decode_b32(Iter1 start, Iter1 end, Iter2 out);

/**
 * Decodes base64 text in [start, end) and writes the resulting bytes to
 * `out`. Whitespace and characters outside "A-Za-z0-9+/" are skipped;
 * trailing bits that do not complete a byte are discarded.
 */
template<class Iter1, class Iter2>
void decode_b64(Iter1 start, Iter1 end, Iter2 out);
cannam@150: 
cannam@150: namespace impl
cannam@150: {
cannam@150: 
// Sentinel returned by the conversion traits' decode() functions for a
// character that is not part of their alphabet; the generic decode() loop
// tests for it to skip such characters.
const int Error = -1;
cannam@150: 
// NOTE: these helpers are `inline` rather than members of an unnamed
// namespace. In a header, an unnamed namespace gives every translation unit
// its own internal-linkage copy, which the externally-linked encode()
// template would then refer to differently in each unit.

/**
 * Extracts `bits_count` bits from a single byte.
 *
 * Bits are numbered from the most significant one (bit 0) to the least
 * significant one (bit 7). The selected bits are returned right-aligned, so
 * the result is always in the range [0, 2^bits_count).
 *
 * @param value       byte to read from
 * @param start_bit   index of the first bit to extract (0 = MSB)
 * @param bits_count  number of bits to extract; the group must end before
 *                    the last bit of the byte (start_bit + bits_count < 8)
 */
inline char extract_partial_bits(char value, size_t start_bit, size_t bits_count)
{
    assert(start_bit + bits_count < 8);
    // Work on the unsigned byte value: right-shifting a negative plain char
    // would drag sign bits into the result on signed-char platforms.
    const unsigned int byte = static_cast<unsigned char>(value);
    const unsigned int mask = (1u << bits_count) - 1u;
    // shift the wanted bits down to the low end, then drop everything above
    return static_cast<char>((byte >> (8 - bits_count - start_bit)) & mask);
}

/**
 * Extracts `bits_count` bits that begin in `previous` and continue into
 * `next`.
 *
 * The group starts at bit `start_bit` of `previous` (0 = MSB), uses all
 * remaining bits of that byte and takes the rest from the most significant
 * bits of `next`. The result is returned right-aligned.
 *
 * @param previous    byte holding the leading part of the group
 * @param next        byte holding the trailing part of the group (pass 0
 *                    when there is no further input)
 * @param start_bit   index of the first bit inside `previous`
 * @param bits_count  total number of bits in the group; the group must reach
 *                    at least the end of `previous`
 */
inline char extract_overlapping_bits(char previous, char next, size_t start_bit, size_t bits_count)
{
    assert(start_bit + bits_count < 16);
    // the group has to reach the byte border, otherwise the subtraction
    // below would wrap around
    assert(start_bit < 8 && start_bit + bits_count >= 8);
    const size_t bits_count_in_previous = 8 - start_bit;
    const size_t bits_count_in_next = bits_count - bits_count_in_previous;
    // Unsigned arithmetic: left-shifting a negative signed value is
    // undefined behaviour before C++20.
    const unsigned int head = static_cast<unsigned char>(previous);
    const unsigned int tail = static_cast<unsigned char>(next);
    const unsigned int joined = (head << bits_count_in_next) | (tail >> (8 - bits_count_in_next));
    const unsigned int mask = (1u << bits_count) - 1u;
    return static_cast<char>(joined & mask);
}
cannam@150: 
cannam@150: struct b16_conversion_traits
cannam@150: {
cannam@150:     static size_t group_length()
cannam@150:     {
cannam@150:        return 4;
cannam@150:     }
cannam@150: 
cannam@150:     static char encode(unsigned int index)
cannam@150:     {
cannam@150:         const char* const dictionary = "0123456789ABCDEF";
cannam@150:         assert(index < strlen(dictionary));
cannam@150:         return dictionary[index];
cannam@150:     }
cannam@150: 
cannam@150:     static char decode(char c)
cannam@150:     {
cannam@150:         if (c >= '0' && c <= '9') {
cannam@150:             return c - '0';
cannam@150:         } else if (c >= 'A' && c <= 'F') {
cannam@150:             return c - 'A' + 10;
cannam@150:         }
cannam@258:         return Error;
cannam@150:     }
cannam@150: };
cannam@150: 
cannam@150: struct b32_conversion_traits
cannam@150: {
cannam@150:     static size_t group_length()
cannam@150:     {
cannam@150:        return 5;
cannam@150:     }
cannam@150: 
cannam@150:     static char encode(unsigned int index)
cannam@150:     {
cannam@150:         const char * dictionary = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
cannam@150:         assert(index < strlen(dictionary));
cannam@150:         return dictionary[index];
cannam@150:     }
cannam@150: 
cannam@150:     static char decode(char c)
cannam@150:     {
cannam@150:         if (c >= 'A' && c <= 'Z') {
cannam@150:             return c - 'A';
cannam@150:         } else if (c >= '2' && c <= '7') {
cannam@150:             return c - '2' + 26;
cannam@150:         }
cannam@258:         return Error;
cannam@150:     }
cannam@150: };
cannam@150: 
cannam@150: struct b64_conversion_traits
cannam@150: {
cannam@150:     static size_t group_length()
cannam@150:     {
cannam@150:        return 6;
cannam@150:     }
cannam@150: 
cannam@150:     static char encode(unsigned int index)
cannam@150:     {
cannam@150:         const char* const dictionary = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
cannam@150:         assert(index < strlen(dictionary));
cannam@150:         return dictionary[index];
cannam@150:     }
cannam@150: 
cannam@150:     static char decode(char c)
cannam@150:     {
cannam@150:         const int alph_len = 26;
cannam@150:         if (c >= 'A' && c <= 'Z') {
cannam@150:             return c - 'A';
cannam@150:         } else if (c >= 'a' && c <= 'z') {
cannam@150:             return c - 'a' + alph_len * 1;
cannam@150:         } else if (c >= '0' && c <= '9') {
cannam@150:             return c - '0' + alph_len * 2;
cannam@150:         } else if (c == '+') {
cannam@150:             return c - '+' + alph_len * 2 + 10;
cannam@150:         } else if (c == '/') {
cannam@150:             return c - '/' + alph_len * 2 + 11;
cannam@150:         }
cannam@258:         return Error;
cannam@150:     }
cannam@150: };
cannam@150: 
cannam@150: template<class ConversionTraits, class Iter1, class Iter2>
cannam@150: void decode(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     Iter1 iter = start;
cannam@258:     size_t output_current_bit = 0;
cannam@150:     char buffer = 0;
cannam@150: 
cannam@150:     while (iter != end) {
cannam@150:         if (std::isspace(*iter)) {
cannam@150:             ++iter;
cannam@150:             continue;
cannam@150:         }
cannam@150:         char value = ConversionTraits::decode(*iter);
cannam@258:         if (value == Error) {
cannam@150:             // malformed data, but let's go on...
cannam@150:             ++iter;
cannam@150:             continue;
cannam@150:         }
cannam@258:         size_t bits_in_current_byte = std::min<size_t>(output_current_bit + ConversionTraits::group_length(), 8) - output_current_bit;
cannam@150:         if (bits_in_current_byte == ConversionTraits::group_length()) {
cannam@150:             // the value fits within current byte, so we can extract it directly
cannam@150:             buffer |= value << (8 - output_current_bit - ConversionTraits::group_length());
cannam@150:             output_current_bit += ConversionTraits::group_length();
cannam@150:             // check if we filled up current byte completely; in such case we flush output and continue
cannam@150:             if (output_current_bit == 8) {
cannam@150:                 *out++ = buffer;
cannam@150:                 buffer = 0;
cannam@150:                 output_current_bit = 0;
cannam@150:             }
cannam@150:         } else {
cannam@150:             // the value spans across the current and the next byte
cannam@258:             size_t bits_in_next_byte = ConversionTraits::group_length() - bits_in_current_byte;
cannam@150:             // fill the current byte and flush it to our output
cannam@150:             buffer |= value >> bits_in_next_byte;
cannam@150:             *out++ = buffer;
cannam@150:             buffer = 0;
cannam@150:             // save the remainder of our value in the buffer; it will be flushed
cannam@150:             // during next iterations
cannam@150:             buffer |= value << (8 - bits_in_next_byte);
cannam@150:             output_current_bit = bits_in_next_byte;
cannam@150:         }
cannam@150:         ++iter;
cannam@150:     }
cannam@150: }
cannam@150: 
cannam@150: template<class ConversionTraits, class Iter1, class Iter2>
cannam@150: void encode(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     Iter1 iter = start;
cannam@258:     size_t start_bit = 0;
cannam@150:     bool has_backlog = false;
cannam@150:     char backlog = 0;
cannam@150: 
cannam@150:     while (has_backlog || iter != end) {
cannam@150:         if (!has_backlog) {
cannam@150:             if (start_bit + ConversionTraits::group_length() < 8) {
cannam@150:                 // the value fits within single byte, so we can extract it
cannam@150:                 // directly
cannam@150:                 char v = extract_partial_bits(*iter, start_bit, ConversionTraits::group_length());
cannam@150:                 *out++ = ConversionTraits::encode(v);
cannam@150:                 // since we know that start_bit + ConversionTraits::group_length() < 8 we don't need to go
cannam@150:                 // to the next byte
cannam@150:                 start_bit += ConversionTraits::group_length();
cannam@150:             } else {
cannam@150:                 // our bits are spanning across byte border; we need to keep the
cannam@150:                 // starting point and move over to next byte.
cannam@150:                 backlog = *iter++;
cannam@150:                 has_backlog = true;
cannam@150:             }
cannam@150:         } else {
cannam@150:             // encode value which is made from bits spanning across byte
cannam@150:             // boundary
cannam@150:             char v;
cannam@150:             if (iter == end)
cannam@150:                  v = extract_overlapping_bits(backlog, 0, start_bit, ConversionTraits::group_length());
cannam@150:             else
cannam@150:                  v = extract_overlapping_bits(backlog, *iter, start_bit, ConversionTraits::group_length());
cannam@150:             *out++ = ConversionTraits::encode(v);
cannam@150:             has_backlog = false;
cannam@150:             start_bit = (start_bit + ConversionTraits::group_length()) % 8;
cannam@150:         }
cannam@150:     }
cannam@150: }
cannam@150: 
cannam@150: } // impl
cannam@150: 
// Make the names declared in bn::impl (encode, decode and the
// *_conversion_traits structs) visible directly inside namespace bn.
using namespace bn::impl;
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void encode_b16(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     encode<b16_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void encode_b32(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     encode<b32_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void encode_b64(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     encode<b64_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void decode_b16(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     decode<b16_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void decode_b32(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     decode<b32_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: template<class Iter1, class Iter2>
cannam@150: void decode_b64(Iter1 start, Iter1 end, Iter2 out)
cannam@150: {
cannam@150:     decode<b64_conversion_traits>(start, end, out);
cannam@150: }
cannam@150: 
cannam@150: } // bn
cannam@150: 
cannam@150: #endif // BASEN_HPP