diff osx/include/kj/parse/char.h @ 62:0994c39f1e94

Cap'n Proto v0.6 + build for OSX
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 22 May 2017 10:01:37 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/osx/include/kj/parse/char.h	Mon May 22 10:01:37 2017 +0100
@@ -0,0 +1,361 @@
+// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
+// Licensed under the MIT License:
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// This file contains parsers useful for character stream inputs, including parsers to parse
+// common kinds of tokens like identifiers, numbers, and quoted strings.
+
+#ifndef KJ_PARSE_CHAR_H_
+#define KJ_PARSE_CHAR_H_
+
+#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
+#pragma GCC system_header
+#endif
+
+#include "common.h"
+#include "../string.h"
+#include <inttypes.h>
+
+namespace kj {
+namespace parse {
+
+// =======================================================================================
+// Exact char/string.
+
+class ExactString_ {
+  // Parser functor which succeeds only if the upcoming input spells out a fixed NUL-terminated
+  // string.  It produces no result value (an empty Tuple) on success.
+  //
+  // Note that characters matched before a mismatch are consumed from `input` by this functor
+  // itself; it does not rewind on failure.
+
+public:
+  constexpr inline ExactString_(const char* str): str(str) {}
+  // `str` is stored by pointer, not copied.
+
+  template <typename Input>
+  Maybe<Tuple<>> operator()(Input& input) const {
+    for (const char* expected = str; *expected != '\0'; ++expected) {
+      // Running out of input and seeing a different character are both failures.
+      if (input.atEnd()) return nullptr;
+      if (input.current() != *expected) return nullptr;
+      input.next();
+    }
+
+    return Tuple<>();
+  }
+
+private:
+  const char* str;
+};
+
+constexpr inline ExactString_ exactString(const char* str) {
+  // Returns a parser that matches exactly the characters of the NUL-terminated string `str`.
+  // The pointer itself is kept by the returned parser.
+  return ExactString_{str};
+}
+
+template <char c>
+constexpr ExactlyConst_<char, c> exactChar() {
+  // Returns a parser that matches exactly the character given by the template argument (returning
+  // no result).
+  //
+  // The character is a compile-time template argument, so a call looks like `exactChar<'x'>()`.
+  // `ExactlyConst_` is not defined in this file; presumably it comes from "common.h".
+  return ExactlyConst_<char, c>();
+}
+
+// =======================================================================================
+// Char ranges / sets
+
+class CharGroup_ {
+  // A set of 8-bit character values which is also a parser: applied to an input, it accepts a
+  // single character if (and only if) that character is a member of the set, and yields the
+  // character matched.
+  //
+  // The set is a 256-bit bitmap held in four 64-bit words; bit (c % 64) of word (c / 64) is set
+  // when character value `c` (taken as unsigned char) is a member.  Every operation returns a
+  // new set rather than modifying this one, and each constexpr body is a single return
+  // statement, so sets can be composed in constant expressions.
+
+public:
+  constexpr inline CharGroup_(): bits{0, 0, 0, 0} {}
+  // Constructs the empty set.
+
+  constexpr inline CharGroup_ orRange(unsigned char first, unsigned char last) const {
+    // Returns this set plus every character from `first` to `last`, inclusive.
+    //
+    // For each word, the mask is "all bits up to and including `last`" minus "all bits below
+    // `first`", both expressed relative to the word's base (0, 64, 128, 192).  oneBits() clamps
+    // its argument to [0, 64], so words entirely outside the range contribute nothing.
+    return CharGroup_(bits[0] | (oneBits(last +   1) & ~oneBits(first      )),
+                      bits[1] | (oneBits(last -  63) & ~oneBits(first -  64)),
+                      bits[2] | (oneBits(last - 127) & ~oneBits(first - 128)),
+                      bits[3] | (oneBits(last - 191) & ~oneBits(first - 192)));
+  }
+
+  constexpr inline CharGroup_ orAny(const char* chars) const {
+    // Returns this set plus every character of the NUL-terminated string `chars`.  Written
+    // recursively so that it is usable in a constant expression.
+    return *chars == 0 ? *this : orChar(*chars).orAny(chars + 1);
+  }
+
+  constexpr inline CharGroup_ orChar(unsigned char c) const {
+    // Returns this set plus the single character `c`.
+    //
+    // bit() yields zero for an index outside [0, 64), so exactly one of the four words receives
+    // the new bit.  The fourth word covers 192..255, hence the offset of 192.
+    return CharGroup_(bits[0] | bit(c),
+                      bits[1] | bit(c - 64),
+                      bits[2] | bit(c - 128),
+                      bits[3] | bit(c - 192));
+  }
+
+  constexpr inline CharGroup_ orGroup(CharGroup_ other) const {
+    // Returns the union of this set and `other`.
+    return CharGroup_(bits[0] | other.bits[0],
+                      bits[1] | other.bits[1],
+                      bits[2] | other.bits[2],
+                      bits[3] | other.bits[3]);
+  }
+
+  constexpr inline CharGroup_ invert() const {
+    // Returns the complement: every 8-bit value that is not in this set.
+    return CharGroup_(~bits[0], ~bits[1], ~bits[2], ~bits[3]);
+  }
+
+  constexpr inline bool contains(unsigned char c) const {
+    // Returns true if `c` is a member of the set.
+    return (bits[c / 64] & (1ull << (c % 64))) != 0;
+  }
+
+  template <typename Input>
+  Maybe<char> operator()(Input& input) const {
+    // Parser entry point: if the input is not at its end and the current character is in the
+    // set, consumes that character and returns it.  Otherwise returns nullptr and leaves the
+    // input position untouched.
+    if (input.atEnd()) return nullptr;
+    unsigned char c = input.current();
+    if (contains(c)) {
+      input.next();
+      return c;
+    } else {
+      return nullptr;
+    }
+  }
+
+private:
+  typedef unsigned long long Bits64;
+
+  constexpr inline CharGroup_(Bits64 a, Bits64 b, Bits64 c, Bits64 d): bits{a, b, c, d} {}
+  Bits64 bits[4];
+
+  static constexpr inline Bits64 oneBits(int count) {
+    // Returns a word whose lowest `count` bits are set, with `count` clamped to [0, 64].  The
+    // shift is done on an unsigned value and is only evaluated for counts 1..63.
+    return count <= 0 ? 0ull : count >= 64 ? ~0ull : ((1ull << count) - 1);
+  }
+  static constexpr inline Bits64 bit(int index) {
+    // Returns a word with only bit `index` set, or zero if `index` is outside [0, 64).
+    return index < 0 ? 0 : index >= 64 ? 0 : (1ull << index);
+  }
+};
+
+constexpr inline CharGroup_ charRange(char first, char last) {
+  // Builds a parser accepting any single character between `first` and `last`, both ends
+  // included; the parser's result is the character it matched.  `charRange('a', 'z')`, for
+  // instance, matches the lower-case letters.
+  //
+  // The returned object can be widened further through its own methods.  This expression
+  // accepts any letter plus '_', '+', '-', and '.':
+  //
+  //     charRange('a', 'z').orRange('A', 'Z').orChar('_').orAny("+-.")
+  //
+  // Calling `.invert()` on the result yields the complementary set of characters.
+
+  return CharGroup_{}.orRange(first, last);
+}
+
+#if _MSC_VER
+// MSVC-only fallback: `anyOfChars` is a function-like macro instead of a function.  Being a
+// macro, it is not scoped to namespace kj::parse, and it expands to an unqualified `CharGroup_`,
+// so the expansion is looked up in whatever scope the macro is used from.
+#define anyOfChars(chars) CharGroup_().orAny(chars)
+// TODO(msvc): MSVC ICEs on the proper definition of `anyOfChars()`, which in turn prevents us from
+//   building the compiler or schema parser. We don't know why this happens, but Harris found that
+//   this horrible, horrible hack makes things work. This is awful, but it's better than nothing.
+//   Hopefully, MSVC will get fixed soon and we'll be able to remove this.
+#else
+constexpr inline CharGroup_ anyOfChars(const char* chars) {
+  // Returns a parser that accepts any of the characters in the given string (which should usually
+  // be a literal).  The returned parser is of the same type as returned by `charRange()` -- see
+  // that function for more info.
+  //
+  // `chars` must be NUL-terminated: the set is built by walking the string up to the first
+  // zero byte.
+
+  return CharGroup_().orAny(chars);
+}
+#endif
+
+// =======================================================================================
+
+namespace _ {  // private
+
+struct ArrayToString {
+  // Transform functor used by charsToString(): hands the character array to heapString() and
+  // returns the resulting String.
+  inline String operator()(const Array<char>& chars) const {
+    return heapString(chars);
+  }
+};
+
+}  // namespace _ (private)
+
+template <typename SubParser>
+constexpr inline auto charsToString(SubParser&& subParser)
+    -> decltype(transform(kj::fwd<SubParser>(subParser), _::ArrayToString())) {
+  // Wraps a parser that returns Array<char> such that it returns String instead.
+  //
+  // The sub-parser is forwarded into transform() together with an _::ArrayToString functor.
+  // The trailing return type spells out the same transform() call, so the two must be kept in
+  // sync if either is changed.
+  return parse::transform(kj::fwd<SubParser>(subParser), _::ArrayToString());
+}
+
+// =======================================================================================
+// Basic character classes.
+
+constexpr auto alpha = charRange('a', 'z').orRange('A', 'Z');
+// The letters 'a'-'z' and 'A'-'Z'.
+constexpr auto digit = charRange('0', '9');
+// The decimal digits '0'-'9'.
+constexpr auto alphaNumeric = alpha.orGroup(digit);
+// Any letter or decimal digit.
+constexpr auto nameStart = alpha.orChar('_');
+// Characters allowed at the start of an identifier: a letter or '_'.
+constexpr auto nameChar = alphaNumeric.orChar('_');
+// Characters allowed after the start of an identifier: a letter, a decimal digit, or '_'.
+constexpr auto hexDigit = charRange('0', '9').orRange('a', 'f').orRange('A', 'F');
+// Hexadecimal digits, accepting both lower- and upper-case letters.
+constexpr auto octDigit = charRange('0', '7');
+// The octal digits '0'-'7'.
+constexpr auto whitespaceChar = anyOfChars(" \f\n\r\t\v");
+// A single space, form feed, newline, carriage return, tab, or vertical tab.
+constexpr auto controlChar = charRange(0, 0x1f).invert().orGroup(whitespaceChar).invert();
+// Characters in the range 0x00-0x1f other than the whitespace characters above.  (The double
+// inversion computes "in the range AND NOT whitespace" using only union and complement.)
+
+constexpr auto whitespace = many(anyOfChars(" \f\n\r\t\v"));
+// A run of whitespace characters (the same set as `whitespaceChar`), collected by many().
+// NOTE(review): many() is defined outside this file; presumably it accepts zero or more
+// repetitions, in contrast to oneOrMore() -- confirm against common.h.
+
+constexpr auto discardWhitespace = discard(many(discard(anyOfChars(" \f\n\r\t\v"))));
+// Like discard(whitespace) but avoids some memory allocation.
+
+// =======================================================================================
+// Identifiers
+
+namespace _ { // private
+
+struct IdentifierToString {
+  // Transform functor for `identifier`: joins the first character and the remaining characters
+  // of a parsed identifier into a single String, `first` followed by the contents of `rest`.
+  inline String operator()(char first, const Array<char>& rest) const {
+    String result = heapString(rest.size() + 1);
+    result[0] = first;
+    if (rest.size() > 0) {
+      // memcpy() requires valid pointers even when asked to copy zero bytes, and an empty Array
+      // may have no storage behind begin().  A one-character identifier therefore skips the
+      // copy entirely.
+      memcpy(result.begin() + 1, rest.begin(), rest.size());
+    }
+    return result;
+  }
+};
+
+}  // namespace _ (private)
+
+constexpr auto identifier = transform(sequence(nameStart, many(nameChar)), _::IdentifierToString());
+// Parses an identifier (e.g. a C variable name).
+//
+// The grammar is one `nameStart` character (a letter or '_') followed by a run of `nameChar`
+// characters (letters, decimal digits, or '_').  The pieces are joined into a String by
+// _::IdentifierToString.
+
+// =======================================================================================
+// Integers
+
+namespace _ {  // private
+
+inline char parseDigit(char c) {
+  // Converts a digit character to its numeric value: characters below 'A' are measured from
+  // '0', characters from 'A' up to (but excluding) 'a' are measured from 'A' plus ten, and
+  // everything else is measured from 'a' plus ten.  No validation is performed; callers are
+  // expected to pass only characters which the grammar already accepted as digits.
+  const char zeroPoint = c < 'A' ? '0' : (c < 'a' ? 'A' : 'a');
+  const int letterOffset = c < 'A' ? 0 : 10;
+  return c - zeroPoint + letterOffset;
+}
+
+template <uint base>
+struct ParseInteger {
+  // Transform functor which folds digit characters into an unsigned 64-bit value in the given
+  // `base`, most significant digit first.  Arithmetic is plain uint64_t arithmetic; no overflow
+  // check is made.
+
+  inline uint64_t operator()(const Array<char>& digits) const {
+    // No separate leading digit: behave as though the number started with a '0'.
+    return (*this)('0', digits);
+  }
+
+  uint64_t operator()(char first, const Array<char>& digits) const {
+    uint64_t value = parseDigit(first);
+    for (char c: digits) {
+      value = value * base + parseDigit(c);
+    }
+    return value;
+  }
+};
+
+
+}  // namespace _ (private)
+
+// Parses an unsigned integer literal and yields its value via _::ParseInteger.  Three forms are
+// tried in order:
+//   - "0x" followed by one or more hex digits (base 16);
+//   - '0' followed by a run of octal digits (base 8);
+//   - a digit '1'-'9' followed by a run of decimal digits (base 10).
+// In every case the literal must not be immediately followed by a letter, '_', or '.'
+// (enforced by notLookingAt()).
+// NOTE(review): the combinators used here (sequence, oneOf, transform, many, oneOrMore,
+// notLookingAt) are defined outside this file; e.g. whether a lone "0" matches the octal form
+// depends on many() accepting zero repetitions -- confirm against common.h.
+constexpr auto integer = sequence(
+    oneOf(
+      transform(sequence(exactChar<'0'>(), exactChar<'x'>(), oneOrMore(hexDigit)), _::ParseInteger<16>()),
+      transform(sequence(exactChar<'0'>(), many(octDigit)), _::ParseInteger<8>()),
+      transform(sequence(charRange('1', '9'), many(digit)), _::ParseInteger<10>())),
+    notLookingAt(alpha.orAny("_.")));
+
+// =======================================================================================
+// Numbers (i.e. floats)
+
+namespace _ {  // private
+
+struct ParseFloat {
+  // Transform functor used by `number` to turn the pieces of a floating-point literal into a
+  // double.  Only the declaration appears in this header; the definition lives elsewhere.
+  //
+  // The parameters line up with the sub-parsers in `number`:
+  //   digits   - the digits before any decimal point;
+  //   fraction - the digits after the '.', if a '.' was present;
+  //   exponent - the optional sign character and the digits after 'e'/'E', if an exponent
+  //              was present.
+  double operator()(const Array<char>& digits,
+                    const Maybe<Array<char>>& fraction,
+                    const Maybe<Tuple<Maybe<char>, Array<char>>>& exponent) const;
+};
+
+}  // namespace _ (private)
+
+// Parses a floating-point literal and converts it with _::ParseFloat.  The grammar is:
+//   - one or more decimal digits;
+//   - optionally, a '.' followed by a run of decimal digits;
+//   - optionally, 'e' or 'E', an optional '+' or '-', and a run of decimal digits;
+// and the literal must not be immediately followed by a letter, '_', or '.'.
+// NOTE(review): the runs after '.' and after the exponent marker use many(), not oneOrMore();
+// if many() accepts zero repetitions then inputs like "1." and "1e" are accepted by this
+// grammar -- confirm against common.h and ParseFloat's definition.
+constexpr auto number = transform(
+    sequence(
+        oneOrMore(digit),
+        optional(sequence(exactChar<'.'>(), many(digit))),
+        optional(sequence(discard(anyOfChars("eE")), optional(anyOfChars("+-")), many(digit))),
+        notLookingAt(alpha.orAny("_."))),
+    _::ParseFloat());
+
+// =======================================================================================
+// Quoted strings
+
+namespace _ {  // private
+
+struct InterpretEscape {
+  // Maps the character following a backslash in a single-letter escape to the character it
+  // denotes.  The seven letters a, b, f, n, r, t, and v map to the corresponding C control
+  // characters; any other character stands for itself.
+  char operator()(char c) const {
+    if (c == 'a') return '\a';
+    if (c == 'b') return '\b';
+    if (c == 'f') return '\f';
+    if (c == 'n') return '\n';
+    if (c == 'r') return '\r';
+    if (c == 't') return '\t';
+    if (c == 'v') return '\v';
+    return c;
+  }
+};
+
+struct ParseHexEscape {
+  // Combines the two hex digit characters of an escape into one char: `first` supplies the
+  // high four bits and `second` the low four bits.
+  inline char operator()(char first, char second) const {
+    const int highNibble = parseDigit(first) << 4;
+    const int lowNibble = parseDigit(second);
+    return highNibble | lowNibble;
+  }
+};
+
+struct ParseHexByte {
+  // Combines two hex digit characters into one byte: `first` supplies the high four bits and
+  // `second` the low four bits.
+  inline byte operator()(char first, char second) const {
+    const int highNibble = parseDigit(first) << 4;
+    const int lowNibble = parseDigit(second);
+    return highNibble | lowNibble;
+  }
+};
+
+struct ParseOctEscape {
+  // Converts the one to three octal digit characters of an escape into a char.  Each further
+  // digit shifts the accumulated value left by three bits.  The third digit is only examined
+  // when the second one is present.
+  inline char operator()(char first, Maybe<char> second, Maybe<char> third) const {
+    char value = first - '0';
+    KJ_IF_MAYBE(middle, second) {
+      value = (value << 3) | (*middle - '0');
+      KJ_IF_MAYBE(last, third) {
+        value = (value << 3) | (*last - '0');
+      }
+    }
+    return value;
+  }
+};
+
+}  // namespace _ (private)
+
+constexpr auto escapeSequence =
+    sequence(exactChar<'\\'>(), oneOf(
+        transform(anyOfChars("abfnrtv'\"\\\?"), _::InterpretEscape()),
+        transform(sequence(exactChar<'x'>(), hexDigit, hexDigit), _::ParseHexEscape()),
+        transform(sequence(octDigit, optional(octDigit), optional(octDigit)),
+                  _::ParseOctEscape())));
+// A parser that parses a C-string-style escape sequence (starting with a backslash).  Returns
+// a char.
+//
+// After the backslash, three forms are tried in order:
+//   - one of the characters a b f n r t v ' " \ ?  (interpreted by _::InterpretEscape);
+//   - 'x' followed by exactly two hex digits (_::ParseHexEscape);
+//   - one to three octal digits (_::ParseOctEscape).
+
+constexpr auto doubleQuotedString = charsToString(sequence(
+    exactChar<'\"'>(),
+    many(oneOf(anyOfChars("\\\n\"").invert(), escapeSequence)),
+    exactChar<'\"'>()));
+// Parses a C-style double-quoted string.
+//
+// Between the quotes, each element is either an `escapeSequence` or any single character other
+// than a backslash, a newline, or a double quote.  The collected characters are converted to a
+// String by charsToString().
+
+constexpr auto singleQuotedString = charsToString(sequence(
+    exactChar<'\''>(),
+    many(oneOf(anyOfChars("\\\n\'").invert(), escapeSequence)),
+    exactChar<'\''>()));
+// Parses a C-style single-quoted string.
+//
+// Between the quotes, each element is either an `escapeSequence` or any single character other
+// than a backslash, a newline, or a single quote.  The collected characters are converted to a
+// String by charsToString().
+
+constexpr auto doubleQuotedHexBinary = sequence(
+    exactChar<'0'>(), exactChar<'x'>(), exactChar<'\"'>(),
+    oneOrMore(transform(sequence(discardWhitespace, hexDigit, hexDigit), _::ParseHexByte())),
+    discardWhitespace,
+    exactChar<'\"'>());
+// Parses a double-quoted hex binary literal. Returns Array<byte>.
+//
+// The literal has the form `0x"..."`.  Inside the quotes there must be at least one byte, each
+// written as two adjacent hex digits.  Whitespace is skipped before each byte and before the
+// closing quote, but not between the two digits of a byte.
+
+}  // namespace parse
+}  // namespace kj
+
+#endif  // KJ_PARSE_CHAR_H_