diff src/capnproto-0.6.0/c++/src/capnp/schema-parser.c++ @ 147:45360b968bf4

Cap'n Proto v0.6 + build for OSX
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 22 May 2017 10:01:37 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/capnproto-0.6.0/c++/src/capnp/schema-parser.c++	Mon May 22 10:01:37 2017 +0100
@@ -0,0 +1,492 @@
+// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
+// Licensed under the MIT License:
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "schema-parser.h"
+#include "message.h"
+#include <capnp/compiler/compiler.h>
+#include <capnp/compiler/lexer.capnp.h>
+#include <capnp/compiler/lexer.h>
+#include <capnp/compiler/grammar.capnp.h>
+#include <capnp/compiler/parser.h>
+#include <unordered_map>
+#include <kj/mutex.h>
+#include <kj/vector.h>
+#include <kj/debug.h>
+#include <kj/io.h>
+#include <kj/miniposix.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#if _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#endif
+
+namespace capnp {
+
+namespace {
+
+template <typename T>
+size_t findLargestElementBefore(const kj::Vector<T>& vec, const T& key) {
+  KJ_REQUIRE(vec.size() > 0 && vec[0] <= key);
+
+  size_t lower = 0;
+  size_t upper = vec.size();
+
+  while (upper - lower > 1) {
+    size_t mid = (lower + upper) / 2;
+    if (vec[mid] > key) {
+      upper = mid;
+    } else {
+      lower = mid;
+    }
+  }
+
+  return lower;
+}
+
+}  // namespace
+
+// =======================================================================================
+
+class SchemaParser::ModuleImpl final: public compiler::Module {
+public:
+  ModuleImpl(const SchemaParser& parser, kj::Own<const SchemaFile>&& file)
+      : parser(parser), file(kj::mv(file)) {}
+
+  kj::StringPtr getSourceName() override {
+    return file->getDisplayName();
+  }
+
+  Orphan<compiler::ParsedFile> loadContent(Orphanage orphanage) override {
+    kj::Array<const char> content = file->readContent();
+
+    lineBreaks.get([&](kj::SpaceFor<kj::Vector<uint>>& space) {
+      auto vec = space.construct(content.size() / 40);
+      vec->add(0);
+      for (const char* pos = content.begin(); pos < content.end(); ++pos) {
+        if (*pos == '\n') {
+          vec->add(pos + 1 - content.begin());
+        }
+      }
+      return vec;
+    });
+
+    MallocMessageBuilder lexedBuilder;
+    auto statements = lexedBuilder.initRoot<compiler::LexedStatements>();
+    compiler::lex(content, statements, *this);
+
+    auto parsed = orphanage.newOrphan<compiler::ParsedFile>();
+    compiler::parseFile(statements.getStatements(), parsed.get(), *this);
+    return parsed;
+  }
+
+  kj::Maybe<Module&> importRelative(kj::StringPtr importPath) override {
+    KJ_IF_MAYBE(importedFile, file->import(importPath)) {
+      return parser.getModuleImpl(kj::mv(*importedFile));
+    } else {
+      return nullptr;
+    }
+  }
+
+  kj::Maybe<kj::Array<const byte>> embedRelative(kj::StringPtr embedPath) override {
+    KJ_IF_MAYBE(importedFile, file->import(embedPath)) {
+      return importedFile->get()->readContent().releaseAsBytes();
+    } else {
+      return nullptr;
+    }
+  }
+
+  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
+    auto& lines = lineBreaks.get(
+        [](kj::SpaceFor<kj::Vector<uint>>& space) {
+          KJ_FAIL_REQUIRE("Can't report errors until loadContent() is called.");
+          return space.construct();
+        });
+
+    // TODO(someday):  This counts tabs as single characters.  Do we care?
+    uint startLine = findLargestElementBefore(lines, startByte);
+    uint startCol = startByte - lines[startLine];
+    uint endLine = findLargestElementBefore(lines, endByte);
+    uint endCol = endByte - lines[endLine];
+
+    file->reportError(
+        SchemaFile::SourcePos { startByte, startLine, startCol },
+        SchemaFile::SourcePos { endByte, endLine, endCol },
+        message);
+
+    // We intentionally only set hadErrors true if reportError() didn't throw.
+    parser.hadErrors = true;
+  }
+
+  bool hadErrors() override {
+    return parser.hadErrors;
+  }
+
+private:
+  const SchemaParser& parser;
+  kj::Own<const SchemaFile> file;
+
+  kj::Lazy<kj::Vector<uint>> lineBreaks;
+  // Byte offsets of the first byte in each source line.  The first element is always zero.
+  // Initialized the first time the module is loaded.
+};
+
+// =======================================================================================
+
+namespace {
+
+struct SchemaFileHash {
+  inline bool operator()(const SchemaFile* f) const {
+    return f->hashCode();
+  }
+};
+
+struct SchemaFileEq {
+  inline bool operator()(const SchemaFile* a, const SchemaFile* b) const {
+    return *a == *b;
+  }
+};
+
+}  // namespace
+
+struct SchemaParser::Impl {
+  typedef std::unordered_map<
+      const SchemaFile*, kj::Own<ModuleImpl>, SchemaFileHash, SchemaFileEq> FileMap;
+  kj::MutexGuarded<FileMap> fileMap;
+  compiler::Compiler compiler;
+};
+
+SchemaParser::SchemaParser(): impl(kj::heap<Impl>()) {}
+SchemaParser::~SchemaParser() noexcept(false) {}
+
+ParsedSchema SchemaParser::parseDiskFile(
+    kj::StringPtr displayName, kj::StringPtr diskPath,
+    kj::ArrayPtr<const kj::StringPtr> importPath) const {
+  return parseFile(SchemaFile::newDiskFile(displayName, diskPath, importPath));
+}
+
+ParsedSchema SchemaParser::parseFile(kj::Own<SchemaFile>&& file) const {
+  KJ_DEFER(impl->compiler.clearWorkspace());
+  uint64_t id = impl->compiler.add(getModuleImpl(kj::mv(file)));
+  impl->compiler.eagerlyCompile(id,
+      compiler::Compiler::NODE | compiler::Compiler::CHILDREN |
+      compiler::Compiler::DEPENDENCIES | compiler::Compiler::DEPENDENCY_DEPENDENCIES);
+  return ParsedSchema(impl->compiler.getLoader().get(id), *this);
+}
+
+SchemaParser::ModuleImpl& SchemaParser::getModuleImpl(kj::Own<SchemaFile>&& file) const {
+  auto lock = impl->fileMap.lockExclusive();
+
+  auto insertResult = lock->insert(std::make_pair(file.get(), kj::Own<ModuleImpl>()));
+  if (insertResult.second) {
+    // This is a newly-inserted entry.  Construct the ModuleImpl.
+    insertResult.first->second = kj::heap<ModuleImpl>(*this, kj::mv(file));
+  }
+  return *insertResult.first->second;
+}
+
+SchemaLoader& SchemaParser::getLoader() {
+  return impl->compiler.getLoader();
+}
+
+kj::Maybe<ParsedSchema> ParsedSchema::findNested(kj::StringPtr name) const {
+  return parser->impl->compiler.lookup(getProto().getId(), name).map(
+      [this](uint64_t childId) {
+        return ParsedSchema(parser->impl->compiler.getLoader().get(childId), *parser);
+      });
+}
+
+ParsedSchema ParsedSchema::getNested(kj::StringPtr nestedName) const {
+  KJ_IF_MAYBE(nested, findNested(nestedName)) {
+    return *nested;
+  } else {
+    KJ_FAIL_REQUIRE("no such nested declaration", getProto().getDisplayName(), nestedName);
+  }
+}
+
+// =======================================================================================
+
+namespace {
+
+class MmapDisposer: public kj::ArrayDisposer {
+protected:
+  void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
+                   size_t capacity, void (*destroyElement)(void*)) const {
+#if _WIN32
+    KJ_ASSERT(UnmapViewOfFile(firstElement));
+#else
+    munmap(firstElement, elementSize * elementCount);
+#endif
+  }
+};
+
+KJ_CONSTEXPR(static const) MmapDisposer mmapDisposer = MmapDisposer();
+
+static char* canonicalizePath(char* path) {
+  // Taken from some old C code of mine.
+
+  // Preconditions:
+  // - path has already been determined to be relative, perhaps because the pointer actually points
+  //   into the middle of some larger path string, in which case it must point to the character
+  //   immediately after a '/'.
+
+  // Invariants:
+  // - src points to the beginning of a path component.
+  // - dst points to the location where the path component should end up, if it is not special.
+  // - src == path or src[-1] == '/'.
+  // - dst == path or dst[-1] == '/'.
+
+  char* src = path;
+  char* dst = path;
+  char* locked = dst;  // dst cannot backtrack past this
+  char* partEnd;
+  bool hasMore;
+
+  for (;;) {
+    while (*src == '/') {
+      // Skip duplicate slash.
+      ++src;
+    }
+
+    partEnd = strchr(src, '/');
+    hasMore = partEnd != NULL;
+    if (hasMore) {
+      *partEnd = '\0';
+    } else {
+      partEnd = src + strlen(src);
+    }
+
+    if (strcmp(src, ".") == 0) {
+      // Skip it.
+    } else if (strcmp(src, "..") == 0) {
+      if (dst > locked) {
+        // Backtrack over last path component.
+        --dst;
+        while (dst > locked && dst[-1] != '/') --dst;
+      } else {
+        locked += 3;
+        goto copy;
+      }
+    } else {
+      // Copy if needed.
+    copy:
+      if (dst < src) {
+        memmove(dst, src, partEnd - src);
+        dst += partEnd - src;
+      } else {
+        dst = partEnd;
+      }
+      *dst++ = '/';
+    }
+
+    if (hasMore) {
+      src = partEnd + 1;
+    } else {
+      // Oops, we have to remove the trailing '/'.
+      if (dst == path) {
+        // Oops, there is no trailing '/'.  We have to return ".".
+        strcpy(path, ".");
+        return path + 1;
+      } else {
+        // Remove the trailing '/'.  Note that this means that opening the file will work even
+        // if it is not a directory, where normally it should fail on non-directories when a
+        // trailing '/' is present.  If this is a problem, we need to add some sort of special
+        // handling for this case where we stat() it separately to check if it is a directory,
+        // because Ekam findInput will not accept a trailing '/'.
+        --dst;
+        *dst = '\0';
+        return dst;
+      }
+    }
+  }
+}
+
+kj::String canonicalizePath(kj::StringPtr path) {
+  KJ_STACK_ARRAY(char, result, path.size() + 1, 128, 512);
+  strcpy(result.begin(), path.begin());
+
+  char* start = path.startsWith("/") ? result.begin() + 1 : result.begin();
+  char* end = canonicalizePath(start);
+  return kj::heapString(result.slice(0, end - result.begin()));
+}
+
+kj::String relativePath(kj::StringPtr base, kj::StringPtr add) {
+  if (add.size() > 0 && add[0] == '/') {
+    return kj::heapString(add);
+  }
+
+  const char* pos = base.end();
+  while (pos > base.begin() && pos[-1] != '/') {
+    --pos;
+  }
+
+  return kj::str(base.slice(0, pos - base.begin()), add);
+}
+
+kj::String joinPath(kj::StringPtr base, kj::StringPtr add) {
+  KJ_REQUIRE(!add.startsWith("/"));
+
+  return kj::str(base, '/', add);
+}
+
+}  // namespace
+
+const SchemaFile::DiskFileReader SchemaFile::DiskFileReader::instance =
+    SchemaFile::DiskFileReader();
+
+bool SchemaFile::DiskFileReader::exists(kj::StringPtr path) const {
+  return access(path.cStr(), F_OK) == 0;
+}
+
+kj::Array<const char> SchemaFile::DiskFileReader::read(kj::StringPtr path) const {
+  int fd;
+  // We already established that the file exists, so this should not fail.
+  KJ_SYSCALL(fd = open(path.cStr(), O_RDONLY), path);
+  kj::AutoCloseFd closer(fd);
+
+  struct stat stats;
+  KJ_SYSCALL(fstat(fd, &stats));
+
+  if (S_ISREG(stats.st_mode)) {
+    if (stats.st_size == 0) {
+      // mmap()ing zero bytes will fail.
+      return nullptr;
+    }
+
+    // Regular file.  Just mmap() it.
+#if _WIN32
+    HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+    KJ_ASSERT(handle != INVALID_HANDLE_VALUE);
+    HANDLE mappingHandle = CreateFileMapping(
+        handle, NULL, PAGE_READONLY, 0, stats.st_size, NULL);
+    KJ_ASSERT(mappingHandle != INVALID_HANDLE_VALUE);
+    KJ_DEFER(KJ_ASSERT(CloseHandle(mappingHandle)));
+    const void* mapping = MapViewOfFile(mappingHandle, FILE_MAP_READ, 0, 0, stats.st_size);
+#else  // _WIN32
+    const void* mapping = mmap(NULL, stats.st_size, PROT_READ, MAP_SHARED, fd, 0);
+    if (mapping == MAP_FAILED) {
+      KJ_FAIL_SYSCALL("mmap", errno, path);
+    }
+#endif  // !_WIN32
+
+    return kj::Array<const char>(
+        reinterpret_cast<const char*>(mapping), stats.st_size, mmapDisposer);
+  } else {
+    // This could be a stream of some sort, like a pipe.  Fall back to read().
+    // TODO(cleanup):  This does a lot of copies.  Not sure I care.
+    kj::Vector<char> data(8192);
+
+    char buffer[4096];
+    for (;;) {
+      kj::miniposix::ssize_t n;
+      KJ_SYSCALL(n = ::read(fd, buffer, sizeof(buffer)));
+      if (n == 0) break;
+      data.addAll(buffer, buffer + n);
+    }
+
+    return data.releaseAsArray();
+  }
+}
+
+// -------------------------------------------------------------------
+
+class SchemaFile::DiskSchemaFile final: public SchemaFile {
+public:
+  DiskSchemaFile(const FileReader& fileReader, kj::String displayName,
+                 kj::String diskPath, kj::ArrayPtr<const kj::StringPtr> importPath)
+      : fileReader(fileReader),
+        displayName(kj::mv(displayName)),
+        diskPath(kj::mv(diskPath)),
+        importPath(importPath) {}
+
+  kj::StringPtr getDisplayName() const override {
+    return displayName;
+  }
+
+  kj::Array<const char> readContent() const override {
+    return fileReader.read(diskPath);
+  }
+
+  kj::Maybe<kj::Own<SchemaFile>> import(kj::StringPtr path) const override {
+    if (path.startsWith("/")) {
+      for (auto candidate: importPath) {
+        kj::String newDiskPath = canonicalizePath(joinPath(candidate, path.slice(1)));
+        if (fileReader.exists(newDiskPath)) {
+          return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
+              fileReader, canonicalizePath(path.slice(1)),
+              kj::mv(newDiskPath), importPath));
+        }
+      }
+      return nullptr;
+    } else {
+      kj::String newDiskPath = canonicalizePath(relativePath(diskPath, path));
+      if (fileReader.exists(newDiskPath)) {
+        return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
+            fileReader, canonicalizePath(relativePath(displayName, path)),
+            kj::mv(newDiskPath), importPath));
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
+  bool operator==(const SchemaFile& other) const override {
+    return diskPath == kj::downcast<const DiskSchemaFile>(other).diskPath;
+  }
+  bool operator!=(const SchemaFile& other) const override {
+    return diskPath != kj::downcast<const DiskSchemaFile>(other).diskPath;
+  }
+  size_t hashCode() const override {
+    // djb hash with xor
+    // TODO(someday):  Add hashing library to KJ.
+    size_t result = 5381;
+    for (char c: diskPath) {
+      result = (result * 33) ^ c;
+    }
+    return result;
+  }
+
+  void reportError(SourcePos start, SourcePos end, kj::StringPtr message) const override {
+    kj::getExceptionCallback().onRecoverableException(kj::Exception(
+        kj::Exception::Type::FAILED, kj::heapString(diskPath), start.line,
+        kj::heapString(message)));
+  }
+
+private:
+  const FileReader& fileReader;
+  kj::String displayName;
+  kj::String diskPath;
+  kj::ArrayPtr<const kj::StringPtr> importPath;
+};
+
+kj::Own<SchemaFile> SchemaFile::newDiskFile(
+    kj::StringPtr displayName, kj::StringPtr diskPath,
+    kj::ArrayPtr<const kj::StringPtr> importPath,
+    const FileReader& fileReader) {
+  return kj::heap<DiskSchemaFile>(fileReader, canonicalizePath(displayName),
+                                  canonicalizePath(diskPath), importPath);
+}
+
+}  // namespace capnp