Mercurial > hg > sv-dependency-builds
diff osx/include/capnp/message.h @ 49:3ab5a40c4e3b
Add Capnp and KJ builds for OSX
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Tue, 25 Oct 2016 14:48:23 +0100 |
parents | |
children | 0994c39f1e94 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/osx/include/capnp/message.h Tue Oct 25 14:48:23 2016 +0100 @@ -0,0 +1,508 @@ +// Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors +// Licensed under the MIT License: +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <kj/common.h> +#include <kj/memory.h> +#include <kj/mutex.h> +#include <kj/debug.h> +#include "common.h" +#include "layout.h" +#include "any.h" + +#ifndef CAPNP_MESSAGE_H_ +#define CAPNP_MESSAGE_H_ + +#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) +#pragma GCC system_header +#endif + +namespace capnp { + +namespace _ { // private + class ReaderArena; + class BuilderArena; +} + +class StructSchema; +class Orphanage; +template <typename T> +class Orphan; + +// ======================================================================================= + +struct ReaderOptions { + // Options controlling how data is read. 
+ + uint64_t traversalLimitInWords = 8 * 1024 * 1024; + // Limits how many total words of data are allowed to be traversed. Traversal is counted when + // a new struct or list builder is obtained, e.g. from a get() accessor. This means that calling + // the getter for the same sub-struct multiple times will cause it to be double-counted. Once + // the traversal limit is reached, an error will be reported. + // + // This limit exists for security reasons. It is possible for an attacker to construct a message + // in which multiple pointers point at the same location. This is technically invalid, but hard + // to detect. Using such a message, an attacker could cause a message which is small on the wire + // to appear much larger when actually traversed, possibly exhausting server resources leading to + // denial-of-service. + // + // It makes sense to set a traversal limit that is much larger than the underlying message. + // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters + // multiple times, which is expensive anyway), this should provide adequate protection without + // inconvenience. + // + // The default limit is 64 MiB. This may or may not be a sensible number for any given use case, + // but probably at least prevents easy exploitation while also avoiding causing problems in most + // typical cases. + + int nestingLimit = 64; + // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or + // lists of structs. + // + // Like the traversal limit, this limit exists for security reasons. Since it is common to use + // recursive code to traverse recursive data structures, an attacker could easily cause a stack + // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even + // being very large. The default limit of 64 is probably low enough to prevent any chance of + // stack overflow, yet high enough that it is never a problem in practice. 
+}; + +class MessageReader { + // Abstract interface for an object used to read a Cap'n Proto message. Subclasses of + // MessageReader are responsible for reading the raw, flat message content. Callers should + // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader` + // representing the root of the message, then use that to traverse the message content. + // + // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose + // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from + // `serialize.h`), which reads the message from a file descriptor. One might implement other + // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc. + +public: + MessageReader(ReaderOptions options); + // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a + // default value of "ReaderOptions()". The base class constructor doesn't have a default value + // in order to remind subclasses that they really need to give the user a way to provide this. + + virtual ~MessageReader() noexcept(false); + + virtual kj::ArrayPtr<const word> getSegment(uint id) = 0; + // Gets the segment with the given ID, or returns null if no such segment exists. This method + // will be called at most once for each segment ID. + + inline const ReaderOptions& getOptions(); + // Get the options passed to the constructor. + + template <typename RootType> + typename RootType::Reader getRoot(); + // Get the root struct of the message, interpreting it as the given struct type. + + template <typename RootType, typename SchemaType> + typename RootType::Reader getRoot(SchemaType schema); + // Dynamically interpret the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. 
+ + bool isCanonical(); + // Returns whether the message encoded in the reader is in canonical form. + +private: + ReaderOptions options; + + // Space in which we can construct a ReaderArena. We don't use ReaderArena directly here + // because we don't want clients to have to #include arena.h, which itself includes a bunch of + // big STL headers. We don't use a pointer to a ReaderArena because that would require an + // extra malloc on every message which could be expensive when processing small messages. + void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)]; + bool allocatedArena; + + _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); } + AnyPointer::Reader getRootInternal(); +}; + +class MessageBuilder { + // Abstract interface for an object used to allocate and build a message. Subclasses of + // MessageBuilder are responsible for allocating the space in which the message will be written. + // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do + // tricky things like allocate messages in shared memory or mmap()ed files. + // + // Creating a new message usually means allocating a new MessageBuilder (ideally on the stack) + // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`. + // That, in turn, can be used to fill in the message content. When done, you can call + // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the + // message. + +public: + MessageBuilder(); + virtual ~MessageBuilder() noexcept(false); + KJ_DISALLOW_COPY(MessageBuilder); + + struct SegmentInit { + kj::ArrayPtr<word> space; + + size_t wordsUsed; + // Number of words in `space` which are used; the rest are free space in which additional + // objects may be allocated. + }; + + explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments); + // Create a MessageBuilder backed by existing memory. 
This is an advanced interface that most + // people should not use. THIS METHOD IS INSECURE; see below. + // + // This allows a MessageBuilder to be constructed to modify an in-memory message without first + // making a copy of the content. This is especially useful in conjunction with mmap(). + // + // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself + // only need outlive the constructor. + // + // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that + // the input message is valid. This constructor is designed to be used with data you control, + // e.g. an mmap'd file which is owned and accessed by only one program. When reading data you + // do not trust, you *must* load it into a Reader and then copy into a Builder as a means of + // validating the content. + // + // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is + // currently in use by another MessageBuilder or MessageReader. Other readers/builders will + // not observe changes to the segment sizes nor newly-allocated segments caused by allocating + // new objects in this message. + + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0; + // Allocates an array of at least the given number of words, throwing an exception or crashing if + // this is not possible. It is expected that this method will usually return more space than + // requested, and the caller should use that extra space as much as possible before allocating + // more. The returned space remains valid at least until the MessageBuilder is destroyed. + // + // Cap'n Proto will only call this once at a time, so the subclass need not worry about + // thread-safety. + + template <typename RootType> + typename RootType::Builder initRoot(); + // Initialize the root struct of the message as the given struct type. 
+ + template <typename Reader> + void setRoot(Reader&& value); + // Set the root struct to a deep copy of the given struct. + + template <typename RootType> + typename RootType::Builder getRoot(); + // Get the root struct of the message, interpreting it as the given struct type. + + template <typename RootType, typename SchemaType> + typename RootType::Builder getRoot(SchemaType schema); + // Dynamically interpret the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. + + template <typename RootType, typename SchemaType> + typename RootType::Builder initRoot(SchemaType schema); + // Dynamically init the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. + + template <typename T> + void adoptRoot(Orphan<T>&& orphan); + // Like setRoot() but adopts the orphan without copying. + + kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput(); + // Get the raw data that makes up the message. + + Orphanage getOrphanage(); + + bool isCanonical(); + // Check whether the message builder is in canonical form + +private: + void* arenaSpace[22]; + // Space in which we can construct a BuilderArena. We don't use BuilderArena directly here + // because we don't want clients to have to #include arena.h, which itself includes a bunch of + // big STL headers. We don't use a pointer to a BuilderArena because that would require an + // extra malloc on every message which could be expensive when processing small messages. + + bool allocatedArena = false; + // We have to initialize the arena lazily because when we do so we want to allocate the root + // pointer immediately, and this will allocate a segment, which requires a virtual function + // call on the MessageBuilder. 
We can't do such a call in the constructor since the subclass + // isn't constructed yet. This is kind of annoying because it means that getOrphanage() is + // not thread-safe, but that shouldn't be a huge deal... + + _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); } + _::SegmentBuilder* getRootSegment(); + AnyPointer::Builder getRootInternal(); +}; + +template <typename RootType> +typename RootType::Reader readMessageUnchecked(const word* data); +// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP, +// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY. DO NOT +// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES. +// +// Given a pointer to a known-valid message located in a single contiguous memory segment, +// returns a reader for that message. No bounds-checking will be done while traversing this +// message. Use this only if you have already verified that all pointers are valid and in-bounds, +// and there are no far pointers in the message. +// +// To create a message that can be passed to this function, build a message using a +// MallocMessageBuilder whose first segment size is larger than the message size. This guarantees that the message +// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word +// array. That word array is your message; you may pass a pointer to its first word into +// readMessageUnchecked() to read the message. +// +// This can be particularly handy for embedding messages in generated code: you can +// embed the raw bytes (using AlignedData) then make a Reader for it using this. This is the way +// default values are embedded in code generated by the Cap'n Proto compiler. 
E.g., if you have +// a message MyMessage, you can read its default value like so: +// MyMessage::Reader reader = readMessageUnchecked<MyMessage>(MyMessage::DEFAULT.words); +// +// To sanitize a message from an untrusted source such that it can be safely passed to +// readMessageUnchecked(), use copyToUnchecked(). + +template <typename Reader> +void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer); +// Copy the content of the given reader into the given buffer, such that it can safely be passed to +// readMessageUnchecked(). The buffer's size must be exactly reader.totalSizeInWords() + 1, +// otherwise an exception will be thrown. The buffer must be zero'd before calling. + +template <typename RootType> +typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data); +// Interprets the given data as a single, data-only struct. Only primitive fields (booleans, +// numbers, and enums) will be readable; all pointers will be null. This is useful if you want +// to use Cap'n Proto as a language/platform-neutral way to pack some bits. +// +// The input is a word array rather than a byte array to enforce alignment. If you have a byte +// array which you know is word-aligned (or if your platform supports unaligned reads and you don't +// mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into +// a word array: +// +// kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()), +// reinterpret_cast<const word*>(bytes.end())) + +template <typename BuilderType> +typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder); +// Given a struct builder, get the underlying data section as a word array, suitable for passing +// to `readDataStruct()`. +// +// Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`. + +template <typename Type> +static typename Type::Reader defaultValue(); +// Get a default instance of the given struct or list type. 
+// +// TODO(cleanup): Find a better home for this function? + +// ======================================================================================= + +class SegmentArrayMessageReader: public MessageReader { + // A simple MessageReader that reads from an array of word arrays representing all segments. + // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput() + // (although it would probably make more sense to call builder.getRoot().asReader() in that case). + +public: + SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments, + ReaderOptions options = ReaderOptions()); + // Creates a message pointing at the given segment array, without taking ownership of the + // segments. All arrays passed in must remain valid until the MessageReader is destroyed. + + KJ_DISALLOW_COPY(SegmentArrayMessageReader); + ~SegmentArrayMessageReader() noexcept(false); + + virtual kj::ArrayPtr<const word> getSegment(uint id) override; + +private: + kj::ArrayPtr<const kj::ArrayPtr<const word>> segments; +}; + +enum class AllocationStrategy: uint8_t { + FIXED_SIZE, + // The builder will prefer to allocate the same amount of space for each segment with no + // heuristic growth. It will still allocate larger segments when the preferred size is too small + // for some single object. This mode is generally not recommended, but can be particularly useful + // for testing in order to force a message to allocate a predictable number of segments. Note + // that you can force every single object in the message to be located in a separate segment by + // using this mode with firstSegmentWords = 0. + + GROW_HEURISTICALLY + // The builder will heuristically decide how much space to allocate for each segment. Each + // allocated segment will be progressively larger than the previous segments on the assumption + // that message sizes are exponentially distributed. 
The total number of segments that will be + // allocated for a message of size n is O(log n). +}; + +constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024; +constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY; + +class MallocMessageBuilder: public MessageBuilder { + // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments. This + // implementation should be reasonable for any case that doesn't require writing the message to + // a specific location in memory. + +public: + explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS, + AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); + // Creates a MallocMessageBuilder which allocates at least the given number of words for the first + // segment, and then uses the given strategy to decide how much to allocate for subsequent + // segments. When choosing a value for firstSegmentWords, consider that: + // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good + // if most messages fit in a single segment. + // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate + // more space than you need. It only becomes problematic if you are allocating many messages + // in parallel and thus use lots of memory, or if you allocate so much extra space that just + // zeroing it out becomes a bottleneck. + // The defaults have been chosen to be reasonable for most people, so don't change them unless you + // have reason to believe you need to. + + explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment, + AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); + // This version always returns the given array for the first segment, and then proceeds with the + // allocation strategy. 
This is useful for optimization when building lots of small messages in + // a tight loop: you can reuse the space for the first segment. + // + // firstSegment MUST be zero-initialized. MallocMessageBuilder's destructor will write new zeros + // over any space that was used so that it can be reused. + + KJ_DISALLOW_COPY(MallocMessageBuilder); + virtual ~MallocMessageBuilder() noexcept(false); + + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; + +private: + uint nextSize; + AllocationStrategy allocationStrategy; + + bool ownFirstSegment; + bool returnedFirstSegment; + + void* firstSegment; + + struct MoreSegments; + kj::Maybe<kj::Own<MoreSegments>> moreSegments; +}; + +class FlatMessageBuilder: public MessageBuilder { + // THIS IS NOT THE CLASS YOU'RE LOOKING FOR. + // + // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder` + // and pass the scratch space to its constructor. It will then only fall back to malloc() if + // the scratch space is not large enough. + // + // Do NOT use this class unless you really know what you're doing. This class is problematic + // because it requires advance knowledge of the size of your message, which is usually impossible + // to determine without actually building the message. The class was created primarily to + // implement `copyToUnchecked()`, which itself exists only to support other internal parts of + // the Cap'n Proto implementation. + +public: + explicit FlatMessageBuilder(kj::ArrayPtr<word> array); + KJ_DISALLOW_COPY(FlatMessageBuilder); + virtual ~FlatMessageBuilder() noexcept(false); + + void requireFilled(); + // Throws an exception if the flat array is not exactly full. 
+ + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; + +private: + kj::ArrayPtr<word> array; + bool allocated; +}; + +// ======================================================================================= +// implementation details + +inline const ReaderOptions& MessageReader::getOptions() { + return options; +} + +template <typename RootType> +inline typename RootType::Reader MessageReader::getRoot() { + return getRootInternal().getAs<RootType>(); +} + +template <typename RootType> +inline typename RootType::Builder MessageBuilder::initRoot() { + return getRootInternal().initAs<RootType>(); +} + +template <typename Reader> +inline void MessageBuilder::setRoot(Reader&& value) { + getRootInternal().setAs<FromReader<Reader>>(value); +} + +template <typename RootType> +inline typename RootType::Builder MessageBuilder::getRoot() { + return getRootInternal().getAs<RootType>(); +} + +template <typename T> +void MessageBuilder::adoptRoot(Orphan<T>&& orphan) { + return getRootInternal().adopt(kj::mv(orphan)); +} + +template <typename RootType, typename SchemaType> +typename RootType::Reader MessageReader::getRoot(SchemaType schema) { + return getRootInternal().getAs<RootType>(schema); +} + +template <typename RootType, typename SchemaType> +typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) { + return getRootInternal().getAs<RootType>(schema); +} + +template <typename RootType, typename SchemaType> +typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) { + return getRootInternal().initAs<RootType>(schema); +} + +template <typename RootType> +typename RootType::Reader readMessageUnchecked(const word* data) { + return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>(); +} + +template <typename Reader> +void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) { + FlatMessageBuilder builder(uncheckedBuffer); + builder.setRoot(kj::fwd<Reader>(reader)); + 
builder.requireFilled(); +} + +template <typename RootType> +typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) { + return typename RootType::Reader(_::StructReader(data)); +} + +template <typename BuilderType> +typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) { + auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder)) + .getDataSectionAsBlob(); + return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()), + reinterpret_cast<word*>(bytes.end())); +} + +template <typename Type> +static typename Type::Reader defaultValue() { + return typename Type::Reader(_::StructReader()); +} + +template <typename T> +kj::Array<word> canonicalize(T&& reader) { + return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize(); +} + +} // namespace capnp + +#endif // CAPNP_MESSAGE_H_