Mercurial > hg > sv-dependency-builds
diff osx/include/capnp/message.h @ 147:45360b968bf4
Cap'n Proto v0.6 + build for OSX
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Mon, 22 May 2017 10:01:37 +0100 |
parents | 41e769c91eca |
children |
line wrap: on
line diff
--- a/osx/include/capnp/message.h Mon Mar 06 13:29:58 2017 +0000 +++ b/osx/include/capnp/message.h Mon May 22 10:01:37 2017 +0100 @@ -1,508 +1,508 @@ -// Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors -// Licensed under the MIT License: -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include <kj/common.h> -#include <kj/memory.h> -#include <kj/mutex.h> -#include <kj/debug.h> -#include "common.h" -#include "layout.h" -#include "any.h" - -#ifndef CAPNP_MESSAGE_H_ -#define CAPNP_MESSAGE_H_ - -#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) -#pragma GCC system_header -#endif - -namespace capnp { - -namespace _ { // private - class ReaderArena; - class BuilderArena; -} - -class StructSchema; -class Orphanage; -template <typename T> -class Orphan; - -// ======================================================================================= - -struct ReaderOptions { - // Options controlling how data is read. - - uint64_t traversalLimitInWords = 8 * 1024 * 1024; - // Limits how many total words of data are allowed to be traversed. Traversal is counted when - // a new struct or list builder is obtained, e.g. from a get() accessor. This means that calling - // the getter for the same sub-struct multiple times will cause it to be double-counted. Once - // the traversal limit is reached, an error will be reported. - // - // This limit exists for security reasons. It is possible for an attacker to construct a message - // in which multiple pointers point at the same location. This is technically invalid, but hard - // to detect. Using such a message, an attacker could cause a message which is small on the wire - // to appear much larger when actually traversed, possibly exhausting server resources leading to - // denial-of-service. - // - // It makes sense to set a traversal limit that is much larger than the underlying message. - // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters - // multiple times, which is expensive anyway), this should provide adequate protection without - // inconvenience. - // - // The default limit is 64 MiB. This may or may not be a sensible number for any given use case, - // but probably at least prevents easy exploitation while also avoiding causing problems in most - // typical cases. - - int nestingLimit = 64; - // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or - // lists of structs. - // - // Like the traversal limit, this limit exists for security reasons. Since it is common to use - // recursive code to traverse recursive data structures, an attacker could easily cause a stack - // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even - // being very large. The default limit of 64 is probably low enough to prevent any chance of - // stack overflow, yet high enough that it is never a problem in practice. -}; - -class MessageReader { - // Abstract interface for an object used to read a Cap'n Proto message. Subclasses of - // MessageReader are responsible for reading the raw, flat message content. Callers should - // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader` - // representing the root of the message, then use that to traverse the message content. - // - // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose - // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from - // `serialize.h`), which reads the message from a file descriptor. One might implement other - // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc. - -public: - MessageReader(ReaderOptions options); - // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a - // default value of "ReaderOptions()". The base class constructor doesn't have a default value - // in order to remind subclasses that they really need to give the user a way to provide this. - - virtual ~MessageReader() noexcept(false); - - virtual kj::ArrayPtr<const word> getSegment(uint id) = 0; - // Gets the segment with the given ID, or returns null if no such segment exists. This method - // will be called at most once for each segment ID. - - inline const ReaderOptions& getOptions(); - // Get the options passed to the constructor. - - template <typename RootType> - typename RootType::Reader getRoot(); - // Get the root struct of the message, interpreting it as the given struct type. - - template <typename RootType, typename SchemaType> - typename RootType::Reader getRoot(SchemaType schema); - // Dynamically interpret the root struct of the message using the given schema (a StructSchema). - // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to - // use this. - - bool isCanonical(); - // Returns whether the message encoded in the reader is in canonical form. - -private: - ReaderOptions options; - - // Space in which we can construct a ReaderArena. We don't use ReaderArena directly here - // because we don't want clients to have to #include arena.h, which itself includes a bunch of - // big STL headers. We don't use a pointer to a ReaderArena because that would require an - // extra malloc on every message which could be expensive when processing small messages. - void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)]; - bool allocatedArena; - - _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); } - AnyPointer::Reader getRootInternal(); -}; - -class MessageBuilder { - // Abstract interface for an object used to allocate and build a message. Subclasses of - // MessageBuilder are responsible for allocating the space in which the message will be written. - // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do - // tricky things like allocate messages in shared memory or mmap()ed files. - // - // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack) - // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`. - // That, in turn, can be used to fill in the message content. When done, you can call - // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the - // message. - -public: - MessageBuilder(); - virtual ~MessageBuilder() noexcept(false); - KJ_DISALLOW_COPY(MessageBuilder); - - struct SegmentInit { - kj::ArrayPtr<word> space; - - size_t wordsUsed; - // Number of words in `space` which are used; the rest are free space in which additional - // objects may be allocated. - }; - - explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments); - // Create a MessageBuilder backed by existing memory. This is an advanced interface that most - // people should not use. THIS METHOD IS INSECURE; see below. - // - // This allows a MessageBuilder to be constructed to modify an in-memory message without first - // making a copy of the content. This is especially useful in conjunction with mmap(). - // - // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself - // only need outlive the constructor. - // - // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that - // the input message is valid. This constructor is designed to be used with data you control, - // e.g. an mmap'd file which is owned and accessed by only one program. When reading data you - // do not trust, you *must* load it into a Reader and then copy into a Builder as a means of - // validating the content. - // - // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is - // currently in use by another MessageBuilder or MessageReader. Other readers/builders will - // not observe changes to the segment sizes nor newly-allocated segments caused by allocating - // new objects in this message. - - virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0; - // Allocates an array of at least the given number of words, throwing an exception or crashing if - // this is not possible. It is expected that this method will usually return more space than - // requested, and the caller should use that extra space as much as possible before allocating - // more. The returned space remains valid at least until the MessageBuilder is destroyed. - // - // Cap'n Proto will only call this once at a time, so the subclass need not worry about - // thread-safety. - - template <typename RootType> - typename RootType::Builder initRoot(); - // Initialize the root struct of the message as the given struct type. - - template <typename Reader> - void setRoot(Reader&& value); - // Set the root struct to a deep copy of the given struct. - - template <typename RootType> - typename RootType::Builder getRoot(); - // Get the root struct of the message, interpreting it as the given struct type. - - template <typename RootType, typename SchemaType> - typename RootType::Builder getRoot(SchemaType schema); - // Dynamically interpret the root struct of the message using the given schema (a StructSchema). - // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to - // use this. - - template <typename RootType, typename SchemaType> - typename RootType::Builder initRoot(SchemaType schema); - // Dynamically init the root struct of the message using the given schema (a StructSchema). - // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to - // use this. - - template <typename T> - void adoptRoot(Orphan<T>&& orphan); - // Like setRoot() but adopts the orphan without copying. - - kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput(); - // Get the raw data that makes up the message. - - Orphanage getOrphanage(); - - bool isCanonical(); - // Check whether the message builder is in canonical form - -private: - void* arenaSpace[22]; - // Space in which we can construct a BuilderArena. We don't use BuilderArena directly here - // because we don't want clients to have to #include arena.h, which itself includes a bunch of - // big STL headers. We don't use a pointer to a BuilderArena because that would require an - // extra malloc on every message which could be expensive when processing small messages. - - bool allocatedArena = false; - // We have to initialize the arena lazily because when we do so we want to allocate the root - // pointer immediately, and this will allocate a segment, which requires a virtual function - // call on the MessageBuilder. We can't do such a call in the constructor since the subclass - // isn't constructed yet. This is kind of annoying because it means that getOrphanage() is - // not thread-safe, but that shouldn't be a huge deal... - - _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); } - _::SegmentBuilder* getRootSegment(); - AnyPointer::Builder getRootInternal(); -}; - -template <typename RootType> -typename RootType::Reader readMessageUnchecked(const word* data); -// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP, -// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY. DO NOT -// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES. -// -// Given a pointer to a known-valid message located in a single contiguous memory segment, -// returns a reader for that message. No bounds-checking will be done while traversing this -// message. Use this only if you have already verified that all pointers are valid and in-bounds, -// and there are no far pointers in the message. -// -// To create a message that can be passed to this function, build a message using a MallocAllocator -// whose preferred segment size is larger than the message size. This guarantees that the message -// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word -// array. That word array is your message; you may pass a pointer to its first word into -// readMessageUnchecked() to read the message. -// -// This can be particularly handy for embedding messages in generated code: you can -// embed the raw bytes (using AlignedData) then make a Reader for it using this. This is the way -// default values are embedded in code generated by the Cap'n Proto compiler. E.g., if you have -// a message MyMessage, you can read its default value like so: -// MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words); -// -// To sanitize a message from an untrusted source such that it can be safely passed to -// readMessageUnchecked(), use copyToUnchecked(). - -template <typename Reader> -void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer); -// Copy the content of the given reader into the given buffer, such that it can safely be passed to -// readMessageUnchecked(). The buffer's size must be exactly reader.totalSizeInWords() + 1, -// otherwise an exception will be thrown. The buffer must be zero'd before calling. - -template <typename RootType> -typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data); -// Interprets the given data as a single, data-only struct. Only primitive fields (booleans, -// numbers, and enums) will be readable; all pointers will be null. This is useful if you want -// to use Cap'n Proto as a language/platform-neutral way to pack some bits. -// -// The input is a word array rather than a byte array to enforce alignment. If you have a byte -// array which you know is word-aligned (or if your platform supports unaligned reads and you don't -// mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into -// a word array: -// -// kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()), -// reinterpret_cast<const word*>(bytes.end())) - -template <typename BuilderType> -typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder); -// Given a struct builder, get the underlying data section as a word array, suitable for passing -// to `readDataStruct()`. -// -// Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`. - -template <typename Type> -static typename Type::Reader defaultValue(); -// Get a default instance of the given struct or list type. -// -// TODO(cleanup): Find a better home for this function? - -// ======================================================================================= - -class SegmentArrayMessageReader: public MessageReader { - // A simple MessageReader that reads from an array of word arrays representing all segments. - // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput() - // (although it would probably make more sense to call builder.getRoot().asReader() in that case). - -public: - SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments, - ReaderOptions options = ReaderOptions()); - // Creates a message pointing at the given segment array, without taking ownership of the - // segments. All arrays passed in must remain valid until the MessageReader is destroyed. - - KJ_DISALLOW_COPY(SegmentArrayMessageReader); - ~SegmentArrayMessageReader() noexcept(false); - - virtual kj::ArrayPtr<const word> getSegment(uint id) override; - -private: - kj::ArrayPtr<const kj::ArrayPtr<const word>> segments; -}; - -enum class AllocationStrategy: uint8_t { - FIXED_SIZE, - // The builder will prefer to allocate the same amount of space for each segment with no - // heuristic growth. It will still allocate larger segments when the preferred size is too small - // for some single object. This mode is generally not recommended, but can be particularly useful - // for testing in order to force a message to allocate a predictable number of segments. Note - // that you can force every single object in the message to be located in a separate segment by - // using this mode with firstSegmentWords = 0. - - GROW_HEURISTICALLY - // The builder will heuristically decide how much space to allocate for each segment. Each - // allocated segment will be progressively larger than the previous segments on the assumption - // that message sizes are exponentially distributed. The total number of segments that will be - // allocated for a message of size n is O(log n). -}; - -constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024; -constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY; - -class MallocMessageBuilder: public MessageBuilder { - // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments. This - // implementation should be reasonable for any case that doesn't require writing the message to - // a specific location in memory. - -public: - explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS, - AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); - // Creates a BuilderContext which allocates at least the given number of words for the first - // segment, and then uses the given strategy to decide how much to allocate for subsequent - // segments. When choosing a value for firstSegmentWords, consider that: - // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good - // if most messages fit in a single segment. - // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate - // more space than you need. It only becomes problematic if you are allocating many messages - // in parallel and thus use lots of memory, or if you allocate so much extra space that just - // zeroing it out becomes a bottleneck. - // The defaults have been chosen to be reasonable for most people, so don't change them unless you - // have reason to believe you need to. - - explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment, - AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); - // This version always returns the given array for the first segment, and then proceeds with the - // allocation strategy. This is useful for optimization when building lots of small messages in - // a tight loop: you can reuse the space for the first segment. - // - // firstSegment MUST be zero-initialized. MallocMessageBuilder's destructor will write new zeros - // over any space that was used so that it can be reused. - - KJ_DISALLOW_COPY(MallocMessageBuilder); - virtual ~MallocMessageBuilder() noexcept(false); - - virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; - -private: - uint nextSize; - AllocationStrategy allocationStrategy; - - bool ownFirstSegment; - bool returnedFirstSegment; - - void* firstSegment; - - struct MoreSegments; - kj::Maybe<kj::Own<MoreSegments>> moreSegments; -}; - -class FlatMessageBuilder: public MessageBuilder { - // THIS IS NOT THE CLASS YOU'RE LOOKING FOR. - // - // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder` - // and pass the scratch space to its constructor. It will then only fall back to malloc() if - // the scratch space is not large enough. - // - // Do NOT use this class unless you really know what you're doing. This class is problematic - // because it requires advance knowledge of the size of your message, which is usually impossible - // to determine without actually building the message. The class was created primarily to - // implement `copyToUnchecked()`, which itself exists only to support other internal parts of - // the Cap'n Proto implementation. - -public: - explicit FlatMessageBuilder(kj::ArrayPtr<word> array); - KJ_DISALLOW_COPY(FlatMessageBuilder); - virtual ~FlatMessageBuilder() noexcept(false); - - void requireFilled(); - // Throws an exception if the flat array is not exactly full. - - virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; - -private: - kj::ArrayPtr<word> array; - bool allocated; -}; - -// ======================================================================================= -// implementation details - -inline const ReaderOptions& MessageReader::getOptions() { - return options; -} - -template <typename RootType> -inline typename RootType::Reader MessageReader::getRoot() { - return getRootInternal().getAs<RootType>(); -} - -template <typename RootType> -inline typename RootType::Builder MessageBuilder::initRoot() { - return getRootInternal().initAs<RootType>(); -} - -template <typename Reader> -inline void MessageBuilder::setRoot(Reader&& value) { - getRootInternal().setAs<FromReader<Reader>>(value); -} - -template <typename RootType> -inline typename RootType::Builder MessageBuilder::getRoot() { - return getRootInternal().getAs<RootType>(); -} - -template <typename T> -void MessageBuilder::adoptRoot(Orphan<T>&& orphan) { - return getRootInternal().adopt(kj::mv(orphan)); -} - -template <typename RootType, typename SchemaType> -typename RootType::Reader MessageReader::getRoot(SchemaType schema) { - return getRootInternal().getAs<RootType>(schema); -} - -template <typename RootType, typename SchemaType> -typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) { - return getRootInternal().getAs<RootType>(schema); -} - -template <typename RootType, typename SchemaType> -typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) { - return getRootInternal().initAs<RootType>(schema); -} - -template <typename RootType> -typename RootType::Reader readMessageUnchecked(const word* data) { - return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>(); -} - -template <typename Reader> -void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) { - FlatMessageBuilder builder(uncheckedBuffer); - builder.setRoot(kj::fwd<Reader>(reader)); - builder.requireFilled(); -} - -template <typename RootType> -typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) { - return typename RootType::Reader(_::StructReader(data)); -} - -template <typename BuilderType> -typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) { - auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder)) - .getDataSectionAsBlob(); - return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()), - reinterpret_cast<word*>(bytes.end())); -} - -template <typename Type> -static typename Type::Reader defaultValue() { - return typename Type::Reader(_::StructReader()); -} - -template <typename T> -kj::Array<word> canonicalize(T&& reader) { - return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize(); -} - -} // namespace capnp - -#endif // CAPNP_MESSAGE_H_ +// Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors +// Licensed under the MIT License: +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <kj/common.h> +#include <kj/memory.h> +#include <kj/mutex.h> +#include <kj/debug.h> +#include "common.h" +#include "layout.h" +#include "any.h" + +#ifndef CAPNP_MESSAGE_H_ +#define CAPNP_MESSAGE_H_ + +#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) +#pragma GCC system_header +#endif + +namespace capnp { + +namespace _ { // private + class ReaderArena; + class BuilderArena; +} + +class StructSchema; +class Orphanage; +template <typename T> +class Orphan; + +// ======================================================================================= + +struct ReaderOptions { + // Options controlling how data is read. + + uint64_t traversalLimitInWords = 8 * 1024 * 1024; + // Limits how many total words of data are allowed to be traversed. Traversal is counted when + // a new struct or list builder is obtained, e.g. from a get() accessor. This means that calling + // the getter for the same sub-struct multiple times will cause it to be double-counted. Once + // the traversal limit is reached, an error will be reported. + // + // This limit exists for security reasons. It is possible for an attacker to construct a message + // in which multiple pointers point at the same location. This is technically invalid, but hard + // to detect. Using such a message, an attacker could cause a message which is small on the wire + // to appear much larger when actually traversed, possibly exhausting server resources leading to + // denial-of-service. + // + // It makes sense to set a traversal limit that is much larger than the underlying message. + // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters + // multiple times, which is expensive anyway), this should provide adequate protection without + // inconvenience. + // + // The default limit is 64 MiB. This may or may not be a sensible number for any given use case, + // but probably at least prevents easy exploitation while also avoiding causing problems in most + // typical cases. + + int nestingLimit = 64; + // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or + // lists of structs. + // + // Like the traversal limit, this limit exists for security reasons. Since it is common to use + // recursive code to traverse recursive data structures, an attacker could easily cause a stack + // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even + // being very large. The default limit of 64 is probably low enough to prevent any chance of + // stack overflow, yet high enough that it is never a problem in practice. +}; + +class MessageReader { + // Abstract interface for an object used to read a Cap'n Proto message. Subclasses of + // MessageReader are responsible for reading the raw, flat message content. Callers should + // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader` + // representing the root of the message, then use that to traverse the message content. + // + // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose + // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from + // `serialize.h`), which reads the message from a file descriptor. One might implement other + // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc. + +public: + MessageReader(ReaderOptions options); + // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a + // default value of "ReaderOptions()". The base class constructor doesn't have a default value + // in order to remind subclasses that they really need to give the user a way to provide this. + + virtual ~MessageReader() noexcept(false); + + virtual kj::ArrayPtr<const word> getSegment(uint id) = 0; + // Gets the segment with the given ID, or returns null if no such segment exists. This method + // will be called at most once for each segment ID. + + inline const ReaderOptions& getOptions(); + // Get the options passed to the constructor. + + template <typename RootType> + typename RootType::Reader getRoot(); + // Get the root struct of the message, interpreting it as the given struct type. + + template <typename RootType, typename SchemaType> + typename RootType::Reader getRoot(SchemaType schema); + // Dynamically interpret the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. + + bool isCanonical(); + // Returns whether the message encoded in the reader is in canonical form. + +private: + ReaderOptions options; + + // Space in which we can construct a ReaderArena. We don't use ReaderArena directly here + // because we don't want clients to have to #include arena.h, which itself includes a bunch of + // big STL headers. We don't use a pointer to a ReaderArena because that would require an + // extra malloc on every message which could be expensive when processing small messages. + void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)]; + bool allocatedArena; + + _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); } + AnyPointer::Reader getRootInternal(); +}; + +class MessageBuilder { + // Abstract interface for an object used to allocate and build a message. Subclasses of + // MessageBuilder are responsible for allocating the space in which the message will be written. + // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do + // tricky things like allocate messages in shared memory or mmap()ed files. + // + // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack) + // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`. + // That, in turn, can be used to fill in the message content. When done, you can call + // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the + // message. + +public: + MessageBuilder(); + virtual ~MessageBuilder() noexcept(false); + KJ_DISALLOW_COPY(MessageBuilder); + + struct SegmentInit { + kj::ArrayPtr<word> space; + + size_t wordsUsed; + // Number of words in `space` which are used; the rest are free space in which additional + // objects may be allocated. + }; + + explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments); + // Create a MessageBuilder backed by existing memory. This is an advanced interface that most + // people should not use. THIS METHOD IS INSECURE; see below. + // + // This allows a MessageBuilder to be constructed to modify an in-memory message without first + // making a copy of the content. This is especially useful in conjunction with mmap(). + // + // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself + // only need outlive the constructor. + // + // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that + // the input message is valid. This constructor is designed to be used with data you control, + // e.g. an mmap'd file which is owned and accessed by only one program. When reading data you + // do not trust, you *must* load it into a Reader and then copy into a Builder as a means of + // validating the content. + // + // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is + // currently in use by another MessageBuilder or MessageReader. Other readers/builders will + // not observe changes to the segment sizes nor newly-allocated segments caused by allocating + // new objects in this message. + + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0; + // Allocates an array of at least the given number of words, throwing an exception or crashing if + // this is not possible. It is expected that this method will usually return more space than + // requested, and the caller should use that extra space as much as possible before allocating + // more. The returned space remains valid at least until the MessageBuilder is destroyed. + // + // Cap'n Proto will only call this once at a time, so the subclass need not worry about + // thread-safety. + + template <typename RootType> + typename RootType::Builder initRoot(); + // Initialize the root struct of the message as the given struct type. + + template <typename Reader> + void setRoot(Reader&& value); + // Set the root struct to a deep copy of the given struct. + + template <typename RootType> + typename RootType::Builder getRoot(); + // Get the root struct of the message, interpreting it as the given struct type. + + template <typename RootType, typename SchemaType> + typename RootType::Builder getRoot(SchemaType schema); + // Dynamically interpret the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. + + template <typename RootType, typename SchemaType> + typename RootType::Builder initRoot(SchemaType schema); + // Dynamically init the root struct of the message using the given schema (a StructSchema). + // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to + // use this. + + template <typename T> + void adoptRoot(Orphan<T>&& orphan); + // Like setRoot() but adopts the orphan without copying. + + kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput(); + // Get the raw data that makes up the message. + + Orphanage getOrphanage(); + + bool isCanonical(); + // Check whether the message builder is in canonical form + +private: + void* arenaSpace[22]; + // Space in which we can construct a BuilderArena. We don't use BuilderArena directly here + // because we don't want clients to have to #include arena.h, which itself includes a bunch of + // big STL headers. We don't use a pointer to a BuilderArena because that would require an + // extra malloc on every message which could be expensive when processing small messages. + + bool allocatedArena = false; + // We have to initialize the arena lazily because when we do so we want to allocate the root + // pointer immediately, and this will allocate a segment, which requires a virtual function + // call on the MessageBuilder. We can't do such a call in the constructor since the subclass + // isn't constructed yet. This is kind of annoying because it means that getOrphanage() is + // not thread-safe, but that shouldn't be a huge deal... + + _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); } + _::SegmentBuilder* getRootSegment(); + AnyPointer::Builder getRootInternal(); +}; + +template <typename RootType> +typename RootType::Reader readMessageUnchecked(const word* data); +// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP, +// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY. DO NOT +// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES. +// +// Given a pointer to a known-valid message located in a single contiguous memory segment, +// returns a reader for that message. No bounds-checking will be done while traversing this +// message. Use this only if you have already verified that all pointers are valid and in-bounds, +// and there are no far pointers in the message. +// +// To create a message that can be passed to this function, build a message using a MallocAllocator +// whose preferred segment size is larger than the message size. This guarantees that the message +// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word +// array. That word array is your message; you may pass a pointer to its first word into +// readMessageUnchecked() to read the message. +// +// This can be particularly handy for embedding messages in generated code: you can +// embed the raw bytes (using AlignedData) then make a Reader for it using this. This is the way +// default values are embedded in code generated by the Cap'n Proto compiler. E.g., if you have +// a message MyMessage, you can read its default value like so: +// MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words); +// +// To sanitize a message from an untrusted source such that it can be safely passed to +// readMessageUnchecked(), use copyToUnchecked(). + +template <typename Reader> +void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer); +// Copy the content of the given reader into the given buffer, such that it can safely be passed to +// readMessageUnchecked(). The buffer's size must be exactly reader.totalSizeInWords() + 1, +// otherwise an exception will be thrown. The buffer must be zero'd before calling. + +template <typename RootType> +typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data); +// Interprets the given data as a single, data-only struct. Only primitive fields (booleans, +// numbers, and enums) will be readable; all pointers will be null. This is useful if you want +// to use Cap'n Proto as a language/platform-neutral way to pack some bits. +// +// The input is a word array rather than a byte array to enforce alignment. If you have a byte +// array which you know is word-aligned (or if your platform supports unaligned reads and you don't +// mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into +// a word array: +// +// kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()), +// reinterpret_cast<const word*>(bytes.end())) + +template <typename BuilderType> +typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder); +// Given a struct builder, get the underlying data section as a word array, suitable for passing +// to `readDataStruct()`. +// +// Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`. + +template <typename Type> +static typename Type::Reader defaultValue(); +// Get a default instance of the given struct or list type. +// +// TODO(cleanup): Find a better home for this function? + +// ======================================================================================= + +class SegmentArrayMessageReader: public MessageReader { + // A simple MessageReader that reads from an array of word arrays representing all segments. + // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput() + // (although it would probably make more sense to call builder.getRoot().asReader() in that case). + +public: + SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments, + ReaderOptions options = ReaderOptions()); + // Creates a message pointing at the given segment array, without taking ownership of the + // segments. All arrays passed in must remain valid until the MessageReader is destroyed. + + KJ_DISALLOW_COPY(SegmentArrayMessageReader); + ~SegmentArrayMessageReader() noexcept(false); + + virtual kj::ArrayPtr<const word> getSegment(uint id) override; + +private: + kj::ArrayPtr<const kj::ArrayPtr<const word>> segments; +}; + +enum class AllocationStrategy: uint8_t { + FIXED_SIZE, + // The builder will prefer to allocate the same amount of space for each segment with no + // heuristic growth. It will still allocate larger segments when the preferred size is too small + // for some single object. This mode is generally not recommended, but can be particularly useful + // for testing in order to force a message to allocate a predictable number of segments. Note + // that you can force every single object in the message to be located in a separate segment by + // using this mode with firstSegmentWords = 0. + + GROW_HEURISTICALLY + // The builder will heuristically decide how much space to allocate for each segment. Each + // allocated segment will be progressively larger than the previous segments on the assumption + // that message sizes are exponentially distributed. The total number of segments that will be + // allocated for a message of size n is O(log n). +}; + +constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024; +constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY; + +class MallocMessageBuilder: public MessageBuilder { + // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments. This + // implementation should be reasonable for any case that doesn't require writing the message to + // a specific location in memory. + +public: + explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS, + AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); + // Creates a BuilderContext which allocates at least the given number of words for the first + // segment, and then uses the given strategy to decide how much to allocate for subsequent + // segments. When choosing a value for firstSegmentWords, consider that: + // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good + // if most messages fit in a single segment. + // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate + // more space than you need. It only becomes problematic if you are allocating many messages + // in parallel and thus use lots of memory, or if you allocate so much extra space that just + // zeroing it out becomes a bottleneck. + // The defaults have been chosen to be reasonable for most people, so don't change them unless you + // have reason to believe you need to. + + explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment, + AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY); + // This version always returns the given array for the first segment, and then proceeds with the + // allocation strategy. This is useful for optimization when building lots of small messages in + // a tight loop: you can reuse the space for the first segment. + // + // firstSegment MUST be zero-initialized. MallocMessageBuilder's destructor will write new zeros + // over any space that was used so that it can be reused. + + KJ_DISALLOW_COPY(MallocMessageBuilder); + virtual ~MallocMessageBuilder() noexcept(false); + + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; + +private: + uint nextSize; + AllocationStrategy allocationStrategy; + + bool ownFirstSegment; + bool returnedFirstSegment; + + void* firstSegment; + + struct MoreSegments; + kj::Maybe<kj::Own<MoreSegments>> moreSegments; +}; + +class FlatMessageBuilder: public MessageBuilder { + // THIS IS NOT THE CLASS YOU'RE LOOKING FOR. + // + // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder` + // and pass the scratch space to its constructor. It will then only fall back to malloc() if + // the scratch space is not large enough. + // + // Do NOT use this class unless you really know what you're doing. This class is problematic + // because it requires advance knowledge of the size of your message, which is usually impossible + // to determine without actually building the message. The class was created primarily to + // implement `copyToUnchecked()`, which itself exists only to support other internal parts of + // the Cap'n Proto implementation. + +public: + explicit FlatMessageBuilder(kj::ArrayPtr<word> array); + KJ_DISALLOW_COPY(FlatMessageBuilder); + virtual ~FlatMessageBuilder() noexcept(false); + + void requireFilled(); + // Throws an exception if the flat array is not exactly full. + + virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override; + +private: + kj::ArrayPtr<word> array; + bool allocated; +}; + +// ======================================================================================= +// implementation details + +inline const ReaderOptions& MessageReader::getOptions() { + return options; +} + +template <typename RootType> +inline typename RootType::Reader MessageReader::getRoot() { + return getRootInternal().getAs<RootType>(); +} + +template <typename RootType> +inline typename RootType::Builder MessageBuilder::initRoot() { + return getRootInternal().initAs<RootType>(); +} + +template <typename Reader> +inline void MessageBuilder::setRoot(Reader&& value) { + getRootInternal().setAs<FromReader<Reader>>(value); +} + +template <typename RootType> +inline typename RootType::Builder MessageBuilder::getRoot() { + return getRootInternal().getAs<RootType>(); +} + +template <typename T> +void MessageBuilder::adoptRoot(Orphan<T>&& orphan) { + return getRootInternal().adopt(kj::mv(orphan)); +} + +template <typename RootType, typename SchemaType> +typename RootType::Reader MessageReader::getRoot(SchemaType schema) { + return getRootInternal().getAs<RootType>(schema); +} + +template <typename RootType, typename SchemaType> +typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) { + return getRootInternal().getAs<RootType>(schema); +} + +template <typename RootType, typename SchemaType> +typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) { + return getRootInternal().initAs<RootType>(schema); +} + +template <typename RootType> +typename RootType::Reader readMessageUnchecked(const word* data) { + return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>(); +} + +template <typename Reader> +void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) { + FlatMessageBuilder builder(uncheckedBuffer); + builder.setRoot(kj::fwd<Reader>(reader)); + builder.requireFilled(); +} + +template <typename RootType> +typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) { + return typename RootType::Reader(_::StructReader(data)); +} + +template <typename BuilderType> +typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) { + auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder)) + .getDataSectionAsBlob(); + return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()), + reinterpret_cast<word*>(bytes.end())); +} + +template <typename Type> +static typename Type::Reader defaultValue() { + return typename Type::Reader(_::StructReader()); +} + +template <typename T> +kj::Array<word> canonicalize(T&& reader) { + return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize(); +} + +} // namespace capnp + +#endif // CAPNP_MESSAGE_H_