cannam@134: // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors cannam@134: // Licensed under the MIT License: cannam@134: // cannam@134: // Permission is hereby granted, free of charge, to any person obtaining a copy cannam@134: // of this software and associated documentation files (the "Software"), to deal cannam@134: // in the Software without restriction, including without limitation the rights cannam@134: // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cannam@134: // copies of the Software, and to permit persons to whom the Software is cannam@134: // furnished to do so, subject to the following conditions: cannam@134: // cannam@134: // The above copyright notice and this permission notice shall be included in cannam@134: // all copies or substantial portions of the Software. cannam@134: // cannam@134: // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR cannam@134: // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, cannam@134: // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE cannam@134: // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER cannam@134: // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, cannam@134: // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN cannam@134: // THE SOFTWARE. cannam@134: cannam@134: // This file implements a simple serialization format for Cap'n Proto messages. The format cannam@134: // is as follows: cannam@134: // cannam@134: // * 32-bit little-endian segment count (4 bytes). cannam@134: // * 32-bit little-endian size of each segment (4*(segment count) bytes). cannam@134: // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even cannam@134: // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) cannam@134: // * Data from each segment, in order (8*sum(segment sizes) bytes) cannam@134: // cannam@134: // This format has some important properties: cannam@134: // - It is self-delimiting, so multiple messages may be written to a stream without any external cannam@134: // delimiter. cannam@134: // - The total size and position of each segment can be determined by reading only the first part cannam@134: // of the message, allowing lazy and random-access reading of the segment data. cannam@134: // - A message is always at least 8 bytes. cannam@134: // - A single-segment message can be read entirely in two system calls with no buffering. cannam@134: // - A multi-segment message can be read entirely in three system calls with no buffering. cannam@134: // - The format is appropriate for mmap()ing since all data is aligned. cannam@134: cannam@134: #ifndef CAPNP_SERIALIZE_H_ cannam@134: #define CAPNP_SERIALIZE_H_ cannam@134: cannam@134: #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) cannam@134: #pragma GCC system_header cannam@134: #endif cannam@134: cannam@134: #include "message.h" cannam@134: #include cannam@134: cannam@134: namespace capnp { cannam@134: cannam@134: class FlatArrayMessageReader: public MessageReader { cannam@134: // Parses a message from a flat array. Note that it makes sense to use this together with mmap() cannam@134: // for extremely fast parsing. cannam@134: cannam@134: public: cannam@134: FlatArrayMessageReader(kj::ArrayPtr array, ReaderOptions options = ReaderOptions()); cannam@134: // The array must remain valid until the MessageReader is destroyed. cannam@134: cannam@134: kj::ArrayPtr getSegment(uint id) override; cannam@134: cannam@134: const word* getEnd() const { return end; } cannam@134: // Get a pointer just past the end of the message as determined by reading the message header. cannam@134: // This could actually be before the end of the input array. This pointer is useful e.g. if cannam@134: // you know that the input array has extra stuff appended after the message and you want to cannam@134: // get at it. cannam@134: cannam@134: private: cannam@134: // Optimize for single-segment case. cannam@134: kj::ArrayPtr segment0; cannam@134: kj::Array> moreSegments; cannam@134: const word* end; cannam@134: }; cannam@134: cannam@134: kj::ArrayPtr initMessageBuilderFromFlatArrayCopy( cannam@134: kj::ArrayPtr array, MessageBuilder& target, cannam@134: ReaderOptions options = ReaderOptions()); cannam@134: // Convenience function which reads a message using `FlatArrayMessageReader` then copies the cannam@134: // content into the target `MessageBuilder`, verifying that the message structure is valid cannam@134: // (although not necessarily that it matches the desired schema). cannam@134: // cannam@134: // Returns an ArrayPtr containing any words left over in the array after consuming the whole cannam@134: // message. This is useful when reading multiple messages that have been concatenated. See also cannam@134: // FlatArrayMessageReader::getEnd(). cannam@134: // cannam@134: // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one cannam@134: // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not cannam@134: // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) cannam@134: cannam@134: kj::Array messageToFlatArray(MessageBuilder& builder); cannam@134: // Constructs a flat array containing the entire content of the given message. cannam@134: // cannam@134: // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that cannam@134: // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being cannam@134: // deleted. For example: cannam@134: // cannam@134: // kj::Array words = messageToFlatArray(myMessage); cannam@134: // kj::ArrayPtr bytes = words.asBytes(); cannam@134: // write(fd, bytes.begin(), bytes.size()); cannam@134: cannam@134: kj::Array messageToFlatArray(kj::ArrayPtr> segments); cannam@134: // Version of messageToFlatArray that takes a raw segment array. cannam@134: cannam@134: size_t computeSerializedSizeInWords(MessageBuilder& builder); cannam@134: // Returns the size, in words, that will be needed to serialize the message, including the header. cannam@134: cannam@134: size_t computeSerializedSizeInWords(kj::ArrayPtr> segments); cannam@134: // Version of computeSerializedSizeInWords that takes a raw segment array. cannam@134: cannam@134: size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr messagePrefix); cannam@134: // Given a prefix of a serialized message, try to determine the expected total size of the message, cannam@134: // in words. The returned size is based on the information known so far; it may be an underestimate cannam@134: // if the prefix doesn't contain the full segment table. cannam@134: // cannam@134: // If the returned value is greater than `messagePrefix.size()`, then the message is not yet cannam@134: // complete and the app cannot parse it yet. If the returned value is less than or equal to cannam@134: // `messagePrefix.size()`, then the returned value is the exact total size of the message; any cannam@134: // remaining bytes are part of the next message. cannam@134: // cannam@134: // This function is useful when reading messages from a stream in an asynchronous way, but when cannam@134: // using the full KJ async infrastructure would be too difficult. Each time bytes are received, cannam@134: // use this function to determine if an entire message is ready to be parsed. cannam@134: cannam@134: // ======================================================================================= cannam@134: cannam@134: class InputStreamMessageReader: public MessageReader { cannam@134: // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader cannam@134: // for a subclass specific to file descriptors. cannam@134: cannam@134: public: cannam@134: InputStreamMessageReader(kj::InputStream& inputStream, cannam@134: ReaderOptions options = ReaderOptions(), cannam@134: kj::ArrayPtr scratchSpace = nullptr); cannam@134: ~InputStreamMessageReader() noexcept(false); cannam@134: cannam@134: // implements MessageReader ---------------------------------------- cannam@134: kj::ArrayPtr getSegment(uint id) override; cannam@134: cannam@134: private: cannam@134: kj::InputStream& inputStream; cannam@134: byte* readPos; cannam@134: cannam@134: // Optimize for single-segment case. cannam@134: kj::ArrayPtr segment0; cannam@134: kj::Array> moreSegments; cannam@134: cannam@134: kj::Array ownedSpace; cannam@134: // Only if scratchSpace wasn't big enough. cannam@134: cannam@134: kj::UnwindDetector unwindDetector; cannam@134: }; cannam@134: cannam@134: void readMessageCopy(kj::InputStream& input, MessageBuilder& target, cannam@134: ReaderOptions options = ReaderOptions(), cannam@134: kj::ArrayPtr scratchSpace = nullptr); cannam@134: // Convenience function which reads a message using `InputStreamMessageReader` then copies the cannam@134: // content into the target `MessageBuilder`, verifying that the message structure is valid cannam@134: // (although not necessarily that it matches the desired schema). cannam@134: // cannam@134: // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one cannam@134: // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not cannam@134: // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) cannam@134: cannam@134: void writeMessage(kj::OutputStream& output, MessageBuilder& builder); cannam@134: // Write the message to the given output stream. cannam@134: cannam@134: void writeMessage(kj::OutputStream& output, kj::ArrayPtr> segments); cannam@134: // Write the segment array to the given output stream. cannam@134: cannam@134: // ======================================================================================= cannam@134: // Specializations for reading from / writing to file descriptors. cannam@134: cannam@134: class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader { cannam@134: // A MessageReader that reads from a steam-based file descriptor. cannam@134: cannam@134: public: cannam@134: StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(), cannam@134: kj::ArrayPtr scratchSpace = nullptr) cannam@134: : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {} cannam@134: // Read message from a file descriptor, without taking ownership of the descriptor. cannam@134: cannam@134: StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(), cannam@134: kj::ArrayPtr scratchSpace = nullptr) cannam@134: : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {} cannam@134: // Read a message from a file descriptor, taking ownership of the descriptor. cannam@134: cannam@134: ~StreamFdMessageReader() noexcept(false); cannam@134: }; cannam@134: cannam@134: void readMessageCopyFromFd(int fd, MessageBuilder& target, cannam@134: ReaderOptions options = ReaderOptions(), cannam@134: kj::ArrayPtr scratchSpace = nullptr); cannam@134: // Convenience function which reads a message using `StreamFdMessageReader` then copies the cannam@134: // content into the target `MessageBuilder`, verifying that the message structure is valid cannam@134: // (although not necessarily that it matches the desired schema). cannam@134: // cannam@134: // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one cannam@134: // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not cannam@134: // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) cannam@134: cannam@134: void writeMessageToFd(int fd, MessageBuilder& builder); cannam@134: // Write the message to the given file descriptor. cannam@134: // cannam@134: // This function throws an exception on any I/O error. If your code is not exception-safe, be sure cannam@134: // you catch this exception at the call site. If throwing an exception is not acceptable, you cannam@134: // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). cannam@134: cannam@134: void writeMessageToFd(int fd, kj::ArrayPtr> segments); cannam@134: // Write the segment array to the given file descriptor. cannam@134: // cannam@134: // This function throws an exception on any I/O error. If your code is not exception-safe, be sure cannam@134: // you catch this exception at the call site. If throwing an exception is not acceptable, you cannam@134: // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). cannam@134: cannam@134: // ======================================================================================= cannam@134: // inline stuff cannam@134: cannam@134: inline kj::Array messageToFlatArray(MessageBuilder& builder) { cannam@134: return messageToFlatArray(builder.getSegmentsForOutput()); cannam@134: } cannam@134: cannam@134: inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { cannam@134: return computeSerializedSizeInWords(builder.getSegmentsForOutput()); cannam@134: } cannam@134: cannam@134: inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { cannam@134: writeMessage(output, builder.getSegmentsForOutput()); cannam@134: } cannam@134: cannam@134: inline void writeMessageToFd(int fd, MessageBuilder& builder) { cannam@134: writeMessageToFd(fd, builder.getSegmentsForOutput()); cannam@134: } cannam@134: cannam@134: } // namespace capnp cannam@134: cannam@134: #endif // SERIALIZE_H_