annotate osx/include/capnp/serialize.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 0994c39f1e94
children
rev   line source
cannam@62 1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
cannam@62 2 // Licensed under the MIT License:
cannam@62 3 //
cannam@62 4 // Permission is hereby granted, free of charge, to any person obtaining a copy
cannam@62 5 // of this software and associated documentation files (the "Software"), to deal
cannam@62 6 // in the Software without restriction, including without limitation the rights
cannam@62 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
cannam@62 8 // copies of the Software, and to permit persons to whom the Software is
cannam@62 9 // furnished to do so, subject to the following conditions:
cannam@62 10 //
cannam@62 11 // The above copyright notice and this permission notice shall be included in
cannam@62 12 // all copies or substantial portions of the Software.
cannam@62 13 //
cannam@62 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
cannam@62 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
cannam@62 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
cannam@62 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
cannam@62 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
cannam@62 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
cannam@62 20 // THE SOFTWARE.
cannam@62 21
cannam@62 22 // This file implements a simple serialization format for Cap'n Proto messages. The format
cannam@62 23 // is as follows:
cannam@62 24 //
cannam@62 25 // * 32-bit little-endian segment count (4 bytes).
cannam@62 26 // * 32-bit little-endian size of each segment (4*(segment count) bytes).
cannam@62 27 // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even
cannam@62 28 // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
cannam@62 29 // * Data from each segment, in order (8*sum(segment sizes) bytes)
cannam@62 30 //
cannam@62 31 // This format has some important properties:
cannam@62 32 // - It is self-delimiting, so multiple messages may be written to a stream without any external
cannam@62 33 // delimiter.
cannam@62 34 // - The total size and position of each segment can be determined by reading only the first part
cannam@62 35 // of the message, allowing lazy and random-access reading of the segment data.
cannam@62 36 // - A message is always at least 8 bytes.
cannam@62 37 // - A single-segment message can be read entirely in two system calls with no buffering.
cannam@62 38 // - A multi-segment message can be read entirely in three system calls with no buffering.
cannam@62 39 // - The format is appropriate for mmap()ing since all data is aligned.
cannam@62 40
cannam@62 41 #ifndef CAPNP_SERIALIZE_H_
cannam@62 42 #define CAPNP_SERIALIZE_H_
cannam@62 43
cannam@62 44 #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
cannam@62 45 #pragma GCC system_header
cannam@62 46 #endif
cannam@62 47
cannam@62 48 #include "message.h"
cannam@62 49 #include <kj/io.h>
cannam@62 50
cannam@62 51 namespace capnp {
cannam@62 52
cannam@62 53 class FlatArrayMessageReader: public MessageReader {
cannam@62 54 // Parses a message from a flat array. Note that it makes sense to use this together with mmap()
cannam@62 55 // for extremely fast parsing.
cannam@62 56
cannam@62 57 public:
cannam@62 58 FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
cannam@62 59 // The array must remain valid until the MessageReader is destroyed.
cannam@62 60
cannam@62 61 kj::ArrayPtr<const word> getSegment(uint id) override; // implements MessageReader
cannam@62 62
cannam@62 63 const word* getEnd() const { return end; }
cannam@62 64 // Get a pointer just past the end of the message as determined by reading the message header.
cannam@62 65 // This could actually be before the end of the input array. This pointer is useful e.g. if
cannam@62 66 // you know that the input array has extra stuff appended after the message and you want to
cannam@62 67 // get at it.
cannam@62 68
cannam@62 69 private:
cannam@62 70 // Optimize for single-segment case.
cannam@62 71 kj::ArrayPtr<const word> segment0;
cannam@62 72 kj::Array<kj::ArrayPtr<const word>> moreSegments;
cannam@62 73 const word* end; // Value returned by getEnd().
cannam@62 74 };
cannam@62 75
cannam@62 76 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
cannam@62 77 kj::ArrayPtr<const word> array, MessageBuilder& target,
cannam@62 78 ReaderOptions options = ReaderOptions());
cannam@62 79 // Convenience function which reads a message using `FlatArrayMessageReader` then copies the
cannam@62 80 // content into the target `MessageBuilder`, verifying that the message structure is valid
cannam@62 81 // (although not necessarily that it matches the desired schema).
cannam@62 82 //
cannam@62 83 // Returns an ArrayPtr containing any words left over in the array after consuming the whole
cannam@62 84 // message. This is useful when reading multiple messages that have been concatenated. See also
cannam@62 85 // FlatArrayMessageReader::getEnd().
cannam@62 86 //
cannam@62 87 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
cannam@62 88 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
cannam@62 89 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
cannam@62 90
cannam@62 91 kj::Array<word> messageToFlatArray(MessageBuilder& builder); // Defined inline near the end of this header.
cannam@62 92 // Constructs a flat array containing the entire content of the given message.
cannam@62 93 //
cannam@62 94 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
cannam@62 95 // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
cannam@62 96 // deleted. For example:
cannam@62 97 //
cannam@62 98 // kj::Array<capnp::word> words = messageToFlatArray(myMessage);
cannam@62 99 // kj::ArrayPtr<kj::byte> bytes = words.asBytes();
cannam@62 100 // write(fd, bytes.begin(), bytes.size());
cannam@62 101
cannam@62 102 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
cannam@62 103 // Version of messageToFlatArray that takes a raw segment array.
cannam@62 104
cannam@62 105 size_t computeSerializedSizeInWords(MessageBuilder& builder); // Defined inline near the end of this header.
cannam@62 106 // Returns the size, in words, that will be needed to serialize the message, including the header.
cannam@62 107
cannam@62 108 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
cannam@62 109 // Version of computeSerializedSizeInWords that takes a raw segment array.
cannam@62 110
cannam@62 111 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
cannam@62 112 // Given a prefix of a serialized message, try to determine the expected total size of the message,
cannam@62 113 // in words. The returned size is based on the information known so far; it may be an underestimate
cannam@62 114 // if the prefix doesn't contain the full segment table.
cannam@62 115 //
cannam@62 116 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet
cannam@62 117 // complete and the app cannot parse it yet. If the returned value is less than or equal to
cannam@62 118 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any
cannam@62 119 // remaining words are part of the next message.
cannam@62 120 //
cannam@62 121 // This function is useful when reading messages from a stream in an asynchronous way, but when
cannam@62 122 // using the full KJ async infrastructure would be too difficult. Each time bytes are received,
cannam@62 123 // use this function to determine if an entire message is ready to be parsed.
cannam@62 124
cannam@62 125 // =======================================================================================
cannam@62 126
cannam@62 127 class InputStreamMessageReader: public MessageReader {
cannam@62 128 // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
cannam@62 129 // for a subclass specific to file descriptors.
cannam@62 130
cannam@62 131 public:
cannam@62 132 InputStreamMessageReader(kj::InputStream& inputStream,
cannam@62 133 ReaderOptions options = ReaderOptions(),
cannam@62 134 kj::ArrayPtr<word> scratchSpace = nullptr);
cannam@62 135 ~InputStreamMessageReader() noexcept(false); // Destructor is explicitly permitted to throw.
cannam@62 136
cannam@62 137 // implements MessageReader ----------------------------------------
cannam@62 138 kj::ArrayPtr<const word> getSegment(uint id) override;
cannam@62 139
cannam@62 140 private:
cannam@62 141 kj::InputStream& inputStream;
cannam@62 142 byte* readPos;
cannam@62 143
cannam@62 144 // Optimize for single-segment case.
cannam@62 145 kj::ArrayPtr<const word> segment0;
cannam@62 146 kj::Array<kj::ArrayPtr<const word>> moreSegments;
cannam@62 147
cannam@62 148 kj::Array<word> ownedSpace;
cannam@62 149 // Only if scratchSpace wasn't big enough.
cannam@62 150
cannam@62 151 kj::UnwindDetector unwindDetector; // NOTE(review): presumably consulted by the throwing destructor -- confirm in serialize.c++.
cannam@62 152 };
cannam@62 153
cannam@62 154 void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
cannam@62 155 ReaderOptions options = ReaderOptions(),
cannam@62 156 kj::ArrayPtr<word> scratchSpace = nullptr);
cannam@62 157 // Convenience function which reads a message using `InputStreamMessageReader` then copies the
cannam@62 158 // content into the target `MessageBuilder`, verifying that the message structure is valid
cannam@62 159 // (although not necessarily that it matches the desired schema).
cannam@62 160 //
cannam@62 161 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
cannam@62 162 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
cannam@62 163 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
cannam@62 164
cannam@62 165 void writeMessage(kj::OutputStream& output, MessageBuilder& builder); // Defined inline near the end of this header.
cannam@62 166 // Write the message to the given output stream.
cannam@62 167
cannam@62 168 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
cannam@62 169 // Write the segment array to the given output stream.
cannam@62 170
cannam@62 171 // =======================================================================================
cannam@62 172 // Specializations for reading from / writing to file descriptors.
cannam@62 173
cannam@62 174 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
cannam@62 175 // A MessageReader that reads from a stream-based file descriptor. Base order matters: the private FdInputStream base is initialized first, and the constructors then pass *this to InputStreamMessageReader as its kj::InputStream.
cannam@62 176
cannam@62 177 public:
cannam@62 178 StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
cannam@62 179 kj::ArrayPtr<word> scratchSpace = nullptr)
cannam@62 180 : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
cannam@62 181 // Read a message from a file descriptor, without taking ownership of the descriptor.
cannam@62 182
cannam@62 183 StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
cannam@62 184 kj::ArrayPtr<word> scratchSpace = nullptr)
cannam@62 185 : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
cannam@62 186 // Read a message from a file descriptor, taking ownership of the descriptor.
cannam@62 187
cannam@62 188 ~StreamFdMessageReader() noexcept(false);
cannam@62 189 };
cannam@62 190
cannam@62 191 void readMessageCopyFromFd(int fd, MessageBuilder& target,
cannam@62 192 ReaderOptions options = ReaderOptions(),
cannam@62 193 kj::ArrayPtr<word> scratchSpace = nullptr);
cannam@62 194 // Convenience function which reads a message using `StreamFdMessageReader` then copies the
cannam@62 195 // content into the target `MessageBuilder`, verifying that the message structure is valid
cannam@62 196 // (although not necessarily that it matches the desired schema).
cannam@62 197 //
cannam@62 198 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
cannam@62 199 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
cannam@62 200 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
cannam@62 201
cannam@62 202 void writeMessageToFd(int fd, MessageBuilder& builder); // Defined inline near the end of this header.
cannam@62 203 // Write the message to the given file descriptor.
cannam@62 204 //
cannam@62 205 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
cannam@62 206 // you catch this exception at the call site. If throwing an exception is not acceptable, you
cannam@62 207 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
cannam@62 208
cannam@62 209 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
cannam@62 210 // Write the segment array to the given file descriptor.
cannam@62 211 //
cannam@62 212 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
cannam@62 213 // you catch this exception at the call site. If throwing an exception is not acceptable, you
cannam@62 214 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
cannam@62 215
cannam@62 216 // =======================================================================================
cannam@62 217 // inline stuff
cannam@62 218
cannam@62 219 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
cannam@62 220 return messageToFlatArray(builder.getSegmentsForOutput()); // Forward to the raw-segment overload.
cannam@62 221 }
cannam@62 222
cannam@62 223 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
cannam@62 224 return computeSerializedSizeInWords(builder.getSegmentsForOutput()); // Forward to the raw-segment overload.
cannam@62 225 }
cannam@62 226
cannam@62 227 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
cannam@62 228 writeMessage(output, builder.getSegmentsForOutput()); // Forward to the raw-segment overload.
cannam@62 229 }
cannam@62 230
cannam@62 231 inline void writeMessageToFd(int fd, MessageBuilder& builder) {
cannam@62 232 writeMessageToFd(fd, builder.getSegmentsForOutput()); // Forward to the raw-segment overload.
cannam@62 233 }
cannam@62 234
cannam@62 235 } // namespace capnp
cannam@62 236
cannam@62 237 #endif // CAPNP_SERIALIZE_H_