| Chris@63 | 1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors | 
| Chris@63 | 2 // Licensed under the MIT License: | 
| Chris@63 | 3 // | 
| Chris@63 | 4 // Permission is hereby granted, free of charge, to any person obtaining a copy | 
| Chris@63 | 5 // of this software and associated documentation files (the "Software"), to deal | 
| Chris@63 | 6 // in the Software without restriction, including without limitation the rights | 
| Chris@63 | 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
| Chris@63 | 8 // copies of the Software, and to permit persons to whom the Software is | 
| Chris@63 | 9 // furnished to do so, subject to the following conditions: | 
| Chris@63 | 10 // | 
| Chris@63 | 11 // The above copyright notice and this permission notice shall be included in | 
| Chris@63 | 12 // all copies or substantial portions of the Software. | 
| Chris@63 | 13 // | 
| Chris@63 | 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| Chris@63 | 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
| Chris@63 | 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
| Chris@63 | 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
| Chris@63 | 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
| Chris@63 | 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
| Chris@63 | 20 // THE SOFTWARE. | 
| Chris@63 | 21 | 
| Chris@63 | 22 // This file implements a simple serialization format for Cap'n Proto messages.  The format | 
| Chris@63 | 23 // is as follows: | 
| Chris@63 | 24 // | 
| Chris@63 | 25 // * 32-bit little-endian segment count, minus one (4 bytes). | 
| Chris@63 | 26 // * 32-bit little-endian size of each segment, in 8-byte words (4*(segment count) bytes). | 
| Chris@63 | 27 // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even | 
| Chris@63 | 28 //     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) | 
| Chris@63 | 29 // * Data from each segment, in order (8*sum(segment sizes) bytes) | 
| Chris@63 | 30 // | 
| Chris@63 | 31 // This format has some important properties: | 
| Chris@63 | 32 // - It is self-delimiting, so multiple messages may be written to a stream without any external | 
| Chris@63 | 33 //   delimiter. | 
| Chris@63 | 34 // - The total size and position of each segment can be determined by reading only the first part | 
| Chris@63 | 35 //   of the message, allowing lazy and random-access reading of the segment data. | 
| Chris@63 | 36 // - A message is always at least 8 bytes. | 
| Chris@63 | 37 // - A single-segment message can be read entirely in two system calls with no buffering. | 
| Chris@63 | 38 // - A multi-segment message can be read entirely in three system calls with no buffering. | 
| Chris@63 | 39 // - The format is appropriate for mmap()ing since all data is aligned. | 
| Chris@63 | 40 | 
| Chris@63 | 41 #ifndef CAPNP_SERIALIZE_H_ | 
| Chris@63 | 42 #define CAPNP_SERIALIZE_H_ | 
| Chris@63 | 43 | 
| Chris@63 | 44 #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) | 
| Chris@63 | 45 #pragma GCC system_header | 
| Chris@63 | 46 #endif | 
| Chris@63 | 47 | 
| Chris@63 | 48 #include "message.h" | 
| Chris@63 | 49 #include <kj/io.h> | 
| Chris@63 | 50 | 
| Chris@63 | 51 namespace capnp { | 
| Chris@63 | 52 | 
| Chris@63 | 53 class FlatArrayMessageReader: public MessageReader { | 
| Chris@63 | 54   // MessageReader that parses a message from a single flat array of words.  It makes sense to | 
| Chris@63 | 55   // use this together with mmap() for extremely fast parsing. | 
| Chris@63 | 56 | 
| Chris@63 | 57 public: | 
| Chris@63 | 58   FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions()); | 
| Chris@63 | 59   // The `array` passed in must remain valid until this MessageReader is destroyed. | 
| Chris@63 | 60 | 
| Chris@63 | 61   kj::ArrayPtr<const word> getSegment(uint id) override; | 
| Chris@63 | 62 | 
| Chris@63 | 63   const word* getEnd() const { return end; } | 
| Chris@63 | 64   // Returns a pointer just past the end of the message, as determined by reading the message | 
| Chris@63 | 65   // header.  This could actually be before the end of the input array.  The pointer is useful | 
| Chris@63 | 66   // e.g. if you know that the input array has extra data appended after the message (such as | 
| Chris@63 | 67   // further concatenated messages) and you want to get at it. | 
| Chris@63 | 68 | 
| Chris@63 | 69 private: | 
| Chris@63 | 70   // Optimize for the single-segment case (segment0 is kept apart from the moreSegments array). | 
| Chris@63 | 71   kj::ArrayPtr<const word> segment0; | 
| Chris@63 | 72   kj::Array<kj::ArrayPtr<const word>> moreSegments; | 
| Chris@63 | 73   const word* end; | 
| Chris@63 | 74 }; | 
| Chris@63 | 75 | 
| Chris@63 | 76 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy( | 
| Chris@63 | 77     kj::ArrayPtr<const word> array, MessageBuilder& target, | 
| Chris@63 | 78     ReaderOptions options = ReaderOptions()); | 
| Chris@63 | 79 // Convenience function which parses `array` using `FlatArrayMessageReader`, then copies the | 
| Chris@63 | 80 // content into the target `MessageBuilder`, verifying that the message structure is valid | 
| Chris@63 | 81 // (although not necessarily that it matches the desired schema). | 
| Chris@63 | 82 // | 
| Chris@63 | 83 // Returns an ArrayPtr containing any words left over in `array` after consuming the whole | 
| Chris@63 | 84 // message. This is useful when reading multiple messages that have been concatenated. See also | 
| Chris@63 | 85 // FlatArrayMessageReader::getEnd(). | 
| Chris@63 | 86 // | 
| Chris@63 | 87 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
| Chris@63 | 88 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
| Chris@63 | 89 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
| Chris@63 | 90 | 
| Chris@63 | 91 kj::Array<word> messageToFlatArray(MessageBuilder& builder); | 
| Chris@63 | 92 // Constructs and returns a flat array of words containing the entire content of the given message. | 
| Chris@63 | 93 // | 
| Chris@63 | 94 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that | 
| Chris@63 | 95 // `asBytes()` returns a non-owning ArrayPtr, so you have to keep the Array itself alive as well to | 
| Chris@63 | 96 // prevent the data from being deleted. For example: | 
| Chris@63 | 97 // | 
| Chris@63 | 98 //     kj::Array<capnp::word> words = messageToFlatArray(myMessage); | 
| Chris@63 | 99 //     kj::ArrayPtr<kj::byte> bytes = words.asBytes(); | 
| Chris@63 | 100 //     write(fd, bytes.begin(), bytes.size()); | 
| Chris@63 | 101 | 
| Chris@63 | 102 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
| Chris@63 | 103 // Version of messageToFlatArray() that takes a raw segment array instead of a MessageBuilder. | 
| Chris@63 | 104 | 
| Chris@63 | 105 size_t computeSerializedSizeInWords(MessageBuilder& builder); | 
| Chris@63 | 106 // Returns the number of words that will be needed to serialize the message, including the header. | 
| Chris@63 | 107 | 
| Chris@63 | 108 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
| Chris@63 | 109 // Version of computeSerializedSizeInWords() that takes a raw segment array. | 
| Chris@63 | 110 | 
| Chris@63 | 111 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix); | 
| Chris@63 | 112 // Given a prefix of a serialized message, try to determine the expected total size of the message, | 
| Chris@63 | 113 // in words. The returned size is based on the information known so far; it may be an underestimate | 
| Chris@63 | 114 // if the prefix doesn't contain the full segment table. | 
| Chris@63 | 115 // | 
| Chris@63 | 116 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet | 
| Chris@63 | 117 // complete and the app cannot parse it yet. If the returned value is less than or equal to | 
| Chris@63 | 118 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any | 
| Chris@63 | 119 // remaining words of the prefix are part of the next message. | 
| Chris@63 | 120 // | 
| Chris@63 | 121 // This function is useful when reading messages from a stream in an asynchronous way, but when | 
| Chris@63 | 122 // using the full KJ async infrastructure would be too difficult. Each time bytes are received, | 
| Chris@63 | 123 // use this function to determine if an entire message is ready to be parsed. | 
| Chris@63 | 124 | 
| Chris@63 | 125 // ======================================================================================= | 
| Chris@63 | 126 | 
| Chris@63 | 127 class InputStreamMessageReader: public MessageReader { | 
| Chris@63 | 128   // A MessageReader that reads from an abstract kj::InputStream, which it holds by reference.  See | 
| Chris@63 | 129   // also StreamFdMessageReader for a subclass specific to file descriptors. | 
| Chris@63 | 130 | 
| Chris@63 | 131 public: | 
| Chris@63 | 132   InputStreamMessageReader(kj::InputStream& inputStream, | 
| Chris@63 | 133                            ReaderOptions options = ReaderOptions(), | 
| Chris@63 | 134                            kj::ArrayPtr<word> scratchSpace = nullptr); | 
| Chris@63 | 135   ~InputStreamMessageReader() noexcept(false); | 
| Chris@63 | 136 | 
| Chris@63 | 137   // implements MessageReader ---------------------------------------- | 
| Chris@63 | 138   kj::ArrayPtr<const word> getSegment(uint id) override; | 
| Chris@63 | 139 | 
| Chris@63 | 140 private: | 
| Chris@63 | 141   kj::InputStream& inputStream; | 
| Chris@63 | 142   byte* readPos; | 
| Chris@63 | 143 | 
| Chris@63 | 144   // Optimize for the single-segment case (segment0 is kept apart from the moreSegments array). | 
| Chris@63 | 145   kj::ArrayPtr<const word> segment0; | 
| Chris@63 | 146   kj::Array<kj::ArrayPtr<const word>> moreSegments; | 
| Chris@63 | 147 | 
| Chris@63 | 148   kj::Array<word> ownedSpace; | 
| Chris@63 | 149   // Used only if the scratchSpace passed to the constructor wasn't big enough. | 
| Chris@63 | 150 | 
| Chris@63 | 151   kj::UnwindDetector unwindDetector; | 
| Chris@63 | 152 }; | 
| Chris@63 | 153 | 
| Chris@63 | 154 void readMessageCopy(kj::InputStream& input, MessageBuilder& target, | 
| Chris@63 | 155                      ReaderOptions options = ReaderOptions(), | 
| Chris@63 | 156                      kj::ArrayPtr<word> scratchSpace = nullptr); | 
| Chris@63 | 157 // Convenience function which reads a message from `input` using `InputStreamMessageReader`, then | 
| Chris@63 | 158 // copies the content into the target `MessageBuilder`, verifying that the message structure is | 
| Chris@63 | 159 // valid (although not necessarily that it matches the desired schema). | 
| Chris@63 | 160 // | 
| Chris@63 | 161 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
| Chris@63 | 162 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
| Chris@63 | 163 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
| Chris@63 | 164 | 
| Chris@63 | 165 void writeMessage(kj::OutputStream& output, MessageBuilder& builder); | 
| Chris@63 | 166 // Writes the message held by `builder` to the given output stream. | 
| Chris@63 | 167 | 
| Chris@63 | 168 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
| Chris@63 | 169 // Version of writeMessage() that takes a raw segment array instead of a MessageBuilder. | 
| Chris@63 | 170 | 
| Chris@63 | 171 // ======================================================================================= | 
| Chris@63 | 172 // Specializations for reading from / writing to file descriptors. | 
| Chris@63 | 173 | 
| Chris@63 | 174 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader { | 
| Chris@63 | 175   // A MessageReader that reads from a stream-based file descriptor via its own FdInputStream base. | 
| Chris@63 | 176 | 
| Chris@63 | 177 public: | 
| Chris@63 | 178   StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(), | 
| Chris@63 | 179                         kj::ArrayPtr<word> scratchSpace = nullptr) | 
| Chris@63 | 180       : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {} | 
| Chris@63 | 181   // Read a message from a file descriptor, without taking ownership of the descriptor. | 
| Chris@63 | 182 | 
| Chris@63 | 183   StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(), | 
| Chris@63 | 184                         kj::ArrayPtr<word> scratchSpace = nullptr) | 
| Chris@63 | 185       : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {} | 
| Chris@63 | 186   // Read a message from a file descriptor, taking ownership of the descriptor. | 
| Chris@63 | 187 | 
| Chris@63 | 188   ~StreamFdMessageReader() noexcept(false); | 
| Chris@63 | 189 }; | 
| Chris@63 | 190 | 
| Chris@63 | 191 void readMessageCopyFromFd(int fd, MessageBuilder& target, | 
| Chris@63 | 192                            ReaderOptions options = ReaderOptions(), | 
| Chris@63 | 193                            kj::ArrayPtr<word> scratchSpace = nullptr); | 
| Chris@63 | 194 // Convenience function which reads a message from `fd` using `StreamFdMessageReader`, then copies | 
| Chris@63 | 195 // the content into the target `MessageBuilder`, verifying that the message structure is valid | 
| Chris@63 | 196 // (although not necessarily that it matches the desired schema). | 
| Chris@63 | 197 // | 
| Chris@63 | 198 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
| Chris@63 | 199 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
| Chris@63 | 200 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
| Chris@63 | 201 | 
| Chris@63 | 202 void writeMessageToFd(int fd, MessageBuilder& builder); | 
| Chris@63 | 203 // Writes the message held by `builder` to the given file descriptor. | 
| Chris@63 | 204 // | 
| Chris@63 | 205 // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure | 
| Chris@63 | 206 // you catch this exception at the call site.  If throwing an exception is not acceptable, you | 
| Chris@63 | 207 // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). | 
| Chris@63 | 208 | 
| Chris@63 | 209 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
| Chris@63 | 210 // Version of writeMessageToFd() that takes a raw segment array instead of a MessageBuilder. | 
| Chris@63 | 211 // | 
| Chris@63 | 212 // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure | 
| Chris@63 | 213 // you catch this exception at the call site.  If throwing an exception is not acceptable, you | 
| Chris@63 | 214 // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). | 
| Chris@63 | 215 | 
| Chris@63 | 216 // ======================================================================================= | 
| Chris@63 | 217 // Inline definitions: each MessageBuilder overload forwards builder.getSegmentsForOutput() to the same-named overload. | 
| Chris@63 | 218 | 
| Chris@63 | 219 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { | 
| Chris@63 | 220   return messageToFlatArray(builder.getSegmentsForOutput()); | 
| Chris@63 | 221 } | 
| Chris@63 | 222 | 
| Chris@63 | 223 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { | 
| Chris@63 | 224   return computeSerializedSizeInWords(builder.getSegmentsForOutput()); | 
| Chris@63 | 225 } | 
| Chris@63 | 226 | 
| Chris@63 | 227 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { | 
| Chris@63 | 228   writeMessage(output, builder.getSegmentsForOutput()); | 
| Chris@63 | 229 } | 
| Chris@63 | 230 | 
| Chris@63 | 231 inline void writeMessageToFd(int fd, MessageBuilder& builder) { | 
| Chris@63 | 232   writeMessageToFd(fd, builder.getSegmentsForOutput()); | 
| Chris@63 | 233 } | 
| Chris@63 | 234 | 
| Chris@63 | 235 }  // namespace capnp | 
| Chris@63 | 236 | 
| Chris@63 | 237 #endif  // CAPNP_SERIALIZE_H_ |