cannam@134
|
1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
|
cannam@134
|
2 // Licensed under the MIT License:
|
cannam@134
|
3 //
|
cannam@134
|
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
cannam@134
|
5 // of this software and associated documentation files (the "Software"), to deal
|
cannam@134
|
6 // in the Software without restriction, including without limitation the rights
|
cannam@134
|
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
cannam@134
|
8 // copies of the Software, and to permit persons to whom the Software is
|
cannam@134
|
9 // furnished to do so, subject to the following conditions:
|
cannam@134
|
10 //
|
cannam@134
|
11 // The above copyright notice and this permission notice shall be included in
|
cannam@134
|
12 // all copies or substantial portions of the Software.
|
cannam@134
|
13 //
|
cannam@134
|
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
cannam@134
|
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
cannam@134
|
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
cannam@134
|
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
cannam@134
|
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
cannam@134
|
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
cannam@134
|
20 // THE SOFTWARE.
|
cannam@134
|
21
|
cannam@134
|
22 // This file implements a simple serialization format for Cap'n Proto messages. The format
|
cannam@134
|
23 // is as follows:
|
cannam@134
|
24 //
|
cannam@134
|
25 // * 32-bit little-endian segment count, minus one since there is always at least one segment (4 bytes).
|
cannam@134
|
26 // * 32-bit little-endian size of each segment (4*(segment count) bytes).
|
cannam@134
|
27 // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even
|
cannam@134
|
28 // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
|
cannam@134
|
29 // * Data from each segment, in order (8*sum(segment sizes) bytes)
|
cannam@134
|
30 //
|
cannam@134
|
31 // This format has some important properties:
|
cannam@134
|
32 // - It is self-delimiting, so multiple messages may be written to a stream without any external
|
cannam@134
|
33 // delimiter.
|
cannam@134
|
34 // - The total size and position of each segment can be determined by reading only the first part
|
cannam@134
|
35 // of the message, allowing lazy and random-access reading of the segment data.
|
cannam@134
|
36 // - A message is always at least 8 bytes.
|
cannam@134
|
37 // - A single-segment message can be read entirely in two system calls with no buffering.
|
cannam@134
|
38 // - A multi-segment message can be read entirely in three system calls with no buffering.
|
cannam@134
|
39 // - The format is appropriate for mmap()ing since all data is aligned.
|
cannam@134
|
40
|
cannam@134
|
41 #ifndef CAPNP_SERIALIZE_H_
|
cannam@134
|
42 #define CAPNP_SERIALIZE_H_
|
cannam@134
|
43
|
cannam@134
|
44 #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
|
cannam@134
|
45 #pragma GCC system_header
|
cannam@134
|
46 #endif
|
cannam@134
|
47
|
cannam@134
|
48 #include "message.h"
|
cannam@134
|
49 #include <kj/io.h>
|
cannam@134
|
50
|
cannam@134
|
51 namespace capnp {
|
cannam@134
|
52
|
cannam@134
|
53 class FlatArrayMessageReader: public MessageReader {
|
cannam@134
|
54 // Parses a message from a flat array. Note that it makes sense to use this together with mmap()
|
cannam@134
|
55 // for extremely fast parsing.
|
cannam@134
|
56
|
cannam@134
|
57 public:
|
cannam@134
|
58 FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
|
cannam@134
|
59 // The array must remain valid until the MessageReader is destroyed.
|
cannam@134
|
60
|
cannam@134
|
61 kj::ArrayPtr<const word> getSegment(uint id) override;
|
cannam@134
|
62
|
cannam@134
|
63 const word* getEnd() const { return end; }
|
cannam@134
|
64 // Get a pointer just past the end of the message as determined by reading the message header.
|
cannam@134
|
65 // This could actually be before the end of the input array. This pointer is useful e.g. if
|
cannam@134
|
66 // you know that the input array has extra stuff appended after the message and you want to
|
cannam@134
|
67 // get at it.
|
cannam@134
|
68
|
cannam@134
|
69 private:
|
cannam@134
|
70 // Optimize for single-segment case.
|
cannam@134
|
71 kj::ArrayPtr<const word> segment0;
|
cannam@134
|
72 kj::Array<kj::ArrayPtr<const word>> moreSegments;
|
cannam@134
|
73 const word* end;
|
cannam@134
|
74 };
|
cannam@134
|
75
|
cannam@134
|
76 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
|
cannam@134
|
77 kj::ArrayPtr<const word> array, MessageBuilder& target,
|
cannam@134
|
78 ReaderOptions options = ReaderOptions());
|
cannam@134
|
79 // Convenience function which reads a message using `FlatArrayMessageReader` then copies the
|
cannam@134
|
80 // content into the target `MessageBuilder`, verifying that the message structure is valid
|
cannam@134
|
81 // (although not necessarily that it matches the desired schema).
|
cannam@134
|
82 //
|
cannam@134
|
83 // Returns an ArrayPtr containing any words left over in the array after consuming the whole
|
cannam@134
|
84 // message. This is useful when reading multiple messages that have been concatenated. See also
|
cannam@134
|
85 // FlatArrayMessageReader::getEnd().
|
cannam@134
|
86 //
|
cannam@134
|
87 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
|
cannam@134
|
88 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
|
cannam@134
|
89 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
|
cannam@134
|
90
|
cannam@134
|
91 kj::Array<word> messageToFlatArray(MessageBuilder& builder);
|
cannam@134
|
92 // Constructs a flat array containing the entire content of the given message.
|
cannam@134
|
93 //
|
cannam@134
|
94 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
|
cannam@134
|
95 // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
|
cannam@134
|
96 // deleted. For example:
|
cannam@134
|
97 //
|
cannam@134
|
98 // kj::Array<capnp::word> words = messageToFlatArray(myMessage);
|
cannam@134
|
99 // kj::ArrayPtr<kj::byte> bytes = words.asBytes();
|
cannam@134
|
100 // write(fd, bytes.begin(), bytes.size());
|
cannam@134
|
101
|
cannam@134
|
102 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
|
cannam@134
|
103 // Version of messageToFlatArray that takes a raw segment array.
|
cannam@134
|
104
|
cannam@134
|
105 size_t computeSerializedSizeInWords(MessageBuilder& builder);
|
cannam@134
|
106 // Returns the size, in words, that will be needed to serialize the message, including the header.
|
cannam@134
|
107
|
cannam@134
|
108 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
|
cannam@134
|
109 // Version of computeSerializedSizeInWords that takes a raw segment array.
|
cannam@134
|
110
|
cannam@134
|
111 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
|
cannam@134
|
112 // Given a prefix of a serialized message, try to determine the expected total size of the message,
|
cannam@134
|
113 // in words. The returned size is based on the information known so far; it may be an underestimate
|
cannam@134
|
114 // if the prefix doesn't contain the full segment table.
|
cannam@134
|
115 //
|
cannam@134
|
116 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet
|
cannam@134
|
117 // complete and the app cannot parse it yet. If the returned value is less than or equal to
|
cannam@134
|
118 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any
|
cannam@134
|
119 // remaining bytes are part of the next message.
|
cannam@134
|
120 //
|
cannam@134
|
121 // This function is useful when reading messages from a stream in an asynchronous way, but when
|
cannam@134
|
122 // using the full KJ async infrastructure would be too difficult. Each time bytes are received,
|
cannam@134
|
123 // use this function to determine if an entire message is ready to be parsed.
|
cannam@134
|
124
|
cannam@134
|
125 // =======================================================================================
|
cannam@134
|
126
|
cannam@134
|
127 class InputStreamMessageReader: public MessageReader {
|
cannam@134
|
128 // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
|
cannam@134
|
129 // for a subclass specific to file descriptors.
|
cannam@134
|
130
|
cannam@134
|
131 public:
|
cannam@134
|
132 InputStreamMessageReader(kj::InputStream& inputStream,
|
cannam@134
|
133 ReaderOptions options = ReaderOptions(),
|
cannam@134
|
134 kj::ArrayPtr<word> scratchSpace = nullptr);
|
cannam@134
|
135 ~InputStreamMessageReader() noexcept(false);
|
cannam@134
|
136
|
cannam@134
|
137 // implements MessageReader ----------------------------------------
|
cannam@134
|
138 kj::ArrayPtr<const word> getSegment(uint id) override;
|
cannam@134
|
139
|
cannam@134
|
140 private:
|
cannam@134
|
141 kj::InputStream& inputStream;
|
cannam@134
|
142 byte* readPos;
|
cannam@134
|
143
|
cannam@134
|
144 // Optimize for single-segment case.
|
cannam@134
|
145 kj::ArrayPtr<const word> segment0;
|
cannam@134
|
146 kj::Array<kj::ArrayPtr<const word>> moreSegments;
|
cannam@134
|
147
|
cannam@134
|
148 kj::Array<word> ownedSpace;
|
cannam@134
|
149 // Only if scratchSpace wasn't big enough.
|
cannam@134
|
150
|
cannam@134
|
151 kj::UnwindDetector unwindDetector;
|
cannam@134
|
152 };
|
cannam@134
|
153
|
cannam@134
|
154 void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
|
cannam@134
|
155 ReaderOptions options = ReaderOptions(),
|
cannam@134
|
156 kj::ArrayPtr<word> scratchSpace = nullptr);
|
cannam@134
|
157 // Convenience function which reads a message using `InputStreamMessageReader` then copies the
|
cannam@134
|
158 // content into the target `MessageBuilder`, verifying that the message structure is valid
|
cannam@134
|
159 // (although not necessarily that it matches the desired schema).
|
cannam@134
|
160 //
|
cannam@134
|
161 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
|
cannam@134
|
162 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
|
cannam@134
|
163 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
|
cannam@134
|
164
|
cannam@134
|
165 void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
|
cannam@134
|
166 // Write the message to the given output stream.
|
cannam@134
|
167
|
cannam@134
|
168 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
|
cannam@134
|
169 // Write the segment array to the given output stream.
|
cannam@134
|
170
|
cannam@134
|
171 // =======================================================================================
|
cannam@134
|
172 // Specializations for reading from / writing to file descriptors.
|
cannam@134
|
173
|
cannam@134
|
174 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
|
cannam@134
|
175 // A MessageReader that reads from a stream-based file descriptor.
|
cannam@134
|
176
|
cannam@134
|
177 public:
|
cannam@134
|
178 StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
|
cannam@134
|
179 kj::ArrayPtr<word> scratchSpace = nullptr)
|
cannam@134
|
180 : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
|
cannam@134
|
181 // Read a message from a file descriptor, without taking ownership of the descriptor.
|
cannam@134
|
182
|
cannam@134
|
183 StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
|
cannam@134
|
184 kj::ArrayPtr<word> scratchSpace = nullptr)
|
cannam@134
|
185 : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
|
cannam@134
|
186 // Read a message from a file descriptor, taking ownership of the descriptor.
|
cannam@134
|
187
|
cannam@134
|
188 ~StreamFdMessageReader() noexcept(false);
|
cannam@134
|
189 };
|
cannam@134
|
190
|
cannam@134
|
191 void readMessageCopyFromFd(int fd, MessageBuilder& target,
|
cannam@134
|
192 ReaderOptions options = ReaderOptions(),
|
cannam@134
|
193 kj::ArrayPtr<word> scratchSpace = nullptr);
|
cannam@134
|
194 // Convenience function which reads a message using `StreamFdMessageReader` then copies the
|
cannam@134
|
195 // content into the target `MessageBuilder`, verifying that the message structure is valid
|
cannam@134
|
196 // (although not necessarily that it matches the desired schema).
|
cannam@134
|
197 //
|
cannam@134
|
198 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
|
cannam@134
|
199 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
|
cannam@134
|
200 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
|
cannam@134
|
201
|
cannam@134
|
202 void writeMessageToFd(int fd, MessageBuilder& builder);
|
cannam@134
|
203 // Write the message to the given file descriptor.
|
cannam@134
|
204 //
|
cannam@134
|
205 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
|
cannam@134
|
206 // you catch this exception at the call site. If throwing an exception is not acceptable, you
|
cannam@134
|
207 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
|
cannam@134
|
208
|
cannam@134
|
209 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
|
cannam@134
|
210 // Write the segment array to the given file descriptor.
|
cannam@134
|
211 //
|
cannam@134
|
212 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
|
cannam@134
|
213 // you catch this exception at the call site. If throwing an exception is not acceptable, you
|
cannam@134
|
214 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
|
cannam@134
|
215
|
cannam@134
|
216 // =======================================================================================
|
cannam@134
|
217 // inline stuff
|
cannam@134
|
218
|
cannam@134
|
219 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
|
cannam@134
|
220 return messageToFlatArray(builder.getSegmentsForOutput());
|
cannam@134
|
221 }
|
cannam@134
|
222
|
cannam@134
|
223 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
|
cannam@134
|
224 return computeSerializedSizeInWords(builder.getSegmentsForOutput());
|
cannam@134
|
225 }
|
cannam@134
|
226
|
cannam@134
|
227 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
|
cannam@134
|
228 writeMessage(output, builder.getSegmentsForOutput());
|
cannam@134
|
229 }
|
cannam@134
|
230
|
cannam@134
|
231 inline void writeMessageToFd(int fd, MessageBuilder& builder) {
|
cannam@134
|
232 writeMessageToFd(fd, builder.getSegmentsForOutput());
|
cannam@134
|
233 }
|
cannam@134
|
234
|
cannam@134
|
235 } // namespace capnp
|
cannam@134
|
236
|
cannam@134
|
237 #endif // CAPNP_SERIALIZE_H_
|