comparison osx/include/capnp/message.h @ 134:41e769c91eca

Add Capnp and KJ builds for OSX
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 25 Oct 2016 14:48:23 +0100
parents
children 0994c39f1e94
comparison
equal deleted inserted replaced
133:1ac99bfc383d 134:41e769c91eca
1 // Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21
22 #include <kj/common.h>
23 #include <kj/memory.h>
24 #include <kj/mutex.h>
25 #include <kj/debug.h>
26 #include "common.h"
27 #include "layout.h"
28 #include "any.h"
29
30 #ifndef CAPNP_MESSAGE_H_
31 #define CAPNP_MESSAGE_H_
32
33 #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
34 #pragma GCC system_header
35 #endif
36
37 namespace capnp {
38
39 namespace _ { // private: arena types are only forward-declared so clients need not #include arena.h
40 class ReaderArena;
41 class BuilderArena;
42 }
43
44 class StructSchema; // The schema type named in the docs of the schema-taking getRoot()/initRoot().
45 class Orphanage; // Returned by MessageBuilder::getOrphanage().
46 template <typename T>
47 class Orphan; // Consumed by MessageBuilder::adoptRoot().
48
49 // =======================================================================================
50
51 struct ReaderOptions {
52 // Options controlling how data is read.
53
54 uint64_t traversalLimitInWords = 8 * 1024 * 1024;
55 // Limits how many total words of data are allowed to be traversed. Traversal is counted when
56 // a new struct or list builder is obtained, e.g. from a get() accessor. This means that calling
57 // the getter for the same sub-struct multiple times will cause it to be double-counted. Once
58 // the traversal limit is reached, an error will be reported.
59 //
60 // This limit exists for security reasons. It is possible for an attacker to construct a message
61 // in which multiple pointers point at the same location. This is technically invalid, but hard
62 // to detect. Using such a message, an attacker could cause a message which is small on the wire
63 // to appear much larger when actually traversed, possibly exhausting server resources leading to
64 // denial-of-service.
65 //
66 // It makes sense to set a traversal limit that is much larger than the underlying message.
67 // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters
68 // multiple times, which is expensive anyway), this should provide adequate protection without
69 // inconvenience.
70 //
71 // The default limit is 8 * 1024 * 1024 words, i.e. 64 MiB. This may or may not be a sensible number
72 // for any given use case, but probably at least prevents easy exploitation while also avoiding
73 // causing problems in most typical cases.
74
75 int nestingLimit = 64;
76 // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or
77 // lists of structs.
78 //
79 // Like the traversal limit, this limit exists for security reasons. Since it is common to use
80 // recursive code to traverse recursive data structures, an attacker could easily cause a stack
81 // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even
82 // being very large. The default limit of 64 is probably low enough to prevent any chance of
83 // stack overflow, yet high enough that it is never a problem in practice.
84 };
85
86 class MessageReader {
87 // Abstract interface for an object used to read a Cap'n Proto message. Subclasses of
88 // MessageReader are responsible for reading the raw, flat message content. Callers should
89 // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
90 // representing the root of the message, then use that to traverse the message content.
91 //
92 // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
93 // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
94 // `serialize.h`), which reads the message from a file descriptor. One might implement other
95 // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.
96
97 public:
98 MessageReader(ReaderOptions options);
99 // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
100 // default value of "ReaderOptions()". The base class constructor doesn't have a default value
101 // in order to remind subclasses that they really need to give the user a way to provide this.
102
103 virtual ~MessageReader() noexcept(false);
104
105 virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
106 // Gets the segment with the given ID, or returns null if no such segment exists. This method
107 // will be called at most once for each segment ID.
108
109 inline const ReaderOptions& getOptions();
110 // Get the options passed to the constructor.
111
112 template <typename RootType>
113 typename RootType::Reader getRoot();
114 // Get the root struct of the message, interpreting it as the given struct type.
115
116 template <typename RootType, typename SchemaType>
117 typename RootType::Reader getRoot(SchemaType schema);
118 // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
119 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
120 // use this.
121
122 bool isCanonical();
123 // Returns whether the message encoded in the reader is in canonical form.
124
125 private:
126 ReaderOptions options;
127
128 // Space in which we can construct a ReaderArena. We don't use ReaderArena directly here
129 // because we don't want clients to have to #include arena.h, which itself includes a bunch of
130 // big STL headers. We don't use a pointer to a ReaderArena because that would require an
131 // extra malloc on every message which could be expensive when processing small messages.
132 void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
133 bool allocatedArena; // No in-class initializer here; the out-of-line constructor is expected to set it.
134
135 _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); } // Views arenaSpace as the arena.
136 AnyPointer::Reader getRootInternal();
137 };
138
139 class MessageBuilder {
140 // Abstract interface for an object used to allocate and build a message. Subclasses of
141 // MessageBuilder are responsible for allocating the space in which the message will be written.
142 // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
143 // tricky things like allocate messages in shared memory or mmap()ed files.
144 //
145 // Creating a new message usually means allocating a new MessageBuilder (ideally on the stack)
146 // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
147 // That, in turn, can be used to fill in the message content. When done, you can call
148 // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
149 // message.
150
151 public:
152 MessageBuilder();
153 virtual ~MessageBuilder() noexcept(false);
154 KJ_DISALLOW_COPY(MessageBuilder);
155
156 struct SegmentInit {
157 kj::ArrayPtr<word> space;
158
159 size_t wordsUsed;
160 // Number of words in `space` which are used; the rest are free space in which additional
161 // objects may be allocated.
162 };
163
164 explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments);
165 // Create a MessageBuilder backed by existing memory. This is an advanced interface that most
166 // people should not use. THIS METHOD IS INSECURE; see below.
167 //
168 // This allows a MessageBuilder to be constructed to modify an in-memory message without first
169 // making a copy of the content. This is especially useful in conjunction with mmap().
170 //
171 // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself
172 // only need outlive the constructor.
173 //
174 // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that
175 // the input message is valid. This constructor is designed to be used with data you control,
176 // e.g. an mmap'd file which is owned and accessed by only one program. When reading data you
177 // do not trust, you *must* load it into a Reader and then copy into a Builder as a means of
178 // validating the content.
179 //
180 // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is
181 // currently in use by another MessageBuilder or MessageReader. Other readers/builders will
182 // not observe changes to the segment sizes nor newly-allocated segments caused by allocating
183 // new objects in this message.
184
185 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
186 // Allocates an array of at least the given number of words, throwing an exception or crashing if
187 // this is not possible. It is expected that this method will usually return more space than
188 // requested, and the caller should use that extra space as much as possible before allocating
189 // more. The returned space remains valid at least until the MessageBuilder is destroyed.
190 //
191 // Cap'n Proto will only call this once at a time, so the subclass need not worry about
192 // thread-safety.
193
194 template <typename RootType>
195 typename RootType::Builder initRoot();
196 // Initialize the root struct of the message as the given struct type.
197
198 template <typename Reader>
199 void setRoot(Reader&& value);
200 // Set the root struct to a deep copy of the given struct.
201
202 template <typename RootType>
203 typename RootType::Builder getRoot();
204 // Get the root struct of the message, interpreting it as the given struct type.
205
206 template <typename RootType, typename SchemaType>
207 typename RootType::Builder getRoot(SchemaType schema);
208 // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
209 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
210 // use this.
211
212 template <typename RootType, typename SchemaType>
213 typename RootType::Builder initRoot(SchemaType schema);
214 // Dynamically init the root struct of the message using the given schema (a StructSchema).
215 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
216 // use this.
217
218 template <typename T>
219 void adoptRoot(Orphan<T>&& orphan);
220 // Like setRoot() but adopts the orphan without copying.
221
222 kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
223 // Get the raw data that makes up the message.
224
225 Orphanage getOrphanage();
226
227 bool isCanonical();
228 // Check whether the message builder is in canonical form.
229
230 private:
231 void* arenaSpace[22];
232 // Space in which we can construct a BuilderArena. We don't use BuilderArena directly here
233 // because we don't want clients to have to #include arena.h, which itself includes a bunch of
234 // big STL headers. We don't use a pointer to a BuilderArena because that would require an
235 // extra malloc on every message which could be expensive when processing small messages.
236
237 bool allocatedArena = false;
238 // We have to initialize the arena lazily because when we do so we want to allocate the root
239 // pointer immediately, and this will allocate a segment, which requires a virtual function
240 // call on the MessageBuilder. We can't do such a call in the constructor since the subclass
241 // isn't constructed yet. This is kind of annoying because it means that getOrphanage() is
242 // not thread-safe, but that shouldn't be a huge deal...
243
244 _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); } // Views arenaSpace as the arena.
245 _::SegmentBuilder* getRootSegment();
246 AnyPointer::Builder getRootInternal();
247 };
248
249 template <typename RootType>
250 typename RootType::Reader readMessageUnchecked(const word* data);
251 // IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
252 // MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY. DO NOT
253 // USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
254 //
255 // Given a pointer to a known-valid message located in a single contiguous memory segment,
256 // returns a reader for that message. No bounds-checking will be done while traversing this
257 // message. Use this only if you have already verified that all pointers are valid and in-bounds,
258 // and there are no far pointers in the message.
259 //
260 // To create a message that can be passed to this function, build a message using a
261 // MallocMessageBuilder whose firstSegmentWords is larger than the message size. This guarantees
262 // that the message will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
263 // array. That word array is your message; you may pass a pointer to its first word into
264 // readMessageUnchecked() to read the message.
265 //
266 // This can be particularly handy for embedding messages in generated code: you can
267 // embed the raw bytes (using AlignedData) then make a Reader for it using this. This is the way
268 // default values are embedded in code generated by the Cap'n Proto compiler. E.g., if you have
269 // a message MyMessage, you can read its default value like so:
270 // MyMessage::Reader reader = readMessageUnchecked<MyMessage>(MyMessage::DEFAULT.words);
271 //
272 // To sanitize a message from an untrusted source such that it can be safely passed to
273 // readMessageUnchecked(), use copyToUnchecked().
274
275 template <typename Reader>
276 void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
277 // Copy the content of the given reader into the given buffer, such that it can safely be passed to
278 // readMessageUnchecked(). The buffer's size must be exactly reader.totalSizeInWords() + 1 words,
279 // otherwise an exception will be thrown (see FlatMessageBuilder::requireFilled()). The buffer must be zero'd before calling.
280
281 template <typename RootType>
282 typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data);
283 // Interprets the given data as a single, data-only struct. Only primitive fields (booleans,
284 // numbers, and enums) will be readable; all pointers will be null. This is useful if you want
285 // to use Cap'n Proto as a language/platform-neutral way to pack some bits. `writeDataStruct()`
286 // produces input suitable for this function.
287 // The input is a word array rather than a byte array to enforce alignment. If you have a byte
288 // array which you know is word-aligned (or if your platform supports unaligned reads and you don't
289 // mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into
290 // a word array:
291 //
292 // kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()),
293 // reinterpret_cast<const word*>(bytes.end()))
294
295 template <typename BuilderType>
296 typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder);
297 // Given a struct builder, get the underlying data section as a word array, suitable for passing
298 // to `readDataStruct()`. The result is an ArrayPtr built with kj::arrayPtr(), i.e. a pointer range.
299 //
300 // Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`.
301
302 template <typename Type>
303 static typename Type::Reader defaultValue();
304 // Get a default instance of the given struct or list type.
305 // NOTE(review): the definition builds the Reader from `_::StructReader()`; list support unverified.
306 // TODO(cleanup): Find a better home for this function?
307
308 // =======================================================================================
309
310 class SegmentArrayMessageReader: public MessageReader {
311 // A simple MessageReader that reads from an array of word arrays representing all segments.
312 // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
313 // (although it would probably make more sense to call builder.getRoot().asReader() in that case).
314
315 public:
316 SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
317 ReaderOptions options = ReaderOptions());
318 // Creates a message pointing at the given segment array, without taking ownership of the
319 // segments. All arrays passed in must remain valid until the MessageReader is destroyed.
320
321 KJ_DISALLOW_COPY(SegmentArrayMessageReader);
322 ~SegmentArrayMessageReader() noexcept(false);
323
324 virtual kj::ArrayPtr<const word> getSegment(uint id) override;
325
326 private:
327 kj::ArrayPtr<const kj::ArrayPtr<const word>> segments; // Same type as the constructor argument; not owned.
328 };
329
330 enum class AllocationStrategy: uint8_t { // How MallocMessageBuilder sizes segments after the first.
331 FIXED_SIZE,
332 // The builder will prefer to allocate the same amount of space for each segment with no
333 // heuristic growth. It will still allocate larger segments when the preferred size is too small
334 // for some single object. This mode is generally not recommended, but can be particularly useful
335 // for testing in order to force a message to allocate a predictable number of segments. Note
336 // that you can force every single object in the message to be located in a separate segment by
337 // using this mode with firstSegmentWords = 0.
338
339 GROW_HEURISTICALLY
340 // The builder will heuristically decide how much space to allocate for each segment. Each
341 // allocated segment will be progressively larger than the previous segments on the assumption
342 // that message sizes are exponentially distributed. The total number of segments that will be
343 // allocated for a message of size n is O(log n).
344 };
345
346 constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024; // Default firstSegmentWords of MallocMessageBuilder.
347 constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY; // Default strategy.
348
349 class MallocMessageBuilder: public MessageBuilder {
350 // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments. This
351 // implementation should be reasonable for any case that doesn't require writing the message to
352 // a specific location in memory.
353
354 public:
355 explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS,
356 AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
357 // Creates a MallocMessageBuilder which allocates at least the given number of words for the first
358 // segment, and then uses the given strategy to decide how much to allocate for subsequent
359 // segments. When choosing a value for firstSegmentWords, consider that:
360 // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
361 // if most messages fit in a single segment.
362 // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
363 // more space than you need. It only becomes problematic if you are allocating many messages
364 // in parallel and thus use lots of memory, or if you allocate so much extra space that just
365 // zeroing it out becomes a bottleneck.
366 // The defaults have been chosen to be reasonable for most people, so don't change them unless you
367 // have reason to believe you need to.
368
369 explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
370 AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
371 // This version always returns the given array for the first segment, and then proceeds with the
372 // allocation strategy. This is useful for optimization when building lots of small messages in
373 // a tight loop: you can reuse the space for the first segment.
374 //
375 // firstSegment MUST be zero-initialized. MallocMessageBuilder's destructor will write new zeros
376 // over any space that was used so that it can be reused.
377
378 KJ_DISALLOW_COPY(MallocMessageBuilder);
379 virtual ~MallocMessageBuilder() noexcept(false);
380
381 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
382
383 private:
384 uint nextSize; // NOTE(review): presumably the size to use for the next allocated segment — confirm in the .c++ file.
385 AllocationStrategy allocationStrategy;
386
387 bool ownFirstSegment; // NOTE(review): presumably false when the caller supplied firstSegment — confirm.
388 bool returnedFirstSegment;
389
390 void* firstSegment;
391
392 struct MoreSegments; // Defined out of line; only held through kj::Own below.
393 kj::Maybe<kj::Own<MoreSegments>> moreSegments;
394 };
395
396 class FlatMessageBuilder: public MessageBuilder {
397 // THIS IS NOT THE CLASS YOU'RE LOOKING FOR.
398 //
399 // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder`
400 // and pass the scratch space to its constructor. It will then only fall back to malloc() if
401 // the scratch space is not large enough.
402 //
403 // Do NOT use this class unless you really know what you're doing. This class is problematic
404 // because it requires advance knowledge of the size of your message, which is usually impossible
405 // to determine without actually building the message. The class was created primarily to
406 // implement `copyToUnchecked()`, which itself exists only to support other internal parts of
407 // the Cap'n Proto implementation.
408
409 public:
410 explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
411 KJ_DISALLOW_COPY(FlatMessageBuilder);
412 virtual ~FlatMessageBuilder() noexcept(false);
413
414 void requireFilled();
415 // Throws an exception if the flat array is not exactly full.
416
417 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
418
419 private:
420 kj::ArrayPtr<word> array; // Presumably the flat array passed to the constructor — confirm in the .c++ file.
421 bool allocated; // NOTE(review): presumably records that allocateSegment() was already called — confirm.
422 };
423
424 // =======================================================================================
425 // implementation details
426
427 inline const ReaderOptions& MessageReader::getOptions() {
428 return options; // Reference to the stored member; no copy is made.
429 }
430
431 template <typename RootType>
432 inline typename RootType::Reader MessageReader::getRoot() {
433 return getRootInternal().getAs<RootType>(); // View the root AnyPointer::Reader as RootType.
434 }
435
436 template <typename RootType>
437 inline typename RootType::Builder MessageBuilder::initRoot() {
438 return getRootInternal().initAs<RootType>(); // Initialize the root AnyPointer::Builder as RootType.
439 }
440
441 template <typename Reader>
442 inline void MessageBuilder::setRoot(Reader&& value) {
443 getRootInternal().setAs<FromReader<Reader>>(value); // `value` is passed as an lvalue here (no kj::fwd).
444 }
445
446 template <typename RootType>
447 inline typename RootType::Builder MessageBuilder::getRoot() {
448 return getRootInternal().getAs<RootType>(); // View the root AnyPointer::Builder as RootType.
449 }
450
451 template <typename T>
452 void MessageBuilder::adoptRoot(Orphan<T>&& orphan) {
453 return getRootInternal().adopt(kj::mv(orphan)); // Moves the orphan into the root; `return` forwards a void result.
454 }
455
456 template <typename RootType, typename SchemaType>
457 typename RootType::Reader MessageReader::getRoot(SchemaType schema) {
458 return getRootInternal().getAs<RootType>(schema); // Schema-driven variant: `schema` is passed on to getAs().
459 }
460
461 template <typename RootType, typename SchemaType>
462 typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) {
463 return getRootInternal().getAs<RootType>(schema); // Schema-driven variant: `schema` is passed on to getAs().
464 }
465
466 template <typename RootType, typename SchemaType>
467 typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) {
468 return getRootInternal().initAs<RootType>(schema); // Schema-driven variant: `schema` is passed on to initAs().
469 }
470
471 template <typename RootType>
472 typename RootType::Reader readMessageUnchecked(const word* data) {
473 return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>(); // Unchecked root viewed as RootType.
474 }
475
476 template <typename Reader>
477 void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
478 FlatMessageBuilder builder(uncheckedBuffer); // Builder over the caller's buffer.
479 builder.setRoot(kj::fwd<Reader>(reader)); // setRoot() stores a deep copy of `reader` as the root.
480 builder.requireFilled(); // Throws if the buffer did not end up exactly full.
481 }
482
483 template <typename RootType>
484 typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) {
485 return typename RootType::Reader(_::StructReader(data)); // Reader over a StructReader built from `data` alone.
486 }
487
488 template <typename BuilderType>
489 typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) {
490 auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder))
491 .getDataSectionAsBlob(); // The struct's data section, exposed through begin()/end().
492 return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()), // Reinterpret that range as words;
493 reinterpret_cast<word*>(bytes.end())); // the ArrayPtr<word> converts to ArrayPtr<const word> on return.
494 }
495
496 template <typename Type>
497 static typename Type::Reader defaultValue() {
498 return typename Type::Reader(_::StructReader()); // Reader over a default-constructed StructReader.
499 }
500
501 template <typename T>
502 kj::Array<word> canonicalize(T&& reader) { // No earlier declaration in this header; this definition is the only one.
503 return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize(); // Delegates to the internal reader.
504 }
505
506 } // namespace capnp
507
508 #endif // CAPNP_MESSAGE_H_