annotate src/capnproto-git-20161025/doc/cxx.md @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 9530b331f8c1
children
rev   line source
cannam@48 1 ---
cannam@48 2 layout: page
cannam@48 3 title: C++ Serialization
cannam@48 4 ---
cannam@48 5
cannam@48 6 # C++ Serialization
cannam@48 7
cannam@48 8 The Cap'n Proto C++ runtime implementation provides an easy-to-use interface for manipulating
cannam@48 9 messages backed by fast pointer arithmetic. This page discusses the serialization layer of
cannam@48 10 the runtime; see [C++ RPC](cxxrpc.html) for information about the RPC layer.
cannam@48 11
cannam@48 12 ## Example Usage
cannam@48 13
cannam@48 14 For the Cap'n Proto definition:
cannam@48 15
cannam@48 16 {% highlight capnp %}
cannam@48 17 struct Person {
cannam@48 18 id @0 :UInt32;
cannam@48 19 name @1 :Text;
cannam@48 20 email @2 :Text;
cannam@48 21 phones @3 :List(PhoneNumber);
cannam@48 22
cannam@48 23 struct PhoneNumber {
cannam@48 24 number @0 :Text;
cannam@48 25 type @1 :Type;
cannam@48 26
cannam@48 27 enum Type {
cannam@48 28 mobile @0;
cannam@48 29 home @1;
cannam@48 30 work @2;
cannam@48 31 }
cannam@48 32 }
cannam@48 33
cannam@48 34 employment :union {
cannam@48 35 unemployed @4 :Void;
cannam@48 36 employer @5 :Text;
cannam@48 37 school @6 :Text;
cannam@48 38 selfEmployed @7 :Void;
cannam@48 39 # We assume that a person is only one of these.
cannam@48 40 }
cannam@48 41 }
cannam@48 42
cannam@48 43 struct AddressBook {
cannam@48 44 people @0 :List(Person);
cannam@48 45 }
cannam@48 46 {% endhighlight %}
cannam@48 47
cannam@48 48 You might write code like:
cannam@48 49
cannam@48 50 {% highlight c++ %}
cannam@48 51 #include "addressbook.capnp.h"
cannam@48 52 #include <capnp/message.h>
cannam@48 53 #include <capnp/serialize-packed.h>
cannam@48 54 #include <iostream>
cannam@48 55
cannam@48 56 void writeAddressBook(int fd) {
cannam@48 57 ::capnp::MallocMessageBuilder message;
cannam@48 58
cannam@48 59 AddressBook::Builder addressBook = message.initRoot<AddressBook>();
cannam@48 60 ::capnp::List<Person>::Builder people = addressBook.initPeople(2);
cannam@48 61
cannam@48 62 Person::Builder alice = people[0];
cannam@48 63 alice.setId(123);
cannam@48 64 alice.setName("Alice");
cannam@48 65 alice.setEmail("alice@example.com");
cannam@48 66 // Type shown for explanation purposes; normally you'd use auto.
cannam@48 67 ::capnp::List<Person::PhoneNumber>::Builder alicePhones =
cannam@48 68 alice.initPhones(1);
cannam@48 69 alicePhones[0].setNumber("555-1212");
cannam@48 70 alicePhones[0].setType(Person::PhoneNumber::Type::MOBILE);
cannam@48 71 alice.getEmployment().setSchool("MIT");
cannam@48 72
cannam@48 73 Person::Builder bob = people[1];
cannam@48 74 bob.setId(456);
cannam@48 75 bob.setName("Bob");
cannam@48 76 bob.setEmail("bob@example.com");
cannam@48 77 auto bobPhones = bob.initPhones(2);
cannam@48 78 bobPhones[0].setNumber("555-4567");
cannam@48 79 bobPhones[0].setType(Person::PhoneNumber::Type::HOME);
cannam@48 80 bobPhones[1].setNumber("555-7654");
cannam@48 81 bobPhones[1].setType(Person::PhoneNumber::Type::WORK);
cannam@48 82 bob.getEmployment().setUnemployed();
cannam@48 83
cannam@48 84 writePackedMessageToFd(fd, message);
cannam@48 85 }
cannam@48 86
cannam@48 87 void printAddressBook(int fd) {
cannam@48 88 ::capnp::PackedFdMessageReader message(fd);
cannam@48 89
cannam@48 90 AddressBook::Reader addressBook = message.getRoot<AddressBook>();
cannam@48 91
cannam@48 92 for (Person::Reader person : addressBook.getPeople()) {
cannam@48 93 std::cout << person.getName().cStr() << ": "
cannam@48 94 << person.getEmail().cStr() << std::endl;
cannam@48 95 for (Person::PhoneNumber::Reader phone: person.getPhones()) {
cannam@48 96 const char* typeName = "UNKNOWN";
cannam@48 97 switch (phone.getType()) {
cannam@48 98 case Person::PhoneNumber::Type::MOBILE: typeName = "mobile"; break;
cannam@48 99 case Person::PhoneNumber::Type::HOME: typeName = "home"; break;
cannam@48 100 case Person::PhoneNumber::Type::WORK: typeName = "work"; break;
cannam@48 101 }
cannam@48 102 std::cout << " " << typeName << " phone: "
cannam@48 103 << phone.getNumber().cStr() << std::endl;
cannam@48 104 }
cannam@48 105 Person::Employment::Reader employment = person.getEmployment();
cannam@48 106 switch (employment.which()) {
cannam@48 107 case Person::Employment::UNEMPLOYED:
cannam@48 108 std::cout << " unemployed" << std::endl;
cannam@48 109 break;
cannam@48 110 case Person::Employment::EMPLOYER:
cannam@48 111 std::cout << " employer: "
cannam@48 112 << employment.getEmployer().cStr() << std::endl;
cannam@48 113 break;
cannam@48 114 case Person::Employment::SCHOOL:
cannam@48 115 std::cout << " student at: "
cannam@48 116 << employment.getSchool().cStr() << std::endl;
cannam@48 117 break;
cannam@48 118 case Person::Employment::SELF_EMPLOYED:
cannam@48 119 std::cout << " self-employed" << std::endl;
cannam@48 120 break;
cannam@48 121 }
cannam@48 122 }
cannam@48 123 }
cannam@48 124 {% endhighlight %}
cannam@48 125
cannam@48 126 ## C++ Feature Usage: C++11, Exceptions
cannam@48 127
cannam@48 128 This implementation makes use of C++11 features. If you are using GCC, you will need at least
cannam@48 129 version 4.7 to compile Cap'n Proto. If you are using Clang, you will need at least version 3.2.
cannam@48 130 These compilers require the flag `-std=c++11` to enable C++11 features -- your code which
cannam@48 131 `#include`s Cap'n Proto headers will need to be compiled with this flag. Other compilers have not
cannam@48 132 been tested at this time.
cannam@48 133
cannam@48 134 This implementation prefers to handle errors using exceptions. Exceptions are only used in
cannam@48 135 circumstances that should never occur in normal operation. For example, exceptions are thrown
cannam@48 136 on assertion failures (indicating bugs in the code), network failures, and invalid input.
cannam@48 137 Exceptions thrown by Cap'n Proto are never part of the interface and never need to be caught in
cannam@48 138 correct usage. The purpose of throwing exceptions is to allow higher-level code a chance to
cannam@48 139 recover from unexpected circumstances without disrupting other work happening in the same process.
cannam@48 140 For example, a server that handles requests from multiple clients should, on exception, return an
cannam@48 141 error to the client that caused the exception and close that connection, but should continue
cannam@48 142 handling other connections normally.
cannam@48 143
cannam@48 144 When Cap'n Proto code might throw an exception from a destructor, it first checks
cannam@48 145 `std::uncaught_exception()` to ensure that this is safe. If another exception is already active,
cannam@48 146 the new exception is assumed to be a side-effect of the main exception, and is either silently
cannam@48 147 swallowed or reported on a side channel.
cannam@48 148
cannam@48 149 In recognition of the fact that some teams prefer not to use exceptions, and that even enabling
cannam@48 150 exceptions in the compiler introduces overhead, Cap'n Proto allows you to disable them entirely
cannam@48 151 by registering your own exception callback. The callback will be called in place of throwing an
cannam@48 152 exception. The callback may abort the process, and is required to do so in certain circumstances
cannam@48 153 (when a fatal bug is detected). If the callback returns normally, Cap'n Proto will attempt
cannam@48 154 to continue by inventing "safe" values. This will lead to garbage output, but at least the program
cannam@48 155 will not crash. Your exception callback should set some sort of a flag indicating that an error
cannam@48 156 occurred, and somewhere up the stack you should check for that flag and cancel the operation.
cannam@48 157 See the header `kj/exception.h` for details on how to register an exception callback.
cannam@48 158
cannam@48 159 ## KJ Library
cannam@48 160
cannam@48 161 Cap'n Proto is built on top of a basic utility library called KJ. The two were actually developed
cannam@48 162 together -- KJ is simply the stuff which is not specific to Cap'n Proto serialization, and may be
cannam@48 163 useful to others independently of Cap'n Proto. For now, the two are distributed together. The
cannam@48 164 name "KJ" has no particular meaning; it was chosen to be short and easy-to-type.
cannam@48 165
cannam@48 166 As of v0.3, KJ is distributed with Cap'n Proto but built as a separate library. You may need
cannam@48 167 to explicitly link against libraries: `-lcapnp -lkj`
cannam@48 168
cannam@48 169 ## Generating Code
cannam@48 170
cannam@48 171 To generate C++ code from your `.capnp` [interface definition](language.html), run:
cannam@48 172
cannam@48 173 capnp compile -oc++ myproto.capnp
cannam@48 174
cannam@48 175 This will create `myproto.capnp.h` and `myproto.capnp.c++` in the same directory as `myproto.capnp`.
cannam@48 176
cannam@48 177 To use this code in your app, you must link against both `libcapnp` and `libkj`. If you use
cannam@48 178 `pkg-config`, Cap'n Proto provides the `capnp` module to simplify discovery of compiler and linker
cannam@48 179 flags.
cannam@48 180
cannam@48 181 If you use [RPC](cxxrpc.html) (i.e., your schema defines [interfaces](language.html#interfaces)),
cannam@48 182 then you will additionally need to link against `libcapnp-rpc` and `libkj-async`, or use the
cannam@48 183 `capnp-rpc` `pkg-config` module.
cannam@48 184
cannam@48 185 ### Setting a Namespace
cannam@48 186
cannam@48 187 You probably want your generated types to live in a C++ namespace. You will need to import
cannam@48 188 `/capnp/c++.capnp` and use the `namespace` annotation it defines:
cannam@48 189
cannam@48 190 {% highlight capnp %}
cannam@48 191 using Cxx = import "/capnp/c++.capnp";
cannam@48 192 $Cxx.namespace("foo::bar::baz");
cannam@48 193 {% endhighlight %}
cannam@48 194
cannam@48 195 Note that `capnp/c++.capnp` is installed in `$PREFIX/include` (`/usr/local/include` by default)
cannam@48 196 when you install the C++ runtime. The `capnp` tool automatically searches `/usr/include` and
cannam@48 197 `/usr/local/include` for imports that start with a `/`, so it should "just work". If you installed
cannam@48 198 somewhere else, you may need to add it to the search path with the `-I` flag to `capnp compile`,
cannam@48 199 which works much like the compiler flag of the same name.
cannam@48 200
cannam@48 201 ## Types
cannam@48 202
cannam@48 203 ### Primitive Types
cannam@48 204
cannam@48 205 Primitive types map to the obvious C++ types:
cannam@48 206
cannam@48 207 * `Bool` -> `bool`
cannam@48 208 * `IntNN` -> `intNN_t`
cannam@48 209 * `UIntNN` -> `uintNN_t`
cannam@48 210 * `Float32` -> `float`
cannam@48 211 * `Float64` -> `double`
cannam@48 212 * `Void` -> `::capnp::Void` (An empty struct; its only value is `::capnp::VOID`)
cannam@48 213
cannam@48 214 ### Structs
cannam@48 215
cannam@48 216 For each struct `Foo` in your interface, a C++ type named `Foo` is generated. This type itself is
cannam@48 217 really just a namespace; it contains two important inner classes: `Reader` and `Builder`.
cannam@48 218
cannam@48 219 `Reader` represents a read-only instance of `Foo` while `Builder` represents a writable instance
cannam@48 220 (usually, one that you are building). Both classes behave like pointers, in that you can pass them
cannam@48 221 by value and they do not own the underlying data that they operate on. In other words,
cannam@48 222 `Foo::Builder` is like a pointer to a `Foo` while `Foo::Reader` is like a const pointer to a `Foo`.
cannam@48 223
cannam@48 224 For every field `bar` defined in `Foo`, `Foo::Reader` has a method `getBar()`. For primitive types,
cannam@48 225 `get` just returns the type, but for structs, lists, and blobs, it returns a `Reader` for the
cannam@48 226 type.
cannam@48 227
cannam@48 228 {% highlight c++ %}
cannam@48 229 // Example Reader methods:
cannam@48 230
cannam@48 231 // myPrimitiveField @0 :Int32;
cannam@48 232 int32_t getMyPrimitiveField();
cannam@48 233
cannam@48 234 // myTextField @1 :Text;
cannam@48 235 ::capnp::Text::Reader getMyTextField();
cannam@48 236 // (Note that Text::Reader may be implicitly cast to const char* and
cannam@48 237 // std::string.)
cannam@48 238
cannam@48 239 // myStructField @2 :MyStruct;
cannam@48 240 MyStruct::Reader getMyStructField();
cannam@48 241
cannam@48 242 // myListField @3 :List(Float64);
cannam@48 243 ::capnp::List<double>::Reader getMyListField();
cannam@48 244 {% endhighlight %}
cannam@48 245
cannam@48 246 `Foo::Builder`, meanwhile, has several methods for each field `bar`:
cannam@48 247
cannam@48 248 * `getBar()`: For primitives, returns the value. For composites, returns a Builder for the
cannam@48 249 composite. If a composite field has not been initialized (i.e. this is the first time it has
cannam@48 250 been accessed), it will be initialized to a copy of the field's default value before returning.
cannam@48 251 * `setBar(x)`: For primitives, sets the value to x. For composites, sets the value to a deep copy
cannam@48 252 of x, which must be a Reader for the type.
cannam@48 253 * `initBar(n)`: Only for lists and blobs. Sets the field to a newly-allocated list or blob
cannam@48 254 of size n and returns a Builder for it. The elements of the list are initialized to their empty
cannam@48 255 state (zero for numbers, default values for structs).
cannam@48 256 * `initBar()`: Only for structs. Sets the field to a newly-allocated struct and returns a
cannam@48 257 Builder for it. Note that the newly-allocated struct is initialized to the default value for
cannam@48 258 the struct's _type_ (i.e., all-zero) rather than the default value for the field `bar` (if it
cannam@48 259 has one).
cannam@48 260 * `hasBar()`: Only for pointer fields (e.g. structs, lists, blobs). Returns true if the pointer
cannam@48 261 has been initialized (non-null). (This method is also available on readers.)
cannam@48 262 * `adoptBar(x)`: Only for pointer fields. Adopts the orphaned object x, linking it into the field
cannam@48 263 `bar` without copying. See the section on orphans.
cannam@48 264 * `disownBar()`: Disowns the value pointed to by `bar`, setting the pointer to null and returning
cannam@48 265 its previous value as an orphan. See the section on orphans.
cannam@48 266
cannam@48 267 {% highlight c++ %}
cannam@48 268 // Example Builder methods:
cannam@48 269
cannam@48 270 // myPrimitiveField @0 :Int32;
cannam@48 271 int32_t getMyPrimitiveField();
cannam@48 272 void setMyPrimitiveField(int32_t value);
cannam@48 273
cannam@48 274 // myTextField @1 :Text;
cannam@48 275 ::capnp::Text::Builder getMyTextField();
cannam@48 276 void setMyTextField(::capnp::Text::Reader value);
cannam@48 277 ::capnp::Text::Builder initMyTextField(size_t size);
cannam@48 278 // (Note that Text::Reader is implicitly constructable from const char*
cannam@48 279 // and std::string, and Text::Builder can be implicitly cast to
cannam@48 280 // these types.)
cannam@48 281
cannam@48 282 // myStructField @2 :MyStruct;
cannam@48 283 MyStruct::Builder getMyStructField();
cannam@48 284 void setMyStructField(MyStruct::Reader value);
cannam@48 285 MyStruct::Builder initMyStructField();
cannam@48 286
cannam@48 287 // myListField @3 :List(Float64);
cannam@48 288 ::capnp::List<double>::Builder getMyListField();
cannam@48 289 void setMyListField(::capnp::List<double>::Reader value);
cannam@48 290 ::capnp::List<double>::Builder initMyListField(size_t size);
cannam@48 291 {% endhighlight %}
cannam@48 292
cannam@48 293 ### Groups
cannam@48 294
cannam@48 295 Groups look a lot like a combination of a nested type and a field of that type, except that you
cannam@48 296 cannot set, adopt, or disown a group -- you can only get and init it.
cannam@48 297
cannam@48 298 ### Unions
cannam@48 299
cannam@48 300 A named union (as opposed to an unnamed one) works just like a group, except with some additions:
cannam@48 301
cannam@48 302 * For each field `foo`, the union reader and builder have a method `isFoo()` which returns true
cannam@48 303 if `foo` is the currently-set field in the union.
cannam@48 304 * The union reader and builder also have a method `which()` that returns an enum value indicating
cannam@48 305 which field is currently set.
cannam@48 306 * Calling the set, init, or adopt accessors for a field makes it the currently-set field.
cannam@48 307 * Calling the get or disown accessors on a field that isn't currently set will throw an
cannam@48 308 exception in debug mode or return garbage when `NDEBUG` is defined.
cannam@48 309
cannam@48 310 Unnamed unions differ from named unions only in that the accessor methods from the union's members
cannam@48 311 are added directly to the containing type's reader and builder, rather than generating a nested
cannam@48 312 type.
cannam@48 313
cannam@48 314 See the [example](#example-usage) at the top of the page for an example of unions.
cannam@48 315
cannam@48 316 ### Lists
cannam@48 317
cannam@48 318 Lists are represented by the type `capnp::List<T>`, where `T` is any of the primitive types,
cannam@48 319 any Cap'n Proto user-defined type, `capnp::Text`, `capnp::Data`, or `capnp::List<U>`
cannam@48 320 (to form a list of lists).
cannam@48 321
cannam@48 322 The type `List<T>` itself is not instantiatable, but has two inner classes: `Reader` and `Builder`.
cannam@48 323 As with structs, these types behave like pointers to read-only and read-write data, respectively.
cannam@48 324
cannam@48 325 Both `Reader` and `Builder` implement `size()`, `operator[]`, `begin()`, and `end()`, as good C++
cannam@48 326 containers should. Note, though, that `operator[]` is read-only -- you cannot use it to assign
cannam@48 327 the element, because that would require returning a reference, which is impossible because the
cannam@48 328 underlying data may not be in your CPU's native format (e.g., wrong byte order). Instead, to
cannam@48 329 assign an element of a list, you must use `builder.set(index, value)`.
cannam@48 330
cannam@48 331 For `List<Foo>` where `Foo` is a non-primitive type, the type returned by `operator[]` and
cannam@48 332 `iterator::operator*()` is `Foo::Reader` (for `List<Foo>::Reader`) or `Foo::Builder`
cannam@48 333 (for `List<Foo>::Builder`). The builder's `set` method takes a `Foo::Reader` as its second
cannam@48 334 parameter.
cannam@48 335
cannam@48 336 For lists of lists or lists of blobs, the builder also has a method `init(index, size)` which sets
cannam@48 337 the element at the given index to a newly-allocated value with the given size and returns a builder
cannam@48 338 for it. Struct lists do not have an `init` method because all elements are initialized to empty
cannam@48 339 values when the list is created.
cannam@48 340
cannam@48 341 ### Enums
cannam@48 342
cannam@48 343 Cap'n Proto enums become C++11 "enum classes". That means they behave like any other enum, but
cannam@48 344 the enum's values are scoped within the type. E.g. for an enum `Foo` with value `bar`, you must
cannam@48 345 refer to the value as `Foo::BAR`.
cannam@48 346
cannam@48 347 To match prevailing C++ style, an enum's value names are converted to UPPERCASE_WITH_UNDERSCORES
cannam@48 348 (whereas in the schema language you'd write them in camelCase).
cannam@48 349
cannam@48 350 Keep in mind when writing `switch` blocks that an enum read off the wire may have a numeric
cannam@48 351 value that is not listed in its definition. This may be the case if the sender is using a newer
cannam@48 352 version of the protocol, or if the message is corrupt or malicious. In C++11, enums are allowed
cannam@48 353 to have any value that is within the range of their base type, which for Cap'n Proto enums is
cannam@48 354 `uint16_t`.
cannam@48 355
cannam@48 356 ### Blobs (Text and Data)
cannam@48 357
cannam@48 358 Blobs are manipulated using the classes `capnp::Text` and `capnp::Data`. These classes are,
cannam@48 359 again, just containers for inner classes `Reader` and `Builder`. These classes are iterable and
cannam@48 360 implement `size()` and `operator[]` methods. `Builder::operator[]` even returns a reference
cannam@48 361 (unlike with `List<T>`). `Text::Reader` additionally has a method `cStr()` which returns a
cannam@48 362 NUL-terminated `const char*`.
cannam@48 363
cannam@48 364 As a special convenience, if you are using GCC 4.8+ or Clang, `Text::Reader` (and its underlying
cannam@48 365 type, `kj::StringPtr`) can be implicitly converted to and from `std::string` format. This is
cannam@48 366 accomplished without actually `#include`ing `<string>`, since some clients do not want to rely
cannam@48 367 on this rather-bulky header. In fact, any class which defines a `.c_str()` method will be
cannam@48 368 implicitly convertible in this way. Unfortunately, this trick doesn't work on GCC 4.7.
cannam@48 369
cannam@48 370 ### Interfaces
cannam@48 371
cannam@48 372 [Interfaces (RPC) have their own page.](cxxrpc.html)
cannam@48 373
cannam@48 374 ### Generics
cannam@48 375
cannam@48 376 [Generic types](language.html#generic-types) become templates in C++. The outer type (the one whose
cannam@48 377 name matches the schema declaration's name) is templatized; the inner `Reader` and `Builder` types
cannam@48 378 are not, because they inherit the parameters from the outer type. Similarly, template parameters
cannam@48 379 should refer to outer types, not `Reader` or `Builder` types.
cannam@48 380
cannam@48 381 For example, given:
cannam@48 382
cannam@48 383 {% highlight capnp %}
cannam@48 384 struct Map(Key, Value) {
cannam@48 385 entries @0 :List(Entry);
cannam@48 386 struct Entry {
cannam@48 387 key @0 :Key;
cannam@48 388 value @1 :Value;
cannam@48 389 }
cannam@48 390 }
cannam@48 391
cannam@48 392 struct People {
cannam@48 393 byName @0 :Map(Text, Person);
cannam@48 394 # Maps names to Person instances.
cannam@48 395 }
cannam@48 396 {% endhighlight %}
cannam@48 397
cannam@48 398 You might write code like:
cannam@48 399
cannam@48 400 {% highlight c++ %}
cannam@48 401 void processPeople(People::Reader people) {
cannam@48 402 Map<Text, Person>::Reader reader = people.getByName();
cannam@48 403 capnp::List<Map<Text, Person>::Entry>::Reader entries =
cannam@48 404 reader.getEntries();
cannam@48 405 for (auto entry: entries) {
cannam@48 406 processPerson(entry);
cannam@48 407 }
cannam@48 408 }
cannam@48 409 {% endhighlight %}
cannam@48 410
cannam@48 411 Note that all template parameters will be specified with a default value of `AnyPointer`.
cannam@48 412 Therefore, the type `Map<>` is equivalent to `Map<capnp::AnyPointer, capnp::AnyPointer>`.
cannam@48 413
cannam@48 414 ### Constants
cannam@48 415
cannam@48 416 Constants are exposed with their names converted to UPPERCASE_WITH_UNDERSCORES naming style
cannam@48 417 (whereas in the schema language you’d write them in camelCase). Primitive constants are just
cannam@48 418 `constexpr` values. Pointer-type constants (e.g. structs, lists, and blobs) are represented
cannam@48 419 using a proxy object that can be converted to the relevant `Reader` type, either implicitly or
cannam@48 420 using the unary `*` or `->` operators.
cannam@48 421
cannam@48 422 ## Messages and I/O
cannam@48 423
cannam@48 424 To create a new message, you must start by creating a `capnp::MessageBuilder`
cannam@48 425 (`capnp/message.h`). This is an abstract type which you can implement yourself, but most users
cannam@48 426 will want to use `capnp::MallocMessageBuilder`. Once your message is constructed, write it to
cannam@48 427 a file descriptor with `capnp::writeMessageToFd(fd, builder)` (`capnp/serialize.h`) or
cannam@48 428 `capnp::writePackedMessageToFd(fd, builder)` (`capnp/serialize-packed.h`).
cannam@48 429
cannam@48 430 To read a message, you must create a `capnp::MessageReader`, which is another abstract type.
cannam@48 431 Implementations are specific to the data source. You can use `capnp::StreamFdMessageReader`
cannam@48 432 (`capnp/serialize.h`) or `capnp::PackedFdMessageReader` (`capnp/serialize-packed.h`)
cannam@48 433 to read from file descriptors; both take the file descriptor as a constructor argument.
cannam@48 434
cannam@48 435 Note that if your stream contains additional data after the message, `PackedFdMessageReader` may
cannam@48 436 accidentally read some of that data, since it does buffered I/O. To make this work correctly, you
cannam@48 437 will need to set up a multi-use buffered stream. Buffered I/O may also be a good idea with
cannam@48 438 `StreamFdMessageReader` and also when writing, for performance reasons. See `capnp/io.h` for
cannam@48 439 details.
cannam@48 440
cannam@48 441 There is an [example](#example-usage) of all this at the beginning of this page.
cannam@48 442
cannam@48 443 ### Using mmap
cannam@48 444
cannam@48 445 Cap'n Proto can be used together with `mmap()` (or Win32's `MapViewOfFile()`) for extremely fast
cannam@48 446 reads, especially when you only need to use a subset of the data in the file. Currently,
cannam@48 447 Cap'n Proto is not well-suited for _writing_ via `mmap()`, only reading, but this is only because
cannam@48 448 we have not yet invented a mutable segment framing format -- the underlying design should
cannam@48 449 eventually work for both.
cannam@48 450
cannam@48 451 To take advantage of `mmap()` at read time, write your file in regular serialized (but NOT packed)
cannam@48 452 format -- that is, use `writeMessageToFd()`, _not_ `writePackedMessageToFd()`. Now, `mmap()` in
cannam@48 453 the entire file, and then pass the mapped memory to the constructor of
cannam@48 454 `capnp::FlatArrayMessageReader` (defined in `capnp/serialize.h`). That's it. You can use the
cannam@48 455 reader just like a normal `StreamFdMessageReader`. The operating system will automatically page
cannam@48 456 in data from disk as you read it.
cannam@48 457
cannam@48 458 `mmap()` works best when reading from flash media, or when the file is already hot in cache.
cannam@48 459 It works less well with slow rotating disks. Here, disk seeks make random access relatively
cannam@48 460 expensive. Also, if I/O throughput is your bottleneck, then the fact that mmaped data cannot
cannam@48 461 be packed or compressed may hurt you. However, it all depends on what fraction of the file you're
cannam@48 462 actually reading -- if you only pull one field out of one deeply-nested struct in a huge tree, it
cannam@48 463 may still be a win. The only way to know for sure is to do benchmarks! (But be careful to make
cannam@48 464 sure your benchmark is actually interacting with disk and not cache.)
cannam@48 465
cannam@48 466 ## Dynamic Reflection
cannam@48 467
cannam@48 468 Sometimes you want to write generic code that operates on arbitrary types, iterating over the
cannam@48 469 fields or looking them up by name. For example, you might want to write code that encodes
cannam@48 470 arbitrary Cap'n Proto types in JSON format. This requires something like "reflection", but C++
cannam@48 471 does not offer reflection. Also, you might even want to operate on types that aren't compiled
cannam@48 472 into the binary at all, but only discovered at runtime.
cannam@48 473
cannam@48 474 The C++ API supports inspecting schemas at runtime via the interface defined in
cannam@48 475 `capnp/schema.h`, and dynamically reading and writing instances of arbitrary types via
cannam@48 476 `capnp/dynamic.h`. Here's the example from the beginning of this file rewritten in terms
cannam@48 477 of the dynamic API:
cannam@48 478
cannam@48 479 {% highlight c++ %}
cannam@48 480 #include "addressbook.capnp.h"
cannam@48 481 #include <capnp/message.h>
cannam@48 482 #include <capnp/serialize-packed.h>
cannam@48 483 #include <iostream>
cannam@48 484 #include <capnp/schema.h>
cannam@48 485 #include <capnp/dynamic.h>
cannam@48 486
cannam@48 487 using ::capnp::DynamicValue;
cannam@48 488 using ::capnp::DynamicStruct;
cannam@48 489 using ::capnp::DynamicEnum;
cannam@48 490 using ::capnp::DynamicList;
cannam@48 491 using ::capnp::List;
cannam@48 492 using ::capnp::Schema;
cannam@48 493 using ::capnp::StructSchema;
cannam@48 494 using ::capnp::EnumSchema;
cannam@48 495
cannam@48 496 using ::capnp::Void;
cannam@48 497 using ::capnp::Text;
cannam@48 498 using ::capnp::MallocMessageBuilder;
cannam@48 499 using ::capnp::PackedFdMessageReader;
cannam@48 500
cannam@48 501 void dynamicWriteAddressBook(int fd, StructSchema schema) {
cannam@48 502 // Write a message using the dynamic API to set each
cannam@48 503 // field by text name. This isn't something you'd
cannam@48 504 // normally want to do; it's just for illustration.
cannam@48 505
cannam@48 506 MallocMessageBuilder message;
cannam@48 507
cannam@48 508 // Types shown for explanation purposes; normally you'd
cannam@48 509 // use auto.
cannam@48 510 DynamicStruct::Builder addressBook =
cannam@48 511 message.initRoot<DynamicStruct>(schema);
cannam@48 512
cannam@48 513 DynamicList::Builder people =
cannam@48 514 addressBook.init("people", 2).as<DynamicList>();
cannam@48 515
cannam@48 516 DynamicStruct::Builder alice =
cannam@48 517 people[0].as<DynamicStruct>();
cannam@48 518 alice.set("id", 123);
cannam@48 519 alice.set("name", "Alice");
cannam@48 520 alice.set("email", "alice@example.com");
cannam@48 521 auto alicePhones = alice.init("phones", 1).as<DynamicList>();
cannam@48 522 auto phone0 = alicePhones[0].as<DynamicStruct>();
cannam@48 523 phone0.set("number", "555-1212");
cannam@48 524 phone0.set("type", "mobile");
cannam@48 525 alice.get("employment").as<DynamicStruct>()
cannam@48 526 .set("school", "MIT");
cannam@48 527
cannam@48 528 auto bob = people[1].as<DynamicStruct>();
cannam@48 529 bob.set("id", 456);
cannam@48 530 bob.set("name", "Bob");
cannam@48 531 bob.set("email", "bob@example.com");
cannam@48 532
cannam@48 533 // Some magic: We can convert a dynamic sub-value back to
cannam@48 534 // the native type with as<T>()!
cannam@48 535 List<Person::PhoneNumber>::Builder bobPhones =
cannam@48 536 bob.init("phones", 2).as<List<Person::PhoneNumber>>();
cannam@48 537 bobPhones[0].setNumber("555-4567");
cannam@48 538 bobPhones[0].setType(Person::PhoneNumber::Type::HOME);
cannam@48 539 bobPhones[1].setNumber("555-7654");
cannam@48 540 bobPhones[1].setType(Person::PhoneNumber::Type::WORK);
cannam@48 541 bob.get("employment").as<DynamicStruct>()
cannam@48 542 .set("unemployed", ::capnp::VOID);
cannam@48 543
cannam@48 544 writePackedMessageToFd(fd, message);
cannam@48 545 }
cannam@48 546
cannam@48 547 void dynamicPrintValue(DynamicValue::Reader value) {
cannam@48 548 // Print an arbitrary message via the dynamic API by
cannam@48 549 // iterating over the schema. Look at the handling
cannam@48 550 // of STRUCT in particular. All output goes to std::cout; LIST and STRUCT recurse into this function.
cannam@48 551
cannam@48 552 switch (value.getType()) {  // dispatch on the runtime type tag of the value
cannam@48 553 case DynamicValue::VOID:
cannam@48 554 std::cout << "";  // writes an empty string, i.e. no visible output
cannam@48 555 break;
cannam@48 556 case DynamicValue::BOOL:
cannam@48 557 std::cout << (value.as<bool>() ? "true" : "false");
cannam@48 558 break;
cannam@48 559 case DynamicValue::INT:
cannam@48 560 std::cout << value.as<int64_t>();  // every INT value is extracted as int64_t
cannam@48 561 break;
cannam@48 562 case DynamicValue::UINT:
cannam@48 563 std::cout << value.as<uint64_t>();  // every UINT value is extracted as uint64_t
cannam@48 564 break;
cannam@48 565 case DynamicValue::FLOAT:
cannam@48 566 std::cout << value.as<double>();  // every FLOAT value is extracted as double
cannam@48 567 break;
cannam@48 568 case DynamicValue::TEXT:
cannam@48 569 std::cout << '\"' << value.as<Text>().cStr() << '\"';  // wrapped in double quotes; the contents are written as-is, without escaping
cannam@48 570 break;
cannam@48 571 case DynamicValue::LIST: {
cannam@48 572 std::cout << "[";
cannam@48 573 bool first = true;  // suppresses the separator before the first element
cannam@48 574 for (auto element: value.as<DynamicList>()) {
cannam@48 575 if (first) {
cannam@48 576 first = false;
cannam@48 577 } else {
cannam@48 578 std::cout << ", ";
cannam@48 579 }
cannam@48 580 dynamicPrintValue(element);  // recurse to print each element
cannam@48 581 }
cannam@48 582 std::cout << "]";
cannam@48 583 break;
cannam@48 584 }
cannam@48 585 case DynamicValue::ENUM: {
cannam@48 586 auto enumValue = value.as<DynamicEnum>();
cannam@48 587 KJ_IF_MAYBE(enumerant, enumValue.getEnumerant()) {  // NOTE(review): presumably taken only when the raw value maps to an enumerant known to the schema -- confirm against KJ_IF_MAYBE
cannam@48 588 std::cout <<
cannam@48 589 enumerant->getProto().getName().cStr();
cannam@48 590 } else {
cannam@48 591 // Unknown enum value; output raw number.
cannam@48 592 std::cout << enumValue.getRaw();
cannam@48 593 }
cannam@48 594 break;
cannam@48 595 }
cannam@48 596 case DynamicValue::STRUCT: {
cannam@48 597 std::cout << "(";
cannam@48 598 auto structValue = value.as<DynamicStruct>();
cannam@48 599 bool first = true;  // suppresses the separator before the first printed field
cannam@48 600 for (auto field: structValue.getSchema().getFields()) {
cannam@48 601 if (!structValue.has(field)) continue;  // skip fields for which has() reports false
cannam@48 602 if (first) {
cannam@48 603 first = false;
cannam@48 604 } else {
cannam@48 605 std::cout << ", ";
cannam@48 606 }
cannam@48 607 std::cout << field.getProto().getName().cStr()
cannam@48 608 << " = ";
cannam@48 609 dynamicPrintValue(structValue.get(field));  // recurse to print the field's value
cannam@48 610 }
cannam@48 611 std::cout << ")";
cannam@48 612 break;
cannam@48 613 }
cannam@48 614 default:
cannam@48 615 // There are other types, we aren't handling them.
cannam@48 616 std::cout << "?";
cannam@48 617 break;
cannam@48 618 }
cannam@48 619 }
cannam@48 620
cannam@48 621 void dynamicPrintMessage(int fd, StructSchema schema) {  // print the root struct of the message behind fd, then end the line
cannam@48 622 PackedFdMessageReader message(fd);  // reader constructed directly over the file descriptor
cannam@48 623 dynamicPrintValue(message.getRoot<DynamicStruct>(schema));  // the root is interpreted using the caller-supplied schema
cannam@48 624 std::cout << std::endl;
cannam@48 625 }
cannam@48 626 {% endhighlight %}
cannam@48 627
cannam@48 628 Notes about the dynamic API:
cannam@48 629
cannam@48 630 * You can implicitly cast any compiled Cap'n Proto struct reader/builder type directly to
cannam@48 631 `DynamicStruct::Reader`/`DynamicStruct::Builder`. Similarly with `List<T>` and `DynamicList`,
cannam@48 632 and even enum types and `DynamicEnum`. Finally, all valid Cap'n Proto field types may be
cannam@48 633 implicitly converted to `DynamicValue`.
cannam@48 634
cannam@48 635 * You can load schemas dynamically at runtime using `SchemaLoader` (`capnp/schema-loader.h`) and
cannam@48 636 use the Dynamic API to manipulate objects of these types. `MessageBuilder` and `MessageReader`
cannam@48 637 have methods for accessing the message root using a dynamic schema.
cannam@48 638
cannam@48 639 * While `SchemaLoader` loads binary schemas, you can also parse directly from text using
cannam@48 640 `SchemaParser` (`capnp/schema-parser.h`). However, this requires linking against `libcapnpc`
cannam@48 641 (in addition to `libcapnp` and `libkj`) -- this code is bulky and not terribly efficient. If
cannam@48 642 you can arrange to use only binary schemas at runtime, you'll be better off.
cannam@48 643
cannam@48 644 * Unlike with Protobufs, there is no "global registry" of compiled-in types. To get the schema
cannam@48 645 for a compiled-in type, use `capnp::Schema::from<MyType>()`.
cannam@48 646
cannam@48 647 * Unlike with Protobufs, the overhead of supporting reflection is small. Generated `.capnp.c++`
cannam@48 648 files contain only some embedded const data structures describing the schema, no code at all,
cannam@48 649 and the runtime library support code is relatively small. Moreover, if you do not use the
cannam@48 650 dynamic API or the schema API, you do not even need to link their implementations into your
cannam@48 651 executable.
cannam@48 652
cannam@48 653 * The dynamic API performs type checks at runtime. In case of error, it will throw an exception.
cannam@48 654 If you compile with `-fno-exceptions`, it will crash instead. Correct usage of the API should
cannam@48 655 never throw, but bugs happen. Enabling and catching exceptions will make your code more robust.
cannam@48 656
cannam@48 657 * Loading user-provided schemas has security implications: it greatly increases the attack
cannam@48 658 surface of the Cap'n Proto library. In particular, it is easy for an attacker to trigger
cannam@48 659 exceptions. To protect yourself, you are strongly advised to enable exceptions and catch them.
cannam@48 660
cannam@48 661 ## Orphans
cannam@48 662
cannam@48 663 An "orphan" is a Cap'n Proto object that is disconnected from the message structure. That is,
cannam@48 664 it is not the root of a message, and there is no other Cap'n Proto object holding a pointer to it.
cannam@48 665 Thus, it has no parents. Orphans are an advanced feature that can help avoid copies and make it
cannam@48 666 easier to use Cap'n Proto objects as part of your application's internal state. Typical
cannam@48 667 applications probably won't use orphans.
cannam@48 668
cannam@48 669 The class `capnp::Orphan<T>` (defined in `<capnp/orphan.h>`) represents a pointer to an orphaned
cannam@48 670 object of type `T`. `T` can be any struct type, `List<T>`, `Text`, or `Data`. E.g.
cannam@48 671 `capnp::Orphan<Person>` would be an orphaned `Person` structure. `Orphan<T>` is a move-only class,
cannam@48 672 similar to `std::unique_ptr<T>`. This prevents two different objects from adopting the same
cannam@48 673 orphan, which would result in an invalid message.
cannam@48 674
cannam@48 675 An orphan can be "adopted" by another object to link it into the message structure. Conversely,
cannam@48 676 an object can "disown" one of its pointers, causing the pointed-to object to become an orphan.
cannam@48 677 Every pointer-typed field `foo` provides builder methods `adoptFoo()` and `disownFoo()` for these
cannam@48 678 purposes. Again, these methods use C++11 move semantics. To use them, you will need to be
cannam@48 679 familiar with `std::move()` (or the equivalent but shorter-named `kj::mv()`).
cannam@48 680
cannam@48 681 Even though an orphan is unlinked from the message tree, it still resides inside memory allocated
cannam@48 682 for a particular message (i.e. a particular `MessageBuilder`). An orphan can only be adopted by
cannam@48 683 objects that live in the same message. To move objects between messages, you must perform a copy.
cannam@48 684 If the message is serialized while an `Orphan<T>` living within it still exists, the orphan's
cannam@48 685 content will be part of the serialized message, but the only way the receiver could find it is by
cannam@48 686 investigating the raw message; the Cap'n Proto API provides no way to detect or read it.
cannam@48 687
cannam@48 688 To construct an orphan from scratch (without having some other object disown it), you need an
cannam@48 689 `Orphanage`, which is essentially an orphan factory associated with some message. You can get one
cannam@48 690 by calling the `MessageBuilder`'s `getOrphanage()` method, or by calling the static method
cannam@48 691 `Orphanage::getForMessageContaining(builder)` and passing it any struct or list builder.
cannam@48 692
cannam@48 693 Note that when an `Orphan<T>` goes out-of-scope without being adopted, the underlying memory that
cannam@48 694 it occupied is overwritten with zeros. If you use packed serialization, these zeros will take very
cannam@48 695 little bandwidth on the wire, but will still waste memory on the sending and receiving ends.
cannam@48 696 Generally, you should avoid allocating message objects that won't be used, or if you cannot avoid
cannam@48 697 it, arrange to copy the entire message over to a new `MessageBuilder` before serializing, since
cannam@48 698 only the reachable objects will be copied.
cannam@48 699
cannam@48 700 ## Reference
cannam@48 701
cannam@48 702 The runtime library contains lots of useful features not described on this page. For now, the
cannam@48 703 best reference is the header files. See:
cannam@48 704
cannam@48 705 capnp/list.h
cannam@48 706 capnp/blob.h
cannam@48 707 capnp/message.h
cannam@48 708 capnp/serialize.h
cannam@48 709 capnp/serialize-packed.h
cannam@48 710 capnp/schema.h
cannam@48 711 capnp/schema-loader.h
cannam@48 712 capnp/dynamic.h
cannam@48 713
cannam@48 714 ## Tips and Best Practices
cannam@48 715
cannam@48 716 Here are some tips for using the C++ Cap'n Proto runtime most effectively:
cannam@48 717
cannam@48 718 * Accessor methods for primitive (non-pointer) fields are fast and inline. They should be just
cannam@48 719 as fast as accessing a struct field through a pointer.
cannam@48 720
cannam@48 721 * Accessor methods for pointer fields, on the other hand, are not inline, as they need to validate
cannam@48 722 the pointer. If you intend to access the same pointer multiple times, it is a good idea to
cannam@48 723 save the value to a local variable to avoid repeating this work. This is generally not a
cannam@48 724 problem given C++11's `auto`.
cannam@48 725
cannam@48 726 Example:
cannam@48 727
cannam@48 728 // BAD
cannam@48 729 frob(foo.getBar().getBaz(),
cannam@48 730 foo.getBar().getQux(),
cannam@48 731 foo.getBar().getCorge());
cannam@48 732
cannam@48 733 // GOOD
cannam@48 734 auto bar = foo.getBar();
cannam@48 735 frob(bar.getBaz(), bar.getQux(), bar.getCorge());
cannam@48 736
cannam@48 737 It is especially important to use this style when reading messages, for another reason: as
cannam@48 738 described under the "security tips" section, below, every time you `get` a pointer, Cap'n Proto
cannam@48 739 increments a counter by the size of the target object. If that counter hits a pre-defined limit,
cannam@48 740 an exception is thrown (or a default value is returned, if exceptions are disabled), to prevent
cannam@48 741 a malicious client from sending your server into an infinite loop with a specially-crafted
cannam@48 742 message. If you repeatedly `get` the same object, you are repeatedly counting the same bytes,
cannam@48 743 and so you may hit the limit prematurely. (Since Cap'n Proto readers are backed directly by
cannam@48 744 the underlying message buffer and do not have anywhere else to store per-object information, it
cannam@48 745 is impossible to remember whether you've seen a particular object already.)
cannam@48 746
cannam@48 747 * Internally, all pointer fields start out "null", even if they have default values. When you have
cannam@48 748 a pointer field `foo` and you call `getFoo()` on the containing struct's `Reader`, if the field
cannam@48 749 is "null", you will receive a reader for that field's default value. This reader is backed by
cannam@48 750 read-only memory; nothing is allocated. However, when you call `get` on a _builder_, and the
cannam@48 751 field is null, then the implementation must make a _copy_ of the default value to return to you.
cannam@48 752 Thus, you've caused the field to become non-null, just by "reading" it. On the other hand, if
cannam@48 753 you call `init` on that field, you are explicitly replacing whatever value is already there
cannam@48 754 (null or not) with a newly-allocated instance, and that newly-allocated instance is _not_ a
cannam@48 755 copy of the field's default value, but just a completely-uninitialized instance of the
cannam@48 756 appropriate type.
cannam@48 757
cannam@48 758 * It is possible to receive a struct value constructed from a newer version of the protocol than
cannam@48 759 the one your binary was built with, and that struct might have extra fields that you don't know
cannam@48 760 about. The Cap'n Proto implementation tries to avoid discarding this extra data. If you copy
cannam@48 761 the struct from one message to another (e.g. by calling a set() method on a parent object), the
cannam@48 762 extra fields will be preserved. This makes it possible to build proxies that receive messages
cannam@48 763 and forward them on without having to rebuild the proxy every time a new field is added. You
cannam@48 764 must be careful, however: in some cases, it's not possible to retain the extra fields, because
cannam@48 765 they need to be copied into a space that is allocated before the expected content is known.
cannam@48 766 In particular, lists of structs are represented as a flat array, not as an array of pointers.
cannam@48 767 Therefore, all memory for all structs in the list must be allocated upfront. Hence, copying
cannam@48 768 a struct value from another message into an element of a list will truncate the value. Because
cannam@48 769 of this, the setter method for struct lists is called `setWithCaveats()` rather than just `set()`.
cannam@48 770
cannam@48 771 * Messages are built in "arena" or "region" style: each object is allocated sequentially in
cannam@48 772 memory, until there is no more room in the segment, in which case a new segment is allocated,
cannam@48 773 and objects continue to be allocated sequentially in that segment. This design is what makes
cannam@48 774 Cap'n Proto possible at all, and it is very fast compared to other allocation strategies.
cannam@48 775 However, it has the disadvantage that if you allocate an object and then discard it, that memory
cannam@48 776 is lost. In fact, the empty space will still become part of the serialized message, even though
cannam@48 777 it is unreachable. The implementation will try to zero it out, so at least it should pack well,
cannam@48 778 but it's still better to avoid this situation. Some ways that this can happen include:
cannam@48 779 * If you `init` a field that is already initialized, the previous value is discarded.
cannam@48 780 * If you create an orphan that is never adopted into the message tree.
cannam@48 781 * If you use `adoptWithCaveats` to adopt an orphaned struct into a struct list, then a shallow
cannam@48 782 copy is necessary, since the struct list requires that its elements are sequential in memory.
cannam@48 783 The previous copy of the struct is discarded (although child objects are transferred properly).
cannam@48 784 * If you copy a struct value from another message using a `set` method, the copy will have the
cannam@48 785 same size as the original. However, the original could have been built with an older version
cannam@48 786 of the protocol which lacked some fields compared to the version your program was built with.
cannam@48 787 If you subsequently `get` that struct, the implementation will be forced to allocate a new
cannam@48 788 (shallow) copy which is large enough to hold all known fields, and the old copy will be
cannam@48 789 discarded. Child objects will be transferred over without being copied -- though they might
cannam@48 790 suffer from the same problem if you `get` them later on.
cannam@48 791 Sometimes, avoiding these problems is too inconvenient. Fortunately, it's also possible to
cannam@48 792 clean up the mess after-the-fact: if you copy the whole message tree into a fresh
cannam@48 793 `MessageBuilder`, only the reachable objects will be copied, leaving out all of the unreachable
cannam@48 794 dead space.
cannam@48 795
cannam@48 796 In the future, Cap'n Proto may be improved such that it can re-use dead space in a message.
cannam@48 797 However, this will only improve things, not fix them entirely: fragmentation could still leave
cannam@48 798 dead space.
cannam@48 799
cannam@48 800 ### Build Tips
cannam@48 801
cannam@48 802 * If you are worried about the binary footprint of the Cap'n Proto library, consider statically
cannam@48 803 linking with the `--gc-sections` linker flag. This will allow the linker to drop pieces of the
cannam@48 804 library that you do not actually use. For example, many users do not use the dynamic schema and
cannam@48 805 reflection APIs, which contribute a large fraction of the Cap'n Proto library's overall
cannam@48 806 footprint. Keep in mind that if you ever stringify a Cap'n Proto type, the stringification code
cannam@48 807 depends on the dynamic API; consider only using stringification in debug builds.
cannam@48 808
cannam@48 809 If you are dynamically linking against the system's shared copy of `libcapnp`, don't worry about
cannam@48 810 its binary size. Remember that only the code which you actually use will be paged into RAM, and
cannam@48 811 those pages are shared with other applications on the system.
cannam@48 812
cannam@48 813 Also remember to strip your binary. In particular, `libcapnpc` (the schema parser) has
cannam@48 814 excessively large symbol names caused by its use of template-based parser combinators. Stripping
cannam@48 815 the binary greatly reduces its size.
cannam@48 816
cannam@48 817 * The Cap'n Proto library has lots of debug-only asserts that are removed if you `#define NDEBUG`,
cannam@48 818 including in headers. If you care at all about performance, you should compile your production
cannam@48 819 binaries with the `-DNDEBUG` compiler flag. In fact, if Cap'n Proto detects that you have
cannam@48 820 optimization enabled but have not defined `NDEBUG`, it will define it for you (with a warning),
cannam@48 821 unless you define `DEBUG` or `KJ_DEBUG` to explicitly request debugging.
cannam@48 822
cannam@48 823 ### Security Tips
cannam@48 824
cannam@48 825 Cap'n Proto has not yet undergone security review. It most likely has some vulnerabilities. You
cannam@48 826 should not attempt to decode Cap'n Proto messages from sources you don't trust at this time.
cannam@48 827
cannam@48 828 However, assuming the Cap'n Proto implementation hardens up eventually, then the following security
cannam@48 829 tips will apply.
cannam@48 830
cannam@48 831 * It is highly recommended that you enable exceptions. When compiled with `-fno-exceptions`,
cannam@48 832 Cap'n Proto categorizes exceptions into "fatal" and "recoverable" varieties. Fatal exceptions
cannam@48 833 cause the server to crash, while recoverable exceptions are handled by logging an error and
cannam@48 834 returning a "safe" garbage value. Fatal is preferred in cases where it's unclear what kind of
cannam@48 835 garbage value would constitute "safe". The more of the library you use, the higher the chance
cannam@48 836 that you will leave yourself open to the possibility that an attacker could trigger a fatal
cannam@48 837 exception somewhere. If you enable exceptions, then you can catch the exception instead of
cannam@48 838 crashing, and return an error just to the attacker rather than to everyone using your server.
cannam@48 839
cannam@48 840 Basic parsing of Cap'n Proto messages shouldn't ever trigger fatal exceptions (assuming the
cannam@48 841 implementation is not buggy). However, the dynamic API -- especially if you are loading schemas
cannam@48 842 controlled by the attacker -- is much more exception-happy. If you cannot use exceptions, then
cannam@48 843 you are advised to avoid the dynamic API when dealing with untrusted data.
cannam@48 844
cannam@48 845 * If you need to process schemas from untrusted sources, take them in binary format, not text.
cannam@48 846 The text parser is a much larger attack surface and not designed to be secure. For instance,
cannam@48 847 as of this writing, it is trivial to deadlock the parser by simply writing a constant whose value
cannam@48 848 depends on itself.
cannam@48 849
cannam@48 850 * Cap'n Proto automatically applies two artificial limits on messages for security reasons:
cannam@48 851 a limit on nesting depth, and a limit on total bytes traversed.
cannam@48 852
cannam@48 853 * The nesting depth limit is designed to prevent stack overflow when handling a deeply-nested
cannam@48 854 recursive type, and defaults to 64. If your types aren't recursive, it is highly unlikely
cannam@48 855 that you would ever hit this limit, and even if they are recursive, it's still unlikely.
cannam@48 856
cannam@48 857 * The traversal limit is designed to defend against maliciously-crafted messages which use
cannam@48 858 pointer cycles or overlapping objects to make a message appear much larger than it looks off
cannam@48 859 the wire. While cycles and overlapping objects are illegal, they are hard to detect reliably.
cannam@48 860 Instead, Cap'n Proto places a limit on how many bytes worth of objects you can _dereference_
cannam@48 861 before it throws an exception. This limit is assessed every time you follow a pointer. By
cannam@48 862 default, the limit is 64MiB (this may change in the future). `StreamFdMessageReader` will
cannam@48 863 actually reject upfront any message which is larger than the traversal limit, even before you
cannam@48 864 start reading it.
cannam@48 865
cannam@48 866 If you need to write your code in such a way that you might frequently re-read the same
cannam@48 867 pointers, instead of increasing the traversal limit to the point where it is no longer useful,
cannam@48 868 consider simply copying the message into a new `MallocMessageBuilder` before starting. Then,
cannam@48 869 the traversal limit will be enforced only during the copy. There is no traversal limit on
cannam@48 870 objects once they live in a `MessageBuilder`, even if you use `.asReader()` to convert a
cannam@48 871 particular object's builder to the corresponding reader type.
cannam@48 872
cannam@48 873 Both limits may be increased using `capnp::ReaderOptions`, defined in `capnp/message.h`.
cannam@48 874
cannam@48 875 * Remember that enums on the wire may have a numeric value that does not match any value defined
cannam@48 876 in the schema. Your `switch()` statements must always have a safe default case.
cannam@48 877
cannam@48 878 ## Lessons Learned from Protocol Buffers
cannam@48 879
cannam@48 880 The author of Cap'n Proto's C++ implementation also wrote (in the past) version 2 of Google's
cannam@48 881 Protocol Buffers. As a result, Cap'n Proto's implementation benefits from a number of lessons
cannam@48 882 learned the hard way:
cannam@48 883
cannam@48 884 * Protobuf generated code is enormous due to the parsing and serializing code generated for every
cannam@48 885 class. This actually poses a significant problem in practice -- there exist server binaries
cannam@48 886 containing literally hundreds of megabytes of compiled protobuf code. Cap'n Proto generated code,
cannam@48 887 on the other hand, is almost entirely inlined accessors. The only things that go into `.capnp.o`
cannam@48 888 files are default values for pointer fields (if needed, which is rare) and the encoded schema
cannam@48 889 (just the raw bytes of a Cap'n-Proto-encoded schema structure). The latter could even be removed
cannam@48 890 if you don't use dynamic reflection.
cannam@48 891
cannam@48 892 * The C++ Protobuf implementation used lots of dynamic initialization code (that runs before
cannam@48 893 `main()`) to do things like register types in global tables. This proved problematic for
cannam@48 894 programs which linked in lots of protocols but needed to start up quickly. Cap'n Proto does not
cannam@48 895 use any dynamic initializers anywhere, period.
cannam@48 896
cannam@48 897 * The C++ Protobuf implementation makes heavy use of STL in its interface and implementation.
cannam@48 898 The proliferation of template instantiations gives the Protobuf runtime library a large footprint,
cannam@48 899 and using STL in the interface can lead to weird ABI problems and slow compiles. Cap'n Proto
cannam@48 900 does not use any STL containers in its interface and makes only sparing use of them in its implementation.
cannam@48 901 As a result, the Cap'n Proto runtime library is smaller, and code that uses it compiles quickly.
cannam@48 902
cannam@48 903 * The in-memory representation of messages in Protobuf-C++ involves many heap objects. Each
cannam@48 904 message (struct) is an object, each non-primitive repeated field allocates an array of pointers
cannam@48 905 to more objects, and each string may actually add two heap objects. Cap'n Proto by its nature
cannam@48 906 uses arena allocation, so the entire message is allocated in a few contiguous segments. This
cannam@48 907 means Cap'n Proto spends very little time allocating memory, stores messages more compactly, and
cannam@48 908 avoids memory fragmentation.
cannam@48 909
cannam@48 910 * Related to the last point, Protobuf-C++ relies heavily on object reuse for performance.
cannam@48 911 Building or parsing into a newly-allocated Protobuf object is significantly slower than using
cannam@48 912 an existing one. However, the memory usage of a Protobuf object will tend to grow the more times
cannam@48 913 it is reused, particularly if it is used to parse messages of many different "shapes", so the
cannam@48 914 objects need to be deleted and re-allocated from time to time. All this makes tuning Protobufs
cannam@48 915 fairly tedious. In contrast, enabling memory reuse with Cap'n Proto is as simple as providing
cannam@48 916 a byte buffer to use as scratch space when you build or read in a message. Provide enough scratch
cannam@48 917 space to hold the entire message and Cap'n Proto won't allocate any memory. Or don't -- since
cannam@48 918 Cap'n Proto doesn't do much allocation in the first place, the benefits of scratch space are
cannam@48 919 small.