sv-dependency-builds: src/capnproto-git-20161025/doc/cxx.md annotate

annotate src/capnproto-git-20161025/doc/cxx.md @ 140:59a8758c56b1

Add source for PortAudio stable v190600_20161030

author	Chris Cannam <cannam@all-day-breakfast.com>
date	Tue, 03 Jan 2017 13:44:07 +0000
parents	1ac99bfc383d
children

rev	line source
cannam@133	1 ---
cannam@133	2 layout: page
cannam@133	3 title: C++ Serialization
cannam@133	4 ---
cannam@133	5
cannam@133	6 # C++ Serialization
cannam@133	7
cannam@133	8 The Cap'n Proto C++ runtime implementation provides an easy-to-use interface for manipulating
cannam@133	9 messages backed by fast pointer arithmetic. This page discusses the serialization layer of
cannam@133	10 the runtime; see [C++ RPC](cxxrpc.html) for information about the RPC layer.
cannam@133	11
cannam@133	12 ## Example Usage
cannam@133	13
cannam@133	14 For the Cap'n Proto definition:
cannam@133	15
cannam@133	16 {% highlight capnp %}
cannam@133	17 struct Person {
cannam@133	18 id @0 :UInt32;
cannam@133	19 name @1 :Text;
cannam@133	20 email @2 :Text;
cannam@133	21 phones @3 :List(PhoneNumber);
cannam@133	22
cannam@133	23 struct PhoneNumber {
cannam@133	24 number @0 :Text;
cannam@133	25 type @1 :Type;
cannam@133	26
cannam@133	27 enum Type {
cannam@133	28 mobile @0;
cannam@133	29 home @1;
cannam@133	30 work @2;
cannam@133	31 }
cannam@133	32 }
cannam@133	33
cannam@133	34 employment :union {
cannam@133	35 unemployed @4 :Void;
cannam@133	36 employer @5 :Text;
cannam@133	37 school @6 :Text;
cannam@133	38 selfEmployed @7 :Void;
cannam@133	39 # We assume that a person is only one of these.
cannam@133	40 }
cannam@133	41 }
cannam@133	42
cannam@133	43 struct AddressBook {
cannam@133	44 people @0 :List(Person);
cannam@133	45 }
cannam@133	46 {% endhighlight %}
cannam@133	47
cannam@133	48 You might write code like:
cannam@133	49
cannam@133	50 {% highlight c++ %}
cannam@133	51 #include "addressbook.capnp.h"
cannam@133	52 #include <capnp/message.h>
cannam@133	53 #include <capnp/serialize-packed.h>
cannam@133	54 #include <iostream>
cannam@133	55
cannam@133	56 void writeAddressBook(int fd) {
cannam@133	57 ::capnp::MallocMessageBuilder message;
cannam@133	58
cannam@133	59 AddressBook::Builder addressBook = message.initRoot<AddressBook>();
cannam@133	60 ::capnp::List<Person>::Builder people = addressBook.initPeople(2);
cannam@133	61
cannam@133	62 Person::Builder alice = people[0];
cannam@133	63 alice.setId(123);
cannam@133	64 alice.setName("Alice");
cannam@133	65 alice.setEmail("alice@example.com");
cannam@133	66 // Type shown for explanation purposes; normally you'd use auto.
cannam@133	67 ::capnp::List<Person::PhoneNumber>::Builder alicePhones =
cannam@133	68 alice.initPhones(1);
cannam@133	69 alicePhones[0].setNumber("555-1212");
cannam@133	70 alicePhones[0].setType(Person::PhoneNumber::Type::MOBILE);
cannam@133	71 alice.getEmployment().setSchool("MIT");
cannam@133	72
cannam@133	73 Person::Builder bob = people[1];
cannam@133	74 bob.setId(456);
cannam@133	75 bob.setName("Bob");
cannam@133	76 bob.setEmail("bob@example.com");
cannam@133	77 auto bobPhones = bob.initPhones(2);
cannam@133	78 bobPhones[0].setNumber("555-4567");
cannam@133	79 bobPhones[0].setType(Person::PhoneNumber::Type::HOME);
cannam@133	80 bobPhones[1].setNumber("555-7654");
cannam@133	81 bobPhones[1].setType(Person::PhoneNumber::Type::WORK);
cannam@133	82 bob.getEmployment().setUnemployed();
cannam@133	83
cannam@133	84 writePackedMessageToFd(fd, message);
cannam@133	85 }
cannam@133	86
cannam@133	87 void printAddressBook(int fd) {
cannam@133	88 ::capnp::PackedFdMessageReader message(fd);
cannam@133	89
cannam@133	90 AddressBook::Reader addressBook = message.getRoot<AddressBook>();
cannam@133	91
cannam@133	92 for (Person::Reader person : addressBook.getPeople()) {
cannam@133	93 std::cout << person.getName().cStr() << ": "
cannam@133	94 << person.getEmail().cStr() << std::endl;
cannam@133	95 for (Person::PhoneNumber::Reader phone: person.getPhones()) {
cannam@133	96 const char* typeName = "UNKNOWN";
cannam@133	97 switch (phone.getType()) {
cannam@133	98 case Person::PhoneNumber::Type::MOBILE: typeName = "mobile"; break;
cannam@133	99 case Person::PhoneNumber::Type::HOME: typeName = "home"; break;
cannam@133	100 case Person::PhoneNumber::Type::WORK: typeName = "work"; break;
cannam@133	101 }
cannam@133	102 std::cout << " " << typeName << " phone: "
cannam@133	103 << phone.getNumber().cStr() << std::endl;
cannam@133	104 }
cannam@133	105 Person::Employment::Reader employment = person.getEmployment();
cannam@133	106 switch (employment.which()) {
cannam@133	107 case Person::Employment::UNEMPLOYED:
cannam@133	108 std::cout << " unemployed" << std::endl;
cannam@133	109 break;
cannam@133	110 case Person::Employment::EMPLOYER:
cannam@133	111 std::cout << " employer: "
cannam@133	112 << employment.getEmployer().cStr() << std::endl;
cannam@133	113 break;
cannam@133	114 case Person::Employment::SCHOOL:
cannam@133	115 std::cout << " student at: "
cannam@133	116 << employment.getSchool().cStr() << std::endl;
cannam@133	117 break;
cannam@133	118 case Person::Employment::SELF_EMPLOYED:
cannam@133	119 std::cout << " self-employed" << std::endl;
cannam@133	120 break;
cannam@133	121 }
cannam@133	122 }
cannam@133	123 }
cannam@133	124 {% endhighlight %}
cannam@133	125
cannam@133	126 ## C++ Feature Usage: C++11, Exceptions
cannam@133	127
cannam@133	128 This implementation makes use of C++11 features. If you are using GCC, you will need at least
cannam@133	129 version 4.7 to compile Cap'n Proto. If you are using Clang, you will need at least version 3.2.
cannam@133	130 These compilers require the flag `-std=c++11` to enable C++11 features -- your code which
cannam@133	131 `#include`s Cap'n Proto headers will need to be compiled with this flag. Other compilers have not
cannam@133	132 been tested at this time.
cannam@133	133
cannam@133	134 This implementation prefers to handle errors using exceptions. Exceptions are only used in
cannam@133	135 circumstances that should never occur in normal operation. For example, exceptions are thrown
cannam@133	136 on assertion failures (indicating bugs in the code), network failures, and invalid input.
cannam@133	137 Exceptions thrown by Cap'n Proto are never part of the interface and never need to be caught in
cannam@133	138 correct usage. The purpose of throwing exceptions is to allow higher-level code a chance to
cannam@133	139 recover from unexpected circumstances without disrupting other work happening in the same process.
cannam@133	140 For example, a server that handles requests from multiple clients should, on exception, return an
cannam@133	141 error to the client that caused the exception and close that connection, but should continue
cannam@133	142 handling other connections normally.
cannam@133	143
cannam@133	144 When Cap'n Proto code might throw an exception from a destructor, it first checks
cannam@133	145 `std::uncaught_exception()` to ensure that this is safe. If another exception is already active,
cannam@133	146 the new exception is assumed to be a side-effect of the main exception, and is either silently
cannam@133	147 swallowed or reported on a side channel.
cannam@133	148
cannam@133	149 In recognition of the fact that some teams prefer not to use exceptions, and that even enabling
cannam@133	150 exceptions in the compiler introduces overhead, Cap'n Proto allows you to disable them entirely
cannam@133	151 by registering your own exception callback. The callback will be called in place of throwing an
cannam@133	152 exception. The callback may abort the process, and is required to do so in certain circumstances
cannam@133	153 (when a fatal bug is detected). If the callback returns normally, Cap'n Proto will attempt
cannam@133	154 to continue by inventing "safe" values. This will lead to garbage output, but at least the program
cannam@133	155 will not crash. Your exception callback should set some sort of a flag indicating that an error
cannam@133	156 occurred, and somewhere up the stack you should check for that flag and cancel the operation.
cannam@133	157 See the header `kj/exception.h` for details on how to register an exception callback.
cannam@133	158
cannam@133	159 ## KJ Library
cannam@133	160
cannam@133	161 Cap'n Proto is built on top of a basic utility library called KJ. The two were actually developed
cannam@133	162 together -- KJ is simply the stuff which is not specific to Cap'n Proto serialization, and may be
cannam@133	163 useful to others independently of Cap'n Proto. For now, the two are distributed together. The
cannam@133	164 name "KJ" has no particular meaning; it was chosen to be short and easy-to-type.
cannam@133	165
cannam@133	166 As of v0.3, KJ is distributed with Cap'n Proto but built as a separate library. You may need
cannam@133	167 to explicitly link against libraries: `-lcapnp -lkj`
cannam@133	168
cannam@133	169 ## Generating Code
cannam@133	170
cannam@133	171 To generate C++ code from your `.capnp` [interface definition](language.html), run:
cannam@133	172
cannam@133	173 capnp compile -oc++ myproto.capnp
cannam@133	174
cannam@133	175 This will create `myproto.capnp.h` and `myproto.capnp.c++` in the same directory as `myproto.capnp`.
cannam@133	176
cannam@133	177 To use this code in your app, you must link against both `libcapnp` and `libkj`. If you use
cannam@133	178 `pkg-config`, Cap'n Proto provides the `capnp` module to simplify discovery of compiler and linker
cannam@133	179 flags.
cannam@133	180
cannam@133	181 If you use [RPC](cxxrpc.html) (i.e., your schema defines [interfaces](language.html#interfaces)),
cannam@133	182 then you will additionally need to link against `libcapnp-rpc` and `libkj-async`, or use the
cannam@133	183 `capnp-rpc` `pkg-config` module.
cannam@133	184
cannam@133	185 ### Setting a Namespace
cannam@133	186
cannam@133	187 You probably want your generated types to live in a C++ namespace. You will need to import
cannam@133	188 `/capnp/c++.capnp` and use the `namespace` annotation it defines:
cannam@133	189
cannam@133	190 {% highlight capnp %}
cannam@133	191 using Cxx = import "/capnp/c++.capnp";
cannam@133	192 $Cxx.namespace("foo::bar::baz");
cannam@133	193 {% endhighlight %}
cannam@133	194
cannam@133	195 Note that `capnp/c++.capnp` is installed in `$PREFIX/include` (`/usr/local/include` by default)
cannam@133	196 when you install the C++ runtime. The `capnp` tool automatically searches `/usr/include` and
cannam@133	197 `/usr/local/include` for imports that start with a `/`, so it should "just work". If you installed
cannam@133	198 somewhere else, you may need to add it to the search path with the `-I` flag to `capnp compile`,
cannam@133	199 which works much like the compiler flag of the same name.
cannam@133	200
cannam@133	201 ## Types
cannam@133	202
cannam@133	203 ### Primitive Types
cannam@133	204
cannam@133	205 Primitive types map to the obvious C++ types:
cannam@133	206
cannam@133	207 * `Bool` -> `bool`
cannam@133	208 * `IntNN` -> `intNN_t`
cannam@133	209 * `UIntNN` -> `uintNN_t`
cannam@133	210 * `Float32` -> `float`
cannam@133	211 * `Float64` -> `double`
cannam@133	212 * `Void` -> `::capnp::Void` (An empty struct; its only value is `::capnp::VOID`)
cannam@133	213
cannam@133	214 ### Structs
cannam@133	215
cannam@133	216 For each struct `Foo` in your interface, a C++ type named `Foo` is generated. This type itself is
cannam@133	217 really just a namespace; it contains two important inner classes: `Reader` and `Builder`.
cannam@133	218
cannam@133	219 `Reader` represents a read-only instance of `Foo` while `Builder` represents a writable instance
cannam@133	220 (usually, one that you are building). Both classes behave like pointers, in that you can pass them
cannam@133	221 by value and they do not own the underlying data that they operate on. In other words,
cannam@133	222 `Foo::Builder` is like a pointer to a `Foo` while `Foo::Reader` is like a const pointer to a `Foo`.
cannam@133	223
cannam@133	224 For every field `bar` defined in `Foo`, `Foo::Reader` has a method `getBar()`. For primitive types,
cannam@133	225 `get` just returns the type, but for structs, lists, and blobs, it returns a `Reader` for the
cannam@133	226 type.
cannam@133	227
cannam@133	228 {% highlight c++ %}
cannam@133	229 // Example Reader methods:
cannam@133	230
cannam@133	231 // myPrimitiveField @0 :Int32;
cannam@133	232 int32_t getMyPrimitiveField();
cannam@133	233
cannam@133	234 // myTextField @1 :Text;
cannam@133	235 ::capnp::Text::Reader getMyTextField();
cannam@133	236 // (Note that Text::Reader may be implicitly cast to const char* and
cannam@133	237 // std::string.)
cannam@133	238
cannam@133	239 // myStructField @2 :MyStruct;
cannam@133	240 MyStruct::Reader getMyStructField();
cannam@133	241
cannam@133	242 // myListField @3 :List(Float64);
cannam@133	243 ::capnp::List<double>::Reader getMyListField();
cannam@133	244 {% endhighlight %}
cannam@133	245
cannam@133	246 `Foo::Builder`, meanwhile, has several methods for each field `bar`:
cannam@133	247
cannam@133	248 * `getBar()`: For primitives, returns the value. For composites, returns a Builder for the
cannam@133	249 composite. If a composite field has not been initialized (i.e. this is the first time it has
cannam@133	250 been accessed), it will be initialized to a copy of the field's default value before returning.
cannam@133	251 * `setBar(x)`: For primitives, sets the value to x. For composites, sets the value to a deep copy
cannam@133	252 of x, which must be a Reader for the type.
cannam@133	253 * `initBar(n)`: Only for lists and blobs. Sets the field to a newly-allocated list or blob
cannam@133	254 of size n and returns a Builder for it. The elements of the list are initialized to their empty
cannam@133	255 state (zero for numbers, default values for structs).
cannam@133	256 * `initBar()`: Only for structs. Sets the field to a newly-allocated struct and returns a
cannam@133	257 Builder for it. Note that the newly-allocated struct is initialized to the default value for
cannam@133	258 the struct's _type_ (i.e., all-zero) rather than the default value for the field `bar` (if it
cannam@133	259 has one).
cannam@133	260 * `hasBar()`: Only for pointer fields (e.g. structs, lists, blobs). Returns true if the pointer
cannam@133	261 has been initialized (non-null). (This method is also available on readers.)
cannam@133	262 * `adoptBar(x)`: Only for pointer fields. Adopts the orphaned object x, linking it into the field
cannam@133	263 `bar` without copying. See the section on orphans.
cannam@133	264 * `disownBar()`: Disowns the value pointed to by `bar`, setting the pointer to null and returning
cannam@133	265 its previous value as an orphan. See the section on orphans.
cannam@133	266
cannam@133	267 {% highlight c++ %}
cannam@133	268 // Example Builder methods:
cannam@133	269
cannam@133	270 // myPrimitiveField @0 :Int32;
cannam@133	271 int32_t getMyPrimitiveField();
cannam@133	272 void setMyPrimitiveField(int32_t value);
cannam@133	273
cannam@133	274 // myTextField @1 :Text;
cannam@133	275 ::capnp::Text::Builder getMyTextField();
cannam@133	276 void setMyTextField(::capnp::Text::Reader value);
cannam@133	277 ::capnp::Text::Builder initMyTextField(size_t size);
cannam@133	278 // (Note that Text::Reader is implicitly constructable from const char*
cannam@133	279 // and std::string, and Text::Builder can be implicitly cast to
cannam@133	280 // these types.)
cannam@133	281
cannam@133	282 // myStructField @2 :MyStruct;
cannam@133	283 MyStruct::Builder getMyStructField();
cannam@133	284 void setMyStructField(MyStruct::Reader value);
cannam@133	285 MyStruct::Builder initMyStructField();
cannam@133	286
cannam@133	287 // myListField @3 :List(Float64);
cannam@133	288 ::capnp::List<double>::Builder getMyListField();
cannam@133	289 void setMyListField(::capnp::List<double>::Reader value);
cannam@133	290 ::capnp::List<double>::Builder initMyListField(size_t size);
cannam@133	291 {% endhighlight %}
cannam@133	292
cannam@133	293 ### Groups
cannam@133	294
cannam@133	295 Groups look a lot like a combination of a nested type and a field of that type, except that you
cannam@133	296 cannot set, adopt, or disown a group -- you can only get and init it.
cannam@133	297
cannam@133	298 ### Unions
cannam@133	299
cannam@133	300 A named union (as opposed to an unnamed one) works just like a group, except with some additions:
cannam@133	301
cannam@133	302 * For each field `foo`, the union reader and builder have a method `isFoo()` which returns true
cannam@133	303 if `foo` is the currently-set field in the union.
cannam@133	304 * The union reader and builder also have a method `which()` that returns an enum value indicating
cannam@133	305 which field is currently set.
cannam@133	306 * Calling the set, init, or adopt accessors for a field makes it the currently-set field.
cannam@133	307 * Calling the get or disown accessors on a field that isn't currently set will throw an
cannam@133	308 exception in debug mode or return garbage when `NDEBUG` is defined.
cannam@133	309
cannam@133	310 Unnamed unions differ from named unions only in that the accessor methods from the union's members
cannam@133	311 are added directly to the containing type's reader and builder, rather than generating a nested
cannam@133	312 type.
cannam@133	313
cannam@133	314 See the [example](#example-usage) at the top of the page for an example of unions.
cannam@133	315
cannam@133	316 ### Lists
cannam@133	317
cannam@133	318 Lists are represented by the type `capnp::List<T>`, where `T` is any of the primitive types,
cannam@133	319 any Cap'n Proto user-defined type, `capnp::Text`, `capnp::Data`, or `capnp::List<U>`
cannam@133	320 (to form a list of lists).
cannam@133	321
cannam@133	322 The type `List<T>` itself is not instantiatable, but has two inner classes: `Reader` and `Builder`.
cannam@133	323 As with structs, these types behave like pointers to read-only and read-write data, respectively.
cannam@133	324
cannam@133	325 Both `Reader` and `Builder` implement `size()`, `operator[]`, `begin()`, and `end()`, as good C++
cannam@133	326 containers should. Note, though, that `operator[]` is read-only -- you cannot use it to assign
cannam@133	327 the element, because that would require returning a reference, which is impossible because the
cannam@133	328 underlying data may not be in your CPU's native format (e.g., wrong byte order). Instead, to
cannam@133	329 assign an element of a list, you must use `builder.set(index, value)`.
cannam@133	330
cannam@133	331 For `List<Foo>` where `Foo` is a non-primitive type, the type returned by `operator[]` and
cannam@133	332 `iterator::operator*()` is `Foo::Reader` (for `List<Foo>::Reader`) or `Foo::Builder`
cannam@133	333 (for `List<Foo>::Builder`). The builder's `set` method takes a `Foo::Reader` as its second
cannam@133	334 parameter.
cannam@133	335
cannam@133	336 For lists of lists or lists of blobs, the builder also has a method `init(index, size)` which sets
cannam@133	337 the element at the given index to a newly-allocated value with the given size and returns a builder
cannam@133	338 for it. Struct lists do not have an `init` method because all elements are initialized to empty
cannam@133	339 values when the list is created.
cannam@133	340
cannam@133	341 ### Enums
cannam@133	342
cannam@133	343 Cap'n Proto enums become C++11 "enum classes". That means they behave like any other enum, but
cannam@133	344 the enum's values are scoped within the type. E.g. for an enum `Foo` with value `bar`, you must
cannam@133	345 refer to the value as `Foo::BAR`.
cannam@133	346
cannam@133	347 To match prevailing C++ style, an enum's value names are converted to UPPERCASE_WITH_UNDERSCORES
cannam@133	348 (whereas in the schema language you'd write them in camelCase).
cannam@133	349
cannam@133	350 Keep in mind when writing `switch` blocks that an enum read off the wire may have a numeric
cannam@133	351 value that is not listed in its definition. This may be the case if the sender is using a newer
cannam@133	352 version of the protocol, or if the message is corrupt or malicious. In C++11, enums are allowed
cannam@133	353 to have any value that is within the range of their base type, which for Cap'n Proto enums is
cannam@133	354 `uint16_t`.
cannam@133	355
cannam@133	356 ### Blobs (Text and Data)
cannam@133	357
cannam@133	358 Blobs are manipulated using the classes `capnp::Text` and `capnp::Data`. These classes are,
cannam@133	359 again, just containers for inner classes `Reader` and `Builder`. These classes are iterable and
cannam@133	360 implement `size()` and `operator[]` methods. `Builder::operator[]` even returns a reference
cannam@133	361 (unlike with `List<T>`). `Text::Reader` additionally has a method `cStr()` which returns a
cannam@133	362 NUL-terminated `const char*`.
cannam@133	363
cannam@133	364 As a special convenience, if you are using GCC 4.8+ or Clang, `Text::Reader` (and its underlying
cannam@133	365 type, `kj::StringPtr`) can be implicitly converted to and from `std::string` format. This is
cannam@133	366 accomplished without actually `#include`ing `<string>`, since some clients do not want to rely
cannam@133	367 on this rather-bulky header. In fact, any class which defines a `.c_str()` method will be
cannam@133	368 implicitly convertible in this way. Unfortunately, this trick doesn't work on GCC 4.7.
cannam@133	369
cannam@133	370 ### Interfaces
cannam@133	371
cannam@133	372 [Interfaces (RPC) have their own page.](cxxrpc.html)
cannam@133	373
cannam@133	374 ### Generics
cannam@133	375
cannam@133	376 [Generic types](language.html#generic-types) become templates in C++. The outer type (the one whose
cannam@133	377 name matches the schema declaration's name) is templatized; the inner `Reader` and `Builder` types
cannam@133	378 are not, because they inherit the parameters from the outer type. Similarly, template parameters
cannam@133	379 should refer to outer types, not `Reader` or `Builder` types.
cannam@133	380
cannam@133	381 For example, given:
cannam@133	382
cannam@133	383 {% highlight capnp %}
cannam@133	384 struct Map(Key, Value) {
cannam@133	385 entries @0 :List(Entry);
cannam@133	386 struct Entry {
cannam@133	387 key @0 :Key;
cannam@133	388 value @1 :Value;
cannam@133	389 }
cannam@133	390 }
cannam@133	391
cannam@133	392 struct People {
cannam@133	393 byName @0 :Map(Text, Person);
cannam@133	394 # Maps names to Person instances.
cannam@133	395 }
cannam@133	396 {% endhighlight %}
cannam@133	397
cannam@133	398 You might write code like:
cannam@133	399
cannam@133	400 {% highlight c++ %}
cannam@133	401 void processPeople(People::Reader people) {
cannam@133	402 Map<Text, Person>::Reader reader = people.getByName();
cannam@133	403 capnp::List<Map<Text, Person>::Entry>::Reader entries =
cannam@133	404 reader.getEntries();
cannam@133	405 for (auto entry: entries) {
cannam@133	406 processPerson(entry);
cannam@133	407 }
cannam@133	408 }
cannam@133	409 {% endhighlight %}
cannam@133	410
cannam@133	411 Note that all template parameters will be specified with a default value of `AnyPointer`.
cannam@133	412 Therefore, the type `Map<>` is equivalent to `Map<capnp::AnyPointer, capnp::AnyPointer>`.
cannam@133	413
cannam@133	414 ### Constants
cannam@133	415
cannam@133	416 Constants are exposed with their names converted to UPPERCASE_WITH_UNDERSCORES naming style
cannam@133	417 (whereas in the schema language you’d write them in camelCase). Primitive constants are just
cannam@133	418 `constexpr` values. Pointer-type constants (e.g. structs, lists, and blobs) are represented
cannam@133	419 using a proxy object that can be converted to the relevant `Reader` type, either implicitly or
cannam@133	420 using the unary `*` or `->` operators.
cannam@133	421
cannam@133	422 ## Messages and I/O
cannam@133	423
cannam@133	424 To create a new message, you must start by creating a `capnp::MessageBuilder`
cannam@133	425 (`capnp/message.h`). This is an abstract type which you can implement yourself, but most users
cannam@133	426 will want to use `capnp::MallocMessageBuilder`. Once your message is constructed, write it to
cannam@133	427 a file descriptor with `capnp::writeMessageToFd(fd, builder)` (`capnp/serialize.h`) or
cannam@133	428 `capnp::writePackedMessageToFd(fd, builder)` (`capnp/serialize-packed.h`).
cannam@133	429
cannam@133	430 To read a message, you must create a `capnp::MessageReader`, which is another abstract type.
cannam@133	431 Implementations are specific to the data source. You can use `capnp::StreamFdMessageReader`
cannam@133	432 (`capnp/serialize.h`) or `capnp::PackedFdMessageReader` (`capnp/serialize-packed.h`)
cannam@133	433 to read from file descriptors; both take the file descriptor as a constructor argument.
cannam@133	434
cannam@133	435 Note that if your stream contains additional data after the message, `PackedFdMessageReader` may
cannam@133	436 accidentally read some of that data, since it does buffered I/O. To make this work correctly, you
cannam@133	437 will need to set up a multi-use buffered stream. Buffered I/O may also be a good idea with
cannam@133	438 `StreamFdMessageReader` and also when writing, for performance reasons. See `capnp/io.h` for
cannam@133	439 details.
cannam@133	440
cannam@133	441 There is an [example](#example-usage) of all this at the beginning of this page.
cannam@133	442
cannam@133	443 ### Using mmap
cannam@133	444
cannam@133	445 Cap'n Proto can be used together with `mmap()` (or Win32's `MapViewOfFile()`) for extremely fast
cannam@133	446 reads, especially when you only need to use a subset of the data in the file. Currently,
cannam@133	447 Cap'n Proto is not well-suited for _writing_ via `mmap()`, only reading, but this is only because
cannam@133	448 we have not yet invented a mutable segment framing format -- the underlying design should
cannam@133	449 eventually work for both.
cannam@133	450
cannam@133	451 To take advantage of `mmap()` at read time, write your file in regular serialized (but NOT packed)
cannam@133	452 format -- that is, use `writeMessageToFd()`, _not_ `writePackedMessageToFd()`. Now, `mmap()` in
cannam@133	453 the entire file, and then pass the mapped memory to the constructor of
cannam@133	454 `capnp::FlatArrayMessageReader` (defined in `capnp/serialize.h`). That's it. You can use the
cannam@133	455 reader just like a normal `StreamFdMessageReader`. The operating system will automatically page
cannam@133	456 in data from disk as you read it.
cannam@133	457
cannam@133	458 `mmap()` works best when reading from flash media, or when the file is already hot in cache.
cannam@133	459 It works less well with slow rotating disks. Here, disk seeks make random access relatively
cannam@133	460 expensive. Also, if I/O throughput is your bottleneck, then the fact that mmaped data cannot
cannam@133	461 be packed or compressed may hurt you. However, it all depends on what fraction of the file you're
cannam@133	462 actually reading -- if you only pull one field out of one deeply-nested struct in a huge tree, it
cannam@133	463 may still be a win. The only way to know for sure is to do benchmarks! (But be careful to make
cannam@133	464 sure your benchmark is actually interacting with disk and not cache.)
cannam@133	465
cannam@133	466 ## Dynamic Reflection
cannam@133	467
cannam@133	468 Sometimes you want to write generic code that operates on arbitrary types, iterating over the
cannam@133	469 fields or looking them up by name. For example, you might want to write code that encodes
cannam@133	470 arbitrary Cap'n Proto types in JSON format. This requires something like "reflection", but C++
cannam@133	471 does not offer reflection. Also, you might even want to operate on types that aren't compiled
cannam@133	472 into the binary at all, but only discovered at runtime.
cannam@133	473
cannam@133	474 The C++ API supports inspecting schemas at runtime via the interface defined in
cannam@133	475 `capnp/schema.h`, and dynamically reading and writing instances of arbitrary types via
cannam@133	476 `capnp/dynamic.h`. Here's the example from the beginning of this file rewritten in terms
cannam@133	477 of the dynamic API:
cannam@133	478
cannam@133	479 {% highlight c++ %}
cannam@133	480 #include "addressbook.capnp.h"
cannam@133	481 #include <capnp/message.h>
cannam@133	482 #include <capnp/serialize-packed.h>
cannam@133	483 #include <iostream>
cannam@133	484 #include <capnp/schema.h>
cannam@133	485 #include <capnp/dynamic.h>
cannam@133	486
cannam@133	487 using ::capnp::DynamicValue;
cannam@133	488 using ::capnp::DynamicStruct;
cannam@133	489 using ::capnp::DynamicEnum;
cannam@133	490 using ::capnp::DynamicList;
cannam@133	491 using ::capnp::List;
cannam@133	492 using ::capnp::Schema;
cannam@133	493 using ::capnp::StructSchema;
cannam@133	494 using ::capnp::EnumSchema;
cannam@133	495
cannam@133	496 using ::capnp::Void;
cannam@133	497 using ::capnp::Text;
cannam@133	498 using ::capnp::MallocMessageBuilder;
cannam@133	499 using ::capnp::PackedFdMessageReader;
cannam@133	500
cannam@133	501 void dynamicWriteAddressBook(int fd, StructSchema schema) {
cannam@133	502 // Write a message using the dynamic API to set each
cannam@133	503 // field by text name. This isn't something you'd
cannam@133	504 // normally want to do; it's just for illustration.
cannam@133	505
cannam@133	506 MallocMessageBuilder message;
cannam@133	507
cannam@133	508 // Types shown for explanation purposes; normally you'd
cannam@133	509 // use auto.
cannam@133	510 DynamicStruct::Builder addressBook =
cannam@133	511 message.initRoot<DynamicStruct>(schema);
cannam@133	512
cannam@133	513 DynamicList::Builder people =
cannam@133	514 addressBook.init("people", 2).as<DynamicList>();
cannam@133	515
cannam@133	516 DynamicStruct::Builder alice =
cannam@133	517 people[0].as<DynamicStruct>();
cannam@133	518 alice.set("id", 123);
cannam@133	519 alice.set("name", "Alice");
cannam@133	520 alice.set("email", "alice@example.com");
cannam@133	521 auto alicePhones = alice.init("phones", 1).as<DynamicList>();
cannam@133	522 auto phone0 = alicePhones[0].as<DynamicStruct>();
cannam@133	523 phone0.set("number", "555-1212");
cannam@133	524 phone0.set("type", "mobile");
cannam@133	525 alice.get("employment").as<DynamicStruct>()
cannam@133	526 .set("school", "MIT");
cannam@133	527
cannam@133	528 auto bob = people[1].as<DynamicStruct>();
cannam@133	529 bob.set("id", 456);
cannam@133	530 bob.set("name", "Bob");
cannam@133	531 bob.set("email", "bob@example.com");
cannam@133	532
cannam@133	533 // Some magic: We can convert a dynamic sub-value back to
cannam@133	534 // the native type with as<T>()!
cannam@133	535 List<Person::PhoneNumber>::Builder bobPhones =
cannam@133	536 bob.init("phones", 2).as<List<Person::PhoneNumber>>();
cannam@133	537 bobPhones[0].setNumber("555-4567");
cannam@133	538 bobPhones[0].setType(Person::PhoneNumber::Type::HOME);
cannam@133	539 bobPhones[1].setNumber("555-7654");
cannam@133	540 bobPhones[1].setType(Person::PhoneNumber::Type::WORK);
cannam@133	541 bob.get("employment").as<DynamicStruct>()
cannam@133	542 .set("unemployed", ::capnp::VOID);
cannam@133	543
cannam@133	544 writePackedMessageToFd(fd, message);
cannam@133	545 }
cannam@133	546
cannam@133	547 void dynamicPrintValue(DynamicValue::Reader value) {
cannam@133	548 // Recursively print an arbitrary value to std::cout via the dynamic
cannam@133	549 // API, dispatching on value.getType(). LIST and STRUCT recurse into
cannam@133	550 // their elements/fields; look at the handling of STRUCT in particular.
cannam@133	551
cannam@133	552 switch (value.getType()) {
cannam@133	553 case DynamicValue::VOID:
cannam@133	554 std::cout << ""; // Void has no content, so nothing visible is printed.
cannam@133	555 break;
cannam@133	556 case DynamicValue::BOOL:
cannam@133	557 std::cout << (value.as<bool>() ? "true" : "false");
cannam@133	558 break;
cannam@133	559 case DynamicValue::INT:
cannam@133	560 std::cout << value.as<int64_t>();
cannam@133	561 break;
cannam@133	562 case DynamicValue::UINT:
cannam@133	563 std::cout << value.as<uint64_t>();
cannam@133	564 break;
cannam@133	565 case DynamicValue::FLOAT:
cannam@133	566 std::cout << value.as<double>();
cannam@133	567 break;
cannam@133	568 case DynamicValue::TEXT:
cannam@133	569 std::cout << '\"' << value.as<Text>().cStr() << '\"'; // Wrapped in double quotes; the text itself is written as-is, without escaping.
cannam@133	570 break;
cannam@133	571 case DynamicValue::LIST: {
cannam@133	572 std::cout << "[";
cannam@133	573 bool first = true; // Suppresses the separator before the first element.
cannam@133	574 for (auto element: value.as<DynamicList>()) {
cannam@133	575 if (first) {
cannam@133	576 first = false;
cannam@133	577 } else {
cannam@133	578 std::cout << ", ";
cannam@133	579 }
cannam@133	580 dynamicPrintValue(element); // Recurse into each element.
cannam@133	581 }
cannam@133	582 std::cout << "]";
cannam@133	583 break;
cannam@133	584 }
cannam@133	585 case DynamicValue::ENUM: {
cannam@133	586 auto enumValue = value.as<DynamicEnum>();
cannam@133	587 KJ_IF_MAYBE(enumerant, enumValue.getEnumerant()) { // Enumerant found: print its name from the schema.
cannam@133	588 std::cout <<
cannam@133	589 enumerant->getProto().getName().cStr();
cannam@133	590 } else {
cannam@133	591 // Unknown enum value; output raw number.
cannam@133	592 std::cout << enumValue.getRaw();
cannam@133	593 }
cannam@133	594 break;
cannam@133	595 }
cannam@133	596 case DynamicValue::STRUCT: {
cannam@133	597 std::cout << "(";
cannam@133	598 auto structValue = value.as<DynamicStruct>();
cannam@133	599 bool first = true; // Suppresses the separator before the first printed field.
cannam@133	600 for (auto field: structValue.getSchema().getFields()) {
cannam@133	601 if (!structValue.has(field)) continue; // Skip fields for which has() is false.
cannam@133	602 if (first) {
cannam@133	603 first = false;
cannam@133	604 } else {
cannam@133	605 std::cout << ", ";
cannam@133	606 }
cannam@133	607 std::cout << field.getProto().getName().cStr()
cannam@133	608 << " = ";
cannam@133	609 dynamicPrintValue(structValue.get(field)); // Recurse into the field's value.
cannam@133	610 }
cannam@133	611 std::cout << ")";
cannam@133	612 break;
cannam@133	613 }
cannam@133	614 default:
cannam@133	615 // There are other types, we aren't handling them.
cannam@133	616 std::cout << "?";
cannam@133	617 break;
cannam@133	618 }
cannam@133	619 }
cannam@133	620
cannam@133	621 void dynamicPrintMessage(int fd, StructSchema schema) { // Print the message on fd, treating its root as a struct of the given schema.
cannam@133	622 PackedFdMessageReader message(fd); // Message reader constructed on the file descriptor.
cannam@133	623 dynamicPrintValue(message.getRoot<DynamicStruct>(schema));
cannam@133	624 std::cout << std::endl; // End the output line and flush.
cannam@133	625 }
cannam@133	626 {% endhighlight %}
cannam@133	627
cannam@133	628 Notes about the dynamic API:
cannam@133	629
cannam@133	630 * You can implicitly cast any compiled Cap'n Proto struct reader/builder type directly to
cannam@133	631 `DynamicStruct::Reader`/`DynamicStruct::Builder`. Similarly with `List<T>` and `DynamicList`,
cannam@133	632 and even enum types and `DynamicEnum`. Finally, all valid Cap'n Proto field types may be
cannam@133	633 implicitly converted to `DynamicValue`.
cannam@133	634
cannam@133	635 * You can load schemas dynamically at runtime using `SchemaLoader` (`capnp/schema-loader.h`) and
cannam@133	636 use the Dynamic API to manipulate objects of these types. `MessageBuilder` and `MessageReader`
cannam@133	637 have methods for accessing the message root using a dynamic schema.
cannam@133	638
cannam@133	639 * While `SchemaLoader` loads binary schemas, you can also parse directly from text using
cannam@133	640 `SchemaParser` (`capnp/schema-parser.h`). However, this requires linking against `libcapnpc`
cannam@133	641 (in addition to `libcapnp` and `libkj`) -- this code is bulky and not terribly efficient. If
cannam@133	642 you can arrange to use only binary schemas at runtime, you'll be better off.
cannam@133	643
cannam@133	644 * Unlike with Protobufs, there is no "global registry" of compiled-in types. To get the schema
cannam@133	645 for a compiled-in type, use `capnp::Schema::from<MyType>()`.
cannam@133	646
cannam@133	647 * Unlike with Protobufs, the overhead of supporting reflection is small. Generated `.capnp.c++`
cannam@133	648 files contain only some embedded const data structures describing the schema, no code at all,
cannam@133	649 and the runtime library support code is relatively small. Moreover, if you do not use the
cannam@133	650 dynamic API or the schema API, you do not even need to link their implementations into your
cannam@133	651 executable.
cannam@133	652
cannam@133	653 * The dynamic API performs type checks at runtime. In case of error, it will throw an exception.
cannam@133	654 If you compile with `-fno-exceptions`, it will crash instead. Correct usage of the API should
cannam@133	655 never throw, but bugs happen. Enabling and catching exceptions will make your code more robust.
cannam@133	656
cannam@133	657 * Loading user-provided schemas has security implications: it greatly increases the attack
cannam@133	658 surface of the Cap'n Proto library. In particular, it is easy for an attacker to trigger
cannam@133	659 exceptions. To protect yourself, you are strongly advised to enable exceptions and catch them.
cannam@133	660
cannam@133	661 ## Orphans
cannam@133	662
cannam@133	663 An "orphan" is a Cap'n Proto object that is disconnected from the message structure. That is,
cannam@133	664 it is not the root of a message, and there is no other Cap'n Proto object holding a pointer to it.
cannam@133	665 Thus, it has no parents. Orphans are an advanced feature that can help avoid copies and make it
cannam@133	666 easier to use Cap'n Proto objects as part of your application's internal state. Typical
cannam@133	667 applications probably won't use orphans.
cannam@133	668
cannam@133	669 The class `capnp::Orphan<T>` (defined in `<capnp/orphan.h>`) represents a pointer to an orphaned
cannam@133	670 object of type `T`. `T` can be any struct type, `List<T>`, `Text`, or `Data`. E.g.
cannam@133	671 `capnp::Orphan<Person>` would be an orphaned `Person` structure. `Orphan<T>` is a move-only class,
cannam@133	672 similar to `std::unique_ptr<T>`. This prevents two different objects from adopting the same
cannam@133	673 orphan, which would result in an invalid message.
cannam@133	674
cannam@133	675 An orphan can be "adopted" by another object to link it into the message structure. Conversely,
cannam@133	676 an object can "disown" one of its pointers, causing the pointed-to object to become an orphan.
cannam@133	677 Every pointer-typed field `foo` provides builder methods `adoptFoo()` and `disownFoo()` for these
cannam@133	678 purposes. Again, these methods use C++11 move semantics. To use them, you will need to be
cannam@133	679 familiar with `std::move()` (or the equivalent but shorter-named `kj::mv()`).
cannam@133	680
cannam@133	681 Even though an orphan is unlinked from the message tree, it still resides inside memory allocated
cannam@133	682 for a particular message (i.e. a particular `MessageBuilder`). An orphan can only be adopted by
cannam@133	683 objects that live in the same message. To move objects between messages, you must perform a copy.
cannam@133	684 If the message is serialized while an `Orphan<T>` living within it still exists, the orphan's
cannam@133	685 content will be part of the serialized message, but the only way the receiver could find it is by
cannam@133	686 investigating the raw message; the Cap'n Proto API provides no way to detect or read it.
cannam@133	687
cannam@133	688 To construct an orphan from scratch (without having some other object disown it), you need an
cannam@133	689 `Orphanage`, which is essentially an orphan factory associated with some message. You can get one
cannam@133	690 by calling the `MessageBuilder`'s `getOrphanage()` method, or by calling the static method
cannam@133	691 `Orphanage::getForMessageContaining(builder)` and passing it any struct or list builder.
cannam@133	692
cannam@133	693 Note that when an `Orphan<T>` goes out-of-scope without being adopted, the underlying memory that
cannam@133	694 it occupied is overwritten with zeros. If you use packed serialization, these zeros will take very
cannam@133	695 little bandwidth on the wire, but will still waste memory on the sending and receiving ends.
cannam@133	696 Generally, you should avoid allocating message objects that won't be used, or if you cannot avoid
cannam@133	697 it, arrange to copy the entire message over to a new `MessageBuilder` before serializing, since
cannam@133	698 only the reachable objects will be copied.
cannam@133	699
cannam@133	700 ## Reference
cannam@133	701
cannam@133	702 The runtime library contains lots of useful features not described on this page. For now, the
cannam@133	703 best reference is the header files. See:
cannam@133	704
cannam@133	705 capnp/list.h
cannam@133	706 capnp/blob.h
cannam@133	707 capnp/message.h
cannam@133	708 capnp/serialize.h
cannam@133	709 capnp/serialize-packed.h
cannam@133	710 capnp/schema.h
cannam@133	711 capnp/schema-loader.h
cannam@133	712 capnp/dynamic.h
cannam@133	713
cannam@133	714 ## Tips and Best Practices
cannam@133	715
cannam@133	716 Here are some tips for using the C++ Cap'n Proto runtime most effectively:
cannam@133	717
cannam@133	718 * Accessor methods for primitive (non-pointer) fields are fast and inline. They should be just
cannam@133	719 as fast as accessing a struct field through a pointer.
cannam@133	720
cannam@133	721 * Accessor methods for pointer fields, on the other hand, are not inline, as they need to validate
cannam@133	722 the pointer. If you intend to access the same pointer multiple times, it is a good idea to
cannam@133	723 save the value to a local variable to avoid repeating this work. This is generally not a
cannam@133	724 problem given C++11's `auto`.
cannam@133	725
cannam@133	726 Example:
cannam@133	727
cannam@133	728 // BAD
cannam@133	729 frob(foo.getBar().getBaz(),
cannam@133	730 foo.getBar().getQux(),
cannam@133	731 foo.getBar().getCorge());
cannam@133	732
cannam@133	733 // GOOD
cannam@133	734 auto bar = foo.getBar();
cannam@133	735 frob(bar.getBaz(), bar.getQux(), bar.getCorge());
cannam@133	736
cannam@133	737 It is especially important to use this style when reading messages, for another reason: as
cannam@133	738 described under the "security tips" section, below, every time you `get` a pointer, Cap'n Proto
cannam@133	739 increments a counter by the size of the target object. If that counter hits a pre-defined limit,
cannam@133	740 an exception is thrown (or a default value is returned, if exceptions are disabled), to prevent
cannam@133	741 a malicious client from sending your server into an infinite loop with a specially-crafted
cannam@133	742 message. If you repeatedly `get` the same object, you are repeatedly counting the same bytes,
cannam@133	743 and so you may hit the limit prematurely. (Since Cap'n Proto readers are backed directly by
cannam@133	744 the underlying message buffer and do not have anywhere else to store per-object information, it
cannam@133	745 is impossible to remember whether you've seen a particular object already.)
cannam@133	746
cannam@133	747 * Internally, all pointer fields start out "null", even if they have default values. When you have
cannam@133	748 a pointer field `foo` and you call `getFoo()` on the containing struct's `Reader`, if the field
cannam@133	749 is "null", you will receive a reader for that field's default value. This reader is backed by
cannam@133	750 read-only memory; nothing is allocated. However, when you call `get` on a _builder_, and the
cannam@133	751 field is null, then the implementation must make a _copy_ of the default value to return to you.
cannam@133	752 Thus, you've caused the field to become non-null, just by "reading" it. On the other hand, if
cannam@133	753 you call `init` on that field, you are explicitly replacing whatever value is already there
cannam@133	754 (null or not) with a newly-allocated instance, and that newly-allocated instance is _not_ a
cannam@133	755 copy of the field's default value, but just a completely-uninitialized instance of the
cannam@133	756 appropriate type.
cannam@133	757
cannam@133	758 * It is possible to receive a struct value constructed from a newer version of the protocol than
cannam@133	759 the one your binary was built with, and that struct might have extra fields that you don't know
cannam@133	760 about. The Cap'n Proto implementation tries to avoid discarding this extra data. If you copy
cannam@133	761 the struct from one message to another (e.g. by calling a set() method on a parent object), the
cannam@133	762 extra fields will be preserved. This makes it possible to build proxies that receive messages
cannam@133	763 and forward them on without having to rebuild the proxy every time a new field is added. You
cannam@133	764 must be careful, however: in some cases, it's not possible to retain the extra fields, because
cannam@133	765 they need to be copied into a space that is allocated before the expected content is known.
cannam@133	766 In particular, lists of structs are represented as a flat array, not as an array of pointers.
cannam@133	767 Therefore, all memory for all structs in the list must be allocated upfront. Hence, copying
cannam@133	768 a struct value from another message into an element of a list will truncate the value. Because
cannam@133	769 of this, the setter method for struct lists is called `setWithCaveats()` rather than just `set()`.
cannam@133	770
cannam@133	771 * Messages are built in "arena" or "region" style: each object is allocated sequentially in
cannam@133	772 memory, until there is no more room in the segment, in which case a new segment is allocated,
cannam@133	773 and objects continue to be allocated sequentially in that segment. This design is what makes
cannam@133	774 Cap'n Proto possible at all, and it is very fast compared to other allocation strategies.
cannam@133	775 However, it has the disadvantage that if you allocate an object and then discard it, that memory
cannam@133	776 is lost. In fact, the empty space will still become part of the serialized message, even though
cannam@133	777 it is unreachable. The implementation will try to zero it out, so at least it should pack well,
cannam@133	778 but it's still better to avoid this situation. Some ways that this can happen include:
cannam@133	779 * If you `init` a field that is already initialized, the previous value is discarded.
cannam@133	780 * If you create an orphan that is never adopted into the message tree.
cannam@133	781 * If you use `adoptWithCaveats` to adopt an orphaned struct into a struct list, then a shallow
cannam@133	782 copy is necessary, since the struct list requires that its elements are sequential in memory.
cannam@133	783 The previous copy of the struct is discarded (although child objects are transferred properly).
cannam@133	784 * If you copy a struct value from another message using a `set` method, the copy will have the
cannam@133	785 same size as the original. However, the original could have been built with an older version
cannam@133	786 of the protocol which lacked some fields compared to the version your program was built with.
cannam@133	787 If you subsequently `get` that struct, the implementation will be forced to allocate a new
cannam@133	788 (shallow) copy which is large enough to hold all known fields, and the old copy will be
cannam@133	789 discarded. Child objects will be transferred over without being copied -- though they might
cannam@133	790 suffer from the same problem if you `get` them later on.
cannam@133	791 Sometimes, avoiding these problems is too inconvenient. Fortunately, it's also possible to
cannam@133	792 clean up the mess after-the-fact: if you copy the whole message tree into a fresh
cannam@133	793 `MessageBuilder`, only the reachable objects will be copied, leaving out all of the unreachable
cannam@133	794 dead space.
cannam@133	795
cannam@133	796 In the future, Cap'n Proto may be improved such that it can re-use dead space in a message.
cannam@133	797 However, this will only improve things, not fix them entirely: fragmentation could still leave
cannam@133	798 dead space.
cannam@133	799
cannam@133	800 ### Build Tips
cannam@133	801
cannam@133	802 * If you are worried about the binary footprint of the Cap'n Proto library, consider statically
cannam@133	803 linking with the `--gc-sections` linker flag. This will allow the linker to drop pieces of the
cannam@133	804 library that you do not actually use. For example, many users do not use the dynamic schema and
cannam@133	805 reflection APIs, which contribute a large fraction of the Cap'n Proto library's overall
cannam@133	806 footprint. Keep in mind that if you ever stringify a Cap'n Proto type, the stringification code
cannam@133	807 depends on the dynamic API; consider only using stringification in debug builds.
cannam@133	808
cannam@133	809 If you are dynamically linking against the system's shared copy of `libcapnp`, don't worry about
cannam@133	810 its binary size. Remember that only the code which you actually use will be paged into RAM, and
cannam@133	811 those pages are shared with other applications on the system.
cannam@133	812
cannam@133	813 Also remember to strip your binary. In particular, `libcapnpc` (the schema parser) has
cannam@133	814 excessively large symbol names caused by its use of template-based parser combinators. Stripping
cannam@133	815 the binary greatly reduces its size.
cannam@133	816
cannam@133	817 * The Cap'n Proto library has lots of debug-only asserts that are removed if you `#define NDEBUG`,
cannam@133	818 including in headers. If you care at all about performance, you should compile your production
cannam@133	819 binaries with the `-DNDEBUG` compiler flag. In fact, if Cap'n Proto detects that you have
cannam@133	820 optimization enabled but have not defined `NDEBUG`, it will define it for you (with a warning),
cannam@133	821 unless you define `DEBUG` or `KJ_DEBUG` to explicitly request debugging.
cannam@133	822
cannam@133	823 ### Security Tips
cannam@133	824
cannam@133	825 Cap'n Proto has not yet undergone security review. It most likely has some vulnerabilities. You
cannam@133	826 should not attempt to decode Cap'n Proto messages from sources you don't trust at this time.
cannam@133	827
cannam@133	828 However, assuming the Cap'n Proto implementation hardens up eventually, then the following security
cannam@133	829 tips will apply.
cannam@133	830
cannam@133	831 * It is highly recommended that you enable exceptions. When compiled with `-fno-exceptions`,
cannam@133	832 Cap'n Proto categorizes exceptions into "fatal" and "recoverable" varieties. Fatal exceptions
cannam@133	833 cause the server to crash, while recoverable exceptions are handled by logging an error and
cannam@133	834 returning a "safe" garbage value. Fatal is preferred in cases where it's unclear what kind of
cannam@133	835 garbage value would constitute "safe". The more of the library you use, the higher the chance
cannam@133	836 that you will leave yourself open to the possibility that an attacker could trigger a fatal
cannam@133	837 exception somewhere. If you enable exceptions, then you can catch the exception instead of
cannam@133	838 crashing, and return an error just to the attacker rather than to everyone using your server.
cannam@133	839
cannam@133	840 Basic parsing of Cap'n Proto messages shouldn't ever trigger fatal exceptions (assuming the
cannam@133	841 implementation is not buggy). However, the dynamic API -- especially if you are loading schemas
cannam@133	842 controlled by the attacker -- is much more exception-happy. If you cannot use exceptions, then
cannam@133	843 you are advised to avoid the dynamic API when dealing with untrusted data.
cannam@133	844
cannam@133	845 * If you need to process schemas from untrusted sources, take them in binary format, not text.
cannam@133	846 The text parser is a much larger attack surface and not designed to be secure. For instance,
cannam@133	847 as of this writing, it is trivial to deadlock the parser by simply writing a constant whose value
cannam@133	848 depends on itself.
cannam@133	849
cannam@133	850 * Cap'n Proto automatically applies two artificial limits on messages for security reasons:
cannam@133	851 a limit on nesting depth, and a limit on total bytes traversed.
cannam@133	852
cannam@133	853 * The nesting depth limit is designed to prevent stack overflow when handling a deeply-nested
cannam@133	854 recursive type, and defaults to 64. If your types aren't recursive, it is highly unlikely
cannam@133	855 that you would ever hit this limit, and even if they are recursive, it's still unlikely.
cannam@133	856
cannam@133	857 * The traversal limit is designed to defend against maliciously-crafted messages which use
cannam@133	858 pointer cycles or overlapping objects to make a message appear much larger than it looks off
cannam@133	859 the wire. While cycles and overlapping objects are illegal, they are hard to detect reliably.
cannam@133	860 Instead, Cap'n Proto places a limit on how many bytes worth of objects you can _dereference_
cannam@133	861 before it throws an exception. This limit is assessed every time you follow a pointer. By
cannam@133	862 default, the limit is 64MiB (this may change in the future). `StreamFdMessageReader` will
cannam@133	863 actually reject upfront any message which is larger than the traversal limit, even before you
cannam@133	864 start reading it.
cannam@133	865
cannam@133	866 If you need to write your code in such a way that you might frequently re-read the same
cannam@133	867 pointers, instead of increasing the traversal limit to the point where it is no longer useful,
cannam@133	868 consider simply copying the message into a new `MallocMessageBuilder` before starting. Then,
cannam@133	869 the traversal limit will be enforced only during the copy. There is no traversal limit on
cannam@133	870 objects once they live in a `MessageBuilder`, even if you use `.asReader()` to convert a
cannam@133	871 particular object's builder to the corresponding reader type.
cannam@133	872
cannam@133	873 Both limits may be increased using `capnp::ReaderOptions`, defined in `capnp/message.h`.
cannam@133	874
cannam@133	875 * Remember that enums on the wire may have a numeric value that does not match any value defined
cannam@133	876 in the schema. Your `switch()` statements must always have a safe default case.
cannam@133	877
cannam@133	878 ## Lessons Learned from Protocol Buffers
cannam@133	879
cannam@133	880 The author of Cap'n Proto's C++ implementation also wrote (in the past) version 2 of Google's
cannam@133	881 Protocol Buffers. As a result, Cap'n Proto's implementation benefits from a number of lessons
cannam@133	882 learned the hard way:
cannam@133	883
cannam@133	884 * Protobuf generated code is enormous due to the parsing and serializing code generated for every
cannam@133	885 class. This actually poses a significant problem in practice -- there exist server binaries
cannam@133	886 containing literally hundreds of megabytes of compiled protobuf code. Cap'n Proto generated code,
cannam@133	887 on the other hand, is almost entirely inlined accessors. The only things that go into `.capnp.o`
cannam@133	888 files are default values for pointer fields (if needed, which is rare) and the encoded schema
cannam@133	889 (just the raw bytes of a Cap'n-Proto-encoded schema structure). The latter could even be removed
cannam@133	890 if you don't use dynamic reflection.
cannam@133	891
cannam@133	892 * The C++ Protobuf implementation used lots of dynamic initialization code (that runs before
cannam@133	893 `main()`) to do things like register types in global tables. This proved problematic for
cannam@133	894 programs which linked in lots of protocols but needed to start up quickly. Cap'n Proto does not
cannam@133	895 use any dynamic initializers anywhere, period.
cannam@133	896
cannam@133	897 * The C++ Protobuf implementation makes heavy use of STL in its interface and implementation.
cannam@133	898 The proliferation of template instantiations gives the Protobuf runtime library a large footprint,
cannam@133	899 and using STL in the interface can lead to weird ABI problems and slow compiles. Cap'n Proto
cannam@133	900 does not use any STL containers in its interface and makes sparing use in its implementation.
cannam@133	901 As a result, the Cap'n Proto runtime library is smaller, and code that uses it compiles quickly.
cannam@133	902
cannam@133	903 * The in-memory representation of messages in Protobuf-C++ involves many heap objects. Each
cannam@133	904 message (struct) is an object, each non-primitive repeated field allocates an array of pointers
cannam@133	905 to more objects, and each string may actually add two heap objects. Cap'n Proto by its nature
cannam@133	906 uses arena allocation, so the entire message is allocated in a few contiguous segments. This
cannam@133	907 means Cap'n Proto spends very little time allocating memory, stores messages more compactly, and
cannam@133	908 avoids memory fragmentation.
cannam@133	909
cannam@133	910 * Related to the last point, Protobuf-C++ relies heavily on object reuse for performance.
cannam@133	911 Building or parsing into a newly-allocated Protobuf object is significantly slower than using
cannam@133	912 an existing one. However, the memory usage of a Protobuf object will tend to grow the more times
cannam@133	913 it is reused, particularly if it is used to parse messages of many different "shapes", so the
cannam@133	914 objects need to be deleted and re-allocated from time to time. All this makes tuning Protobufs
cannam@133	915 fairly tedious. In contrast, enabling memory reuse with Cap'n Proto is as simple as providing
cannam@133	916 a byte buffer to use as scratch space when you build or read in a message. Provide enough scratch
cannam@133	917 space to hold the entire message and Cap'n Proto won't allocate any memory. Or don't -- since
cannam@133	918 Cap'n Proto doesn't do much allocation in the first place, the benefits of scratch space are
cannam@133	919 small.

Mercurial > hg > sv-dependency-builds

annotate src/capnproto-git-20161025/doc/cxx.md @ 140:59a8758c56b1