Mercurial > hg > sv-dependency-builds
diff osx/include/kj/compat/http.h @ 147:45360b968bf4
Cap'n Proto v0.6 + build for OSX
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Mon, 22 May 2017 10:01:37 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/osx/include/kj/compat/http.h Mon May 22 10:01:37 2017 +0100 @@ -0,0 +1,636 @@ +// Copyright (c) 2017 Sandstorm Development Group, Inc. and contributors +// Licensed under the MIT License: +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef KJ_COMPAT_HTTP_H_ +#define KJ_COMPAT_HTTP_H_ +// The KJ HTTP client/server library. +// +// This is a simple library which can be used to implement an HTTP client or server. Properties +// of this library include: +// - Uses KJ async framework. +// - Agnostic to transport layer -- you can provide your own. +// - Header parsing is zero-copy -- it results in strings that point directly into the buffer +// received off the wire. +// - Application code which reads and writes headers refers to headers by symbolic names, not by +// string literals, with lookups being array-index-based, not map-based. To make this possible, +// the application announces what headers it cares about in advance, in order to assign numeric +// values to them. +// - Methods are identified by an enum. + +#include <kj/string.h> +#include <kj/vector.h> +#include <kj/memory.h> +#include <kj/one-of.h> +#include <kj/async-io.h> + +namespace kj { + +#define KJ_HTTP_FOR_EACH_METHOD(MACRO) \ + MACRO(GET) \ + MACRO(HEAD) \ + MACRO(POST) \ + MACRO(PUT) \ + MACRO(DELETE) \ + MACRO(PATCH) \ + MACRO(PURGE) \ + MACRO(OPTIONS) \ + MACRO(TRACE) \ + /* standard methods */ \ + /* */ \ + /* (CONNECT is intentionally omitted since it is handled specially in HttpHandler) */ \ + \ + MACRO(COPY) \ + MACRO(LOCK) \ + MACRO(MKCOL) \ + MACRO(MOVE) \ + MACRO(PROPFIND) \ + MACRO(PROPPATCH) \ + MACRO(SEARCH) \ + MACRO(UNLOCK) \ + /* WebDAV */ \ + \ + MACRO(REPORT) \ + MACRO(MKACTIVITY) \ + MACRO(CHECKOUT) \ + MACRO(MERGE) \ + /* Subversion */ \ + \ + MACRO(MSEARCH) \ + MACRO(NOTIFY) \ + MACRO(SUBSCRIBE) \ + MACRO(UNSUBSCRIBE) + /* UPnP */ + +#define KJ_HTTP_FOR_EACH_CONNECTION_HEADER(MACRO) \ + MACRO(connection, "Connection") \ + MACRO(contentLength, "Content-Length") \ + MACRO(keepAlive, "Keep-Alive") \ + MACRO(te, "TE") \ + MACRO(trailer, "Trailer") \ + MACRO(transferEncoding, "Transfer-Encoding") \ + MACRO(upgrade, "Upgrade") + +enum class HttpMethod { + // Enum of known HTTP methods. + // + // We use an enum rather than a string to allow for faster parsing and switching and to reduce + // ambiguity. + +#define DECLARE_METHOD(id) id, +KJ_HTTP_FOR_EACH_METHOD(DECLARE_METHOD) +#undef DECALRE_METHOD +}; + +kj::StringPtr KJ_STRINGIFY(HttpMethod method); +kj::Maybe<HttpMethod> tryParseHttpMethod(kj::StringPtr name); + +class HttpHeaderTable; + +class HttpHeaderId { + // Identifies an HTTP header by numeric ID that indexes into an HttpHeaderTable. + // + // The KJ HTTP API prefers that headers be identified by these IDs for a few reasons: + // - Integer lookups are much more efficient than string lookups. + // - Case-insensitivity is awkward to deal with when const strings are being passed to the lookup + // method. + // - Writing out strings less often means fewer typos. + // + // See HttpHeaderTable for usage hints. + +public: + HttpHeaderId() = default; + + inline bool operator==(const HttpHeaderId& other) const { return id == other.id; } + inline bool operator!=(const HttpHeaderId& other) const { return id != other.id; } + inline bool operator< (const HttpHeaderId& other) const { return id < other.id; } + inline bool operator> (const HttpHeaderId& other) const { return id > other.id; } + inline bool operator<=(const HttpHeaderId& other) const { return id <= other.id; } + inline bool operator>=(const HttpHeaderId& other) const { return id >= other.id; } + + inline size_t hashCode() const { return id; } + + kj::StringPtr toString() const; + + void requireFrom(HttpHeaderTable& table) const; + // In debug mode, throws an exception if the HttpHeaderId is not from the given table. + // + // In opt mode, no-op. + +#define KJ_HTTP_FOR_EACH_BUILTIN_HEADER(MACRO) \ + MACRO(HOST, "Host") \ + MACRO(DATE, "Date") \ + MACRO(LOCATION, "Location") \ + MACRO(CONTENT_TYPE, "Content-Type") + // For convenience, these very-common headers are valid for all HttpHeaderTables. You can refer + // to them like: + // + // HttpHeaderId::HOST + // + // TODO(0.7): Fill this out with more common headers. + +#define DECLARE_HEADER(id, name) \ + static const HttpHeaderId id; + // Declare a constant for each builtin header, e.g.: HttpHeaderId::CONNECTION + + KJ_HTTP_FOR_EACH_BUILTIN_HEADER(DECLARE_HEADER); +#undef DECLARE_HEADER + +private: + HttpHeaderTable* table; + uint id; + + inline explicit constexpr HttpHeaderId(HttpHeaderTable* table, uint id): table(table), id(id) {} + friend class HttpHeaderTable; + friend class HttpHeaders; +}; + +class HttpHeaderTable { + // Construct an HttpHeaderTable to declare which headers you'll be interested in later on, and + // to manufacture IDs for them. + // + // Example: + // + // // Build a header table with the headers we are interested in. + // kj::HttpHeaderTable::Builder builder; + // const HttpHeaderId accept = builder.add("Accept"); + // const HttpHeaderId contentType = builder.add("Content-Type"); + // kj::HttpHeaderTable table(kj::mv(builder)); + // + // // Create an HTTP client. + // auto client = kj::newHttpClient(table, network); + // + // // Get http://example.com. + // HttpHeaders headers(table); + // headers.set(accept, "text/html"); + // auto response = client->send(kj::HttpMethod::GET, "http://example.com", headers) + // .wait(waitScope); + // auto msg = kj::str("Response content type: ", response.headers.get(contentType)); + + struct IdsByNameMap; + +public: + HttpHeaderTable(); + // Constructs a table that only contains the builtin headers. + + class Builder { + public: + Builder(); + HttpHeaderId add(kj::StringPtr name); + Own<HttpHeaderTable> build(); + + HttpHeaderTable& getFutureTable(); + // Get the still-unbuilt header table. You cannot actually use it until build() has been + // called. + // + // This method exists to help when building a shared header table -- the Builder may be passed + // to several components, each of which will register the headers they need and get a reference + // to the future table. + + private: + kj::Own<HttpHeaderTable> table; + }; + + KJ_DISALLOW_COPY(HttpHeaderTable); // Can't copy because HttpHeaderId points to the table. + ~HttpHeaderTable() noexcept(false); + + uint idCount(); + // Return the number of IDs in the table. + + kj::Maybe<HttpHeaderId> stringToId(kj::StringPtr name); + // Try to find an ID for the given name. The matching is case-insensitive, per the HTTP spec. + // + // Note: if `name` contains characters that aren't allowed in HTTP header names, this may return + // a bogus value rather than null, due to optimizations used in case-insensitive matching. + + kj::StringPtr idToString(HttpHeaderId id); + // Get the canonical string name for the given ID. + +private: + kj::Vector<kj::StringPtr> namesById; + kj::Own<IdsByNameMap> idsByName; +}; + +class HttpHeaders { + // Represents a set of HTTP headers. + // + // This class guards against basic HTTP header injection attacks: Trying to set a header name or + // value containing a newline, carriage return, or other invalid character will throw an + // exception. + +public: + explicit HttpHeaders(HttpHeaderTable& table); + + KJ_DISALLOW_COPY(HttpHeaders); + HttpHeaders(HttpHeaders&&) = default; + HttpHeaders& operator=(HttpHeaders&&) = default; + + void clear(); + // Clears all contents, as if the object was freshly-allocated. However, calling this rather + // than actually re-allocating the object may avoid re-allocation of internal objects. + + HttpHeaders clone() const; + // Creates a deep clone of the HttpHeaders. The returned object owns all strings it references. + + HttpHeaders cloneShallow() const; + // Creates a shallow clone of the HttpHeaders. The returned object references the same strings + // as the original, owning none of them. + + kj::Maybe<kj::StringPtr> get(HttpHeaderId id) const; + // Read a header. + + template <typename Func> + void forEach(Func&& func) const; + // Calls `func(name, value)` for each header in the set -- including headers that aren't mapped + // to IDs in the header table. Both inputs are of type kj::StringPtr. + + void set(HttpHeaderId id, kj::StringPtr value); + void set(HttpHeaderId id, kj::String&& value); + // Sets a header value, overwriting the existing value. + // + // The String&& version is equivalent to calling the other version followed by takeOwnership(). + // + // WARNING: It is the caller's responsibility to ensure that `value` remains valid until the + // HttpHeaders object is destroyed. This allows string literals to be passed without making a + // copy, but complicates the use of dynamic values. Hint: Consider using `takeOwnership()`. + + void add(kj::StringPtr name, kj::StringPtr value); + void add(kj::StringPtr name, kj::String&& value); + void add(kj::String&& name, kj::String&& value); + // Append a header. `name` will be looked up in the header table, but if it's not mapped, the + // header will be added to the list of unmapped headers. + // + // The String&& versions are equivalent to calling the other version followed by takeOwnership(). + // + // WARNING: It is the caller's responsibility to ensure that `name` and `value` remain valid + // until the HttpHeaders object is destroyed. This allows string literals to be passed without + // making a copy, but complicates the use of dynamic values. Hint: Consider using + // `takeOwnership()`. + + void unset(HttpHeaderId id); + // Removes a header. + // + // It's not possible to remove a header by string name because non-indexed headers would take + // O(n) time to remove. Instead, construct a new HttpHeaders object and copy contents. + + void takeOwnership(kj::String&& string); + void takeOwnership(kj::Array<char>&& chars); + void takeOwnership(HttpHeaders&& otherHeaders); + // Takes overship of a string so that it lives until the HttpHeaders object is destroyed. Useful + // when you've passed a dynamic value to set() or add() or parse*(). + + struct ConnectionHeaders { + // These headers govern details of the specific HTTP connection or framing of the content. + // Hence, they are managed internally within the HTTP library, and never appear in an + // HttpHeaders structure. + +#define DECLARE_HEADER(id, name) \ + kj::StringPtr id; + KJ_HTTP_FOR_EACH_CONNECTION_HEADER(DECLARE_HEADER) +#undef DECLARE_HEADER + }; + + struct Request { + HttpMethod method; + kj::StringPtr url; + ConnectionHeaders connectionHeaders; + }; + struct Response { + uint statusCode; + kj::StringPtr statusText; + ConnectionHeaders connectionHeaders; + }; + + kj::Maybe<Request> tryParseRequest(kj::ArrayPtr<char> content); + kj::Maybe<Response> tryParseResponse(kj::ArrayPtr<char> content); + // Parse an HTTP header blob and add all the headers to this object. + // + // `content` should be all text from the start of the request to the first occurrance of two + // newlines in a row -- including the first of these two newlines, but excluding the second. + // + // The parse is performed with zero copies: The callee clobbers `content` with '\0' characters + // to split it into a bunch of shorter strings. The caller must keep `content` valid until the + // `HttpHeaders` is destroyed, or pass it to `takeOwnership()`. + + kj::String serializeRequest(HttpMethod method, kj::StringPtr url, + const ConnectionHeaders& connectionHeaders) const; + kj::String serializeResponse(uint statusCode, kj::StringPtr statusText, + const ConnectionHeaders& connectionHeaders) const; + // Serialize the headers as a complete request or response blob. The blob uses '\r\n' newlines + // and includes the double-newline to indicate the end of the headers. + + kj::String toString() const; + +private: + HttpHeaderTable* table; + + kj::Array<kj::StringPtr> indexedHeaders; + // Size is always table->idCount(). + + struct Header { + kj::StringPtr name; + kj::StringPtr value; + }; + kj::Vector<Header> unindexedHeaders; + + kj::Vector<kj::Array<char>> ownedStrings; + + kj::Maybe<uint> addNoCheck(kj::StringPtr name, kj::StringPtr value); + + kj::StringPtr cloneToOwn(kj::StringPtr str); + + kj::String serialize(kj::ArrayPtr<const char> word1, + kj::ArrayPtr<const char> word2, + kj::ArrayPtr<const char> word3, + const ConnectionHeaders& connectionHeaders) const; + + bool parseHeaders(char* ptr, char* end, ConnectionHeaders& connectionHeaders); + + // TODO(perf): Arguably we should store a map, but header sets are never very long + // TODO(perf): We could optimize for common headers by storing them directly as fields. We could + // also add direct accessors for those headers. +}; + +class WebSocket { +public: + WebSocket(kj::Own<kj::AsyncIoStream> stream); + // Create a WebSocket wrapping the given I/O stream. + + kj::Promise<void> send(kj::ArrayPtr<const byte> message); + kj::Promise<void> send(kj::ArrayPtr<const char> message); +}; + +class HttpClient { + // Interface to the client end of an HTTP connection. + // + // There are two kinds of clients: + // * Host clients are used when talking to a specific host. The `url` specified in a request + // is actually just a path. (A `Host` header is still required in all requests.) + // * Proxy clients are used when the target could be any arbitrary host on the internet. + // The `url` specified in a request is a full URL including protocol and hostname. + +public: + struct Response { + uint statusCode; + kj::StringPtr statusText; + const HttpHeaders* headers; + kj::Own<kj::AsyncInputStream> body; + // `statusText` and `headers` remain valid until `body` is dropped. + }; + + struct Request { + kj::Own<kj::AsyncOutputStream> body; + // Write the request entity body to this stream, then drop it when done. + // + // May be null for GET and HEAD requests (which have no body) and requests that have + // Content-Length: 0. + + kj::Promise<Response> response; + // Promise for the eventual respnose. + }; + + virtual Request request(HttpMethod method, kj::StringPtr url, const HttpHeaders& headers, + kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0; + // Perform an HTTP request. + // + // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL, + // depending on whether the client is a proxy client or a host client. + // + // `url` and `headers` need only remain valid until `request()` returns (they can be + // stack-allocated). + // + // `expectedBodySize`, if provided, must be exactly the number of bytes that will be written to + // the body. This will trigger use of the `Content-Length` connection header. Otherwise, + // `Transfer-Encoding: chunked` will be used. + + struct WebSocketResponse { + uint statusCode; + kj::StringPtr statusText; + const HttpHeaders* headers; + kj::OneOf<kj::Own<kj::AsyncInputStream>, kj::Own<WebSocket>> upstreamOrBody; + // `statusText` and `headers` remain valid until `upstreamOrBody` is dropped. + }; + virtual kj::Promise<WebSocketResponse> openWebSocket( + kj::StringPtr url, const HttpHeaders& headers, kj::Own<WebSocket> downstream); + // Tries to open a WebSocket. Default implementation calls send() and never returns a WebSocket. + // + // `url` and `headers` are invalidated when the returned promise resolves. + + virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::String host); + // Handles CONNECT requests. Only relevant for proxy clients. Default implementation throws + // UNIMPLEMENTED. +}; + +class HttpService { + // Interface which HTTP services should implement. + // + // This interface is functionally equivalent to HttpClient, but is intended for applications to + // implement rather than call. The ergonomics and performance of the method signatures are + // optimized for the serving end. + // + // As with clients, there are two kinds of services: + // * Host services are used when talking to a specific host. The `url` specified in a request + // is actually just a path. (A `Host` header is still required in all requests, and the service + // may in fact serve multiple origins via this header.) + // * Proxy services are used when the target could be any arbitrary host on the internet, i.e. to + // implement an HTTP proxy. The `url` specified in a request is a full URL including protocol + // and hostname. + +public: + class Response { + public: + virtual kj::Own<kj::AsyncOutputStream> send( + uint statusCode, kj::StringPtr statusText, const HttpHeaders& headers, + kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0; + // Begin the response. + // + // `statusText` and `headers` need only remain valid until send() returns (they can be + // stack-allocated). + }; + + virtual kj::Promise<void> request( + HttpMethod method, kj::StringPtr url, const HttpHeaders& headers, + kj::AsyncInputStream& requestBody, Response& response) = 0; + // Perform an HTTP request. + // + // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL, + // depending on whether the service is a proxy service or a host service. + // + // `url` and `headers` are invalidated on the first read from `requestBody` or when the returned + // promise resolves, whichever comes first. + + class WebSocketResponse: public Response { + public: + kj::Own<WebSocket> startWebSocket( + uint statusCode, kj::StringPtr statusText, const HttpHeaders& headers, + WebSocket& upstream); + // Begin the response. + // + // `statusText` and `headers` need only remain valid until startWebSocket() returns (they can + // be stack-allocated). + }; + + virtual kj::Promise<void> openWebSocket( + kj::StringPtr url, const HttpHeaders& headers, WebSocketResponse& response); + // Tries to open a WebSocket. Default implementation calls request() and never returns a + // WebSocket. + // + // `url` and `headers` are invalidated when the returned promise resolves. + + virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::String host); + // Handles CONNECT requests. Only relevant for proxy services. Default implementation throws + // UNIMPLEMENTED. +}; + +kj::Own<HttpClient> newHttpClient(HttpHeaderTable& responseHeaderTable, kj::Network& network, + kj::Maybe<kj::Network&> tlsNetwork = nullptr); +// Creates a proxy HttpClient that connects to hosts over the given network. +// +// `responseHeaderTable` is used when parsing HTTP responses. Requests can use any header table. +// +// `tlsNetwork` is required to support HTTPS destination URLs. Otherwise, only HTTP URLs can be +// fetched. + +kj::Own<HttpClient> newHttpClient(HttpHeaderTable& responseHeaderTable, kj::AsyncIoStream& stream); +// Creates an HttpClient that speaks over the given pre-established connection. The client may +// be used as a proxy client or a host client depending on whether the peer is operating as +// a proxy. +// +// Note that since this client has only one stream to work with, it will try to pipeline all +// requests on this stream. If one request or response has an I/O failure, all subsequent requests +// fail as well. If the destination server chooses to close the connection after a response, +// subsequent requests will fail. If a response takes a long time, it blocks subsequent responses. +// If a WebSocket is opened successfully, all subsequent requests fail. + +kj::Own<HttpClient> newHttpClient(HttpService& service); +kj::Own<HttpService> newHttpService(HttpClient& client); +// Adapts an HttpClient to an HttpService and vice versa. + +struct HttpServerSettings { + kj::Duration headerTimeout = 15 * kj::SECONDS; + // After initial connection open, or after receiving the first byte of a pipelined request, + // the client must send the complete request within this time. + + kj::Duration pipelineTimeout = 5 * kj::SECONDS; + // After one request/response completes, we'll wait up to this long for a pipelined request to + // arrive. +}; + +class HttpServer: private kj::TaskSet::ErrorHandler { + // Class which listens for requests on ports or connections and sends them to an HttpService. + +public: + typedef HttpServerSettings Settings; + + HttpServer(kj::Timer& timer, HttpHeaderTable& requestHeaderTable, HttpService& service, + Settings settings = Settings()); + // Set up an HttpServer that directs incoming connections to the given service. The service + // may be a host service or a proxy service depending on whether you are intending to implement + // an HTTP server or an HTTP proxy. + + kj::Promise<void> drain(); + // Stop accepting new connections or new requests on existing connections. Finish any requests + // that are already executing, then close the connections. Returns once no more requests are + // in-flight. + + kj::Promise<void> listenHttp(kj::ConnectionReceiver& port); + // Accepts HTTP connections on the given port and directs them to the handler. + // + // The returned promise never completes normally. It may throw if port.accept() throws. Dropping + // the returned promise will cause the server to stop listening on the port, but already-open + // connections will continue to be served. Destroy the whole HttpServer to cancel all I/O. + + kj::Promise<void> listenHttp(kj::Own<kj::AsyncIoStream> connection); + // Reads HTTP requests from the given connection and directs them to the handler. A successful + // completion of the promise indicates that all requests received on the connection resulted in + // a complete response, and the client closed the connection gracefully or drain() was called. + // The promise throws if an unparseable request is received or if some I/O error occurs. Dropping + // the returned promise will cancel all I/O on the connection and cancel any in-flight requests. + +private: + class Connection; + + kj::Timer& timer; + HttpHeaderTable& requestHeaderTable; + HttpService& service; + Settings settings; + + bool draining = false; + kj::ForkedPromise<void> onDrain; + kj::Own<kj::PromiseFulfiller<void>> drainFulfiller; + + uint connectionCount = 0; + kj::Maybe<kj::Own<kj::PromiseFulfiller<void>>> zeroConnectionsFulfiller; + + kj::TaskSet tasks; + + HttpServer(kj::Timer& timer, HttpHeaderTable& requestHeaderTable, HttpService& service, + Settings settings, kj::PromiseFulfillerPair<void> paf); + + kj::Promise<void> listenLoop(kj::ConnectionReceiver& port); + + void taskFailed(kj::Exception&& exception) override; +}; + +// ======================================================================================= +// inline implementation + +inline void HttpHeaderId::requireFrom(HttpHeaderTable& table) const { + KJ_IREQUIRE(this->table == nullptr || this->table == &table, + "the provided HttpHeaderId is from the wrong HttpHeaderTable"); +} + +inline kj::Own<HttpHeaderTable> HttpHeaderTable::Builder::build() { return kj::mv(table); } +inline HttpHeaderTable& HttpHeaderTable::Builder::getFutureTable() { return *table; } + +inline uint HttpHeaderTable::idCount() { return namesById.size(); } + +inline kj::StringPtr HttpHeaderTable::idToString(HttpHeaderId id) { + id.requireFrom(*this); + return namesById[id.id]; +} + +inline kj::Maybe<kj::StringPtr> HttpHeaders::get(HttpHeaderId id) const { + id.requireFrom(*table); + auto result = indexedHeaders[id.id]; + return result == nullptr ? kj::Maybe<kj::StringPtr>(nullptr) : result; +} + +inline void HttpHeaders::unset(HttpHeaderId id) { + id.requireFrom(*table); + indexedHeaders[id.id] = nullptr; +} + +template <typename Func> +inline void HttpHeaders::forEach(Func&& func) const { + for (auto i: kj::indices(indexedHeaders)) { + if (indexedHeaders[i] != nullptr) { + func(table->idToString(HttpHeaderId(table, i)), indexedHeaders[i]); + } + } + + for (auto& header: unindexedHeaders) { + func(header.name, header.value); + } +} + +} // namespace kj + +#endif // KJ_COMPAT_HTTP_H_