annotate ext/serd/src/uri.c @ 226:c5cdc9e6a4bf

Add these external library files
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 09 Jun 2017 16:41:31 +0100
parents
children
rev   line source
cannam@226 1 /*
cannam@226 2 Copyright 2011-2014 David Robillard <http://drobilla.net>
cannam@226 3
cannam@226 4 Permission to use, copy, modify, and/or distribute this software for any
cannam@226 5 purpose with or without fee is hereby granted, provided that the above
cannam@226 6 copyright notice and this permission notice appear in all copies.
cannam@226 7
cannam@226 8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
cannam@226 9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
cannam@226 10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
cannam@226 11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
cannam@226 12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
cannam@226 13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
cannam@226 14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
cannam@226 15 */
cannam@226 16
cannam@226 17 #include "serd_internal.h"
cannam@226 18
cannam@226 19 #include <stdlib.h>
cannam@226 20 #include <string.h>
cannam@226 21
cannam@226 22 // #define URI_DEBUG 1
cannam@226 23
cannam@226 24 SERD_API
cannam@226 25 const uint8_t*
cannam@226 26 serd_uri_to_path(const uint8_t* uri)
cannam@226 27 {
cannam@226 28 const uint8_t* path = uri;
cannam@226 29 if (!is_windows_path(uri) && serd_uri_string_has_scheme(uri)) {
cannam@226 30 if (strncmp((const char*)uri, "file:", 5)) {
cannam@226 31 fprintf(stderr, "Non-file URI `%s'\n", uri);
cannam@226 32 return NULL;
cannam@226 33 } else if (!strncmp((const char*)uri, "file://localhost/", 17)) {
cannam@226 34 path = uri + 16;
cannam@226 35 } else if (!strncmp((const char*)uri, "file://", 7)) {
cannam@226 36 path = uri + 7;
cannam@226 37 } else {
cannam@226 38 fprintf(stderr, "Invalid file URI `%s'\n", uri);
cannam@226 39 return NULL;
cannam@226 40 }
cannam@226 41 if (is_windows_path(path + 1)) {
cannam@226 42 ++path; // Special case for terrible Windows file URIs
cannam@226 43 }
cannam@226 44 }
cannam@226 45 return path;
cannam@226 46 }
cannam@226 47
cannam@226 48 SERD_API
cannam@226 49 uint8_t*
cannam@226 50 serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname)
cannam@226 51 {
cannam@226 52 const uint8_t* path = uri;
cannam@226 53 if (hostname) {
cannam@226 54 *hostname = NULL;
cannam@226 55 }
cannam@226 56 if (!strncmp((const char*)uri, "file://", 7)) {
cannam@226 57 const uint8_t* auth = uri + 7;
cannam@226 58 if (*auth == '/') { // No hostname
cannam@226 59 path = auth;
cannam@226 60 } else { // Has hostname
cannam@226 61 if (!(path = (const uint8_t*)strchr((const char*)auth, '/'))) {
cannam@226 62 return NULL;
cannam@226 63 }
cannam@226 64 if (hostname) {
cannam@226 65 *hostname = (uint8_t*)calloc(1, path - auth + 1);
cannam@226 66 memcpy(*hostname, auth, path - auth);
cannam@226 67 }
cannam@226 68 }
cannam@226 69 }
cannam@226 70
cannam@226 71 if (is_windows_path(path + 1)) {
cannam@226 72 ++path;
cannam@226 73 }
cannam@226 74
cannam@226 75 SerdChunk chunk = { NULL, 0 };
cannam@226 76 for (const uint8_t* s = path; *s; ++s) {
cannam@226 77 if (*s == '%') {
cannam@226 78 if (*(s + 1) == '%') {
cannam@226 79 serd_chunk_sink("%", 1, &chunk);
cannam@226 80 ++s;
cannam@226 81 } else if (is_digit(*(s + 1)) && is_digit(*(s + 2))) {
cannam@226 82 const uint8_t code[3] = { *(s + 1), *(s + 2), 0 };
cannam@226 83 uint32_t num;
cannam@226 84 sscanf((const char*)code, "%X", &num);
cannam@226 85 const uint8_t c = num;
cannam@226 86 serd_chunk_sink(&c, 1, &chunk);
cannam@226 87 s += 2;
cannam@226 88 } else {
cannam@226 89 s += 2; // Junk escape, ignore
cannam@226 90 }
cannam@226 91 } else {
cannam@226 92 serd_chunk_sink(s, 1, &chunk);
cannam@226 93 }
cannam@226 94 }
cannam@226 95 return serd_chunk_sink_finish(&chunk);
cannam@226 96 }
cannam@226 97
cannam@226 98 SERD_API
cannam@226 99 bool
cannam@226 100 serd_uri_string_has_scheme(const uint8_t* utf8)
cannam@226 101 {
cannam@226 102 // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
cannam@226 103 if (!utf8 || !is_alpha(utf8[0])) {
cannam@226 104 return false; // Invalid scheme initial character, URI is relative
cannam@226 105 }
cannam@226 106 for (uint8_t c; (c = *++utf8) != '\0';) {
cannam@226 107 switch (c) {
cannam@226 108 case ':':
cannam@226 109 return true; // End of scheme
cannam@226 110 case '+': case '-': case '.':
cannam@226 111 break; // Valid scheme character, continue
cannam@226 112 default:
cannam@226 113 if (!is_alpha(c) && !is_digit(c)) {
cannam@226 114 return false; // Invalid scheme character
cannam@226 115 }
cannam@226 116 }
cannam@226 117 }
cannam@226 118
cannam@226 119 return false;
cannam@226 120 }
cannam@226 121
#ifdef URI_DEBUG
/**
   Print every component of `uri` that has a non-NULL buffer, one per line.

   Only compiled when URI_DEBUG is defined.

   @param uri URI whose components are printed.
   @param file Stream the output is written to.
*/
static void
serd_uri_dump(const SerdURI* uri, FILE* file)
{
// Wrapped in do/while so each use behaves as a single statement
#define PRINT_PART(range, name) \
	do { \
		if ((range).buf) { \
			fprintf(file, " " name " = "); \
			fwrite((range).buf, 1, (range).len, file); \
			fprintf(file, "\n"); \
		} \
	} while (0)

	PRINT_PART(uri->scheme, "scheme ");
	PRINT_PART(uri->authority, "authority");
	PRINT_PART(uri->path_base, "path_base");
	PRINT_PART(uri->path, "path ");
	PRINT_PART(uri->query, "query ");
	PRINT_PART(uri->fragment, "fragment ");

#undef PRINT_PART
}
#endif
cannam@226 141
cannam@226 142 SERD_API
cannam@226 143 SerdStatus
cannam@226 144 serd_uri_parse(const uint8_t* utf8, SerdURI* uri)
cannam@226 145 {
cannam@226 146 *uri = SERD_URI_NULL;
cannam@226 147
cannam@226 148 const uint8_t* ptr = utf8;
cannam@226 149
cannam@226 150 /* See http://tools.ietf.org/html/rfc3986#section-3
cannam@226 151 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
cannam@226 152 */
cannam@226 153
cannam@226 154 /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
cannam@226 155 if (is_alpha(*ptr)) {
cannam@226 156 for (uint8_t c = *++ptr; true; c = *++ptr) {
cannam@226 157 switch (c) {
cannam@226 158 case '\0': case '/': case '?': case '#':
cannam@226 159 ptr = utf8;
cannam@226 160 goto path; // Relative URI (starts with path by definition)
cannam@226 161 case ':':
cannam@226 162 uri->scheme.buf = utf8;
cannam@226 163 uri->scheme.len = (ptr++) - utf8;
cannam@226 164 goto maybe_authority; // URI with scheme
cannam@226 165 case '+': case '-': case '.':
cannam@226 166 continue;
cannam@226 167 default:
cannam@226 168 if (is_alpha(c) || is_digit(c)) {
cannam@226 169 continue;
cannam@226 170 }
cannam@226 171 }
cannam@226 172 }
cannam@226 173 }
cannam@226 174
cannam@226 175 /* S3.2: The authority component is preceded by a double slash ("//")
cannam@226 176 and is terminated by the next slash ("/"), question mark ("?"),
cannam@226 177 or number sign ("#") character, or by the end of the URI.
cannam@226 178 */
cannam@226 179 maybe_authority:
cannam@226 180 if (*ptr == '/' && *(ptr + 1) == '/') {
cannam@226 181 ptr += 2;
cannam@226 182 uri->authority.buf = ptr;
cannam@226 183 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
cannam@226 184 switch (c) {
cannam@226 185 case '/': goto path;
cannam@226 186 case '?': goto query;
cannam@226 187 case '#': goto fragment;
cannam@226 188 default:
cannam@226 189 ++uri->authority.len;
cannam@226 190 }
cannam@226 191 }
cannam@226 192 }
cannam@226 193
cannam@226 194 /* RFC3986 S3.3: The path is terminated by the first question mark ("?")
cannam@226 195 or number sign ("#") character, or by the end of the URI.
cannam@226 196 */
cannam@226 197 path:
cannam@226 198 switch (*ptr) {
cannam@226 199 case '?': goto query;
cannam@226 200 case '#': goto fragment;
cannam@226 201 case '\0': goto end;
cannam@226 202 default: break;
cannam@226 203 }
cannam@226 204 uri->path.buf = ptr;
cannam@226 205 uri->path.len = 0;
cannam@226 206 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
cannam@226 207 switch (c) {
cannam@226 208 case '?': goto query;
cannam@226 209 case '#': goto fragment;
cannam@226 210 default:
cannam@226 211 ++uri->path.len;
cannam@226 212 }
cannam@226 213 }
cannam@226 214
cannam@226 215 /* RFC3986 S3.4: The query component is indicated by the first question
cannam@226 216 mark ("?") character and terminated by a number sign ("#") character
cannam@226 217 or by the end of the URI.
cannam@226 218 */
cannam@226 219 query:
cannam@226 220 if (*ptr == '?') {
cannam@226 221 uri->query.buf = ++ptr;
cannam@226 222 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
cannam@226 223 switch (c) {
cannam@226 224 case '#':
cannam@226 225 goto fragment;
cannam@226 226 default:
cannam@226 227 ++uri->query.len;
cannam@226 228 }
cannam@226 229 }
cannam@226 230 }
cannam@226 231
cannam@226 232 /* RFC3986 S3.5: A fragment identifier component is indicated by the
cannam@226 233 presence of a number sign ("#") character and terminated by the end
cannam@226 234 of the URI.
cannam@226 235 */
cannam@226 236 fragment:
cannam@226 237 if (*ptr == '#') {
cannam@226 238 uri->fragment.buf = ptr;
cannam@226 239 while (*ptr++ != '\0') {
cannam@226 240 ++uri->fragment.len;
cannam@226 241 }
cannam@226 242 }
cannam@226 243
cannam@226 244 end:
cannam@226 245 #ifdef URI_DEBUG
cannam@226 246 fprintf(stderr, "PARSE URI <%s>\n", utf8);
cannam@226 247 serd_uri_dump(uri, stderr);
cannam@226 248 fprintf(stderr, "\n");
cannam@226 249 #endif
cannam@226 250
cannam@226 251 return SERD_SUCCESS;
cannam@226 252 }
cannam@226 253
/**
   Remove leading dot components from `path`.
   See http://tools.ietf.org/html/rfc3986#section-5.2.3

   Only the `len` bytes starting at `path` are examined; anything beyond that
   (such as a following query or fragment) is treated as the end of input.

   @param path Start of the path, need not be NUL-terminated at `len`.
   @param len Length of the path in bytes.
   @param up Set to the number of up-references (e.g. "../") trimmed
   @return A pointer to the new start of `path`
*/
static const uint8_t*
remove_dot_segments(const uint8_t* path, size_t len, size_t* up)
{
	const uint8_t*       begin = path;
	const uint8_t* const end   = path + len;

	*up = 0;
	while (begin < end) {
		// Bounded lookahead: positions at or past `end` read as NUL
		const size_t  left = (size_t)(end - begin);
		const uint8_t c1   = (left > 1) ? begin[1] : 0;
		const uint8_t c2   = (left > 2) ? begin[2] : 0;
		const uint8_t c3   = (left > 3) ? begin[3] : 0;

		if (begin[0] == '.') {
			if (c1 == '/') {
				begin += 2;  // Chop leading "./"
				continue;
			} else if (c1 == '\0') {
				return begin + 1;  // Chop input "."
			} else if (c1 != '.') {
				return begin;  // Ordinary segment that starts with a dot
			}

			if (c2 == '\0') {
				++*up;
				begin += 2;  // Chop input ".."
			} else if (c2 == '/') {
				++*up;
				begin += 3;  // Chop leading "../"
			} else {
				return begin;  // Ordinary segment that starts with ".."
			}
			continue;
		}

		// A leading slash is simplified at most once, then scanning stops
		if (begin[0] == '/' && c1 == '.') {
			if (c2 == '/') {
				return begin + 2;  // Leading "/./" => "/"
			} else if (c2 == '.' && c3 == '/') {
				++*up;
				return begin + 3;  // Leading "/../" => "/"
			}
		}

		return begin;  // Finished chopping dot components
	}

	return begin;
}
cannam@226 319
cannam@226 320 /// Merge `base` and `path` in-place
cannam@226 321 static void
cannam@226 322 merge(SerdChunk* base, SerdChunk* path)
cannam@226 323 {
cannam@226 324 size_t up;
cannam@226 325 const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up);
cannam@226 326 const uint8_t* end = path->buf + path->len;
cannam@226 327
cannam@226 328 if (base->len) {
cannam@226 329 // Find the up'th last slash
cannam@226 330 const uint8_t* base_last = (base->buf + base->len - 1);
cannam@226 331 ++up;
cannam@226 332 do {
cannam@226 333 if (*base_last == '/') {
cannam@226 334 --up;
cannam@226 335 }
cannam@226 336 } while (up > 0 && (--base_last > base->buf));
cannam@226 337
cannam@226 338 // Set path prefix
cannam@226 339 base->len = base_last - base->buf + 1;
cannam@226 340 }
cannam@226 341
cannam@226 342 // Set path suffix
cannam@226 343 path->buf = begin;
cannam@226 344 path->len = end - begin;
cannam@226 345 }
cannam@226 346
cannam@226 347 /// See http://tools.ietf.org/html/rfc3986#section-5.2.2
cannam@226 348 SERD_API
cannam@226 349 void
cannam@226 350 serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
cannam@226 351 {
cannam@226 352 if (!base->scheme.len) {
cannam@226 353 *t = *r; // Don't resolve against non-absolute URIs
cannam@226 354 return;
cannam@226 355 }
cannam@226 356
cannam@226 357 t->path_base.buf = NULL;
cannam@226 358 t->path_base.len = 0;
cannam@226 359 if (r->scheme.len) {
cannam@226 360 *t = *r;
cannam@226 361 } else {
cannam@226 362 if (r->authority.len) {
cannam@226 363 t->authority = r->authority;
cannam@226 364 t->path = r->path;
cannam@226 365 t->query = r->query;
cannam@226 366 } else {
cannam@226 367 t->path = r->path;
cannam@226 368 if (!r->path.len) {
cannam@226 369 t->path_base = base->path;
cannam@226 370 if (r->query.len) {
cannam@226 371 t->query = r->query;
cannam@226 372 } else {
cannam@226 373 t->query = base->query;
cannam@226 374 }
cannam@226 375 } else {
cannam@226 376 if (r->path.buf[0] != '/') {
cannam@226 377 t->path_base = base->path;
cannam@226 378 }
cannam@226 379 merge(&t->path_base, &t->path);
cannam@226 380 t->query = r->query;
cannam@226 381 }
cannam@226 382 t->authority = base->authority;
cannam@226 383 }
cannam@226 384 t->scheme = base->scheme;
cannam@226 385 t->fragment = r->fragment;
cannam@226 386 }
cannam@226 387
cannam@226 388 #ifdef URI_DEBUG
cannam@226 389 fprintf(stderr, "## RESOLVE URI\n# BASE\n");
cannam@226 390 serd_uri_dump(base, stderr);
cannam@226 391 fprintf(stderr, "# URI\n");
cannam@226 392 serd_uri_dump(r, stderr);
cannam@226 393 fprintf(stderr, "# RESULT\n");
cannam@226 394 serd_uri_dump(t, stderr);
cannam@226 395 fprintf(stderr, "\n");
cannam@226 396 #endif
cannam@226 397 }
cannam@226 398
cannam@226 399 /** Write the path of `uri` starting at index `i` */
cannam@226 400 static size_t
cannam@226 401 write_path_tail(SerdSink sink, void* stream, const SerdURI* uri, size_t i)
cannam@226 402 {
cannam@226 403 size_t len = 0;
cannam@226 404 if (i < uri->path_base.len) {
cannam@226 405 len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream);
cannam@226 406 }
cannam@226 407 if (uri->path.buf) {
cannam@226 408 if (i < uri->path_base.len) {
cannam@226 409 len += sink(uri->path.buf, uri->path.len, stream);
cannam@226 410 } else {
cannam@226 411 const size_t j = (i - uri->path_base.len);
cannam@226 412 len += sink(uri->path.buf + j, uri->path.len - j, stream);
cannam@226 413 }
cannam@226 414 }
cannam@226 415 return len;
cannam@226 416 }
cannam@226 417
cannam@226 418 /** Write the path of `uri` relative to the path of `base`. */
cannam@226 419 static size_t
cannam@226 420 write_rel_path(SerdSink sink,
cannam@226 421 void* stream,
cannam@226 422 const SerdURI* uri,
cannam@226 423 const SerdURI* base)
cannam@226 424 {
cannam@226 425 const size_t path_len = uri_path_len(uri);
cannam@226 426 const size_t base_len = uri_path_len(base);
cannam@226 427 const size_t min_len = (path_len < base_len) ? path_len : base_len;
cannam@226 428
cannam@226 429 // Find the last separator common to both paths
cannam@226 430 size_t last_shared_sep = 0;
cannam@226 431 size_t i = 0;
cannam@226 432 for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) {
cannam@226 433 if (uri_path_at(uri, i) == '/') {
cannam@226 434 last_shared_sep = i;
cannam@226 435 }
cannam@226 436 }
cannam@226 437
cannam@226 438 if (i == path_len && i == base_len) { // Paths are identical
cannam@226 439 return 0;
cannam@226 440 } else if (last_shared_sep == 0) { // No common components
cannam@226 441 return write_path_tail(sink, stream, uri, 0);
cannam@226 442 }
cannam@226 443
cannam@226 444 // Find the number of up references ("..") required
cannam@226 445 size_t up = 0;
cannam@226 446 for (size_t s = last_shared_sep + 1; s < base_len; ++s) {
cannam@226 447 if (uri_path_at(base, s) == '/') {
cannam@226 448 ++up;
cannam@226 449 }
cannam@226 450 }
cannam@226 451
cannam@226 452 // Write up references
cannam@226 453 size_t len = 0;
cannam@226 454 for (size_t u = 0; u < up; ++u) {
cannam@226 455 len += sink("../", 3, stream);
cannam@226 456 }
cannam@226 457
cannam@226 458 // Write suffix
cannam@226 459 return len += write_path_tail(sink, stream, uri, last_shared_sep + 1);
cannam@226 460 }
cannam@226 461
cannam@226 462 /// See http://tools.ietf.org/html/rfc3986#section-5.3
cannam@226 463 SERD_API
cannam@226 464 size_t
cannam@226 465 serd_uri_serialise_relative(const SerdURI* uri,
cannam@226 466 const SerdURI* base,
cannam@226 467 const SerdURI* root,
cannam@226 468 SerdSink sink,
cannam@226 469 void* stream)
cannam@226 470 {
cannam@226 471 size_t len = 0;
cannam@226 472 const bool relative = uri_is_under(uri, root ? root : base);
cannam@226 473 if (relative) {
cannam@226 474 len = write_rel_path(sink, stream, uri, base);
cannam@226 475 }
cannam@226 476 if (!relative || (!len && base->query.buf)) {
cannam@226 477 if (uri->scheme.buf) {
cannam@226 478 len += sink(uri->scheme.buf, uri->scheme.len, stream);
cannam@226 479 len += sink(":", 1, stream);
cannam@226 480 }
cannam@226 481 if (uri->authority.buf) {
cannam@226 482 len += sink("//", 2, stream);
cannam@226 483 len += sink(uri->authority.buf, uri->authority.len, stream);
cannam@226 484 }
cannam@226 485 len += write_path_tail(sink, stream, uri, 0);
cannam@226 486 }
cannam@226 487 if (uri->query.buf) {
cannam@226 488 len += sink("?", 1, stream);
cannam@226 489 len += sink(uri->query.buf, uri->query.len, stream);
cannam@226 490 }
cannam@226 491 if (uri->fragment.buf) {
cannam@226 492 // Note uri->fragment.buf includes the leading `#'
cannam@226 493 len += sink(uri->fragment.buf, uri->fragment.len, stream);
cannam@226 494 }
cannam@226 495 return len;
cannam@226 496 }
cannam@226 497
cannam@226 498 /// See http://tools.ietf.org/html/rfc3986#section-5.3
cannam@226 499 SERD_API
cannam@226 500 size_t
cannam@226 501 serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream)
cannam@226 502 {
cannam@226 503 return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream);
cannam@226 504 }