comparison ext/serd/src/uri.c @ 226:c5cdc9e6a4bf

Add these external library files
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 09 Jun 2017 16:41:31 +0100
parents
children
comparison
equal deleted inserted replaced
225:025b3e2f7c17 226:c5cdc9e6a4bf
1 /*
2 Copyright 2011-2014 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include "serd_internal.h"
18
19 #include <stdlib.h>
20 #include <string.h>
21
22 // #define URI_DEBUG 1
23
24 SERD_API
25 const uint8_t*
26 serd_uri_to_path(const uint8_t* uri)
27 {
28 const uint8_t* path = uri;
29 if (!is_windows_path(uri) && serd_uri_string_has_scheme(uri)) {
30 if (strncmp((const char*)uri, "file:", 5)) {
31 fprintf(stderr, "Non-file URI `%s'\n", uri);
32 return NULL;
33 } else if (!strncmp((const char*)uri, "file://localhost/", 17)) {
34 path = uri + 16;
35 } else if (!strncmp((const char*)uri, "file://", 7)) {
36 path = uri + 7;
37 } else {
38 fprintf(stderr, "Invalid file URI `%s'\n", uri);
39 return NULL;
40 }
41 if (is_windows_path(path + 1)) {
42 ++path; // Special case for terrible Windows file URIs
43 }
44 }
45 return path;
46 }
47
48 SERD_API
49 uint8_t*
50 serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname)
51 {
52 const uint8_t* path = uri;
53 if (hostname) {
54 *hostname = NULL;
55 }
56 if (!strncmp((const char*)uri, "file://", 7)) {
57 const uint8_t* auth = uri + 7;
58 if (*auth == '/') { // No hostname
59 path = auth;
60 } else { // Has hostname
61 if (!(path = (const uint8_t*)strchr((const char*)auth, '/'))) {
62 return NULL;
63 }
64 if (hostname) {
65 *hostname = (uint8_t*)calloc(1, path - auth + 1);
66 memcpy(*hostname, auth, path - auth);
67 }
68 }
69 }
70
71 if (is_windows_path(path + 1)) {
72 ++path;
73 }
74
75 SerdChunk chunk = { NULL, 0 };
76 for (const uint8_t* s = path; *s; ++s) {
77 if (*s == '%') {
78 if (*(s + 1) == '%') {
79 serd_chunk_sink("%", 1, &chunk);
80 ++s;
81 } else if (is_digit(*(s + 1)) && is_digit(*(s + 2))) {
82 const uint8_t code[3] = { *(s + 1), *(s + 2), 0 };
83 uint32_t num;
84 sscanf((const char*)code, "%X", &num);
85 const uint8_t c = num;
86 serd_chunk_sink(&c, 1, &chunk);
87 s += 2;
88 } else {
89 s += 2; // Junk escape, ignore
90 }
91 } else {
92 serd_chunk_sink(s, 1, &chunk);
93 }
94 }
95 return serd_chunk_sink_finish(&chunk);
96 }
97
98 SERD_API
99 bool
100 serd_uri_string_has_scheme(const uint8_t* utf8)
101 {
102 // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
103 if (!utf8 || !is_alpha(utf8[0])) {
104 return false; // Invalid scheme initial character, URI is relative
105 }
106 for (uint8_t c; (c = *++utf8) != '\0';) {
107 switch (c) {
108 case ':':
109 return true; // End of scheme
110 case '+': case '-': case '.':
111 break; // Valid scheme character, continue
112 default:
113 if (!is_alpha(c) && !is_digit(c)) {
114 return false; // Invalid scheme character
115 }
116 }
117 }
118
119 return false;
120 }
121
#ifdef URI_DEBUG
/**
   Print every component of `uri` that has a non-NULL buffer to `file`,
   one component per line.
*/
static void
serd_uri_dump(const SerdURI* uri, FILE* file)
{
#define PRINT_PART(range, name) \
    do { \
        if ((range).buf) { \
            fprintf(file, " " name " = "); \
            fwrite((range).buf, 1, (range).len, file); \
            fprintf(file, "\n"); \
        } \
    } while (0)

    PRINT_PART(uri->scheme, "scheme ");
    PRINT_PART(uri->authority, "authority");
    PRINT_PART(uri->path_base, "path_base");
    PRINT_PART(uri->path, "path ");
    PRINT_PART(uri->query, "query ");
    PRINT_PART(uri->fragment, "fragment ");

#undef PRINT_PART
}
#endif
141
142 SERD_API
143 SerdStatus
144 serd_uri_parse(const uint8_t* utf8, SerdURI* uri)
145 {
146 *uri = SERD_URI_NULL;
147
148 const uint8_t* ptr = utf8;
149
150 /* See http://tools.ietf.org/html/rfc3986#section-3
151 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
152 */
153
154 /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
155 if (is_alpha(*ptr)) {
156 for (uint8_t c = *++ptr; true; c = *++ptr) {
157 switch (c) {
158 case '\0': case '/': case '?': case '#':
159 ptr = utf8;
160 goto path; // Relative URI (starts with path by definition)
161 case ':':
162 uri->scheme.buf = utf8;
163 uri->scheme.len = (ptr++) - utf8;
164 goto maybe_authority; // URI with scheme
165 case '+': case '-': case '.':
166 continue;
167 default:
168 if (is_alpha(c) || is_digit(c)) {
169 continue;
170 }
171 }
172 }
173 }
174
175 /* S3.2: The authority component is preceded by a double slash ("//")
176 and is terminated by the next slash ("/"), question mark ("?"),
177 or number sign ("#") character, or by the end of the URI.
178 */
179 maybe_authority:
180 if (*ptr == '/' && *(ptr + 1) == '/') {
181 ptr += 2;
182 uri->authority.buf = ptr;
183 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
184 switch (c) {
185 case '/': goto path;
186 case '?': goto query;
187 case '#': goto fragment;
188 default:
189 ++uri->authority.len;
190 }
191 }
192 }
193
194 /* RFC3986 S3.3: The path is terminated by the first question mark ("?")
195 or number sign ("#") character, or by the end of the URI.
196 */
197 path:
198 switch (*ptr) {
199 case '?': goto query;
200 case '#': goto fragment;
201 case '\0': goto end;
202 default: break;
203 }
204 uri->path.buf = ptr;
205 uri->path.len = 0;
206 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
207 switch (c) {
208 case '?': goto query;
209 case '#': goto fragment;
210 default:
211 ++uri->path.len;
212 }
213 }
214
215 /* RFC3986 S3.4: The query component is indicated by the first question
216 mark ("?") character and terminated by a number sign ("#") character
217 or by the end of the URI.
218 */
219 query:
220 if (*ptr == '?') {
221 uri->query.buf = ++ptr;
222 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) {
223 switch (c) {
224 case '#':
225 goto fragment;
226 default:
227 ++uri->query.len;
228 }
229 }
230 }
231
232 /* RFC3986 S3.5: A fragment identifier component is indicated by the
233 presence of a number sign ("#") character and terminated by the end
234 of the URI.
235 */
236 fragment:
237 if (*ptr == '#') {
238 uri->fragment.buf = ptr;
239 while (*ptr++ != '\0') {
240 ++uri->fragment.len;
241 }
242 }
243
244 end:
245 #ifdef URI_DEBUG
246 fprintf(stderr, "PARSE URI <%s>\n", utf8);
247 serd_uri_dump(uri, stderr);
248 fprintf(stderr, "\n");
249 #endif
250
251 return SERD_SUCCESS;
252 }
253
/**
   Remove leading dot components from `path`.
   See http://tools.ietf.org/html/rfc3986#section-5.2.3

   Leading "./" and "../" components are chopped repeatedly, as is a path
   that is exactly "." or "..".  For a path that starts with '/', at most one
   leading "/./" or "/../" is collapsed to "/" before returning.

   @param path The path to trim (bytes following the dots are inspected, so
   the string containing `path` must be NUL-terminated).
   @param len Length of `path` in bytes.
   @param up Set to the number of up-references (e.g. "../") trimmed
   @return A pointer to the new start of `path`
*/
static const uint8_t*
remove_dot_segments(const uint8_t* path, size_t len, size_t* up)
{
    const uint8_t*       cur   = path;
    const uint8_t* const limit = path + len;

    *up = 0;
    while (cur < limit) {
        if (cur[0] == '/') {
            if (cur[1] == '.') {
                if (cur[2] == '/') {
                    cur += 2;  // Leading "/./" => "/"
                } else if (cur[2] == '.' && cur[3] == '/') {
                    ++*up;
                    cur += 3;  // Leading "/../" => "/"
                }
            }
            return cur;  // A leading slash always ends the chopping
        }

        if (cur[0] != '.') {
            return cur;  // Finished chopping dot components
        }

        if (cur[1] == '/') {
            cur += 2;  // Chop leading "./"
            continue;
        }
        if (cur[1] == '\0') {
            return cur + 1;  // Chop input "."
        }
        if (cur[1] != '.') {
            return cur;  // Ordinary name that merely starts with a dot
        }

        if (cur[2] == '\0') {
            ++*up;
            cur += 2;  // Chop input ".."
        } else if (cur[2] == '/') {
            ++*up;
            cur += 3;  // Chop leading "../"
        } else {
            return cur;  // Ordinary name that merely starts with ".."
        }
    }

    return cur;
}
319
320 /// Merge `base` and `path` in-place
321 static void
322 merge(SerdChunk* base, SerdChunk* path)
323 {
324 size_t up;
325 const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up);
326 const uint8_t* end = path->buf + path->len;
327
328 if (base->len) {
329 // Find the up'th last slash
330 const uint8_t* base_last = (base->buf + base->len - 1);
331 ++up;
332 do {
333 if (*base_last == '/') {
334 --up;
335 }
336 } while (up > 0 && (--base_last > base->buf));
337
338 // Set path prefix
339 base->len = base_last - base->buf + 1;
340 }
341
342 // Set path suffix
343 path->buf = begin;
344 path->len = end - begin;
345 }
346
347 /// See http://tools.ietf.org/html/rfc3986#section-5.2.2
348 SERD_API
349 void
350 serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
351 {
352 if (!base->scheme.len) {
353 *t = *r; // Don't resolve against non-absolute URIs
354 return;
355 }
356
357 t->path_base.buf = NULL;
358 t->path_base.len = 0;
359 if (r->scheme.len) {
360 *t = *r;
361 } else {
362 if (r->authority.len) {
363 t->authority = r->authority;
364 t->path = r->path;
365 t->query = r->query;
366 } else {
367 t->path = r->path;
368 if (!r->path.len) {
369 t->path_base = base->path;
370 if (r->query.len) {
371 t->query = r->query;
372 } else {
373 t->query = base->query;
374 }
375 } else {
376 if (r->path.buf[0] != '/') {
377 t->path_base = base->path;
378 }
379 merge(&t->path_base, &t->path);
380 t->query = r->query;
381 }
382 t->authority = base->authority;
383 }
384 t->scheme = base->scheme;
385 t->fragment = r->fragment;
386 }
387
388 #ifdef URI_DEBUG
389 fprintf(stderr, "## RESOLVE URI\n# BASE\n");
390 serd_uri_dump(base, stderr);
391 fprintf(stderr, "# URI\n");
392 serd_uri_dump(r, stderr);
393 fprintf(stderr, "# RESULT\n");
394 serd_uri_dump(t, stderr);
395 fprintf(stderr, "\n");
396 #endif
397 }
398
399 /** Write the path of `uri` starting at index `i` */
400 static size_t
401 write_path_tail(SerdSink sink, void* stream, const SerdURI* uri, size_t i)
402 {
403 size_t len = 0;
404 if (i < uri->path_base.len) {
405 len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream);
406 }
407 if (uri->path.buf) {
408 if (i < uri->path_base.len) {
409 len += sink(uri->path.buf, uri->path.len, stream);
410 } else {
411 const size_t j = (i - uri->path_base.len);
412 len += sink(uri->path.buf + j, uri->path.len - j, stream);
413 }
414 }
415 return len;
416 }
417
418 /** Write the path of `uri` relative to the path of `base`. */
419 static size_t
420 write_rel_path(SerdSink sink,
421 void* stream,
422 const SerdURI* uri,
423 const SerdURI* base)
424 {
425 const size_t path_len = uri_path_len(uri);
426 const size_t base_len = uri_path_len(base);
427 const size_t min_len = (path_len < base_len) ? path_len : base_len;
428
429 // Find the last separator common to both paths
430 size_t last_shared_sep = 0;
431 size_t i = 0;
432 for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) {
433 if (uri_path_at(uri, i) == '/') {
434 last_shared_sep = i;
435 }
436 }
437
438 if (i == path_len && i == base_len) { // Paths are identical
439 return 0;
440 } else if (last_shared_sep == 0) { // No common components
441 return write_path_tail(sink, stream, uri, 0);
442 }
443
444 // Find the number of up references ("..") required
445 size_t up = 0;
446 for (size_t s = last_shared_sep + 1; s < base_len; ++s) {
447 if (uri_path_at(base, s) == '/') {
448 ++up;
449 }
450 }
451
452 // Write up references
453 size_t len = 0;
454 for (size_t u = 0; u < up; ++u) {
455 len += sink("../", 3, stream);
456 }
457
458 // Write suffix
459 return len += write_path_tail(sink, stream, uri, last_shared_sep + 1);
460 }
461
462 /// See http://tools.ietf.org/html/rfc3986#section-5.3
463 SERD_API
464 size_t
465 serd_uri_serialise_relative(const SerdURI* uri,
466 const SerdURI* base,
467 const SerdURI* root,
468 SerdSink sink,
469 void* stream)
470 {
471 size_t len = 0;
472 const bool relative = uri_is_under(uri, root ? root : base);
473 if (relative) {
474 len = write_rel_path(sink, stream, uri, base);
475 }
476 if (!relative || (!len && base->query.buf)) {
477 if (uri->scheme.buf) {
478 len += sink(uri->scheme.buf, uri->scheme.len, stream);
479 len += sink(":", 1, stream);
480 }
481 if (uri->authority.buf) {
482 len += sink("//", 2, stream);
483 len += sink(uri->authority.buf, uri->authority.len, stream);
484 }
485 len += write_path_tail(sink, stream, uri, 0);
486 }
487 if (uri->query.buf) {
488 len += sink("?", 1, stream);
489 len += sink(uri->query.buf, uri->query.len, stream);
490 }
491 if (uri->fragment.buf) {
492 // Note uri->fragment.buf includes the leading `#'
493 len += sink(uri->fragment.buf, uri->fragment.len, stream);
494 }
495 return len;
496 }
497
498 /// See http://tools.ietf.org/html/rfc3986#section-5.3
499 SERD_API
500 size_t
501 serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream)
502 {
503 return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream);
504 }