Mercurial > hg > piper-cpp
comparison ext/serd/src/uri.c @ 226:c5cdc9e6a4bf
Add these external library files
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Fri, 09 Jun 2017 16:41:31 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
225:025b3e2f7c17 | 226:c5cdc9e6a4bf |
---|---|
1 /* | |
2 Copyright 2011-2014 David Robillard <http://drobilla.net> | |
3 | |
4 Permission to use, copy, modify, and/or distribute this software for any | |
5 purpose with or without fee is hereby granted, provided that the above | |
6 copyright notice and this permission notice appear in all copies. | |
7 | |
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
15 */ | |
16 | |
17 #include "serd_internal.h" | |
18 | |
19 #include <stdlib.h> | |
20 #include <string.h> | |
21 | |
22 // #define URI_DEBUG 1 | |
23 | |
24 SERD_API | |
25 const uint8_t* | |
26 serd_uri_to_path(const uint8_t* uri) | |
27 { | |
28 const uint8_t* path = uri; | |
29 if (!is_windows_path(uri) && serd_uri_string_has_scheme(uri)) { | |
30 if (strncmp((const char*)uri, "file:", 5)) { | |
31 fprintf(stderr, "Non-file URI `%s'\n", uri); | |
32 return NULL; | |
33 } else if (!strncmp((const char*)uri, "file://localhost/", 17)) { | |
34 path = uri + 16; | |
35 } else if (!strncmp((const char*)uri, "file://", 7)) { | |
36 path = uri + 7; | |
37 } else { | |
38 fprintf(stderr, "Invalid file URI `%s'\n", uri); | |
39 return NULL; | |
40 } | |
41 if (is_windows_path(path + 1)) { | |
42 ++path; // Special case for terrible Windows file URIs | |
43 } | |
44 } | |
45 return path; | |
46 } | |
47 | |
48 SERD_API | |
49 uint8_t* | |
50 serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname) | |
51 { | |
52 const uint8_t* path = uri; | |
53 if (hostname) { | |
54 *hostname = NULL; | |
55 } | |
56 if (!strncmp((const char*)uri, "file://", 7)) { | |
57 const uint8_t* auth = uri + 7; | |
58 if (*auth == '/') { // No hostname | |
59 path = auth; | |
60 } else { // Has hostname | |
61 if (!(path = (const uint8_t*)strchr((const char*)auth, '/'))) { | |
62 return NULL; | |
63 } | |
64 if (hostname) { | |
65 *hostname = (uint8_t*)calloc(1, path - auth + 1); | |
66 memcpy(*hostname, auth, path - auth); | |
67 } | |
68 } | |
69 } | |
70 | |
71 if (is_windows_path(path + 1)) { | |
72 ++path; | |
73 } | |
74 | |
75 SerdChunk chunk = { NULL, 0 }; | |
76 for (const uint8_t* s = path; *s; ++s) { | |
77 if (*s == '%') { | |
78 if (*(s + 1) == '%') { | |
79 serd_chunk_sink("%", 1, &chunk); | |
80 ++s; | |
81 } else if (is_digit(*(s + 1)) && is_digit(*(s + 2))) { | |
82 const uint8_t code[3] = { *(s + 1), *(s + 2), 0 }; | |
83 uint32_t num; | |
84 sscanf((const char*)code, "%X", &num); | |
85 const uint8_t c = num; | |
86 serd_chunk_sink(&c, 1, &chunk); | |
87 s += 2; | |
88 } else { | |
89 s += 2; // Junk escape, ignore | |
90 } | |
91 } else { | |
92 serd_chunk_sink(s, 1, &chunk); | |
93 } | |
94 } | |
95 return serd_chunk_sink_finish(&chunk); | |
96 } | |
97 | |
98 SERD_API | |
99 bool | |
100 serd_uri_string_has_scheme(const uint8_t* utf8) | |
101 { | |
102 // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
103 if (!utf8 || !is_alpha(utf8[0])) { | |
104 return false; // Invalid scheme initial character, URI is relative | |
105 } | |
106 for (uint8_t c; (c = *++utf8) != '\0';) { | |
107 switch (c) { | |
108 case ':': | |
109 return true; // End of scheme | |
110 case '+': case '-': case '.': | |
111 break; // Valid scheme character, continue | |
112 default: | |
113 if (!is_alpha(c) && !is_digit(c)) { | |
114 return false; // Invalid scheme character | |
115 } | |
116 } | |
117 } | |
118 | |
119 return false; | |
120 } | |
121 | |
#ifdef URI_DEBUG
/**
   Print every component of `uri` that has a buffer set, one per line.

   @param uri URI to print.
   @param file Stream to write to.
*/
static void
serd_uri_dump(const SerdURI* uri, FILE* file)
{
// Wrapped in do/while so each use is a single statement, and writes to the
// `file` argument rather than unconditionally to stderr
#define PRINT_PART(range, name) \
    do { \
        if ((range).buf) { \
            fprintf(file, " " name " = "); \
            fwrite((range).buf, 1, (range).len, file); \
            fprintf(file, "\n"); \
        } \
    } while (0)

    PRINT_PART(uri->scheme, "scheme ");
    PRINT_PART(uri->authority, "authority");
    PRINT_PART(uri->path_base, "path_base");
    PRINT_PART(uri->path, "path ");
    PRINT_PART(uri->query, "query ");
    PRINT_PART(uri->fragment, "fragment ");

#undef PRINT_PART
}
#endif
141 | |
142 SERD_API | |
143 SerdStatus | |
144 serd_uri_parse(const uint8_t* utf8, SerdURI* uri) | |
145 { | |
146 *uri = SERD_URI_NULL; | |
147 | |
148 const uint8_t* ptr = utf8; | |
149 | |
150 /* See http://tools.ietf.org/html/rfc3986#section-3 | |
151 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
152 */ | |
153 | |
154 /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ | |
155 if (is_alpha(*ptr)) { | |
156 for (uint8_t c = *++ptr; true; c = *++ptr) { | |
157 switch (c) { | |
158 case '\0': case '/': case '?': case '#': | |
159 ptr = utf8; | |
160 goto path; // Relative URI (starts with path by definition) | |
161 case ':': | |
162 uri->scheme.buf = utf8; | |
163 uri->scheme.len = (ptr++) - utf8; | |
164 goto maybe_authority; // URI with scheme | |
165 case '+': case '-': case '.': | |
166 continue; | |
167 default: | |
168 if (is_alpha(c) || is_digit(c)) { | |
169 continue; | |
170 } | |
171 } | |
172 } | |
173 } | |
174 | |
175 /* S3.2: The authority component is preceded by a double slash ("//") | |
176 and is terminated by the next slash ("/"), question mark ("?"), | |
177 or number sign ("#") character, or by the end of the URI. | |
178 */ | |
179 maybe_authority: | |
180 if (*ptr == '/' && *(ptr + 1) == '/') { | |
181 ptr += 2; | |
182 uri->authority.buf = ptr; | |
183 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { | |
184 switch (c) { | |
185 case '/': goto path; | |
186 case '?': goto query; | |
187 case '#': goto fragment; | |
188 default: | |
189 ++uri->authority.len; | |
190 } | |
191 } | |
192 } | |
193 | |
194 /* RFC3986 S3.3: The path is terminated by the first question mark ("?") | |
195 or number sign ("#") character, or by the end of the URI. | |
196 */ | |
197 path: | |
198 switch (*ptr) { | |
199 case '?': goto query; | |
200 case '#': goto fragment; | |
201 case '\0': goto end; | |
202 default: break; | |
203 } | |
204 uri->path.buf = ptr; | |
205 uri->path.len = 0; | |
206 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { | |
207 switch (c) { | |
208 case '?': goto query; | |
209 case '#': goto fragment; | |
210 default: | |
211 ++uri->path.len; | |
212 } | |
213 } | |
214 | |
215 /* RFC3986 S3.4: The query component is indicated by the first question | |
216 mark ("?") character and terminated by a number sign ("#") character | |
217 or by the end of the URI. | |
218 */ | |
219 query: | |
220 if (*ptr == '?') { | |
221 uri->query.buf = ++ptr; | |
222 for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { | |
223 switch (c) { | |
224 case '#': | |
225 goto fragment; | |
226 default: | |
227 ++uri->query.len; | |
228 } | |
229 } | |
230 } | |
231 | |
232 /* RFC3986 S3.5: A fragment identifier component is indicated by the | |
233 presence of a number sign ("#") character and terminated by the end | |
234 of the URI. | |
235 */ | |
236 fragment: | |
237 if (*ptr == '#') { | |
238 uri->fragment.buf = ptr; | |
239 while (*ptr++ != '\0') { | |
240 ++uri->fragment.len; | |
241 } | |
242 } | |
243 | |
244 end: | |
245 #ifdef URI_DEBUG | |
246 fprintf(stderr, "PARSE URI <%s>\n", utf8); | |
247 serd_uri_dump(uri, stderr); | |
248 fprintf(stderr, "\n"); | |
249 #endif | |
250 | |
251 return SERD_SUCCESS; | |
252 } | |
253 | |
/**
   Remove leading dot components from `path`.
   See http://tools.ietf.org/html/rfc3986#section-5.2.3

   Lookahead is bounded by `len`, so the path need not be NUL-terminated: a
   path of ".." or "." that is immediately followed in memory by a query or
   fragment (e.g. "..?q") is handled the same as one followed by NUL.

   Leading "./" and "../" components are chopped repeatedly.  For a path that
   starts with '/', at most one leading "/./" or "/../" is collapsed to "/"
   before returning.

   @param path Start of the path.
   @param len Length of the path in bytes.
   @param up Set to the number of up-references (e.g. "../") trimmed
   @return A pointer to the new start of `path`
*/
static const uint8_t*
remove_dot_segments(const uint8_t* path, size_t len, size_t* up)
{
    const uint8_t*       begin = path;
    const uint8_t* const end   = path + len;

    *up = 0;
    while (begin < end) {
        // Look ahead up to three bytes, treating the end of the path as NUL
        const size_t  left = (size_t)(end - begin);
        const uint8_t c1   = (left > 1) ? begin[1] : '\0';
        const uint8_t c2   = (left > 2) ? begin[2] : '\0';
        const uint8_t c3   = (left > 3) ? begin[3] : '\0';

        if (begin[0] == '.') {
            if (c1 == '/') {
                begin += 2;  // Chop leading "./"
            } else if (c1 == '.' && c2 == '/') {
                ++*up;
                begin += 3;  // Chop leading "../"
            } else if (c1 == '.' && c2 == '\0') {
                ++*up;
                begin += 2;  // Chop input ".."
            } else if (c1 == '\0') {
                return begin + 1;  // Chop input "."
            } else {
                return begin;  // Segment merely starts with a dot
            }
        } else if (begin[0] == '/') {
            if (c1 == '.' && c2 == '/') {
                begin += 2;  // Leading "/./" => "/"
            } else if (c1 == '.' && c2 == '.' && c3 == '/') {
                ++*up;
                begin += 3;  // Leading "/../" => "/"
            }
            return begin;
        } else {
            return begin;  // Finished chopping dot components
        }
    }

    return begin;
}
319 | |
320 /// Merge `base` and `path` in-place | |
321 static void | |
322 merge(SerdChunk* base, SerdChunk* path) | |
323 { | |
324 size_t up; | |
325 const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up); | |
326 const uint8_t* end = path->buf + path->len; | |
327 | |
328 if (base->len) { | |
329 // Find the up'th last slash | |
330 const uint8_t* base_last = (base->buf + base->len - 1); | |
331 ++up; | |
332 do { | |
333 if (*base_last == '/') { | |
334 --up; | |
335 } | |
336 } while (up > 0 && (--base_last > base->buf)); | |
337 | |
338 // Set path prefix | |
339 base->len = base_last - base->buf + 1; | |
340 } | |
341 | |
342 // Set path suffix | |
343 path->buf = begin; | |
344 path->len = end - begin; | |
345 } | |
346 | |
347 /// See http://tools.ietf.org/html/rfc3986#section-5.2.2 | |
348 SERD_API | |
349 void | |
350 serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) | |
351 { | |
352 if (!base->scheme.len) { | |
353 *t = *r; // Don't resolve against non-absolute URIs | |
354 return; | |
355 } | |
356 | |
357 t->path_base.buf = NULL; | |
358 t->path_base.len = 0; | |
359 if (r->scheme.len) { | |
360 *t = *r; | |
361 } else { | |
362 if (r->authority.len) { | |
363 t->authority = r->authority; | |
364 t->path = r->path; | |
365 t->query = r->query; | |
366 } else { | |
367 t->path = r->path; | |
368 if (!r->path.len) { | |
369 t->path_base = base->path; | |
370 if (r->query.len) { | |
371 t->query = r->query; | |
372 } else { | |
373 t->query = base->query; | |
374 } | |
375 } else { | |
376 if (r->path.buf[0] != '/') { | |
377 t->path_base = base->path; | |
378 } | |
379 merge(&t->path_base, &t->path); | |
380 t->query = r->query; | |
381 } | |
382 t->authority = base->authority; | |
383 } | |
384 t->scheme = base->scheme; | |
385 t->fragment = r->fragment; | |
386 } | |
387 | |
388 #ifdef URI_DEBUG | |
389 fprintf(stderr, "## RESOLVE URI\n# BASE\n"); | |
390 serd_uri_dump(base, stderr); | |
391 fprintf(stderr, "# URI\n"); | |
392 serd_uri_dump(r, stderr); | |
393 fprintf(stderr, "# RESULT\n"); | |
394 serd_uri_dump(t, stderr); | |
395 fprintf(stderr, "\n"); | |
396 #endif | |
397 } | |
398 | |
399 /** Write the path of `uri` starting at index `i` */ | |
400 static size_t | |
401 write_path_tail(SerdSink sink, void* stream, const SerdURI* uri, size_t i) | |
402 { | |
403 size_t len = 0; | |
404 if (i < uri->path_base.len) { | |
405 len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream); | |
406 } | |
407 if (uri->path.buf) { | |
408 if (i < uri->path_base.len) { | |
409 len += sink(uri->path.buf, uri->path.len, stream); | |
410 } else { | |
411 const size_t j = (i - uri->path_base.len); | |
412 len += sink(uri->path.buf + j, uri->path.len - j, stream); | |
413 } | |
414 } | |
415 return len; | |
416 } | |
417 | |
418 /** Write the path of `uri` relative to the path of `base`. */ | |
419 static size_t | |
420 write_rel_path(SerdSink sink, | |
421 void* stream, | |
422 const SerdURI* uri, | |
423 const SerdURI* base) | |
424 { | |
425 const size_t path_len = uri_path_len(uri); | |
426 const size_t base_len = uri_path_len(base); | |
427 const size_t min_len = (path_len < base_len) ? path_len : base_len; | |
428 | |
429 // Find the last separator common to both paths | |
430 size_t last_shared_sep = 0; | |
431 size_t i = 0; | |
432 for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) { | |
433 if (uri_path_at(uri, i) == '/') { | |
434 last_shared_sep = i; | |
435 } | |
436 } | |
437 | |
438 if (i == path_len && i == base_len) { // Paths are identical | |
439 return 0; | |
440 } else if (last_shared_sep == 0) { // No common components | |
441 return write_path_tail(sink, stream, uri, 0); | |
442 } | |
443 | |
444 // Find the number of up references ("..") required | |
445 size_t up = 0; | |
446 for (size_t s = last_shared_sep + 1; s < base_len; ++s) { | |
447 if (uri_path_at(base, s) == '/') { | |
448 ++up; | |
449 } | |
450 } | |
451 | |
452 // Write up references | |
453 size_t len = 0; | |
454 for (size_t u = 0; u < up; ++u) { | |
455 len += sink("../", 3, stream); | |
456 } | |
457 | |
458 // Write suffix | |
459 return len += write_path_tail(sink, stream, uri, last_shared_sep + 1); | |
460 } | |
461 | |
462 /// See http://tools.ietf.org/html/rfc3986#section-5.3 | |
463 SERD_API | |
464 size_t | |
465 serd_uri_serialise_relative(const SerdURI* uri, | |
466 const SerdURI* base, | |
467 const SerdURI* root, | |
468 SerdSink sink, | |
469 void* stream) | |
470 { | |
471 size_t len = 0; | |
472 const bool relative = uri_is_under(uri, root ? root : base); | |
473 if (relative) { | |
474 len = write_rel_path(sink, stream, uri, base); | |
475 } | |
476 if (!relative || (!len && base->query.buf)) { | |
477 if (uri->scheme.buf) { | |
478 len += sink(uri->scheme.buf, uri->scheme.len, stream); | |
479 len += sink(":", 1, stream); | |
480 } | |
481 if (uri->authority.buf) { | |
482 len += sink("//", 2, stream); | |
483 len += sink(uri->authority.buf, uri->authority.len, stream); | |
484 } | |
485 len += write_path_tail(sink, stream, uri, 0); | |
486 } | |
487 if (uri->query.buf) { | |
488 len += sink("?", 1, stream); | |
489 len += sink(uri->query.buf, uri->query.len, stream); | |
490 } | |
491 if (uri->fragment.buf) { | |
492 // Note uri->fragment.buf includes the leading `#' | |
493 len += sink(uri->fragment.buf, uri->fragment.len, stream); | |
494 } | |
495 return len; | |
496 } | |
497 | |
498 /// See http://tools.ietf.org/html/rfc3986#section-5.3 | |
499 SERD_API | |
500 size_t | |
501 serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) | |
502 { | |
503 return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream); | |
504 } |