Mercurial > hg > piper-cpp
comparison ext/serd/src/reader.c @ 226:c5cdc9e6a4bf
Add these external library files
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Fri, 09 Jun 2017 16:41:31 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
225:025b3e2f7c17 | 226:c5cdc9e6a4bf |
---|---|
1 /* | |
2 Copyright 2011-2017 David Robillard <http://drobilla.net> | |
3 | |
4 Permission to use, copy, modify, and/or distribute this software for any | |
5 purpose with or without fee is hereby granted, provided that the above | |
6 copyright notice and this permission notice appear in all copies. | |
7 | |
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
15 */ | |
16 | |
17 #include "serd_internal.h" | |
18 | |
19 #include <assert.h> | |
20 #include <ctype.h> | |
21 #include <errno.h> | |
22 #include <stdarg.h> | |
23 #include <stdint.h> | |
24 #include <stdio.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
27 | |
/* Namespace IRIs used to build datatype and rdf:type nodes. */
#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

/* Bail-out helpers.  TRY_THROW jumps to a local `except' label when `exp' is
   falsy; TRY_RET returns 0 from the enclosing function.  Each expands to a
   complete `if' statement, trailing semicolon included. */
#define TRY_THROW(exp) if (!(exp)) goto except;
#define TRY_RET(exp)   if (!(exp)) return 0;

/* When SERD_STACK_CHECK is defined, assert that `ref' is the most recently
   recorded stack push; otherwise this expands to nothing. */
#ifdef SERD_STACK_CHECK
#	define SERD_STACK_ASSERT_TOP(reader, ref) \
		assert(ref == reader->allocs[reader->n_allocs - 1]);
#else
#	define SERD_STACK_ASSERT_TOP(reader, ref)
#endif
40 | |
/** Input position that is copied into every reported SerdError. */
typedef struct {
	const uint8_t* filename;  ///< Passed through to the error record
	unsigned       line;      ///< Incremented whenever a '\n' is consumed
	unsigned       col;       ///< Reset to 0 on '\n', otherwise incremented
} Cursor;
46 | |
/** A 32-bit character code. */
typedef uint32_t uchar;

/** Reference to a node in the reader stack.

    Pointers can not be used for this, because the stack may be reallocated,
    which would invalidate any pointer to one of its elements.
*/
typedef size_t Ref;
53 | |
54 typedef struct { | |
55 Ref graph; | |
56 Ref subject; | |
57 Ref predicate; | |
58 Ref object; | |
59 Ref datatype; | |
60 Ref lang; | |
61 SerdStatementFlags* flags; | |
62 } ReadContext; | |
63 | |
64 struct SerdReaderImpl { | |
65 void* handle; | |
66 void (*free_handle)(void* ptr); | |
67 SerdBaseSink base_sink; | |
68 SerdPrefixSink prefix_sink; | |
69 SerdStatementSink statement_sink; | |
70 SerdEndSink end_sink; | |
71 SerdErrorSink error_sink; | |
72 void* error_handle; | |
73 Ref rdf_first; | |
74 Ref rdf_rest; | |
75 Ref rdf_nil; | |
76 SerdNode default_graph; | |
77 SerdByteSource source; | |
78 SerdStack stack; | |
79 SerdSyntax syntax; | |
80 unsigned next_id; | |
81 Cursor cur; | |
82 SerdStatus status; | |
83 uint8_t* buf; | |
84 uint8_t* bprefix; | |
85 size_t bprefix_len; | |
86 bool strict; ///< True iff strict parsing | |
87 bool eof; | |
88 bool seen_genid; | |
89 #ifdef SERD_STACK_CHECK | |
90 Ref* allocs; ///< Stack of push offsets | |
91 size_t n_allocs; ///< Number of stack pushes | |
92 #endif | |
93 }; | |
94 | |
95 static inline bool | |
96 supports_fancy_literals(const SerdReader* reader) | |
97 { | |
98 return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG; | |
99 } | |
100 | |
101 static inline bool | |
102 supports_relative_iris(const SerdReader* reader) | |
103 { | |
104 return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG; | |
105 } | |
106 | |
107 static int | |
108 r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) | |
109 { | |
110 va_list args; | |
111 va_start(args, fmt); | |
112 const SerdError e = { | |
113 st, reader->cur.filename, reader->cur.line, reader->cur.col, fmt, &args | |
114 }; | |
115 serd_error(reader->error_sink, reader->error_handle, &e); | |
116 va_end(args); | |
117 return 0; | |
118 } | |
119 | |
/** fread-like wrapper for getc (which is faster).

    Reads a single byte from the FILE* `stream' into `buf'.  `size' and
    `nmemb' exist only to match the fread signature and are not consulted.

    @return 1 if a byte was read, or 0 at EOF (in which case 0 is stored).
*/
static size_t
serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream)
{
	uint8_t* const out     = (uint8_t*)buf;
	const int      ch      = getc((FILE*)stream);
	const int      at_end  = (ch == EOF);

	*out = at_end ? 0 : (uint8_t)ch;
	return at_end ? 0 : 1;
}
132 | |
133 static inline uint8_t | |
134 peek_byte(SerdReader* reader) | |
135 { | |
136 return serd_byte_source_peek(&reader->source); | |
137 } | |
138 | |
139 static inline uint8_t | |
140 eat_byte_safe(SerdReader* reader, const uint8_t byte) | |
141 { | |
142 assert(peek_byte(reader) == byte); | |
143 switch (byte) { | |
144 case '\0': reader->eof = (byte != '\0'); break; | |
145 case '\n': ++reader->cur.line; reader->cur.col = 0; break; | |
146 default: ++reader->cur.col; | |
147 } | |
148 | |
149 reader->status = serd_byte_source_advance(&reader->source); | |
150 return byte; | |
151 } | |
152 | |
153 static inline uint8_t | |
154 eat_byte_check(SerdReader* reader, const uint8_t byte) | |
155 { | |
156 const uint8_t c = peek_byte(reader); | |
157 if (c != byte) { | |
158 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
159 "expected `%c', not `%c'\n", byte, c); | |
160 } | |
161 return eat_byte_safe(reader, byte); | |
162 } | |
163 | |
164 static inline bool | |
165 eat_string(SerdReader* reader, const char* str, unsigned n) | |
166 { | |
167 bool bad = false; | |
168 for (unsigned i = 0; i < n; ++i) { | |
169 bad |= eat_byte_check(reader, ((const uint8_t*)str)[i]); | |
170 } | |
171 return bad; | |
172 } | |
173 | |
174 static Ref | |
175 push_node_padded(SerdReader* reader, size_t maxlen, | |
176 SerdType type, const char* str, size_t n_bytes) | |
177 { | |
178 void* mem = serd_stack_push_aligned( | |
179 &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); | |
180 | |
181 SerdNode* const node = (SerdNode*)mem; | |
182 node->n_bytes = node->n_chars = n_bytes; | |
183 node->flags = 0; | |
184 node->type = type; | |
185 node->buf = NULL; | |
186 | |
187 uint8_t* buf = (uint8_t*)(node + 1); | |
188 memcpy(buf, str, n_bytes + 1); | |
189 | |
190 #ifdef SERD_STACK_CHECK | |
191 reader->allocs = realloc( | |
192 reader->allocs, sizeof(uint8_t*) * (++reader->n_allocs)); | |
193 reader->allocs[reader->n_allocs - 1] = ((uint8_t*)mem - reader->stack.buf); | |
194 #endif | |
195 return (uint8_t*)node - reader->stack.buf; | |
196 } | |
197 | |
198 static Ref | |
199 push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes) | |
200 { | |
201 return push_node_padded(reader, n_bytes, type, str, n_bytes); | |
202 } | |
203 | |
204 static inline SerdNode* | |
205 deref(SerdReader* reader, const Ref ref) | |
206 { | |
207 if (ref) { | |
208 SerdNode* node = (SerdNode*)(reader->stack.buf + ref); | |
209 node->buf = (uint8_t*)node + sizeof(SerdNode); | |
210 return node; | |
211 } | |
212 return NULL; | |
213 } | |
214 | |
215 static inline void | |
216 push_byte(SerdReader* reader, Ref ref, const uint8_t c) | |
217 { | |
218 SERD_STACK_ASSERT_TOP(reader, ref); | |
219 uint8_t* const s = serd_stack_push(&reader->stack, 1); | |
220 SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); | |
221 ++node->n_bytes; | |
222 if (!(c & 0x80)) { // Starts with 0 bit, start of new character | |
223 ++node->n_chars; | |
224 } | |
225 *(s - 1) = c; | |
226 *s = '\0'; | |
227 } | |
228 | |
229 static inline void | |
230 push_replacement(SerdReader* reader, Ref dest) | |
231 { | |
232 push_byte(reader, dest, 0xEF); | |
233 push_byte(reader, dest, 0xBF); | |
234 push_byte(reader, dest, 0xBD); | |
235 } | |
236 | |
237 static Ref | |
238 pop_node(SerdReader* reader, Ref ref) | |
239 { | |
240 if (ref && ref != reader->rdf_first && ref != reader->rdf_rest | |
241 && ref != reader->rdf_nil) { | |
242 #ifdef SERD_STACK_CHECK | |
243 SERD_STACK_ASSERT_TOP(reader, ref); | |
244 --reader->n_allocs; | |
245 #endif | |
246 SerdNode* const node = deref(reader, ref); | |
247 uint8_t* const top = reader->stack.buf + reader->stack.size; | |
248 serd_stack_pop_aligned(&reader->stack, top - (uint8_t*)node); | |
249 } | |
250 return 0; | |
251 } | |
252 | |
253 static inline bool | |
254 emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l) | |
255 { | |
256 SerdNode* graph = deref(reader, ctx.graph); | |
257 if (!graph && reader->default_graph.buf) { | |
258 graph = &reader->default_graph; | |
259 } | |
260 bool ret = !reader->statement_sink || | |
261 !reader->statement_sink( | |
262 reader->handle, *ctx.flags, graph, | |
263 deref(reader, ctx.subject), deref(reader, ctx.predicate), | |
264 deref(reader, o), deref(reader, d), deref(reader, l)); | |
265 *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags | |
266 return ret; | |
267 } | |
268 | |
269 static bool | |
270 read_collection(SerdReader* reader, ReadContext ctx, Ref* dest); | |
271 | |
272 static bool | |
273 read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot); | |
274 | |
275 static inline uint8_t | |
276 read_HEX(SerdReader* reader) | |
277 { | |
278 const uint8_t c = peek_byte(reader); | |
279 if (is_digit(c) || in_range(c, 'A', 'F') || in_range(c, 'a', 'f')) { | |
280 return eat_byte_safe(reader, c); | |
281 } else { | |
282 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
283 "invalid hexadecimal digit `%c'\n", c); | |
284 } | |
285 } | |
286 | |
287 // Read UCHAR escape, initial \ is already eaten by caller | |
288 static inline bool | |
289 read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) | |
290 { | |
291 const uint8_t b = peek_byte(reader); | |
292 unsigned length = 0; | |
293 switch (b) { | |
294 case 'U': | |
295 length = 8; | |
296 break; | |
297 case 'u': | |
298 length = 4; | |
299 break; | |
300 default: | |
301 return false; | |
302 } | |
303 eat_byte_safe(reader, b); | |
304 | |
305 uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | |
306 for (unsigned i = 0; i < length; ++i) { | |
307 if (!(buf[i] = read_HEX(reader))) { | |
308 return false; | |
309 } | |
310 } | |
311 | |
312 uint32_t code; | |
313 sscanf((const char*)buf, "%X", &code); | |
314 | |
315 unsigned size = 0; | |
316 if (code < 0x00000080) { | |
317 size = 1; | |
318 } else if (code < 0x00000800) { | |
319 size = 2; | |
320 } else if (code < 0x00010000) { | |
321 size = 3; | |
322 } else if (code < 0x00110000) { | |
323 size = 4; | |
324 } else { | |
325 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
326 "unicode character 0x%X out of range\n", code); | |
327 push_replacement(reader, dest); | |
328 *char_code = 0xFFFD; | |
329 return true; | |
330 } | |
331 | |
332 // Build output in buf | |
333 // (Note # of bytes = # of leading 1 bits in first byte) | |
334 uint32_t c = code; | |
335 switch (size) { | |
336 case 4: | |
337 buf[3] = 0x80 | (uint8_t)(c & 0x3F); | |
338 c >>= 6; | |
339 c |= (16 << 12); // set bit 4 | |
340 case 3: | |
341 buf[2] = 0x80 | (uint8_t)(c & 0x3F); | |
342 c >>= 6; | |
343 c |= (32 << 6); // set bit 5 | |
344 case 2: | |
345 buf[1] = 0x80 | (uint8_t)(c & 0x3F); | |
346 c >>= 6; | |
347 c |= 0xC0; // set bits 6 and 7 | |
348 case 1: | |
349 buf[0] = (uint8_t)c; | |
350 } | |
351 | |
352 for (unsigned i = 0; i < size; ++i) { | |
353 push_byte(reader, dest, buf[i]); | |
354 } | |
355 *char_code = code; | |
356 return true; | |
357 } | |
358 | |
359 // Read ECHAR escape, initial \ is already eaten by caller | |
360 static inline bool | |
361 read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags) | |
362 { | |
363 const uint8_t c = peek_byte(reader); | |
364 switch (c) { | |
365 case 't': | |
366 eat_byte_safe(reader, 't'); | |
367 push_byte(reader, dest, '\t'); | |
368 return true; | |
369 case 'b': | |
370 eat_byte_safe(reader, 'b'); | |
371 push_byte(reader, dest, '\b'); | |
372 return true; | |
373 case 'n': | |
374 *flags |= SERD_HAS_NEWLINE; | |
375 eat_byte_safe(reader, 'n'); | |
376 push_byte(reader, dest, '\n'); | |
377 return true; | |
378 case 'r': | |
379 *flags |= SERD_HAS_NEWLINE; | |
380 eat_byte_safe(reader, 'r'); | |
381 push_byte(reader, dest, '\r'); | |
382 return true; | |
383 case 'f': | |
384 eat_byte_safe(reader, 'f'); | |
385 push_byte(reader, dest, '\f'); | |
386 return true; | |
387 case '\\': case '"': case '\'': | |
388 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
389 return true; | |
390 default: | |
391 return false; | |
392 } | |
393 } | |
394 | |
395 static inline SerdStatus | |
396 bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) | |
397 { | |
398 r_err(reader, SERD_ERR_BAD_SYNTAX, fmt, c); | |
399 push_replacement(reader, dest); | |
400 | |
401 // Skip bytes until the next start byte | |
402 for (uint8_t b = peek_byte(reader); (b & 0x80);) { | |
403 eat_byte_safe(reader, b); | |
404 b = peek_byte(reader); | |
405 } | |
406 | |
407 return SERD_SUCCESS; | |
408 } | |
409 | |
410 static SerdStatus | |
411 read_utf8_character(SerdReader* reader, Ref dest, uint8_t c) | |
412 { | |
413 unsigned size = 1; | |
414 if ((c & 0xE0) == 0xC0) { // Starts with `110' | |
415 size = 2; | |
416 } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' | |
417 size = 3; | |
418 } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' | |
419 size = 4; | |
420 } else { | |
421 return bad_char(reader, dest, "invalid UTF-8 start 0x%X\n", c); | |
422 } | |
423 | |
424 char bytes[4]; | |
425 bytes[0] = c; | |
426 | |
427 // Check character validity | |
428 for (unsigned i = 1; i < size; ++i) { | |
429 if (((bytes[i] = peek_byte(reader)) & 0x80) == 0) { | |
430 return bad_char(reader, dest, "invalid UTF-8 continuation 0x%X\n", | |
431 bytes[i]); | |
432 } | |
433 eat_byte_safe(reader, bytes[i]); | |
434 } | |
435 | |
436 // Emit character | |
437 for (unsigned i = 0; i < size; ++i) { | |
438 push_byte(reader, dest, bytes[i]); | |
439 } | |
440 return SERD_SUCCESS; | |
441 } | |
442 | |
443 // Read one character (possibly multi-byte) | |
444 // The first byte, c, has already been eaten by caller | |
445 static inline SerdStatus | |
446 read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c) | |
447 { | |
448 if (!(c & 0x80)) { | |
449 switch (c) { | |
450 case 0xA: case 0xD: | |
451 *flags |= SERD_HAS_NEWLINE; | |
452 break; | |
453 case '"': case '\'': | |
454 *flags |= SERD_HAS_QUOTE; | |
455 break; | |
456 } | |
457 push_byte(reader, dest, c); | |
458 return SERD_SUCCESS; | |
459 } else { | |
460 return read_utf8_character(reader, dest, c); | |
461 } | |
462 } | |
463 | |
464 // [10] comment ::= '#' ( [^#xA #xD] )* | |
465 static void | |
466 read_comment(SerdReader* reader) | |
467 { | |
468 eat_byte_safe(reader, '#'); | |
469 uint8_t c; | |
470 while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) { | |
471 eat_byte_safe(reader, c); | |
472 } | |
473 } | |
474 | |
475 // [24] ws ::= #x9 | #xA | #xD | #x20 | comment | |
476 static inline bool | |
477 read_ws(SerdReader* reader) | |
478 { | |
479 const uint8_t c = peek_byte(reader); | |
480 switch (c) { | |
481 case 0x9: case 0xA: case 0xD: case 0x20: | |
482 eat_byte_safe(reader, c); | |
483 return true; | |
484 case '#': | |
485 read_comment(reader); | |
486 return true; | |
487 default: | |
488 return false; | |
489 } | |
490 } | |
491 | |
492 static inline bool | |
493 read_ws_star(SerdReader* reader) | |
494 { | |
495 while (read_ws(reader)) {} | |
496 return true; | |
497 } | |
498 | |
499 static inline bool | |
500 peek_delim(SerdReader* reader, const char delim) | |
501 { | |
502 read_ws_star(reader); | |
503 return peek_byte(reader) == delim; | |
504 } | |
505 | |
506 static inline bool | |
507 eat_delim(SerdReader* reader, const char delim) | |
508 { | |
509 if (peek_delim(reader, delim)) { | |
510 eat_byte_safe(reader, delim); | |
511 return read_ws_star(reader); | |
512 } | |
513 return false; | |
514 } | |
515 | |
516 // STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE | |
517 // Initial triple quotes are already eaten by caller | |
518 static Ref | |
519 read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) | |
520 { | |
521 Ref ref = push_node(reader, SERD_LITERAL, "", 0); | |
522 while (true) { | |
523 const uint8_t c = peek_byte(reader); | |
524 uint32_t code; | |
525 switch (c) { | |
526 case '\\': | |
527 eat_byte_safe(reader, c); | |
528 if (!read_ECHAR(reader, ref, flags) && | |
529 !read_UCHAR(reader, ref, &code)) { | |
530 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
531 "invalid escape `\\%c'\n", peek_byte(reader)); | |
532 return pop_node(reader, ref); | |
533 } | |
534 break; | |
535 default: | |
536 if (c == q) { | |
537 eat_byte_safe(reader, q); | |
538 const uint8_t q2 = eat_byte_safe(reader, peek_byte(reader)); | |
539 const uint8_t q3 = peek_byte(reader); | |
540 if (q2 == q && q3 == q) { // End of string | |
541 eat_byte_safe(reader, q3); | |
542 return ref; | |
543 } else { | |
544 *flags |= SERD_HAS_QUOTE; | |
545 push_byte(reader, ref, c); | |
546 read_character(reader, ref, flags, q2); | |
547 } | |
548 } else { | |
549 read_character(reader, ref, flags, eat_byte_safe(reader, c)); | |
550 } | |
551 } | |
552 } | |
553 return ref; | |
554 } | |
555 | |
556 // STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE | |
557 // Initial quote is already eaten by caller | |
558 static Ref | |
559 read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) | |
560 { | |
561 Ref ref = push_node(reader, SERD_LITERAL, "", 0); | |
562 while (true) { | |
563 const uint8_t c = peek_byte(reader); | |
564 uint32_t code; | |
565 switch (c) { | |
566 case '\n': case '\r': | |
567 r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); | |
568 return pop_node(reader, ref); | |
569 case '\\': | |
570 eat_byte_safe(reader, c); | |
571 if (!read_ECHAR(reader, ref, flags) && | |
572 !read_UCHAR(reader, ref, &code)) { | |
573 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
574 "invalid escape `\\%c'\n", peek_byte(reader)); | |
575 return pop_node(reader, ref); | |
576 } | |
577 break; | |
578 default: | |
579 if (c == q) { | |
580 eat_byte_check(reader, q); | |
581 return ref; | |
582 } else { | |
583 read_character(reader, ref, flags, eat_byte_safe(reader, c)); | |
584 } | |
585 } | |
586 } | |
587 eat_byte_check(reader, q); | |
588 return ref; | |
589 } | |
590 | |
591 static Ref | |
592 read_String(SerdReader* reader, SerdNodeFlags* flags) | |
593 { | |
594 const uint8_t q1 = peek_byte(reader); | |
595 eat_byte_safe(reader, q1); | |
596 | |
597 const uint8_t q2 = peek_byte(reader); | |
598 if (q2 != q1) { // Short string (not triple quoted) | |
599 return read_STRING_LITERAL(reader, flags, q1); | |
600 } | |
601 | |
602 eat_byte_safe(reader, q2); | |
603 const uint8_t q3 = peek_byte(reader); | |
604 if (q3 != q1) { // Empty short string ("" or '') | |
605 return push_node(reader, SERD_LITERAL, "", 0); | |
606 } | |
607 | |
608 if (!supports_fancy_literals(reader)) { | |
609 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
610 "syntax does not support long literals\n"); | |
611 } | |
612 | |
613 eat_byte_safe(reader, q3); | |
614 return read_STRING_LITERAL_LONG(reader, flags, q1); | |
615 } | |
616 | |
617 static bool | |
618 read_PN_CHARS_BASE(SerdReader* reader, Ref dest) | |
619 { | |
620 const uint8_t c = peek_byte(reader); | |
621 if ((c & 0x80)) { // Multi-byte character | |
622 return !read_utf8_character(reader, dest, eat_byte_safe(reader, c)); | |
623 } | |
624 if (is_alpha(c)) { | |
625 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
626 return true; | |
627 } | |
628 return false; | |
629 } | |
630 | |
631 static bool | |
632 read_PN_CHARS(SerdReader* reader, Ref dest) | |
633 { | |
634 const uint8_t c = peek_byte(reader); | |
635 if ((c & 0x80)) { // Multi-byte character | |
636 return !read_utf8_character(reader, dest, eat_byte_safe(reader, c)); | |
637 } | |
638 | |
639 if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') { | |
640 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
641 return true; | |
642 } | |
643 return false; | |
644 } | |
645 | |
646 static bool | |
647 read_PERCENT(SerdReader* reader, Ref dest) | |
648 { | |
649 push_byte(reader, dest, eat_byte_safe(reader, '%')); | |
650 const uint8_t h1 = read_HEX(reader); | |
651 const uint8_t h2 = read_HEX(reader); | |
652 if (h1 && h2) { | |
653 push_byte(reader, dest, h1); | |
654 push_byte(reader, dest, h2); | |
655 return true; | |
656 } | |
657 return false; | |
658 } | |
659 | |
660 static SerdStatus | |
661 read_PLX(SerdReader* reader, Ref dest) | |
662 { | |
663 uint8_t c = peek_byte(reader); | |
664 switch (c) { | |
665 case '%': | |
666 if (!read_PERCENT(reader, dest)) { | |
667 return SERD_ERR_BAD_SYNTAX; | |
668 } | |
669 return SERD_SUCCESS; | |
670 case '\\': | |
671 eat_byte_safe(reader, c); | |
672 if (is_alpha(c = peek_byte(reader))) { | |
673 // Escapes like \u \n etc. are not supported | |
674 return SERD_ERR_BAD_SYNTAX; | |
675 } else { | |
676 // Allow escaping of pretty much any other character | |
677 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
678 return SERD_SUCCESS; | |
679 } | |
680 default: | |
681 return SERD_FAILURE; | |
682 } | |
683 } | |
684 | |
685 static SerdStatus | |
686 read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) | |
687 { | |
688 uint8_t c = peek_byte(reader); | |
689 SerdStatus st; | |
690 switch (c) { | |
691 case '0': case '1': case '2': case '3': case '4': case '5': | |
692 case '6': case '7': case '8': case '9': case ':': case '_': | |
693 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
694 break; | |
695 default: | |
696 if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { | |
697 return st; | |
698 } else if (st != SERD_SUCCESS && !read_PN_CHARS_BASE(reader, dest)) { | |
699 return SERD_FAILURE; | |
700 } | |
701 } | |
702 | |
703 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ';')* | |
704 if (c == '.' || c == ':') { | |
705 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
706 } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { | |
707 return st; | |
708 } else if (st != SERD_SUCCESS && !read_PN_CHARS(reader, dest)) { | |
709 break; | |
710 } | |
711 } | |
712 | |
713 SerdNode* const n = deref(reader, dest); | |
714 if (n->buf[n->n_bytes - 1] == '.') { | |
715 // Ate trailing dot, pop it from stack/node and inform caller | |
716 --n->n_bytes; | |
717 serd_stack_pop(&reader->stack, 1); | |
718 *ate_dot = true; | |
719 } | |
720 | |
721 return SERD_SUCCESS; | |
722 } | |
723 | |
724 // Read the remainder of a PN_PREFIX after some initial characters | |
725 static SerdStatus | |
726 read_PN_PREFIX_tail(SerdReader* reader, Ref dest) | |
727 { | |
728 uint8_t c; | |
729 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* | |
730 if (c == '.') { | |
731 push_byte(reader, dest, eat_byte_safe(reader, c)); | |
732 } else if (!read_PN_CHARS(reader, dest)) { | |
733 break; | |
734 } | |
735 } | |
736 | |
737 const SerdNode* const n = deref(reader, dest); | |
738 if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, dest)) { | |
739 r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); | |
740 return SERD_ERR_BAD_SYNTAX; | |
741 } | |
742 | |
743 return SERD_SUCCESS; | |
744 } | |
745 | |
746 static SerdStatus | |
747 read_PN_PREFIX(SerdReader* reader, Ref dest) | |
748 { | |
749 if (read_PN_CHARS_BASE(reader, dest)) { | |
750 return read_PN_PREFIX_tail(reader, dest); | |
751 } | |
752 return SERD_FAILURE; | |
753 } | |
754 | |
755 static Ref | |
756 read_LANGTAG(SerdReader* reader) | |
757 { | |
758 uint8_t c = peek_byte(reader); | |
759 if (!is_alpha(c)) { | |
760 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c); | |
761 } | |
762 Ref ref = push_node(reader, SERD_LITERAL, "", 0); | |
763 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
764 while ((c = peek_byte(reader)) && is_alpha(c)) { | |
765 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
766 } | |
767 while (peek_byte(reader) == '-') { | |
768 push_byte(reader, ref, eat_byte_safe(reader, '-')); | |
769 while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) { | |
770 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
771 } | |
772 } | |
773 return ref; | |
774 } | |
775 | |
776 typedef enum { PREFIX, GOOD, BAD} SchemeState; | |
777 | |
778 static inline bool | |
779 check_scheme(SerdReader* reader, uint8_t c, SchemeState* state) | |
780 { | |
781 if (!supports_relative_iris(reader) && *state == PREFIX) { | |
782 if (c == ':') { | |
783 *state = GOOD; | |
784 } else if (!isalpha(c)) { | |
785 *state = BAD; | |
786 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
787 "syntax does not support relative IRIs\n"); | |
788 } | |
789 } | |
790 return true; | |
791 } | |
792 | |
793 static Ref | |
794 read_IRIREF(SerdReader* reader) | |
795 { | |
796 TRY_RET(eat_byte_check(reader, '<')); | |
797 Ref ref = push_node(reader, SERD_URI, "", 0); | |
798 SchemeState scheme = PREFIX; | |
799 uint32_t code; | |
800 while (true) { | |
801 const uint8_t c = peek_byte(reader); | |
802 if (!check_scheme(reader, c, &scheme)) { | |
803 return pop_node(reader, ref); | |
804 } | |
805 switch (c) { | |
806 case '"': case '<': case '^': case '`': case '{': case '|': case '}': | |
807 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
808 "invalid IRI character `%c'\n", c); | |
809 return pop_node(reader, ref); | |
810 case '>': | |
811 eat_byte_safe(reader, c); | |
812 return ref; | |
813 case '\\': | |
814 eat_byte_safe(reader, c); | |
815 if (!read_UCHAR(reader, ref, &code)) { | |
816 r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); | |
817 return pop_node(reader, ref); | |
818 } | |
819 switch (code) { | |
820 case 0: case ' ': case '<': case '>': | |
821 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
822 "invalid escaped IRI character %X %c\n", code, code); | |
823 return pop_node(reader, ref); | |
824 } | |
825 break; | |
826 default: | |
827 if (c <= 0x20) { | |
828 if (isprint(c)) { | |
829 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
830 "invalid IRI character `%c' (escape %%%02X)\n", c, c); | |
831 } else { | |
832 r_err(reader, SERD_ERR_BAD_SYNTAX, | |
833 "invalid IRI character (escape %%%02X)\n", c, c); | |
834 } | |
835 if (reader->strict) { | |
836 return pop_node(reader, ref); | |
837 } | |
838 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
839 } else { | |
840 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
841 } | |
842 } | |
843 } | |
844 } | |
845 | |
846 static bool | |
847 read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot) | |
848 { | |
849 if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) { | |
850 return false; | |
851 } else if (peek_byte(reader) != ':') { | |
852 return false; | |
853 } | |
854 | |
855 push_byte(reader, dest, eat_byte_safe(reader, ':')); | |
856 return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE; | |
857 } | |
858 | |
859 static bool | |
860 read_0_9(SerdReader* reader, Ref str, bool at_least_one) | |
861 { | |
862 unsigned count = 0; | |
863 for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) { | |
864 push_byte(reader, str, eat_byte_safe(reader, c)); | |
865 } | |
866 if (at_least_one && count == 0) { | |
867 r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n"); | |
868 } | |
869 return count; | |
870 } | |
871 | |
872 static bool | |
873 read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot) | |
874 { | |
875 #define XSD_DECIMAL NS_XSD "decimal" | |
876 #define XSD_DOUBLE NS_XSD "double" | |
877 #define XSD_INTEGER NS_XSD "integer" | |
878 Ref ref = push_node(reader, SERD_LITERAL, "", 0); | |
879 uint8_t c = peek_byte(reader); | |
880 bool has_decimal = false; | |
881 if (c == '-' || c == '+') { | |
882 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
883 } | |
884 if ((c = peek_byte(reader)) == '.') { | |
885 has_decimal = true; | |
886 // decimal case 2 (e.g. '.0' or `-.0' or `+.0') | |
887 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
888 TRY_THROW(read_0_9(reader, ref, true)); | |
889 } else { | |
890 // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... | |
891 TRY_THROW(is_digit(c)); | |
892 read_0_9(reader, ref, true); | |
893 if ((c = peek_byte(reader)) == '.') { | |
894 has_decimal = true; | |
895 | |
896 // Annoyingly, dot can be end of statement, so tentatively eat | |
897 eat_byte_safe(reader, c); | |
898 c = peek_byte(reader); | |
899 if (!is_digit(c) && c != 'e' && c != 'E') { | |
900 *dest = ref; | |
901 *ate_dot = true; // Force caller to deal with stupid grammar | |
902 return true; // Next byte is not a number character, done | |
903 } | |
904 | |
905 push_byte(reader, ref, '.'); | |
906 read_0_9(reader, ref, false); | |
907 } | |
908 } | |
909 c = peek_byte(reader); | |
910 if (c == 'e' || c == 'E') { | |
911 // double | |
912 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
913 switch ((c = peek_byte(reader))) { | |
914 case '+': case '-': | |
915 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
916 default: break; | |
917 } | |
918 TRY_THROW(read_0_9(reader, ref, true)); | |
919 *datatype = push_node(reader, SERD_URI, | |
920 XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1); | |
921 } else if (has_decimal) { | |
922 *datatype = push_node(reader, SERD_URI, | |
923 XSD_DECIMAL, sizeof(XSD_DECIMAL) - 1); | |
924 } else { | |
925 *datatype = push_node(reader, SERD_URI, | |
926 XSD_INTEGER, sizeof(XSD_INTEGER) - 1); | |
927 } | |
928 *dest = ref; | |
929 return true; | |
930 except: | |
931 pop_node(reader, *datatype); | |
932 pop_node(reader, ref); | |
933 return false; | |
934 } | |
935 | |
936 static bool | |
937 read_iri(SerdReader* reader, Ref* dest, bool* ate_dot) | |
938 { | |
939 switch (peek_byte(reader)) { | |
940 case '<': | |
941 *dest = read_IRIREF(reader); | |
942 return true; | |
943 default: | |
944 *dest = push_node(reader, SERD_CURIE, "", 0); | |
945 return read_PrefixedName(reader, *dest, true, ate_dot); | |
946 } | |
947 } | |
948 | |
949 static bool | |
950 read_literal(SerdReader* reader, Ref* dest, | |
951 Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot) | |
952 { | |
953 Ref str = read_String(reader, flags); | |
954 if (!str) { | |
955 return false; | |
956 } | |
957 | |
958 switch (peek_byte(reader)) { | |
959 case '@': | |
960 eat_byte_safe(reader, '@'); | |
961 TRY_THROW(*lang = read_LANGTAG(reader)); | |
962 break; | |
963 case '^': | |
964 eat_byte_safe(reader, '^'); | |
965 eat_byte_check(reader, '^'); | |
966 TRY_THROW(read_iri(reader, datatype, ate_dot)); | |
967 break; | |
968 } | |
969 *dest = str; | |
970 return true; | |
971 except: | |
972 *datatype = pop_node(reader, *datatype); | |
973 *lang = pop_node(reader, *lang); | |
974 pop_node(reader, str); | |
975 return false; | |
976 } | |
977 | |
/** Return true iff `c' is one of the bytes that end a token here:
    tab, LF, CR, space, NUL, `#', `.', `;' or `<'.
*/
inline static bool
is_token_end(uint8_t c)
{
	const bool is_space_or_end =
		(c == 0x9 || c == 0xA || c == 0xD || c == 0x20 || c == '\0');
	const bool is_punctuation =
		(c == '#' || c == '.' || c == ';' || c == '<');

	return is_space_or_end || is_punctuation;
}
989 | |
990 static bool | |
991 read_verb(SerdReader* reader, Ref* dest) | |
992 { | |
993 if (peek_byte(reader) == '<') { | |
994 return (*dest = read_IRIREF(reader)); | |
995 } else { | |
996 /* Either a qname, or "a". Read the prefix first, and if it is in fact | |
997 "a", produce that instead. | |
998 */ | |
999 *dest = push_node(reader, SERD_CURIE, "", 0); | |
1000 SerdNode* node = deref(reader, *dest); | |
1001 const SerdStatus st = read_PN_PREFIX(reader, *dest); | |
1002 bool ate_dot = false; | |
1003 if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && | |
1004 is_token_end(peek_byte(reader))) { | |
1005 pop_node(reader, *dest); | |
1006 return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)); | |
1007 } else if (st > SERD_FAILURE || | |
1008 !read_PrefixedName(reader, *dest, false, &ate_dot) || | |
1009 ate_dot) { | |
1010 return (*dest = pop_node(reader, *dest)); | |
1011 } else { | |
1012 return true; | |
1013 } | |
1014 } | |
1015 return false; | |
1016 } | |
1017 | |
1018 static Ref | |
1019 read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) | |
1020 { | |
1021 eat_byte_safe(reader, '_'); | |
1022 eat_byte_check(reader, ':'); | |
1023 Ref ref = push_node(reader, SERD_BLANK, | |
1024 reader->bprefix ? (char*)reader->bprefix : "", | |
1025 reader->bprefix_len); | |
1026 | |
1027 uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) | |
1028 if (is_digit(c) || c == '_') { | |
1029 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
1030 } else if (!read_PN_CHARS(reader, ref)) { | |
1031 r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n"); | |
1032 return pop_node(reader, ref); | |
1033 } | |
1034 | |
1035 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* | |
1036 if (c == '.') { | |
1037 push_byte(reader, ref, eat_byte_safe(reader, c)); | |
1038 } else if (!read_PN_CHARS(reader, ref)) { | |
1039 break; | |
1040 } | |
1041 } | |
1042 | |
1043 SerdNode* n = deref(reader, ref); | |
1044 if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, ref)) { | |
1045 // Ate trailing dot, pop it from stack/node and inform caller | |
1046 --n->n_bytes; | |
1047 serd_stack_pop(&reader->stack, 1); | |
1048 *ate_dot = true; | |
1049 } | |
1050 | |
1051 if (reader->syntax == SERD_TURTLE) { | |
1052 if (is_digit(n->buf[reader->bprefix_len + 1])) { | |
1053 if ((n->buf[reader->bprefix_len]) == 'b') { | |
1054 ((char*)n->buf)[reader->bprefix_len] = 'B'; // Prevent clash | |
1055 reader->seen_genid = true; | |
1056 } else if (reader->seen_genid && | |
1057 n->buf[reader->bprefix_len] == 'B') { | |
1058 r_err(reader, SERD_ERR_ID_CLASH, | |
1059 "found both `b' and `B' blank IDs, prefix required\n"); | |
1060 return pop_node(reader, ref); | |
1061 } | |
1062 } | |
1063 } | |
1064 return ref; | |
1065 } | |
1066 | |
1067 static void | |
1068 set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) | |
1069 { | |
1070 SerdNode* node = deref(reader, ref); | |
1071 const char* prefix = reader->bprefix ? (const char*)reader->bprefix : ""; | |
1072 node->n_bytes = node->n_chars = snprintf( | |
1073 (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); | |
1074 } | |
1075 | |
1076 static size_t | |
1077 genid_size(SerdReader* reader) | |
1078 { | |
1079 return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0 | |
1080 } | |
1081 | |
1082 static Ref | |
1083 blank_id(SerdReader* reader) | |
1084 { | |
1085 Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); | |
1086 set_blank_id(reader, ref, genid_size(reader)); | |
1087 return ref; | |
1088 } | |
1089 | |
1090 static Ref | |
1091 read_blankName(SerdReader* reader) | |
1092 { | |
1093 eat_byte_safe(reader, '='); | |
1094 if (eat_byte_check(reader, '=') != '=') { | |
1095 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n"); | |
1096 } | |
1097 | |
1098 Ref subject = 0; | |
1099 bool ate_dot = false; | |
1100 read_ws_star(reader); | |
1101 read_iri(reader, &subject, &ate_dot); | |
1102 return subject; | |
1103 } | |
1104 | |
1105 static bool | |
1106 read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) | |
1107 { | |
1108 const SerdStatementFlags old_flags = *ctx.flags; | |
1109 bool empty; | |
1110 eat_byte_safe(reader, '['); | |
1111 if ((empty = peek_delim(reader, ']'))) { | |
1112 *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O; | |
1113 } else { | |
1114 *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN; | |
1115 if (peek_delim(reader, '=')) { | |
1116 if (!(*dest = read_blankName(reader)) || | |
1117 !eat_delim(reader, ';')) { | |
1118 return false; | |
1119 } | |
1120 } | |
1121 } | |
1122 | |
1123 if (!*dest) { | |
1124 *dest = blank_id(reader); | |
1125 } | |
1126 if (ctx.subject) { | |
1127 TRY_RET(emit_statement(reader, ctx, *dest, 0, 0)); | |
1128 } | |
1129 | |
1130 ctx.subject = *dest; | |
1131 if (!empty) { | |
1132 *ctx.flags &= ~(SERD_LIST_CONT); | |
1133 if (!subject) { | |
1134 *ctx.flags |= SERD_ANON_CONT; | |
1135 } | |
1136 bool ate_dot_in_list = false; | |
1137 read_predicateObjectList(reader, ctx, &ate_dot_in_list); | |
1138 if (ate_dot_in_list) { | |
1139 return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n"); | |
1140 } | |
1141 read_ws_star(reader); | |
1142 if (reader->end_sink) { | |
1143 reader->end_sink(reader->handle, deref(reader, *dest)); | |
1144 } | |
1145 *ctx.flags = old_flags; | |
1146 } | |
1147 return (eat_byte_check(reader, ']') == ']'); | |
1148 } | |
1149 | |
1150 /* If emit is true: recurses, calling statement_sink for every statement | |
1151 encountered, and leaves stack in original calling state (i.e. pops | |
1152 everything it pushes). */ | |
1153 static bool | |
1154 read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) | |
1155 { | |
1156 static const char* const XSD_BOOLEAN = NS_XSD "boolean"; | |
1157 static const size_t XSD_BOOLEAN_LEN = 40; | |
1158 | |
1159 #ifndef NDEBUG | |
1160 const size_t orig_stack_size = reader->stack.size; | |
1161 #endif | |
1162 | |
1163 bool ret = false; | |
1164 bool simple = (ctx->subject != 0); | |
1165 SerdNode* node = NULL; | |
1166 Ref o = 0; | |
1167 Ref datatype = 0; | |
1168 Ref lang = 0; | |
1169 uint32_t flags = 0; | |
1170 const uint8_t c = peek_byte(reader); | |
1171 if (!supports_fancy_literals(reader)) { | |
1172 switch (c) { | |
1173 case '"': case ':': case '<': case '_': break; | |
1174 default: return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
1175 "expected: ':', '<', or '_'\n"); | |
1176 } | |
1177 } | |
1178 switch (c) { | |
1179 case '\0': | |
1180 case ')': | |
1181 return false; | |
1182 case '[': | |
1183 simple = false; | |
1184 TRY_THROW(ret = read_anon(reader, *ctx, false, &o)); | |
1185 break; | |
1186 case '(': | |
1187 simple = false; | |
1188 TRY_THROW(ret = read_collection(reader, *ctx, &o)); | |
1189 break; | |
1190 case '_': | |
1191 TRY_THROW(ret = (o = read_BLANK_NODE_LABEL(reader, ate_dot))); | |
1192 break; | |
1193 case '<': case ':': | |
1194 TRY_THROW(ret = read_iri(reader, &o, ate_dot)); | |
1195 break; | |
1196 case '+': case '-': case '.': case '0': case '1': case '2': case '3': | |
1197 case '4': case '5': case '6': case '7': case '8': case '9': | |
1198 TRY_THROW(ret = read_number(reader, &o, &datatype, ate_dot)); | |
1199 break; | |
1200 case '\"': | |
1201 case '\'': | |
1202 TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot)); | |
1203 break; | |
1204 default: | |
1205 /* Either a boolean literal, or a qname. Read the prefix first, and if | |
1206 it is in fact a "true" or "false" literal, produce that instead. | |
1207 */ | |
1208 node = deref(reader, o = push_node(reader, SERD_CURIE, "", 0)); | |
1209 while (read_PN_CHARS_BASE(reader, o)) {} | |
1210 if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || | |
1211 (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { | |
1212 node->type = SERD_LITERAL; | |
1213 datatype = push_node( | |
1214 reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); | |
1215 ret = true; | |
1216 } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { | |
1217 ret = false; | |
1218 } else { | |
1219 ret = read_PrefixedName(reader, o, false, ate_dot); | |
1220 } | |
1221 } | |
1222 | |
1223 if (simple && o) { | |
1224 deref(reader, o)->flags = flags; | |
1225 } | |
1226 | |
1227 if (ret && emit && simple) { | |
1228 ret = emit_statement(reader, *ctx, o, datatype, lang); | |
1229 } else if (ret && !emit) { | |
1230 ctx->object = o; | |
1231 ctx->datatype = datatype; | |
1232 ctx->lang = lang; | |
1233 return true; | |
1234 } | |
1235 | |
1236 except: | |
1237 pop_node(reader, lang); | |
1238 pop_node(reader, datatype); | |
1239 pop_node(reader, o); | |
1240 #ifndef NDEBUG | |
1241 assert(reader->stack.size == orig_stack_size); | |
1242 #endif | |
1243 return ret; | |
1244 } | |
1245 | |
1246 static bool | |
1247 read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) | |
1248 { | |
1249 TRY_RET(read_object(reader, &ctx, true, ate_dot)); | |
1250 while (!*ate_dot && eat_delim(reader, ',')) { | |
1251 TRY_RET(read_object(reader, &ctx, true, ate_dot)); | |
1252 } | |
1253 return true; | |
1254 } | |
1255 | |
1256 static bool | |
1257 read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) | |
1258 { | |
1259 uint8_t c; | |
1260 while (true) { | |
1261 TRY_THROW(read_verb(reader, &ctx.predicate)); | |
1262 read_ws_star(reader); | |
1263 | |
1264 TRY_THROW(read_objectList(reader, ctx, ate_dot)); | |
1265 ctx.predicate = pop_node(reader, ctx.predicate); | |
1266 if (*ate_dot) { | |
1267 return true; | |
1268 } | |
1269 | |
1270 bool ate_semi = false; | |
1271 do { | |
1272 read_ws_star(reader); | |
1273 switch (c = peek_byte(reader)) { | |
1274 case 0: | |
1275 return false; | |
1276 case '.': case ']': case '}': | |
1277 return true; | |
1278 case ';': | |
1279 eat_byte_safe(reader, c); | |
1280 ate_semi = true; | |
1281 } | |
1282 } while (c == ';'); | |
1283 | |
1284 if (!ate_semi) { | |
1285 return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing ';' or '.'\n"); | |
1286 } | |
1287 } | |
1288 | |
1289 pop_node(reader, ctx.predicate); | |
1290 return true; | |
1291 except: | |
1292 pop_node(reader, ctx.predicate); | |
1293 return false; | |
1294 } | |
1295 | |
1296 static bool | |
1297 end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret) | |
1298 { | |
1299 pop_node(reader, n2); | |
1300 pop_node(reader, n1); | |
1301 *ctx.flags &= ~SERD_LIST_CONT; | |
1302 return ret && (eat_byte_safe(reader, ')') == ')'); | |
1303 } | |
1304 | |
1305 static bool | |
1306 read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) | |
1307 { | |
1308 eat_byte_safe(reader, '('); | |
1309 bool end = peek_delim(reader, ')'); | |
1310 *dest = end ? reader->rdf_nil : blank_id(reader); | |
1311 if (ctx.subject) { | |
1312 // subject predicate _:head | |
1313 *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN); | |
1314 TRY_RET(emit_statement(reader, ctx, *dest, 0, 0)); | |
1315 *ctx.flags |= SERD_LIST_CONT; | |
1316 } else { | |
1317 *ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN); | |
1318 } | |
1319 | |
1320 if (end) { | |
1321 return end_collection(reader, ctx, 0, 0, true); | |
1322 } | |
1323 | |
1324 /* The order of node allocation here is necessarily not in stack order, | |
1325 so we create two nodes and recycle them throughout. */ | |
1326 Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); | |
1327 Ref n2 = 0; | |
1328 Ref node = n1; | |
1329 Ref rest = 0; | |
1330 | |
1331 ctx.subject = *dest; | |
1332 while (!(end = peek_delim(reader, ')'))) { | |
1333 // _:node rdf:first object | |
1334 ctx.predicate = reader->rdf_first; | |
1335 bool ate_dot = false; | |
1336 if (!read_object(reader, &ctx, true, &ate_dot) || ate_dot) { | |
1337 return end_collection(reader, ctx, n1, n2, false); | |
1338 } | |
1339 | |
1340 if (!(end = peek_delim(reader, ')'))) { | |
1341 /* Give rest a new ID. Done as late as possible to ensure it is | |
1342 used and > IDs generated by read_object above. */ | |
1343 if (!rest) { | |
1344 rest = n2 = blank_id(reader); // First pass, push | |
1345 } else { | |
1346 set_blank_id(reader, rest, genid_size(reader)); | |
1347 } | |
1348 } | |
1349 | |
1350 // _:node rdf:rest _:rest | |
1351 *ctx.flags |= SERD_LIST_CONT; | |
1352 ctx.predicate = reader->rdf_rest; | |
1353 TRY_RET(emit_statement(reader, ctx, | |
1354 (end ? reader->rdf_nil : rest), 0, 0)); | |
1355 | |
1356 ctx.subject = rest; // _:node = _:rest | |
1357 rest = node; // _:rest = (old)_:node | |
1358 node = ctx.subject; // invariant | |
1359 } | |
1360 | |
1361 return end_collection(reader, ctx, n1, n2, true); | |
1362 } | |
1363 | |
1364 static Ref | |
1365 read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type) | |
1366 { | |
1367 bool ate_dot = false; | |
1368 switch ((*s_type = peek_byte(reader))) { | |
1369 case '[': | |
1370 read_anon(reader, ctx, true, dest); | |
1371 break; | |
1372 case '(': | |
1373 read_collection(reader, ctx, dest); | |
1374 break; | |
1375 case '_': | |
1376 *dest = read_BLANK_NODE_LABEL(reader, &ate_dot); | |
1377 break; | |
1378 default: | |
1379 TRY_RET(read_iri(reader, dest, &ate_dot)); | |
1380 } | |
1381 return ate_dot ? pop_node(reader, *dest) : *dest; | |
1382 } | |
1383 | |
1384 static Ref | |
1385 read_labelOrSubject(SerdReader* reader, ReadContext ctx) | |
1386 { | |
1387 Ref subject = 0; | |
1388 bool ate_dot = false; | |
1389 switch (peek_byte(reader)) { | |
1390 case '[': | |
1391 eat_byte_safe(reader, '['); | |
1392 read_ws_star(reader); | |
1393 TRY_RET(eat_byte_check(reader, ']')); | |
1394 return blank_id(reader); | |
1395 case '_': | |
1396 return read_BLANK_NODE_LABEL(reader, &ate_dot); | |
1397 default: | |
1398 read_iri(reader, &subject, &ate_dot); | |
1399 } | |
1400 return subject; | |
1401 } | |
1402 | |
1403 static bool | |
1404 read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot) | |
1405 { | |
1406 bool ret = false; | |
1407 if (ctx.subject) { | |
1408 read_ws_star(reader); | |
1409 switch (peek_byte(reader)) { | |
1410 case '.': | |
1411 *ate_dot = eat_byte_safe(reader, '.'); | |
1412 return false; | |
1413 case '}': | |
1414 return false; | |
1415 } | |
1416 ret = read_predicateObjectList(reader, ctx, ate_dot); | |
1417 } | |
1418 ctx.subject = ctx.predicate = 0; | |
1419 return ret; | |
1420 } | |
1421 | |
1422 static bool | |
1423 read_base(SerdReader* reader, bool sparql, bool token) | |
1424 { | |
1425 if (token) { | |
1426 TRY_RET(eat_string(reader, "base", 4)); | |
1427 } | |
1428 | |
1429 Ref uri; | |
1430 read_ws_star(reader); | |
1431 TRY_RET(uri = read_IRIREF(reader)); | |
1432 if (reader->base_sink) { | |
1433 reader->base_sink(reader->handle, deref(reader, uri)); | |
1434 } | |
1435 pop_node(reader, uri); | |
1436 | |
1437 read_ws_star(reader); | |
1438 if (!sparql) { | |
1439 return eat_byte_check(reader, '.'); | |
1440 } else if (peek_byte(reader) == '.') { | |
1441 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
1442 "full stop after SPARQL BASE\n"); | |
1443 } | |
1444 return true; | |
1445 } | |
1446 | |
1447 static bool | |
1448 read_prefixID(SerdReader* reader, bool sparql, bool token) | |
1449 { | |
1450 if (token) { | |
1451 TRY_RET(eat_string(reader, "prefix", 6)); | |
1452 } | |
1453 | |
1454 read_ws_star(reader); | |
1455 bool ret = true; | |
1456 Ref name = push_node(reader, SERD_LITERAL, "", 0); | |
1457 if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { | |
1458 return pop_node(reader, name); | |
1459 } | |
1460 | |
1461 if (eat_byte_check(reader, ':') != ':') { | |
1462 return pop_node(reader, name); | |
1463 } | |
1464 | |
1465 read_ws_star(reader); | |
1466 const Ref uri = read_IRIREF(reader); | |
1467 if (!uri) { | |
1468 pop_node(reader, name); | |
1469 return false; | |
1470 } | |
1471 | |
1472 if (reader->prefix_sink) { | |
1473 ret = !reader->prefix_sink(reader->handle, | |
1474 deref(reader, name), | |
1475 deref(reader, uri)); | |
1476 } | |
1477 pop_node(reader, uri); | |
1478 pop_node(reader, name); | |
1479 if (!sparql) { | |
1480 read_ws_star(reader); | |
1481 return eat_byte_check(reader, '.'); | |
1482 } | |
1483 return ret; | |
1484 } | |
1485 | |
1486 static bool | |
1487 read_directive(SerdReader* reader) | |
1488 { | |
1489 const bool sparql = peek_byte(reader) != '@'; | |
1490 if (!sparql) { | |
1491 eat_byte_safe(reader, '@'); | |
1492 switch (peek_byte(reader)) { | |
1493 case 'B': case 'P': | |
1494 return r_err(reader, SERD_ERR_BAD_SYNTAX, | |
1495 "uppercase directive\n"); | |
1496 } | |
1497 } | |
1498 | |
1499 switch (peek_byte(reader)) { | |
1500 case 'B': case 'b': return read_base(reader, sparql, true); | |
1501 case 'P': case 'p': return read_prefixID(reader, sparql, true); | |
1502 default: | |
1503 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); | |
1504 } | |
1505 | |
1506 return true; | |
1507 } | |
1508 | |
1509 static bool | |
1510 read_wrappedGraph(SerdReader* reader, ReadContext* ctx) | |
1511 { | |
1512 bool ate_dot = false; | |
1513 char s_type = 0; | |
1514 TRY_RET(eat_byte_check(reader, '{')); | |
1515 read_ws_star(reader); | |
1516 while (peek_byte(reader) != '}') { | |
1517 ctx->subject = 0; | |
1518 Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type); | |
1519 if (!subj || | |
1520 (!read_triples(reader, *ctx, &ate_dot) && s_type != '[')) { | |
1521 return false; | |
1522 } | |
1523 pop_node(reader, subj); | |
1524 read_ws_star(reader); | |
1525 if (peek_byte(reader) == '.') { | |
1526 eat_byte_safe(reader, '.'); | |
1527 } | |
1528 read_ws_star(reader); | |
1529 } | |
1530 return eat_byte_check(reader, '}'); | |
1531 } | |
1532 | |
1533 static int | |
1534 tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n) | |
1535 { | |
1536 SerdNode* node = deref(reader, ref); | |
1537 if (!node || node->n_bytes != n) { | |
1538 return -1; | |
1539 } | |
1540 const char* s1 = (const char*)node->buf; | |
1541 const char* s2 = tok; | |
1542 for (; n > 0 && *s2; s1++, s2++, --n) { | |
1543 if (toupper(*s1) != toupper(*s2)) { | |
1544 return ((*(uint8_t*)s1 < *(uint8_t*)s2) ? -1 : +1); | |
1545 } | |
1546 } | |
1547 return 0; | |
1548 } | |
1549 | |
1550 static bool | |
1551 read_statement(SerdReader* reader) | |
1552 { | |
1553 SerdStatementFlags flags = 0; | |
1554 ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; | |
1555 Ref subj = 0; | |
1556 bool ate_dot = false; | |
1557 char s_type = false; | |
1558 bool ret = true; | |
1559 read_ws_star(reader); | |
1560 switch (peek_byte(reader)) { | |
1561 case '\0': | |
1562 reader->eof = true; | |
1563 return reader->status <= SERD_FAILURE; | |
1564 case '@': | |
1565 TRY_RET(read_directive(reader)); | |
1566 read_ws_star(reader); | |
1567 break; | |
1568 case '{': | |
1569 if (reader->syntax == SERD_TRIG) { | |
1570 TRY_RET(read_wrappedGraph(reader, &ctx)); | |
1571 read_ws_star(reader); | |
1572 } else { | |
1573 return r_err(reader, SERD_ERR_BAD_SYNTAX, "graph in Turtle\n"); | |
1574 } | |
1575 break; | |
1576 default: | |
1577 subj = read_subject(reader, ctx, &ctx.subject, &s_type); | |
1578 if (!tokcmp(reader, ctx.subject, "base", 4)) { | |
1579 ret = read_base(reader, true, false); | |
1580 } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) { | |
1581 ret = read_prefixID(reader, true, false); | |
1582 } else if (!tokcmp(reader, ctx.subject, "graph", 5)) { | |
1583 read_ws_star(reader); | |
1584 TRY_RET((ctx.graph = read_labelOrSubject(reader, ctx))); | |
1585 read_ws_star(reader); | |
1586 TRY_RET(read_wrappedGraph(reader, &ctx)); | |
1587 read_ws_star(reader); | |
1588 } else if (read_ws_star(reader) && peek_byte(reader) == '{') { | |
1589 if (s_type == '(' || (s_type == '[' && !*ctx.flags)) { | |
1590 return false; // invalid graph with complex label | |
1591 } | |
1592 ctx.graph = subj; | |
1593 ctx.subject = subj = 0; | |
1594 TRY_RET(read_wrappedGraph(reader, &ctx)); | |
1595 read_ws_star(reader); | |
1596 } else if (!subj) { | |
1597 ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); | |
1598 } else if (!read_triples(reader, ctx, &ate_dot)) { | |
1599 ret = (s_type == '['); | |
1600 } else if (!ate_dot) { | |
1601 read_ws_star(reader); | |
1602 ret = (eat_byte_check(reader, '.') == '.'); | |
1603 } | |
1604 pop_node(reader, subj); | |
1605 break; | |
1606 } | |
1607 return ret; | |
1608 } | |
1609 | |
1610 static bool | |
1611 read_turtleDoc(SerdReader* reader) | |
1612 { | |
1613 while (!reader->eof) { | |
1614 TRY_RET(read_statement(reader)); | |
1615 } | |
1616 return reader->status <= SERD_FAILURE; | |
1617 } | |
1618 | |
1619 static bool | |
1620 read_trigDoc(SerdReader* reader) | |
1621 { | |
1622 while (!reader->eof) { | |
1623 TRY_RET(read_statement(reader)); | |
1624 } | |
1625 return reader->status <= SERD_FAILURE; | |
1626 } | |
1627 | |
1628 static bool | |
1629 read_nquadsDoc(SerdReader* reader) | |
1630 { | |
1631 while (!reader->eof) { | |
1632 SerdStatementFlags flags = 0; | |
1633 ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; | |
1634 bool ate_dot = false; | |
1635 char s_type = false; | |
1636 read_ws_star(reader); | |
1637 if (peek_byte(reader) == '\0') { | |
1638 reader->eof = true; | |
1639 break; | |
1640 } | |
1641 | |
1642 // subject predicate object | |
1643 if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &s_type)) || | |
1644 !read_ws_star(reader) || | |
1645 !(ctx.predicate = read_IRIREF(reader)) || | |
1646 !read_ws_star(reader) || | |
1647 !read_object(reader, &ctx, false, &ate_dot)) { | |
1648 return false; | |
1649 } | |
1650 | |
1651 if (!ate_dot) { // graphLabel? | |
1652 TRY_RET(read_ws_star(reader)); | |
1653 switch (peek_byte(reader)) { | |
1654 case '.': | |
1655 break; | |
1656 case '_': | |
1657 ctx.graph = read_BLANK_NODE_LABEL(reader, &ate_dot); | |
1658 break; | |
1659 default: | |
1660 if (!(ctx.graph = read_IRIREF(reader))) { | |
1661 return false; | |
1662 } | |
1663 } | |
1664 | |
1665 // Terminating '.' | |
1666 TRY_RET(read_ws_star(reader)); | |
1667 eat_byte_check(reader, '.'); | |
1668 } | |
1669 | |
1670 TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)); | |
1671 pop_node(reader, ctx.graph); | |
1672 pop_node(reader, ctx.lang); | |
1673 pop_node(reader, ctx.datatype); | |
1674 pop_node(reader, ctx.object); | |
1675 } | |
1676 return reader->status <= SERD_FAILURE; | |
1677 } | |
1678 | |
1679 static bool | |
1680 read_doc(SerdReader* reader) | |
1681 { | |
1682 switch (reader->syntax) { | |
1683 case SERD_NQUADS: return read_nquadsDoc(reader); | |
1684 case SERD_TRIG: return read_trigDoc(reader); | |
1685 default: return read_turtleDoc(reader); | |
1686 } | |
1687 } | |
1688 | |
1689 SERD_API | |
1690 SerdReader* | |
1691 serd_reader_new(SerdSyntax syntax, | |
1692 void* handle, | |
1693 void (*free_handle)(void*), | |
1694 SerdBaseSink base_sink, | |
1695 SerdPrefixSink prefix_sink, | |
1696 SerdStatementSink statement_sink, | |
1697 SerdEndSink end_sink) | |
1698 { | |
1699 const Cursor cur = { NULL, 0, 0 }; | |
1700 SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader)); | |
1701 me->handle = handle; | |
1702 me->free_handle = free_handle; | |
1703 me->base_sink = base_sink; | |
1704 me->prefix_sink = prefix_sink; | |
1705 me->statement_sink = statement_sink; | |
1706 me->end_sink = end_sink; | |
1707 me->default_graph = SERD_NODE_NULL; | |
1708 me->stack = serd_stack_new(SERD_PAGE_SIZE); | |
1709 me->syntax = syntax; | |
1710 me->cur = cur; | |
1711 me->next_id = 1; | |
1712 | |
1713 me->rdf_first = push_node(me, SERD_URI, NS_RDF "first", 48); | |
1714 me->rdf_rest = push_node(me, SERD_URI, NS_RDF "rest", 47); | |
1715 me->rdf_nil = push_node(me, SERD_URI, NS_RDF "nil", 46); | |
1716 | |
1717 return me; | |
1718 } | |
1719 | |
1720 SERD_API | |
1721 void | |
1722 serd_reader_set_strict(SerdReader* reader, bool strict) | |
1723 { | |
1724 reader->strict = strict; | |
1725 } | |
1726 | |
1727 SERD_API | |
1728 void | |
1729 serd_reader_set_error_sink(SerdReader* reader, | |
1730 SerdErrorSink error_sink, | |
1731 void* error_handle) | |
1732 { | |
1733 reader->error_sink = error_sink; | |
1734 reader->error_handle = error_handle; | |
1735 } | |
1736 | |
1737 SERD_API | |
1738 void | |
1739 serd_reader_free(SerdReader* reader) | |
1740 { | |
1741 pop_node(reader, reader->rdf_nil); | |
1742 pop_node(reader, reader->rdf_rest); | |
1743 pop_node(reader, reader->rdf_first); | |
1744 serd_node_free(&reader->default_graph); | |
1745 | |
1746 #ifdef SERD_STACK_CHECK | |
1747 free(reader->allocs); | |
1748 #endif | |
1749 free(reader->stack.buf); | |
1750 free(reader->bprefix); | |
1751 if (reader->free_handle) { | |
1752 reader->free_handle(reader->handle); | |
1753 } | |
1754 free(reader); | |
1755 } | |
1756 | |
1757 SERD_API | |
1758 void* | |
1759 serd_reader_get_handle(const SerdReader* reader) | |
1760 { | |
1761 return reader->handle; | |
1762 } | |
1763 | |
1764 SERD_API | |
1765 void | |
1766 serd_reader_add_blank_prefix(SerdReader* reader, | |
1767 const uint8_t* prefix) | |
1768 { | |
1769 free(reader->bprefix); | |
1770 reader->bprefix_len = 0; | |
1771 reader->bprefix = NULL; | |
1772 if (prefix) { | |
1773 reader->bprefix_len = strlen((const char*)prefix); | |
1774 reader->bprefix = (uint8_t*)malloc(reader->bprefix_len + 1); | |
1775 memcpy(reader->bprefix, prefix, reader->bprefix_len + 1); | |
1776 } | |
1777 } | |
1778 | |
1779 SERD_API | |
1780 void | |
1781 serd_reader_set_default_graph(SerdReader* reader, | |
1782 const SerdNode* graph) | |
1783 { | |
1784 serd_node_free(&reader->default_graph); | |
1785 reader->default_graph = serd_node_copy(graph); | |
1786 } | |
1787 | |
1788 SERD_API | |
1789 SerdStatus | |
1790 serd_reader_read_file(SerdReader* reader, | |
1791 const uint8_t* uri) | |
1792 { | |
1793 uint8_t* const path = serd_file_uri_parse(uri, NULL); | |
1794 if (!path) { | |
1795 return SERD_ERR_BAD_ARG; | |
1796 } | |
1797 | |
1798 FILE* fd = serd_fopen((const char*)path, "r"); | |
1799 if (!fd) { | |
1800 free(path); | |
1801 return SERD_ERR_UNKNOWN; | |
1802 } | |
1803 | |
1804 SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); | |
1805 fclose(fd); | |
1806 free(path); | |
1807 return ret; | |
1808 } | |
1809 | |
1810 static bool | |
1811 skip_bom(SerdReader* me) | |
1812 { | |
1813 if (peek_byte(me) == 0xEF) { | |
1814 eat_byte_safe(me, 0xEF); | |
1815 if (eat_byte_check(me, 0xBB) != 0xBB || | |
1816 eat_byte_check(me, 0xBF) != 0xBF) { | |
1817 return r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); | |
1818 } | |
1819 } | |
1820 | |
1821 return true; | |
1822 } | |
1823 | |
1824 SERD_API | |
1825 SerdStatus | |
1826 serd_reader_start_stream(SerdReader* me, | |
1827 FILE* file, | |
1828 const uint8_t* name, | |
1829 bool bulk) | |
1830 { | |
1831 return serd_reader_start_source_stream( | |
1832 me, | |
1833 bulk ? (SerdSource)fread : serd_file_read_byte, | |
1834 (SerdStreamErrorFunc)ferror, | |
1835 file, | |
1836 name, | |
1837 bulk ? SERD_PAGE_SIZE : 1); | |
1838 } | |
1839 | |
1840 SERD_API | |
1841 SerdStatus | |
1842 serd_reader_start_source_stream(SerdReader* me, | |
1843 SerdSource read_func, | |
1844 SerdStreamErrorFunc error_func, | |
1845 void* stream, | |
1846 const uint8_t* name, | |
1847 size_t page_size) | |
1848 { | |
1849 const Cursor cur = { name, 1, 1 }; | |
1850 me->cur = cur; | |
1851 | |
1852 return serd_byte_source_open_source( | |
1853 &me->source, read_func, error_func, stream, page_size); | |
1854 } | |
1855 | |
1856 static SerdStatus | |
1857 serd_reader_prepare(SerdReader* me) | |
1858 { | |
1859 me->eof = false; | |
1860 if ((me->status = serd_byte_source_prepare(&me->source))) { | |
1861 r_err(me, me->status, "read error: %s\n", strerror(errno)); | |
1862 } else if (!skip_bom(me)) { | |
1863 me->status = SERD_ERR_BAD_SYNTAX; | |
1864 } | |
1865 return me->status; | |
1866 } | |
1867 | |
1868 SERD_API | |
1869 SerdStatus | |
1870 serd_reader_read_chunk(SerdReader* me) | |
1871 { | |
1872 SerdStatus st = SERD_SUCCESS; | |
1873 if (!me->source.prepared) { | |
1874 if ((st = serd_reader_prepare(me))) { | |
1875 return st; | |
1876 } | |
1877 } else if (me->eof) { | |
1878 me->eof = false; | |
1879 if ((st = serd_byte_source_advance(&me->source))) { | |
1880 return st; | |
1881 } | |
1882 } | |
1883 return read_statement(me) ? SERD_SUCCESS : SERD_FAILURE; | |
1884 } | |
1885 | |
1886 SERD_API | |
1887 SerdStatus | |
1888 serd_reader_end_stream(SerdReader* me) | |
1889 { | |
1890 return serd_byte_source_close(&me->source); | |
1891 } | |
1892 | |
1893 SERD_API | |
1894 SerdStatus | |
1895 serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) | |
1896 { | |
1897 return serd_reader_read_source( | |
1898 me, (SerdSource)fread, (SerdStreamErrorFunc)ferror, | |
1899 file, name, SERD_PAGE_SIZE); | |
1900 } | |
1901 | |
1902 SERD_API | |
1903 SerdStatus | |
1904 serd_reader_read_source(SerdReader* me, | |
1905 SerdSource source, | |
1906 SerdStreamErrorFunc error, | |
1907 void* stream, | |
1908 const uint8_t* name, | |
1909 size_t page_size) | |
1910 { | |
1911 SerdStatus st = serd_reader_start_source_stream( | |
1912 me, source, error, stream, name, page_size); | |
1913 | |
1914 if ((st = serd_reader_prepare(me))) { | |
1915 serd_reader_end_stream(me); | |
1916 return st; | |
1917 } else if (!read_doc(me)) { | |
1918 serd_reader_end_stream(me); | |
1919 return SERD_ERR_UNKNOWN; | |
1920 } | |
1921 | |
1922 return serd_reader_end_stream(me); | |
1923 } | |
1924 | |
1925 SERD_API | |
1926 SerdStatus | |
1927 serd_reader_read_string(SerdReader* me, const uint8_t* utf8) | |
1928 { | |
1929 const Cursor cur = { (const uint8_t*)"(string)", 1, 1 }; | |
1930 | |
1931 serd_byte_source_open_string(&me->source, utf8); | |
1932 me->cur = cur; | |
1933 me->eof = false; | |
1934 | |
1935 SerdStatus st = serd_reader_prepare(me); | |
1936 if (!st) { | |
1937 st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; | |
1938 } | |
1939 | |
1940 serd_byte_source_close(&me->source); | |
1941 | |
1942 return st; | |
1943 } |