Mercurial > hg > piper-cpp
comparison ext/serd/src/writer.c @ 226:c5cdc9e6a4bf
Add these external library files
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Fri, 09 Jun 2017 16:41:31 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
225:025b3e2f7c17 | 226:c5cdc9e6a4bf |
---|---|
1 /* | |
2 Copyright 2011-2017 David Robillard <http://drobilla.net> | |
3 | |
4 Permission to use, copy, modify, and/or distribute this software for any | |
5 purpose with or without fee is hereby granted, provided that the above | |
6 copyright notice and this permission notice appear in all copies. | |
7 | |
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
15 */ | |
16 | |
17 #include "serd_internal.h" | |
18 | |
19 #include <assert.h> | |
20 #include <stdio.h> | |
21 #include <stdlib.h> | |
22 #include <string.h> | |
23 | |
24 #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" | |
25 #define NS_XSD "http://www.w3.org/2001/XMLSchema#" | |
26 | |
27 typedef struct { | |
28 SerdNode graph; | |
29 SerdNode subject; | |
30 SerdNode predicate; | |
31 } WriteContext; | |
32 | |
33 static const WriteContext WRITE_CONTEXT_NULL = { | |
34 { 0, 0, 0, 0, SERD_NOTHING }, | |
35 { 0, 0, 0, 0, SERD_NOTHING }, | |
36 { 0, 0, 0, 0, SERD_NOTHING } | |
37 }; | |
38 | |
/** Separator kinds; each value is the index of its row in rules[]. */
typedef enum {
    SEP_NONE        = 0,   ///< No separator (set after a node is written)
    SEP_END_S       = 1,   ///< End of a subject ('.')
    SEP_END_P       = 2,   ///< End of a predicate (';')
    SEP_END_O       = 3,   ///< End of an object (',')
    SEP_S_P         = 4,   ///< Between a subject and predicate (whitespace)
    SEP_P_O         = 5,   ///< Between a predicate and object (whitespace)
    SEP_ANON_BEGIN  = 6,   ///< Start of anonymous node ('[')
    SEP_ANON_END    = 7,   ///< End of anonymous node (']')
    SEP_LIST_BEGIN  = 8,   ///< Start of list ('(')
    SEP_LIST_SEP    = 9,   ///< List separator (whitespace)
    SEP_LIST_END    = 10,  ///< End of list (')')
    SEP_GRAPH_BEGIN = 11,  ///< Start of graph ('{')
    SEP_GRAPH_END   = 12   ///< End of graph ('}')
} Sep;
54 | |
/** How write_sep() emits one kind of separator. */
typedef struct {
    const char* str;               ///< Separator text, or NULL to write none
    uint8_t     len;               ///< Number of bytes of str to write
    uint8_t     space_before;      ///< Write a newline before the separator
    uint8_t     space_after_node;  ///< Newline after it when it follows a node
    uint8_t     space_after_sep;   ///< Newline after it when it follows a sep
} SepRule;
62 | |
63 static const SepRule rules[] = { | |
64 { NULL, 0, 0, 0, 0 }, | |
65 { " .\n\n", 4, 0, 0, 0 }, | |
66 { " ;", 2, 0, 1, 1 }, | |
67 { " ,", 2, 0, 1, 0 }, | |
68 { NULL, 0, 0, 1, 0 }, | |
69 { " ", 1, 0, 0, 0 }, | |
70 { "[", 1, 0, 1, 1 }, | |
71 { "]", 1, 1, 0, 0 }, | |
72 { "(", 1, 0, 0, 0 }, | |
73 { NULL, 1, 0, 1, 0 }, | |
74 { ")", 1, 1, 0, 0 }, | |
75 { " {", 2, 0, 1, 1 }, | |
76 { " }", 2, 0, 1, 1 }, | |
77 { "\n", 1, 0, 1, 0 } | |
78 }; | |
79 | |
80 struct SerdWriterImpl { | |
81 SerdSyntax syntax; | |
82 SerdStyle style; | |
83 SerdEnv* env; | |
84 SerdNode root_node; | |
85 SerdURI root_uri; | |
86 SerdURI base_uri; | |
87 SerdStack anon_stack; | |
88 SerdByteSink byte_sink; | |
89 SerdErrorSink error_sink; | |
90 void* error_handle; | |
91 WriteContext context; | |
92 SerdNode list_subj; | |
93 unsigned list_depth; | |
94 unsigned indent; | |
95 uint8_t* bprefix; | |
96 size_t bprefix_len; | |
97 Sep last_sep; | |
98 bool empty; | |
99 }; | |
100 | |
/** Which kind of quoted string write_text() is producing. */
typedef enum {
    WRITE_STRING      = 0,  ///< Short string: newlines, tabs, quotes escaped
    WRITE_LONG_STRING = 1   ///< Long string: newlines and tabs written as-is
} TextContext;
105 | |
106 static bool | |
107 supports_abbrev(const SerdWriter* writer) | |
108 { | |
109 return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; | |
110 } | |
111 | |
112 static void | |
113 w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) | |
114 { | |
115 /* TODO: This results in errors with no file information, which is not | |
116 helpful when re-serializing a file (particularly for "undefined | |
117 namespace prefix" errors. The statement sink API needs to be changed to | |
118 add a Cursor parameter so the source can notify the writer of the | |
119 statement origin for better error reporting. */ | |
120 | |
121 va_list args; | |
122 va_start(args, fmt); | |
123 const SerdError e = { st, NULL, 0, 0, fmt, &args }; | |
124 serd_error(writer->error_sink, writer->error_handle, &e); | |
125 va_end(args); | |
126 } | |
127 | |
128 static inline WriteContext* | |
129 anon_stack_top(SerdWriter* writer) | |
130 { | |
131 assert(!serd_stack_is_empty(&writer->anon_stack)); | |
132 return (WriteContext*)(writer->anon_stack.buf | |
133 + writer->anon_stack.size - sizeof(WriteContext)); | |
134 } | |
135 | |
136 static void | |
137 copy_node(SerdNode* dst, const SerdNode* src) | |
138 { | |
139 if (src) { | |
140 dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); | |
141 dst->n_bytes = src->n_bytes; | |
142 dst->n_chars = src->n_chars; | |
143 dst->flags = src->flags; | |
144 dst->type = src->type; | |
145 memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); | |
146 } else { | |
147 dst->type = SERD_NOTHING; | |
148 } | |
149 } | |
150 | |
151 static inline size_t | |
152 sink(const void* buf, size_t len, SerdWriter* writer) | |
153 { | |
154 return serd_byte_sink_write(buf, len, &writer->byte_sink); | |
155 } | |
156 | |
157 // Parse a UTF-8 character, set *size to the length, and return the code point | |
158 static inline uint32_t | |
159 parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size) | |
160 { | |
161 uint32_t c = 0; | |
162 if ((utf8[0] & 0x80) == 0) { // Starts with `0' | |
163 *size = 1; | |
164 c = utf8[0]; | |
165 } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110' | |
166 *size = 2; | |
167 c = utf8[0] & 0x1F; | |
168 } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110' | |
169 *size = 3; | |
170 c = utf8[0] & 0x0F; | |
171 } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110' | |
172 *size = 4; | |
173 c = utf8[0] & 0x07; | |
174 } else { | |
175 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); | |
176 *size = 0; | |
177 return 0; | |
178 } | |
179 | |
180 size_t i = 0; | |
181 uint8_t in = utf8[i++]; | |
182 | |
183 #define READ_BYTE() \ | |
184 in = utf8[i++] & 0x3F; \ | |
185 c = (c << 6) | in; | |
186 | |
187 switch (*size) { | |
188 case 4: READ_BYTE(); | |
189 case 3: READ_BYTE(); | |
190 case 2: READ_BYTE(); | |
191 } | |
192 | |
193 return c; | |
194 } | |
195 | |
196 // Write a single character, as an escape for single byte characters | |
197 // (Caller prints any single byte characters that don't need escaping) | |
198 static size_t | |
199 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) | |
200 { | |
201 const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; | |
202 char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | |
203 const uint8_t in = utf8[0]; | |
204 | |
205 uint32_t c = parse_utf8_char(writer, utf8, size); | |
206 switch (*size) { | |
207 case 0: | |
208 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in); | |
209 return sink(replacement_char, sizeof(replacement_char), writer); | |
210 case 1: | |
211 snprintf(escape, sizeof(escape), "\\u%04X", in); | |
212 return sink(escape, 6, writer); | |
213 default: | |
214 break; | |
215 } | |
216 | |
217 if (!(writer->style & SERD_STYLE_ASCII)) { | |
218 // Write UTF-8 character directly to UTF-8 output | |
219 return sink(utf8, *size, writer); | |
220 } | |
221 | |
222 if (c <= 0xFFFF) { | |
223 snprintf(escape, sizeof(escape), "\\u%04X", c); | |
224 return sink(escape, 6, writer); | |
225 } else { | |
226 snprintf(escape, sizeof(escape), "\\U%08X", c); | |
227 return sink(escape, 10, writer); | |
228 } | |
229 } | |
230 | |
/** Return true iff byte `c` cannot be written literally inside `<...>`. */
static inline bool
uri_must_escape(const uint8_t c)
{
    // Anything outside 0x20..0x7E is always escaped
    if (!in_range(c, 0x20, 0x7E)) {
        return true;
    }

    // Within that range, only these characters need escaping
    switch (c) {
    case ' ': case '"': case '<': case '>': case '\\':
    case '^': case '`': case '{': case '|': case '}':
        return true;
    default:
        return false;
    }
}
242 | |
243 static size_t | |
244 write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) | |
245 { | |
246 size_t len = 0; | |
247 for (size_t i = 0; i < n_bytes;) { | |
248 size_t j = i; // Index of next character that must be escaped | |
249 for (; j < n_bytes; ++j) { | |
250 if (uri_must_escape(utf8[j])) { | |
251 break; | |
252 } | |
253 } | |
254 | |
255 // Bulk write all characters up to this special one | |
256 len += sink(&utf8[i], j - i, writer); | |
257 if ((i = j) == n_bytes) { | |
258 break; // Reached end | |
259 } | |
260 | |
261 // Write UTF-8 character | |
262 size_t size = 0; | |
263 len += write_character(writer, utf8 + i, &size); | |
264 i += size; | |
265 } | |
266 return len; | |
267 } | |
268 | |
/**
   Return true iff `c` must be backslash-escaped in a local name.

   This arbitrary list of characters, most of which have nothing to do with
   Turtle, must be handled as special cases here because the RDF and SPARQL
   WGs are apparently intent on making the once elegant Turtle a baroque
   and inconsistent mess, throwing elegance and extensibility completely
   out the window for no good reason.

   Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
   in local names, so they are not escaped here.
*/
static bool
lname_must_escape(const uint8_t c)
{
    static const char escaped[] = "'!#$%&()*+,/;=?@~";

    // strchr() would match the terminator for c == 0, so rule that out first
    return c != '\0' && strchr(escaped, c) != NULL;
}
289 | |
290 static size_t | |
291 write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) | |
292 { | |
293 size_t len = 0; | |
294 for (size_t i = 0; i < n_bytes; ++i) { | |
295 size_t j = i; // Index of next character that must be escaped | |
296 for (; j < n_bytes; ++j) { | |
297 if (lname_must_escape(utf8[j])) { | |
298 break; | |
299 } | |
300 } | |
301 | |
302 // Bulk write all characters up to this special one | |
303 len += sink(&utf8[i], j - i, writer); | |
304 if ((i = j) == n_bytes) { | |
305 break; // Reached end | |
306 } | |
307 | |
308 // Write escape | |
309 len += sink("\\", 1, writer); | |
310 len += sink(&utf8[i], 1, writer); | |
311 } | |
312 return len; | |
313 } | |
314 | |
315 static size_t | |
316 write_text(SerdWriter* writer, TextContext ctx, | |
317 const uint8_t* utf8, size_t n_bytes) | |
318 { | |
319 size_t len = 0; | |
320 for (size_t i = 0; i < n_bytes;) { | |
321 // Fast bulk write for long strings of printable ASCII | |
322 size_t j = i; | |
323 for (; j < n_bytes; ++j) { | |
324 if (utf8[j] == '\\' || utf8[j] == '"' | |
325 || (!in_range(utf8[j], 0x20, 0x7E))) { | |
326 break; | |
327 } | |
328 } | |
329 | |
330 len += sink(&utf8[i], j - i, writer); | |
331 if ((i = j) == n_bytes) { | |
332 break; // Reached end | |
333 } | |
334 | |
335 uint8_t in = utf8[i++]; | |
336 if (ctx == WRITE_LONG_STRING) { | |
337 switch (in) { | |
338 case '\\': len += sink("\\\\", 2, writer); continue; | |
339 case '\b': len += sink("\\b", 2, writer); continue; | |
340 case '\n': case '\r': case '\t': case '\f': | |
341 len += sink(&in, 1, writer); // Write character as-is | |
342 continue; | |
343 case '\"': | |
344 if (i == n_bytes) { // '"' at string end | |
345 len += sink("\\\"", 2, writer); | |
346 } else { | |
347 len += sink(&in, 1, writer); | |
348 } | |
349 continue; | |
350 default: break; | |
351 } | |
352 } else if (ctx == WRITE_STRING) { | |
353 switch (in) { | |
354 case '\\': len += sink("\\\\", 2, writer); continue; | |
355 case '\n': len += sink("\\n", 2, writer); continue; | |
356 case '\r': len += sink("\\r", 2, writer); continue; | |
357 case '\t': len += sink("\\t", 2, writer); continue; | |
358 case '"': len += sink("\\\"", 2, writer); continue; | |
359 default: break; | |
360 } | |
361 if (writer->syntax == SERD_TURTLE) { | |
362 switch (in) { | |
363 case '\b': len += sink("\\b", 2, writer); continue; | |
364 case '\f': len += sink("\\f", 2, writer); continue; | |
365 } | |
366 } | |
367 } | |
368 | |
369 size_t size = 0; | |
370 len += write_character(writer, utf8 + i - 1, &size); | |
371 | |
372 if (size == 0) { | |
373 return len; | |
374 } | |
375 | |
376 i += size - 1; | |
377 } | |
378 return len; | |
379 } | |
380 | |
381 static size_t | |
382 uri_sink(const void* buf, size_t len, void* stream) | |
383 { | |
384 return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len); | |
385 } | |
386 | |
387 static void | |
388 write_newline(SerdWriter* writer) | |
389 { | |
390 sink("\n", 1, writer); | |
391 for (unsigned i = 0; i < writer->indent; ++i) { | |
392 sink("\t", 1, writer); | |
393 } | |
394 } | |
395 | |
396 static void | |
397 write_sep(SerdWriter* writer, const Sep sep) | |
398 { | |
399 const SepRule* rule = &rules[sep]; | |
400 if (rule->space_before) { | |
401 write_newline(writer); | |
402 } | |
403 if (rule->str) { | |
404 sink(rule->str, rule->len, writer); | |
405 } | |
406 if ( (writer->last_sep && rule->space_after_sep) | |
407 || (!writer->last_sep && rule->space_after_node)) { | |
408 write_newline(writer); | |
409 } else if (writer->last_sep && rule->space_after_node) { | |
410 sink(" ", 1, writer); | |
411 } | |
412 writer->last_sep = sep; | |
413 } | |
414 | |
415 static SerdStatus | |
416 reset_context(SerdWriter* writer, bool graph) | |
417 { | |
418 if (graph) { | |
419 writer->context.graph.type = SERD_NOTHING; | |
420 } | |
421 writer->context.subject.type = SERD_NOTHING; | |
422 writer->context.predicate.type = SERD_NOTHING; | |
423 writer->empty = false; | |
424 return SERD_SUCCESS; | |
425 } | |
426 | |
427 static SerdStatus | |
428 free_context(SerdWriter* writer) | |
429 { | |
430 serd_node_free(&writer->context.graph); | |
431 serd_node_free(&writer->context.subject); | |
432 serd_node_free(&writer->context.predicate); | |
433 return reset_context(writer, true); | |
434 } | |
435 | |
/** The position in a statement that a node being written occupies. */
typedef enum {
    FIELD_NONE      = 0,  ///< Not a statement field (e.g. a literal's datatype)
    FIELD_SUBJECT   = 1,  ///< Subject
    FIELD_PREDICATE = 2,  ///< Predicate
    FIELD_OBJECT    = 3,  ///< Object
    FIELD_GRAPH     = 4   ///< Graph
} Field;
443 | |
444 static bool | |
445 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) | |
446 { | |
447 return (supports_abbrev(writer) && | |
448 ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || | |
449 (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))); | |
450 } | |
451 | |
// Write a single node in the writer's syntax.
//
// `field` says which statement position the node occupies and, together with
// `flags`, selects the abbreviated forms ("[", "(", "[]", "a", "()") when the
// syntax supports abbreviation.  `datatype` and `lang` are only consulted for
// literals.
//
// Returns false only when a CURIE cannot be expanded for NTriples/NQuads
// output.  Every other path returns true after setting last_sep to SEP_NONE.
452 static bool | |
453 write_node(SerdWriter* writer, | |
454 const SerdNode* node, | |
455 const SerdNode* datatype, | |
456 const SerdNode* lang, | |
457 Field field, | |
458 SerdStatementFlags flags) | |
459 { | |
460 SerdChunk uri_prefix; | |
461 SerdNode prefix; | |
462 SerdChunk suffix; | |
463 bool has_scheme; | |
464 switch (node->type) { | |
// Blank node: start of an inline anonymous node, start of a list (as
// subject or object), an empty "[]" node, or otherwise a "_:" label
465 case SERD_BLANK: | |
466 if (is_inline_start(writer, field, flags)) { | |
467 ++writer->indent; | |
468 write_sep(writer, SEP_ANON_BEGIN); | |
469 } else if (supports_abbrev(writer) | |
470 && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) { | |
471 assert(writer->list_depth == 0); | |
// Remember the list subject; serd_writer_write_statement() makes it the
// context subject again once the list has ended
472 copy_node(&writer->list_subj, node); | |
473 ++writer->list_depth; | |
474 ++writer->indent; | |
475 write_sep(writer, SEP_LIST_BEGIN); | |
476 } else if (supports_abbrev(writer) | |
477 && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) { | |
478 ++writer->indent; | |
479 ++writer->list_depth; | |
480 write_sep(writer, SEP_LIST_BEGIN); | |
481 } else if (supports_abbrev(writer) | |
482 && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) | |
483 || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { | |
484 sink("[]", 2, writer); | |
485 } else { | |
// Labelled blank node: if the label starts with the prefix set by
// serd_writer_chop_blank_prefix(), write only the part after it
486 sink("_:", 2, writer); | |
487 if (writer->bprefix && !strncmp((const char*)node->buf, | |
488 (const char*)writer->bprefix, | |
489 writer->bprefix_len)) { | |
490 sink(node->buf + writer->bprefix_len, | |
491 node->n_bytes - writer->bprefix_len, | |
492 writer); | |
493 } else { | |
494 sink(node->buf, node->n_bytes, writer); | |
495 } | |
496 } | |
497 break; | |
498 case SERD_CURIE: | |
499 switch (writer->syntax) { | |
// NTriples/NQuads: expand the CURIE with the environment and write the
// result as a full <URI>; an unknown prefix is an error
500 case SERD_NTRIPLES: | |
501 case SERD_NQUADS: | |
502 if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) { | |
503 w_err(writer, SERD_ERR_BAD_CURIE, | |
504 "undefined namespace prefix `%s'\n", node->buf); | |
505 return false; | |
506 } | |
507 sink("<", 1, writer); | |
508 write_uri(writer, uri_prefix.buf, uri_prefix.len); | |
509 write_uri(writer, suffix.buf, suffix.len); | |
510 sink(">", 1, writer); | |
511 break; | |
// Turtle/TriG: write the CURIE text itself, escaped as a local name
512 case SERD_TURTLE: | |
513 case SERD_TRIG: | |
514 if (is_inline_start(writer, field, flags)) { | |
515 ++writer->indent; | |
516 write_sep(writer, SEP_ANON_BEGIN); | |
517 sink("== ", 3, writer); | |
518 } | |
519 write_lname(writer, node->buf, node->n_bytes); | |
520 if (is_inline_start(writer, field, flags)) { | |
521 sink(" ;", 2, writer); | |
522 write_newline(writer); | |
523 } | |
524 } | |
525 break; | |
526 case SERD_LITERAL: | |
// In abbreviating syntaxes, xsd:boolean and xsd:integer literals, and
// xsd:decimal literals that contain a '.' but do not end with one, are
// written bare (no quotes, no datatype)
527 if (supports_abbrev(writer) && datatype && datatype->buf) { | |
528 const char* type_uri = (const char*)datatype->buf; | |
529 if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && ( | |
530 !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || | |
531 !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) { | |
532 sink(node->buf, node->n_bytes, writer); | |
533 break; | |
534 } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && | |
535 !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") && | |
536 strchr((const char*)node->buf, '.') && | |
537 node->buf[node->n_bytes - 1] != '.') { | |
538 /* xsd:decimal literals without trailing digits, e.g. "5.", can | |
539 not be written bare in Turtle. We could add a 0 which is | |
540 prettier, but changes the text and breaks round tripping. | |
541 */ | |
542 sink(node->buf, node->n_bytes, writer); | |
543 break; | |
544 } | |
545 } | |
// Use a triple-quoted long string when the node is flagged as containing
// a newline or quote and the syntax supports it; otherwise a short string
546 if (supports_abbrev(writer) | |
547 && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) { | |
548 sink("\"\"\"", 3, writer); | |
549 write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); | |
550 sink("\"\"\"", 3, writer); | |
551 } else { | |
552 sink("\"", 1, writer); | |
553 write_text(writer, WRITE_STRING, node->buf, node->n_bytes); | |
554 sink("\"", 1, writer); | |
555 } | |
// A language tag takes precedence over a datatype when both are given
556 if (lang && lang->buf) { | |
557 sink("@", 1, writer); | |
558 sink(lang->buf, lang->n_bytes, writer); | |
559 } else if (datatype && datatype->buf) { | |
560 sink("^^", 2, writer); | |
561 write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); | |
562 } | |
563 break; | |
564 case SERD_URI: | |
565 if (is_inline_start(writer, field, flags)) { | |
566 ++writer->indent; | |
567 write_sep(writer, SEP_ANON_BEGIN); | |
568 sink("== ", 3, writer); | |
569 } | |
570 has_scheme = serd_uri_string_has_scheme(node->buf); | |
// Abbreviations: rdf:type as a predicate is written "a", rdf:nil is
// written "()", and with SERD_STYLE_CURIED a URI the environment can
// qualify is written as prefix:suffix
571 if (field == FIELD_PREDICATE && supports_abbrev(writer) | |
572 && !strcmp((const char*)node->buf, NS_RDF "type")) { | |
573 sink("a", 1, writer); | |
574 break; | |
575 } else if (supports_abbrev(writer) | |
576 && !strcmp((const char*)node->buf, NS_RDF "nil")) { | |
577 sink("()", 2, writer); | |
578 break; | |
579 } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) && | |
580 serd_env_qualify(writer->env, node, &prefix, &suffix)) { | |
581 write_uri(writer, prefix.buf, prefix.n_bytes); | |
582 sink(":", 1, writer); | |
583 write_uri(writer, suffix.buf, suffix.len); | |
584 break; | |
585 } | |
586 sink("<", 1, writer); | |
587 if (writer->style & SERD_STYLE_RESOLVED) { | |
// Resolve against the environment's base URI.  The absolute form is
// written for NTriples/NQuads or when the result is not under the
// chosen root; otherwise the URI is written relative to the base
588 SerdURI in_base_uri, uri, abs_uri; | |
589 serd_env_get_base_uri(writer->env, &in_base_uri); | |
590 serd_uri_parse(node->buf, &uri); | |
591 serd_uri_resolve(&uri, &in_base_uri, &abs_uri); | |
592 bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); | |
593 SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri; | |
594 if (!uri_is_under(&abs_uri, root) || | |
595 writer->syntax == SERD_NTRIPLES || | |
596 writer->syntax == SERD_NQUADS) { | |
597 serd_uri_serialise(&abs_uri, uri_sink, writer); | |
598 } else { | |
599 serd_uri_serialise_relative( | |
600 &uri, &writer->base_uri, root, uri_sink, writer); | |
601 } | |
602 } else { | |
603 write_uri(writer, node->buf, node->n_bytes); | |
604 } | |
605 sink(">", 1, writer); | |
606 if (is_inline_start(writer, field, flags)) { | |
607 sink(" ;", 2, writer); | |
608 write_newline(writer); | |
609 } | |
// NOTE: there is no break here; control falls into the default case
// below, which does nothing but break
610 default: | |
611 break; | |
612 } | |
// A node was just written, so the next write_sep() follows a node
613 writer->last_sep = SEP_NONE; | |
614 return true; | |
615 } | |
616 | |
617 static inline bool | |
618 is_resource(const SerdNode* node) | |
619 { | |
620 return node->type > SERD_LITERAL; | |
621 } | |
622 | |
623 static void | |
624 write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) | |
625 { | |
626 write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); | |
627 write_sep(writer, SEP_P_O); | |
628 copy_node(&writer->context.predicate, pred); | |
629 } | |
630 | |
631 static bool | |
632 write_list_obj(SerdWriter* writer, | |
633 SerdStatementFlags flags, | |
634 const SerdNode* predicate, | |
635 const SerdNode* object, | |
636 const SerdNode* datatype, | |
637 const SerdNode* lang) | |
638 { | |
639 if (!strcmp((const char*)object->buf, NS_RDF "nil")) { | |
640 --writer->indent; | |
641 write_sep(writer, SEP_LIST_END); | |
642 return true; | |
643 } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { | |
644 write_sep(writer, SEP_LIST_SEP); | |
645 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); | |
646 } | |
647 return false; | |
648 } | |
649 | |
// Write one statement.
//
// Returns SERD_ERR_BAD_ARG if subject, predicate, or object is NULL or has
// no buffer, or if subject or predicate is not a resource.  Returns
// SERD_ERR_UNKNOWN if a node write wrapped in TRY() fails.
//
// For NTriples/NQuads the statement is written in full on its own line.
// For other syntaxes it is abbreviated against the stored context (graph,
// subject, predicate), and that context is updated before returning
// SERD_SUCCESS.
650 SERD_API | |
651 SerdStatus | |
652 serd_writer_write_statement(SerdWriter* writer, | |
653 SerdStatementFlags flags, | |
654 const SerdNode* graph, | |
655 const SerdNode* subject, | |
656 const SerdNode* predicate, | |
657 const SerdNode* object, | |
658 const SerdNode* datatype, | |
659 const SerdNode* lang) | |
660 { | |
661 if (!subject || !predicate || !object | |
662 || !subject->buf || !predicate->buf || !object->buf | |
663 || !is_resource(subject) || !is_resource(predicate)) { | |
664 return SERD_ERR_BAD_ARG; | |
665 } | |
666 | |
// TRY(expr) returns SERD_ERR_UNKNOWN from this function if expr is false
667 #define TRY(write_result) \ | |
668 if (!write_result) { \ | |
669 return SERD_ERR_UNKNOWN; \ | |
670 } | |
671 | |
// Line-based syntaxes: write every node in full, with no context tracking
672 switch (writer->syntax) { | |
673 case SERD_NTRIPLES: | |
674 case SERD_NQUADS: | |
675 TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); | |
676 sink(" ", 1, writer); | |
677 TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); | |
678 sink(" ", 1, writer); | |
679 TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); | |
680 if (writer->syntax == SERD_NQUADS && graph) { | |
681 sink(" ", 1, writer); | |
682 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); | |
683 } | |
684 sink(" .\n", 3, writer); | |
685 return SERD_SUCCESS; | |
686 default: | |
687 break; | |
688 } | |
689 | |
// Graph changed (or ended): close the open subject and graph, then open
// the new graph if there is one
690 if ((graph && !serd_node_equals(graph, &writer->context.graph)) || | |
691 (!graph && writer->context.graph.type)) { | |
692 writer->indent = 0; | |
693 if (writer->context.subject.type) { | |
694 write_sep(writer, SEP_END_S); | |
695 } | |
696 if (writer->context.graph.type) { | |
697 write_sep(writer, SEP_GRAPH_END); | |
698 } | |
699 | |
700 reset_context(writer, true); | |
701 if (graph) { | |
702 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); | |
703 ++writer->indent; | |
704 write_sep(writer, SEP_GRAPH_BEGIN); | |
705 copy_node(&writer->context.graph, graph); | |
706 } | |
707 } | |
708 | |
709 if ((flags & SERD_LIST_CONT)) { | |
710 if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { | |
711 // Reached end of list | |
// When the outermost subject list closes, the saved list subject becomes
// the context subject; ownership of its buffer moves from list_subj
712 if (--writer->list_depth == 0 && writer->list_subj.type) { | |
713 reset_context(writer, false); | |
714 serd_node_free(&writer->context.subject); | |
715 writer->context.subject = writer->list_subj; | |
716 writer->list_subj = SERD_NODE_NULL; | |
717 } | |
718 return SERD_SUCCESS; | |
719 } | |
720 } else if (serd_node_equals(subject, &writer->context.subject)) { | |
721 if (serd_node_equals(predicate, &writer->context.predicate)) { | |
722 // Abbreviate S P | |
723 if (!(flags & SERD_ANON_O_BEGIN)) { | |
724 ++writer->indent; | |
725 } | |
726 write_sep(writer, SEP_END_O); | |
727 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); | |
728 if (!(flags & SERD_ANON_O_BEGIN)) { | |
729 --writer->indent; | |
730 } | |
731 } else { | |
732 // Abbreviate S | |
733 Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P; | |
734 write_sep(writer, sep); | |
735 write_pred(writer, flags, predicate); | |
736 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); | |
737 } | |
738 } else { | |
739 // No abbreviation | |
740 if (writer->context.subject.type) { | |
741 assert(writer->indent > 0); | |
742 --writer->indent; | |
743 if (serd_stack_is_empty(&writer->anon_stack)) { | |
744 write_sep(writer, SEP_END_S); | |
745 } | |
746 } else if (!writer->empty) { | |
747 write_sep(writer, SEP_S_P); | |
748 } | |
749 | |
// With SERD_ANON_CONT the subject itself is not written again
750 if (!(flags & SERD_ANON_CONT)) { | |
751 write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); | |
752 ++writer->indent; | |
753 write_sep(writer, SEP_S_P); | |
754 } else { | |
755 ++writer->indent; | |
756 } | |
757 | |
758 reset_context(writer, false); | |
759 copy_node(&writer->context.subject, subject); | |
760 | |
761 if (!(flags & SERD_LIST_S_BEGIN)) { | |
762 write_pred(writer, flags, predicate); | |
763 } | |
764 | |
765 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); | |
766 } | |
767 | |
// Starting an anonymous node: push the current context on the anon stack
// and continue with fresh node copies; otherwise just record this statement
768 if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { | |
769 WriteContext* ctx = (WriteContext*)serd_stack_push( | |
770 &writer->anon_stack, sizeof(WriteContext)); | |
771 *ctx = writer->context; | |
772 WriteContext new_context = { | |
773 serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL }; | |
774 if ((flags & SERD_ANON_S_BEGIN)) { | |
775 new_context.predicate = serd_node_copy(predicate); | |
776 } | |
777 writer->context = new_context; | |
778 } else { | |
779 copy_node(&writer->context.graph, graph); | |
780 copy_node(&writer->context.subject, subject); | |
781 copy_node(&writer->context.predicate, predicate); | |
782 } | |
783 | |
784 return SERD_SUCCESS; | |
785 } | |
786 | |
787 SERD_API | |
788 SerdStatus | |
789 serd_writer_end_anon(SerdWriter* writer, | |
790 const SerdNode* node) | |
791 { | |
792 if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { | |
793 return SERD_SUCCESS; | |
794 } | |
795 if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) { | |
796 w_err(writer, SERD_ERR_UNKNOWN, | |
797 "unexpected end of anonymous node\n"); | |
798 return SERD_ERR_UNKNOWN; | |
799 } | |
800 --writer->indent; | |
801 write_sep(writer, SEP_ANON_END); | |
802 free_context(writer); | |
803 writer->context = *anon_stack_top(writer); | |
804 serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); | |
805 const bool is_subject = serd_node_equals(node, &writer->context.subject); | |
806 if (is_subject) { | |
807 copy_node(&writer->context.subject, node); | |
808 writer->context.predicate.type = SERD_NOTHING; | |
809 } | |
810 return SERD_SUCCESS; | |
811 } | |
812 | |
813 SERD_API | |
814 SerdStatus | |
815 serd_writer_finish(SerdWriter* writer) | |
816 { | |
817 if (writer->context.subject.type) { | |
818 sink(" .\n", 3, writer); | |
819 } | |
820 if (writer->context.graph.type) { | |
821 sink("}\n", 2, writer); | |
822 } | |
823 serd_byte_sink_flush(&writer->byte_sink); | |
824 writer->indent = 0; | |
825 return free_context(writer); | |
826 } | |
827 | |
828 SERD_API | |
829 SerdWriter* | |
830 serd_writer_new(SerdSyntax syntax, | |
831 SerdStyle style, | |
832 SerdEnv* env, | |
833 const SerdURI* base_uri, | |
834 SerdSink ssink, | |
835 void* stream) | |
836 { | |
837 const WriteContext context = WRITE_CONTEXT_NULL; | |
838 SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); | |
839 writer->syntax = syntax; | |
840 writer->style = style; | |
841 writer->env = env; | |
842 writer->root_node = SERD_NODE_NULL; | |
843 writer->root_uri = SERD_URI_NULL; | |
844 writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; | |
845 writer->anon_stack = serd_stack_new(sizeof(WriteContext)); | |
846 writer->context = context; | |
847 writer->list_subj = SERD_NODE_NULL; | |
848 writer->empty = true; | |
849 writer->byte_sink = serd_byte_sink_new( | |
850 ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); | |
851 return writer; | |
852 } | |
853 | |
854 SERD_API | |
855 void | |
856 serd_writer_set_error_sink(SerdWriter* writer, | |
857 SerdErrorSink error_sink, | |
858 void* error_handle) | |
859 { | |
860 writer->error_sink = error_sink; | |
861 writer->error_handle = error_handle; | |
862 } | |
863 | |
864 SERD_API | |
865 void | |
866 serd_writer_chop_blank_prefix(SerdWriter* writer, | |
867 const uint8_t* prefix) | |
868 { | |
869 free(writer->bprefix); | |
870 writer->bprefix_len = 0; | |
871 writer->bprefix = NULL; | |
872 if (prefix) { | |
873 writer->bprefix_len = strlen((const char*)prefix); | |
874 writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1); | |
875 memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); | |
876 } | |
877 } | |
878 | |
879 SERD_API | |
880 SerdStatus | |
881 serd_writer_set_base_uri(SerdWriter* writer, | |
882 const SerdNode* uri) | |
883 { | |
884 if (!serd_env_set_base_uri(writer->env, uri)) { | |
885 serd_env_get_base_uri(writer->env, &writer->base_uri); | |
886 | |
887 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { | |
888 if (writer->context.graph.type || writer->context.subject.type) { | |
889 sink(" .\n\n", 4, writer); | |
890 reset_context(writer, true); | |
891 } | |
892 sink("@base <", 7, writer); | |
893 sink(uri->buf, uri->n_bytes, writer); | |
894 sink("> .\n", 4, writer); | |
895 } | |
896 writer->indent = 0; | |
897 return reset_context(writer, true); | |
898 } | |
899 return SERD_ERR_UNKNOWN; | |
900 } | |
901 | |
902 SERD_API | |
903 SerdStatus | |
904 serd_writer_set_root_uri(SerdWriter* writer, | |
905 const SerdNode* uri) | |
906 { | |
907 serd_node_free(&writer->root_node); | |
908 if (uri && uri->buf) { | |
909 writer->root_node = serd_node_copy(uri); | |
910 serd_uri_parse(uri->buf, &writer->root_uri); | |
911 } else { | |
912 writer->root_node = SERD_NODE_NULL; | |
913 writer->root_uri = SERD_URI_NULL; | |
914 } | |
915 return SERD_SUCCESS; | |
916 } | |
917 | |
918 SERD_API | |
919 SerdStatus | |
920 serd_writer_set_prefix(SerdWriter* writer, | |
921 const SerdNode* name, | |
922 const SerdNode* uri) | |
923 { | |
924 if (!serd_env_set_prefix(writer->env, name, uri)) { | |
925 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { | |
926 if (writer->context.graph.type || writer->context.subject.type) { | |
927 sink(" .\n\n", 4, writer); | |
928 reset_context(writer, true); | |
929 } | |
930 sink("@prefix ", 8, writer); | |
931 sink(name->buf, name->n_bytes, writer); | |
932 sink(": <", 3, writer); | |
933 write_uri(writer, uri->buf, uri->n_bytes); | |
934 sink("> .\n", 4, writer); | |
935 } | |
936 writer->indent = 0; | |
937 return reset_context(writer, true); | |
938 } | |
939 return SERD_ERR_UNKNOWN; | |
940 } | |
941 | |
942 SERD_API | |
943 void | |
944 serd_writer_free(SerdWriter* writer) | |
945 { | |
946 serd_writer_finish(writer); | |
947 serd_stack_free(&writer->anon_stack); | |
948 free(writer->bprefix); | |
949 serd_byte_sink_free(&writer->byte_sink); | |
950 serd_node_free(&writer->root_node); | |
951 free(writer); | |
952 } | |
953 | |
954 SERD_API | |
955 SerdEnv* | |
956 serd_writer_get_env(SerdWriter* writer) | |
957 { | |
958 return writer->env; | |
959 } | |
960 | |
961 SERD_API | |
962 size_t | |
963 serd_file_sink(const void* buf, size_t len, void* stream) | |
964 { | |
965 return fwrite(buf, 1, len, (FILE*)stream); | |
966 } | |
967 | |
968 SERD_API | |
969 size_t | |
970 serd_chunk_sink(const void* buf, size_t len, void* stream) | |
971 { | |
972 SerdChunk* chunk = (SerdChunk*)stream; | |
973 chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len); | |
974 memcpy((uint8_t*)chunk->buf + chunk->len, buf, len); | |
975 chunk->len += len; | |
976 return len; | |
977 } | |
978 | |
979 SERD_API | |
980 uint8_t* | |
981 serd_chunk_sink_finish(SerdChunk* stream) | |
982 { | |
983 serd_chunk_sink("", 1, stream); | |
984 return (uint8_t*)stream->buf; | |
985 } |