comparison ext/serd/src/writer.c @ 226:c5cdc9e6a4bf

Add these external library files
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 09 Jun 2017 16:41:31 +0100
parents
children
comparison
equal deleted inserted replaced
225:025b3e2f7c17 226:c5cdc9e6a4bf
1 /*
2 Copyright 2011-2017 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include "serd_internal.h"
18
19 #include <assert.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
25 #define NS_XSD "http://www.w3.org/2001/XMLSchema#"
26
27 typedef struct {
28 SerdNode graph;
29 SerdNode subject;
30 SerdNode predicate;
31 } WriteContext;
32
33 static const WriteContext WRITE_CONTEXT_NULL = {
34 { 0, 0, 0, 0, SERD_NOTHING },
35 { 0, 0, 0, 0, SERD_NOTHING },
36 { 0, 0, 0, 0, SERD_NOTHING }
37 };
38
/** Separator kinds; each value is used as an index into the `rules` table. */
typedef enum {
    SEP_NONE,         ///< No separator (a node was written last)
    SEP_END_S,        ///< End of a subject ('.')
    SEP_END_P,        ///< End of a predicate (';')
    SEP_END_O,        ///< End of an object (',')
    SEP_S_P,          ///< Between a subject and predicate (whitespace)
    SEP_P_O,          ///< Between a predicate and object (whitespace)
    SEP_ANON_BEGIN,   ///< Start of anonymous node ('[')
    SEP_ANON_END,     ///< End of anonymous node (']')
    SEP_LIST_BEGIN,   ///< Start of list ('(')
    SEP_LIST_SEP,     ///< List separator (whitespace)
    SEP_LIST_END,     ///< End of list (')')
    SEP_GRAPH_BEGIN,  ///< Start of graph ('{')
    SEP_GRAPH_END     ///< End of graph ('}')
} Sep;
54
/** Formatting rule for one separator, consumed by write_sep(). */
typedef struct {
    const char* str;               ///< Separator text, or NULL to write none
    uint8_t     len;               ///< Number of bytes of `str` to write
    uint8_t     space_before;      ///< Non-zero: newline before the separator
    uint8_t     space_after_node;  ///< Non-zero: newline after it when following a node
    uint8_t     space_after_sep;   ///< Non-zero: newline after it when following a separator
} SepRule;
62
63 static const SepRule rules[] = {
64 { NULL, 0, 0, 0, 0 },
65 { " .\n\n", 4, 0, 0, 0 },
66 { " ;", 2, 0, 1, 1 },
67 { " ,", 2, 0, 1, 0 },
68 { NULL, 0, 0, 1, 0 },
69 { " ", 1, 0, 0, 0 },
70 { "[", 1, 0, 1, 1 },
71 { "]", 1, 1, 0, 0 },
72 { "(", 1, 0, 0, 0 },
73 { NULL, 1, 0, 1, 0 },
74 { ")", 1, 1, 0, 0 },
75 { " {", 2, 0, 1, 1 },
76 { " }", 2, 0, 1, 1 },
77 { "\n", 1, 0, 1, 0 }
78 };
79
80 struct SerdWriterImpl {
81 SerdSyntax syntax;
82 SerdStyle style;
83 SerdEnv* env;
84 SerdNode root_node;
85 SerdURI root_uri;
86 SerdURI base_uri;
87 SerdStack anon_stack;
88 SerdByteSink byte_sink;
89 SerdErrorSink error_sink;
90 void* error_handle;
91 WriteContext context;
92 SerdNode list_subj;
93 unsigned list_depth;
94 unsigned indent;
95 uint8_t* bprefix;
96 size_t bprefix_len;
97 Sep last_sep;
98 bool empty;
99 };
100
/** Which kind of quoted string write_text() is escaping for. */
typedef enum {
    WRITE_STRING,      ///< Single-quoted ("...") string
    WRITE_LONG_STRING  ///< Triple-quoted ("""...""") string
} TextContext;
105
106 static bool
107 supports_abbrev(const SerdWriter* writer)
108 {
109 return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG;
110 }
111
112 static void
113 w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
114 {
115 /* TODO: This results in errors with no file information, which is not
116 helpful when re-serializing a file (particularly for "undefined
117 namespace prefix" errors. The statement sink API needs to be changed to
118 add a Cursor parameter so the source can notify the writer of the
119 statement origin for better error reporting. */
120
121 va_list args;
122 va_start(args, fmt);
123 const SerdError e = { st, NULL, 0, 0, fmt, &args };
124 serd_error(writer->error_sink, writer->error_handle, &e);
125 va_end(args);
126 }
127
128 static inline WriteContext*
129 anon_stack_top(SerdWriter* writer)
130 {
131 assert(!serd_stack_is_empty(&writer->anon_stack));
132 return (WriteContext*)(writer->anon_stack.buf
133 + writer->anon_stack.size - sizeof(WriteContext));
134 }
135
136 static void
137 copy_node(SerdNode* dst, const SerdNode* src)
138 {
139 if (src) {
140 dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
141 dst->n_bytes = src->n_bytes;
142 dst->n_chars = src->n_chars;
143 dst->flags = src->flags;
144 dst->type = src->type;
145 memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
146 } else {
147 dst->type = SERD_NOTHING;
148 }
149 }
150
151 static inline size_t
152 sink(const void* buf, size_t len, SerdWriter* writer)
153 {
154 return serd_byte_sink_write(buf, len, &writer->byte_sink);
155 }
156
157 // Parse a UTF-8 character, set *size to the length, and return the code point
158 static inline uint32_t
159 parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
160 {
161 uint32_t c = 0;
162 if ((utf8[0] & 0x80) == 0) { // Starts with `0'
163 *size = 1;
164 c = utf8[0];
165 } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
166 *size = 2;
167 c = utf8[0] & 0x1F;
168 } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
169 *size = 3;
170 c = utf8[0] & 0x0F;
171 } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
172 *size = 4;
173 c = utf8[0] & 0x07;
174 } else {
175 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
176 *size = 0;
177 return 0;
178 }
179
180 size_t i = 0;
181 uint8_t in = utf8[i++];
182
183 #define READ_BYTE() \
184 in = utf8[i++] & 0x3F; \
185 c = (c << 6) | in;
186
187 switch (*size) {
188 case 4: READ_BYTE();
189 case 3: READ_BYTE();
190 case 2: READ_BYTE();
191 }
192
193 return c;
194 }
195
196 // Write a single character, as an escape for single byte characters
197 // (Caller prints any single byte characters that don't need escaping)
198 static size_t
199 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
200 {
201 const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
202 char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
203 const uint8_t in = utf8[0];
204
205 uint32_t c = parse_utf8_char(writer, utf8, size);
206 switch (*size) {
207 case 0:
208 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in);
209 return sink(replacement_char, sizeof(replacement_char), writer);
210 case 1:
211 snprintf(escape, sizeof(escape), "\\u%04X", in);
212 return sink(escape, 6, writer);
213 default:
214 break;
215 }
216
217 if (!(writer->style & SERD_STYLE_ASCII)) {
218 // Write UTF-8 character directly to UTF-8 output
219 return sink(utf8, *size, writer);
220 }
221
222 if (c <= 0xFFFF) {
223 snprintf(escape, sizeof(escape), "\\u%04X", c);
224 return sink(escape, 6, writer);
225 } else {
226 snprintf(escape, sizeof(escape), "\\U%08X", c);
227 return sink(escape, 10, writer);
228 }
229 }
230
/** Return true iff byte `c` can not be written verbatim inside a URI. */
static inline bool
uri_must_escape(const uint8_t c)
{
    // Anything outside 0x20..0x7E is escaped
    if (!in_range(c, 0x20, 0x7E)) {
        return true;
    }

    // Within that range, only these characters are escaped
    return (c == ' ' || c == '"' || c == '<' || c == '>' || c == '\\' ||
            c == '^' || c == '`' || c == '{' || c == '|' || c == '}');
}
242
243 static size_t
244 write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
245 {
246 size_t len = 0;
247 for (size_t i = 0; i < n_bytes;) {
248 size_t j = i; // Index of next character that must be escaped
249 for (; j < n_bytes; ++j) {
250 if (uri_must_escape(utf8[j])) {
251 break;
252 }
253 }
254
255 // Bulk write all characters up to this special one
256 len += sink(&utf8[i], j - i, writer);
257 if ((i = j) == n_bytes) {
258 break; // Reached end
259 }
260
261 // Write UTF-8 character
262 size_t size = 0;
263 len += write_character(writer, utf8 + i, &size);
264 i += size;
265 }
266 return len;
267 }
268
/**
   Return true iff `c` must be backslash-escaped in a local name.

   These are the PN_LOCAL_ESC characters that are not valid unescaped.  Note
   that '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped in
   local names, so they are not escaped here.
*/
static bool
lname_must_escape(const uint8_t c)
{
    static const char must_escape[] = "'!#$%&()*+,/;=?@~";

    // strchr() also matches the terminator, so exclude NUL explicitly
    return c != '\0' && strchr(must_escape, c) != NULL;
}
289
290 static size_t
291 write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
292 {
293 size_t len = 0;
294 for (size_t i = 0; i < n_bytes; ++i) {
295 size_t j = i; // Index of next character that must be escaped
296 for (; j < n_bytes; ++j) {
297 if (lname_must_escape(utf8[j])) {
298 break;
299 }
300 }
301
302 // Bulk write all characters up to this special one
303 len += sink(&utf8[i], j - i, writer);
304 if ((i = j) == n_bytes) {
305 break; // Reached end
306 }
307
308 // Write escape
309 len += sink("\\", 1, writer);
310 len += sink(&utf8[i], 1, writer);
311 }
312 return len;
313 }
314
315 static size_t
316 write_text(SerdWriter* writer, TextContext ctx,
317 const uint8_t* utf8, size_t n_bytes)
318 {
319 size_t len = 0;
320 for (size_t i = 0; i < n_bytes;) {
321 // Fast bulk write for long strings of printable ASCII
322 size_t j = i;
323 for (; j < n_bytes; ++j) {
324 if (utf8[j] == '\\' || utf8[j] == '"'
325 || (!in_range(utf8[j], 0x20, 0x7E))) {
326 break;
327 }
328 }
329
330 len += sink(&utf8[i], j - i, writer);
331 if ((i = j) == n_bytes) {
332 break; // Reached end
333 }
334
335 uint8_t in = utf8[i++];
336 if (ctx == WRITE_LONG_STRING) {
337 switch (in) {
338 case '\\': len += sink("\\\\", 2, writer); continue;
339 case '\b': len += sink("\\b", 2, writer); continue;
340 case '\n': case '\r': case '\t': case '\f':
341 len += sink(&in, 1, writer); // Write character as-is
342 continue;
343 case '\"':
344 if (i == n_bytes) { // '"' at string end
345 len += sink("\\\"", 2, writer);
346 } else {
347 len += sink(&in, 1, writer);
348 }
349 continue;
350 default: break;
351 }
352 } else if (ctx == WRITE_STRING) {
353 switch (in) {
354 case '\\': len += sink("\\\\", 2, writer); continue;
355 case '\n': len += sink("\\n", 2, writer); continue;
356 case '\r': len += sink("\\r", 2, writer); continue;
357 case '\t': len += sink("\\t", 2, writer); continue;
358 case '"': len += sink("\\\"", 2, writer); continue;
359 default: break;
360 }
361 if (writer->syntax == SERD_TURTLE) {
362 switch (in) {
363 case '\b': len += sink("\\b", 2, writer); continue;
364 case '\f': len += sink("\\f", 2, writer); continue;
365 }
366 }
367 }
368
369 size_t size = 0;
370 len += write_character(writer, utf8 + i - 1, &size);
371
372 if (size == 0) {
373 return len;
374 }
375
376 i += size - 1;
377 }
378 return len;
379 }
380
381 static size_t
382 uri_sink(const void* buf, size_t len, void* stream)
383 {
384 return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
385 }
386
387 static void
388 write_newline(SerdWriter* writer)
389 {
390 sink("\n", 1, writer);
391 for (unsigned i = 0; i < writer->indent; ++i) {
392 sink("\t", 1, writer);
393 }
394 }
395
396 static void
397 write_sep(SerdWriter* writer, const Sep sep)
398 {
399 const SepRule* rule = &rules[sep];
400 if (rule->space_before) {
401 write_newline(writer);
402 }
403 if (rule->str) {
404 sink(rule->str, rule->len, writer);
405 }
406 if ( (writer->last_sep && rule->space_after_sep)
407 || (!writer->last_sep && rule->space_after_node)) {
408 write_newline(writer);
409 } else if (writer->last_sep && rule->space_after_node) {
410 sink(" ", 1, writer);
411 }
412 writer->last_sep = sep;
413 }
414
415 static SerdStatus
416 reset_context(SerdWriter* writer, bool graph)
417 {
418 if (graph) {
419 writer->context.graph.type = SERD_NOTHING;
420 }
421 writer->context.subject.type = SERD_NOTHING;
422 writer->context.predicate.type = SERD_NOTHING;
423 writer->empty = false;
424 return SERD_SUCCESS;
425 }
426
427 static SerdStatus
428 free_context(SerdWriter* writer)
429 {
430 serd_node_free(&writer->context.graph);
431 serd_node_free(&writer->context.subject);
432 serd_node_free(&writer->context.predicate);
433 return reset_context(writer, true);
434 }
435
/** The position within a statement for which a node is being written. */
typedef enum {
    FIELD_NONE,       ///< Not a statement field (e.g. a literal's datatype)
    FIELD_SUBJECT,    ///< Statement subject
    FIELD_PREDICATE,  ///< Statement predicate
    FIELD_OBJECT,     ///< Statement object
    FIELD_GRAPH       ///< Statement graph
} Field;
443
444 static bool
445 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
446 {
447 return (supports_abbrev(writer) &&
448 ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
449 (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
450 }
451
452 static bool
453 write_node(SerdWriter* writer,
454 const SerdNode* node,
455 const SerdNode* datatype,
456 const SerdNode* lang,
457 Field field,
458 SerdStatementFlags flags)
459 {
460 SerdChunk uri_prefix;
461 SerdNode prefix;
462 SerdChunk suffix;
463 bool has_scheme;
464 switch (node->type) {
465 case SERD_BLANK:
466 if (is_inline_start(writer, field, flags)) {
467 ++writer->indent;
468 write_sep(writer, SEP_ANON_BEGIN);
469 } else if (supports_abbrev(writer)
470 && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
471 assert(writer->list_depth == 0);
472 copy_node(&writer->list_subj, node);
473 ++writer->list_depth;
474 ++writer->indent;
475 write_sep(writer, SEP_LIST_BEGIN);
476 } else if (supports_abbrev(writer)
477 && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
478 ++writer->indent;
479 ++writer->list_depth;
480 write_sep(writer, SEP_LIST_BEGIN);
481 } else if (supports_abbrev(writer)
482 && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
483 || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
484 sink("[]", 2, writer);
485 } else {
486 sink("_:", 2, writer);
487 if (writer->bprefix && !strncmp((const char*)node->buf,
488 (const char*)writer->bprefix,
489 writer->bprefix_len)) {
490 sink(node->buf + writer->bprefix_len,
491 node->n_bytes - writer->bprefix_len,
492 writer);
493 } else {
494 sink(node->buf, node->n_bytes, writer);
495 }
496 }
497 break;
498 case SERD_CURIE:
499 switch (writer->syntax) {
500 case SERD_NTRIPLES:
501 case SERD_NQUADS:
502 if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) {
503 w_err(writer, SERD_ERR_BAD_CURIE,
504 "undefined namespace prefix `%s'\n", node->buf);
505 return false;
506 }
507 sink("<", 1, writer);
508 write_uri(writer, uri_prefix.buf, uri_prefix.len);
509 write_uri(writer, suffix.buf, suffix.len);
510 sink(">", 1, writer);
511 break;
512 case SERD_TURTLE:
513 case SERD_TRIG:
514 if (is_inline_start(writer, field, flags)) {
515 ++writer->indent;
516 write_sep(writer, SEP_ANON_BEGIN);
517 sink("== ", 3, writer);
518 }
519 write_lname(writer, node->buf, node->n_bytes);
520 if (is_inline_start(writer, field, flags)) {
521 sink(" ;", 2, writer);
522 write_newline(writer);
523 }
524 }
525 break;
526 case SERD_LITERAL:
527 if (supports_abbrev(writer) && datatype && datatype->buf) {
528 const char* type_uri = (const char*)datatype->buf;
529 if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
530 !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
531 !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
532 sink(node->buf, node->n_bytes, writer);
533 break;
534 } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
535 !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
536 strchr((const char*)node->buf, '.') &&
537 node->buf[node->n_bytes - 1] != '.') {
538 /* xsd:decimal literals without trailing digits, e.g. "5.", can
539 not be written bare in Turtle. We could add a 0 which is
540 prettier, but changes the text and breaks round tripping.
541 */
542 sink(node->buf, node->n_bytes, writer);
543 break;
544 }
545 }
546 if (supports_abbrev(writer)
547 && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
548 sink("\"\"\"", 3, writer);
549 write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
550 sink("\"\"\"", 3, writer);
551 } else {
552 sink("\"", 1, writer);
553 write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
554 sink("\"", 1, writer);
555 }
556 if (lang && lang->buf) {
557 sink("@", 1, writer);
558 sink(lang->buf, lang->n_bytes, writer);
559 } else if (datatype && datatype->buf) {
560 sink("^^", 2, writer);
561 write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
562 }
563 break;
564 case SERD_URI:
565 if (is_inline_start(writer, field, flags)) {
566 ++writer->indent;
567 write_sep(writer, SEP_ANON_BEGIN);
568 sink("== ", 3, writer);
569 }
570 has_scheme = serd_uri_string_has_scheme(node->buf);
571 if (field == FIELD_PREDICATE && supports_abbrev(writer)
572 && !strcmp((const char*)node->buf, NS_RDF "type")) {
573 sink("a", 1, writer);
574 break;
575 } else if (supports_abbrev(writer)
576 && !strcmp((const char*)node->buf, NS_RDF "nil")) {
577 sink("()", 2, writer);
578 break;
579 } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
580 serd_env_qualify(writer->env, node, &prefix, &suffix)) {
581 write_uri(writer, prefix.buf, prefix.n_bytes);
582 sink(":", 1, writer);
583 write_uri(writer, suffix.buf, suffix.len);
584 break;
585 }
586 sink("<", 1, writer);
587 if (writer->style & SERD_STYLE_RESOLVED) {
588 SerdURI in_base_uri, uri, abs_uri;
589 serd_env_get_base_uri(writer->env, &in_base_uri);
590 serd_uri_parse(node->buf, &uri);
591 serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
592 bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
593 SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
594 if (!uri_is_under(&abs_uri, root) ||
595 writer->syntax == SERD_NTRIPLES ||
596 writer->syntax == SERD_NQUADS) {
597 serd_uri_serialise(&abs_uri, uri_sink, writer);
598 } else {
599 serd_uri_serialise_relative(
600 &uri, &writer->base_uri, root, uri_sink, writer);
601 }
602 } else {
603 write_uri(writer, node->buf, node->n_bytes);
604 }
605 sink(">", 1, writer);
606 if (is_inline_start(writer, field, flags)) {
607 sink(" ;", 2, writer);
608 write_newline(writer);
609 }
610 default:
611 break;
612 }
613 writer->last_sep = SEP_NONE;
614 return true;
615 }
616
617 static inline bool
618 is_resource(const SerdNode* node)
619 {
620 return node->type > SERD_LITERAL;
621 }
622
623 static void
624 write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
625 {
626 write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
627 write_sep(writer, SEP_P_O);
628 copy_node(&writer->context.predicate, pred);
629 }
630
631 static bool
632 write_list_obj(SerdWriter* writer,
633 SerdStatementFlags flags,
634 const SerdNode* predicate,
635 const SerdNode* object,
636 const SerdNode* datatype,
637 const SerdNode* lang)
638 {
639 if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
640 --writer->indent;
641 write_sep(writer, SEP_LIST_END);
642 return true;
643 } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
644 write_sep(writer, SEP_LIST_SEP);
645 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
646 }
647 return false;
648 }
649
650 SERD_API
651 SerdStatus
652 serd_writer_write_statement(SerdWriter* writer,
653 SerdStatementFlags flags,
654 const SerdNode* graph,
655 const SerdNode* subject,
656 const SerdNode* predicate,
657 const SerdNode* object,
658 const SerdNode* datatype,
659 const SerdNode* lang)
660 {
661 if (!subject || !predicate || !object
662 || !subject->buf || !predicate->buf || !object->buf
663 || !is_resource(subject) || !is_resource(predicate)) {
664 return SERD_ERR_BAD_ARG;
665 }
666
667 #define TRY(write_result) \
668 if (!write_result) { \
669 return SERD_ERR_UNKNOWN; \
670 }
671
672 switch (writer->syntax) {
673 case SERD_NTRIPLES:
674 case SERD_NQUADS:
675 TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
676 sink(" ", 1, writer);
677 TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
678 sink(" ", 1, writer);
679 TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
680 if (writer->syntax == SERD_NQUADS && graph) {
681 sink(" ", 1, writer);
682 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
683 }
684 sink(" .\n", 3, writer);
685 return SERD_SUCCESS;
686 default:
687 break;
688 }
689
690 if ((graph && !serd_node_equals(graph, &writer->context.graph)) ||
691 (!graph && writer->context.graph.type)) {
692 writer->indent = 0;
693 if (writer->context.subject.type) {
694 write_sep(writer, SEP_END_S);
695 }
696 if (writer->context.graph.type) {
697 write_sep(writer, SEP_GRAPH_END);
698 }
699
700 reset_context(writer, true);
701 if (graph) {
702 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
703 ++writer->indent;
704 write_sep(writer, SEP_GRAPH_BEGIN);
705 copy_node(&writer->context.graph, graph);
706 }
707 }
708
709 if ((flags & SERD_LIST_CONT)) {
710 if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
711 // Reached end of list
712 if (--writer->list_depth == 0 && writer->list_subj.type) {
713 reset_context(writer, false);
714 serd_node_free(&writer->context.subject);
715 writer->context.subject = writer->list_subj;
716 writer->list_subj = SERD_NODE_NULL;
717 }
718 return SERD_SUCCESS;
719 }
720 } else if (serd_node_equals(subject, &writer->context.subject)) {
721 if (serd_node_equals(predicate, &writer->context.predicate)) {
722 // Abbreviate S P
723 if (!(flags & SERD_ANON_O_BEGIN)) {
724 ++writer->indent;
725 }
726 write_sep(writer, SEP_END_O);
727 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
728 if (!(flags & SERD_ANON_O_BEGIN)) {
729 --writer->indent;
730 }
731 } else {
732 // Abbreviate S
733 Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
734 write_sep(writer, sep);
735 write_pred(writer, flags, predicate);
736 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
737 }
738 } else {
739 // No abbreviation
740 if (writer->context.subject.type) {
741 assert(writer->indent > 0);
742 --writer->indent;
743 if (serd_stack_is_empty(&writer->anon_stack)) {
744 write_sep(writer, SEP_END_S);
745 }
746 } else if (!writer->empty) {
747 write_sep(writer, SEP_S_P);
748 }
749
750 if (!(flags & SERD_ANON_CONT)) {
751 write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
752 ++writer->indent;
753 write_sep(writer, SEP_S_P);
754 } else {
755 ++writer->indent;
756 }
757
758 reset_context(writer, false);
759 copy_node(&writer->context.subject, subject);
760
761 if (!(flags & SERD_LIST_S_BEGIN)) {
762 write_pred(writer, flags, predicate);
763 }
764
765 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
766 }
767
768 if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
769 WriteContext* ctx = (WriteContext*)serd_stack_push(
770 &writer->anon_stack, sizeof(WriteContext));
771 *ctx = writer->context;
772 WriteContext new_context = {
773 serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
774 if ((flags & SERD_ANON_S_BEGIN)) {
775 new_context.predicate = serd_node_copy(predicate);
776 }
777 writer->context = new_context;
778 } else {
779 copy_node(&writer->context.graph, graph);
780 copy_node(&writer->context.subject, subject);
781 copy_node(&writer->context.predicate, predicate);
782 }
783
784 return SERD_SUCCESS;
785 }
786
787 SERD_API
788 SerdStatus
789 serd_writer_end_anon(SerdWriter* writer,
790 const SerdNode* node)
791 {
792 if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
793 return SERD_SUCCESS;
794 }
795 if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
796 w_err(writer, SERD_ERR_UNKNOWN,
797 "unexpected end of anonymous node\n");
798 return SERD_ERR_UNKNOWN;
799 }
800 --writer->indent;
801 write_sep(writer, SEP_ANON_END);
802 free_context(writer);
803 writer->context = *anon_stack_top(writer);
804 serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
805 const bool is_subject = serd_node_equals(node, &writer->context.subject);
806 if (is_subject) {
807 copy_node(&writer->context.subject, node);
808 writer->context.predicate.type = SERD_NOTHING;
809 }
810 return SERD_SUCCESS;
811 }
812
813 SERD_API
814 SerdStatus
815 serd_writer_finish(SerdWriter* writer)
816 {
817 if (writer->context.subject.type) {
818 sink(" .\n", 3, writer);
819 }
820 if (writer->context.graph.type) {
821 sink("}\n", 2, writer);
822 }
823 serd_byte_sink_flush(&writer->byte_sink);
824 writer->indent = 0;
825 return free_context(writer);
826 }
827
828 SERD_API
829 SerdWriter*
830 serd_writer_new(SerdSyntax syntax,
831 SerdStyle style,
832 SerdEnv* env,
833 const SerdURI* base_uri,
834 SerdSink ssink,
835 void* stream)
836 {
837 const WriteContext context = WRITE_CONTEXT_NULL;
838 SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
839 writer->syntax = syntax;
840 writer->style = style;
841 writer->env = env;
842 writer->root_node = SERD_NODE_NULL;
843 writer->root_uri = SERD_URI_NULL;
844 writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
845 writer->anon_stack = serd_stack_new(sizeof(WriteContext));
846 writer->context = context;
847 writer->list_subj = SERD_NODE_NULL;
848 writer->empty = true;
849 writer->byte_sink = serd_byte_sink_new(
850 ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1);
851 return writer;
852 }
853
854 SERD_API
855 void
856 serd_writer_set_error_sink(SerdWriter* writer,
857 SerdErrorSink error_sink,
858 void* error_handle)
859 {
860 writer->error_sink = error_sink;
861 writer->error_handle = error_handle;
862 }
863
864 SERD_API
865 void
866 serd_writer_chop_blank_prefix(SerdWriter* writer,
867 const uint8_t* prefix)
868 {
869 free(writer->bprefix);
870 writer->bprefix_len = 0;
871 writer->bprefix = NULL;
872 if (prefix) {
873 writer->bprefix_len = strlen((const char*)prefix);
874 writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1);
875 memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
876 }
877 }
878
879 SERD_API
880 SerdStatus
881 serd_writer_set_base_uri(SerdWriter* writer,
882 const SerdNode* uri)
883 {
884 if (!serd_env_set_base_uri(writer->env, uri)) {
885 serd_env_get_base_uri(writer->env, &writer->base_uri);
886
887 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
888 if (writer->context.graph.type || writer->context.subject.type) {
889 sink(" .\n\n", 4, writer);
890 reset_context(writer, true);
891 }
892 sink("@base <", 7, writer);
893 sink(uri->buf, uri->n_bytes, writer);
894 sink("> .\n", 4, writer);
895 }
896 writer->indent = 0;
897 return reset_context(writer, true);
898 }
899 return SERD_ERR_UNKNOWN;
900 }
901
902 SERD_API
903 SerdStatus
904 serd_writer_set_root_uri(SerdWriter* writer,
905 const SerdNode* uri)
906 {
907 serd_node_free(&writer->root_node);
908 if (uri && uri->buf) {
909 writer->root_node = serd_node_copy(uri);
910 serd_uri_parse(uri->buf, &writer->root_uri);
911 } else {
912 writer->root_node = SERD_NODE_NULL;
913 writer->root_uri = SERD_URI_NULL;
914 }
915 return SERD_SUCCESS;
916 }
917
918 SERD_API
919 SerdStatus
920 serd_writer_set_prefix(SerdWriter* writer,
921 const SerdNode* name,
922 const SerdNode* uri)
923 {
924 if (!serd_env_set_prefix(writer->env, name, uri)) {
925 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
926 if (writer->context.graph.type || writer->context.subject.type) {
927 sink(" .\n\n", 4, writer);
928 reset_context(writer, true);
929 }
930 sink("@prefix ", 8, writer);
931 sink(name->buf, name->n_bytes, writer);
932 sink(": <", 3, writer);
933 write_uri(writer, uri->buf, uri->n_bytes);
934 sink("> .\n", 4, writer);
935 }
936 writer->indent = 0;
937 return reset_context(writer, true);
938 }
939 return SERD_ERR_UNKNOWN;
940 }
941
942 SERD_API
943 void
944 serd_writer_free(SerdWriter* writer)
945 {
946 serd_writer_finish(writer);
947 serd_stack_free(&writer->anon_stack);
948 free(writer->bprefix);
949 serd_byte_sink_free(&writer->byte_sink);
950 serd_node_free(&writer->root_node);
951 free(writer);
952 }
953
954 SERD_API
955 SerdEnv*
956 serd_writer_get_env(SerdWriter* writer)
957 {
958 return writer->env;
959 }
960
961 SERD_API
962 size_t
963 serd_file_sink(const void* buf, size_t len, void* stream)
964 {
965 return fwrite(buf, 1, len, (FILE*)stream);
966 }
967
968 SERD_API
969 size_t
970 serd_chunk_sink(const void* buf, size_t len, void* stream)
971 {
972 SerdChunk* chunk = (SerdChunk*)stream;
973 chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
974 memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
975 chunk->len += len;
976 return len;
977 }
978
979 SERD_API
980 uint8_t*
981 serd_chunk_sink_finish(SerdChunk* stream)
982 {
983 serd_chunk_sink("", 1, stream);
984 return (uint8_t*)stream->buf;
985 }