cannam@226: /* cannam@226: Copyright 2011-2017 David Robillard cannam@226: cannam@226: Permission to use, copy, modify, and/or distribute this software for any cannam@226: purpose with or without fee is hereby granted, provided that the above cannam@226: copyright notice and this permission notice appear in all copies. cannam@226: cannam@226: THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES cannam@226: WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF cannam@226: MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR cannam@226: ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES cannam@226: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN cannam@226: ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF cannam@226: OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. cannam@226: */ cannam@226: cannam@226: #include "serd_internal.h" cannam@226: cannam@226: #include cannam@226: #include cannam@226: #include cannam@226: #include cannam@226: cannam@226: #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" cannam@226: #define NS_XSD "http://www.w3.org/2001/XMLSchema#" cannam@226: cannam@226: typedef struct { cannam@226: SerdNode graph; cannam@226: SerdNode subject; cannam@226: SerdNode predicate; cannam@226: } WriteContext; cannam@226: cannam@226: static const WriteContext WRITE_CONTEXT_NULL = { cannam@226: { 0, 0, 0, 0, SERD_NOTHING }, cannam@226: { 0, 0, 0, 0, SERD_NOTHING }, cannam@226: { 0, 0, 0, 0, SERD_NOTHING } cannam@226: }; cannam@226: cannam@226: typedef enum { cannam@226: SEP_NONE, cannam@226: SEP_END_S, ///< End of a subject ('.') cannam@226: SEP_END_P, ///< End of a predicate (';') cannam@226: SEP_END_O, ///< End of an object (',') cannam@226: SEP_S_P, ///< Between a subject and predicate (whitespace) cannam@226: SEP_P_O, ///< Between a predicate and object (whitespace) cannam@226: SEP_ANON_BEGIN, ///< Start of anonymous node ('[') cannam@226: SEP_ANON_END, ///< End of anonymous node (']') cannam@226: SEP_LIST_BEGIN, ///< Start of list ('(') cannam@226: SEP_LIST_SEP, ///< List separator (whitespace) cannam@226: SEP_LIST_END, ///< End of list (')') cannam@226: SEP_GRAPH_BEGIN, ///< Start of graph ('{') cannam@226: SEP_GRAPH_END ///< End of graph ('}') cannam@226: } Sep; cannam@226: cannam@226: typedef struct { cannam@226: const char* str; ///< Sep string cannam@226: uint8_t len; ///< Length of sep string cannam@226: uint8_t space_before; ///< Newline before sep cannam@226: uint8_t space_after_node; ///< Newline after sep if after node cannam@226: uint8_t space_after_sep; ///< Newline after sep if after sep cannam@226: } SepRule; cannam@226: cannam@226: static const SepRule rules[] = { cannam@226: { NULL, 0, 0, 0, 0 }, cannam@226: { " .\n\n", 4, 0, 0, 0 }, cannam@226: { " ;", 2, 0, 1, 1 }, cannam@226: { " ,", 2, 0, 1, 0 }, cannam@226: { NULL, 0, 0, 1, 0 }, cannam@226: { " ", 1, 0, 0, 0 }, cannam@226: { "[", 1, 0, 1, 1 }, cannam@226: { "]", 1, 1, 0, 0 }, cannam@226: { "(", 1, 0, 0, 0 }, cannam@226: { NULL, 1, 0, 1, 0 }, cannam@226: { ")", 1, 1, 0, 0 }, cannam@226: { " {", 2, 0, 1, 1 }, cannam@226: { " }", 2, 0, 1, 1 }, cannam@226: { "\n", 1, 0, 1, 0 } cannam@226: }; cannam@226: cannam@226: struct SerdWriterImpl { cannam@226: SerdSyntax syntax; cannam@226: SerdStyle style; cannam@226: SerdEnv* env; cannam@226: SerdNode root_node; cannam@226: SerdURI root_uri; cannam@226: SerdURI base_uri; cannam@226: SerdStack anon_stack; cannam@226: SerdByteSink byte_sink; cannam@226: SerdErrorSink error_sink; cannam@226: void* error_handle; cannam@226: WriteContext context; cannam@226: SerdNode list_subj; cannam@226: unsigned list_depth; cannam@226: unsigned indent; cannam@226: uint8_t* bprefix; cannam@226: size_t bprefix_len; cannam@226: Sep last_sep; cannam@226: bool empty; cannam@226: }; cannam@226: cannam@226: typedef enum { cannam@226: WRITE_STRING, cannam@226: WRITE_LONG_STRING cannam@226: } TextContext; cannam@226: cannam@226: static bool cannam@226: supports_abbrev(const SerdWriter* writer) cannam@226: { cannam@226: return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; cannam@226: } cannam@226: cannam@226: static void cannam@226: w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) cannam@226: { cannam@226: /* TODO: This results in errors with no file information, which is not cannam@226: helpful when re-serializing a file (particularly for "undefined cannam@226: namespace prefix" errors. The statement sink API needs to be changed to cannam@226: add a Cursor parameter so the source can notify the writer of the cannam@226: statement origin for better error reporting. */ cannam@226: cannam@226: va_list args; cannam@226: va_start(args, fmt); cannam@226: const SerdError e = { st, NULL, 0, 0, fmt, &args }; cannam@226: serd_error(writer->error_sink, writer->error_handle, &e); cannam@226: va_end(args); cannam@226: } cannam@226: cannam@226: static inline WriteContext* cannam@226: anon_stack_top(SerdWriter* writer) cannam@226: { cannam@226: assert(!serd_stack_is_empty(&writer->anon_stack)); cannam@226: return (WriteContext*)(writer->anon_stack.buf cannam@226: + writer->anon_stack.size - sizeof(WriteContext)); cannam@226: } cannam@226: cannam@226: static void cannam@226: copy_node(SerdNode* dst, const SerdNode* src) cannam@226: { cannam@226: if (src) { cannam@226: dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); cannam@226: dst->n_bytes = src->n_bytes; cannam@226: dst->n_chars = src->n_chars; cannam@226: dst->flags = src->flags; cannam@226: dst->type = src->type; cannam@226: memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); cannam@226: } else { cannam@226: dst->type = SERD_NOTHING; cannam@226: } cannam@226: } cannam@226: cannam@226: static inline size_t cannam@226: sink(const void* buf, size_t len, SerdWriter* writer) cannam@226: { cannam@226: return serd_byte_sink_write(buf, len, &writer->byte_sink); cannam@226: } cannam@226: cannam@226: // Parse a UTF-8 character, set *size to the length, and return the code point cannam@226: static inline uint32_t cannam@226: parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size) cannam@226: { cannam@226: uint32_t c = 0; cannam@226: if ((utf8[0] & 0x80) == 0) { // Starts with `0' cannam@226: *size = 1; cannam@226: c = utf8[0]; cannam@226: } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110' cannam@226: *size = 2; cannam@226: c = utf8[0] & 0x1F; cannam@226: } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110' cannam@226: *size = 3; cannam@226: c = utf8[0] & 0x0F; cannam@226: } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110' cannam@226: *size = 4; cannam@226: c = utf8[0] & 0x07; cannam@226: } else { cannam@226: w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); cannam@226: *size = 0; cannam@226: return 0; cannam@226: } cannam@226: cannam@226: size_t i = 0; cannam@226: uint8_t in = utf8[i++]; cannam@226: cannam@226: #define READ_BYTE() \ cannam@226: in = utf8[i++] & 0x3F; \ cannam@226: c = (c << 6) | in; cannam@226: cannam@226: switch (*size) { cannam@226: case 4: READ_BYTE(); cannam@226: case 3: READ_BYTE(); cannam@226: case 2: READ_BYTE(); cannam@226: } cannam@226: cannam@226: return c; cannam@226: } cannam@226: cannam@226: // Write a single character, as an escape for single byte characters cannam@226: // (Caller prints any single byte characters that don't need escaping) cannam@226: static size_t cannam@226: write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) cannam@226: { cannam@226: const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; cannam@226: char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; cannam@226: const uint8_t in = utf8[0]; cannam@226: cannam@226: uint32_t c = parse_utf8_char(writer, utf8, size); cannam@226: switch (*size) { cannam@226: case 0: cannam@226: w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in); cannam@226: return sink(replacement_char, sizeof(replacement_char), writer); cannam@226: case 1: cannam@226: snprintf(escape, sizeof(escape), "\\u%04X", in); cannam@226: return sink(escape, 6, writer); cannam@226: default: cannam@226: break; cannam@226: } cannam@226: cannam@226: if (!(writer->style & SERD_STYLE_ASCII)) { cannam@226: // Write UTF-8 character directly to UTF-8 output cannam@226: return sink(utf8, *size, writer); cannam@226: } cannam@226: cannam@226: if (c <= 0xFFFF) { cannam@226: snprintf(escape, sizeof(escape), "\\u%04X", c); cannam@226: return sink(escape, 6, writer); cannam@226: } else { cannam@226: snprintf(escape, sizeof(escape), "\\U%08X", c); cannam@226: return sink(escape, 10, writer); cannam@226: } cannam@226: } cannam@226: cannam@226: static inline bool cannam@226: uri_must_escape(const uint8_t c) cannam@226: { cannam@226: switch (c) { cannam@226: case ' ': case '"': case '<': case '>': case '\\': cannam@226: case '^': case '`': case '{': case '|': case '}': cannam@226: return true; cannam@226: default: cannam@226: return !in_range(c, 0x20, 0x7E); cannam@226: } cannam@226: } cannam@226: cannam@226: static size_t cannam@226: write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) cannam@226: { cannam@226: size_t len = 0; cannam@226: for (size_t i = 0; i < n_bytes;) { cannam@226: size_t j = i; // Index of next character that must be escaped cannam@226: for (; j < n_bytes; ++j) { cannam@226: if (uri_must_escape(utf8[j])) { cannam@226: break; cannam@226: } cannam@226: } cannam@226: cannam@226: // Bulk write all characters up to this special one cannam@226: len += sink(&utf8[i], j - i, writer); cannam@226: if ((i = j) == n_bytes) { cannam@226: break; // Reached end cannam@226: } cannam@226: cannam@226: // Write UTF-8 character cannam@226: size_t size = 0; cannam@226: len += write_character(writer, utf8 + i, &size); cannam@226: i += size; cannam@226: } cannam@226: return len; cannam@226: } cannam@226: cannam@226: static bool cannam@226: lname_must_escape(const uint8_t c) cannam@226: { cannam@226: /* This arbitrary list of characters, most of which have nothing to do with cannam@226: Turtle, must be handled as special cases here because the RDF and SPARQL cannam@226: WGs are apparently intent on making the once elegant Turtle a baroque cannam@226: and inconsistent mess, throwing elegance and extensibility completely cannam@226: out the window for no good reason. cannam@226: cannam@226: Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped cannam@226: in local names, so they are not escaped here. */ cannam@226: cannam@226: switch (c) { cannam@226: case '\'': case '!': case '#': case '$': case '%': case '&': cannam@226: case '(': case ')': case '*': case '+': case ',': case '/': cannam@226: case ';': case '=': case '?': case '@': case '~': cannam@226: return true; cannam@226: } cannam@226: return false; cannam@226: } cannam@226: cannam@226: static size_t cannam@226: write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) cannam@226: { cannam@226: size_t len = 0; cannam@226: for (size_t i = 0; i < n_bytes; ++i) { cannam@226: size_t j = i; // Index of next character that must be escaped cannam@226: for (; j < n_bytes; ++j) { cannam@226: if (lname_must_escape(utf8[j])) { cannam@226: break; cannam@226: } cannam@226: } cannam@226: cannam@226: // Bulk write all characters up to this special one cannam@226: len += sink(&utf8[i], j - i, writer); cannam@226: if ((i = j) == n_bytes) { cannam@226: break; // Reached end cannam@226: } cannam@226: cannam@226: // Write escape cannam@226: len += sink("\\", 1, writer); cannam@226: len += sink(&utf8[i], 1, writer); cannam@226: } cannam@226: return len; cannam@226: } cannam@226: cannam@226: static size_t cannam@226: write_text(SerdWriter* writer, TextContext ctx, cannam@226: const uint8_t* utf8, size_t n_bytes) cannam@226: { cannam@226: size_t len = 0; cannam@226: for (size_t i = 0; i < n_bytes;) { cannam@226: // Fast bulk write for long strings of printable ASCII cannam@226: size_t j = i; cannam@226: for (; j < n_bytes; ++j) { cannam@226: if (utf8[j] == '\\' || utf8[j] == '"' cannam@226: || (!in_range(utf8[j], 0x20, 0x7E))) { cannam@226: break; cannam@226: } cannam@226: } cannam@226: cannam@226: len += sink(&utf8[i], j - i, writer); cannam@226: if ((i = j) == n_bytes) { cannam@226: break; // Reached end cannam@226: } cannam@226: cannam@226: uint8_t in = utf8[i++]; cannam@226: if (ctx == WRITE_LONG_STRING) { cannam@226: switch (in) { cannam@226: case '\\': len += sink("\\\\", 2, writer); continue; cannam@226: case '\b': len += sink("\\b", 2, writer); continue; cannam@226: case '\n': case '\r': case '\t': case '\f': cannam@226: len += sink(&in, 1, writer); // Write character as-is cannam@226: continue; cannam@226: case '\"': cannam@226: if (i == n_bytes) { // '"' at string end cannam@226: len += sink("\\\"", 2, writer); cannam@226: } else { cannam@226: len += sink(&in, 1, writer); cannam@226: } cannam@226: continue; cannam@226: default: break; cannam@226: } cannam@226: } else if (ctx == WRITE_STRING) { cannam@226: switch (in) { cannam@226: case '\\': len += sink("\\\\", 2, writer); continue; cannam@226: case '\n': len += sink("\\n", 2, writer); continue; cannam@226: case '\r': len += sink("\\r", 2, writer); continue; cannam@226: case '\t': len += sink("\\t", 2, writer); continue; cannam@226: case '"': len += sink("\\\"", 2, writer); continue; cannam@226: default: break; cannam@226: } cannam@226: if (writer->syntax == SERD_TURTLE) { cannam@226: switch (in) { cannam@226: case '\b': len += sink("\\b", 2, writer); continue; cannam@226: case '\f': len += sink("\\f", 2, writer); continue; cannam@226: } cannam@226: } cannam@226: } cannam@226: cannam@226: size_t size = 0; cannam@226: len += write_character(writer, utf8 + i - 1, &size); cannam@226: cannam@226: if (size == 0) { cannam@226: return len; cannam@226: } cannam@226: cannam@226: i += size - 1; cannam@226: } cannam@226: return len; cannam@226: } cannam@226: cannam@226: static size_t cannam@226: uri_sink(const void* buf, size_t len, void* stream) cannam@226: { cannam@226: return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len); cannam@226: } cannam@226: cannam@226: static void cannam@226: write_newline(SerdWriter* writer) cannam@226: { cannam@226: sink("\n", 1, writer); cannam@226: for (unsigned i = 0; i < writer->indent; ++i) { cannam@226: sink("\t", 1, writer); cannam@226: } cannam@226: } cannam@226: cannam@226: static void cannam@226: write_sep(SerdWriter* writer, const Sep sep) cannam@226: { cannam@226: const SepRule* rule = &rules[sep]; cannam@226: if (rule->space_before) { cannam@226: write_newline(writer); cannam@226: } cannam@226: if (rule->str) { cannam@226: sink(rule->str, rule->len, writer); cannam@226: } cannam@226: if ( (writer->last_sep && rule->space_after_sep) cannam@226: || (!writer->last_sep && rule->space_after_node)) { cannam@226: write_newline(writer); cannam@226: } else if (writer->last_sep && rule->space_after_node) { cannam@226: sink(" ", 1, writer); cannam@226: } cannam@226: writer->last_sep = sep; cannam@226: } cannam@226: cannam@226: static SerdStatus cannam@226: reset_context(SerdWriter* writer, bool graph) cannam@226: { cannam@226: if (graph) { cannam@226: writer->context.graph.type = SERD_NOTHING; cannam@226: } cannam@226: writer->context.subject.type = SERD_NOTHING; cannam@226: writer->context.predicate.type = SERD_NOTHING; cannam@226: writer->empty = false; cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: cannam@226: static SerdStatus cannam@226: free_context(SerdWriter* writer) cannam@226: { cannam@226: serd_node_free(&writer->context.graph); cannam@226: serd_node_free(&writer->context.subject); cannam@226: serd_node_free(&writer->context.predicate); cannam@226: return reset_context(writer, true); cannam@226: } cannam@226: cannam@226: typedef enum { cannam@226: FIELD_NONE, cannam@226: FIELD_SUBJECT, cannam@226: FIELD_PREDICATE, cannam@226: FIELD_OBJECT, cannam@226: FIELD_GRAPH cannam@226: } Field; cannam@226: cannam@226: static bool cannam@226: is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) cannam@226: { cannam@226: return (supports_abbrev(writer) && cannam@226: ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || cannam@226: (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))); cannam@226: } cannam@226: cannam@226: static bool cannam@226: write_node(SerdWriter* writer, cannam@226: const SerdNode* node, cannam@226: const SerdNode* datatype, cannam@226: const SerdNode* lang, cannam@226: Field field, cannam@226: SerdStatementFlags flags) cannam@226: { cannam@226: SerdChunk uri_prefix; cannam@226: SerdNode prefix; cannam@226: SerdChunk suffix; cannam@226: bool has_scheme; cannam@226: switch (node->type) { cannam@226: case SERD_BLANK: cannam@226: if (is_inline_start(writer, field, flags)) { cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_ANON_BEGIN); cannam@226: } else if (supports_abbrev(writer) cannam@226: && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) { cannam@226: assert(writer->list_depth == 0); cannam@226: copy_node(&writer->list_subj, node); cannam@226: ++writer->list_depth; cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_LIST_BEGIN); cannam@226: } else if (supports_abbrev(writer) cannam@226: && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) { cannam@226: ++writer->indent; cannam@226: ++writer->list_depth; cannam@226: write_sep(writer, SEP_LIST_BEGIN); cannam@226: } else if (supports_abbrev(writer) cannam@226: && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) cannam@226: || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { cannam@226: sink("[]", 2, writer); cannam@226: } else { cannam@226: sink("_:", 2, writer); cannam@226: if (writer->bprefix && !strncmp((const char*)node->buf, cannam@226: (const char*)writer->bprefix, cannam@226: writer->bprefix_len)) { cannam@226: sink(node->buf + writer->bprefix_len, cannam@226: node->n_bytes - writer->bprefix_len, cannam@226: writer); cannam@226: } else { cannam@226: sink(node->buf, node->n_bytes, writer); cannam@226: } cannam@226: } cannam@226: break; cannam@226: case SERD_CURIE: cannam@226: switch (writer->syntax) { cannam@226: case SERD_NTRIPLES: cannam@226: case SERD_NQUADS: cannam@226: if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) { cannam@226: w_err(writer, SERD_ERR_BAD_CURIE, cannam@226: "undefined namespace prefix `%s'\n", node->buf); cannam@226: return false; cannam@226: } cannam@226: sink("<", 1, writer); cannam@226: write_uri(writer, uri_prefix.buf, uri_prefix.len); cannam@226: write_uri(writer, suffix.buf, suffix.len); cannam@226: sink(">", 1, writer); cannam@226: break; cannam@226: case SERD_TURTLE: cannam@226: case SERD_TRIG: cannam@226: if (is_inline_start(writer, field, flags)) { cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_ANON_BEGIN); cannam@226: sink("== ", 3, writer); cannam@226: } cannam@226: write_lname(writer, node->buf, node->n_bytes); cannam@226: if (is_inline_start(writer, field, flags)) { cannam@226: sink(" ;", 2, writer); cannam@226: write_newline(writer); cannam@226: } cannam@226: } cannam@226: break; cannam@226: case SERD_LITERAL: cannam@226: if (supports_abbrev(writer) && datatype && datatype->buf) { cannam@226: const char* type_uri = (const char*)datatype->buf; cannam@226: if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && ( cannam@226: !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || cannam@226: !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) { cannam@226: sink(node->buf, node->n_bytes, writer); cannam@226: break; cannam@226: } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && cannam@226: !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") && cannam@226: strchr((const char*)node->buf, '.') && cannam@226: node->buf[node->n_bytes - 1] != '.') { cannam@226: /* xsd:decimal literals without trailing digits, e.g. "5.", can cannam@226: not be written bare in Turtle. We could add a 0 which is cannam@226: prettier, but changes the text and breaks round tripping. cannam@226: */ cannam@226: sink(node->buf, node->n_bytes, writer); cannam@226: break; cannam@226: } cannam@226: } cannam@226: if (supports_abbrev(writer) cannam@226: && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) { cannam@226: sink("\"\"\"", 3, writer); cannam@226: write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); cannam@226: sink("\"\"\"", 3, writer); cannam@226: } else { cannam@226: sink("\"", 1, writer); cannam@226: write_text(writer, WRITE_STRING, node->buf, node->n_bytes); cannam@226: sink("\"", 1, writer); cannam@226: } cannam@226: if (lang && lang->buf) { cannam@226: sink("@", 1, writer); cannam@226: sink(lang->buf, lang->n_bytes, writer); cannam@226: } else if (datatype && datatype->buf) { cannam@226: sink("^^", 2, writer); cannam@226: write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); cannam@226: } cannam@226: break; cannam@226: case SERD_URI: cannam@226: if (is_inline_start(writer, field, flags)) { cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_ANON_BEGIN); cannam@226: sink("== ", 3, writer); cannam@226: } cannam@226: has_scheme = serd_uri_string_has_scheme(node->buf); cannam@226: if (field == FIELD_PREDICATE && supports_abbrev(writer) cannam@226: && !strcmp((const char*)node->buf, NS_RDF "type")) { cannam@226: sink("a", 1, writer); cannam@226: break; cannam@226: } else if (supports_abbrev(writer) cannam@226: && !strcmp((const char*)node->buf, NS_RDF "nil")) { cannam@226: sink("()", 2, writer); cannam@226: break; cannam@226: } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) && cannam@226: serd_env_qualify(writer->env, node, &prefix, &suffix)) { cannam@226: write_uri(writer, prefix.buf, prefix.n_bytes); cannam@226: sink(":", 1, writer); cannam@226: write_uri(writer, suffix.buf, suffix.len); cannam@226: break; cannam@226: } cannam@226: sink("<", 1, writer); cannam@226: if (writer->style & SERD_STYLE_RESOLVED) { cannam@226: SerdURI in_base_uri, uri, abs_uri; cannam@226: serd_env_get_base_uri(writer->env, &in_base_uri); cannam@226: serd_uri_parse(node->buf, &uri); cannam@226: serd_uri_resolve(&uri, &in_base_uri, &abs_uri); cannam@226: bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); cannam@226: SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri; cannam@226: if (!uri_is_under(&abs_uri, root) || cannam@226: writer->syntax == SERD_NTRIPLES || cannam@226: writer->syntax == SERD_NQUADS) { cannam@226: serd_uri_serialise(&abs_uri, uri_sink, writer); cannam@226: } else { cannam@226: serd_uri_serialise_relative( cannam@226: &uri, &writer->base_uri, root, uri_sink, writer); cannam@226: } cannam@226: } else { cannam@226: write_uri(writer, node->buf, node->n_bytes); cannam@226: } cannam@226: sink(">", 1, writer); cannam@226: if (is_inline_start(writer, field, flags)) { cannam@226: sink(" ;", 2, writer); cannam@226: write_newline(writer); cannam@226: } cannam@226: default: cannam@226: break; cannam@226: } cannam@226: writer->last_sep = SEP_NONE; cannam@226: return true; cannam@226: } cannam@226: cannam@226: static inline bool cannam@226: is_resource(const SerdNode* node) cannam@226: { cannam@226: return node->type > SERD_LITERAL; cannam@226: } cannam@226: cannam@226: static void cannam@226: write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) cannam@226: { cannam@226: write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); cannam@226: write_sep(writer, SEP_P_O); cannam@226: copy_node(&writer->context.predicate, pred); cannam@226: } cannam@226: cannam@226: static bool cannam@226: write_list_obj(SerdWriter* writer, cannam@226: SerdStatementFlags flags, cannam@226: const SerdNode* predicate, cannam@226: const SerdNode* object, cannam@226: const SerdNode* datatype, cannam@226: const SerdNode* lang) cannam@226: { cannam@226: if (!strcmp((const char*)object->buf, NS_RDF "nil")) { cannam@226: --writer->indent; cannam@226: write_sep(writer, SEP_LIST_END); cannam@226: return true; cannam@226: } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { cannam@226: write_sep(writer, SEP_LIST_SEP); cannam@226: write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); cannam@226: } cannam@226: return false; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_write_statement(SerdWriter* writer, cannam@226: SerdStatementFlags flags, cannam@226: const SerdNode* graph, cannam@226: const SerdNode* subject, cannam@226: const SerdNode* predicate, cannam@226: const SerdNode* object, cannam@226: const SerdNode* datatype, cannam@226: const SerdNode* lang) cannam@226: { cannam@226: if (!subject || !predicate || !object cannam@226: || !subject->buf || !predicate->buf || !object->buf cannam@226: || !is_resource(subject) || !is_resource(predicate)) { cannam@226: return SERD_ERR_BAD_ARG; cannam@226: } cannam@226: cannam@226: #define TRY(write_result) \ cannam@226: if (!write_result) { \ cannam@226: return SERD_ERR_UNKNOWN; \ cannam@226: } cannam@226: cannam@226: switch (writer->syntax) { cannam@226: case SERD_NTRIPLES: cannam@226: case SERD_NQUADS: cannam@226: TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); cannam@226: sink(" ", 1, writer); cannam@226: TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); cannam@226: sink(" ", 1, writer); cannam@226: TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); cannam@226: if (writer->syntax == SERD_NQUADS && graph) { cannam@226: sink(" ", 1, writer); cannam@226: TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); cannam@226: } cannam@226: sink(" .\n", 3, writer); cannam@226: return SERD_SUCCESS; cannam@226: default: cannam@226: break; cannam@226: } cannam@226: cannam@226: if ((graph && !serd_node_equals(graph, &writer->context.graph)) || cannam@226: (!graph && writer->context.graph.type)) { cannam@226: writer->indent = 0; cannam@226: if (writer->context.subject.type) { cannam@226: write_sep(writer, SEP_END_S); cannam@226: } cannam@226: if (writer->context.graph.type) { cannam@226: write_sep(writer, SEP_GRAPH_END); cannam@226: } cannam@226: cannam@226: reset_context(writer, true); cannam@226: if (graph) { cannam@226: TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_GRAPH_BEGIN); cannam@226: copy_node(&writer->context.graph, graph); cannam@226: } cannam@226: } cannam@226: cannam@226: if ((flags & SERD_LIST_CONT)) { cannam@226: if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { cannam@226: // Reached end of list cannam@226: if (--writer->list_depth == 0 && writer->list_subj.type) { cannam@226: reset_context(writer, false); cannam@226: serd_node_free(&writer->context.subject); cannam@226: writer->context.subject = writer->list_subj; cannam@226: writer->list_subj = SERD_NODE_NULL; cannam@226: } cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: } else if (serd_node_equals(subject, &writer->context.subject)) { cannam@226: if (serd_node_equals(predicate, &writer->context.predicate)) { cannam@226: // Abbreviate S P cannam@226: if (!(flags & SERD_ANON_O_BEGIN)) { cannam@226: ++writer->indent; cannam@226: } cannam@226: write_sep(writer, SEP_END_O); cannam@226: write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); cannam@226: if (!(flags & SERD_ANON_O_BEGIN)) { cannam@226: --writer->indent; cannam@226: } cannam@226: } else { cannam@226: // Abbreviate S cannam@226: Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P; cannam@226: write_sep(writer, sep); cannam@226: write_pred(writer, flags, predicate); cannam@226: write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); cannam@226: } cannam@226: } else { cannam@226: // No abbreviation cannam@226: if (writer->context.subject.type) { cannam@226: assert(writer->indent > 0); cannam@226: --writer->indent; cannam@226: if (serd_stack_is_empty(&writer->anon_stack)) { cannam@226: write_sep(writer, SEP_END_S); cannam@226: } cannam@226: } else if (!writer->empty) { cannam@226: write_sep(writer, SEP_S_P); cannam@226: } cannam@226: cannam@226: if (!(flags & SERD_ANON_CONT)) { cannam@226: write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); cannam@226: ++writer->indent; cannam@226: write_sep(writer, SEP_S_P); cannam@226: } else { cannam@226: ++writer->indent; cannam@226: } cannam@226: cannam@226: reset_context(writer, false); cannam@226: copy_node(&writer->context.subject, subject); cannam@226: cannam@226: if (!(flags & SERD_LIST_S_BEGIN)) { cannam@226: write_pred(writer, flags, predicate); cannam@226: } cannam@226: cannam@226: write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); cannam@226: } cannam@226: cannam@226: if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { cannam@226: WriteContext* ctx = (WriteContext*)serd_stack_push( cannam@226: &writer->anon_stack, sizeof(WriteContext)); cannam@226: *ctx = writer->context; cannam@226: WriteContext new_context = { cannam@226: serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL }; cannam@226: if ((flags & SERD_ANON_S_BEGIN)) { cannam@226: new_context.predicate = serd_node_copy(predicate); cannam@226: } cannam@226: writer->context = new_context; cannam@226: } else { cannam@226: copy_node(&writer->context.graph, graph); cannam@226: copy_node(&writer->context.subject, subject); cannam@226: copy_node(&writer->context.predicate, predicate); cannam@226: } cannam@226: cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_end_anon(SerdWriter* writer, cannam@226: const SerdNode* node) cannam@226: { cannam@226: if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) { cannam@226: w_err(writer, SERD_ERR_UNKNOWN, cannam@226: "unexpected end of anonymous node\n"); cannam@226: return SERD_ERR_UNKNOWN; cannam@226: } cannam@226: --writer->indent; cannam@226: write_sep(writer, SEP_ANON_END); cannam@226: free_context(writer); cannam@226: writer->context = *anon_stack_top(writer); cannam@226: serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); cannam@226: const bool is_subject = serd_node_equals(node, &writer->context.subject); cannam@226: if (is_subject) { cannam@226: copy_node(&writer->context.subject, node); cannam@226: writer->context.predicate.type = SERD_NOTHING; cannam@226: } cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_finish(SerdWriter* writer) cannam@226: { cannam@226: if (writer->context.subject.type) { cannam@226: sink(" .\n", 3, writer); cannam@226: } cannam@226: if (writer->context.graph.type) { cannam@226: sink("}\n", 2, writer); cannam@226: } cannam@226: serd_byte_sink_flush(&writer->byte_sink); cannam@226: writer->indent = 0; cannam@226: return free_context(writer); cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdWriter* cannam@226: serd_writer_new(SerdSyntax syntax, cannam@226: SerdStyle style, cannam@226: SerdEnv* env, cannam@226: const SerdURI* base_uri, cannam@226: SerdSink ssink, cannam@226: void* stream) cannam@226: { cannam@226: const WriteContext context = WRITE_CONTEXT_NULL; cannam@226: SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); cannam@226: writer->syntax = syntax; cannam@226: writer->style = style; cannam@226: writer->env = env; cannam@226: writer->root_node = SERD_NODE_NULL; cannam@226: writer->root_uri = SERD_URI_NULL; cannam@226: writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; cannam@226: writer->anon_stack = serd_stack_new(sizeof(WriteContext)); cannam@226: writer->context = context; cannam@226: writer->list_subj = SERD_NODE_NULL; cannam@226: writer->empty = true; cannam@226: writer->byte_sink = serd_byte_sink_new( cannam@226: ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1); cannam@226: return writer; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: void cannam@226: serd_writer_set_error_sink(SerdWriter* writer, cannam@226: SerdErrorSink error_sink, cannam@226: void* error_handle) cannam@226: { cannam@226: writer->error_sink = error_sink; cannam@226: writer->error_handle = error_handle; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: void cannam@226: serd_writer_chop_blank_prefix(SerdWriter* writer, cannam@226: const uint8_t* prefix) cannam@226: { cannam@226: free(writer->bprefix); cannam@226: writer->bprefix_len = 0; cannam@226: writer->bprefix = NULL; cannam@226: if (prefix) { cannam@226: writer->bprefix_len = strlen((const char*)prefix); cannam@226: writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1); cannam@226: memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); cannam@226: } cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_set_base_uri(SerdWriter* writer, cannam@226: const SerdNode* uri) cannam@226: { cannam@226: if (!serd_env_set_base_uri(writer->env, uri)) { cannam@226: serd_env_get_base_uri(writer->env, &writer->base_uri); cannam@226: cannam@226: if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { cannam@226: if (writer->context.graph.type || writer->context.subject.type) { cannam@226: sink(" .\n\n", 4, writer); cannam@226: reset_context(writer, true); cannam@226: } cannam@226: sink("@base <", 7, writer); cannam@226: sink(uri->buf, uri->n_bytes, writer); cannam@226: sink("> .\n", 4, writer); cannam@226: } cannam@226: writer->indent = 0; cannam@226: return reset_context(writer, true); cannam@226: } cannam@226: return SERD_ERR_UNKNOWN; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_set_root_uri(SerdWriter* writer, cannam@226: const SerdNode* uri) cannam@226: { cannam@226: serd_node_free(&writer->root_node); cannam@226: if (uri && uri->buf) { cannam@226: writer->root_node = serd_node_copy(uri); cannam@226: serd_uri_parse(uri->buf, &writer->root_uri); cannam@226: } else { cannam@226: writer->root_node = SERD_NODE_NULL; cannam@226: writer->root_uri = SERD_URI_NULL; cannam@226: } cannam@226: return SERD_SUCCESS; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdStatus cannam@226: serd_writer_set_prefix(SerdWriter* writer, cannam@226: const SerdNode* name, cannam@226: const SerdNode* uri) cannam@226: { cannam@226: if (!serd_env_set_prefix(writer->env, name, uri)) { cannam@226: if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { cannam@226: if (writer->context.graph.type || writer->context.subject.type) { cannam@226: sink(" .\n\n", 4, writer); cannam@226: reset_context(writer, true); cannam@226: } cannam@226: sink("@prefix ", 8, writer); cannam@226: sink(name->buf, name->n_bytes, writer); cannam@226: sink(": <", 3, writer); cannam@226: write_uri(writer, uri->buf, uri->n_bytes); cannam@226: sink("> .\n", 4, writer); cannam@226: } cannam@226: writer->indent = 0; cannam@226: return reset_context(writer, true); cannam@226: } cannam@226: return SERD_ERR_UNKNOWN; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: void cannam@226: serd_writer_free(SerdWriter* writer) cannam@226: { cannam@226: serd_writer_finish(writer); cannam@226: serd_stack_free(&writer->anon_stack); cannam@226: free(writer->bprefix); cannam@226: serd_byte_sink_free(&writer->byte_sink); cannam@226: serd_node_free(&writer->root_node); cannam@226: free(writer); cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: SerdEnv* cannam@226: serd_writer_get_env(SerdWriter* writer) cannam@226: { cannam@226: return writer->env; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: size_t cannam@226: serd_file_sink(const void* buf, size_t len, void* stream) cannam@226: { cannam@226: return fwrite(buf, 1, len, (FILE*)stream); cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: size_t cannam@226: serd_chunk_sink(const void* buf, size_t len, void* stream) cannam@226: { cannam@226: SerdChunk* chunk = (SerdChunk*)stream; cannam@226: chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len); cannam@226: memcpy((uint8_t*)chunk->buf + chunk->len, buf, len); cannam@226: chunk->len += len; cannam@226: return len; cannam@226: } cannam@226: cannam@226: SERD_API cannam@226: uint8_t* cannam@226: serd_chunk_sink_finish(SerdChunk* stream) cannam@226: { cannam@226: serd_chunk_sink("", 1, stream); cannam@226: return (uint8_t*)stream->buf; cannam@226: }