cannam@226
|
1 /*
|
cannam@226
|
2 Copyright 2011-2017 David Robillard <http://drobilla.net>
|
cannam@226
|
3
|
cannam@226
|
4 Permission to use, copy, modify, and/or distribute this software for any
|
cannam@226
|
5 purpose with or without fee is hereby granted, provided that the above
|
cannam@226
|
6 copyright notice and this permission notice appear in all copies.
|
cannam@226
|
7
|
cannam@226
|
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
cannam@226
|
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
cannam@226
|
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
cannam@226
|
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
cannam@226
|
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
cannam@226
|
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
cannam@226
|
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
cannam@226
|
15 */
|
cannam@226
|
16
|
cannam@226
|
17 #include "serd_internal.h"
|
cannam@226
|
18
|
cannam@226
|
19 #include <assert.h>
|
cannam@226
|
20 #include <stdio.h>
|
cannam@226
|
21 #include <stdlib.h>
|
cannam@226
|
22 #include <string.h>
|
cannam@226
|
23
|
cannam@226
|
24 #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
cannam@226
|
25 #define NS_XSD "http://www.w3.org/2001/XMLSchema#"
|
cannam@226
|
26
|
cannam@226
|
27 typedef struct {
|
cannam@226
|
28 SerdNode graph;
|
cannam@226
|
29 SerdNode subject;
|
cannam@226
|
30 SerdNode predicate;
|
cannam@226
|
31 } WriteContext;
|
cannam@226
|
32
|
cannam@226
|
33 static const WriteContext WRITE_CONTEXT_NULL = {
|
cannam@226
|
34 { 0, 0, 0, 0, SERD_NOTHING },
|
cannam@226
|
35 { 0, 0, 0, 0, SERD_NOTHING },
|
cannam@226
|
36 { 0, 0, 0, 0, SERD_NOTHING }
|
cannam@226
|
37 };
|
cannam@226
|
38
|
cannam@226
|
/** Kinds of separator; each value is also the index of its entry in rules[]. */
typedef enum {
    SEP_NONE        = 0,
    SEP_END_S       = 1,  ///< End of a subject ('.')
    SEP_END_P       = 2,  ///< End of a predicate (';')
    SEP_END_O       = 3,  ///< End of an object (',')
    SEP_S_P         = 4,  ///< Between a subject and predicate (whitespace)
    SEP_P_O         = 5,  ///< Between a predicate and object (whitespace)
    SEP_ANON_BEGIN  = 6,  ///< Start of anonymous node ('[')
    SEP_ANON_END    = 7,  ///< End of anonymous node (']')
    SEP_LIST_BEGIN  = 8,  ///< Start of list ('(')
    SEP_LIST_SEP    = 9,  ///< List separator (whitespace)
    SEP_LIST_END    = 10, ///< End of list (')')
    SEP_GRAPH_BEGIN = 11, ///< Start of graph ('{')
    SEP_GRAPH_END   = 12  ///< End of graph ('}')
} Sep;
|
cannam@226
|
54
|
cannam@226
|
/** How one kind of separator is written (see write_sep()). */
typedef struct {
    const char* str;              ///< Separator text, or NULL to write none
    uint8_t     len;              ///< Number of bytes of str to write
    uint8_t     space_before;     ///< Nonzero: newline before the separator
    uint8_t     space_after_node; ///< Nonzero: newline after it when it follows a node
    uint8_t     space_after_sep;  ///< Nonzero: newline after it when it follows a separator
} SepRule;
|
cannam@226
|
62
|
cannam@226
|
63 static const SepRule rules[] = {
|
cannam@226
|
64 { NULL, 0, 0, 0, 0 },
|
cannam@226
|
65 { " .\n\n", 4, 0, 0, 0 },
|
cannam@226
|
66 { " ;", 2, 0, 1, 1 },
|
cannam@226
|
67 { " ,", 2, 0, 1, 0 },
|
cannam@226
|
68 { NULL, 0, 0, 1, 0 },
|
cannam@226
|
69 { " ", 1, 0, 0, 0 },
|
cannam@226
|
70 { "[", 1, 0, 1, 1 },
|
cannam@226
|
71 { "]", 1, 1, 0, 0 },
|
cannam@226
|
72 { "(", 1, 0, 0, 0 },
|
cannam@226
|
73 { NULL, 1, 0, 1, 0 },
|
cannam@226
|
74 { ")", 1, 1, 0, 0 },
|
cannam@226
|
75 { " {", 2, 0, 1, 1 },
|
cannam@226
|
76 { " }", 2, 0, 1, 1 },
|
cannam@226
|
77 { "\n", 1, 0, 1, 0 }
|
cannam@226
|
78 };
|
cannam@226
|
79
|
cannam@226
|
80 struct SerdWriterImpl {
|
cannam@226
|
81 SerdSyntax syntax;
|
cannam@226
|
82 SerdStyle style;
|
cannam@226
|
83 SerdEnv* env;
|
cannam@226
|
84 SerdNode root_node;
|
cannam@226
|
85 SerdURI root_uri;
|
cannam@226
|
86 SerdURI base_uri;
|
cannam@226
|
87 SerdStack anon_stack;
|
cannam@226
|
88 SerdByteSink byte_sink;
|
cannam@226
|
89 SerdErrorSink error_sink;
|
cannam@226
|
90 void* error_handle;
|
cannam@226
|
91 WriteContext context;
|
cannam@226
|
92 SerdNode list_subj;
|
cannam@226
|
93 unsigned list_depth;
|
cannam@226
|
94 unsigned indent;
|
cannam@226
|
95 uint8_t* bprefix;
|
cannam@226
|
96 size_t bprefix_len;
|
cannam@226
|
97 Sep last_sep;
|
cannam@226
|
98 bool empty;
|
cannam@226
|
99 };
|
cannam@226
|
100
|
cannam@226
|
/** Kind of string literal being written by write_text(). */
typedef enum {
    WRITE_STRING      = 0, ///< Short string, delimited by one quote
    WRITE_LONG_STRING = 1  ///< Long string, delimited by three quotes
} TextContext;
|
cannam@226
|
105
|
cannam@226
|
106 static bool
|
cannam@226
|
107 supports_abbrev(const SerdWriter* writer)
|
cannam@226
|
108 {
|
cannam@226
|
109 return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG;
|
cannam@226
|
110 }
|
cannam@226
|
111
|
cannam@226
|
112 static void
|
cannam@226
|
113 w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
|
cannam@226
|
114 {
|
cannam@226
|
115 /* TODO: This results in errors with no file information, which is not
|
cannam@226
|
116 helpful when re-serializing a file (particularly for "undefined
|
cannam@226
|
117 namespace prefix" errors. The statement sink API needs to be changed to
|
cannam@226
|
118 add a Cursor parameter so the source can notify the writer of the
|
cannam@226
|
119 statement origin for better error reporting. */
|
cannam@226
|
120
|
cannam@226
|
121 va_list args;
|
cannam@226
|
122 va_start(args, fmt);
|
cannam@226
|
123 const SerdError e = { st, NULL, 0, 0, fmt, &args };
|
cannam@226
|
124 serd_error(writer->error_sink, writer->error_handle, &e);
|
cannam@226
|
125 va_end(args);
|
cannam@226
|
126 }
|
cannam@226
|
127
|
cannam@226
|
128 static inline WriteContext*
|
cannam@226
|
129 anon_stack_top(SerdWriter* writer)
|
cannam@226
|
130 {
|
cannam@226
|
131 assert(!serd_stack_is_empty(&writer->anon_stack));
|
cannam@226
|
132 return (WriteContext*)(writer->anon_stack.buf
|
cannam@226
|
133 + writer->anon_stack.size - sizeof(WriteContext));
|
cannam@226
|
134 }
|
cannam@226
|
135
|
cannam@226
|
136 static void
|
cannam@226
|
137 copy_node(SerdNode* dst, const SerdNode* src)
|
cannam@226
|
138 {
|
cannam@226
|
139 if (src) {
|
cannam@226
|
140 dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
|
cannam@226
|
141 dst->n_bytes = src->n_bytes;
|
cannam@226
|
142 dst->n_chars = src->n_chars;
|
cannam@226
|
143 dst->flags = src->flags;
|
cannam@226
|
144 dst->type = src->type;
|
cannam@226
|
145 memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
|
cannam@226
|
146 } else {
|
cannam@226
|
147 dst->type = SERD_NOTHING;
|
cannam@226
|
148 }
|
cannam@226
|
149 }
|
cannam@226
|
150
|
cannam@226
|
151 static inline size_t
|
cannam@226
|
152 sink(const void* buf, size_t len, SerdWriter* writer)
|
cannam@226
|
153 {
|
cannam@226
|
154 return serd_byte_sink_write(buf, len, &writer->byte_sink);
|
cannam@226
|
155 }
|
cannam@226
|
156
|
cannam@226
|
157 // Parse a UTF-8 character, set *size to the length, and return the code point
|
cannam@226
|
158 static inline uint32_t
|
cannam@226
|
159 parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size)
|
cannam@226
|
160 {
|
cannam@226
|
161 uint32_t c = 0;
|
cannam@226
|
162 if ((utf8[0] & 0x80) == 0) { // Starts with `0'
|
cannam@226
|
163 *size = 1;
|
cannam@226
|
164 c = utf8[0];
|
cannam@226
|
165 } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110'
|
cannam@226
|
166 *size = 2;
|
cannam@226
|
167 c = utf8[0] & 0x1F;
|
cannam@226
|
168 } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110'
|
cannam@226
|
169 *size = 3;
|
cannam@226
|
170 c = utf8[0] & 0x0F;
|
cannam@226
|
171 } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110'
|
cannam@226
|
172 *size = 4;
|
cannam@226
|
173 c = utf8[0] & 0x07;
|
cannam@226
|
174 } else {
|
cannam@226
|
175 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
|
cannam@226
|
176 *size = 0;
|
cannam@226
|
177 return 0;
|
cannam@226
|
178 }
|
cannam@226
|
179
|
cannam@226
|
180 size_t i = 0;
|
cannam@226
|
181 uint8_t in = utf8[i++];
|
cannam@226
|
182
|
cannam@226
|
183 #define READ_BYTE() \
|
cannam@226
|
184 in = utf8[i++] & 0x3F; \
|
cannam@226
|
185 c = (c << 6) | in;
|
cannam@226
|
186
|
cannam@226
|
187 switch (*size) {
|
cannam@226
|
188 case 4: READ_BYTE();
|
cannam@226
|
189 case 3: READ_BYTE();
|
cannam@226
|
190 case 2: READ_BYTE();
|
cannam@226
|
191 }
|
cannam@226
|
192
|
cannam@226
|
193 return c;
|
cannam@226
|
194 }
|
cannam@226
|
195
|
cannam@226
|
196 // Write a single character, as an escape for single byte characters
|
cannam@226
|
197 // (Caller prints any single byte characters that don't need escaping)
|
cannam@226
|
198 static size_t
|
cannam@226
|
199 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
|
cannam@226
|
200 {
|
cannam@226
|
201 const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD };
|
cannam@226
|
202 char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
cannam@226
|
203 const uint8_t in = utf8[0];
|
cannam@226
|
204
|
cannam@226
|
205 uint32_t c = parse_utf8_char(writer, utf8, size);
|
cannam@226
|
206 switch (*size) {
|
cannam@226
|
207 case 0:
|
cannam@226
|
208 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in);
|
cannam@226
|
209 return sink(replacement_char, sizeof(replacement_char), writer);
|
cannam@226
|
210 case 1:
|
cannam@226
|
211 snprintf(escape, sizeof(escape), "\\u%04X", in);
|
cannam@226
|
212 return sink(escape, 6, writer);
|
cannam@226
|
213 default:
|
cannam@226
|
214 break;
|
cannam@226
|
215 }
|
cannam@226
|
216
|
cannam@226
|
217 if (!(writer->style & SERD_STYLE_ASCII)) {
|
cannam@226
|
218 // Write UTF-8 character directly to UTF-8 output
|
cannam@226
|
219 return sink(utf8, *size, writer);
|
cannam@226
|
220 }
|
cannam@226
|
221
|
cannam@226
|
222 if (c <= 0xFFFF) {
|
cannam@226
|
223 snprintf(escape, sizeof(escape), "\\u%04X", c);
|
cannam@226
|
224 return sink(escape, 6, writer);
|
cannam@226
|
225 } else {
|
cannam@226
|
226 snprintf(escape, sizeof(escape), "\\U%08X", c);
|
cannam@226
|
227 return sink(escape, 10, writer);
|
cannam@226
|
228 }
|
cannam@226
|
229 }
|
cannam@226
|
230
|
cannam@226
|
/** Return true iff byte `c` may not be written verbatim inside a URI. */
static inline bool
uri_must_escape(const uint8_t c)
{
    // Anything outside 0x20..0x7E is escaped
    if (!in_range(c, 0x20, 0x7E)) {
        return true;
    }

    // So are these characters within that range
    switch (c) {
    case ' ':
    case '"':
    case '<':
    case '>':
    case '\\':
    case '^':
    case '`':
    case '{':
    case '|':
    case '}':
        return true;
    default:
        return false;
    }
}
|
cannam@226
|
242
|
cannam@226
|
243 static size_t
|
cannam@226
|
244 write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
|
cannam@226
|
245 {
|
cannam@226
|
246 size_t len = 0;
|
cannam@226
|
247 for (size_t i = 0; i < n_bytes;) {
|
cannam@226
|
248 size_t j = i; // Index of next character that must be escaped
|
cannam@226
|
249 for (; j < n_bytes; ++j) {
|
cannam@226
|
250 if (uri_must_escape(utf8[j])) {
|
cannam@226
|
251 break;
|
cannam@226
|
252 }
|
cannam@226
|
253 }
|
cannam@226
|
254
|
cannam@226
|
255 // Bulk write all characters up to this special one
|
cannam@226
|
256 len += sink(&utf8[i], j - i, writer);
|
cannam@226
|
257 if ((i = j) == n_bytes) {
|
cannam@226
|
258 break; // Reached end
|
cannam@226
|
259 }
|
cannam@226
|
260
|
cannam@226
|
261 // Write UTF-8 character
|
cannam@226
|
262 size_t size = 0;
|
cannam@226
|
263 len += write_character(writer, utf8 + i, &size);
|
cannam@226
|
264 i += size;
|
cannam@226
|
265 }
|
cannam@226
|
266 return len;
|
cannam@226
|
267 }
|
cannam@226
|
268
|
cannam@226
|
/**
   Return true iff `c` must be backslash-escaped in a Turtle local name.

   Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped in
   local names, so they are not escaped here.
*/
static bool
lname_must_escape(const uint8_t c)
{
    static const char escaped[] = "'!#$%&()*+,/;=?@~";

    // strchr() would match the terminator, so NUL is excluded explicitly
    return c != '\0' && strchr(escaped, c) != NULL;
}
|
cannam@226
|
289
|
cannam@226
|
290 static size_t
|
cannam@226
|
291 write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
|
cannam@226
|
292 {
|
cannam@226
|
293 size_t len = 0;
|
cannam@226
|
294 for (size_t i = 0; i < n_bytes; ++i) {
|
cannam@226
|
295 size_t j = i; // Index of next character that must be escaped
|
cannam@226
|
296 for (; j < n_bytes; ++j) {
|
cannam@226
|
297 if (lname_must_escape(utf8[j])) {
|
cannam@226
|
298 break;
|
cannam@226
|
299 }
|
cannam@226
|
300 }
|
cannam@226
|
301
|
cannam@226
|
302 // Bulk write all characters up to this special one
|
cannam@226
|
303 len += sink(&utf8[i], j - i, writer);
|
cannam@226
|
304 if ((i = j) == n_bytes) {
|
cannam@226
|
305 break; // Reached end
|
cannam@226
|
306 }
|
cannam@226
|
307
|
cannam@226
|
308 // Write escape
|
cannam@226
|
309 len += sink("\\", 1, writer);
|
cannam@226
|
310 len += sink(&utf8[i], 1, writer);
|
cannam@226
|
311 }
|
cannam@226
|
312 return len;
|
cannam@226
|
313 }
|
cannam@226
|
314
|
cannam@226
|
315 static size_t
|
cannam@226
|
316 write_text(SerdWriter* writer, TextContext ctx,
|
cannam@226
|
317 const uint8_t* utf8, size_t n_bytes)
|
cannam@226
|
318 {
|
cannam@226
|
319 size_t len = 0;
|
cannam@226
|
320 for (size_t i = 0; i < n_bytes;) {
|
cannam@226
|
321 // Fast bulk write for long strings of printable ASCII
|
cannam@226
|
322 size_t j = i;
|
cannam@226
|
323 for (; j < n_bytes; ++j) {
|
cannam@226
|
324 if (utf8[j] == '\\' || utf8[j] == '"'
|
cannam@226
|
325 || (!in_range(utf8[j], 0x20, 0x7E))) {
|
cannam@226
|
326 break;
|
cannam@226
|
327 }
|
cannam@226
|
328 }
|
cannam@226
|
329
|
cannam@226
|
330 len += sink(&utf8[i], j - i, writer);
|
cannam@226
|
331 if ((i = j) == n_bytes) {
|
cannam@226
|
332 break; // Reached end
|
cannam@226
|
333 }
|
cannam@226
|
334
|
cannam@226
|
335 uint8_t in = utf8[i++];
|
cannam@226
|
336 if (ctx == WRITE_LONG_STRING) {
|
cannam@226
|
337 switch (in) {
|
cannam@226
|
338 case '\\': len += sink("\\\\", 2, writer); continue;
|
cannam@226
|
339 case '\b': len += sink("\\b", 2, writer); continue;
|
cannam@226
|
340 case '\n': case '\r': case '\t': case '\f':
|
cannam@226
|
341 len += sink(&in, 1, writer); // Write character as-is
|
cannam@226
|
342 continue;
|
cannam@226
|
343 case '\"':
|
cannam@226
|
344 if (i == n_bytes) { // '"' at string end
|
cannam@226
|
345 len += sink("\\\"", 2, writer);
|
cannam@226
|
346 } else {
|
cannam@226
|
347 len += sink(&in, 1, writer);
|
cannam@226
|
348 }
|
cannam@226
|
349 continue;
|
cannam@226
|
350 default: break;
|
cannam@226
|
351 }
|
cannam@226
|
352 } else if (ctx == WRITE_STRING) {
|
cannam@226
|
353 switch (in) {
|
cannam@226
|
354 case '\\': len += sink("\\\\", 2, writer); continue;
|
cannam@226
|
355 case '\n': len += sink("\\n", 2, writer); continue;
|
cannam@226
|
356 case '\r': len += sink("\\r", 2, writer); continue;
|
cannam@226
|
357 case '\t': len += sink("\\t", 2, writer); continue;
|
cannam@226
|
358 case '"': len += sink("\\\"", 2, writer); continue;
|
cannam@226
|
359 default: break;
|
cannam@226
|
360 }
|
cannam@226
|
361 if (writer->syntax == SERD_TURTLE) {
|
cannam@226
|
362 switch (in) {
|
cannam@226
|
363 case '\b': len += sink("\\b", 2, writer); continue;
|
cannam@226
|
364 case '\f': len += sink("\\f", 2, writer); continue;
|
cannam@226
|
365 }
|
cannam@226
|
366 }
|
cannam@226
|
367 }
|
cannam@226
|
368
|
cannam@226
|
369 size_t size = 0;
|
cannam@226
|
370 len += write_character(writer, utf8 + i - 1, &size);
|
cannam@226
|
371
|
cannam@226
|
372 if (size == 0) {
|
cannam@226
|
373 return len;
|
cannam@226
|
374 }
|
cannam@226
|
375
|
cannam@226
|
376 i += size - 1;
|
cannam@226
|
377 }
|
cannam@226
|
378 return len;
|
cannam@226
|
379 }
|
cannam@226
|
380
|
cannam@226
|
381 static size_t
|
cannam@226
|
382 uri_sink(const void* buf, size_t len, void* stream)
|
cannam@226
|
383 {
|
cannam@226
|
384 return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
|
cannam@226
|
385 }
|
cannam@226
|
386
|
cannam@226
|
387 static void
|
cannam@226
|
388 write_newline(SerdWriter* writer)
|
cannam@226
|
389 {
|
cannam@226
|
390 sink("\n", 1, writer);
|
cannam@226
|
391 for (unsigned i = 0; i < writer->indent; ++i) {
|
cannam@226
|
392 sink("\t", 1, writer);
|
cannam@226
|
393 }
|
cannam@226
|
394 }
|
cannam@226
|
395
|
cannam@226
|
396 static void
|
cannam@226
|
397 write_sep(SerdWriter* writer, const Sep sep)
|
cannam@226
|
398 {
|
cannam@226
|
399 const SepRule* rule = &rules[sep];
|
cannam@226
|
400 if (rule->space_before) {
|
cannam@226
|
401 write_newline(writer);
|
cannam@226
|
402 }
|
cannam@226
|
403 if (rule->str) {
|
cannam@226
|
404 sink(rule->str, rule->len, writer);
|
cannam@226
|
405 }
|
cannam@226
|
406 if ( (writer->last_sep && rule->space_after_sep)
|
cannam@226
|
407 || (!writer->last_sep && rule->space_after_node)) {
|
cannam@226
|
408 write_newline(writer);
|
cannam@226
|
409 } else if (writer->last_sep && rule->space_after_node) {
|
cannam@226
|
410 sink(" ", 1, writer);
|
cannam@226
|
411 }
|
cannam@226
|
412 writer->last_sep = sep;
|
cannam@226
|
413 }
|
cannam@226
|
414
|
cannam@226
|
415 static SerdStatus
|
cannam@226
|
416 reset_context(SerdWriter* writer, bool graph)
|
cannam@226
|
417 {
|
cannam@226
|
418 if (graph) {
|
cannam@226
|
419 writer->context.graph.type = SERD_NOTHING;
|
cannam@226
|
420 }
|
cannam@226
|
421 writer->context.subject.type = SERD_NOTHING;
|
cannam@226
|
422 writer->context.predicate.type = SERD_NOTHING;
|
cannam@226
|
423 writer->empty = false;
|
cannam@226
|
424 return SERD_SUCCESS;
|
cannam@226
|
425 }
|
cannam@226
|
426
|
cannam@226
|
427 static SerdStatus
|
cannam@226
|
428 free_context(SerdWriter* writer)
|
cannam@226
|
429 {
|
cannam@226
|
430 serd_node_free(&writer->context.graph);
|
cannam@226
|
431 serd_node_free(&writer->context.subject);
|
cannam@226
|
432 serd_node_free(&writer->context.predicate);
|
cannam@226
|
433 return reset_context(writer, true);
|
cannam@226
|
434 }
|
cannam@226
|
435
|
cannam@226
|
/** Position of a node within the statement being written. */
typedef enum {
    FIELD_NONE      = 0, ///< Not a statement field (used for datatype nodes)
    FIELD_SUBJECT   = 1,
    FIELD_PREDICATE = 2,
    FIELD_OBJECT    = 3,
    FIELD_GRAPH     = 4
} Field;
|
cannam@226
|
443
|
cannam@226
|
444 static bool
|
cannam@226
|
445 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
|
cannam@226
|
446 {
|
cannam@226
|
447 return (supports_abbrev(writer) &&
|
cannam@226
|
448 ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
|
cannam@226
|
449 (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
|
cannam@226
|
450 }
|
cannam@226
|
451
|
cannam@226
|
452 static bool
|
cannam@226
|
453 write_node(SerdWriter* writer,
|
cannam@226
|
454 const SerdNode* node,
|
cannam@226
|
455 const SerdNode* datatype,
|
cannam@226
|
456 const SerdNode* lang,
|
cannam@226
|
457 Field field,
|
cannam@226
|
458 SerdStatementFlags flags)
|
cannam@226
|
459 {
|
cannam@226
|
460 SerdChunk uri_prefix;
|
cannam@226
|
461 SerdNode prefix;
|
cannam@226
|
462 SerdChunk suffix;
|
cannam@226
|
463 bool has_scheme;
|
cannam@226
|
464 switch (node->type) {
|
cannam@226
|
465 case SERD_BLANK:
|
cannam@226
|
466 if (is_inline_start(writer, field, flags)) {
|
cannam@226
|
467 ++writer->indent;
|
cannam@226
|
468 write_sep(writer, SEP_ANON_BEGIN);
|
cannam@226
|
469 } else if (supports_abbrev(writer)
|
cannam@226
|
470 && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) {
|
cannam@226
|
471 assert(writer->list_depth == 0);
|
cannam@226
|
472 copy_node(&writer->list_subj, node);
|
cannam@226
|
473 ++writer->list_depth;
|
cannam@226
|
474 ++writer->indent;
|
cannam@226
|
475 write_sep(writer, SEP_LIST_BEGIN);
|
cannam@226
|
476 } else if (supports_abbrev(writer)
|
cannam@226
|
477 && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) {
|
cannam@226
|
478 ++writer->indent;
|
cannam@226
|
479 ++writer->list_depth;
|
cannam@226
|
480 write_sep(writer, SEP_LIST_BEGIN);
|
cannam@226
|
481 } else if (supports_abbrev(writer)
|
cannam@226
|
482 && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S))
|
cannam@226
|
483 || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) {
|
cannam@226
|
484 sink("[]", 2, writer);
|
cannam@226
|
485 } else {
|
cannam@226
|
486 sink("_:", 2, writer);
|
cannam@226
|
487 if (writer->bprefix && !strncmp((const char*)node->buf,
|
cannam@226
|
488 (const char*)writer->bprefix,
|
cannam@226
|
489 writer->bprefix_len)) {
|
cannam@226
|
490 sink(node->buf + writer->bprefix_len,
|
cannam@226
|
491 node->n_bytes - writer->bprefix_len,
|
cannam@226
|
492 writer);
|
cannam@226
|
493 } else {
|
cannam@226
|
494 sink(node->buf, node->n_bytes, writer);
|
cannam@226
|
495 }
|
cannam@226
|
496 }
|
cannam@226
|
497 break;
|
cannam@226
|
498 case SERD_CURIE:
|
cannam@226
|
499 switch (writer->syntax) {
|
cannam@226
|
500 case SERD_NTRIPLES:
|
cannam@226
|
501 case SERD_NQUADS:
|
cannam@226
|
502 if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) {
|
cannam@226
|
503 w_err(writer, SERD_ERR_BAD_CURIE,
|
cannam@226
|
504 "undefined namespace prefix `%s'\n", node->buf);
|
cannam@226
|
505 return false;
|
cannam@226
|
506 }
|
cannam@226
|
507 sink("<", 1, writer);
|
cannam@226
|
508 write_uri(writer, uri_prefix.buf, uri_prefix.len);
|
cannam@226
|
509 write_uri(writer, suffix.buf, suffix.len);
|
cannam@226
|
510 sink(">", 1, writer);
|
cannam@226
|
511 break;
|
cannam@226
|
512 case SERD_TURTLE:
|
cannam@226
|
513 case SERD_TRIG:
|
cannam@226
|
514 if (is_inline_start(writer, field, flags)) {
|
cannam@226
|
515 ++writer->indent;
|
cannam@226
|
516 write_sep(writer, SEP_ANON_BEGIN);
|
cannam@226
|
517 sink("== ", 3, writer);
|
cannam@226
|
518 }
|
cannam@226
|
519 write_lname(writer, node->buf, node->n_bytes);
|
cannam@226
|
520 if (is_inline_start(writer, field, flags)) {
|
cannam@226
|
521 sink(" ;", 2, writer);
|
cannam@226
|
522 write_newline(writer);
|
cannam@226
|
523 }
|
cannam@226
|
524 }
|
cannam@226
|
525 break;
|
cannam@226
|
526 case SERD_LITERAL:
|
cannam@226
|
527 if (supports_abbrev(writer) && datatype && datatype->buf) {
|
cannam@226
|
528 const char* type_uri = (const char*)datatype->buf;
|
cannam@226
|
529 if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
|
cannam@226
|
530 !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
|
cannam@226
|
531 !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
|
cannam@226
|
532 sink(node->buf, node->n_bytes, writer);
|
cannam@226
|
533 break;
|
cannam@226
|
534 } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
|
cannam@226
|
535 !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
|
cannam@226
|
536 strchr((const char*)node->buf, '.') &&
|
cannam@226
|
537 node->buf[node->n_bytes - 1] != '.') {
|
cannam@226
|
538 /* xsd:decimal literals without trailing digits, e.g. "5.", can
|
cannam@226
|
539 not be written bare in Turtle. We could add a 0 which is
|
cannam@226
|
540 prettier, but changes the text and breaks round tripping.
|
cannam@226
|
541 */
|
cannam@226
|
542 sink(node->buf, node->n_bytes, writer);
|
cannam@226
|
543 break;
|
cannam@226
|
544 }
|
cannam@226
|
545 }
|
cannam@226
|
546 if (supports_abbrev(writer)
|
cannam@226
|
547 && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
|
cannam@226
|
548 sink("\"\"\"", 3, writer);
|
cannam@226
|
549 write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
|
cannam@226
|
550 sink("\"\"\"", 3, writer);
|
cannam@226
|
551 } else {
|
cannam@226
|
552 sink("\"", 1, writer);
|
cannam@226
|
553 write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
|
cannam@226
|
554 sink("\"", 1, writer);
|
cannam@226
|
555 }
|
cannam@226
|
556 if (lang && lang->buf) {
|
cannam@226
|
557 sink("@", 1, writer);
|
cannam@226
|
558 sink(lang->buf, lang->n_bytes, writer);
|
cannam@226
|
559 } else if (datatype && datatype->buf) {
|
cannam@226
|
560 sink("^^", 2, writer);
|
cannam@226
|
561 write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
|
cannam@226
|
562 }
|
cannam@226
|
563 break;
|
cannam@226
|
564 case SERD_URI:
|
cannam@226
|
565 if (is_inline_start(writer, field, flags)) {
|
cannam@226
|
566 ++writer->indent;
|
cannam@226
|
567 write_sep(writer, SEP_ANON_BEGIN);
|
cannam@226
|
568 sink("== ", 3, writer);
|
cannam@226
|
569 }
|
cannam@226
|
570 has_scheme = serd_uri_string_has_scheme(node->buf);
|
cannam@226
|
571 if (field == FIELD_PREDICATE && supports_abbrev(writer)
|
cannam@226
|
572 && !strcmp((const char*)node->buf, NS_RDF "type")) {
|
cannam@226
|
573 sink("a", 1, writer);
|
cannam@226
|
574 break;
|
cannam@226
|
575 } else if (supports_abbrev(writer)
|
cannam@226
|
576 && !strcmp((const char*)node->buf, NS_RDF "nil")) {
|
cannam@226
|
577 sink("()", 2, writer);
|
cannam@226
|
578 break;
|
cannam@226
|
579 } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
|
cannam@226
|
580 serd_env_qualify(writer->env, node, &prefix, &suffix)) {
|
cannam@226
|
581 write_uri(writer, prefix.buf, prefix.n_bytes);
|
cannam@226
|
582 sink(":", 1, writer);
|
cannam@226
|
583 write_uri(writer, suffix.buf, suffix.len);
|
cannam@226
|
584 break;
|
cannam@226
|
585 }
|
cannam@226
|
586 sink("<", 1, writer);
|
cannam@226
|
587 if (writer->style & SERD_STYLE_RESOLVED) {
|
cannam@226
|
588 SerdURI in_base_uri, uri, abs_uri;
|
cannam@226
|
589 serd_env_get_base_uri(writer->env, &in_base_uri);
|
cannam@226
|
590 serd_uri_parse(node->buf, &uri);
|
cannam@226
|
591 serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
|
cannam@226
|
592 bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
|
cannam@226
|
593 SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
|
cannam@226
|
594 if (!uri_is_under(&abs_uri, root) ||
|
cannam@226
|
595 writer->syntax == SERD_NTRIPLES ||
|
cannam@226
|
596 writer->syntax == SERD_NQUADS) {
|
cannam@226
|
597 serd_uri_serialise(&abs_uri, uri_sink, writer);
|
cannam@226
|
598 } else {
|
cannam@226
|
599 serd_uri_serialise_relative(
|
cannam@226
|
600 &uri, &writer->base_uri, root, uri_sink, writer);
|
cannam@226
|
601 }
|
cannam@226
|
602 } else {
|
cannam@226
|
603 write_uri(writer, node->buf, node->n_bytes);
|
cannam@226
|
604 }
|
cannam@226
|
605 sink(">", 1, writer);
|
cannam@226
|
606 if (is_inline_start(writer, field, flags)) {
|
cannam@226
|
607 sink(" ;", 2, writer);
|
cannam@226
|
608 write_newline(writer);
|
cannam@226
|
609 }
|
cannam@226
|
610 default:
|
cannam@226
|
611 break;
|
cannam@226
|
612 }
|
cannam@226
|
613 writer->last_sep = SEP_NONE;
|
cannam@226
|
614 return true;
|
cannam@226
|
615 }
|
cannam@226
|
616
|
cannam@226
|
617 static inline bool
|
cannam@226
|
618 is_resource(const SerdNode* node)
|
cannam@226
|
619 {
|
cannam@226
|
620 return node->type > SERD_LITERAL;
|
cannam@226
|
621 }
|
cannam@226
|
622
|
cannam@226
|
623 static void
|
cannam@226
|
624 write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
|
cannam@226
|
625 {
|
cannam@226
|
626 write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
|
cannam@226
|
627 write_sep(writer, SEP_P_O);
|
cannam@226
|
628 copy_node(&writer->context.predicate, pred);
|
cannam@226
|
629 }
|
cannam@226
|
630
|
cannam@226
|
631 static bool
|
cannam@226
|
632 write_list_obj(SerdWriter* writer,
|
cannam@226
|
633 SerdStatementFlags flags,
|
cannam@226
|
634 const SerdNode* predicate,
|
cannam@226
|
635 const SerdNode* object,
|
cannam@226
|
636 const SerdNode* datatype,
|
cannam@226
|
637 const SerdNode* lang)
|
cannam@226
|
638 {
|
cannam@226
|
639 if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
|
cannam@226
|
640 --writer->indent;
|
cannam@226
|
641 write_sep(writer, SEP_LIST_END);
|
cannam@226
|
642 return true;
|
cannam@226
|
643 } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
|
cannam@226
|
644 write_sep(writer, SEP_LIST_SEP);
|
cannam@226
|
645 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
|
cannam@226
|
646 }
|
cannam@226
|
647 return false;
|
cannam@226
|
648 }
|
cannam@226
|
649
|
cannam@226
|
650 SERD_API
|
cannam@226
|
651 SerdStatus
|
cannam@226
|
652 serd_writer_write_statement(SerdWriter* writer,
|
cannam@226
|
653 SerdStatementFlags flags,
|
cannam@226
|
654 const SerdNode* graph,
|
cannam@226
|
655 const SerdNode* subject,
|
cannam@226
|
656 const SerdNode* predicate,
|
cannam@226
|
657 const SerdNode* object,
|
cannam@226
|
658 const SerdNode* datatype,
|
cannam@226
|
659 const SerdNode* lang)
|
cannam@226
|
660 {
|
cannam@226
|
661 if (!subject || !predicate || !object
|
cannam@226
|
662 || !subject->buf || !predicate->buf || !object->buf
|
cannam@226
|
663 || !is_resource(subject) || !is_resource(predicate)) {
|
cannam@226
|
664 return SERD_ERR_BAD_ARG;
|
cannam@226
|
665 }
|
cannam@226
|
666
|
cannam@226
|
667 #define TRY(write_result) \
|
cannam@226
|
668 if (!write_result) { \
|
cannam@226
|
669 return SERD_ERR_UNKNOWN; \
|
cannam@226
|
670 }
|
cannam@226
|
671
|
cannam@226
|
672 switch (writer->syntax) {
|
cannam@226
|
673 case SERD_NTRIPLES:
|
cannam@226
|
674 case SERD_NQUADS:
|
cannam@226
|
675 TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
|
cannam@226
|
676 sink(" ", 1, writer);
|
cannam@226
|
677 TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
|
cannam@226
|
678 sink(" ", 1, writer);
|
cannam@226
|
679 TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
|
cannam@226
|
680 if (writer->syntax == SERD_NQUADS && graph) {
|
cannam@226
|
681 sink(" ", 1, writer);
|
cannam@226
|
682 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
|
cannam@226
|
683 }
|
cannam@226
|
684 sink(" .\n", 3, writer);
|
cannam@226
|
685 return SERD_SUCCESS;
|
cannam@226
|
686 default:
|
cannam@226
|
687 break;
|
cannam@226
|
688 }
|
cannam@226
|
689
|
cannam@226
|
690 if ((graph && !serd_node_equals(graph, &writer->context.graph)) ||
|
cannam@226
|
691 (!graph && writer->context.graph.type)) {
|
cannam@226
|
692 writer->indent = 0;
|
cannam@226
|
693 if (writer->context.subject.type) {
|
cannam@226
|
694 write_sep(writer, SEP_END_S);
|
cannam@226
|
695 }
|
cannam@226
|
696 if (writer->context.graph.type) {
|
cannam@226
|
697 write_sep(writer, SEP_GRAPH_END);
|
cannam@226
|
698 }
|
cannam@226
|
699
|
cannam@226
|
700 reset_context(writer, true);
|
cannam@226
|
701 if (graph) {
|
cannam@226
|
702 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
|
cannam@226
|
703 ++writer->indent;
|
cannam@226
|
704 write_sep(writer, SEP_GRAPH_BEGIN);
|
cannam@226
|
705 copy_node(&writer->context.graph, graph);
|
cannam@226
|
706 }
|
cannam@226
|
707 }
|
cannam@226
|
708
|
cannam@226
|
709 if ((flags & SERD_LIST_CONT)) {
|
cannam@226
|
710 if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
|
cannam@226
|
711 // Reached end of list
|
cannam@226
|
712 if (--writer->list_depth == 0 && writer->list_subj.type) {
|
cannam@226
|
713 reset_context(writer, false);
|
cannam@226
|
714 serd_node_free(&writer->context.subject);
|
cannam@226
|
715 writer->context.subject = writer->list_subj;
|
cannam@226
|
716 writer->list_subj = SERD_NODE_NULL;
|
cannam@226
|
717 }
|
cannam@226
|
718 return SERD_SUCCESS;
|
cannam@226
|
719 }
|
cannam@226
|
720 } else if (serd_node_equals(subject, &writer->context.subject)) {
|
cannam@226
|
721 if (serd_node_equals(predicate, &writer->context.predicate)) {
|
cannam@226
|
722 // Abbreviate S P
|
cannam@226
|
723 if (!(flags & SERD_ANON_O_BEGIN)) {
|
cannam@226
|
724 ++writer->indent;
|
cannam@226
|
725 }
|
cannam@226
|
726 write_sep(writer, SEP_END_O);
|
cannam@226
|
727 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
|
cannam@226
|
728 if (!(flags & SERD_ANON_O_BEGIN)) {
|
cannam@226
|
729 --writer->indent;
|
cannam@226
|
730 }
|
cannam@226
|
731 } else {
|
cannam@226
|
732 // Abbreviate S
|
cannam@226
|
733 Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
|
cannam@226
|
734 write_sep(writer, sep);
|
cannam@226
|
735 write_pred(writer, flags, predicate);
|
cannam@226
|
736 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
|
cannam@226
|
737 }
|
cannam@226
|
738 } else {
|
cannam@226
|
739 // No abbreviation
|
cannam@226
|
740 if (writer->context.subject.type) {
|
cannam@226
|
741 assert(writer->indent > 0);
|
cannam@226
|
742 --writer->indent;
|
cannam@226
|
743 if (serd_stack_is_empty(&writer->anon_stack)) {
|
cannam@226
|
744 write_sep(writer, SEP_END_S);
|
cannam@226
|
745 }
|
cannam@226
|
746 } else if (!writer->empty) {
|
cannam@226
|
747 write_sep(writer, SEP_S_P);
|
cannam@226
|
748 }
|
cannam@226
|
749
|
cannam@226
|
750 if (!(flags & SERD_ANON_CONT)) {
|
cannam@226
|
751 write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
|
cannam@226
|
752 ++writer->indent;
|
cannam@226
|
753 write_sep(writer, SEP_S_P);
|
cannam@226
|
754 } else {
|
cannam@226
|
755 ++writer->indent;
|
cannam@226
|
756 }
|
cannam@226
|
757
|
cannam@226
|
758 reset_context(writer, false);
|
cannam@226
|
759 copy_node(&writer->context.subject, subject);
|
cannam@226
|
760
|
cannam@226
|
761 if (!(flags & SERD_LIST_S_BEGIN)) {
|
cannam@226
|
762 write_pred(writer, flags, predicate);
|
cannam@226
|
763 }
|
cannam@226
|
764
|
cannam@226
|
765 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
|
cannam@226
|
766 }
|
cannam@226
|
767
|
cannam@226
|
768 if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
|
cannam@226
|
769 WriteContext* ctx = (WriteContext*)serd_stack_push(
|
cannam@226
|
770 &writer->anon_stack, sizeof(WriteContext));
|
cannam@226
|
771 *ctx = writer->context;
|
cannam@226
|
772 WriteContext new_context = {
|
cannam@226
|
773 serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
|
cannam@226
|
774 if ((flags & SERD_ANON_S_BEGIN)) {
|
cannam@226
|
775 new_context.predicate = serd_node_copy(predicate);
|
cannam@226
|
776 }
|
cannam@226
|
777 writer->context = new_context;
|
cannam@226
|
778 } else {
|
cannam@226
|
779 copy_node(&writer->context.graph, graph);
|
cannam@226
|
780 copy_node(&writer->context.subject, subject);
|
cannam@226
|
781 copy_node(&writer->context.predicate, predicate);
|
cannam@226
|
782 }
|
cannam@226
|
783
|
cannam@226
|
784 return SERD_SUCCESS;
|
cannam@226
|
785 }
|
cannam@226
|
786
|
cannam@226
|
787 SERD_API
|
cannam@226
|
788 SerdStatus
|
cannam@226
|
789 serd_writer_end_anon(SerdWriter* writer,
|
cannam@226
|
790 const SerdNode* node)
|
cannam@226
|
791 {
|
cannam@226
|
792 if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
|
cannam@226
|
793 return SERD_SUCCESS;
|
cannam@226
|
794 }
|
cannam@226
|
795 if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
|
cannam@226
|
796 w_err(writer, SERD_ERR_UNKNOWN,
|
cannam@226
|
797 "unexpected end of anonymous node\n");
|
cannam@226
|
798 return SERD_ERR_UNKNOWN;
|
cannam@226
|
799 }
|
cannam@226
|
800 --writer->indent;
|
cannam@226
|
801 write_sep(writer, SEP_ANON_END);
|
cannam@226
|
802 free_context(writer);
|
cannam@226
|
803 writer->context = *anon_stack_top(writer);
|
cannam@226
|
804 serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
|
cannam@226
|
805 const bool is_subject = serd_node_equals(node, &writer->context.subject);
|
cannam@226
|
806 if (is_subject) {
|
cannam@226
|
807 copy_node(&writer->context.subject, node);
|
cannam@226
|
808 writer->context.predicate.type = SERD_NOTHING;
|
cannam@226
|
809 }
|
cannam@226
|
810 return SERD_SUCCESS;
|
cannam@226
|
811 }
|
cannam@226
|
812
|
cannam@226
|
813 SERD_API
|
cannam@226
|
814 SerdStatus
|
cannam@226
|
815 serd_writer_finish(SerdWriter* writer)
|
cannam@226
|
816 {
|
cannam@226
|
817 if (writer->context.subject.type) {
|
cannam@226
|
818 sink(" .\n", 3, writer);
|
cannam@226
|
819 }
|
cannam@226
|
820 if (writer->context.graph.type) {
|
cannam@226
|
821 sink("}\n", 2, writer);
|
cannam@226
|
822 }
|
cannam@226
|
823 serd_byte_sink_flush(&writer->byte_sink);
|
cannam@226
|
824 writer->indent = 0;
|
cannam@226
|
825 return free_context(writer);
|
cannam@226
|
826 }
|
cannam@226
|
827
|
cannam@226
|
828 SERD_API
|
cannam@226
|
829 SerdWriter*
|
cannam@226
|
830 serd_writer_new(SerdSyntax syntax,
|
cannam@226
|
831 SerdStyle style,
|
cannam@226
|
832 SerdEnv* env,
|
cannam@226
|
833 const SerdURI* base_uri,
|
cannam@226
|
834 SerdSink ssink,
|
cannam@226
|
835 void* stream)
|
cannam@226
|
836 {
|
cannam@226
|
837 const WriteContext context = WRITE_CONTEXT_NULL;
|
cannam@226
|
838 SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
|
cannam@226
|
839 writer->syntax = syntax;
|
cannam@226
|
840 writer->style = style;
|
cannam@226
|
841 writer->env = env;
|
cannam@226
|
842 writer->root_node = SERD_NODE_NULL;
|
cannam@226
|
843 writer->root_uri = SERD_URI_NULL;
|
cannam@226
|
844 writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
|
cannam@226
|
845 writer->anon_stack = serd_stack_new(sizeof(WriteContext));
|
cannam@226
|
846 writer->context = context;
|
cannam@226
|
847 writer->list_subj = SERD_NODE_NULL;
|
cannam@226
|
848 writer->empty = true;
|
cannam@226
|
849 writer->byte_sink = serd_byte_sink_new(
|
cannam@226
|
850 ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1);
|
cannam@226
|
851 return writer;
|
cannam@226
|
852 }
|
cannam@226
|
853
|
cannam@226
|
854 SERD_API
|
cannam@226
|
855 void
|
cannam@226
|
856 serd_writer_set_error_sink(SerdWriter* writer,
|
cannam@226
|
857 SerdErrorSink error_sink,
|
cannam@226
|
858 void* error_handle)
|
cannam@226
|
859 {
|
cannam@226
|
860 writer->error_sink = error_sink;
|
cannam@226
|
861 writer->error_handle = error_handle;
|
cannam@226
|
862 }
|
cannam@226
|
863
|
cannam@226
|
864 SERD_API
|
cannam@226
|
865 void
|
cannam@226
|
866 serd_writer_chop_blank_prefix(SerdWriter* writer,
|
cannam@226
|
867 const uint8_t* prefix)
|
cannam@226
|
868 {
|
cannam@226
|
869 free(writer->bprefix);
|
cannam@226
|
870 writer->bprefix_len = 0;
|
cannam@226
|
871 writer->bprefix = NULL;
|
cannam@226
|
872 if (prefix) {
|
cannam@226
|
873 writer->bprefix_len = strlen((const char*)prefix);
|
cannam@226
|
874 writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1);
|
cannam@226
|
875 memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
|
cannam@226
|
876 }
|
cannam@226
|
877 }
|
cannam@226
|
878
|
cannam@226
|
879 SERD_API
|
cannam@226
|
880 SerdStatus
|
cannam@226
|
881 serd_writer_set_base_uri(SerdWriter* writer,
|
cannam@226
|
882 const SerdNode* uri)
|
cannam@226
|
883 {
|
cannam@226
|
884 if (!serd_env_set_base_uri(writer->env, uri)) {
|
cannam@226
|
885 serd_env_get_base_uri(writer->env, &writer->base_uri);
|
cannam@226
|
886
|
cannam@226
|
887 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
|
cannam@226
|
888 if (writer->context.graph.type || writer->context.subject.type) {
|
cannam@226
|
889 sink(" .\n\n", 4, writer);
|
cannam@226
|
890 reset_context(writer, true);
|
cannam@226
|
891 }
|
cannam@226
|
892 sink("@base <", 7, writer);
|
cannam@226
|
893 sink(uri->buf, uri->n_bytes, writer);
|
cannam@226
|
894 sink("> .\n", 4, writer);
|
cannam@226
|
895 }
|
cannam@226
|
896 writer->indent = 0;
|
cannam@226
|
897 return reset_context(writer, true);
|
cannam@226
|
898 }
|
cannam@226
|
899 return SERD_ERR_UNKNOWN;
|
cannam@226
|
900 }
|
cannam@226
|
901
|
cannam@226
|
902 SERD_API
|
cannam@226
|
903 SerdStatus
|
cannam@226
|
904 serd_writer_set_root_uri(SerdWriter* writer,
|
cannam@226
|
905 const SerdNode* uri)
|
cannam@226
|
906 {
|
cannam@226
|
907 serd_node_free(&writer->root_node);
|
cannam@226
|
908 if (uri && uri->buf) {
|
cannam@226
|
909 writer->root_node = serd_node_copy(uri);
|
cannam@226
|
910 serd_uri_parse(uri->buf, &writer->root_uri);
|
cannam@226
|
911 } else {
|
cannam@226
|
912 writer->root_node = SERD_NODE_NULL;
|
cannam@226
|
913 writer->root_uri = SERD_URI_NULL;
|
cannam@226
|
914 }
|
cannam@226
|
915 return SERD_SUCCESS;
|
cannam@226
|
916 }
|
cannam@226
|
917
|
cannam@226
|
918 SERD_API
|
cannam@226
|
919 SerdStatus
|
cannam@226
|
920 serd_writer_set_prefix(SerdWriter* writer,
|
cannam@226
|
921 const SerdNode* name,
|
cannam@226
|
922 const SerdNode* uri)
|
cannam@226
|
923 {
|
cannam@226
|
924 if (!serd_env_set_prefix(writer->env, name, uri)) {
|
cannam@226
|
925 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
|
cannam@226
|
926 if (writer->context.graph.type || writer->context.subject.type) {
|
cannam@226
|
927 sink(" .\n\n", 4, writer);
|
cannam@226
|
928 reset_context(writer, true);
|
cannam@226
|
929 }
|
cannam@226
|
930 sink("@prefix ", 8, writer);
|
cannam@226
|
931 sink(name->buf, name->n_bytes, writer);
|
cannam@226
|
932 sink(": <", 3, writer);
|
cannam@226
|
933 write_uri(writer, uri->buf, uri->n_bytes);
|
cannam@226
|
934 sink("> .\n", 4, writer);
|
cannam@226
|
935 }
|
cannam@226
|
936 writer->indent = 0;
|
cannam@226
|
937 return reset_context(writer, true);
|
cannam@226
|
938 }
|
cannam@226
|
939 return SERD_ERR_UNKNOWN;
|
cannam@226
|
940 }
|
cannam@226
|
941
|
cannam@226
|
942 SERD_API
|
cannam@226
|
943 void
|
cannam@226
|
944 serd_writer_free(SerdWriter* writer)
|
cannam@226
|
945 {
|
cannam@226
|
946 serd_writer_finish(writer);
|
cannam@226
|
947 serd_stack_free(&writer->anon_stack);
|
cannam@226
|
948 free(writer->bprefix);
|
cannam@226
|
949 serd_byte_sink_free(&writer->byte_sink);
|
cannam@226
|
950 serd_node_free(&writer->root_node);
|
cannam@226
|
951 free(writer);
|
cannam@226
|
952 }
|
cannam@226
|
953
|
cannam@226
|
954 SERD_API
|
cannam@226
|
955 SerdEnv*
|
cannam@226
|
956 serd_writer_get_env(SerdWriter* writer)
|
cannam@226
|
957 {
|
cannam@226
|
958 return writer->env;
|
cannam@226
|
959 }
|
cannam@226
|
960
|
cannam@226
|
961 SERD_API
|
cannam@226
|
962 size_t
|
cannam@226
|
963 serd_file_sink(const void* buf, size_t len, void* stream)
|
cannam@226
|
964 {
|
cannam@226
|
965 return fwrite(buf, 1, len, (FILE*)stream);
|
cannam@226
|
966 }
|
cannam@226
|
967
|
cannam@226
|
968 SERD_API
|
cannam@226
|
969 size_t
|
cannam@226
|
970 serd_chunk_sink(const void* buf, size_t len, void* stream)
|
cannam@226
|
971 {
|
cannam@226
|
972 SerdChunk* chunk = (SerdChunk*)stream;
|
cannam@226
|
973 chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
|
cannam@226
|
974 memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
|
cannam@226
|
975 chunk->len += len;
|
cannam@226
|
976 return len;
|
cannam@226
|
977 }
|
cannam@226
|
978
|
cannam@226
|
979 SERD_API
|
cannam@226
|
980 uint8_t*
|
cannam@226
|
981 serd_chunk_sink_finish(SerdChunk* stream)
|
cannam@226
|
982 {
|
cannam@226
|
983 serd_chunk_sink("", 1, stream);
|
cannam@226
|
984 return (uint8_t*)stream->buf;
|
cannam@226
|
985 }
|