cannam@226
|
1 /*
|
cannam@226
|
2 Copyright 2011-2017 David Robillard <http://drobilla.net>
|
cannam@226
|
3
|
cannam@226
|
4 Permission to use, copy, modify, and/or distribute this software for any
|
cannam@226
|
5 purpose with or without fee is hereby granted, provided that the above
|
cannam@226
|
6 copyright notice and this permission notice appear in all copies.
|
cannam@226
|
7
|
cannam@226
|
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
cannam@226
|
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
cannam@226
|
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
cannam@226
|
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
cannam@226
|
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
cannam@226
|
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
cannam@226
|
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
cannam@226
|
15 */
|
cannam@226
|
16
|
cannam@226
|
17 #include "serd_internal.h"
|
cannam@226
|
18
|
cannam@226
|
19 #include <assert.h>
|
cannam@226
|
20 #include <errno.h>
|
cannam@226
|
21 #include <stdlib.h>
|
cannam@226
|
22 #include <string.h>
|
cannam@226
|
23
|
cannam@226
|
/*
   Print a fixed error message to stderr, prefixed with "serdi: ".

   `msg' must be a string literal, since it is pasted onto the prefix.  The
   expansion is a single expression with no trailing semicolon, so the
   caller's own `;' ends the statement and the macro can safely be used as
   the body of an unbraced if/else.
*/
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)

/*
   As SERDI_ERROR, but `fmt' is a printf-style format literal followed by at
   least one argument.
*/
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
|
cannam@226
|
26
|
cannam@226
|
27 static int
|
cannam@226
|
28 print_version(void)
|
cannam@226
|
29 {
|
cannam@226
|
30 printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n");
|
cannam@226
|
31 printf("Copyright 2011-2017 David Robillard <http://drobilla.net>.\n"
|
cannam@226
|
32 "License: <http://www.opensource.org/licenses/isc>\n"
|
cannam@226
|
33 "This is free software; you are free to change and redistribute it."
|
cannam@226
|
34 "\nThere is NO WARRANTY, to the extent permitted by law.\n");
|
cannam@226
|
35 return 0;
|
cannam@226
|
36 }
|
cannam@226
|
37
|
cannam@226
|
/**
   Print the command-line synopsis and option summary.

   @param name Program name to show in the "Usage:" line.
   @param error If true, the text goes to stderr preceded by a blank line;
   otherwise it goes to stdout.
   @return 1 if `error' is true, otherwise 0.
*/
static int
print_usage(const char* name, bool error)
{
	/* Everything after the "Usage:" line is fixed text. */
	static const char* const summary =
		"Read and write RDF syntax.\n"
		"Use - for INPUT to read from standard input.\n\n"
		" -b Fast bulk output for large serialisations.\n"
		" -c PREFIX Chop PREFIX from matching blank node IDs.\n"
		" -e Eat input one character at a time.\n"
		" -f Keep full URIs in input (don't qualify).\n"
		" -h Display this help and exit.\n"
		" -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"
		" -l Lax (non-strict) parsing.\n"
		" -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"
		" -p PREFIX Add PREFIX to blank node IDs.\n"
		" -q Suppress all output except data.\n"
		" -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
		" -s INPUT Parse INPUT as string (terminates options).\n"
		" -v Display version information and exit.\n";

	FILE* const os = error ? stderr : stdout;

	if (error) {
		/* Separate the usage text from the error message before it. */
		fputc('\n', os);
	}

	fprintf(os, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name);
	fputs(summary, os);

	if (error) {
		return 1;
	}
	return 0;
}
|
cannam@226
|
61
|
cannam@226
|
62 static bool
|
cannam@226
|
63 set_syntax(SerdSyntax* syntax, const char* name)
|
cannam@226
|
64 {
|
cannam@226
|
65 if (!strcmp(name, "turtle")) {
|
cannam@226
|
66 *syntax = SERD_TURTLE;
|
cannam@226
|
67 } else if (!strcmp(name, "ntriples")) {
|
cannam@226
|
68 *syntax = SERD_NTRIPLES;
|
cannam@226
|
69 } else if (!strcmp(name, "nquads")) {
|
cannam@226
|
70 *syntax = SERD_NQUADS;
|
cannam@226
|
71 } else if (!strcmp(name, "trig")) {
|
cannam@226
|
72 *syntax = SERD_TRIG;
|
cannam@226
|
73 } else {
|
cannam@226
|
74 SERDI_ERRORF("unknown syntax `%s'\n", name);
|
cannam@226
|
75 return false;
|
cannam@226
|
76 }
|
cannam@226
|
77 return true;
|
cannam@226
|
78 }
|
cannam@226
|
79
|
cannam@226
|
/**
   Report that option `opt' was given without its required argument.

   Prints the error followed by the usage text to stderr.

   @param name Program name, passed on to print_usage().
   @param opt The option letter that lacks an argument.
   @return The value of print_usage(name, true).
*/
static int
missing_arg(const char* name, char opt)
{
	fprintf(stderr, "serdi: option requires an argument -- '%c'\n", opt);

	const int status = print_usage(name, true);
	return status;
}
|
cannam@226
|
86
|
cannam@226
|
87 static SerdStatus
|
cannam@226
|
88 quiet_error_sink(void* handle, const SerdError* e)
|
cannam@226
|
89 {
|
cannam@226
|
90 return SERD_SUCCESS;
|
cannam@226
|
91 }
|
cannam@226
|
92
|
cannam@226
|
93 int
|
cannam@226
|
94 main(int argc, char** argv)
|
cannam@226
|
95 {
|
cannam@226
|
96 if (argc < 2) {
|
cannam@226
|
97 return print_usage(argv[0], true);
|
cannam@226
|
98 }
|
cannam@226
|
99
|
cannam@226
|
100 FILE* in_fd = NULL;
|
cannam@226
|
101 SerdSyntax input_syntax = SERD_TURTLE;
|
cannam@226
|
102 SerdSyntax output_syntax = SERD_NTRIPLES;
|
cannam@226
|
103 bool from_file = true;
|
cannam@226
|
104 bool bulk_read = true;
|
cannam@226
|
105 bool bulk_write = false;
|
cannam@226
|
106 bool full_uris = false;
|
cannam@226
|
107 bool lax = false;
|
cannam@226
|
108 bool quiet = false;
|
cannam@226
|
109 const uint8_t* in_name = NULL;
|
cannam@226
|
110 const uint8_t* add_prefix = NULL;
|
cannam@226
|
111 const uint8_t* chop_prefix = NULL;
|
cannam@226
|
112 const uint8_t* root_uri = NULL;
|
cannam@226
|
113 int a = 1;
|
cannam@226
|
114 for (; a < argc && argv[a][0] == '-'; ++a) {
|
cannam@226
|
115 if (argv[a][1] == '\0') {
|
cannam@226
|
116 in_name = (const uint8_t*)"(stdin)";
|
cannam@226
|
117 in_fd = stdin;
|
cannam@226
|
118 break;
|
cannam@226
|
119 } else if (argv[a][1] == 'b') {
|
cannam@226
|
120 bulk_write = true;
|
cannam@226
|
121 } else if (argv[a][1] == 'e') {
|
cannam@226
|
122 bulk_read = false;
|
cannam@226
|
123 } else if (argv[a][1] == 'f') {
|
cannam@226
|
124 full_uris = true;
|
cannam@226
|
125 } else if (argv[a][1] == 'h') {
|
cannam@226
|
126 return print_usage(argv[0], false);
|
cannam@226
|
127 } else if (argv[a][1] == 'l') {
|
cannam@226
|
128 lax = true;
|
cannam@226
|
129 } else if (argv[a][1] == 'q') {
|
cannam@226
|
130 quiet = true;
|
cannam@226
|
131 } else if (argv[a][1] == 'v') {
|
cannam@226
|
132 return print_version();
|
cannam@226
|
133 } else if (argv[a][1] == 's') {
|
cannam@226
|
134 in_name = (const uint8_t*)"(string)";
|
cannam@226
|
135 from_file = false;
|
cannam@226
|
136 ++a;
|
cannam@226
|
137 break;
|
cannam@226
|
138 } else if (argv[a][1] == 'i') {
|
cannam@226
|
139 if (++a == argc) {
|
cannam@226
|
140 return missing_arg(argv[0], 'i');
|
cannam@226
|
141 } else if (!set_syntax(&input_syntax, argv[a])) {
|
cannam@226
|
142 return print_usage(argv[0], true);
|
cannam@226
|
143 }
|
cannam@226
|
144 } else if (argv[a][1] == 'o') {
|
cannam@226
|
145 if (++a == argc) {
|
cannam@226
|
146 return missing_arg(argv[0], 'o');
|
cannam@226
|
147 } else if (!set_syntax(&output_syntax, argv[a])) {
|
cannam@226
|
148 return print_usage(argv[0], true);
|
cannam@226
|
149 }
|
cannam@226
|
150 } else if (argv[a][1] == 'p') {
|
cannam@226
|
151 if (++a == argc) {
|
cannam@226
|
152 return missing_arg(argv[0], 'p');
|
cannam@226
|
153 }
|
cannam@226
|
154 add_prefix = (const uint8_t*)argv[a];
|
cannam@226
|
155 } else if (argv[a][1] == 'c') {
|
cannam@226
|
156 if (++a == argc) {
|
cannam@226
|
157 return missing_arg(argv[0], 'c');
|
cannam@226
|
158 }
|
cannam@226
|
159 chop_prefix = (const uint8_t*)argv[a];
|
cannam@226
|
160 } else if (argv[a][1] == 'r') {
|
cannam@226
|
161 if (++a == argc) {
|
cannam@226
|
162 return missing_arg(argv[0], 'r');
|
cannam@226
|
163 }
|
cannam@226
|
164 root_uri = (const uint8_t*)argv[a];
|
cannam@226
|
165 } else {
|
cannam@226
|
166 SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
|
cannam@226
|
167 return print_usage(argv[0], true);
|
cannam@226
|
168 }
|
cannam@226
|
169 }
|
cannam@226
|
170
|
cannam@226
|
171 if (a == argc) {
|
cannam@226
|
172 SERDI_ERROR("missing input\n");
|
cannam@226
|
173 return 1;
|
cannam@226
|
174 }
|
cannam@226
|
175
|
cannam@226
|
176 const uint8_t* input = (const uint8_t*)argv[a++];
|
cannam@226
|
177 if (from_file) {
|
cannam@226
|
178 in_name = in_name ? in_name : input;
|
cannam@226
|
179 if (!in_fd) {
|
cannam@226
|
180 input = serd_uri_to_path(in_name);
|
cannam@226
|
181 if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) {
|
cannam@226
|
182 return 1;
|
cannam@226
|
183 }
|
cannam@226
|
184 }
|
cannam@226
|
185 }
|
cannam@226
|
186
|
cannam@226
|
187 SerdURI base_uri = SERD_URI_NULL;
|
cannam@226
|
188 SerdNode base = SERD_NODE_NULL;
|
cannam@226
|
189 if (a < argc) { // Base URI given on command line
|
cannam@226
|
190 base = serd_node_new_uri_from_string(
|
cannam@226
|
191 (const uint8_t*)argv[a], NULL, &base_uri);
|
cannam@226
|
192 } else if (from_file && in_fd != stdin) { // Use input file URI
|
cannam@226
|
193 base = serd_node_new_file_uri(input, NULL, &base_uri, true);
|
cannam@226
|
194 }
|
cannam@226
|
195
|
cannam@226
|
196 FILE* out_fd = stdout;
|
cannam@226
|
197 SerdEnv* env = serd_env_new(&base);
|
cannam@226
|
198
|
cannam@226
|
199 int output_style = 0;
|
cannam@226
|
200 if (output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) {
|
cannam@226
|
201 output_style |= SERD_STYLE_ASCII;
|
cannam@226
|
202 } else if (output_syntax == SERD_TURTLE) {
|
cannam@226
|
203 output_style |= SERD_STYLE_ABBREVIATED;
|
cannam@226
|
204 if (!full_uris) {
|
cannam@226
|
205 output_style |= SERD_STYLE_CURIED;
|
cannam@226
|
206 }
|
cannam@226
|
207 }
|
cannam@226
|
208
|
cannam@226
|
209 if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
|
cannam@226
|
210 (output_style & SERD_STYLE_CURIED)) {
|
cannam@226
|
211 // Base URI may change and/or we're abbreviating URIs, so must resolve
|
cannam@226
|
212 output_style |= SERD_STYLE_RESOLVED;
|
cannam@226
|
213 }
|
cannam@226
|
214
|
cannam@226
|
215 if (bulk_write) {
|
cannam@226
|
216 output_style |= SERD_STYLE_BULK;
|
cannam@226
|
217 }
|
cannam@226
|
218
|
cannam@226
|
219 SerdWriter* writer = serd_writer_new(
|
cannam@226
|
220 output_syntax, (SerdStyle)output_style,
|
cannam@226
|
221 env, &base_uri, serd_file_sink, out_fd);
|
cannam@226
|
222
|
cannam@226
|
223 SerdReader* reader = serd_reader_new(
|
cannam@226
|
224 input_syntax, writer, NULL,
|
cannam@226
|
225 (SerdBaseSink)serd_writer_set_base_uri,
|
cannam@226
|
226 (SerdPrefixSink)serd_writer_set_prefix,
|
cannam@226
|
227 (SerdStatementSink)serd_writer_write_statement,
|
cannam@226
|
228 (SerdEndSink)serd_writer_end_anon);
|
cannam@226
|
229
|
cannam@226
|
230 serd_reader_set_strict(reader, !lax);
|
cannam@226
|
231 if (quiet) {
|
cannam@226
|
232 serd_reader_set_error_sink(reader, quiet_error_sink, NULL);
|
cannam@226
|
233 serd_writer_set_error_sink(writer, quiet_error_sink, NULL);
|
cannam@226
|
234 }
|
cannam@226
|
235
|
cannam@226
|
236 SerdNode root = serd_node_from_string(SERD_URI, root_uri);
|
cannam@226
|
237 serd_writer_set_root_uri(writer, &root);
|
cannam@226
|
238 serd_writer_chop_blank_prefix(writer, chop_prefix);
|
cannam@226
|
239 serd_reader_add_blank_prefix(reader, add_prefix);
|
cannam@226
|
240
|
cannam@226
|
241 SerdStatus status = SERD_SUCCESS;
|
cannam@226
|
242 if (!from_file) {
|
cannam@226
|
243 status = serd_reader_read_string(reader, input);
|
cannam@226
|
244 } else if (bulk_read) {
|
cannam@226
|
245 status = serd_reader_read_file_handle(reader, in_fd, in_name);
|
cannam@226
|
246 } else {
|
cannam@226
|
247 status = serd_reader_start_stream(reader, in_fd, in_name, false);
|
cannam@226
|
248 while (!status) {
|
cannam@226
|
249 status = serd_reader_read_chunk(reader);
|
cannam@226
|
250 }
|
cannam@226
|
251 serd_reader_end_stream(reader);
|
cannam@226
|
252 }
|
cannam@226
|
253
|
cannam@226
|
254 serd_reader_free(reader);
|
cannam@226
|
255 serd_writer_finish(writer);
|
cannam@226
|
256 serd_writer_free(writer);
|
cannam@226
|
257 serd_env_free(env);
|
cannam@226
|
258 serd_node_free(&base);
|
cannam@226
|
259
|
cannam@226
|
260 if (from_file) {
|
cannam@226
|
261 fclose(in_fd);
|
cannam@226
|
262 }
|
cannam@226
|
263
|
cannam@226
|
264 if (fclose(out_fd)) {
|
cannam@226
|
265 perror("serdi: write error");
|
cannam@226
|
266 status = SERD_ERR_UNKNOWN;
|
cannam@226
|
267 }
|
cannam@226
|
268
|
cannam@226
|
269 return (status > SERD_FAILURE) ? 1 : 0;
|
cannam@226
|
270 }
|