annotate ext/serd/src/serdi.c @ 248:cb51adadccd5

Add forgotten files!
author Chris Cannam <cannam@all-day-breakfast.com>
date Thu, 15 Jun 2017 10:12:16 +0100
parents c5cdc9e6a4bf
children
rev   line source
cannam@226 1 /*
cannam@226 2 Copyright 2011-2017 David Robillard <http://drobilla.net>
cannam@226 3
cannam@226 4 Permission to use, copy, modify, and/or distribute this software for any
cannam@226 5 purpose with or without fee is hereby granted, provided that the above
cannam@226 6 copyright notice and this permission notice appear in all copies.
cannam@226 7
cannam@226 8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
cannam@226 9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
cannam@226 10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
cannam@226 11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
cannam@226 12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
cannam@226 13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
cannam@226 14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
cannam@226 15 */
cannam@226 16
cannam@226 17 #include "serd_internal.h"
cannam@226 18
cannam@226 19 #include <assert.h>
cannam@226 20 #include <errno.h>
cannam@226 21 #include <stdlib.h>
cannam@226 22 #include <string.h>
cannam@226 23
/*
  Diagnostic helpers: write a message to stderr prefixed with "serdi: ".

  SERDI_ERROR takes a string literal; SERDI_ERRORF takes a printf-style
  format literal followed by at least one argument.

  The expansions deliberately do NOT end in a semicolon: the call site
  supplies it.  This keeps each use a single statement, so the macros are
  safe in an unbraced if/else, and lets the fprintf() result be used as an
  expression.
*/
#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg)
#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__)
cannam@226 26
cannam@226 27 static int
cannam@226 28 print_version(void)
cannam@226 29 {
cannam@226 30 printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n");
cannam@226 31 printf("Copyright 2011-2017 David Robillard <http://drobilla.net>.\n"
cannam@226 32 "License: <http://www.opensource.org/licenses/isc>\n"
cannam@226 33 "This is free software; you are free to change and redistribute it."
cannam@226 34 "\nThere is NO WARRANTY, to the extent permitted by law.\n");
cannam@226 35 return 0;
cannam@226 36 }
cannam@226 37
/**
   Print the command-line usage summary.

   @param name  Program name shown in the "Usage:" line.
   @param error If true, the text goes to stderr preceded by a blank line;
                otherwise it goes to stdout.
   @return 1 if `error` is true, 0 otherwise (usable as an exit status).
*/
static int
print_usage(const char* name, bool error)
{
	// Everything after the "Usage:" line is fixed text
	static const char* const help_text =
		"Read and write RDF syntax.\n"
		"Use - for INPUT to read from standard input.\n\n"
		" -b Fast bulk output for large serialisations.\n"
		" -c PREFIX Chop PREFIX from matching blank node IDs.\n"
		" -e Eat input one character at a time.\n"
		" -f Keep full URIs in input (don't qualify).\n"
		" -h Display this help and exit.\n"
		" -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"
		" -l Lax (non-strict) parsing.\n"
		" -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"
		" -p PREFIX Add PREFIX to blank node IDs.\n"
		" -q Suppress all output except data.\n"
		" -r ROOT_URI Keep relative URIs within ROOT_URI.\n"
		" -s INPUT Parse INPUT as string (terminates options).\n"
		" -v Display version information and exit.\n";

	FILE* const out = error ? stderr : stdout;

	if (error) {
		// Separate the usage text from the error message printed before it
		fputc('\n', out);
	}

	fprintf(out, "Usage: %s [OPTION]... INPUT [BASE_URI]\n", name);
	fputs(help_text, out);

	if (error) {
		return 1;
	}
	return 0;
}
cannam@226 61
cannam@226 62 static bool
cannam@226 63 set_syntax(SerdSyntax* syntax, const char* name)
cannam@226 64 {
cannam@226 65 if (!strcmp(name, "turtle")) {
cannam@226 66 *syntax = SERD_TURTLE;
cannam@226 67 } else if (!strcmp(name, "ntriples")) {
cannam@226 68 *syntax = SERD_NTRIPLES;
cannam@226 69 } else if (!strcmp(name, "nquads")) {
cannam@226 70 *syntax = SERD_NQUADS;
cannam@226 71 } else if (!strcmp(name, "trig")) {
cannam@226 72 *syntax = SERD_TRIG;
cannam@226 73 } else {
cannam@226 74 SERDI_ERRORF("unknown syntax `%s'\n", name);
cannam@226 75 return false;
cannam@226 76 }
cannam@226 77 return true;
cannam@226 78 }
cannam@226 79
/**
   Report that option `opt` was given without its required argument.

   Prints the diagnostic to stderr, then the usage summary in error mode.

   @param name Program name, forwarded to print_usage().
   @param opt  The option letter that is missing its argument.
   @return The result of print_usage(name, true), i.e. 1.
*/
static int
missing_arg(const char* name, char opt)
{
	// Say what went wrong first, then show how the program should be called
	SERDI_ERRORF("option requires an argument -- '%c'\n", opt);

	const int exit_code = print_usage(name, true);
	return exit_code;
}
cannam@226 86
cannam@226 87 static SerdStatus
cannam@226 88 quiet_error_sink(void* handle, const SerdError* e)
cannam@226 89 {
cannam@226 90 return SERD_SUCCESS;
cannam@226 91 }
cannam@226 92
cannam@226 93 int
cannam@226 94 main(int argc, char** argv)
cannam@226 95 {
cannam@226 96 if (argc < 2) {
cannam@226 97 return print_usage(argv[0], true);
cannam@226 98 }
cannam@226 99
cannam@226 100 FILE* in_fd = NULL;
cannam@226 101 SerdSyntax input_syntax = SERD_TURTLE;
cannam@226 102 SerdSyntax output_syntax = SERD_NTRIPLES;
cannam@226 103 bool from_file = true;
cannam@226 104 bool bulk_read = true;
cannam@226 105 bool bulk_write = false;
cannam@226 106 bool full_uris = false;
cannam@226 107 bool lax = false;
cannam@226 108 bool quiet = false;
cannam@226 109 const uint8_t* in_name = NULL;
cannam@226 110 const uint8_t* add_prefix = NULL;
cannam@226 111 const uint8_t* chop_prefix = NULL;
cannam@226 112 const uint8_t* root_uri = NULL;
cannam@226 113 int a = 1;
cannam@226 114 for (; a < argc && argv[a][0] == '-'; ++a) {
cannam@226 115 if (argv[a][1] == '\0') {
cannam@226 116 in_name = (const uint8_t*)"(stdin)";
cannam@226 117 in_fd = stdin;
cannam@226 118 break;
cannam@226 119 } else if (argv[a][1] == 'b') {
cannam@226 120 bulk_write = true;
cannam@226 121 } else if (argv[a][1] == 'e') {
cannam@226 122 bulk_read = false;
cannam@226 123 } else if (argv[a][1] == 'f') {
cannam@226 124 full_uris = true;
cannam@226 125 } else if (argv[a][1] == 'h') {
cannam@226 126 return print_usage(argv[0], false);
cannam@226 127 } else if (argv[a][1] == 'l') {
cannam@226 128 lax = true;
cannam@226 129 } else if (argv[a][1] == 'q') {
cannam@226 130 quiet = true;
cannam@226 131 } else if (argv[a][1] == 'v') {
cannam@226 132 return print_version();
cannam@226 133 } else if (argv[a][1] == 's') {
cannam@226 134 in_name = (const uint8_t*)"(string)";
cannam@226 135 from_file = false;
cannam@226 136 ++a;
cannam@226 137 break;
cannam@226 138 } else if (argv[a][1] == 'i') {
cannam@226 139 if (++a == argc) {
cannam@226 140 return missing_arg(argv[0], 'i');
cannam@226 141 } else if (!set_syntax(&input_syntax, argv[a])) {
cannam@226 142 return print_usage(argv[0], true);
cannam@226 143 }
cannam@226 144 } else if (argv[a][1] == 'o') {
cannam@226 145 if (++a == argc) {
cannam@226 146 return missing_arg(argv[0], 'o');
cannam@226 147 } else if (!set_syntax(&output_syntax, argv[a])) {
cannam@226 148 return print_usage(argv[0], true);
cannam@226 149 }
cannam@226 150 } else if (argv[a][1] == 'p') {
cannam@226 151 if (++a == argc) {
cannam@226 152 return missing_arg(argv[0], 'p');
cannam@226 153 }
cannam@226 154 add_prefix = (const uint8_t*)argv[a];
cannam@226 155 } else if (argv[a][1] == 'c') {
cannam@226 156 if (++a == argc) {
cannam@226 157 return missing_arg(argv[0], 'c');
cannam@226 158 }
cannam@226 159 chop_prefix = (const uint8_t*)argv[a];
cannam@226 160 } else if (argv[a][1] == 'r') {
cannam@226 161 if (++a == argc) {
cannam@226 162 return missing_arg(argv[0], 'r');
cannam@226 163 }
cannam@226 164 root_uri = (const uint8_t*)argv[a];
cannam@226 165 } else {
cannam@226 166 SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1);
cannam@226 167 return print_usage(argv[0], true);
cannam@226 168 }
cannam@226 169 }
cannam@226 170
cannam@226 171 if (a == argc) {
cannam@226 172 SERDI_ERROR("missing input\n");
cannam@226 173 return 1;
cannam@226 174 }
cannam@226 175
cannam@226 176 const uint8_t* input = (const uint8_t*)argv[a++];
cannam@226 177 if (from_file) {
cannam@226 178 in_name = in_name ? in_name : input;
cannam@226 179 if (!in_fd) {
cannam@226 180 input = serd_uri_to_path(in_name);
cannam@226 181 if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) {
cannam@226 182 return 1;
cannam@226 183 }
cannam@226 184 }
cannam@226 185 }
cannam@226 186
cannam@226 187 SerdURI base_uri = SERD_URI_NULL;
cannam@226 188 SerdNode base = SERD_NODE_NULL;
cannam@226 189 if (a < argc) { // Base URI given on command line
cannam@226 190 base = serd_node_new_uri_from_string(
cannam@226 191 (const uint8_t*)argv[a], NULL, &base_uri);
cannam@226 192 } else if (from_file && in_fd != stdin) { // Use input file URI
cannam@226 193 base = serd_node_new_file_uri(input, NULL, &base_uri, true);
cannam@226 194 }
cannam@226 195
cannam@226 196 FILE* out_fd = stdout;
cannam@226 197 SerdEnv* env = serd_env_new(&base);
cannam@226 198
cannam@226 199 int output_style = 0;
cannam@226 200 if (output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) {
cannam@226 201 output_style |= SERD_STYLE_ASCII;
cannam@226 202 } else if (output_syntax == SERD_TURTLE) {
cannam@226 203 output_style |= SERD_STYLE_ABBREVIATED;
cannam@226 204 if (!full_uris) {
cannam@226 205 output_style |= SERD_STYLE_CURIED;
cannam@226 206 }
cannam@226 207 }
cannam@226 208
cannam@226 209 if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) ||
cannam@226 210 (output_style & SERD_STYLE_CURIED)) {
cannam@226 211 // Base URI may change and/or we're abbreviating URIs, so must resolve
cannam@226 212 output_style |= SERD_STYLE_RESOLVED;
cannam@226 213 }
cannam@226 214
cannam@226 215 if (bulk_write) {
cannam@226 216 output_style |= SERD_STYLE_BULK;
cannam@226 217 }
cannam@226 218
cannam@226 219 SerdWriter* writer = serd_writer_new(
cannam@226 220 output_syntax, (SerdStyle)output_style,
cannam@226 221 env, &base_uri, serd_file_sink, out_fd);
cannam@226 222
cannam@226 223 SerdReader* reader = serd_reader_new(
cannam@226 224 input_syntax, writer, NULL,
cannam@226 225 (SerdBaseSink)serd_writer_set_base_uri,
cannam@226 226 (SerdPrefixSink)serd_writer_set_prefix,
cannam@226 227 (SerdStatementSink)serd_writer_write_statement,
cannam@226 228 (SerdEndSink)serd_writer_end_anon);
cannam@226 229
cannam@226 230 serd_reader_set_strict(reader, !lax);
cannam@226 231 if (quiet) {
cannam@226 232 serd_reader_set_error_sink(reader, quiet_error_sink, NULL);
cannam@226 233 serd_writer_set_error_sink(writer, quiet_error_sink, NULL);
cannam@226 234 }
cannam@226 235
cannam@226 236 SerdNode root = serd_node_from_string(SERD_URI, root_uri);
cannam@226 237 serd_writer_set_root_uri(writer, &root);
cannam@226 238 serd_writer_chop_blank_prefix(writer, chop_prefix);
cannam@226 239 serd_reader_add_blank_prefix(reader, add_prefix);
cannam@226 240
cannam@226 241 SerdStatus status = SERD_SUCCESS;
cannam@226 242 if (!from_file) {
cannam@226 243 status = serd_reader_read_string(reader, input);
cannam@226 244 } else if (bulk_read) {
cannam@226 245 status = serd_reader_read_file_handle(reader, in_fd, in_name);
cannam@226 246 } else {
cannam@226 247 status = serd_reader_start_stream(reader, in_fd, in_name, false);
cannam@226 248 while (!status) {
cannam@226 249 status = serd_reader_read_chunk(reader);
cannam@226 250 }
cannam@226 251 serd_reader_end_stream(reader);
cannam@226 252 }
cannam@226 253
cannam@226 254 serd_reader_free(reader);
cannam@226 255 serd_writer_finish(writer);
cannam@226 256 serd_writer_free(writer);
cannam@226 257 serd_env_free(env);
cannam@226 258 serd_node_free(&base);
cannam@226 259
cannam@226 260 if (from_file) {
cannam@226 261 fclose(in_fd);
cannam@226 262 }
cannam@226 263
cannam@226 264 if (fclose(out_fd)) {
cannam@226 265 perror("serdi: write error");
cannam@226 266 status = SERD_ERR_UNKNOWN;
cannam@226 267 }
cannam@226 268
cannam@226 269 return (status > SERD_FAILURE) ? 1 : 0;
cannam@226 270 }