annotate ext/sord/src/sord_validate.c @ 296:50a0b4fea7f1 tip master

Merge pull request #8 from michel-slm/gcc15 Include headers needed to compile with GCC 15's -std=gnu23 default
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 27 Jan 2025 08:53:58 +0000
parents c5cdc9e6a4bf
children
rev   line source
cannam@226 1 /*
cannam@226 2 Copyright 2012-2017 David Robillard <http://drobilla.net>
cannam@226 3
cannam@226 4 Permission to use, copy, modify, and/or distribute this software for any
cannam@226 5 purpose with or without fee is hereby granted, provided that the above
cannam@226 6 copyright notice and this permission notice appear in all copies.
cannam@226 7
cannam@226 8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
cannam@226 9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
cannam@226 10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
cannam@226 11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
cannam@226 12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
cannam@226 13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
cannam@226 14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
cannam@226 15 */
cannam@226 16
cannam@226 17 #define _BSD_SOURCE 1 // for realpath
cannam@226 18 #define _DEFAULT_SOURCE 1 // for realpath
cannam@226 19
cannam@226 20 #include <assert.h>
cannam@226 21 #include <stdlib.h>
cannam@226 22 #include <string.h>
cannam@226 23
cannam@226 24 #ifdef _WIN32
cannam@226 25 # include <windows.h>
cannam@226 26 #endif
cannam@226 27
cannam@226 28 #include "serd/serd.h"
cannam@226 29 #include "sord/sord.h"
cannam@226 30 #include "sord_config.h"
cannam@226 31
cannam@226 32 #ifdef HAVE_PCRE
cannam@226 33 # include <pcre.h>
cannam@226 34 #endif
cannam@226 35
/** Cast a C string (or any `char` pointer expression) to a byte pointer. */
#define USTR(s) ((const uint8_t*)(s))

/* Namespace prefixes for the vocabularies this tool knows about.

   These intentionally expand to an unparenthesised cast followed by a string
   literal: the URI() macro in main() writes `NS_prefix "suffix"` and relies on
   adjacent string literal concatenation happening before the cast applies.
   Wrapping the expansions in parentheses would break that use. */
#define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/"
#define NS_owl (const uint8_t*)"http://www.w3.org/2002/07/owl#"
#define NS_rdf (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#"
#define NS_xsd (const uint8_t*)"http://www.w3.org/2001/XMLSchema#"
cannam@226 43
cannam@226 44 typedef struct {
cannam@226 45 SordNode* foaf_Document;
cannam@226 46 SordNode* owl_AnnotationProperty;
cannam@226 47 SordNode* owl_Class;
cannam@226 48 SordNode* owl_DatatypeProperty;
cannam@226 49 SordNode* owl_FunctionalProperty;
cannam@226 50 SordNode* owl_InverseFunctionalProperty;
cannam@226 51 SordNode* owl_ObjectProperty;
cannam@226 52 SordNode* owl_OntologyProperty;
cannam@226 53 SordNode* owl_Restriction;
cannam@226 54 SordNode* owl_Thing;
cannam@226 55 SordNode* owl_cardinality;
cannam@226 56 SordNode* owl_equivalentClass;
cannam@226 57 SordNode* owl_maxCardinality;
cannam@226 58 SordNode* owl_minCardinality;
cannam@226 59 SordNode* owl_onDatatype;
cannam@226 60 SordNode* owl_onProperty;
cannam@226 61 SordNode* owl_someValuesFrom;
cannam@226 62 SordNode* owl_withRestrictions;
cannam@226 63 SordNode* rdf_PlainLiteral;
cannam@226 64 SordNode* rdf_Property;
cannam@226 65 SordNode* rdf_first;
cannam@226 66 SordNode* rdf_rest;
cannam@226 67 SordNode* rdf_type;
cannam@226 68 SordNode* rdfs_Class;
cannam@226 69 SordNode* rdfs_Literal;
cannam@226 70 SordNode* rdfs_Resource;
cannam@226 71 SordNode* rdfs_domain;
cannam@226 72 SordNode* rdfs_label;
cannam@226 73 SordNode* rdfs_range;
cannam@226 74 SordNode* rdfs_subClassOf;
cannam@226 75 SordNode* xsd_anyURI;
cannam@226 76 SordNode* xsd_decimal;
cannam@226 77 SordNode* xsd_double;
cannam@226 78 SordNode* xsd_maxInclusive;
cannam@226 79 SordNode* xsd_minInclusive;
cannam@226 80 SordNode* xsd_pattern;
cannam@226 81 SordNode* xsd_string;
cannam@226 82 } URIs;
cannam@226 83
/** Running count of errors reported through errorf(). */
int n_errors = 0;

/** Running count of restriction facets (pattern/min/max) that were checked
    and passed in check_restriction(). */
int n_restrictions = 0;

/** When true (set by the -l option), errorf() separates the message and the
    offending triple with tabs instead of newlines. */
bool one_line_errors = false;
cannam@226 87
cannam@226 88 static int
cannam@226 89 print_version(void)
cannam@226 90 {
cannam@226 91 printf("sord_validate " SORD_VERSION
cannam@226 92 " <http://drobilla.net/software/sord>\n");
cannam@226 93 printf("Copyright 2012-2017 David Robillard <http://drobilla.net>.\n"
cannam@226 94 "License: <http://www.opensource.org/licenses/isc>\n"
cannam@226 95 "This is free software; you are free to change and redistribute it."
cannam@226 96 "\nThere is NO WARRANTY, to the extent permitted by law.\n");
cannam@226 97 return 0;
cannam@226 98 }
cannam@226 99
/**
   Print the command line help text.

   @param name Program name to show in the usage line.
   @param error If true, the text goes to stderr and 1 is returned; otherwise
   it goes to stdout and 0 is returned.
   @return 1 if `error` is true, otherwise 0.
*/
static int
print_usage(const char* name, bool error)
{
	// Everything after the usage line is fixed text, emitted in order
	static const char* const help[] = {
		"Validate RDF data\n\n",
		" -h Display this help and exit\n",
		" -l Print errors on a single line.\n",
		" -v Display version information and exit\n",
		"Validate RDF data. This is a simple validator which checks\n"
		"that all used properties are actually defined. It does not do\n"
		"any fancy file retrieval, the files passed on the command line\n"
		"are the only data that is read. In other words, you must pass\n"
		"the definition of all vocabularies used on the command line.\n"
	};

	FILE* const os = error ? stderr : stdout;

	fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
	for (size_t i = 0; i < sizeof(help) / sizeof(help[0]); ++i) {
		fputs(help[i], os);
	}

	return error ? 1 : 0;
}
cannam@226 117
/**
   Resolve `path` to an absolute path.

   @param path File system path to resolve; NULL is rejected.
   @return A newly allocated string that the caller must free(), or NULL if
   `path` is NULL, allocation fails, or the path cannot be resolved.
*/
static uint8_t*
absolute_path(const uint8_t* path)
{
	if (!path) {
		return NULL;
	}

#ifdef _WIN32
	char* out = (char*)malloc(MAX_PATH);
	if (!out) {
		return NULL;
	}

	/* On success the return value is the length written (excluding the
	   terminator).  Zero means failure, and a value that does not fit in the
	   buffer means nothing usable was written, so the buffer is discarded
	   rather than returned uninitialised. */
	const DWORD len = GetFullPathName((const char*)path, MAX_PATH, out, NULL);
	if (len == 0 || len >= MAX_PATH) {
		free(out);
		return NULL;
	}

	return (uint8_t*)out;
#else
	return (uint8_t*)realpath((const char*)path, NULL);
#endif
}
cannam@226 129
cannam@226 130 static int
cannam@226 131 errorf(const SordQuad quad, const char* fmt, ...)
cannam@226 132 {
cannam@226 133 va_list args;
cannam@226 134 va_start(args, fmt);
cannam@226 135 fprintf(stderr, "error: ");
cannam@226 136 vfprintf(stderr, fmt, args);
cannam@226 137 va_end(args);
cannam@226 138
cannam@226 139 const char* sep = one_line_errors ? "\t" : "\n ";
cannam@226 140 fprintf(stderr, "%s%s%s%s%s%s\n",
cannam@226 141 sep, (const char*)sord_node_get_string(quad[SORD_SUBJECT]),
cannam@226 142 sep, (const char*)sord_node_get_string(quad[SORD_PREDICATE]),
cannam@226 143 sep, (const char*)sord_node_get_string(quad[SORD_OBJECT]));
cannam@226 144
cannam@226 145 ++n_errors;
cannam@226 146 return 1;
cannam@226 147 }
cannam@226 148
cannam@226 149 static bool
cannam@226 150 is_descendant_of(SordModel* model,
cannam@226 151 const URIs* uris,
cannam@226 152 const SordNode* child,
cannam@226 153 const SordNode* parent,
cannam@226 154 const SordNode* pred)
cannam@226 155 {
cannam@226 156 if (!child) {
cannam@226 157 return false;
cannam@226 158 } else if (sord_node_equals(child, parent) ||
cannam@226 159 sord_ask(model, child, uris->owl_equivalentClass, parent, NULL)) {
cannam@226 160 return true;
cannam@226 161 }
cannam@226 162
cannam@226 163 SordIter* i = sord_search(model, child, pred, NULL, NULL);
cannam@226 164 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 165 const SordNode* o = sord_iter_get_node(i, SORD_OBJECT);
cannam@226 166 if (sord_node_equals(child, o)) {
cannam@226 167 continue; // Weird class is explicitly a descendent of itself
cannam@226 168 }
cannam@226 169 if (is_descendant_of(model, uris, o, parent, pred)) {
cannam@226 170 sord_iter_free(i);
cannam@226 171 return true;
cannam@226 172 }
cannam@226 173 }
cannam@226 174 sord_iter_free(i);
cannam@226 175
cannam@226 176 return false;
cannam@226 177 }
cannam@226 178
/**
   Return true iff the whole of `str` matches the regular expression `pat`.

   When built without PCRE no matching is possible, and every string is
   accepted.

   @param pat Pattern, as a NUL-terminated string.
   @param str String to test.
   @return True on a full match (or when PCRE is unavailable); false if the
   string does not match, the pattern does not compile, or memory runs out.
*/
static bool
regexp_match(const uint8_t* pat, const char* str)
{
#ifdef HAVE_PCRE
	/* Wrap the pattern as (?:pat)$ so that, together with PCRE_ANCHORED, only
	   a match of the entire string is accepted.  The group matters: simply
	   appending `$' to a pattern like `a|b' would anchor only the last
	   alternative and let any string starting with `a' through. */
	static const char prefix[] = "(?:";
	static const char suffix[] = ")$";

	const size_t pre_len = sizeof(prefix) - 1;
	const size_t suf_len = sizeof(suffix) - 1;
	const size_t pat_len = strlen((const char*)pat);

	char* const regx = (char*)malloc(pre_len + pat_len + suf_len + 1);
	if (!regx) {
		fprintf(stderr, "Failed to allocate memory for pattern `%s'\n",
		        (const char*)pat);
		return false;
	}

	memcpy(regx, prefix, pre_len);
	memcpy(regx + pre_len, pat, pat_len);
	memcpy(regx + pre_len + pat_len, suffix, suf_len + 1); // Includes NUL

	const char* err       = NULL;
	int         erroffset = 0;
	pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL);
	free(regx);
	if (!re) {
		// Report the offset relative to the pattern as the user wrote it
		const int user_offset =
			(erroffset > (int)pre_len) ? erroffset - (int)pre_len : 0;
		fprintf(stderr, "Error in pattern `%s' at offset %d (%s)\n",
		        (const char*)pat, user_offset, err);
		return false;
	}

	const bool ret =
		pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
	pcre_free(re);
	return ret;
#else
	(void)pat;
	(void)str;
	return true;
#endif // HAVE_PCRE
}
cannam@226 206
cannam@226 207 static int
cannam@226 208 bound_cmp(SordModel* model,
cannam@226 209 const URIs* uris,
cannam@226 210 const SordNode* literal,
cannam@226 211 const SordNode* type,
cannam@226 212 const SordNode* bound)
cannam@226 213 {
cannam@226 214 const char* str = (const char*)sord_node_get_string(literal);
cannam@226 215 const char* bound_str = (const char*)sord_node_get_string(bound);
cannam@226 216 const bool is_numeric =
cannam@226 217 is_descendant_of(model, uris, type, uris->xsd_decimal, uris->owl_onDatatype) ||
cannam@226 218 is_descendant_of(model, uris, type, uris->xsd_double, uris->owl_onDatatype);
cannam@226 219
cannam@226 220 if (is_numeric) {
cannam@226 221 const double fbound = serd_strtod(bound_str, NULL);
cannam@226 222 const double fliteral = serd_strtod(str, NULL);
cannam@226 223 return ((fliteral < fbound) ? -1 :
cannam@226 224 (fliteral > fbound) ? 1 :
cannam@226 225 0);
cannam@226 226 } else {
cannam@226 227 return strcmp(str, bound_str);
cannam@226 228 }
cannam@226 229 }
cannam@226 230
cannam@226 231 static bool
cannam@226 232 check_restriction(SordModel* model,
cannam@226 233 const URIs* uris,
cannam@226 234 const SordNode* literal,
cannam@226 235 const SordNode* type,
cannam@226 236 const SordNode* restriction)
cannam@226 237 {
cannam@226 238 size_t len = 0;
cannam@226 239 const char* str = (const char*)sord_node_get_string_counted(literal, &len);
cannam@226 240
cannam@226 241 // Check xsd:pattern
cannam@226 242 SordIter* p = sord_search(model, restriction, uris->xsd_pattern, 0, 0);
cannam@226 243 if (p) {
cannam@226 244 const SordNode* pat = sord_iter_get_node(p, SORD_OBJECT);
cannam@226 245 if (!regexp_match(sord_node_get_string(pat), str)) {
cannam@226 246 fprintf(stderr, "`%s' does not match <%s> pattern `%s'\n",
cannam@226 247 sord_node_get_string(literal),
cannam@226 248 sord_node_get_string(type),
cannam@226 249 sord_node_get_string(pat));
cannam@226 250 sord_iter_free(p);
cannam@226 251 return false;
cannam@226 252 }
cannam@226 253 sord_iter_free(p);
cannam@226 254 ++n_restrictions;
cannam@226 255 }
cannam@226 256
cannam@226 257 // Check xsd:minInclusive
cannam@226 258 SordIter* l = sord_search(model, restriction, uris->xsd_minInclusive, 0, 0);
cannam@226 259 if (l) {
cannam@226 260 const SordNode* lower = sord_iter_get_node(l, SORD_OBJECT);
cannam@226 261 if (bound_cmp(model, uris, literal, type, lower) < 0) {
cannam@226 262 fprintf(stderr, "`%s' is not >= <%s> minimum `%s'\n",
cannam@226 263 sord_node_get_string(literal),
cannam@226 264 sord_node_get_string(type),
cannam@226 265 sord_node_get_string(lower));
cannam@226 266 sord_iter_free(l);
cannam@226 267 return false;
cannam@226 268 }
cannam@226 269 sord_iter_free(l);
cannam@226 270 ++n_restrictions;
cannam@226 271 }
cannam@226 272
cannam@226 273 // Check xsd:maxInclusive
cannam@226 274 SordIter* u = sord_search(model, restriction, uris->xsd_maxInclusive, 0, 0);
cannam@226 275 if (u) {
cannam@226 276 const SordNode* upper = sord_iter_get_node(u, SORD_OBJECT);
cannam@226 277 if (bound_cmp(model, uris, literal, type, upper) > 0) {
cannam@226 278 fprintf(stderr, "`%s' is not <= <%s> maximum `%s'\n",
cannam@226 279 sord_node_get_string(literal),
cannam@226 280 sord_node_get_string(type),
cannam@226 281 sord_node_get_string(upper));
cannam@226 282 sord_iter_free(u);
cannam@226 283 return false;
cannam@226 284 }
cannam@226 285 sord_iter_free(u);
cannam@226 286 ++n_restrictions;
cannam@226 287 }
cannam@226 288
cannam@226 289 return true; // Unknown restriction, be quietly tolerant
cannam@226 290 }
cannam@226 291
cannam@226 292 static bool
cannam@226 293 literal_is_valid(SordModel* model,
cannam@226 294 const URIs* uris,
cannam@226 295 const SordQuad quad,
cannam@226 296 const SordNode* literal,
cannam@226 297 const SordNode* type)
cannam@226 298 {
cannam@226 299 if (!type) {
cannam@226 300 return true;
cannam@226 301 }
cannam@226 302
cannam@226 303 /* Check that literal data is related to required type. We don't do a
cannam@226 304 strict subtype check here because e.g. an xsd:decimal might be a valid
cannam@226 305 xsd:unsignedInt, which the pattern checks will verify, but if the
cannam@226 306 literal type is not related to the required type at all
cannam@226 307 (e.g. xsd:decimal and xsd:string) there is a problem. */
cannam@226 308 const SordNode* datatype = sord_node_get_datatype(literal);
cannam@226 309 if (datatype && datatype != type) {
cannam@226 310 if (!is_descendant_of(
cannam@226 311 model, uris,
cannam@226 312 datatype, type, uris->owl_onDatatype) &&
cannam@226 313 !is_descendant_of(
cannam@226 314 model, uris,
cannam@226 315 type, datatype, uris->owl_onDatatype) &&
cannam@226 316 !(sord_node_equals(datatype, uris->xsd_decimal) &&
cannam@226 317 is_descendant_of(
cannam@226 318 model, uris,
cannam@226 319 type, uris->xsd_double, uris->owl_onDatatype))) {
cannam@226 320 errorf(quad,
cannam@226 321 "Literal `%s' datatype <%s> is not compatible with <%s>\n",
cannam@226 322 sord_node_get_string(literal),
cannam@226 323 sord_node_get_string(datatype),
cannam@226 324 sord_node_get_string(type));
cannam@226 325 return false;
cannam@226 326 }
cannam@226 327 }
cannam@226 328
cannam@226 329 // Find restrictions list
cannam@226 330 SordIter* rs = sord_search(model, type, uris->owl_withRestrictions, 0, 0);
cannam@226 331 if (sord_iter_end(rs)) {
cannam@226 332 return true; // No restrictions
cannam@226 333 }
cannam@226 334
cannam@226 335 // Walk list, checking each restriction
cannam@226 336 const SordNode* head = sord_iter_get_node(rs, SORD_OBJECT);
cannam@226 337 while (head) {
cannam@226 338 SordIter* f = sord_search(model, head, uris->rdf_first, 0, 0);
cannam@226 339 if (!f) {
cannam@226 340 break; // Reached end of restrictions list without failure
cannam@226 341 }
cannam@226 342
cannam@226 343 // Check this restriction
cannam@226 344 const bool good = check_restriction(
cannam@226 345 model, uris, literal, type, sord_iter_get_node(f, SORD_OBJECT));
cannam@226 346 sord_iter_free(f);
cannam@226 347
cannam@226 348 if (!good) {
cannam@226 349 sord_iter_free(rs);
cannam@226 350 return false; // Failed, literal is invalid
cannam@226 351 }
cannam@226 352
cannam@226 353 // Seek to next list node
cannam@226 354 SordIter* n = sord_search(model, head, uris->rdf_rest, 0, 0);
cannam@226 355 head = n ? sord_iter_get_node(n, SORD_OBJECT) : NULL;
cannam@226 356 sord_iter_free(n);
cannam@226 357 }
cannam@226 358
cannam@226 359 sord_iter_free(rs);
cannam@226 360
cannam@226 361 SordIter* s = sord_search(model, type, uris->owl_onDatatype, 0, 0);
cannam@226 362 if (s) {
cannam@226 363 const SordNode* super = sord_iter_get_node(s, SORD_OBJECT);
cannam@226 364 const bool good = literal_is_valid(
cannam@226 365 model, uris, quad, literal, super);
cannam@226 366 sord_iter_free(s);
cannam@226 367 return good; // Match iff literal also matches supertype
cannam@226 368 }
cannam@226 369
cannam@226 370 return true; // Matches top level type
cannam@226 371 }
cannam@226 372
cannam@226 373 static bool
cannam@226 374 check_type(SordModel* model,
cannam@226 375 const URIs* uris,
cannam@226 376 const SordQuad quad,
cannam@226 377 const SordNode* node,
cannam@226 378 const SordNode* type)
cannam@226 379 {
cannam@226 380 if (sord_node_equals(type, uris->rdfs_Resource) ||
cannam@226 381 sord_node_equals(type, uris->owl_Thing)) {
cannam@226 382 return true;
cannam@226 383 }
cannam@226 384
cannam@226 385 if (sord_node_get_type(node) == SORD_LITERAL) {
cannam@226 386 if (sord_node_equals(type, uris->rdfs_Literal)) {
cannam@226 387 return true;
cannam@226 388 } else if (sord_node_equals(type, uris->rdf_PlainLiteral)) {
cannam@226 389 return !sord_node_get_language(node);
cannam@226 390 } else {
cannam@226 391 return literal_is_valid(model, uris, quad, node, type);
cannam@226 392 }
cannam@226 393 } else if (sord_node_get_type(node) == SORD_URI) {
cannam@226 394 if (sord_node_equals(type, uris->foaf_Document)) {
cannam@226 395 return true; // Questionable...
cannam@226 396 } else if (is_descendant_of(
cannam@226 397 model, uris,
cannam@226 398 type, uris->xsd_anyURI, uris->owl_onDatatype)) {
cannam@226 399 /* Type is any URI and this is a URI, so pass. Restrictions on
cannam@226 400 anyURI subtypes are not currently checked (very uncommon). */
cannam@226 401 return true; // Type is anyURI, and this is a URI
cannam@226 402 } else {
cannam@226 403 SordIter* t = sord_search(model, node, uris->rdf_type, NULL, NULL);
cannam@226 404 for (; !sord_iter_end(t); sord_iter_next(t)) {
cannam@226 405 if (is_descendant_of(model, uris,
cannam@226 406 sord_iter_get_node(t, SORD_OBJECT),
cannam@226 407 type,
cannam@226 408 uris->rdfs_subClassOf)) {
cannam@226 409 sord_iter_free(t);
cannam@226 410 return true;
cannam@226 411 }
cannam@226 412 }
cannam@226 413 sord_iter_free(t);
cannam@226 414 return false;
cannam@226 415 }
cannam@226 416 } else {
cannam@226 417 return true; // Blanks often lack explicit types, ignore
cannam@226 418 }
cannam@226 419
cannam@226 420 return false;
cannam@226 421 }
cannam@226 422
cannam@226 423 static uint64_t
cannam@226 424 count_non_blanks(SordIter* i, SordQuadIndex field)
cannam@226 425 {
cannam@226 426 uint64_t n = 0;
cannam@226 427 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 428 const SordNode* node = sord_iter_get_node(i, field);
cannam@226 429 if (sord_node_get_type(node) != SORD_BLANK) {
cannam@226 430 ++n;
cannam@226 431 }
cannam@226 432 }
cannam@226 433 return n;
cannam@226 434 }
cannam@226 435
cannam@226 436 static int
cannam@226 437 check_properties(SordModel* model, URIs* uris)
cannam@226 438 {
cannam@226 439 int st = 0;
cannam@226 440 SordIter* i = sord_begin(model);
cannam@226 441 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 442 SordQuad quad;
cannam@226 443 sord_iter_get(i, quad);
cannam@226 444
cannam@226 445 const SordNode* subj = quad[SORD_SUBJECT];
cannam@226 446 const SordNode* pred = quad[SORD_PREDICATE];
cannam@226 447 const SordNode* obj = quad[SORD_OBJECT];
cannam@226 448
cannam@226 449 bool is_any_property = false;
cannam@226 450 SordIter* t = sord_search(model, pred, uris->rdf_type, NULL, NULL);
cannam@226 451 for (; !sord_iter_end(t); sord_iter_next(t)) {
cannam@226 452 if (is_descendant_of(model, uris,
cannam@226 453 sord_iter_get_node(t, SORD_OBJECT),
cannam@226 454 uris->rdf_Property,
cannam@226 455 uris->rdfs_subClassOf)) {
cannam@226 456 is_any_property = true;
cannam@226 457 break;
cannam@226 458 }
cannam@226 459 }
cannam@226 460 sord_iter_free(t);
cannam@226 461
cannam@226 462 const bool is_ObjectProperty = sord_ask(
cannam@226 463 model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0);
cannam@226 464 const bool is_FunctionalProperty = sord_ask(
cannam@226 465 model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0);
cannam@226 466 const bool is_InverseFunctionalProperty = sord_ask(
cannam@226 467 model, pred, uris->rdf_type, uris->owl_InverseFunctionalProperty, 0);
cannam@226 468 const bool is_DatatypeProperty = sord_ask(
cannam@226 469 model, pred, uris->rdf_type, uris->owl_DatatypeProperty, 0);
cannam@226 470
cannam@226 471 if (!is_any_property) {
cannam@226 472 st = errorf(quad, "Use of undefined property");
cannam@226 473 }
cannam@226 474
cannam@226 475 if (!sord_ask(model, pred, uris->rdfs_label, NULL, NULL)) {
cannam@226 476 st = errorf(quad, "Property <%s> has no label",
cannam@226 477 sord_node_get_string(pred));
cannam@226 478 }
cannam@226 479
cannam@226 480 if (is_DatatypeProperty &&
cannam@226 481 sord_node_get_type(obj) != SORD_LITERAL) {
cannam@226 482 st = errorf(quad, "Datatype property with non-literal value");
cannam@226 483 }
cannam@226 484
cannam@226 485 if (is_ObjectProperty &&
cannam@226 486 sord_node_get_type(obj) == SORD_LITERAL) {
cannam@226 487 st = errorf(quad, "Object property with literal value");
cannam@226 488 }
cannam@226 489
cannam@226 490 if (is_FunctionalProperty) {
cannam@226 491 SordIter* o = sord_search(model, subj, pred, NULL, NULL);
cannam@226 492 const uint64_t n = count_non_blanks(o, SORD_OBJECT);
cannam@226 493 if (n > 1) {
cannam@226 494 st = errorf(quad, "Functional property with %u objects", n);
cannam@226 495 }
cannam@226 496 sord_iter_free(o);
cannam@226 497 }
cannam@226 498
cannam@226 499 if (is_InverseFunctionalProperty) {
cannam@226 500 SordIter* s = sord_search(model, NULL, pred, obj, NULL);
cannam@226 501 const unsigned n = count_non_blanks(s, SORD_SUBJECT);
cannam@226 502 if (n > 1) {
cannam@226 503 st = errorf(
cannam@226 504 quad, "Inverse functional property with %u subjects", n);
cannam@226 505 }
cannam@226 506 sord_iter_free(s);
cannam@226 507 }
cannam@226 508
cannam@226 509 if (sord_node_equals(pred, uris->rdf_type) &&
cannam@226 510 !sord_ask(model, obj, uris->rdf_type, uris->rdfs_Class, NULL) &&
cannam@226 511 !sord_ask(model, obj, uris->rdf_type, uris->owl_Class, NULL)) {
cannam@226 512 st = errorf(quad, "Type is not a rdfs:Class or owl:Class");
cannam@226 513 }
cannam@226 514
cannam@226 515 if (sord_node_get_type(obj) == SORD_LITERAL &&
cannam@226 516 !literal_is_valid(model, uris, quad,
cannam@226 517 obj, sord_node_get_datatype(obj))) {
cannam@226 518 st = errorf(quad, "Literal does not match datatype");
cannam@226 519 }
cannam@226 520
cannam@226 521 SordIter* r = sord_search(model, pred, uris->rdfs_range, NULL, NULL);
cannam@226 522 for (; !sord_iter_end(r); sord_iter_next(r)) {
cannam@226 523 const SordNode* range = sord_iter_get_node(r, SORD_OBJECT);
cannam@226 524 if (!check_type(model, uris, quad, obj, range)) {
cannam@226 525 st = errorf(quad, "Object not in range <%s>\n",
cannam@226 526 sord_node_get_string(range));
cannam@226 527 }
cannam@226 528 }
cannam@226 529 sord_iter_free(r);
cannam@226 530
cannam@226 531 SordIter* d = sord_search(model, pred, uris->rdfs_domain, NULL, NULL);
cannam@226 532 if (d) {
cannam@226 533 const SordNode* domain = sord_iter_get_node(d, SORD_OBJECT);
cannam@226 534 if (!check_type(model, uris, quad, subj, domain)) {
cannam@226 535 st = errorf(quad, "Subject not in domain <%s>",
cannam@226 536 sord_node_get_string(domain));
cannam@226 537 }
cannam@226 538 sord_iter_free(d);
cannam@226 539 }
cannam@226 540 }
cannam@226 541 sord_iter_free(i);
cannam@226 542
cannam@226 543 return st;
cannam@226 544 }
cannam@226 545
cannam@226 546 static int
cannam@226 547 check_instance(SordModel* model,
cannam@226 548 const URIs* uris,
cannam@226 549 const SordNode* restriction,
cannam@226 550 const SordQuad quad)
cannam@226 551 {
cannam@226 552 const SordNode* instance = quad[SORD_SUBJECT];
cannam@226 553 int st = 0;
cannam@226 554
cannam@226 555 const SordNode* prop = sord_get(
cannam@226 556 model, restriction, uris->owl_onProperty, NULL, NULL);
cannam@226 557 if (!prop) {
cannam@226 558 return 0;
cannam@226 559 }
cannam@226 560
cannam@226 561 const unsigned values = sord_count(model, instance, prop, NULL, NULL);
cannam@226 562
cannam@226 563 // Check exact cardinality
cannam@226 564 const SordNode* card = sord_get(
cannam@226 565 model, restriction, uris->owl_cardinality, NULL, NULL);
cannam@226 566 if (card) {
cannam@226 567 const unsigned c = atoi((const char*)sord_node_get_string(card));
cannam@226 568 if (values != c) {
cannam@226 569 st = errorf(quad, "Property %s on %s has %u != %u values",
cannam@226 570 sord_node_get_string(prop),
cannam@226 571 sord_node_get_string(instance),
cannam@226 572 values, c);
cannam@226 573 }
cannam@226 574 }
cannam@226 575
cannam@226 576 // Check minimum cardinality
cannam@226 577 const SordNode* minCard = sord_get(
cannam@226 578 model, restriction, uris->owl_minCardinality, NULL, NULL);
cannam@226 579 if (minCard) {
cannam@226 580 const unsigned m = atoi((const char*)sord_node_get_string(minCard));
cannam@226 581 if (values < m) {
cannam@226 582 st = errorf(quad, "Property %s on %s has %u < %u values",
cannam@226 583 sord_node_get_string(prop),
cannam@226 584 sord_node_get_string(instance),
cannam@226 585 values, m);
cannam@226 586 }
cannam@226 587 }
cannam@226 588
cannam@226 589 // Check maximum cardinality
cannam@226 590 const SordNode* maxCard = sord_get(
cannam@226 591 model, restriction, uris->owl_maxCardinality, NULL, NULL);
cannam@226 592 if (maxCard) {
cannam@226 593 const unsigned m = atoi((const char*)sord_node_get_string(maxCard));
cannam@226 594 if (values < m) {
cannam@226 595 st = errorf(quad, "Property %s on %s has %u > %u values",
cannam@226 596 sord_node_get_string(prop),
cannam@226 597 sord_node_get_string(instance),
cannam@226 598 values, m);
cannam@226 599 }
cannam@226 600 }
cannam@226 601
cannam@226 602 // Check someValuesFrom
cannam@226 603 SordIter* sf = sord_search(
cannam@226 604 model, restriction, uris->owl_someValuesFrom, NULL, NULL);
cannam@226 605 if (sf) {
cannam@226 606 const SordNode* type = sord_iter_get_node(sf, SORD_OBJECT);
cannam@226 607
cannam@226 608 SordIter* v = sord_search(model, instance, prop, NULL, NULL);
cannam@226 609 bool found = false;
cannam@226 610 for (; !sord_iter_end(v); sord_iter_next(v)) {
cannam@226 611 const SordNode* value = sord_iter_get_node(v, SORD_OBJECT);
cannam@226 612 if (check_type(model, uris, quad, value, type)) {
cannam@226 613 found = true;
cannam@226 614 break;
cannam@226 615 }
cannam@226 616 }
cannam@226 617 if (!found) {
cannam@226 618 st = errorf(quad, "%s has no <%s> values of type <%s>\n",
cannam@226 619 sord_node_get_string(instance),
cannam@226 620 sord_node_get_string(prop),
cannam@226 621 sord_node_get_string(type));
cannam@226 622 }
cannam@226 623 sord_iter_free(v);
cannam@226 624 }
cannam@226 625 sord_iter_free(sf);
cannam@226 626
cannam@226 627 return st;
cannam@226 628 }
cannam@226 629
cannam@226 630 static int
cannam@226 631 check_class_instances(SordModel* model,
cannam@226 632 const URIs* uris,
cannam@226 633 const SordNode* restriction,
cannam@226 634 const SordNode* klass)
cannam@226 635 {
cannam@226 636 // Check immediate instances of this class
cannam@226 637 SordIter* i = sord_search(model, NULL, uris->rdf_type, klass, NULL);
cannam@226 638 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 639 SordQuad quad;
cannam@226 640 sord_iter_get(i, quad);
cannam@226 641 check_instance(model, uris, restriction, quad);
cannam@226 642 }
cannam@226 643 sord_iter_free(i);
cannam@226 644
cannam@226 645 // Check instances of all subclasses recursively
cannam@226 646 SordIter* s = sord_search(model, NULL, uris->rdfs_subClassOf, klass, NULL);
cannam@226 647 for (; !sord_iter_end(s); sord_iter_next(s)) {
cannam@226 648 const SordNode* subklass = sord_iter_get_node(s, SORD_SUBJECT);
cannam@226 649 check_class_instances(model, uris, restriction, subklass);
cannam@226 650 }
cannam@226 651 sord_iter_free(s);
cannam@226 652
cannam@226 653 return 0;
cannam@226 654 }
cannam@226 655
cannam@226 656 static int
cannam@226 657 check_instances(SordModel* model, const URIs* uris)
cannam@226 658 {
cannam@226 659 int st = 0;
cannam@226 660 SordIter* r = sord_search(
cannam@226 661 model, NULL, uris->rdf_type, uris->owl_Restriction, NULL);
cannam@226 662 for (; !sord_iter_end(r); sord_iter_next(r)) {
cannam@226 663 const SordNode* restriction = sord_iter_get_node(r, SORD_SUBJECT);
cannam@226 664 const SordNode* prop = sord_get(
cannam@226 665 model, restriction, uris->owl_onProperty, NULL, NULL);
cannam@226 666 if (!prop) {
cannam@226 667 continue;
cannam@226 668 }
cannam@226 669
cannam@226 670 SordIter* c = sord_search(
cannam@226 671 model, NULL, uris->rdfs_subClassOf, restriction, NULL);
cannam@226 672 for (; !sord_iter_end(c); sord_iter_next(c)) {
cannam@226 673 const SordNode* klass = sord_iter_get_node(c, SORD_SUBJECT);
cannam@226 674 check_class_instances(model, uris, restriction, klass);
cannam@226 675 }
cannam@226 676 sord_iter_free(c);
cannam@226 677 }
cannam@226 678 sord_iter_free(r);
cannam@226 679
cannam@226 680 return st;
cannam@226 681 }
cannam@226 682
cannam@226 683 int
cannam@226 684 main(int argc, char** argv)
cannam@226 685 {
cannam@226 686 if (argc < 2) {
cannam@226 687 return print_usage(argv[0], true);
cannam@226 688 }
cannam@226 689
cannam@226 690 int a = 1;
cannam@226 691 for (; a < argc && argv[a][0] == '-'; ++a) {
cannam@226 692 if (argv[a][1] == 'l') {
cannam@226 693 one_line_errors = true;
cannam@226 694 } else if (argv[a][1] == 'v') {
cannam@226 695 return print_version();
cannam@226 696 } else {
cannam@226 697 fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]);
cannam@226 698 return print_usage(argv[0], true);
cannam@226 699 }
cannam@226 700 }
cannam@226 701
cannam@226 702 SordWorld* world = sord_world_new();
cannam@226 703 SordModel* model = sord_new(world, SORD_SPO|SORD_OPS, false);
cannam@226 704 SerdEnv* env = serd_env_new(&SERD_NODE_NULL);
cannam@226 705 SerdReader* reader = sord_new_reader(model, env, SERD_TURTLE, NULL);
cannam@226 706
cannam@226 707 for (; a < argc; ++a) {
cannam@226 708 const uint8_t* input = (const uint8_t*)argv[a];
cannam@226 709 uint8_t* in_path = absolute_path(serd_uri_to_path(input));
cannam@226 710
cannam@226 711 if (!in_path) {
cannam@226 712 fprintf(stderr, "Skipping file %s\n", input);
cannam@226 713 continue;
cannam@226 714 }
cannam@226 715
cannam@226 716 SerdURI base_uri;
cannam@226 717 SerdNode base_uri_node = serd_node_new_file_uri(
cannam@226 718 in_path, NULL, &base_uri, true);
cannam@226 719
cannam@226 720 serd_env_set_base_uri(env, &base_uri_node);
cannam@226 721 const SerdStatus st = serd_reader_read_file(reader, in_path);
cannam@226 722 if (st) {
cannam@226 723 fprintf(stderr, "error reading %s: %s\n",
cannam@226 724 in_path, serd_strerror(st));
cannam@226 725 }
cannam@226 726
cannam@226 727 serd_node_free(&base_uri_node);
cannam@226 728 free(in_path);
cannam@226 729 }
cannam@226 730 serd_reader_free(reader);
cannam@226 731 serd_env_free(env);
cannam@226 732
cannam@226 733 #define URI(prefix, suffix) \
cannam@226 734 uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix)
cannam@226 735
cannam@226 736 URIs uris;
cannam@226 737 URI(foaf, Document);
cannam@226 738 URI(owl, AnnotationProperty);
cannam@226 739 URI(owl, Class);
cannam@226 740 URI(owl, DatatypeProperty);
cannam@226 741 URI(owl, FunctionalProperty);
cannam@226 742 URI(owl, InverseFunctionalProperty);
cannam@226 743 URI(owl, ObjectProperty);
cannam@226 744 URI(owl, OntologyProperty);
cannam@226 745 URI(owl, Restriction);
cannam@226 746 URI(owl, Thing);
cannam@226 747 URI(owl, cardinality);
cannam@226 748 URI(owl, equivalentClass);
cannam@226 749 URI(owl, maxCardinality);
cannam@226 750 URI(owl, minCardinality);
cannam@226 751 URI(owl, onDatatype);
cannam@226 752 URI(owl, onProperty);
cannam@226 753 URI(owl, someValuesFrom);
cannam@226 754 URI(owl, withRestrictions);
cannam@226 755 URI(rdf, PlainLiteral);
cannam@226 756 URI(rdf, Property);
cannam@226 757 URI(rdf, first);
cannam@226 758 URI(rdf, rest);
cannam@226 759 URI(rdf, type);
cannam@226 760 URI(rdfs, Class);
cannam@226 761 URI(rdfs, Literal);
cannam@226 762 URI(rdfs, Resource);
cannam@226 763 URI(rdfs, domain);
cannam@226 764 URI(rdfs, label);
cannam@226 765 URI(rdfs, range);
cannam@226 766 URI(rdfs, subClassOf);
cannam@226 767 URI(xsd, anyURI);
cannam@226 768 URI(xsd, decimal);
cannam@226 769 URI(xsd, double);
cannam@226 770 URI(xsd, maxInclusive);
cannam@226 771 URI(xsd, minInclusive);
cannam@226 772 URI(xsd, pattern);
cannam@226 773 URI(xsd, string);
cannam@226 774
cannam@226 775 #ifndef HAVE_PCRE
cannam@226 776 fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n");
cannam@226 777 #endif
cannam@226 778
cannam@226 779 const int prop_st = check_properties(model, &uris);
cannam@226 780 const int inst_st = check_instances(model, &uris);
cannam@226 781
cannam@226 782 printf("Found %d errors among %d files (checked %d restrictions)\n",
cannam@226 783 n_errors, argc - 1, n_restrictions);
cannam@226 784
cannam@226 785 sord_free(model);
cannam@226 786 sord_world_free(world);
cannam@226 787 return prop_st || inst_st;
cannam@226 788 }