cannam@226: /* cannam@226: Copyright 2012-2017 David Robillard cannam@226: cannam@226: Permission to use, copy, modify, and/or distribute this software for any cannam@226: purpose with or without fee is hereby granted, provided that the above cannam@226: copyright notice and this permission notice appear in all copies. cannam@226: cannam@226: THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES cannam@226: WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF cannam@226: MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR cannam@226: ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES cannam@226: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN cannam@226: ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF cannam@226: OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. cannam@226: */ cannam@226: cannam@226: #define _BSD_SOURCE 1 // for realpath cannam@226: #define _DEFAULT_SOURCE 1 // for realpath cannam@226: cannam@226: #include cannam@226: #include cannam@226: #include cannam@226: cannam@226: #ifdef _WIN32 cannam@226: # include cannam@226: #endif cannam@226: cannam@226: #include "serd/serd.h" cannam@226: #include "sord/sord.h" cannam@226: #include "sord_config.h" cannam@226: cannam@226: #ifdef HAVE_PCRE cannam@226: # include cannam@226: #endif cannam@226: cannam@226: #define USTR(s) ((const uint8_t*)s) cannam@226: cannam@226: #define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/" cannam@226: #define NS_owl (const uint8_t*)"http://www.w3.org/2002/07/owl#" cannam@226: #define NS_rdf (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#" cannam@226: #define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#" cannam@226: #define NS_xsd (const uint8_t*)"http://www.w3.org/2001/XMLSchema#" cannam@226: cannam@226: typedef struct { cannam@226: SordNode* foaf_Document; cannam@226: SordNode* owl_AnnotationProperty; cannam@226: SordNode* owl_Class; cannam@226: SordNode* owl_DatatypeProperty; cannam@226: SordNode* owl_FunctionalProperty; cannam@226: SordNode* owl_InverseFunctionalProperty; cannam@226: SordNode* owl_ObjectProperty; cannam@226: SordNode* owl_OntologyProperty; cannam@226: SordNode* owl_Restriction; cannam@226: SordNode* owl_Thing; cannam@226: SordNode* owl_cardinality; cannam@226: SordNode* owl_equivalentClass; cannam@226: SordNode* owl_maxCardinality; cannam@226: SordNode* owl_minCardinality; cannam@226: SordNode* owl_onDatatype; cannam@226: SordNode* owl_onProperty; cannam@226: SordNode* owl_someValuesFrom; cannam@226: SordNode* owl_withRestrictions; cannam@226: SordNode* rdf_PlainLiteral; cannam@226: SordNode* rdf_Property; cannam@226: SordNode* rdf_first; cannam@226: SordNode* rdf_rest; cannam@226: SordNode* rdf_type; cannam@226: SordNode* rdfs_Class; cannam@226: SordNode* rdfs_Literal; cannam@226: SordNode* rdfs_Resource; cannam@226: SordNode* rdfs_domain; cannam@226: SordNode* rdfs_label; cannam@226: SordNode* rdfs_range; cannam@226: SordNode* rdfs_subClassOf; cannam@226: SordNode* xsd_anyURI; cannam@226: SordNode* xsd_decimal; cannam@226: SordNode* xsd_double; cannam@226: SordNode* xsd_maxInclusive; cannam@226: SordNode* xsd_minInclusive; cannam@226: SordNode* xsd_pattern; cannam@226: SordNode* xsd_string; cannam@226: } URIs; cannam@226: cannam@226: int n_errors = 0; cannam@226: int n_restrictions = 0; cannam@226: bool one_line_errors = false; cannam@226: cannam@226: static int cannam@226: print_version(void) cannam@226: { cannam@226: printf("sord_validate " SORD_VERSION cannam@226: " \n"); cannam@226: printf("Copyright 2012-2017 David Robillard .\n" cannam@226: "License: \n" cannam@226: "This is free software; you are free to change and redistribute it." cannam@226: "\nThere is NO WARRANTY, to the extent permitted by law.\n"); cannam@226: return 0; cannam@226: } cannam@226: cannam@226: static int cannam@226: print_usage(const char* name, bool error) cannam@226: { cannam@226: FILE* const os = error ? stderr : stdout; cannam@226: fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); cannam@226: fprintf(os, "Validate RDF data\n\n"); cannam@226: fprintf(os, " -h Display this help and exit\n"); cannam@226: fprintf(os, " -l Print errors on a single line.\n"); cannam@226: fprintf(os, " -v Display version information and exit\n"); cannam@226: fprintf(os, cannam@226: "Validate RDF data. This is a simple validator which checks\n" cannam@226: "that all used properties are actually defined. It does not do\n" cannam@226: "any fancy file retrieval, the files passed on the command line\n" cannam@226: "are the only data that is read. In other words, you must pass\n" cannam@226: "the definition of all vocabularies used on the command line.\n"); cannam@226: return error ? 1 : 0; cannam@226: } cannam@226: cannam@226: static uint8_t* cannam@226: absolute_path(const uint8_t* path) cannam@226: { cannam@226: #ifdef _WIN32 cannam@226: char* out = (char*)malloc(MAX_PATH); cannam@226: GetFullPathName((const char*)path, MAX_PATH, out, NULL); cannam@226: return (uint8_t*)out; cannam@226: #else cannam@226: return (uint8_t*)realpath((const char*)path, NULL); cannam@226: #endif cannam@226: } cannam@226: cannam@226: static int cannam@226: errorf(const SordQuad quad, const char* fmt, ...) cannam@226: { cannam@226: va_list args; cannam@226: va_start(args, fmt); cannam@226: fprintf(stderr, "error: "); cannam@226: vfprintf(stderr, fmt, args); cannam@226: va_end(args); cannam@226: cannam@226: const char* sep = one_line_errors ? "\t" : "\n "; cannam@226: fprintf(stderr, "%s%s%s%s%s%s\n", cannam@226: sep, (const char*)sord_node_get_string(quad[SORD_SUBJECT]), cannam@226: sep, (const char*)sord_node_get_string(quad[SORD_PREDICATE]), cannam@226: sep, (const char*)sord_node_get_string(quad[SORD_OBJECT])); cannam@226: cannam@226: ++n_errors; cannam@226: return 1; cannam@226: } cannam@226: cannam@226: static bool cannam@226: is_descendant_of(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordNode* child, cannam@226: const SordNode* parent, cannam@226: const SordNode* pred) cannam@226: { cannam@226: if (!child) { cannam@226: return false; cannam@226: } else if (sord_node_equals(child, parent) || cannam@226: sord_ask(model, child, uris->owl_equivalentClass, parent, NULL)) { cannam@226: return true; cannam@226: } cannam@226: cannam@226: SordIter* i = sord_search(model, child, pred, NULL, NULL); cannam@226: for (; !sord_iter_end(i); sord_iter_next(i)) { cannam@226: const SordNode* o = sord_iter_get_node(i, SORD_OBJECT); cannam@226: if (sord_node_equals(child, o)) { cannam@226: continue; // Weird class is explicitly a descendent of itself cannam@226: } cannam@226: if (is_descendant_of(model, uris, o, parent, pred)) { cannam@226: sord_iter_free(i); cannam@226: return true; cannam@226: } cannam@226: } cannam@226: sord_iter_free(i); cannam@226: cannam@226: return false; cannam@226: } cannam@226: cannam@226: static bool cannam@226: regexp_match(const uint8_t* pat, const char* str) cannam@226: { cannam@226: #ifdef HAVE_PCRE cannam@226: // Append a $ to the pattern so we only match if the entire string matches cannam@226: const size_t len = strlen((const char*)pat); cannam@226: char* const regx = (char*)malloc(len + 2); cannam@226: memcpy(regx, pat, len); cannam@226: regx[len] = '$'; cannam@226: regx[len + 1] = '\0'; cannam@226: cannam@226: const char* err; cannam@226: int erroffset; cannam@226: pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL); cannam@226: free(regx); cannam@226: if (!re) { cannam@226: fprintf(stderr, "Error in pattern `%s' at offset %d (%s)\n", cannam@226: pat, erroffset, err); cannam@226: return false; cannam@226: } cannam@226: cannam@226: const bool ret = pcre_exec(re, NULL, str, strlen(str), 0, 0, NULL, 0) >= 0; cannam@226: pcre_free(re); cannam@226: return ret; cannam@226: #endif // HAVE_PCRE cannam@226: return true; cannam@226: } cannam@226: cannam@226: static int cannam@226: bound_cmp(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordNode* literal, cannam@226: const SordNode* type, cannam@226: const SordNode* bound) cannam@226: { cannam@226: const char* str = (const char*)sord_node_get_string(literal); cannam@226: const char* bound_str = (const char*)sord_node_get_string(bound); cannam@226: const bool is_numeric = cannam@226: is_descendant_of(model, uris, type, uris->xsd_decimal, uris->owl_onDatatype) || cannam@226: is_descendant_of(model, uris, type, uris->xsd_double, uris->owl_onDatatype); cannam@226: cannam@226: if (is_numeric) { cannam@226: const double fbound = serd_strtod(bound_str, NULL); cannam@226: const double fliteral = serd_strtod(str, NULL); cannam@226: return ((fliteral < fbound) ? -1 : cannam@226: (fliteral > fbound) ? 1 : cannam@226: 0); cannam@226: } else { cannam@226: return strcmp(str, bound_str); cannam@226: } cannam@226: } cannam@226: cannam@226: static bool cannam@226: check_restriction(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordNode* literal, cannam@226: const SordNode* type, cannam@226: const SordNode* restriction) cannam@226: { cannam@226: size_t len = 0; cannam@226: const char* str = (const char*)sord_node_get_string_counted(literal, &len); cannam@226: cannam@226: // Check xsd:pattern cannam@226: SordIter* p = sord_search(model, restriction, uris->xsd_pattern, 0, 0); cannam@226: if (p) { cannam@226: const SordNode* pat = sord_iter_get_node(p, SORD_OBJECT); cannam@226: if (!regexp_match(sord_node_get_string(pat), str)) { cannam@226: fprintf(stderr, "`%s' does not match <%s> pattern `%s'\n", cannam@226: sord_node_get_string(literal), cannam@226: sord_node_get_string(type), cannam@226: sord_node_get_string(pat)); cannam@226: sord_iter_free(p); cannam@226: return false; cannam@226: } cannam@226: sord_iter_free(p); cannam@226: ++n_restrictions; cannam@226: } cannam@226: cannam@226: // Check xsd:minInclusive cannam@226: SordIter* l = sord_search(model, restriction, uris->xsd_minInclusive, 0, 0); cannam@226: if (l) { cannam@226: const SordNode* lower = sord_iter_get_node(l, SORD_OBJECT); cannam@226: if (bound_cmp(model, uris, literal, type, lower) < 0) { cannam@226: fprintf(stderr, "`%s' is not >= <%s> minimum `%s'\n", cannam@226: sord_node_get_string(literal), cannam@226: sord_node_get_string(type), cannam@226: sord_node_get_string(lower)); cannam@226: sord_iter_free(l); cannam@226: return false; cannam@226: } cannam@226: sord_iter_free(l); cannam@226: ++n_restrictions; cannam@226: } cannam@226: cannam@226: // Check xsd:maxInclusive cannam@226: SordIter* u = sord_search(model, restriction, uris->xsd_maxInclusive, 0, 0); cannam@226: if (u) { cannam@226: const SordNode* upper = sord_iter_get_node(u, SORD_OBJECT); cannam@226: if (bound_cmp(model, uris, literal, type, upper) > 0) { cannam@226: fprintf(stderr, "`%s' is not <= <%s> maximum `%s'\n", cannam@226: sord_node_get_string(literal), cannam@226: sord_node_get_string(type), cannam@226: sord_node_get_string(upper)); cannam@226: sord_iter_free(u); cannam@226: return false; cannam@226: } cannam@226: sord_iter_free(u); cannam@226: ++n_restrictions; cannam@226: } cannam@226: cannam@226: return true; // Unknown restriction, be quietly tolerant cannam@226: } cannam@226: cannam@226: static bool cannam@226: literal_is_valid(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordQuad quad, cannam@226: const SordNode* literal, cannam@226: const SordNode* type) cannam@226: { cannam@226: if (!type) { cannam@226: return true; cannam@226: } cannam@226: cannam@226: /* Check that literal data is related to required type. We don't do a cannam@226: strict subtype check here because e.g. an xsd:decimal might be a valid cannam@226: xsd:unsignedInt, which the pattern checks will verify, but if the cannam@226: literal type is not related to the required type at all cannam@226: (e.g. xsd:decimal and xsd:string) there is a problem. */ cannam@226: const SordNode* datatype = sord_node_get_datatype(literal); cannam@226: if (datatype && datatype != type) { cannam@226: if (!is_descendant_of( cannam@226: model, uris, cannam@226: datatype, type, uris->owl_onDatatype) && cannam@226: !is_descendant_of( cannam@226: model, uris, cannam@226: type, datatype, uris->owl_onDatatype) && cannam@226: !(sord_node_equals(datatype, uris->xsd_decimal) && cannam@226: is_descendant_of( cannam@226: model, uris, cannam@226: type, uris->xsd_double, uris->owl_onDatatype))) { cannam@226: errorf(quad, cannam@226: "Literal `%s' datatype <%s> is not compatible with <%s>\n", cannam@226: sord_node_get_string(literal), cannam@226: sord_node_get_string(datatype), cannam@226: sord_node_get_string(type)); cannam@226: return false; cannam@226: } cannam@226: } cannam@226: cannam@226: // Find restrictions list cannam@226: SordIter* rs = sord_search(model, type, uris->owl_withRestrictions, 0, 0); cannam@226: if (sord_iter_end(rs)) { cannam@226: return true; // No restrictions cannam@226: } cannam@226: cannam@226: // Walk list, checking each restriction cannam@226: const SordNode* head = sord_iter_get_node(rs, SORD_OBJECT); cannam@226: while (head) { cannam@226: SordIter* f = sord_search(model, head, uris->rdf_first, 0, 0); cannam@226: if (!f) { cannam@226: break; // Reached end of restrictions list without failure cannam@226: } cannam@226: cannam@226: // Check this restriction cannam@226: const bool good = check_restriction( cannam@226: model, uris, literal, type, sord_iter_get_node(f, SORD_OBJECT)); cannam@226: sord_iter_free(f); cannam@226: cannam@226: if (!good) { cannam@226: sord_iter_free(rs); cannam@226: return false; // Failed, literal is invalid cannam@226: } cannam@226: cannam@226: // Seek to next list node cannam@226: SordIter* n = sord_search(model, head, uris->rdf_rest, 0, 0); cannam@226: head = n ? sord_iter_get_node(n, SORD_OBJECT) : NULL; cannam@226: sord_iter_free(n); cannam@226: } cannam@226: cannam@226: sord_iter_free(rs); cannam@226: cannam@226: SordIter* s = sord_search(model, type, uris->owl_onDatatype, 0, 0); cannam@226: if (s) { cannam@226: const SordNode* super = sord_iter_get_node(s, SORD_OBJECT); cannam@226: const bool good = literal_is_valid( cannam@226: model, uris, quad, literal, super); cannam@226: sord_iter_free(s); cannam@226: return good; // Match iff literal also matches supertype cannam@226: } cannam@226: cannam@226: return true; // Matches top level type cannam@226: } cannam@226: cannam@226: static bool cannam@226: check_type(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordQuad quad, cannam@226: const SordNode* node, cannam@226: const SordNode* type) cannam@226: { cannam@226: if (sord_node_equals(type, uris->rdfs_Resource) || cannam@226: sord_node_equals(type, uris->owl_Thing)) { cannam@226: return true; cannam@226: } cannam@226: cannam@226: if (sord_node_get_type(node) == SORD_LITERAL) { cannam@226: if (sord_node_equals(type, uris->rdfs_Literal)) { cannam@226: return true; cannam@226: } else if (sord_node_equals(type, uris->rdf_PlainLiteral)) { cannam@226: return !sord_node_get_language(node); cannam@226: } else { cannam@226: return literal_is_valid(model, uris, quad, node, type); cannam@226: } cannam@226: } else if (sord_node_get_type(node) == SORD_URI) { cannam@226: if (sord_node_equals(type, uris->foaf_Document)) { cannam@226: return true; // Questionable... cannam@226: } else if (is_descendant_of( cannam@226: model, uris, cannam@226: type, uris->xsd_anyURI, uris->owl_onDatatype)) { cannam@226: /* Type is any URI and this is a URI, so pass. Restrictions on cannam@226: anyURI subtypes are not currently checked (very uncommon). */ cannam@226: return true; // Type is anyURI, and this is a URI cannam@226: } else { cannam@226: SordIter* t = sord_search(model, node, uris->rdf_type, NULL, NULL); cannam@226: for (; !sord_iter_end(t); sord_iter_next(t)) { cannam@226: if (is_descendant_of(model, uris, cannam@226: sord_iter_get_node(t, SORD_OBJECT), cannam@226: type, cannam@226: uris->rdfs_subClassOf)) { cannam@226: sord_iter_free(t); cannam@226: return true; cannam@226: } cannam@226: } cannam@226: sord_iter_free(t); cannam@226: return false; cannam@226: } cannam@226: } else { cannam@226: return true; // Blanks often lack explicit types, ignore cannam@226: } cannam@226: cannam@226: return false; cannam@226: } cannam@226: cannam@226: static uint64_t cannam@226: count_non_blanks(SordIter* i, SordQuadIndex field) cannam@226: { cannam@226: uint64_t n = 0; cannam@226: for (; !sord_iter_end(i); sord_iter_next(i)) { cannam@226: const SordNode* node = sord_iter_get_node(i, field); cannam@226: if (sord_node_get_type(node) != SORD_BLANK) { cannam@226: ++n; cannam@226: } cannam@226: } cannam@226: return n; cannam@226: } cannam@226: cannam@226: static int cannam@226: check_properties(SordModel* model, URIs* uris) cannam@226: { cannam@226: int st = 0; cannam@226: SordIter* i = sord_begin(model); cannam@226: for (; !sord_iter_end(i); sord_iter_next(i)) { cannam@226: SordQuad quad; cannam@226: sord_iter_get(i, quad); cannam@226: cannam@226: const SordNode* subj = quad[SORD_SUBJECT]; cannam@226: const SordNode* pred = quad[SORD_PREDICATE]; cannam@226: const SordNode* obj = quad[SORD_OBJECT]; cannam@226: cannam@226: bool is_any_property = false; cannam@226: SordIter* t = sord_search(model, pred, uris->rdf_type, NULL, NULL); cannam@226: for (; !sord_iter_end(t); sord_iter_next(t)) { cannam@226: if (is_descendant_of(model, uris, cannam@226: sord_iter_get_node(t, SORD_OBJECT), cannam@226: uris->rdf_Property, cannam@226: uris->rdfs_subClassOf)) { cannam@226: is_any_property = true; cannam@226: break; cannam@226: } cannam@226: } cannam@226: sord_iter_free(t); cannam@226: cannam@226: const bool is_ObjectProperty = sord_ask( cannam@226: model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0); cannam@226: const bool is_FunctionalProperty = sord_ask( cannam@226: model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0); cannam@226: const bool is_InverseFunctionalProperty = sord_ask( cannam@226: model, pred, uris->rdf_type, uris->owl_InverseFunctionalProperty, 0); cannam@226: const bool is_DatatypeProperty = sord_ask( cannam@226: model, pred, uris->rdf_type, uris->owl_DatatypeProperty, 0); cannam@226: cannam@226: if (!is_any_property) { cannam@226: st = errorf(quad, "Use of undefined property"); cannam@226: } cannam@226: cannam@226: if (!sord_ask(model, pred, uris->rdfs_label, NULL, NULL)) { cannam@226: st = errorf(quad, "Property <%s> has no label", cannam@226: sord_node_get_string(pred)); cannam@226: } cannam@226: cannam@226: if (is_DatatypeProperty && cannam@226: sord_node_get_type(obj) != SORD_LITERAL) { cannam@226: st = errorf(quad, "Datatype property with non-literal value"); cannam@226: } cannam@226: cannam@226: if (is_ObjectProperty && cannam@226: sord_node_get_type(obj) == SORD_LITERAL) { cannam@226: st = errorf(quad, "Object property with literal value"); cannam@226: } cannam@226: cannam@226: if (is_FunctionalProperty) { cannam@226: SordIter* o = sord_search(model, subj, pred, NULL, NULL); cannam@226: const uint64_t n = count_non_blanks(o, SORD_OBJECT); cannam@226: if (n > 1) { cannam@226: st = errorf(quad, "Functional property with %u objects", n); cannam@226: } cannam@226: sord_iter_free(o); cannam@226: } cannam@226: cannam@226: if (is_InverseFunctionalProperty) { cannam@226: SordIter* s = sord_search(model, NULL, pred, obj, NULL); cannam@226: const unsigned n = count_non_blanks(s, SORD_SUBJECT); cannam@226: if (n > 1) { cannam@226: st = errorf( cannam@226: quad, "Inverse functional property with %u subjects", n); cannam@226: } cannam@226: sord_iter_free(s); cannam@226: } cannam@226: cannam@226: if (sord_node_equals(pred, uris->rdf_type) && cannam@226: !sord_ask(model, obj, uris->rdf_type, uris->rdfs_Class, NULL) && cannam@226: !sord_ask(model, obj, uris->rdf_type, uris->owl_Class, NULL)) { cannam@226: st = errorf(quad, "Type is not a rdfs:Class or owl:Class"); cannam@226: } cannam@226: cannam@226: if (sord_node_get_type(obj) == SORD_LITERAL && cannam@226: !literal_is_valid(model, uris, quad, cannam@226: obj, sord_node_get_datatype(obj))) { cannam@226: st = errorf(quad, "Literal does not match datatype"); cannam@226: } cannam@226: cannam@226: SordIter* r = sord_search(model, pred, uris->rdfs_range, NULL, NULL); cannam@226: for (; !sord_iter_end(r); sord_iter_next(r)) { cannam@226: const SordNode* range = sord_iter_get_node(r, SORD_OBJECT); cannam@226: if (!check_type(model, uris, quad, obj, range)) { cannam@226: st = errorf(quad, "Object not in range <%s>\n", cannam@226: sord_node_get_string(range)); cannam@226: } cannam@226: } cannam@226: sord_iter_free(r); cannam@226: cannam@226: SordIter* d = sord_search(model, pred, uris->rdfs_domain, NULL, NULL); cannam@226: if (d) { cannam@226: const SordNode* domain = sord_iter_get_node(d, SORD_OBJECT); cannam@226: if (!check_type(model, uris, quad, subj, domain)) { cannam@226: st = errorf(quad, "Subject not in domain <%s>", cannam@226: sord_node_get_string(domain)); cannam@226: } cannam@226: sord_iter_free(d); cannam@226: } cannam@226: } cannam@226: sord_iter_free(i); cannam@226: cannam@226: return st; cannam@226: } cannam@226: cannam@226: static int cannam@226: check_instance(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordNode* restriction, cannam@226: const SordQuad quad) cannam@226: { cannam@226: const SordNode* instance = quad[SORD_SUBJECT]; cannam@226: int st = 0; cannam@226: cannam@226: const SordNode* prop = sord_get( cannam@226: model, restriction, uris->owl_onProperty, NULL, NULL); cannam@226: if (!prop) { cannam@226: return 0; cannam@226: } cannam@226: cannam@226: const unsigned values = sord_count(model, instance, prop, NULL, NULL); cannam@226: cannam@226: // Check exact cardinality cannam@226: const SordNode* card = sord_get( cannam@226: model, restriction, uris->owl_cardinality, NULL, NULL); cannam@226: if (card) { cannam@226: const unsigned c = atoi((const char*)sord_node_get_string(card)); cannam@226: if (values != c) { cannam@226: st = errorf(quad, "Property %s on %s has %u != %u values", cannam@226: sord_node_get_string(prop), cannam@226: sord_node_get_string(instance), cannam@226: values, c); cannam@226: } cannam@226: } cannam@226: cannam@226: // Check minimum cardinality cannam@226: const SordNode* minCard = sord_get( cannam@226: model, restriction, uris->owl_minCardinality, NULL, NULL); cannam@226: if (minCard) { cannam@226: const unsigned m = atoi((const char*)sord_node_get_string(minCard)); cannam@226: if (values < m) { cannam@226: st = errorf(quad, "Property %s on %s has %u < %u values", cannam@226: sord_node_get_string(prop), cannam@226: sord_node_get_string(instance), cannam@226: values, m); cannam@226: } cannam@226: } cannam@226: cannam@226: // Check maximum cardinality cannam@226: const SordNode* maxCard = sord_get( cannam@226: model, restriction, uris->owl_maxCardinality, NULL, NULL); cannam@226: if (maxCard) { cannam@226: const unsigned m = atoi((const char*)sord_node_get_string(maxCard)); cannam@226: if (values < m) { cannam@226: st = errorf(quad, "Property %s on %s has %u > %u values", cannam@226: sord_node_get_string(prop), cannam@226: sord_node_get_string(instance), cannam@226: values, m); cannam@226: } cannam@226: } cannam@226: cannam@226: // Check someValuesFrom cannam@226: SordIter* sf = sord_search( cannam@226: model, restriction, uris->owl_someValuesFrom, NULL, NULL); cannam@226: if (sf) { cannam@226: const SordNode* type = sord_iter_get_node(sf, SORD_OBJECT); cannam@226: cannam@226: SordIter* v = sord_search(model, instance, prop, NULL, NULL); cannam@226: bool found = false; cannam@226: for (; !sord_iter_end(v); sord_iter_next(v)) { cannam@226: const SordNode* value = sord_iter_get_node(v, SORD_OBJECT); cannam@226: if (check_type(model, uris, quad, value, type)) { cannam@226: found = true; cannam@226: break; cannam@226: } cannam@226: } cannam@226: if (!found) { cannam@226: st = errorf(quad, "%s has no <%s> values of type <%s>\n", cannam@226: sord_node_get_string(instance), cannam@226: sord_node_get_string(prop), cannam@226: sord_node_get_string(type)); cannam@226: } cannam@226: sord_iter_free(v); cannam@226: } cannam@226: sord_iter_free(sf); cannam@226: cannam@226: return st; cannam@226: } cannam@226: cannam@226: static int cannam@226: check_class_instances(SordModel* model, cannam@226: const URIs* uris, cannam@226: const SordNode* restriction, cannam@226: const SordNode* klass) cannam@226: { cannam@226: // Check immediate instances of this class cannam@226: SordIter* i = sord_search(model, NULL, uris->rdf_type, klass, NULL); cannam@226: for (; !sord_iter_end(i); sord_iter_next(i)) { cannam@226: SordQuad quad; cannam@226: sord_iter_get(i, quad); cannam@226: check_instance(model, uris, restriction, quad); cannam@226: } cannam@226: sord_iter_free(i); cannam@226: cannam@226: // Check instances of all subclasses recursively cannam@226: SordIter* s = sord_search(model, NULL, uris->rdfs_subClassOf, klass, NULL); cannam@226: for (; !sord_iter_end(s); sord_iter_next(s)) { cannam@226: const SordNode* subklass = sord_iter_get_node(s, SORD_SUBJECT); cannam@226: check_class_instances(model, uris, restriction, subklass); cannam@226: } cannam@226: sord_iter_free(s); cannam@226: cannam@226: return 0; cannam@226: } cannam@226: cannam@226: static int cannam@226: check_instances(SordModel* model, const URIs* uris) cannam@226: { cannam@226: int st = 0; cannam@226: SordIter* r = sord_search( cannam@226: model, NULL, uris->rdf_type, uris->owl_Restriction, NULL); cannam@226: for (; !sord_iter_end(r); sord_iter_next(r)) { cannam@226: const SordNode* restriction = sord_iter_get_node(r, SORD_SUBJECT); cannam@226: const SordNode* prop = sord_get( cannam@226: model, restriction, uris->owl_onProperty, NULL, NULL); cannam@226: if (!prop) { cannam@226: continue; cannam@226: } cannam@226: cannam@226: SordIter* c = sord_search( cannam@226: model, NULL, uris->rdfs_subClassOf, restriction, NULL); cannam@226: for (; !sord_iter_end(c); sord_iter_next(c)) { cannam@226: const SordNode* klass = sord_iter_get_node(c, SORD_SUBJECT); cannam@226: check_class_instances(model, uris, restriction, klass); cannam@226: } cannam@226: sord_iter_free(c); cannam@226: } cannam@226: sord_iter_free(r); cannam@226: cannam@226: return st; cannam@226: } cannam@226: cannam@226: int cannam@226: main(int argc, char** argv) cannam@226: { cannam@226: if (argc < 2) { cannam@226: return print_usage(argv[0], true); cannam@226: } cannam@226: cannam@226: int a = 1; cannam@226: for (; a < argc && argv[a][0] == '-'; ++a) { cannam@226: if (argv[a][1] == 'l') { cannam@226: one_line_errors = true; cannam@226: } else if (argv[a][1] == 'v') { cannam@226: return print_version(); cannam@226: } else { cannam@226: fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]); cannam@226: return print_usage(argv[0], true); cannam@226: } cannam@226: } cannam@226: cannam@226: SordWorld* world = sord_world_new(); cannam@226: SordModel* model = sord_new(world, SORD_SPO|SORD_OPS, false); cannam@226: SerdEnv* env = serd_env_new(&SERD_NODE_NULL); cannam@226: SerdReader* reader = sord_new_reader(model, env, SERD_TURTLE, NULL); cannam@226: cannam@226: for (; a < argc; ++a) { cannam@226: const uint8_t* input = (const uint8_t*)argv[a]; cannam@226: uint8_t* in_path = absolute_path(serd_uri_to_path(input)); cannam@226: cannam@226: if (!in_path) { cannam@226: fprintf(stderr, "Skipping file %s\n", input); cannam@226: continue; cannam@226: } cannam@226: cannam@226: SerdURI base_uri; cannam@226: SerdNode base_uri_node = serd_node_new_file_uri( cannam@226: in_path, NULL, &base_uri, true); cannam@226: cannam@226: serd_env_set_base_uri(env, &base_uri_node); cannam@226: const SerdStatus st = serd_reader_read_file(reader, in_path); cannam@226: if (st) { cannam@226: fprintf(stderr, "error reading %s: %s\n", cannam@226: in_path, serd_strerror(st)); cannam@226: } cannam@226: cannam@226: serd_node_free(&base_uri_node); cannam@226: free(in_path); cannam@226: } cannam@226: serd_reader_free(reader); cannam@226: serd_env_free(env); cannam@226: cannam@226: #define URI(prefix, suffix) \ cannam@226: uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix) cannam@226: cannam@226: URIs uris; cannam@226: URI(foaf, Document); cannam@226: URI(owl, AnnotationProperty); cannam@226: URI(owl, Class); cannam@226: URI(owl, DatatypeProperty); cannam@226: URI(owl, FunctionalProperty); cannam@226: URI(owl, InverseFunctionalProperty); cannam@226: URI(owl, ObjectProperty); cannam@226: URI(owl, OntologyProperty); cannam@226: URI(owl, Restriction); cannam@226: URI(owl, Thing); cannam@226: URI(owl, cardinality); cannam@226: URI(owl, equivalentClass); cannam@226: URI(owl, maxCardinality); cannam@226: URI(owl, minCardinality); cannam@226: URI(owl, onDatatype); cannam@226: URI(owl, onProperty); cannam@226: URI(owl, someValuesFrom); cannam@226: URI(owl, withRestrictions); cannam@226: URI(rdf, PlainLiteral); cannam@226: URI(rdf, Property); cannam@226: URI(rdf, first); cannam@226: URI(rdf, rest); cannam@226: URI(rdf, type); cannam@226: URI(rdfs, Class); cannam@226: URI(rdfs, Literal); cannam@226: URI(rdfs, Resource); cannam@226: URI(rdfs, domain); cannam@226: URI(rdfs, label); cannam@226: URI(rdfs, range); cannam@226: URI(rdfs, subClassOf); cannam@226: URI(xsd, anyURI); cannam@226: URI(xsd, decimal); cannam@226: URI(xsd, double); cannam@226: URI(xsd, maxInclusive); cannam@226: URI(xsd, minInclusive); cannam@226: URI(xsd, pattern); cannam@226: URI(xsd, string); cannam@226: cannam@226: #ifndef HAVE_PCRE cannam@226: fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n"); cannam@226: #endif cannam@226: cannam@226: const int prop_st = check_properties(model, &uris); cannam@226: const int inst_st = check_instances(model, &uris); cannam@226: cannam@226: printf("Found %d errors among %d files (checked %d restrictions)\n", cannam@226: n_errors, argc - 1, n_restrictions); cannam@226: cannam@226: sord_free(model); cannam@226: sord_world_free(world); cannam@226: return prop_st || inst_st; cannam@226: }