cannam@226
|
1 /*
|
cannam@226
|
2 Copyright 2012-2017 David Robillard <http://drobilla.net>
|
cannam@226
|
3
|
cannam@226
|
4 Permission to use, copy, modify, and/or distribute this software for any
|
cannam@226
|
5 purpose with or without fee is hereby granted, provided that the above
|
cannam@226
|
6 copyright notice and this permission notice appear in all copies.
|
cannam@226
|
7
|
cannam@226
|
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
cannam@226
|
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
cannam@226
|
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
cannam@226
|
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
cannam@226
|
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
cannam@226
|
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
cannam@226
|
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
cannam@226
|
15 */
|
cannam@226
|
16
|
cannam@226
|
17 #define _BSD_SOURCE 1 // for realpath
|
cannam@226
|
18 #define _DEFAULT_SOURCE 1 // for realpath
|
cannam@226
|
19
|
cannam@226
|
20 #include <assert.h>
|
cannam@226
|
21 #include <stdlib.h>
|
cannam@226
|
22 #include <string.h>
|
cannam@226
|
23
|
cannam@226
|
24 #ifdef _WIN32
|
cannam@226
|
25 # include <windows.h>
|
cannam@226
|
26 #endif
|
cannam@226
|
27
|
cannam@226
|
28 #include "serd/serd.h"
|
cannam@226
|
29 #include "sord/sord.h"
|
cannam@226
|
30 #include "sord_config.h"
|
cannam@226
|
31
|
cannam@226
|
32 #ifdef HAVE_PCRE
|
cannam@226
|
33 # include <pcre.h>
|
cannam@226
|
34 #endif
|
cannam@226
|
35
|
cannam@226
|
/** Cast a C string to the `const uint8_t*` string type used by serd/sord. */
#define USTR(s) ((const uint8_t*)(s))

/* Namespace prefixes.

   These intentionally expand to an UNPARENTHESIZED cast followed by a string
   literal: the URI() macro in main() builds a full URI by placing a second
   string literal right after the expansion (NS_rdf "type"), relying on
   adjacent string literal concatenation.  Wrapping the expansion in
   parentheses would break that. */
#define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/"
#define NS_owl  (const uint8_t*)"http://www.w3.org/2002/07/owl#"
#define NS_rdf  (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#"
#define NS_xsd  (const uint8_t*)"http://www.w3.org/2001/XMLSchema#"
|
cannam@226
|
43
|
cannam@226
|
44 typedef struct {
|
cannam@226
|
45 SordNode* foaf_Document;
|
cannam@226
|
46 SordNode* owl_AnnotationProperty;
|
cannam@226
|
47 SordNode* owl_Class;
|
cannam@226
|
48 SordNode* owl_DatatypeProperty;
|
cannam@226
|
49 SordNode* owl_FunctionalProperty;
|
cannam@226
|
50 SordNode* owl_InverseFunctionalProperty;
|
cannam@226
|
51 SordNode* owl_ObjectProperty;
|
cannam@226
|
52 SordNode* owl_OntologyProperty;
|
cannam@226
|
53 SordNode* owl_Restriction;
|
cannam@226
|
54 SordNode* owl_Thing;
|
cannam@226
|
55 SordNode* owl_cardinality;
|
cannam@226
|
56 SordNode* owl_equivalentClass;
|
cannam@226
|
57 SordNode* owl_maxCardinality;
|
cannam@226
|
58 SordNode* owl_minCardinality;
|
cannam@226
|
59 SordNode* owl_onDatatype;
|
cannam@226
|
60 SordNode* owl_onProperty;
|
cannam@226
|
61 SordNode* owl_someValuesFrom;
|
cannam@226
|
62 SordNode* owl_withRestrictions;
|
cannam@226
|
63 SordNode* rdf_PlainLiteral;
|
cannam@226
|
64 SordNode* rdf_Property;
|
cannam@226
|
65 SordNode* rdf_first;
|
cannam@226
|
66 SordNode* rdf_rest;
|
cannam@226
|
67 SordNode* rdf_type;
|
cannam@226
|
68 SordNode* rdfs_Class;
|
cannam@226
|
69 SordNode* rdfs_Literal;
|
cannam@226
|
70 SordNode* rdfs_Resource;
|
cannam@226
|
71 SordNode* rdfs_domain;
|
cannam@226
|
72 SordNode* rdfs_label;
|
cannam@226
|
73 SordNode* rdfs_range;
|
cannam@226
|
74 SordNode* rdfs_subClassOf;
|
cannam@226
|
75 SordNode* xsd_anyURI;
|
cannam@226
|
76 SordNode* xsd_decimal;
|
cannam@226
|
77 SordNode* xsd_double;
|
cannam@226
|
78 SordNode* xsd_maxInclusive;
|
cannam@226
|
79 SordNode* xsd_minInclusive;
|
cannam@226
|
80 SordNode* xsd_pattern;
|
cannam@226
|
81 SordNode* xsd_string;
|
cannam@226
|
82 } URIs;
|
cannam@226
|
83
|
cannam@226
|
/** Number of errors reported so far; incremented by every errorf() call. */
int n_errors = 0;

/** Number of restriction facets that were checked and satisfied. */
int n_restrictions = 0;

/** When true (-l option), errorf() separates fields with tabs on one line. */
bool one_line_errors = false;
|
cannam@226
|
87
|
cannam@226
|
88 static int
|
cannam@226
|
89 print_version(void)
|
cannam@226
|
90 {
|
cannam@226
|
91 printf("sord_validate " SORD_VERSION
|
cannam@226
|
92 " <http://drobilla.net/software/sord>\n");
|
cannam@226
|
93 printf("Copyright 2012-2017 David Robillard <http://drobilla.net>.\n"
|
cannam@226
|
94 "License: <http://www.opensource.org/licenses/isc>\n"
|
cannam@226
|
95 "This is free software; you are free to change and redistribute it."
|
cannam@226
|
96 "\nThere is NO WARRANTY, to the extent permitted by law.\n");
|
cannam@226
|
97 return 0;
|
cannam@226
|
98 }
|
cannam@226
|
99
|
cannam@226
|
/**
   Print the command line help text.

   @param name Program name to show in the usage line (normally argv[0]).
   @param error If true, the text goes to stderr and 1 is returned; otherwise
   it goes to stdout and 0 is returned.
   @return 1 if `error` is true, 0 otherwise.
*/
static int
print_usage(const char* name, bool error)
{
    FILE* const out = error ? stderr : stdout;

    // Only the first line needs formatting, the rest is constant text
    fprintf(out, "Usage: %s [OPTION]... INPUT...\n", name);
    fputs("Validate RDF data\n\n"
          " -h Display this help and exit\n"
          " -l Print errors on a single line.\n"
          " -v Display version information and exit\n"
          "Validate RDF data. This is a simple validator which checks\n"
          "that all used properties are actually defined. It does not do\n"
          "any fancy file retrieval, the files passed on the command line\n"
          "are the only data that is read. In other words, you must pass\n"
          "the definition of all vocabularies used on the command line.\n",
          out);

    if (error) {
        return 1;
    }
    return 0;
}
|
cannam@226
|
117
|
cannam@226
|
/**
   Resolve `path` to an absolute path.

   @param path File system path to resolve, may be NULL.
   @return A newly allocated string which the caller must free(), or NULL if
   `path` is NULL, memory could not be allocated, or the path could not be
   resolved.
*/
static uint8_t*
absolute_path(const uint8_t* path)
{
    if (!path) {
        return NULL;  // Nothing to resolve
    }

#ifdef _WIN32
    char* out = (char*)malloc(MAX_PATH);
    if (!out) {
        return NULL;
    }

    /* GetFullPathName returns 0 on failure, and the required buffer size
       (which is >= MAX_PATH here) if the buffer is too small.  In both cases
       `out` does not hold a usable path. */
    const DWORD len = GetFullPathName((const char*)path, MAX_PATH, out, NULL);
    if (len == 0 || len >= MAX_PATH) {
        free(out);
        return NULL;
    }

    return (uint8_t*)out;
#else
    // With a NULL buffer, realpath allocates the result with malloc
    return (uint8_t*)realpath((const char*)path, NULL);
#endif
}
|
cannam@226
|
129
|
cannam@226
|
130 static int
|
cannam@226
|
131 errorf(const SordQuad quad, const char* fmt, ...)
|
cannam@226
|
132 {
|
cannam@226
|
133 va_list args;
|
cannam@226
|
134 va_start(args, fmt);
|
cannam@226
|
135 fprintf(stderr, "error: ");
|
cannam@226
|
136 vfprintf(stderr, fmt, args);
|
cannam@226
|
137 va_end(args);
|
cannam@226
|
138
|
cannam@226
|
139 const char* sep = one_line_errors ? "\t" : "\n ";
|
cannam@226
|
140 fprintf(stderr, "%s%s%s%s%s%s\n",
|
cannam@226
|
141 sep, (const char*)sord_node_get_string(quad[SORD_SUBJECT]),
|
cannam@226
|
142 sep, (const char*)sord_node_get_string(quad[SORD_PREDICATE]),
|
cannam@226
|
143 sep, (const char*)sord_node_get_string(quad[SORD_OBJECT]));
|
cannam@226
|
144
|
cannam@226
|
145 ++n_errors;
|
cannam@226
|
146 return 1;
|
cannam@226
|
147 }
|
cannam@226
|
148
|
cannam@226
|
149 static bool
|
cannam@226
|
150 is_descendant_of(SordModel* model,
|
cannam@226
|
151 const URIs* uris,
|
cannam@226
|
152 const SordNode* child,
|
cannam@226
|
153 const SordNode* parent,
|
cannam@226
|
154 const SordNode* pred)
|
cannam@226
|
155 {
|
cannam@226
|
156 if (!child) {
|
cannam@226
|
157 return false;
|
cannam@226
|
158 } else if (sord_node_equals(child, parent) ||
|
cannam@226
|
159 sord_ask(model, child, uris->owl_equivalentClass, parent, NULL)) {
|
cannam@226
|
160 return true;
|
cannam@226
|
161 }
|
cannam@226
|
162
|
cannam@226
|
163 SordIter* i = sord_search(model, child, pred, NULL, NULL);
|
cannam@226
|
164 for (; !sord_iter_end(i); sord_iter_next(i)) {
|
cannam@226
|
165 const SordNode* o = sord_iter_get_node(i, SORD_OBJECT);
|
cannam@226
|
166 if (sord_node_equals(child, o)) {
|
cannam@226
|
167 continue; // Weird class is explicitly a descendent of itself
|
cannam@226
|
168 }
|
cannam@226
|
169 if (is_descendant_of(model, uris, o, parent, pred)) {
|
cannam@226
|
170 sord_iter_free(i);
|
cannam@226
|
171 return true;
|
cannam@226
|
172 }
|
cannam@226
|
173 }
|
cannam@226
|
174 sord_iter_free(i);
|
cannam@226
|
175
|
cannam@226
|
176 return false;
|
cannam@226
|
177 }
|
cannam@226
|
178
|
cannam@226
|
/**
   Return true iff the whole of `str` matches the regular expression `pat`.

   When built without PCRE no matching is possible, and this always returns
   true.

   @param pat Regular expression (as found in an xsd:pattern facet).
   @param str String to test.
   @return False if the string does not match, the pattern does not compile,
   or memory could not be allocated.
*/
static bool
regexp_match(const uint8_t* pat, const char* str)
{
#ifdef HAVE_PCRE
    /* Wrap the pattern as (?:pat)$ so that, together with PCRE_ANCHORED, only
       a match of the entire string counts.  Simply appending "$" is not
       enough: for a pattern with alternatives like "a|b" it would only anchor
       the last alternative ("a|b$"), so "abc" would be accepted. */
    static const char prefix[] = "(?:";
    static const char suffix[] = ")$";
    const size_t      pre_len  = sizeof(prefix) - 1;
    const size_t      suf_len  = sizeof(suffix) - 1;
    const size_t      pat_len  = strlen((const char*)pat);

    char* const regx = (char*)malloc(pre_len + pat_len + suf_len + 1);
    if (!regx) {
        fprintf(stderr, "Failed to allocate memory for pattern `%s'\n", pat);
        return false;
    }
    memcpy(regx, prefix, pre_len);
    memcpy(regx + pre_len, pat, pat_len);
    memcpy(regx + pre_len + pat_len, suffix, suf_len + 1);  // Includes '\0'

    const char* err       = NULL;
    int         erroffset = 0;
    pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL);
    free(regx);
    if (!re) {
        // Report the offset relative to the pattern the user wrote
        const int offset =
            (erroffset > (int)pre_len) ? erroffset - (int)pre_len : 0;
        fprintf(stderr, "Error in pattern `%s' at offset %d (%s)\n",
                pat, offset, err);
        return false;
    }

    const bool ret =
        pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
    pcre_free(re);
    return ret;
#else
    (void)pat;
    (void)str;
    return true;  // Can not check, be tolerant
#endif  // HAVE_PCRE
}
|
cannam@226
|
206
|
cannam@226
|
207 static int
|
cannam@226
|
208 bound_cmp(SordModel* model,
|
cannam@226
|
209 const URIs* uris,
|
cannam@226
|
210 const SordNode* literal,
|
cannam@226
|
211 const SordNode* type,
|
cannam@226
|
212 const SordNode* bound)
|
cannam@226
|
213 {
|
cannam@226
|
214 const char* str = (const char*)sord_node_get_string(literal);
|
cannam@226
|
215 const char* bound_str = (const char*)sord_node_get_string(bound);
|
cannam@226
|
216 const bool is_numeric =
|
cannam@226
|
217 is_descendant_of(model, uris, type, uris->xsd_decimal, uris->owl_onDatatype) ||
|
cannam@226
|
218 is_descendant_of(model, uris, type, uris->xsd_double, uris->owl_onDatatype);
|
cannam@226
|
219
|
cannam@226
|
220 if (is_numeric) {
|
cannam@226
|
221 const double fbound = serd_strtod(bound_str, NULL);
|
cannam@226
|
222 const double fliteral = serd_strtod(str, NULL);
|
cannam@226
|
223 return ((fliteral < fbound) ? -1 :
|
cannam@226
|
224 (fliteral > fbound) ? 1 :
|
cannam@226
|
225 0);
|
cannam@226
|
226 } else {
|
cannam@226
|
227 return strcmp(str, bound_str);
|
cannam@226
|
228 }
|
cannam@226
|
229 }
|
cannam@226
|
230
|
cannam@226
|
231 static bool
|
cannam@226
|
232 check_restriction(SordModel* model,
|
cannam@226
|
233 const URIs* uris,
|
cannam@226
|
234 const SordNode* literal,
|
cannam@226
|
235 const SordNode* type,
|
cannam@226
|
236 const SordNode* restriction)
|
cannam@226
|
237 {
|
cannam@226
|
238 size_t len = 0;
|
cannam@226
|
239 const char* str = (const char*)sord_node_get_string_counted(literal, &len);
|
cannam@226
|
240
|
cannam@226
|
241 // Check xsd:pattern
|
cannam@226
|
242 SordIter* p = sord_search(model, restriction, uris->xsd_pattern, 0, 0);
|
cannam@226
|
243 if (p) {
|
cannam@226
|
244 const SordNode* pat = sord_iter_get_node(p, SORD_OBJECT);
|
cannam@226
|
245 if (!regexp_match(sord_node_get_string(pat), str)) {
|
cannam@226
|
246 fprintf(stderr, "`%s' does not match <%s> pattern `%s'\n",
|
cannam@226
|
247 sord_node_get_string(literal),
|
cannam@226
|
248 sord_node_get_string(type),
|
cannam@226
|
249 sord_node_get_string(pat));
|
cannam@226
|
250 sord_iter_free(p);
|
cannam@226
|
251 return false;
|
cannam@226
|
252 }
|
cannam@226
|
253 sord_iter_free(p);
|
cannam@226
|
254 ++n_restrictions;
|
cannam@226
|
255 }
|
cannam@226
|
256
|
cannam@226
|
257 // Check xsd:minInclusive
|
cannam@226
|
258 SordIter* l = sord_search(model, restriction, uris->xsd_minInclusive, 0, 0);
|
cannam@226
|
259 if (l) {
|
cannam@226
|
260 const SordNode* lower = sord_iter_get_node(l, SORD_OBJECT);
|
cannam@226
|
261 if (bound_cmp(model, uris, literal, type, lower) < 0) {
|
cannam@226
|
262 fprintf(stderr, "`%s' is not >= <%s> minimum `%s'\n",
|
cannam@226
|
263 sord_node_get_string(literal),
|
cannam@226
|
264 sord_node_get_string(type),
|
cannam@226
|
265 sord_node_get_string(lower));
|
cannam@226
|
266 sord_iter_free(l);
|
cannam@226
|
267 return false;
|
cannam@226
|
268 }
|
cannam@226
|
269 sord_iter_free(l);
|
cannam@226
|
270 ++n_restrictions;
|
cannam@226
|
271 }
|
cannam@226
|
272
|
cannam@226
|
273 // Check xsd:maxInclusive
|
cannam@226
|
274 SordIter* u = sord_search(model, restriction, uris->xsd_maxInclusive, 0, 0);
|
cannam@226
|
275 if (u) {
|
cannam@226
|
276 const SordNode* upper = sord_iter_get_node(u, SORD_OBJECT);
|
cannam@226
|
277 if (bound_cmp(model, uris, literal, type, upper) > 0) {
|
cannam@226
|
278 fprintf(stderr, "`%s' is not <= <%s> maximum `%s'\n",
|
cannam@226
|
279 sord_node_get_string(literal),
|
cannam@226
|
280 sord_node_get_string(type),
|
cannam@226
|
281 sord_node_get_string(upper));
|
cannam@226
|
282 sord_iter_free(u);
|
cannam@226
|
283 return false;
|
cannam@226
|
284 }
|
cannam@226
|
285 sord_iter_free(u);
|
cannam@226
|
286 ++n_restrictions;
|
cannam@226
|
287 }
|
cannam@226
|
288
|
cannam@226
|
289 return true; // Unknown restriction, be quietly tolerant
|
cannam@226
|
290 }
|
cannam@226
|
291
|
cannam@226
|
292 static bool
|
cannam@226
|
293 literal_is_valid(SordModel* model,
|
cannam@226
|
294 const URIs* uris,
|
cannam@226
|
295 const SordQuad quad,
|
cannam@226
|
296 const SordNode* literal,
|
cannam@226
|
297 const SordNode* type)
|
cannam@226
|
298 {
|
cannam@226
|
299 if (!type) {
|
cannam@226
|
300 return true;
|
cannam@226
|
301 }
|
cannam@226
|
302
|
cannam@226
|
303 /* Check that literal data is related to required type. We don't do a
|
cannam@226
|
304 strict subtype check here because e.g. an xsd:decimal might be a valid
|
cannam@226
|
305 xsd:unsignedInt, which the pattern checks will verify, but if the
|
cannam@226
|
306 literal type is not related to the required type at all
|
cannam@226
|
307 (e.g. xsd:decimal and xsd:string) there is a problem. */
|
cannam@226
|
308 const SordNode* datatype = sord_node_get_datatype(literal);
|
cannam@226
|
309 if (datatype && datatype != type) {
|
cannam@226
|
310 if (!is_descendant_of(
|
cannam@226
|
311 model, uris,
|
cannam@226
|
312 datatype, type, uris->owl_onDatatype) &&
|
cannam@226
|
313 !is_descendant_of(
|
cannam@226
|
314 model, uris,
|
cannam@226
|
315 type, datatype, uris->owl_onDatatype) &&
|
cannam@226
|
316 !(sord_node_equals(datatype, uris->xsd_decimal) &&
|
cannam@226
|
317 is_descendant_of(
|
cannam@226
|
318 model, uris,
|
cannam@226
|
319 type, uris->xsd_double, uris->owl_onDatatype))) {
|
cannam@226
|
320 errorf(quad,
|
cannam@226
|
321 "Literal `%s' datatype <%s> is not compatible with <%s>\n",
|
cannam@226
|
322 sord_node_get_string(literal),
|
cannam@226
|
323 sord_node_get_string(datatype),
|
cannam@226
|
324 sord_node_get_string(type));
|
cannam@226
|
325 return false;
|
cannam@226
|
326 }
|
cannam@226
|
327 }
|
cannam@226
|
328
|
cannam@226
|
329 // Find restrictions list
|
cannam@226
|
330 SordIter* rs = sord_search(model, type, uris->owl_withRestrictions, 0, 0);
|
cannam@226
|
331 if (sord_iter_end(rs)) {
|
cannam@226
|
332 return true; // No restrictions
|
cannam@226
|
333 }
|
cannam@226
|
334
|
cannam@226
|
335 // Walk list, checking each restriction
|
cannam@226
|
336 const SordNode* head = sord_iter_get_node(rs, SORD_OBJECT);
|
cannam@226
|
337 while (head) {
|
cannam@226
|
338 SordIter* f = sord_search(model, head, uris->rdf_first, 0, 0);
|
cannam@226
|
339 if (!f) {
|
cannam@226
|
340 break; // Reached end of restrictions list without failure
|
cannam@226
|
341 }
|
cannam@226
|
342
|
cannam@226
|
343 // Check this restriction
|
cannam@226
|
344 const bool good = check_restriction(
|
cannam@226
|
345 model, uris, literal, type, sord_iter_get_node(f, SORD_OBJECT));
|
cannam@226
|
346 sord_iter_free(f);
|
cannam@226
|
347
|
cannam@226
|
348 if (!good) {
|
cannam@226
|
349 sord_iter_free(rs);
|
cannam@226
|
350 return false; // Failed, literal is invalid
|
cannam@226
|
351 }
|
cannam@226
|
352
|
cannam@226
|
353 // Seek to next list node
|
cannam@226
|
354 SordIter* n = sord_search(model, head, uris->rdf_rest, 0, 0);
|
cannam@226
|
355 head = n ? sord_iter_get_node(n, SORD_OBJECT) : NULL;
|
cannam@226
|
356 sord_iter_free(n);
|
cannam@226
|
357 }
|
cannam@226
|
358
|
cannam@226
|
359 sord_iter_free(rs);
|
cannam@226
|
360
|
cannam@226
|
361 SordIter* s = sord_search(model, type, uris->owl_onDatatype, 0, 0);
|
cannam@226
|
362 if (s) {
|
cannam@226
|
363 const SordNode* super = sord_iter_get_node(s, SORD_OBJECT);
|
cannam@226
|
364 const bool good = literal_is_valid(
|
cannam@226
|
365 model, uris, quad, literal, super);
|
cannam@226
|
366 sord_iter_free(s);
|
cannam@226
|
367 return good; // Match iff literal also matches supertype
|
cannam@226
|
368 }
|
cannam@226
|
369
|
cannam@226
|
370 return true; // Matches top level type
|
cannam@226
|
371 }
|
cannam@226
|
372
|
cannam@226
|
373 static bool
|
cannam@226
|
374 check_type(SordModel* model,
|
cannam@226
|
375 const URIs* uris,
|
cannam@226
|
376 const SordQuad quad,
|
cannam@226
|
377 const SordNode* node,
|
cannam@226
|
378 const SordNode* type)
|
cannam@226
|
379 {
|
cannam@226
|
380 if (sord_node_equals(type, uris->rdfs_Resource) ||
|
cannam@226
|
381 sord_node_equals(type, uris->owl_Thing)) {
|
cannam@226
|
382 return true;
|
cannam@226
|
383 }
|
cannam@226
|
384
|
cannam@226
|
385 if (sord_node_get_type(node) == SORD_LITERAL) {
|
cannam@226
|
386 if (sord_node_equals(type, uris->rdfs_Literal)) {
|
cannam@226
|
387 return true;
|
cannam@226
|
388 } else if (sord_node_equals(type, uris->rdf_PlainLiteral)) {
|
cannam@226
|
389 return !sord_node_get_language(node);
|
cannam@226
|
390 } else {
|
cannam@226
|
391 return literal_is_valid(model, uris, quad, node, type);
|
cannam@226
|
392 }
|
cannam@226
|
393 } else if (sord_node_get_type(node) == SORD_URI) {
|
cannam@226
|
394 if (sord_node_equals(type, uris->foaf_Document)) {
|
cannam@226
|
395 return true; // Questionable...
|
cannam@226
|
396 } else if (is_descendant_of(
|
cannam@226
|
397 model, uris,
|
cannam@226
|
398 type, uris->xsd_anyURI, uris->owl_onDatatype)) {
|
cannam@226
|
399 /* Type is any URI and this is a URI, so pass. Restrictions on
|
cannam@226
|
400 anyURI subtypes are not currently checked (very uncommon). */
|
cannam@226
|
401 return true; // Type is anyURI, and this is a URI
|
cannam@226
|
402 } else {
|
cannam@226
|
403 SordIter* t = sord_search(model, node, uris->rdf_type, NULL, NULL);
|
cannam@226
|
404 for (; !sord_iter_end(t); sord_iter_next(t)) {
|
cannam@226
|
405 if (is_descendant_of(model, uris,
|
cannam@226
|
406 sord_iter_get_node(t, SORD_OBJECT),
|
cannam@226
|
407 type,
|
cannam@226
|
408 uris->rdfs_subClassOf)) {
|
cannam@226
|
409 sord_iter_free(t);
|
cannam@226
|
410 return true;
|
cannam@226
|
411 }
|
cannam@226
|
412 }
|
cannam@226
|
413 sord_iter_free(t);
|
cannam@226
|
414 return false;
|
cannam@226
|
415 }
|
cannam@226
|
416 } else {
|
cannam@226
|
417 return true; // Blanks often lack explicit types, ignore
|
cannam@226
|
418 }
|
cannam@226
|
419
|
cannam@226
|
420 return false;
|
cannam@226
|
421 }
|
cannam@226
|
422
|
cannam@226
|
423 static uint64_t
|
cannam@226
|
424 count_non_blanks(SordIter* i, SordQuadIndex field)
|
cannam@226
|
425 {
|
cannam@226
|
426 uint64_t n = 0;
|
cannam@226
|
427 for (; !sord_iter_end(i); sord_iter_next(i)) {
|
cannam@226
|
428 const SordNode* node = sord_iter_get_node(i, field);
|
cannam@226
|
429 if (sord_node_get_type(node) != SORD_BLANK) {
|
cannam@226
|
430 ++n;
|
cannam@226
|
431 }
|
cannam@226
|
432 }
|
cannam@226
|
433 return n;
|
cannam@226
|
434 }
|
cannam@226
|
435
|
cannam@226
|
436 static int
|
cannam@226
|
437 check_properties(SordModel* model, URIs* uris)
|
cannam@226
|
438 {
|
cannam@226
|
439 int st = 0;
|
cannam@226
|
440 SordIter* i = sord_begin(model);
|
cannam@226
|
441 for (; !sord_iter_end(i); sord_iter_next(i)) {
|
cannam@226
|
442 SordQuad quad;
|
cannam@226
|
443 sord_iter_get(i, quad);
|
cannam@226
|
444
|
cannam@226
|
445 const SordNode* subj = quad[SORD_SUBJECT];
|
cannam@226
|
446 const SordNode* pred = quad[SORD_PREDICATE];
|
cannam@226
|
447 const SordNode* obj = quad[SORD_OBJECT];
|
cannam@226
|
448
|
cannam@226
|
449 bool is_any_property = false;
|
cannam@226
|
450 SordIter* t = sord_search(model, pred, uris->rdf_type, NULL, NULL);
|
cannam@226
|
451 for (; !sord_iter_end(t); sord_iter_next(t)) {
|
cannam@226
|
452 if (is_descendant_of(model, uris,
|
cannam@226
|
453 sord_iter_get_node(t, SORD_OBJECT),
|
cannam@226
|
454 uris->rdf_Property,
|
cannam@226
|
455 uris->rdfs_subClassOf)) {
|
cannam@226
|
456 is_any_property = true;
|
cannam@226
|
457 break;
|
cannam@226
|
458 }
|
cannam@226
|
459 }
|
cannam@226
|
460 sord_iter_free(t);
|
cannam@226
|
461
|
cannam@226
|
462 const bool is_ObjectProperty = sord_ask(
|
cannam@226
|
463 model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0);
|
cannam@226
|
464 const bool is_FunctionalProperty = sord_ask(
|
cannam@226
|
465 model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0);
|
cannam@226
|
466 const bool is_InverseFunctionalProperty = sord_ask(
|
cannam@226
|
467 model, pred, uris->rdf_type, uris->owl_InverseFunctionalProperty, 0);
|
cannam@226
|
468 const bool is_DatatypeProperty = sord_ask(
|
cannam@226
|
469 model, pred, uris->rdf_type, uris->owl_DatatypeProperty, 0);
|
cannam@226
|
470
|
cannam@226
|
471 if (!is_any_property) {
|
cannam@226
|
472 st = errorf(quad, "Use of undefined property");
|
cannam@226
|
473 }
|
cannam@226
|
474
|
cannam@226
|
475 if (!sord_ask(model, pred, uris->rdfs_label, NULL, NULL)) {
|
cannam@226
|
476 st = errorf(quad, "Property <%s> has no label",
|
cannam@226
|
477 sord_node_get_string(pred));
|
cannam@226
|
478 }
|
cannam@226
|
479
|
cannam@226
|
480 if (is_DatatypeProperty &&
|
cannam@226
|
481 sord_node_get_type(obj) != SORD_LITERAL) {
|
cannam@226
|
482 st = errorf(quad, "Datatype property with non-literal value");
|
cannam@226
|
483 }
|
cannam@226
|
484
|
cannam@226
|
485 if (is_ObjectProperty &&
|
cannam@226
|
486 sord_node_get_type(obj) == SORD_LITERAL) {
|
cannam@226
|
487 st = errorf(quad, "Object property with literal value");
|
cannam@226
|
488 }
|
cannam@226
|
489
|
cannam@226
|
490 if (is_FunctionalProperty) {
|
cannam@226
|
491 SordIter* o = sord_search(model, subj, pred, NULL, NULL);
|
cannam@226
|
492 const uint64_t n = count_non_blanks(o, SORD_OBJECT);
|
cannam@226
|
493 if (n > 1) {
|
cannam@226
|
494 st = errorf(quad, "Functional property with %u objects", n);
|
cannam@226
|
495 }
|
cannam@226
|
496 sord_iter_free(o);
|
cannam@226
|
497 }
|
cannam@226
|
498
|
cannam@226
|
499 if (is_InverseFunctionalProperty) {
|
cannam@226
|
500 SordIter* s = sord_search(model, NULL, pred, obj, NULL);
|
cannam@226
|
501 const unsigned n = count_non_blanks(s, SORD_SUBJECT);
|
cannam@226
|
502 if (n > 1) {
|
cannam@226
|
503 st = errorf(
|
cannam@226
|
504 quad, "Inverse functional property with %u subjects", n);
|
cannam@226
|
505 }
|
cannam@226
|
506 sord_iter_free(s);
|
cannam@226
|
507 }
|
cannam@226
|
508
|
cannam@226
|
509 if (sord_node_equals(pred, uris->rdf_type) &&
|
cannam@226
|
510 !sord_ask(model, obj, uris->rdf_type, uris->rdfs_Class, NULL) &&
|
cannam@226
|
511 !sord_ask(model, obj, uris->rdf_type, uris->owl_Class, NULL)) {
|
cannam@226
|
512 st = errorf(quad, "Type is not a rdfs:Class or owl:Class");
|
cannam@226
|
513 }
|
cannam@226
|
514
|
cannam@226
|
515 if (sord_node_get_type(obj) == SORD_LITERAL &&
|
cannam@226
|
516 !literal_is_valid(model, uris, quad,
|
cannam@226
|
517 obj, sord_node_get_datatype(obj))) {
|
cannam@226
|
518 st = errorf(quad, "Literal does not match datatype");
|
cannam@226
|
519 }
|
cannam@226
|
520
|
cannam@226
|
521 SordIter* r = sord_search(model, pred, uris->rdfs_range, NULL, NULL);
|
cannam@226
|
522 for (; !sord_iter_end(r); sord_iter_next(r)) {
|
cannam@226
|
523 const SordNode* range = sord_iter_get_node(r, SORD_OBJECT);
|
cannam@226
|
524 if (!check_type(model, uris, quad, obj, range)) {
|
cannam@226
|
525 st = errorf(quad, "Object not in range <%s>\n",
|
cannam@226
|
526 sord_node_get_string(range));
|
cannam@226
|
527 }
|
cannam@226
|
528 }
|
cannam@226
|
529 sord_iter_free(r);
|
cannam@226
|
530
|
cannam@226
|
531 SordIter* d = sord_search(model, pred, uris->rdfs_domain, NULL, NULL);
|
cannam@226
|
532 if (d) {
|
cannam@226
|
533 const SordNode* domain = sord_iter_get_node(d, SORD_OBJECT);
|
cannam@226
|
534 if (!check_type(model, uris, quad, subj, domain)) {
|
cannam@226
|
535 st = errorf(quad, "Subject not in domain <%s>",
|
cannam@226
|
536 sord_node_get_string(domain));
|
cannam@226
|
537 }
|
cannam@226
|
538 sord_iter_free(d);
|
cannam@226
|
539 }
|
cannam@226
|
540 }
|
cannam@226
|
541 sord_iter_free(i);
|
cannam@226
|
542
|
cannam@226
|
543 return st;
|
cannam@226
|
544 }
|
cannam@226
|
545
|
cannam@226
|
546 static int
|
cannam@226
|
547 check_instance(SordModel* model,
|
cannam@226
|
548 const URIs* uris,
|
cannam@226
|
549 const SordNode* restriction,
|
cannam@226
|
550 const SordQuad quad)
|
cannam@226
|
551 {
|
cannam@226
|
552 const SordNode* instance = quad[SORD_SUBJECT];
|
cannam@226
|
553 int st = 0;
|
cannam@226
|
554
|
cannam@226
|
555 const SordNode* prop = sord_get(
|
cannam@226
|
556 model, restriction, uris->owl_onProperty, NULL, NULL);
|
cannam@226
|
557 if (!prop) {
|
cannam@226
|
558 return 0;
|
cannam@226
|
559 }
|
cannam@226
|
560
|
cannam@226
|
561 const unsigned values = sord_count(model, instance, prop, NULL, NULL);
|
cannam@226
|
562
|
cannam@226
|
563 // Check exact cardinality
|
cannam@226
|
564 const SordNode* card = sord_get(
|
cannam@226
|
565 model, restriction, uris->owl_cardinality, NULL, NULL);
|
cannam@226
|
566 if (card) {
|
cannam@226
|
567 const unsigned c = atoi((const char*)sord_node_get_string(card));
|
cannam@226
|
568 if (values != c) {
|
cannam@226
|
569 st = errorf(quad, "Property %s on %s has %u != %u values",
|
cannam@226
|
570 sord_node_get_string(prop),
|
cannam@226
|
571 sord_node_get_string(instance),
|
cannam@226
|
572 values, c);
|
cannam@226
|
573 }
|
cannam@226
|
574 }
|
cannam@226
|
575
|
cannam@226
|
576 // Check minimum cardinality
|
cannam@226
|
577 const SordNode* minCard = sord_get(
|
cannam@226
|
578 model, restriction, uris->owl_minCardinality, NULL, NULL);
|
cannam@226
|
579 if (minCard) {
|
cannam@226
|
580 const unsigned m = atoi((const char*)sord_node_get_string(minCard));
|
cannam@226
|
581 if (values < m) {
|
cannam@226
|
582 st = errorf(quad, "Property %s on %s has %u < %u values",
|
cannam@226
|
583 sord_node_get_string(prop),
|
cannam@226
|
584 sord_node_get_string(instance),
|
cannam@226
|
585 values, m);
|
cannam@226
|
586 }
|
cannam@226
|
587 }
|
cannam@226
|
588
|
cannam@226
|
589 // Check maximum cardinality
|
cannam@226
|
590 const SordNode* maxCard = sord_get(
|
cannam@226
|
591 model, restriction, uris->owl_maxCardinality, NULL, NULL);
|
cannam@226
|
592 if (maxCard) {
|
cannam@226
|
593 const unsigned m = atoi((const char*)sord_node_get_string(maxCard));
|
cannam@226
|
594 if (values < m) {
|
cannam@226
|
595 st = errorf(quad, "Property %s on %s has %u > %u values",
|
cannam@226
|
596 sord_node_get_string(prop),
|
cannam@226
|
597 sord_node_get_string(instance),
|
cannam@226
|
598 values, m);
|
cannam@226
|
599 }
|
cannam@226
|
600 }
|
cannam@226
|
601
|
cannam@226
|
602 // Check someValuesFrom
|
cannam@226
|
603 SordIter* sf = sord_search(
|
cannam@226
|
604 model, restriction, uris->owl_someValuesFrom, NULL, NULL);
|
cannam@226
|
605 if (sf) {
|
cannam@226
|
606 const SordNode* type = sord_iter_get_node(sf, SORD_OBJECT);
|
cannam@226
|
607
|
cannam@226
|
608 SordIter* v = sord_search(model, instance, prop, NULL, NULL);
|
cannam@226
|
609 bool found = false;
|
cannam@226
|
610 for (; !sord_iter_end(v); sord_iter_next(v)) {
|
cannam@226
|
611 const SordNode* value = sord_iter_get_node(v, SORD_OBJECT);
|
cannam@226
|
612 if (check_type(model, uris, quad, value, type)) {
|
cannam@226
|
613 found = true;
|
cannam@226
|
614 break;
|
cannam@226
|
615 }
|
cannam@226
|
616 }
|
cannam@226
|
617 if (!found) {
|
cannam@226
|
618 st = errorf(quad, "%s has no <%s> values of type <%s>\n",
|
cannam@226
|
619 sord_node_get_string(instance),
|
cannam@226
|
620 sord_node_get_string(prop),
|
cannam@226
|
621 sord_node_get_string(type));
|
cannam@226
|
622 }
|
cannam@226
|
623 sord_iter_free(v);
|
cannam@226
|
624 }
|
cannam@226
|
625 sord_iter_free(sf);
|
cannam@226
|
626
|
cannam@226
|
627 return st;
|
cannam@226
|
628 }
|
cannam@226
|
629
|
cannam@226
|
630 static int
|
cannam@226
|
631 check_class_instances(SordModel* model,
|
cannam@226
|
632 const URIs* uris,
|
cannam@226
|
633 const SordNode* restriction,
|
cannam@226
|
634 const SordNode* klass)
|
cannam@226
|
635 {
|
cannam@226
|
636 // Check immediate instances of this class
|
cannam@226
|
637 SordIter* i = sord_search(model, NULL, uris->rdf_type, klass, NULL);
|
cannam@226
|
638 for (; !sord_iter_end(i); sord_iter_next(i)) {
|
cannam@226
|
639 SordQuad quad;
|
cannam@226
|
640 sord_iter_get(i, quad);
|
cannam@226
|
641 check_instance(model, uris, restriction, quad);
|
cannam@226
|
642 }
|
cannam@226
|
643 sord_iter_free(i);
|
cannam@226
|
644
|
cannam@226
|
645 // Check instances of all subclasses recursively
|
cannam@226
|
646 SordIter* s = sord_search(model, NULL, uris->rdfs_subClassOf, klass, NULL);
|
cannam@226
|
647 for (; !sord_iter_end(s); sord_iter_next(s)) {
|
cannam@226
|
648 const SordNode* subklass = sord_iter_get_node(s, SORD_SUBJECT);
|
cannam@226
|
649 check_class_instances(model, uris, restriction, subklass);
|
cannam@226
|
650 }
|
cannam@226
|
651 sord_iter_free(s);
|
cannam@226
|
652
|
cannam@226
|
653 return 0;
|
cannam@226
|
654 }
|
cannam@226
|
655
|
cannam@226
|
656 static int
|
cannam@226
|
657 check_instances(SordModel* model, const URIs* uris)
|
cannam@226
|
658 {
|
cannam@226
|
659 int st = 0;
|
cannam@226
|
660 SordIter* r = sord_search(
|
cannam@226
|
661 model, NULL, uris->rdf_type, uris->owl_Restriction, NULL);
|
cannam@226
|
662 for (; !sord_iter_end(r); sord_iter_next(r)) {
|
cannam@226
|
663 const SordNode* restriction = sord_iter_get_node(r, SORD_SUBJECT);
|
cannam@226
|
664 const SordNode* prop = sord_get(
|
cannam@226
|
665 model, restriction, uris->owl_onProperty, NULL, NULL);
|
cannam@226
|
666 if (!prop) {
|
cannam@226
|
667 continue;
|
cannam@226
|
668 }
|
cannam@226
|
669
|
cannam@226
|
670 SordIter* c = sord_search(
|
cannam@226
|
671 model, NULL, uris->rdfs_subClassOf, restriction, NULL);
|
cannam@226
|
672 for (; !sord_iter_end(c); sord_iter_next(c)) {
|
cannam@226
|
673 const SordNode* klass = sord_iter_get_node(c, SORD_SUBJECT);
|
cannam@226
|
674 check_class_instances(model, uris, restriction, klass);
|
cannam@226
|
675 }
|
cannam@226
|
676 sord_iter_free(c);
|
cannam@226
|
677 }
|
cannam@226
|
678 sord_iter_free(r);
|
cannam@226
|
679
|
cannam@226
|
680 return st;
|
cannam@226
|
681 }
|
cannam@226
|
682
|
cannam@226
|
683 int
|
cannam@226
|
684 main(int argc, char** argv)
|
cannam@226
|
685 {
|
cannam@226
|
686 if (argc < 2) {
|
cannam@226
|
687 return print_usage(argv[0], true);
|
cannam@226
|
688 }
|
cannam@226
|
689
|
cannam@226
|
690 int a = 1;
|
cannam@226
|
691 for (; a < argc && argv[a][0] == '-'; ++a) {
|
cannam@226
|
692 if (argv[a][1] == 'l') {
|
cannam@226
|
693 one_line_errors = true;
|
cannam@226
|
694 } else if (argv[a][1] == 'v') {
|
cannam@226
|
695 return print_version();
|
cannam@226
|
696 } else {
|
cannam@226
|
697 fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]);
|
cannam@226
|
698 return print_usage(argv[0], true);
|
cannam@226
|
699 }
|
cannam@226
|
700 }
|
cannam@226
|
701
|
cannam@226
|
702 SordWorld* world = sord_world_new();
|
cannam@226
|
703 SordModel* model = sord_new(world, SORD_SPO|SORD_OPS, false);
|
cannam@226
|
704 SerdEnv* env = serd_env_new(&SERD_NODE_NULL);
|
cannam@226
|
705 SerdReader* reader = sord_new_reader(model, env, SERD_TURTLE, NULL);
|
cannam@226
|
706
|
cannam@226
|
707 for (; a < argc; ++a) {
|
cannam@226
|
708 const uint8_t* input = (const uint8_t*)argv[a];
|
cannam@226
|
709 uint8_t* in_path = absolute_path(serd_uri_to_path(input));
|
cannam@226
|
710
|
cannam@226
|
711 if (!in_path) {
|
cannam@226
|
712 fprintf(stderr, "Skipping file %s\n", input);
|
cannam@226
|
713 continue;
|
cannam@226
|
714 }
|
cannam@226
|
715
|
cannam@226
|
716 SerdURI base_uri;
|
cannam@226
|
717 SerdNode base_uri_node = serd_node_new_file_uri(
|
cannam@226
|
718 in_path, NULL, &base_uri, true);
|
cannam@226
|
719
|
cannam@226
|
720 serd_env_set_base_uri(env, &base_uri_node);
|
cannam@226
|
721 const SerdStatus st = serd_reader_read_file(reader, in_path);
|
cannam@226
|
722 if (st) {
|
cannam@226
|
723 fprintf(stderr, "error reading %s: %s\n",
|
cannam@226
|
724 in_path, serd_strerror(st));
|
cannam@226
|
725 }
|
cannam@226
|
726
|
cannam@226
|
727 serd_node_free(&base_uri_node);
|
cannam@226
|
728 free(in_path);
|
cannam@226
|
729 }
|
cannam@226
|
730 serd_reader_free(reader);
|
cannam@226
|
731 serd_env_free(env);
|
cannam@226
|
732
|
cannam@226
|
733 #define URI(prefix, suffix) \
|
cannam@226
|
734 uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix)
|
cannam@226
|
735
|
cannam@226
|
736 URIs uris;
|
cannam@226
|
737 URI(foaf, Document);
|
cannam@226
|
738 URI(owl, AnnotationProperty);
|
cannam@226
|
739 URI(owl, Class);
|
cannam@226
|
740 URI(owl, DatatypeProperty);
|
cannam@226
|
741 URI(owl, FunctionalProperty);
|
cannam@226
|
742 URI(owl, InverseFunctionalProperty);
|
cannam@226
|
743 URI(owl, ObjectProperty);
|
cannam@226
|
744 URI(owl, OntologyProperty);
|
cannam@226
|
745 URI(owl, Restriction);
|
cannam@226
|
746 URI(owl, Thing);
|
cannam@226
|
747 URI(owl, cardinality);
|
cannam@226
|
748 URI(owl, equivalentClass);
|
cannam@226
|
749 URI(owl, maxCardinality);
|
cannam@226
|
750 URI(owl, minCardinality);
|
cannam@226
|
751 URI(owl, onDatatype);
|
cannam@226
|
752 URI(owl, onProperty);
|
cannam@226
|
753 URI(owl, someValuesFrom);
|
cannam@226
|
754 URI(owl, withRestrictions);
|
cannam@226
|
755 URI(rdf, PlainLiteral);
|
cannam@226
|
756 URI(rdf, Property);
|
cannam@226
|
757 URI(rdf, first);
|
cannam@226
|
758 URI(rdf, rest);
|
cannam@226
|
759 URI(rdf, type);
|
cannam@226
|
760 URI(rdfs, Class);
|
cannam@226
|
761 URI(rdfs, Literal);
|
cannam@226
|
762 URI(rdfs, Resource);
|
cannam@226
|
763 URI(rdfs, domain);
|
cannam@226
|
764 URI(rdfs, label);
|
cannam@226
|
765 URI(rdfs, range);
|
cannam@226
|
766 URI(rdfs, subClassOf);
|
cannam@226
|
767 URI(xsd, anyURI);
|
cannam@226
|
768 URI(xsd, decimal);
|
cannam@226
|
769 URI(xsd, double);
|
cannam@226
|
770 URI(xsd, maxInclusive);
|
cannam@226
|
771 URI(xsd, minInclusive);
|
cannam@226
|
772 URI(xsd, pattern);
|
cannam@226
|
773 URI(xsd, string);
|
cannam@226
|
774
|
cannam@226
|
775 #ifndef HAVE_PCRE
|
cannam@226
|
776 fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n");
|
cannam@226
|
777 #endif
|
cannam@226
|
778
|
cannam@226
|
779 const int prop_st = check_properties(model, &uris);
|
cannam@226
|
780 const int inst_st = check_instances(model, &uris);
|
cannam@226
|
781
|
cannam@226
|
782 printf("Found %d errors among %d files (checked %d restrictions)\n",
|
cannam@226
|
783 n_errors, argc - 1, n_restrictions);
|
cannam@226
|
784
|
cannam@226
|
785 sord_free(model);
|
cannam@226
|
786 sord_world_free(world);
|
cannam@226
|
787 return prop_st || inst_st;
|
cannam@226
|
788 }
|