comparison ext/sord/src/sord.c @ 226:c5cdc9e6a4bf

Add these external library files
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 09 Jun 2017 16:41:31 +0100
parents
children 234f89708d75
comparison
equal deleted inserted replaced
225:025b3e2f7c17 226:c5cdc9e6a4bf
1 /*
2 Copyright 2011-2016 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 // C99
18 #include <assert.h>
19 #include <errno.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #define ZIX_INLINE
26 #include "zix/digest.c"
27 #include "zix/hash.c"
28 #include "zix/btree.c"
29
30 #include "sord_config.h"
31 #include "sord_internal.h"
32
33 #define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
34
35 #ifdef SORD_DEBUG_ITER
36 # define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
37 #else
38 # define SORD_ITER_LOG(...)
39 #endif
40 #ifdef SORD_DEBUG_SEARCH
41 # define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
42 #else
43 # define SORD_FIND_LOG(...)
44 #endif
45 #ifdef SORD_DEBUG_WRITE
46 # define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
47 #else
48 # define SORD_WRITE_LOG(...)
49 #endif
50
51 #define NUM_ORDERS 12
52 #define STATEMENT_LEN 3
53 #define TUP_LEN STATEMENT_LEN + 1
54 #define DEFAULT_ORDER SPO
55 #define DEFAULT_GRAPH_ORDER GSPO
56
57 #define TUP_FMT "(%s %s %s %s)"
58 #define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*")
59 #define TUP_FMT_ARGS(t) \
60 TUP_FMT_ELEM((t)[0]), \
61 TUP_FMT_ELEM((t)[1]), \
62 TUP_FMT_ELEM((t)[2]), \
63 TUP_FMT_ELEM((t)[3])
64
65 #define TUP_S 0
66 #define TUP_P 1
67 #define TUP_O 2
68 #define TUP_G 3
69
70 /** Triple ordering */
71 typedef enum {
72 SPO, ///< Subject, Predicate, Object
73 SOP, ///< Subject, Object, Predicate
74 OPS, ///< Object, Predicate, Subject
75 OSP, ///< Object, Subject, Predicate
76 PSO, ///< Predicate, Subject, Object
77 POS, ///< Predicate, Object, Subject
78 GSPO, ///< Graph, Subject, Predicate, Object
79 GSOP, ///< Graph, Subject, Object, Predicate
80 GOPS, ///< Graph, Object, Predicate, Subject
81 GOSP, ///< Graph, Object, Subject, Predicate
82 GPSO, ///< Graph, Predicate, Subject, Object
83 GPOS ///< Graph, Predicate, Object, Subject
84 } SordOrder;
85
86 #ifdef SORD_DEBUG_SEARCH
87 /** String name of each ordering (array indexed by SordOrder) */
88 static const char* const order_names[NUM_ORDERS] = {
89 "spo", "sop", "ops", "osp", "pso", "pos",
90 "gspo", "gsop", "gops", "gosp", "gpso", "gpos"
91 };
92 #endif
93
94 /**
95 Quads of indices for each order, from most to least significant
96 (array indexed by SordOrder)
97 */
98 static const int orderings[NUM_ORDERS][TUP_LEN] = {
99 { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP
100 { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP
101 { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS
102 { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP
103 { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP
104 { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS
105 };
106
107 /** World */
108 struct SordWorldImpl {
109 ZixHash* nodes;
110 SerdErrorSink error_sink;
111 void* error_handle;
112 };
113
114 /** Store */
115 struct SordModelImpl {
116 SordWorld* world;
117
118 /** Index for each possible triple ordering (may or may not exist).
119 * Each index is a tree of SordQuad with the appropriate ordering.
120 */
121 ZixBTree* indices[NUM_ORDERS];
122
123 size_t n_quads;
124 size_t n_iters;
125 };
126
127 /** Mode for searching or iteration */
128 typedef enum {
129 ALL, ///< Iterate over entire store
130 SINGLE, ///< Iteration over a single element (exact search)
131 RANGE, ///< Iterate over range with equal prefix
132 FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
133 FILTER_ALL ///< Iterate to end of store, filtering
134 } SearchMode;
135
136 /** Iterator over some range of a store */
137 struct SordIterImpl {
138 const SordModel* sord; ///< Model being iterated over
139 ZixBTreeIter* cur; ///< Current DB cursor
140 SordQuad pat; ///< Pattern (in ordering order)
141 SordOrder order; ///< Store order (which index)
142 SearchMode mode; ///< Iteration mode
143 int n_prefix; ///< Prefix for RANGE and FILTER_RANGE
144 bool end; ///< True iff reached end
145 bool skip_graphs; ///< Iteration should ignore graphs
146 };
147
148 static uint32_t
149 sord_node_hash(const void* n)
150 {
151 const SordNode* node = (const SordNode*)n;
152 uint32_t hash = zix_digest_start();
153 hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes);
154 hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type));
155 if (node->node.type == SERD_LITERAL) {
156 hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit));
157 }
158 return hash;
159 }
160
161 static bool
162 sord_node_hash_equal(const void* a, const void* b)
163 {
164 const SordNode* a_node = (const SordNode*)a;
165 const SordNode* b_node = (const SordNode*)b;
166 return (a_node == b_node)
167 || ((a_node->node.type == b_node->node.type) &&
168 (a_node->node.type != SERD_LITERAL ||
169 (a_node->meta.lit.datatype == b_node->meta.lit.datatype &&
170 !strncmp(a_node->meta.lit.lang,
171 b_node->meta.lit.lang,
172 sizeof(a_node->meta.lit.lang)))) &&
173 (serd_node_equals(&a_node->node, &b_node->node)));
174 }
175
176 static void
177 error(SordWorld* world, SerdStatus st, const char* fmt, ...)
178 {
179 va_list args;
180 va_start(args, fmt);
181 const SerdError e = { st, NULL, 0, 0, fmt, &args };
182 if (world->error_sink) {
183 world->error_sink(world->error_handle, &e);
184 } else {
185 fprintf(stderr, "error: ");
186 vfprintf(stderr, fmt, args);
187 }
188 va_end(args);
189 }
190
191 SordWorld*
192 sord_world_new(void)
193 {
194 SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld));
195 world->error_sink = NULL;
196 world->error_handle = NULL;
197
198 world->nodes = zix_hash_new(
199 sord_node_hash, sord_node_hash_equal, sizeof(SordNode));
200
201 return world;
202 }
203
204 static void
205 free_node_entry(void* value, void* user_data)
206 {
207 SordNode* node = (SordNode*)value;
208 if (node->node.type == SERD_LITERAL) {
209 sord_node_free((SordWorld*)user_data, node->meta.lit.datatype);
210 }
211 free((uint8_t*)node->node.buf);
212 }
213
214 void
215 sord_world_free(SordWorld* world)
216 {
217 zix_hash_foreach(world->nodes, free_node_entry, world);
218 zix_hash_free(world->nodes);
219 free(world);
220 }
221
222 void
223 sord_world_set_error_sink(SordWorld* world,
224 SerdErrorSink error_sink,
225 void* handle)
226 {
227 world->error_sink = error_sink;
228 world->error_handle = handle;
229 }
230
231 /** Compare nodes, considering NULL a wildcard match. */
232 static inline int
233 sord_node_compare(const SordNode* a, const SordNode* b)
234 {
235 if (a == b || !a || !b) {
236 return 0; // Exact or wildcard match
237 } else if (a->node.type != b->node.type) {
238 return a->node.type - b->node.type;
239 }
240
241 int cmp = 0;
242 switch (a->node.type) {
243 case SERD_URI:
244 case SERD_BLANK:
245 return strcmp((const char*)a->node.buf, (const char*)b->node.buf);
246 case SERD_LITERAL:
247 cmp = strcmp((const char*)sord_node_get_string(a),
248 (const char*)sord_node_get_string(b));
249 if (cmp == 0) {
250 // Note: Can't use sord_node_compare here since it does wildcards
251 if (!a->meta.lit.datatype || !b->meta.lit.datatype) {
252 cmp = a->meta.lit.datatype - b->meta.lit.datatype;
253 } else {
254 cmp = strcmp((const char*)a->meta.lit.datatype->node.buf,
255 (const char*)b->meta.lit.datatype->node.buf);
256 }
257 }
258 if (cmp == 0) {
259 cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang);
260 }
261 default:
262 break;
263 }
264 return cmp;
265 }
266
267 bool
268 sord_node_equals(const SordNode* a, const SordNode* b)
269 {
270 return a == b; // Nodes are interned
271 }
272
273 /** Return true iff IDs are equivalent, or one is a wildcard */
274 static inline bool
275 sord_id_match(const SordNode* a, const SordNode* b)
276 {
277 return !a || !b || (a == b);
278 }
279
280 static inline bool
281 sord_quad_match_inline(const SordQuad x, const SordQuad y)
282 {
283 return sord_id_match(x[0], y[0])
284 && sord_id_match(x[1], y[1])
285 && sord_id_match(x[2], y[2])
286 && sord_id_match(x[3], y[3]);
287 }
288
289 bool
290 sord_quad_match(const SordQuad x, const SordQuad y)
291 {
292 return sord_quad_match_inline(x, y);
293 }
294
295 /**
296 Compare two quad IDs lexicographically.
297 NULL IDs (equal to 0) are treated as wildcards, always less than every
298 other possible ID, except itself.
299 */
300 static int
301 sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data)
302 {
303 const int* const ordering = (const int*)user_data;
304 const SordNode*const*const x = (const SordNode*const*)x_ptr;
305 const SordNode*const*const y = (const SordNode*const*)y_ptr;
306
307 for (int i = 0; i < TUP_LEN; ++i) {
308 const int idx = ordering[i];
309 const int cmp = sord_node_compare(x[idx], y[idx]);
310 if (cmp) {
311 return cmp;
312 }
313 }
314
315 return 0;
316 }
317
318 static inline bool
319 sord_iter_forward(SordIter* iter)
320 {
321 if (!iter->skip_graphs) {
322 zix_btree_iter_increment(iter->cur);
323 return zix_btree_iter_is_end(iter->cur);
324 }
325
326 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
327 const SordQuad initial = { key[0], key[1], key[2], key[3] };
328 zix_btree_iter_increment(iter->cur);
329 while (!zix_btree_iter_is_end(iter->cur)) {
330 key = (SordNode**)zix_btree_get(iter->cur);
331 for (int i = 0; i < 3; ++i)
332 if (key[i] != initial[i])
333 return false;
334
335 zix_btree_iter_increment(iter->cur);
336 }
337
338 return true;
339 }
340
341 /**
342 Seek forward as necessary until `iter` points at a match.
343 @return true iff iterator reached end of valid range.
344 */
345 static inline bool
346 sord_iter_seek_match(SordIter* iter)
347 {
348 for (iter->end = true;
349 !zix_btree_iter_is_end(iter->cur);
350 sord_iter_forward(iter)) {
351 const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur);
352 if (sord_quad_match_inline(key, iter->pat))
353 return (iter->end = false);
354 }
355 return true;
356 }
357
358 /**
359 Seek forward as necessary until `iter` points at a match, or the prefix
360 no longer matches iter->pat.
361 @return true iff iterator reached end of valid range.
362 */
363 static inline bool
364 sord_iter_seek_match_range(SordIter* iter)
365 {
366 assert(!iter->end);
367
368 do {
369 const SordNode** key = (const SordNode**)zix_btree_get(iter->cur);
370
371 if (sord_quad_match_inline(key, iter->pat))
372 return false; // Found match
373
374 for (int i = 0; i < iter->n_prefix; ++i) {
375 const int idx = orderings[iter->order][i];
376 if (!sord_id_match(key[idx], iter->pat[idx])) {
377 iter->end = true; // Reached end of valid range
378 return true;
379 }
380 }
381 } while (!sord_iter_forward(iter));
382
383 return (iter->end = true); // Reached end
384 }
385
386 static SordIter*
387 sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat,
388 SordOrder order, SearchMode mode, int n_prefix)
389 {
390 SordIter* iter = (SordIter*)malloc(sizeof(SordIter));
391 iter->sord = sord;
392 iter->cur = cur;
393 iter->order = order;
394 iter->mode = mode;
395 iter->n_prefix = n_prefix;
396 iter->end = false;
397 iter->skip_graphs = order < GSPO;
398 for (int i = 0; i < TUP_LEN; ++i) {
399 iter->pat[i] = pat[i];
400 }
401
402 switch (iter->mode) {
403 case ALL:
404 case SINGLE:
405 case RANGE:
406 assert(
407 sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur),
408 iter->pat));
409 break;
410 case FILTER_RANGE:
411 sord_iter_seek_match_range(iter);
412 break;
413 case FILTER_ALL:
414 sord_iter_seek_match(iter);
415 break;
416 }
417
418 #ifdef SORD_DEBUG_ITER
419 SordQuad value;
420 sord_iter_get(iter, value);
421 SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n",
422 (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value),
423 iter->end, iter->skip_graphs);
424 #endif
425
426 ++((SordModel*)sord)->n_iters;
427 return iter;
428 }
429
430 const SordModel*
431 sord_iter_get_model(SordIter* iter)
432 {
433 return iter->sord;
434 }
435
436 void
437 sord_iter_get(const SordIter* iter, SordQuad id)
438 {
439 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
440 for (int i = 0; i < TUP_LEN; ++i) {
441 id[i] = key[i];
442 }
443 }
444
445 const SordNode*
446 sord_iter_get_node(const SordIter* iter, SordQuadIndex index)
447 {
448 return (!sord_iter_end(iter)
449 ? ((SordNode**)zix_btree_get(iter->cur))[index]
450 : NULL);
451 }
452
453 static bool
454 sord_iter_scan_next(SordIter* iter)
455 {
456 if (iter->end) {
457 return true;
458 }
459
460 const SordNode** key;
461 if (!iter->end) {
462 switch (iter->mode) {
463 case ALL:
464 // At the end if the cursor is (assigned above)
465 break;
466 case SINGLE:
467 iter->end = true;
468 SORD_ITER_LOG("%p reached single end\n", (void*)iter);
469 break;
470 case RANGE:
471 SORD_ITER_LOG("%p range next\n", (void*)iter);
472 // At the end if the MSNs no longer match
473 key = (const SordNode**)zix_btree_get(iter->cur);
474 assert(key);
475 for (int i = 0; i < iter->n_prefix; ++i) {
476 const int idx = orderings[iter->order][i];
477 if (!sord_id_match(key[idx], iter->pat[idx])) {
478 iter->end = true;
479 SORD_ITER_LOG("%p reached non-match end\n", (void*)iter);
480 break;
481 }
482 }
483 break;
484 case FILTER_RANGE:
485 // Seek forward to next match, stopping if prefix changes
486 sord_iter_seek_match_range(iter);
487 break;
488 case FILTER_ALL:
489 // Seek forward to next match
490 sord_iter_seek_match(iter);
491 break;
492 }
493 } else {
494 SORD_ITER_LOG("%p reached index end\n", (void*)iter);
495 }
496
497 if (iter->end) {
498 SORD_ITER_LOG("%p Reached end\n", (void*)iter);
499 return true;
500 } else {
501 #ifdef SORD_DEBUG_ITER
502 SordQuad tup;
503 sord_iter_get(iter, tup);
504 SORD_ITER_LOG("%p Increment to " TUP_FMT "\n",
505 (void*)iter, TUP_FMT_ARGS(tup));
506 #endif
507 return false;
508 }
509 }
510
511 bool
512 sord_iter_next(SordIter* iter)
513 {
514 if (iter->end) {
515 return true;
516 }
517
518 iter->end = sord_iter_forward(iter);
519 return sord_iter_scan_next(iter);
520 }
521
522 bool
523 sord_iter_end(const SordIter* iter)
524 {
525 return !iter || iter->end;
526 }
527
528 void
529 sord_iter_free(SordIter* iter)
530 {
531 SORD_ITER_LOG("%p Free\n", (void*)iter);
532 if (iter) {
533 --((SordModel*)iter->sord)->n_iters;
534 zix_btree_iter_free(iter->cur);
535 free(iter);
536 }
537 }
538
539 /**
540 Return true iff `sord` has an index for `order`.
541 If `graphs` is true, `order` will be modified to be the
542 corresponding order with a G prepended (so G will be the MSN).
543 */
544 static inline bool
545 sord_has_index(SordModel* sord, SordOrder* order, int* n_prefix, bool graphs)
546 {
547 if (graphs) {
548 *order = (SordOrder)(*order + GSPO);
549 *n_prefix += 1;
550 }
551
552 return sord->indices[*order];
553 }
554
555 /**
556 Return the best available index for a pattern.
557 @param pat Pattern in standard (S P O G) order
558 @param mode Set to the (best) iteration mode for iterating over results
559 @param n_prefix Set to the length of the range prefix
560 (for `mode` == RANGE and `mode` == FILTER_RANGE)
561 */
562 static inline SordOrder
563 sord_best_index(SordModel* sord,
564 const SordQuad pat,
565 SearchMode* mode,
566 int* n_prefix)
567 {
568 const bool graph_search = (pat[TUP_G] != 0);
569
570 const unsigned sig
571 = (pat[0] ? 1 : 0) * 0x100
572 + (pat[1] ? 1 : 0) * 0x010
573 + (pat[2] ? 1 : 0) * 0x001;
574
575 SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 };
576
577 #define PAT_CASE(sig, m, g0, g1, np) \
578 case sig: \
579 *mode = m; \
580 good[0] = g0; \
581 good[1] = g1; \
582 *n_prefix = np; \
583 break
584
585 // Good orderings that don't require filtering
586 *mode = RANGE;
587 *n_prefix = 0;
588 switch (sig) {
589 case 0x000:
590 assert(graph_search);
591 *mode = RANGE;
592 *n_prefix = 1;
593 return DEFAULT_GRAPH_ORDER;
594 case 0x111:
595 *mode = SINGLE;
596 return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
597
598 PAT_CASE(0x001, RANGE, OPS, OSP, 1);
599 PAT_CASE(0x010, RANGE, POS, PSO, 1);
600 PAT_CASE(0x011, RANGE, OPS, POS, 2);
601 PAT_CASE(0x100, RANGE, SPO, SOP, 1);
602 PAT_CASE(0x101, RANGE, SOP, OSP, 2);
603 PAT_CASE(0x110, RANGE, SPO, PSO, 2);
604 }
605
606 if (*mode == RANGE) {
607 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
608 return good[0];
609 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
610 return good[1];
611 }
612 }
613
614 // Not so good orderings that require filtering, but can
615 // still be constrained to a range
616 switch (sig) {
617 PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1);
618 PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1);
619 // SPO is always present, so 0x110 is never reached here
620 default: break;
621 }
622
623 if (*mode == FILTER_RANGE) {
624 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
625 return good[0];
626 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
627 return good[1];
628 }
629 }
630
631 if (graph_search) {
632 *mode = FILTER_RANGE;
633 *n_prefix = 1;
634 return DEFAULT_GRAPH_ORDER;
635 } else {
636 *mode = FILTER_ALL;
637 return DEFAULT_ORDER;
638 }
639 }
640
641 SordModel*
642 sord_new(SordWorld* world, unsigned indices, bool graphs)
643 {
644 SordModel* sord = (SordModel*)malloc(sizeof(struct SordModelImpl));
645 sord->world = world;
646 sord->n_quads = 0;
647 sord->n_iters = 0;
648
649 for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) {
650 const int* const ordering = orderings[i];
651 const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)];
652
653 if (indices & (1 << i)) {
654 sord->indices[i] = zix_btree_new(
655 sord_quad_compare, (void*)ordering, NULL);
656 if (graphs) {
657 sord->indices[i + (NUM_ORDERS / 2)] = zix_btree_new(
658 sord_quad_compare, (void*)g_ordering, NULL);
659 } else {
660 sord->indices[i + (NUM_ORDERS / 2)] = NULL;
661 }
662 } else {
663 sord->indices[i] = NULL;
664 sord->indices[i + (NUM_ORDERS / 2)] = NULL;
665 }
666 }
667
668 if (!sord->indices[DEFAULT_ORDER]) {
669 sord->indices[DEFAULT_ORDER] = zix_btree_new(
670 sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL);
671 }
672 if (graphs && !sord->indices[DEFAULT_GRAPH_ORDER]) {
673 sord->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new(
674 sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL);
675 }
676
677 return sord;
678 }
679
680 static void
681 sord_node_free_internal(SordWorld* world, SordNode* node)
682 {
683 assert(node->refs == 0);
684
685 // Cache pointer to buffer to free after node removal and destruction
686 const uint8_t* const buf = node->node.buf;
687
688 // Remove node from hash (which frees the node)
689 if (zix_hash_remove(world->nodes, node)) {
690 error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n");
691 }
692
693 // Free buffer
694 free((uint8_t*)buf);
695 }
696
697 static void
698 sord_add_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
699 {
700 if (node) {
701 assert(node->refs > 0);
702 ++((SordNode*)node)->refs;
703 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
704 ++((SordNode*)node)->meta.res.refs_as_obj;
705 }
706 }
707 }
708
709 static void
710 sord_drop_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
711 {
712 if (!node) {
713 return;
714 }
715
716 assert(node->refs > 0);
717 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
718 assert(node->meta.res.refs_as_obj > 0);
719 --((SordNode*)node)->meta.res.refs_as_obj;
720 }
721 if (--((SordNode*)node)->refs == 0) {
722 sord_node_free_internal(sord_get_world(sord), (SordNode*)node);
723 }
724 }
725
726 void
727 sord_free(SordModel* sord)
728 {
729 if (!sord)
730 return;
731
732 // Free nodes
733 SordQuad tup;
734 SordIter* i = sord_begin(sord);
735 for (; !sord_iter_end(i); sord_iter_next(i)) {
736 sord_iter_get(i, tup);
737 for (int t = 0; t < TUP_LEN; ++t) {
738 sord_drop_quad_ref(sord, tup[t], (SordQuadIndex)t);
739 }
740 }
741 sord_iter_free(i);
742
743 // Free quads
744 ZixBTreeIter* t = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
745 for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) {
746 free(zix_btree_get(t));
747 }
748 zix_btree_iter_free(t);
749
750 // Free indices
751 for (unsigned o = 0; o < NUM_ORDERS; ++o)
752 if (sord->indices[o])
753 zix_btree_free(sord->indices[o]);
754
755 free(sord);
756 }
757
758 SordWorld*
759 sord_get_world(SordModel* sord)
760 {
761 return sord->world;
762 }
763
764 size_t
765 sord_num_quads(const SordModel* sord)
766 {
767 return sord->n_quads;
768 }
769
770 size_t
771 sord_num_nodes(const SordWorld* world)
772 {
773 return zix_hash_size(world->nodes);
774 }
775
776 SordIter*
777 sord_begin(const SordModel* sord)
778 {
779 if (sord_num_quads(sord) == 0) {
780 return NULL;
781 } else {
782 ZixBTreeIter* cur = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
783 SordQuad pat = { 0, 0, 0, 0 };
784 return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0);
785 }
786 }
787
788 SordIter*
789 sord_find(SordModel* sord, const SordQuad pat)
790 {
791 if (!pat[0] && !pat[1] && !pat[2] && !pat[3])
792 return sord_begin(sord);
793
794 SearchMode mode;
795 int n_prefix;
796 const SordOrder index_order = sord_best_index(sord, pat, &mode, &n_prefix);
797
798 SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n",
799 TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix);
800
801 if (pat[0] && pat[1] && pat[2] && pat[3])
802 mode = SINGLE; // No duplicate quads (Sord is a set)
803
804 ZixBTree* const db = sord->indices[index_order];
805 ZixBTreeIter* cur = NULL;
806 zix_btree_lower_bound(db, pat, &cur);
807 if (zix_btree_iter_is_end(cur)) {
808 SORD_FIND_LOG("No match found\n");
809 zix_btree_iter_free(cur);
810 return NULL;
811 }
812 const SordNode** const key = (const SordNode**)zix_btree_get(cur);
813 if (!key || ( (mode == RANGE || mode == SINGLE)
814 && !sord_quad_match_inline(pat, key) )) {
815 SORD_FIND_LOG("No match found\n");
816 zix_btree_iter_free(cur);
817 return NULL;
818 }
819
820 return sord_iter_new(sord, cur, pat, index_order, mode, n_prefix);
821 }
822
823 SordIter*
824 sord_search(SordModel* model,
825 const SordNode* s,
826 const SordNode* p,
827 const SordNode* o,
828 const SordNode* g)
829 {
830 SordQuad pat = { s, p, o, g };
831 return sord_find(model, pat);
832 }
833
834 SordNode*
835 sord_get(SordModel* model,
836 const SordNode* s,
837 const SordNode* p,
838 const SordNode* o,
839 const SordNode* g)
840 {
841 if ((bool)s + (bool)p + (bool)o != 2) {
842 return NULL;
843 }
844
845 SordIter* i = sord_search(model, s, p, o, g);
846 SordNode* ret = NULL;
847 if (!s) {
848 ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT));
849 } else if (!p) {
850 ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE));
851 } else if (!o) {
852 ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT));
853 }
854
855 sord_iter_free(i);
856 return ret;
857 }
858
859 bool
860 sord_ask(SordModel* model,
861 const SordNode* s,
862 const SordNode* p,
863 const SordNode* o,
864 const SordNode* g)
865 {
866 SordQuad pat = { s, p, o, g };
867 return sord_contains(model, pat);
868 }
869
870 uint64_t
871 sord_count(SordModel* model,
872 const SordNode* s,
873 const SordNode* p,
874 const SordNode* o,
875 const SordNode* g)
876 {
877 SordIter* i = sord_search(model, s, p, o, g);
878 uint64_t n = 0;
879 for (; !sord_iter_end(i); sord_iter_next(i)) {
880 ++n;
881 }
882 sord_iter_free(i);
883 return n;
884 }
885
886 bool
887 sord_contains(SordModel* sord, const SordQuad pat)
888 {
889 SordIter* iter = sord_find(sord, pat);
890 bool ret = (iter != NULL);
891 sord_iter_free(iter);
892 return ret;
893 }
894
895 static uint8_t*
896 sord_strndup(const uint8_t* str, size_t len)
897 {
898 uint8_t* dup = (uint8_t*)malloc(len + 1);
899 memcpy(dup, str, len + 1);
900 return dup;
901 }
902
903 SordNodeType
904 sord_node_get_type(const SordNode* node)
905 {
906 switch (node->node.type) {
907 case SERD_URI:
908 return SORD_URI;
909 case SERD_BLANK:
910 return SORD_BLANK;
911 default:
912 return SORD_LITERAL;
913 }
914 SORD_UNREACHABLE();
915 }
916
917 const uint8_t*
918 sord_node_get_string(const SordNode* node)
919 {
920 return node->node.buf;
921 }
922
923 const uint8_t*
924 sord_node_get_string_counted(const SordNode* node, size_t* bytes)
925 {
926 *bytes = node->node.n_bytes;
927 return node->node.buf;
928 }
929
930 const uint8_t*
931 sord_node_get_string_measured(const SordNode* node,
932 size_t* bytes,
933 size_t* chars)
934 {
935 *bytes = node->node.n_bytes;
936 *chars = node->node.n_chars;
937 return node->node.buf;
938 }
939
940 const char*
941 sord_node_get_language(const SordNode* node)
942 {
943 if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) {
944 return NULL;
945 }
946 return node->meta.lit.lang;
947 }
948
949 SordNode*
950 sord_node_get_datatype(const SordNode* node)
951 {
952 return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL;
953 }
954
955 SerdNodeFlags
956 sord_node_get_flags(const SordNode* node)
957 {
958 return node->node.flags;
959 }
960
961 bool
962 sord_node_is_inline_object(const SordNode* node)
963 {
964 return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1);
965 }
966
967 static SordNode*
968 sord_insert_node(SordWorld* world, const SordNode* key, bool copy)
969 {
970 SordNode* node = NULL;
971 ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node);
972 switch (st) {
973 case ZIX_STATUS_EXISTS:
974 ++node->refs;
975 break;
976 case ZIX_STATUS_SUCCESS:
977 assert(node->refs == 1);
978 if (copy) {
979 node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes);
980 }
981 if (node->node.type == SERD_LITERAL) {
982 node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype);
983 }
984 return node;
985 default:
986 error(world, SERD_ERR_INTERNAL,
987 "error inserting node `%s'\n", key->node.buf);
988 }
989
990 if (!copy) {
991 // Free the buffer we would have copied if a new node was created
992 free((uint8_t*)key->node.buf);
993 }
994
995 return node;
996 }
997
998 static SordNode*
999 sord_new_uri_counted(SordWorld* world, const uint8_t* str,
1000 size_t n_bytes, size_t n_chars, bool copy)
1001 {
1002 if (!serd_uri_string_has_scheme(str)) {
1003 error(world, SERD_ERR_BAD_ARG,
1004 "attempt to map invalid URI `%s'\n", str);
1005 return NULL; // Can't intern relative URIs
1006 }
1007
1008 const SordNode key = {
1009 { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } }
1010 };
1011
1012 return sord_insert_node(world, &key, copy);
1013 }
1014
1015 SordNode*
1016 sord_new_uri(SordWorld* world, const uint8_t* str)
1017 {
1018 const SerdNode node = serd_node_from_string(SERD_URI, str);
1019 return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars, true);
1020 }
1021
1022 SordNode*
1023 sord_new_relative_uri(SordWorld* world,
1024 const uint8_t* str,
1025 const uint8_t* base_str)
1026 {
1027 if (serd_uri_string_has_scheme(str)) {
1028 return sord_new_uri(world, str);
1029 }
1030 SerdURI buri = SERD_URI_NULL;
1031 SerdNode base = serd_node_new_uri_from_string(base_str, NULL, &buri);
1032 SerdNode node = serd_node_new_uri_from_string(str, &buri, NULL);
1033
1034 SordNode* ret = sord_new_uri_counted(
1035 world, node.buf, node.n_bytes, node.n_chars, false);
1036
1037 serd_node_free(&base);
1038 return ret;
1039 }
1040
1041 static SordNode*
1042 sord_new_blank_counted(SordWorld* world, const uint8_t* str,
1043 size_t n_bytes, size_t n_chars)
1044 {
1045 const SordNode key = {
1046 { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } }
1047 };
1048
1049 return sord_insert_node(world, &key, true);
1050 }
1051
1052 SordNode*
1053 sord_new_blank(SordWorld* world, const uint8_t* str)
1054 {
1055 const SerdNode node = serd_node_from_string(SERD_URI, str);
1056 return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
1057 }
1058
1059 static SordNode*
1060 sord_new_literal_counted(SordWorld* world,
1061 SordNode* datatype,
1062 const uint8_t* str,
1063 size_t n_bytes,
1064 size_t n_chars,
1065 SerdNodeFlags flags,
1066 const char* lang)
1067 {
1068 SordNode key = {
1069 { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } }
1070 };
1071 key.meta.lit.datatype = sord_node_copy(datatype);
1072 memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang));
1073 if (lang) {
1074 strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang));
1075 }
1076
1077 return sord_insert_node(world, &key, true);
1078 }
1079
1080 SordNode*
1081 sord_new_literal(SordWorld* world, SordNode* datatype,
1082 const uint8_t* str, const char* lang)
1083 {
1084 SerdNodeFlags flags = 0;
1085 size_t n_bytes = 0;
1086 size_t n_chars = serd_strlen(str, &n_bytes, &flags);
1087 return sord_new_literal_counted(world, datatype,
1088 str, n_bytes, n_chars, flags,
1089 lang);
1090 }
1091
1092 SordNode*
1093 sord_node_from_serd_node(SordWorld* world,
1094 SerdEnv* env,
1095 const SerdNode* sn,
1096 const SerdNode* datatype,
1097 const SerdNode* lang)
1098 {
1099 if (!sn) {
1100 return NULL;
1101 }
1102
1103 SordNode* datatype_node = NULL;
1104 SordNode* ret = NULL;
1105 switch (sn->type) {
1106 case SERD_NOTHING:
1107 return NULL;
1108 case SERD_LITERAL:
1109 datatype_node = sord_node_from_serd_node(
1110 world, env, datatype, NULL, NULL),
1111 ret = sord_new_literal_counted(
1112 world,
1113 datatype_node,
1114 sn->buf,
1115 sn->n_bytes,
1116 sn->n_chars,
1117 sn->flags,
1118 lang ? (const char*)lang->buf : NULL);
1119 sord_node_free(world, datatype_node);
1120 return ret;
1121 case SERD_URI:
1122 if (serd_uri_string_has_scheme(sn->buf)) {
1123 return sord_new_uri_counted(
1124 world, sn->buf, sn->n_bytes, sn->n_chars, true);
1125 } else {
1126 SerdURI base_uri;
1127 serd_env_get_base_uri(env, &base_uri);
1128 SerdURI abs_uri;
1129 SerdNode abs_uri_node = serd_node_new_uri_from_node(
1130 sn, &base_uri, &abs_uri);
1131 ret = sord_new_uri_counted(world,
1132 abs_uri_node.buf,
1133 abs_uri_node.n_bytes,
1134 abs_uri_node.n_chars,
1135 true);
1136 serd_node_free(&abs_uri_node);
1137 return ret;
1138 }
1139 case SERD_CURIE: {
1140 SerdChunk uri_prefix;
1141 SerdChunk uri_suffix;
1142 if (serd_env_expand(env, sn, &uri_prefix, &uri_suffix)) {
1143 error(world, SERD_ERR_BAD_CURIE,
1144 "failed to expand CURIE `%s'\n", sn->buf);
1145 return NULL;
1146 }
1147 const size_t uri_len = uri_prefix.len + uri_suffix.len;
1148 uint8_t* buf = (uint8_t*)malloc(uri_len + 1);
1149 memcpy(buf, uri_prefix.buf, uri_prefix.len);
1150 memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
1151 buf[uri_len] = '\0';
1152 ret = sord_new_uri_counted(
1153 world, buf, uri_len, serd_strlen(buf, NULL, NULL), false);
1154 return ret;
1155 }
1156 case SERD_BLANK:
1157 return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars);
1158 }
1159 return NULL;
1160 }
1161
1162 const SerdNode*
1163 sord_node_to_serd_node(const SordNode* node)
1164 {
1165 return node ? &node->node : &SERD_NODE_NULL;
1166 }
1167
1168 void
1169 sord_node_free(SordWorld* world, SordNode* node)
1170 {
1171 if (!node) {
1172 return;
1173 } else if (node->refs == 0) {
1174 error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n");
1175 } else if (--node->refs == 0) {
1176 sord_node_free_internal(world, node);
1177 }
1178 }
1179
1180 SordNode*
1181 sord_node_copy(const SordNode* node)
1182 {
1183 SordNode* copy = (SordNode*)node;
1184 if (copy) {
1185 ++copy->refs;
1186 }
1187 return copy;
1188 }
1189
1190 static inline bool
1191 sord_add_to_index(SordModel* sord, const SordNode** tup, SordOrder order)
1192 {
1193 return !zix_btree_insert(sord->indices[order], tup);
1194 }
1195
1196 bool
1197 sord_add(SordModel* sord, const SordQuad tup)
1198 {
1199 SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1200 if (!tup[0] || !tup[1] || !tup[2]) {
1201 error(sord->world, SERD_ERR_BAD_ARG,
1202 "attempt to add quad with NULL field\n");
1203 return false;
1204 } else if (sord->n_iters > 0) {
1205 error(sord->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n");
1206 }
1207
1208 const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad));
1209 memcpy(quad, tup, sizeof(SordQuad));
1210
1211 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1212 if (sord->indices[i] && (i < GSPO || tup[3])) {
1213 if (!sord_add_to_index(sord, quad, (SordOrder)i)) {
1214 assert(i == 0); // Assuming index coherency
1215 free(quad);
1216 return false; // Quad already stored, do nothing
1217 }
1218 }
1219 }
1220
1221 for (int i = 0; i < TUP_LEN; ++i)
1222 sord_add_quad_ref(sord, tup[i], (SordQuadIndex)i);
1223
1224 ++sord->n_quads;
1225 return true;
1226 }
1227
1228 void
1229 sord_remove(SordModel* sord, const SordQuad tup)
1230 {
1231 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1232 if (sord->n_iters > 0) {
1233 error(sord->world, SERD_ERR_BAD_ARG, "remove with iterator\n");
1234 }
1235
1236 SordNode* quad = NULL;
1237 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1238 if (sord->indices[i] && (i < GSPO || tup[3])) {
1239 if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, NULL)) {
1240 assert(i == 0); // Assuming index coherency
1241 return; // Quad not found, do nothing
1242 }
1243 }
1244 }
1245
1246 free(quad);
1247
1248 for (int i = 0; i < TUP_LEN; ++i)
1249 sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
1250
1251 --sord->n_quads;
1252 }
1253
1254 SerdStatus
1255 sord_erase(SordModel* sord, SordIter* iter)
1256 {
1257 if (sord->n_iters > 1) {
1258 error(sord->world, SERD_ERR_BAD_ARG, "erased with many iterators\n");
1259 return SERD_ERR_BAD_ARG;
1260 }
1261
1262 SordQuad tup;
1263 sord_iter_get(iter, tup);
1264
1265 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1266
1267 SordNode* quad = NULL;
1268 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1269 if (sord->indices[i] && (i < GSPO || tup[3])) {
1270 if (zix_btree_remove(sord->indices[i], tup, (void**)&quad,
1271 i == iter->order ? &iter->cur : NULL)) {
1272 return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL;
1273 }
1274 }
1275 }
1276 iter->end = zix_btree_iter_is_end(iter->cur);
1277 sord_iter_scan_next(iter);
1278
1279 free(quad);
1280
1281 for (int i = 0; i < TUP_LEN; ++i)
1282 sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
1283
1284 --sord->n_quads;
1285 return SERD_SUCCESS;
1286 }