annotate ext/sord/src/sord.c @ 266:234f89708d75

Fix strncpy overrun
author Chris Cannam <cannam@all-day-breakfast.com>
date Sat, 13 Oct 2018 12:32:03 +0100
parents c5cdc9e6a4bf
children
rev   line source
cannam@226 1 /*
cannam@226 2 Copyright 2011-2016 David Robillard <http://drobilla.net>
cannam@226 3
cannam@226 4 Permission to use, copy, modify, and/or distribute this software for any
cannam@226 5 purpose with or without fee is hereby granted, provided that the above
cannam@226 6 copyright notice and this permission notice appear in all copies.
cannam@226 7
cannam@226 8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
cannam@226 9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
cannam@226 10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
cannam@226 11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
cannam@226 12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
cannam@226 13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
cannam@226 14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
cannam@226 15 */
cannam@226 16
cannam@226 17 // C99
cannam@226 18 #include <assert.h>
cannam@226 19 #include <errno.h>
cannam@226 20 #include <stdint.h>
cannam@226 21 #include <stdio.h>
cannam@226 22 #include <stdlib.h>
cannam@226 23 #include <string.h>
cannam@226 24
cannam@226 25 #define ZIX_INLINE
cannam@226 26 #include "zix/digest.c"
cannam@226 27 #include "zix/hash.c"
cannam@226 28 #include "zix/btree.c"
cannam@226 29
cannam@226 30 #include "sord_config.h"
cannam@226 31 #include "sord_internal.h"
cannam@226 32
cannam@226 33 #define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
cannam@226 34
cannam@226 35 #ifdef SORD_DEBUG_ITER
cannam@226 36 # define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
cannam@226 37 #else
cannam@226 38 # define SORD_ITER_LOG(...)
cannam@226 39 #endif
cannam@226 40 #ifdef SORD_DEBUG_SEARCH
cannam@226 41 # define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
cannam@226 42 #else
cannam@226 43 # define SORD_FIND_LOG(...)
cannam@226 44 #endif
cannam@226 45 #ifdef SORD_DEBUG_WRITE
cannam@226 46 # define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
cannam@226 47 #else
cannam@226 48 # define SORD_WRITE_LOG(...)
cannam@226 49 #endif
cannam@226 50
cannam@226 51 #define NUM_ORDERS 12
cannam@226 52 #define STATEMENT_LEN 3
cannam@226 53 #define TUP_LEN STATEMENT_LEN + 1
cannam@226 54 #define DEFAULT_ORDER SPO
cannam@226 55 #define DEFAULT_GRAPH_ORDER GSPO
cannam@226 56
cannam@226 57 #define TUP_FMT "(%s %s %s %s)"
cannam@226 58 #define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*")
cannam@226 59 #define TUP_FMT_ARGS(t) \
cannam@226 60 TUP_FMT_ELEM((t)[0]), \
cannam@226 61 TUP_FMT_ELEM((t)[1]), \
cannam@226 62 TUP_FMT_ELEM((t)[2]), \
cannam@226 63 TUP_FMT_ELEM((t)[3])
cannam@226 64
cannam@226 65 #define TUP_S 0
cannam@226 66 #define TUP_P 1
cannam@226 67 #define TUP_O 2
cannam@226 68 #define TUP_G 3
cannam@226 69
cannam@226 70 /** Triple ordering */
cannam@226 71 typedef enum {
cannam@226 72 SPO, ///< Subject, Predicate, Object
cannam@226 73 SOP, ///< Subject, Object, Predicate
cannam@226 74 OPS, ///< Object, Predicate, Subject
cannam@226 75 OSP, ///< Object, Subject, Predicate
cannam@226 76 PSO, ///< Predicate, Subject, Object
cannam@226 77 POS, ///< Predicate, Object, Subject
cannam@226 78 GSPO, ///< Graph, Subject, Predicate, Object
cannam@226 79 GSOP, ///< Graph, Subject, Object, Predicate
cannam@226 80 GOPS, ///< Graph, Object, Predicate, Subject
cannam@226 81 GOSP, ///< Graph, Object, Subject, Predicate
cannam@226 82 GPSO, ///< Graph, Predicate, Subject, Object
cannam@226 83 GPOS ///< Graph, Predicate, Object, Subject
cannam@226 84 } SordOrder;
cannam@226 85
cannam@226 86 #ifdef SORD_DEBUG_SEARCH
cannam@226 87 /** String name of each ordering (array indexed by SordOrder) */
cannam@226 88 static const char* const order_names[NUM_ORDERS] = {
cannam@226 89 "spo", "sop", "ops", "osp", "pso", "pos",
cannam@226 90 "gspo", "gsop", "gops", "gosp", "gpso", "gpos"
cannam@226 91 };
cannam@226 92 #endif
cannam@226 93
cannam@226 94 /**
cannam@226 95 Quads of indices for each order, from most to least significant
cannam@226 96 (array indexed by SordOrder)
cannam@226 97 */
cannam@226 98 static const int orderings[NUM_ORDERS][TUP_LEN] = {
cannam@226 99 { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP
cannam@226 100 { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP
cannam@226 101 { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS
cannam@226 102 { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP
cannam@226 103 { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP
cannam@226 104 { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS
cannam@226 105 };
cannam@226 106
cannam@226 107 /** World */
cannam@226 108 struct SordWorldImpl {
cannam@226 109 ZixHash* nodes;
cannam@226 110 SerdErrorSink error_sink;
cannam@226 111 void* error_handle;
cannam@226 112 };
cannam@226 113
cannam@226 114 /** Store */
cannam@226 115 struct SordModelImpl {
cannam@226 116 SordWorld* world;
cannam@226 117
cannam@226 118 /** Index for each possible triple ordering (may or may not exist).
cannam@226 119 * Each index is a tree of SordQuad with the appropriate ordering.
cannam@226 120 */
cannam@226 121 ZixBTree* indices[NUM_ORDERS];
cannam@226 122
cannam@226 123 size_t n_quads;
cannam@226 124 size_t n_iters;
cannam@226 125 };
cannam@226 126
cannam@226 127 /** Mode for searching or iteration */
cannam@226 128 typedef enum {
cannam@226 129 ALL, ///< Iterate over entire store
cannam@226 130 SINGLE, ///< Iteration over a single element (exact search)
cannam@226 131 RANGE, ///< Iterate over range with equal prefix
cannam@226 132 FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
cannam@226 133 FILTER_ALL ///< Iterate to end of store, filtering
cannam@226 134 } SearchMode;
cannam@226 135
cannam@226 136 /** Iterator over some range of a store */
cannam@226 137 struct SordIterImpl {
cannam@226 138 const SordModel* sord; ///< Model being iterated over
cannam@226 139 ZixBTreeIter* cur; ///< Current DB cursor
cannam@226 140 SordQuad pat; ///< Pattern (in ordering order)
cannam@226 141 SordOrder order; ///< Store order (which index)
cannam@226 142 SearchMode mode; ///< Iteration mode
cannam@226 143 int n_prefix; ///< Prefix for RANGE and FILTER_RANGE
cannam@226 144 bool end; ///< True iff reached end
cannam@226 145 bool skip_graphs; ///< Iteration should ignore graphs
cannam@226 146 };
cannam@226 147
cannam@226 148 static uint32_t
cannam@226 149 sord_node_hash(const void* n)
cannam@226 150 {
cannam@226 151 const SordNode* node = (const SordNode*)n;
cannam@226 152 uint32_t hash = zix_digest_start();
cannam@226 153 hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes);
cannam@226 154 hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type));
cannam@226 155 if (node->node.type == SERD_LITERAL) {
cannam@226 156 hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit));
cannam@226 157 }
cannam@226 158 return hash;
cannam@226 159 }
cannam@226 160
cannam@226 161 static bool
cannam@226 162 sord_node_hash_equal(const void* a, const void* b)
cannam@226 163 {
cannam@226 164 const SordNode* a_node = (const SordNode*)a;
cannam@226 165 const SordNode* b_node = (const SordNode*)b;
cannam@226 166 return (a_node == b_node)
cannam@226 167 || ((a_node->node.type == b_node->node.type) &&
cannam@226 168 (a_node->node.type != SERD_LITERAL ||
cannam@226 169 (a_node->meta.lit.datatype == b_node->meta.lit.datatype &&
cannam@226 170 !strncmp(a_node->meta.lit.lang,
cannam@226 171 b_node->meta.lit.lang,
cannam@226 172 sizeof(a_node->meta.lit.lang)))) &&
cannam@226 173 (serd_node_equals(&a_node->node, &b_node->node)));
cannam@226 174 }
cannam@226 175
cannam@226 176 static void
cannam@226 177 error(SordWorld* world, SerdStatus st, const char* fmt, ...)
cannam@226 178 {
cannam@226 179 va_list args;
cannam@226 180 va_start(args, fmt);
cannam@226 181 const SerdError e = { st, NULL, 0, 0, fmt, &args };
cannam@226 182 if (world->error_sink) {
cannam@226 183 world->error_sink(world->error_handle, &e);
cannam@226 184 } else {
cannam@226 185 fprintf(stderr, "error: ");
cannam@226 186 vfprintf(stderr, fmt, args);
cannam@226 187 }
cannam@226 188 va_end(args);
cannam@226 189 }
cannam@226 190
cannam@226 191 SordWorld*
cannam@226 192 sord_world_new(void)
cannam@226 193 {
cannam@226 194 SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld));
cannam@226 195 world->error_sink = NULL;
cannam@226 196 world->error_handle = NULL;
cannam@226 197
cannam@226 198 world->nodes = zix_hash_new(
cannam@226 199 sord_node_hash, sord_node_hash_equal, sizeof(SordNode));
cannam@226 200
cannam@226 201 return world;
cannam@226 202 }
cannam@226 203
cannam@226 204 static void
cannam@226 205 free_node_entry(void* value, void* user_data)
cannam@226 206 {
cannam@226 207 SordNode* node = (SordNode*)value;
cannam@226 208 if (node->node.type == SERD_LITERAL) {
cannam@226 209 sord_node_free((SordWorld*)user_data, node->meta.lit.datatype);
cannam@226 210 }
cannam@226 211 free((uint8_t*)node->node.buf);
cannam@226 212 }
cannam@226 213
cannam@226 214 void
cannam@226 215 sord_world_free(SordWorld* world)
cannam@226 216 {
cannam@226 217 zix_hash_foreach(world->nodes, free_node_entry, world);
cannam@226 218 zix_hash_free(world->nodes);
cannam@226 219 free(world);
cannam@226 220 }
cannam@226 221
cannam@226 222 void
cannam@226 223 sord_world_set_error_sink(SordWorld* world,
cannam@226 224 SerdErrorSink error_sink,
cannam@226 225 void* handle)
cannam@226 226 {
cannam@226 227 world->error_sink = error_sink;
cannam@226 228 world->error_handle = handle;
cannam@226 229 }
cannam@226 230
cannam@226 231 /** Compare nodes, considering NULL a wildcard match. */
cannam@226 232 static inline int
cannam@226 233 sord_node_compare(const SordNode* a, const SordNode* b)
cannam@226 234 {
cannam@226 235 if (a == b || !a || !b) {
cannam@226 236 return 0; // Exact or wildcard match
cannam@226 237 } else if (a->node.type != b->node.type) {
cannam@226 238 return a->node.type - b->node.type;
cannam@226 239 }
cannam@226 240
cannam@226 241 int cmp = 0;
cannam@226 242 switch (a->node.type) {
cannam@226 243 case SERD_URI:
cannam@226 244 case SERD_BLANK:
cannam@226 245 return strcmp((const char*)a->node.buf, (const char*)b->node.buf);
cannam@226 246 case SERD_LITERAL:
cannam@226 247 cmp = strcmp((const char*)sord_node_get_string(a),
cannam@226 248 (const char*)sord_node_get_string(b));
cannam@226 249 if (cmp == 0) {
cannam@226 250 // Note: Can't use sord_node_compare here since it does wildcards
cannam@226 251 if (!a->meta.lit.datatype || !b->meta.lit.datatype) {
cannam@226 252 cmp = a->meta.lit.datatype - b->meta.lit.datatype;
cannam@226 253 } else {
cannam@226 254 cmp = strcmp((const char*)a->meta.lit.datatype->node.buf,
cannam@226 255 (const char*)b->meta.lit.datatype->node.buf);
cannam@226 256 }
cannam@226 257 }
cannam@226 258 if (cmp == 0) {
cannam@226 259 cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang);
cannam@226 260 }
cannam@226 261 default:
cannam@226 262 break;
cannam@226 263 }
cannam@226 264 return cmp;
cannam@226 265 }
cannam@226 266
cannam@226 267 bool
cannam@226 268 sord_node_equals(const SordNode* a, const SordNode* b)
cannam@226 269 {
cannam@226 270 return a == b; // Nodes are interned
cannam@226 271 }
cannam@226 272
cannam@226 273 /** Return true iff IDs are equivalent, or one is a wildcard */
cannam@226 274 static inline bool
cannam@226 275 sord_id_match(const SordNode* a, const SordNode* b)
cannam@226 276 {
cannam@226 277 return !a || !b || (a == b);
cannam@226 278 }
cannam@226 279
cannam@226 280 static inline bool
cannam@226 281 sord_quad_match_inline(const SordQuad x, const SordQuad y)
cannam@226 282 {
cannam@226 283 return sord_id_match(x[0], y[0])
cannam@226 284 && sord_id_match(x[1], y[1])
cannam@226 285 && sord_id_match(x[2], y[2])
cannam@226 286 && sord_id_match(x[3], y[3]);
cannam@226 287 }
cannam@226 288
cannam@226 289 bool
cannam@226 290 sord_quad_match(const SordQuad x, const SordQuad y)
cannam@226 291 {
cannam@226 292 return sord_quad_match_inline(x, y);
cannam@226 293 }
cannam@226 294
cannam@226 295 /**
cannam@226 296 Compare two quad IDs lexicographically.
cannam@226 297 NULL IDs (equal to 0) are treated as wildcards, always less than every
cannam@226 298 other possible ID, except itself.
cannam@226 299 */
cannam@226 300 static int
cannam@226 301 sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data)
cannam@226 302 {
cannam@226 303 const int* const ordering = (const int*)user_data;
cannam@226 304 const SordNode*const*const x = (const SordNode*const*)x_ptr;
cannam@226 305 const SordNode*const*const y = (const SordNode*const*)y_ptr;
cannam@226 306
cannam@226 307 for (int i = 0; i < TUP_LEN; ++i) {
cannam@226 308 const int idx = ordering[i];
cannam@226 309 const int cmp = sord_node_compare(x[idx], y[idx]);
cannam@226 310 if (cmp) {
cannam@226 311 return cmp;
cannam@226 312 }
cannam@226 313 }
cannam@226 314
cannam@226 315 return 0;
cannam@226 316 }
cannam@226 317
cannam@226 318 static inline bool
cannam@226 319 sord_iter_forward(SordIter* iter)
cannam@226 320 {
cannam@226 321 if (!iter->skip_graphs) {
cannam@226 322 zix_btree_iter_increment(iter->cur);
cannam@226 323 return zix_btree_iter_is_end(iter->cur);
cannam@226 324 }
cannam@226 325
cannam@226 326 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
cannam@226 327 const SordQuad initial = { key[0], key[1], key[2], key[3] };
cannam@226 328 zix_btree_iter_increment(iter->cur);
cannam@226 329 while (!zix_btree_iter_is_end(iter->cur)) {
cannam@226 330 key = (SordNode**)zix_btree_get(iter->cur);
cannam@226 331 for (int i = 0; i < 3; ++i)
cannam@226 332 if (key[i] != initial[i])
cannam@226 333 return false;
cannam@226 334
cannam@226 335 zix_btree_iter_increment(iter->cur);
cannam@226 336 }
cannam@226 337
cannam@226 338 return true;
cannam@226 339 }
cannam@226 340
cannam@226 341 /**
cannam@226 342 Seek forward as necessary until `iter` points at a match.
cannam@226 343 @return true iff iterator reached end of valid range.
cannam@226 344 */
cannam@226 345 static inline bool
cannam@226 346 sord_iter_seek_match(SordIter* iter)
cannam@226 347 {
cannam@226 348 for (iter->end = true;
cannam@226 349 !zix_btree_iter_is_end(iter->cur);
cannam@226 350 sord_iter_forward(iter)) {
cannam@226 351 const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur);
cannam@226 352 if (sord_quad_match_inline(key, iter->pat))
cannam@226 353 return (iter->end = false);
cannam@226 354 }
cannam@226 355 return true;
cannam@226 356 }
cannam@226 357
cannam@226 358 /**
cannam@226 359 Seek forward as necessary until `iter` points at a match, or the prefix
cannam@226 360 no longer matches iter->pat.
cannam@226 361 @return true iff iterator reached end of valid range.
cannam@226 362 */
cannam@226 363 static inline bool
cannam@226 364 sord_iter_seek_match_range(SordIter* iter)
cannam@226 365 {
cannam@226 366 assert(!iter->end);
cannam@226 367
cannam@226 368 do {
cannam@226 369 const SordNode** key = (const SordNode**)zix_btree_get(iter->cur);
cannam@226 370
cannam@226 371 if (sord_quad_match_inline(key, iter->pat))
cannam@226 372 return false; // Found match
cannam@226 373
cannam@226 374 for (int i = 0; i < iter->n_prefix; ++i) {
cannam@226 375 const int idx = orderings[iter->order][i];
cannam@226 376 if (!sord_id_match(key[idx], iter->pat[idx])) {
cannam@226 377 iter->end = true; // Reached end of valid range
cannam@226 378 return true;
cannam@226 379 }
cannam@226 380 }
cannam@226 381 } while (!sord_iter_forward(iter));
cannam@226 382
cannam@226 383 return (iter->end = true); // Reached end
cannam@226 384 }
cannam@226 385
cannam@226 386 static SordIter*
cannam@226 387 sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat,
cannam@226 388 SordOrder order, SearchMode mode, int n_prefix)
cannam@226 389 {
cannam@226 390 SordIter* iter = (SordIter*)malloc(sizeof(SordIter));
cannam@226 391 iter->sord = sord;
cannam@226 392 iter->cur = cur;
cannam@226 393 iter->order = order;
cannam@226 394 iter->mode = mode;
cannam@226 395 iter->n_prefix = n_prefix;
cannam@226 396 iter->end = false;
cannam@226 397 iter->skip_graphs = order < GSPO;
cannam@226 398 for (int i = 0; i < TUP_LEN; ++i) {
cannam@226 399 iter->pat[i] = pat[i];
cannam@226 400 }
cannam@226 401
cannam@226 402 switch (iter->mode) {
cannam@226 403 case ALL:
cannam@226 404 case SINGLE:
cannam@226 405 case RANGE:
cannam@226 406 assert(
cannam@226 407 sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur),
cannam@226 408 iter->pat));
cannam@226 409 break;
cannam@226 410 case FILTER_RANGE:
cannam@226 411 sord_iter_seek_match_range(iter);
cannam@226 412 break;
cannam@226 413 case FILTER_ALL:
cannam@226 414 sord_iter_seek_match(iter);
cannam@226 415 break;
cannam@226 416 }
cannam@226 417
cannam@226 418 #ifdef SORD_DEBUG_ITER
cannam@226 419 SordQuad value;
cannam@226 420 sord_iter_get(iter, value);
cannam@226 421 SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n",
cannam@226 422 (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value),
cannam@226 423 iter->end, iter->skip_graphs);
cannam@226 424 #endif
cannam@226 425
cannam@226 426 ++((SordModel*)sord)->n_iters;
cannam@226 427 return iter;
cannam@226 428 }
cannam@226 429
cannam@226 430 const SordModel*
cannam@226 431 sord_iter_get_model(SordIter* iter)
cannam@226 432 {
cannam@226 433 return iter->sord;
cannam@226 434 }
cannam@226 435
cannam@226 436 void
cannam@226 437 sord_iter_get(const SordIter* iter, SordQuad id)
cannam@226 438 {
cannam@226 439 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
cannam@226 440 for (int i = 0; i < TUP_LEN; ++i) {
cannam@226 441 id[i] = key[i];
cannam@226 442 }
cannam@226 443 }
cannam@226 444
cannam@226 445 const SordNode*
cannam@226 446 sord_iter_get_node(const SordIter* iter, SordQuadIndex index)
cannam@226 447 {
cannam@226 448 return (!sord_iter_end(iter)
cannam@226 449 ? ((SordNode**)zix_btree_get(iter->cur))[index]
cannam@226 450 : NULL);
cannam@226 451 }
cannam@226 452
cannam@226 453 static bool
cannam@226 454 sord_iter_scan_next(SordIter* iter)
cannam@226 455 {
cannam@226 456 if (iter->end) {
cannam@226 457 return true;
cannam@226 458 }
cannam@226 459
cannam@226 460 const SordNode** key;
cannam@226 461 if (!iter->end) {
cannam@226 462 switch (iter->mode) {
cannam@226 463 case ALL:
cannam@226 464 // At the end if the cursor is (assigned above)
cannam@226 465 break;
cannam@226 466 case SINGLE:
cannam@226 467 iter->end = true;
cannam@226 468 SORD_ITER_LOG("%p reached single end\n", (void*)iter);
cannam@226 469 break;
cannam@226 470 case RANGE:
cannam@226 471 SORD_ITER_LOG("%p range next\n", (void*)iter);
cannam@226 472 // At the end if the MSNs no longer match
cannam@226 473 key = (const SordNode**)zix_btree_get(iter->cur);
cannam@226 474 assert(key);
cannam@226 475 for (int i = 0; i < iter->n_prefix; ++i) {
cannam@226 476 const int idx = orderings[iter->order][i];
cannam@226 477 if (!sord_id_match(key[idx], iter->pat[idx])) {
cannam@226 478 iter->end = true;
cannam@226 479 SORD_ITER_LOG("%p reached non-match end\n", (void*)iter);
cannam@226 480 break;
cannam@226 481 }
cannam@226 482 }
cannam@226 483 break;
cannam@226 484 case FILTER_RANGE:
cannam@226 485 // Seek forward to next match, stopping if prefix changes
cannam@226 486 sord_iter_seek_match_range(iter);
cannam@226 487 break;
cannam@226 488 case FILTER_ALL:
cannam@226 489 // Seek forward to next match
cannam@226 490 sord_iter_seek_match(iter);
cannam@226 491 break;
cannam@226 492 }
cannam@226 493 } else {
cannam@226 494 SORD_ITER_LOG("%p reached index end\n", (void*)iter);
cannam@226 495 }
cannam@226 496
cannam@226 497 if (iter->end) {
cannam@226 498 SORD_ITER_LOG("%p Reached end\n", (void*)iter);
cannam@226 499 return true;
cannam@226 500 } else {
cannam@226 501 #ifdef SORD_DEBUG_ITER
cannam@226 502 SordQuad tup;
cannam@226 503 sord_iter_get(iter, tup);
cannam@226 504 SORD_ITER_LOG("%p Increment to " TUP_FMT "\n",
cannam@226 505 (void*)iter, TUP_FMT_ARGS(tup));
cannam@226 506 #endif
cannam@226 507 return false;
cannam@226 508 }
cannam@226 509 }
cannam@226 510
cannam@226 511 bool
cannam@226 512 sord_iter_next(SordIter* iter)
cannam@226 513 {
cannam@226 514 if (iter->end) {
cannam@226 515 return true;
cannam@226 516 }
cannam@226 517
cannam@226 518 iter->end = sord_iter_forward(iter);
cannam@226 519 return sord_iter_scan_next(iter);
cannam@226 520 }
cannam@226 521
cannam@226 522 bool
cannam@226 523 sord_iter_end(const SordIter* iter)
cannam@226 524 {
cannam@226 525 return !iter || iter->end;
cannam@226 526 }
cannam@226 527
cannam@226 528 void
cannam@226 529 sord_iter_free(SordIter* iter)
cannam@226 530 {
cannam@226 531 SORD_ITER_LOG("%p Free\n", (void*)iter);
cannam@226 532 if (iter) {
cannam@226 533 --((SordModel*)iter->sord)->n_iters;
cannam@226 534 zix_btree_iter_free(iter->cur);
cannam@226 535 free(iter);
cannam@226 536 }
cannam@226 537 }
cannam@226 538
cannam@226 539 /**
cannam@226 540 Return true iff `sord` has an index for `order`.
cannam@226 541 If `graphs` is true, `order` will be modified to be the
cannam@226 542 corresponding order with a G prepended (so G will be the MSN).
cannam@226 543 */
cannam@226 544 static inline bool
cannam@226 545 sord_has_index(SordModel* sord, SordOrder* order, int* n_prefix, bool graphs)
cannam@226 546 {
cannam@226 547 if (graphs) {
cannam@226 548 *order = (SordOrder)(*order + GSPO);
cannam@226 549 *n_prefix += 1;
cannam@226 550 }
cannam@226 551
cannam@226 552 return sord->indices[*order];
cannam@226 553 }
cannam@226 554
cannam@226 555 /**
cannam@226 556 Return the best available index for a pattern.
cannam@226 557 @param pat Pattern in standard (S P O G) order
cannam@226 558 @param mode Set to the (best) iteration mode for iterating over results
cannam@226 559 @param n_prefix Set to the length of the range prefix
cannam@226 560 (for `mode` == RANGE and `mode` == FILTER_RANGE)
cannam@226 561 */
cannam@226 562 static inline SordOrder
cannam@226 563 sord_best_index(SordModel* sord,
cannam@226 564 const SordQuad pat,
cannam@226 565 SearchMode* mode,
cannam@226 566 int* n_prefix)
cannam@226 567 {
cannam@226 568 const bool graph_search = (pat[TUP_G] != 0);
cannam@226 569
cannam@226 570 const unsigned sig
cannam@226 571 = (pat[0] ? 1 : 0) * 0x100
cannam@226 572 + (pat[1] ? 1 : 0) * 0x010
cannam@226 573 + (pat[2] ? 1 : 0) * 0x001;
cannam@226 574
cannam@226 575 SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 };
cannam@226 576
cannam@226 577 #define PAT_CASE(sig, m, g0, g1, np) \
cannam@226 578 case sig: \
cannam@226 579 *mode = m; \
cannam@226 580 good[0] = g0; \
cannam@226 581 good[1] = g1; \
cannam@226 582 *n_prefix = np; \
cannam@226 583 break
cannam@226 584
cannam@226 585 // Good orderings that don't require filtering
cannam@226 586 *mode = RANGE;
cannam@226 587 *n_prefix = 0;
cannam@226 588 switch (sig) {
cannam@226 589 case 0x000:
cannam@226 590 assert(graph_search);
cannam@226 591 *mode = RANGE;
cannam@226 592 *n_prefix = 1;
cannam@226 593 return DEFAULT_GRAPH_ORDER;
cannam@226 594 case 0x111:
cannam@226 595 *mode = SINGLE;
cannam@226 596 return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
cannam@226 597
cannam@226 598 PAT_CASE(0x001, RANGE, OPS, OSP, 1);
cannam@226 599 PAT_CASE(0x010, RANGE, POS, PSO, 1);
cannam@226 600 PAT_CASE(0x011, RANGE, OPS, POS, 2);
cannam@226 601 PAT_CASE(0x100, RANGE, SPO, SOP, 1);
cannam@226 602 PAT_CASE(0x101, RANGE, SOP, OSP, 2);
cannam@226 603 PAT_CASE(0x110, RANGE, SPO, PSO, 2);
cannam@226 604 }
cannam@226 605
cannam@226 606 if (*mode == RANGE) {
cannam@226 607 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
cannam@226 608 return good[0];
cannam@226 609 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
cannam@226 610 return good[1];
cannam@226 611 }
cannam@226 612 }
cannam@226 613
cannam@226 614 // Not so good orderings that require filtering, but can
cannam@226 615 // still be constrained to a range
cannam@226 616 switch (sig) {
cannam@226 617 PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1);
cannam@226 618 PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1);
cannam@226 619 // SPO is always present, so 0x110 is never reached here
cannam@226 620 default: break;
cannam@226 621 }
cannam@226 622
cannam@226 623 if (*mode == FILTER_RANGE) {
cannam@226 624 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
cannam@226 625 return good[0];
cannam@226 626 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
cannam@226 627 return good[1];
cannam@226 628 }
cannam@226 629 }
cannam@226 630
cannam@226 631 if (graph_search) {
cannam@226 632 *mode = FILTER_RANGE;
cannam@226 633 *n_prefix = 1;
cannam@226 634 return DEFAULT_GRAPH_ORDER;
cannam@226 635 } else {
cannam@226 636 *mode = FILTER_ALL;
cannam@226 637 return DEFAULT_ORDER;
cannam@226 638 }
cannam@226 639 }
cannam@226 640
cannam@226 641 SordModel*
cannam@226 642 sord_new(SordWorld* world, unsigned indices, bool graphs)
cannam@226 643 {
cannam@226 644 SordModel* sord = (SordModel*)malloc(sizeof(struct SordModelImpl));
cannam@226 645 sord->world = world;
cannam@226 646 sord->n_quads = 0;
cannam@226 647 sord->n_iters = 0;
cannam@226 648
cannam@226 649 for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) {
cannam@226 650 const int* const ordering = orderings[i];
cannam@226 651 const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)];
cannam@226 652
cannam@226 653 if (indices & (1 << i)) {
cannam@226 654 sord->indices[i] = zix_btree_new(
cannam@226 655 sord_quad_compare, (void*)ordering, NULL);
cannam@226 656 if (graphs) {
cannam@226 657 sord->indices[i + (NUM_ORDERS / 2)] = zix_btree_new(
cannam@226 658 sord_quad_compare, (void*)g_ordering, NULL);
cannam@226 659 } else {
cannam@226 660 sord->indices[i + (NUM_ORDERS / 2)] = NULL;
cannam@226 661 }
cannam@226 662 } else {
cannam@226 663 sord->indices[i] = NULL;
cannam@226 664 sord->indices[i + (NUM_ORDERS / 2)] = NULL;
cannam@226 665 }
cannam@226 666 }
cannam@226 667
cannam@226 668 if (!sord->indices[DEFAULT_ORDER]) {
cannam@226 669 sord->indices[DEFAULT_ORDER] = zix_btree_new(
cannam@226 670 sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL);
cannam@226 671 }
cannam@226 672 if (graphs && !sord->indices[DEFAULT_GRAPH_ORDER]) {
cannam@226 673 sord->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new(
cannam@226 674 sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL);
cannam@226 675 }
cannam@226 676
cannam@226 677 return sord;
cannam@226 678 }
cannam@226 679
cannam@226 680 static void
cannam@226 681 sord_node_free_internal(SordWorld* world, SordNode* node)
cannam@226 682 {
cannam@226 683 assert(node->refs == 0);
cannam@226 684
cannam@226 685 // Cache pointer to buffer to free after node removal and destruction
cannam@226 686 const uint8_t* const buf = node->node.buf;
cannam@226 687
cannam@226 688 // Remove node from hash (which frees the node)
cannam@226 689 if (zix_hash_remove(world->nodes, node)) {
cannam@226 690 error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n");
cannam@226 691 }
cannam@226 692
cannam@226 693 // Free buffer
cannam@226 694 free((uint8_t*)buf);
cannam@226 695 }
cannam@226 696
cannam@226 697 static void
cannam@226 698 sord_add_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
cannam@226 699 {
cannam@226 700 if (node) {
cannam@226 701 assert(node->refs > 0);
cannam@226 702 ++((SordNode*)node)->refs;
cannam@226 703 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
cannam@226 704 ++((SordNode*)node)->meta.res.refs_as_obj;
cannam@226 705 }
cannam@226 706 }
cannam@226 707 }
cannam@226 708
cannam@226 709 static void
cannam@226 710 sord_drop_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i)
cannam@226 711 {
cannam@226 712 if (!node) {
cannam@226 713 return;
cannam@226 714 }
cannam@226 715
cannam@226 716 assert(node->refs > 0);
cannam@226 717 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
cannam@226 718 assert(node->meta.res.refs_as_obj > 0);
cannam@226 719 --((SordNode*)node)->meta.res.refs_as_obj;
cannam@226 720 }
cannam@226 721 if (--((SordNode*)node)->refs == 0) {
cannam@226 722 sord_node_free_internal(sord_get_world(sord), (SordNode*)node);
cannam@226 723 }
cannam@226 724 }
cannam@226 725
cannam@226 726 void
cannam@226 727 sord_free(SordModel* sord)
cannam@226 728 {
cannam@226 729 if (!sord)
cannam@226 730 return;
cannam@226 731
cannam@226 732 // Free nodes
cannam@226 733 SordQuad tup;
cannam@226 734 SordIter* i = sord_begin(sord);
cannam@226 735 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 736 sord_iter_get(i, tup);
cannam@226 737 for (int t = 0; t < TUP_LEN; ++t) {
cannam@226 738 sord_drop_quad_ref(sord, tup[t], (SordQuadIndex)t);
cannam@226 739 }
cannam@226 740 }
cannam@226 741 sord_iter_free(i);
cannam@226 742
cannam@226 743 // Free quads
cannam@226 744 ZixBTreeIter* t = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
cannam@226 745 for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) {
cannam@226 746 free(zix_btree_get(t));
cannam@226 747 }
cannam@226 748 zix_btree_iter_free(t);
cannam@226 749
cannam@226 750 // Free indices
cannam@226 751 for (unsigned o = 0; o < NUM_ORDERS; ++o)
cannam@226 752 if (sord->indices[o])
cannam@226 753 zix_btree_free(sord->indices[o]);
cannam@226 754
cannam@226 755 free(sord);
cannam@226 756 }
cannam@226 757
cannam@226 758 SordWorld*
cannam@226 759 sord_get_world(SordModel* sord)
cannam@226 760 {
cannam@226 761 return sord->world;
cannam@226 762 }
cannam@226 763
cannam@226 764 size_t
cannam@226 765 sord_num_quads(const SordModel* sord)
cannam@226 766 {
cannam@226 767 return sord->n_quads;
cannam@226 768 }
cannam@226 769
cannam@226 770 size_t
cannam@226 771 sord_num_nodes(const SordWorld* world)
cannam@226 772 {
cannam@226 773 return zix_hash_size(world->nodes);
cannam@226 774 }
cannam@226 775
cannam@226 776 SordIter*
cannam@226 777 sord_begin(const SordModel* sord)
cannam@226 778 {
cannam@226 779 if (sord_num_quads(sord) == 0) {
cannam@226 780 return NULL;
cannam@226 781 } else {
cannam@226 782 ZixBTreeIter* cur = zix_btree_begin(sord->indices[DEFAULT_ORDER]);
cannam@226 783 SordQuad pat = { 0, 0, 0, 0 };
cannam@226 784 return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0);
cannam@226 785 }
cannam@226 786 }
cannam@226 787
cannam@226 788 SordIter*
cannam@226 789 sord_find(SordModel* sord, const SordQuad pat)
cannam@226 790 {
cannam@226 791 if (!pat[0] && !pat[1] && !pat[2] && !pat[3])
cannam@226 792 return sord_begin(sord);
cannam@226 793
cannam@226 794 SearchMode mode;
cannam@226 795 int n_prefix;
cannam@226 796 const SordOrder index_order = sord_best_index(sord, pat, &mode, &n_prefix);
cannam@226 797
cannam@226 798 SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n",
cannam@226 799 TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix);
cannam@226 800
cannam@226 801 if (pat[0] && pat[1] && pat[2] && pat[3])
cannam@226 802 mode = SINGLE; // No duplicate quads (Sord is a set)
cannam@226 803
cannam@226 804 ZixBTree* const db = sord->indices[index_order];
cannam@226 805 ZixBTreeIter* cur = NULL;
cannam@226 806 zix_btree_lower_bound(db, pat, &cur);
cannam@226 807 if (zix_btree_iter_is_end(cur)) {
cannam@226 808 SORD_FIND_LOG("No match found\n");
cannam@226 809 zix_btree_iter_free(cur);
cannam@226 810 return NULL;
cannam@226 811 }
cannam@226 812 const SordNode** const key = (const SordNode**)zix_btree_get(cur);
cannam@226 813 if (!key || ( (mode == RANGE || mode == SINGLE)
cannam@226 814 && !sord_quad_match_inline(pat, key) )) {
cannam@226 815 SORD_FIND_LOG("No match found\n");
cannam@226 816 zix_btree_iter_free(cur);
cannam@226 817 return NULL;
cannam@226 818 }
cannam@226 819
cannam@226 820 return sord_iter_new(sord, cur, pat, index_order, mode, n_prefix);
cannam@226 821 }
cannam@226 822
cannam@226 823 SordIter*
cannam@226 824 sord_search(SordModel* model,
cannam@226 825 const SordNode* s,
cannam@226 826 const SordNode* p,
cannam@226 827 const SordNode* o,
cannam@226 828 const SordNode* g)
cannam@226 829 {
cannam@226 830 SordQuad pat = { s, p, o, g };
cannam@226 831 return sord_find(model, pat);
cannam@226 832 }
cannam@226 833
cannam@226 834 SordNode*
cannam@226 835 sord_get(SordModel* model,
cannam@226 836 const SordNode* s,
cannam@226 837 const SordNode* p,
cannam@226 838 const SordNode* o,
cannam@226 839 const SordNode* g)
cannam@226 840 {
cannam@226 841 if ((bool)s + (bool)p + (bool)o != 2) {
cannam@226 842 return NULL;
cannam@226 843 }
cannam@226 844
cannam@226 845 SordIter* i = sord_search(model, s, p, o, g);
cannam@226 846 SordNode* ret = NULL;
cannam@226 847 if (!s) {
cannam@226 848 ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT));
cannam@226 849 } else if (!p) {
cannam@226 850 ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE));
cannam@226 851 } else if (!o) {
cannam@226 852 ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT));
cannam@226 853 }
cannam@226 854
cannam@226 855 sord_iter_free(i);
cannam@226 856 return ret;
cannam@226 857 }
cannam@226 858
cannam@226 859 bool
cannam@226 860 sord_ask(SordModel* model,
cannam@226 861 const SordNode* s,
cannam@226 862 const SordNode* p,
cannam@226 863 const SordNode* o,
cannam@226 864 const SordNode* g)
cannam@226 865 {
cannam@226 866 SordQuad pat = { s, p, o, g };
cannam@226 867 return sord_contains(model, pat);
cannam@226 868 }
cannam@226 869
cannam@226 870 uint64_t
cannam@226 871 sord_count(SordModel* model,
cannam@226 872 const SordNode* s,
cannam@226 873 const SordNode* p,
cannam@226 874 const SordNode* o,
cannam@226 875 const SordNode* g)
cannam@226 876 {
cannam@226 877 SordIter* i = sord_search(model, s, p, o, g);
cannam@226 878 uint64_t n = 0;
cannam@226 879 for (; !sord_iter_end(i); sord_iter_next(i)) {
cannam@226 880 ++n;
cannam@226 881 }
cannam@226 882 sord_iter_free(i);
cannam@226 883 return n;
cannam@226 884 }
cannam@226 885
cannam@226 886 bool
cannam@226 887 sord_contains(SordModel* sord, const SordQuad pat)
cannam@226 888 {
cannam@226 889 SordIter* iter = sord_find(sord, pat);
cannam@226 890 bool ret = (iter != NULL);
cannam@226 891 sord_iter_free(iter);
cannam@226 892 return ret;
cannam@226 893 }
cannam@226 894
cannam@226 895 static uint8_t*
cannam@226 896 sord_strndup(const uint8_t* str, size_t len)
cannam@226 897 {
cannam@226 898 uint8_t* dup = (uint8_t*)malloc(len + 1);
cannam@226 899 memcpy(dup, str, len + 1);
cannam@226 900 return dup;
cannam@226 901 }
cannam@226 902
cannam@226 903 SordNodeType
cannam@226 904 sord_node_get_type(const SordNode* node)
cannam@226 905 {
cannam@226 906 switch (node->node.type) {
cannam@226 907 case SERD_URI:
cannam@226 908 return SORD_URI;
cannam@226 909 case SERD_BLANK:
cannam@226 910 return SORD_BLANK;
cannam@226 911 default:
cannam@226 912 return SORD_LITERAL;
cannam@226 913 }
cannam@226 914 SORD_UNREACHABLE();
cannam@226 915 }
cannam@226 916
cannam@226 917 const uint8_t*
cannam@226 918 sord_node_get_string(const SordNode* node)
cannam@226 919 {
cannam@226 920 return node->node.buf;
cannam@226 921 }
cannam@226 922
cannam@226 923 const uint8_t*
cannam@226 924 sord_node_get_string_counted(const SordNode* node, size_t* bytes)
cannam@226 925 {
cannam@226 926 *bytes = node->node.n_bytes;
cannam@226 927 return node->node.buf;
cannam@226 928 }
cannam@226 929
cannam@226 930 const uint8_t*
cannam@226 931 sord_node_get_string_measured(const SordNode* node,
cannam@226 932 size_t* bytes,
cannam@226 933 size_t* chars)
cannam@226 934 {
cannam@226 935 *bytes = node->node.n_bytes;
cannam@226 936 *chars = node->node.n_chars;
cannam@226 937 return node->node.buf;
cannam@226 938 }
cannam@226 939
cannam@226 940 const char*
cannam@226 941 sord_node_get_language(const SordNode* node)
cannam@226 942 {
cannam@226 943 if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) {
cannam@226 944 return NULL;
cannam@226 945 }
cannam@226 946 return node->meta.lit.lang;
cannam@226 947 }
cannam@226 948
cannam@226 949 SordNode*
cannam@226 950 sord_node_get_datatype(const SordNode* node)
cannam@226 951 {
cannam@226 952 return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL;
cannam@226 953 }
cannam@226 954
cannam@226 955 SerdNodeFlags
cannam@226 956 sord_node_get_flags(const SordNode* node)
cannam@226 957 {
cannam@226 958 return node->node.flags;
cannam@226 959 }
cannam@226 960
cannam@226 961 bool
cannam@226 962 sord_node_is_inline_object(const SordNode* node)
cannam@226 963 {
cannam@226 964 return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1);
cannam@226 965 }
cannam@226 966
cannam@226 967 static SordNode*
cannam@226 968 sord_insert_node(SordWorld* world, const SordNode* key, bool copy)
cannam@226 969 {
cannam@226 970 SordNode* node = NULL;
cannam@226 971 ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node);
cannam@226 972 switch (st) {
cannam@226 973 case ZIX_STATUS_EXISTS:
cannam@226 974 ++node->refs;
cannam@226 975 break;
cannam@226 976 case ZIX_STATUS_SUCCESS:
cannam@226 977 assert(node->refs == 1);
cannam@226 978 if (copy) {
cannam@226 979 node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes);
cannam@226 980 }
cannam@226 981 if (node->node.type == SERD_LITERAL) {
cannam@226 982 node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype);
cannam@226 983 }
cannam@226 984 return node;
cannam@226 985 default:
cannam@226 986 error(world, SERD_ERR_INTERNAL,
cannam@226 987 "error inserting node `%s'\n", key->node.buf);
cannam@226 988 }
cannam@226 989
cannam@226 990 if (!copy) {
cannam@226 991 // Free the buffer we would have copied if a new node was created
cannam@226 992 free((uint8_t*)key->node.buf);
cannam@226 993 }
cannam@226 994
cannam@226 995 return node;
cannam@226 996 }
cannam@226 997
cannam@226 998 static SordNode*
cannam@226 999 sord_new_uri_counted(SordWorld* world, const uint8_t* str,
cannam@226 1000 size_t n_bytes, size_t n_chars, bool copy)
cannam@226 1001 {
cannam@226 1002 if (!serd_uri_string_has_scheme(str)) {
cannam@226 1003 error(world, SERD_ERR_BAD_ARG,
cannam@226 1004 "attempt to map invalid URI `%s'\n", str);
cannam@226 1005 return NULL; // Can't intern relative URIs
cannam@226 1006 }
cannam@226 1007
cannam@226 1008 const SordNode key = {
cannam@226 1009 { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } }
cannam@226 1010 };
cannam@226 1011
cannam@226 1012 return sord_insert_node(world, &key, copy);
cannam@226 1013 }
cannam@226 1014
cannam@226 1015 SordNode*
cannam@226 1016 sord_new_uri(SordWorld* world, const uint8_t* str)
cannam@226 1017 {
cannam@226 1018 const SerdNode node = serd_node_from_string(SERD_URI, str);
cannam@226 1019 return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars, true);
cannam@226 1020 }
cannam@226 1021
cannam@226 1022 SordNode*
cannam@226 1023 sord_new_relative_uri(SordWorld* world,
cannam@226 1024 const uint8_t* str,
cannam@226 1025 const uint8_t* base_str)
cannam@226 1026 {
cannam@226 1027 if (serd_uri_string_has_scheme(str)) {
cannam@226 1028 return sord_new_uri(world, str);
cannam@226 1029 }
cannam@226 1030 SerdURI buri = SERD_URI_NULL;
cannam@226 1031 SerdNode base = serd_node_new_uri_from_string(base_str, NULL, &buri);
cannam@226 1032 SerdNode node = serd_node_new_uri_from_string(str, &buri, NULL);
cannam@226 1033
cannam@226 1034 SordNode* ret = sord_new_uri_counted(
cannam@226 1035 world, node.buf, node.n_bytes, node.n_chars, false);
cannam@226 1036
cannam@226 1037 serd_node_free(&base);
cannam@226 1038 return ret;
cannam@226 1039 }
cannam@226 1040
cannam@226 1041 static SordNode*
cannam@226 1042 sord_new_blank_counted(SordWorld* world, const uint8_t* str,
cannam@226 1043 size_t n_bytes, size_t n_chars)
cannam@226 1044 {
cannam@226 1045 const SordNode key = {
cannam@226 1046 { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } }
cannam@226 1047 };
cannam@226 1048
cannam@226 1049 return sord_insert_node(world, &key, true);
cannam@226 1050 }
cannam@226 1051
cannam@226 1052 SordNode*
cannam@226 1053 sord_new_blank(SordWorld* world, const uint8_t* str)
cannam@226 1054 {
cannam@226 1055 const SerdNode node = serd_node_from_string(SERD_URI, str);
cannam@226 1056 return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
cannam@226 1057 }
cannam@226 1058
cannam@226 1059 static SordNode*
cannam@226 1060 sord_new_literal_counted(SordWorld* world,
cannam@226 1061 SordNode* datatype,
cannam@226 1062 const uint8_t* str,
cannam@226 1063 size_t n_bytes,
cannam@226 1064 size_t n_chars,
cannam@226 1065 SerdNodeFlags flags,
cannam@226 1066 const char* lang)
cannam@226 1067 {
cannam@226 1068 SordNode key = {
cannam@226 1069 { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } }
cannam@226 1070 };
cannam@226 1071 key.meta.lit.datatype = sord_node_copy(datatype);
cannam@226 1072 memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang));
cannam@226 1073 if (lang) {
cannam@266 1074 strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang)-1);
cannam@226 1075 }
cannam@226 1076
cannam@226 1077 return sord_insert_node(world, &key, true);
cannam@226 1078 }
cannam@226 1079
cannam@226 1080 SordNode*
cannam@226 1081 sord_new_literal(SordWorld* world, SordNode* datatype,
cannam@226 1082 const uint8_t* str, const char* lang)
cannam@226 1083 {
cannam@226 1084 SerdNodeFlags flags = 0;
cannam@226 1085 size_t n_bytes = 0;
cannam@226 1086 size_t n_chars = serd_strlen(str, &n_bytes, &flags);
cannam@226 1087 return sord_new_literal_counted(world, datatype,
cannam@226 1088 str, n_bytes, n_chars, flags,
cannam@226 1089 lang);
cannam@226 1090 }
cannam@226 1091
cannam@226 1092 SordNode*
cannam@226 1093 sord_node_from_serd_node(SordWorld* world,
cannam@226 1094 SerdEnv* env,
cannam@226 1095 const SerdNode* sn,
cannam@226 1096 const SerdNode* datatype,
cannam@226 1097 const SerdNode* lang)
cannam@226 1098 {
cannam@226 1099 if (!sn) {
cannam@226 1100 return NULL;
cannam@226 1101 }
cannam@226 1102
cannam@226 1103 SordNode* datatype_node = NULL;
cannam@226 1104 SordNode* ret = NULL;
cannam@226 1105 switch (sn->type) {
cannam@226 1106 case SERD_NOTHING:
cannam@226 1107 return NULL;
cannam@226 1108 case SERD_LITERAL:
cannam@226 1109 datatype_node = sord_node_from_serd_node(
cannam@226 1110 world, env, datatype, NULL, NULL),
cannam@226 1111 ret = sord_new_literal_counted(
cannam@226 1112 world,
cannam@226 1113 datatype_node,
cannam@226 1114 sn->buf,
cannam@226 1115 sn->n_bytes,
cannam@226 1116 sn->n_chars,
cannam@226 1117 sn->flags,
cannam@226 1118 lang ? (const char*)lang->buf : NULL);
cannam@226 1119 sord_node_free(world, datatype_node);
cannam@226 1120 return ret;
cannam@226 1121 case SERD_URI:
cannam@226 1122 if (serd_uri_string_has_scheme(sn->buf)) {
cannam@226 1123 return sord_new_uri_counted(
cannam@226 1124 world, sn->buf, sn->n_bytes, sn->n_chars, true);
cannam@226 1125 } else {
cannam@226 1126 SerdURI base_uri;
cannam@226 1127 serd_env_get_base_uri(env, &base_uri);
cannam@226 1128 SerdURI abs_uri;
cannam@226 1129 SerdNode abs_uri_node = serd_node_new_uri_from_node(
cannam@226 1130 sn, &base_uri, &abs_uri);
cannam@226 1131 ret = sord_new_uri_counted(world,
cannam@226 1132 abs_uri_node.buf,
cannam@226 1133 abs_uri_node.n_bytes,
cannam@226 1134 abs_uri_node.n_chars,
cannam@226 1135 true);
cannam@226 1136 serd_node_free(&abs_uri_node);
cannam@226 1137 return ret;
cannam@226 1138 }
cannam@226 1139 case SERD_CURIE: {
cannam@226 1140 SerdChunk uri_prefix;
cannam@226 1141 SerdChunk uri_suffix;
cannam@226 1142 if (serd_env_expand(env, sn, &uri_prefix, &uri_suffix)) {
cannam@226 1143 error(world, SERD_ERR_BAD_CURIE,
cannam@226 1144 "failed to expand CURIE `%s'\n", sn->buf);
cannam@226 1145 return NULL;
cannam@226 1146 }
cannam@226 1147 const size_t uri_len = uri_prefix.len + uri_suffix.len;
cannam@226 1148 uint8_t* buf = (uint8_t*)malloc(uri_len + 1);
cannam@226 1149 memcpy(buf, uri_prefix.buf, uri_prefix.len);
cannam@226 1150 memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
cannam@226 1151 buf[uri_len] = '\0';
cannam@226 1152 ret = sord_new_uri_counted(
cannam@226 1153 world, buf, uri_len, serd_strlen(buf, NULL, NULL), false);
cannam@226 1154 return ret;
cannam@226 1155 }
cannam@226 1156 case SERD_BLANK:
cannam@226 1157 return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars);
cannam@226 1158 }
cannam@226 1159 return NULL;
cannam@226 1160 }
cannam@226 1161
cannam@226 1162 const SerdNode*
cannam@226 1163 sord_node_to_serd_node(const SordNode* node)
cannam@226 1164 {
cannam@226 1165 return node ? &node->node : &SERD_NODE_NULL;
cannam@226 1166 }
cannam@226 1167
cannam@226 1168 void
cannam@226 1169 sord_node_free(SordWorld* world, SordNode* node)
cannam@226 1170 {
cannam@226 1171 if (!node) {
cannam@226 1172 return;
cannam@226 1173 } else if (node->refs == 0) {
cannam@226 1174 error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n");
cannam@226 1175 } else if (--node->refs == 0) {
cannam@226 1176 sord_node_free_internal(world, node);
cannam@226 1177 }
cannam@226 1178 }
cannam@226 1179
cannam@226 1180 SordNode*
cannam@226 1181 sord_node_copy(const SordNode* node)
cannam@226 1182 {
cannam@226 1183 SordNode* copy = (SordNode*)node;
cannam@226 1184 if (copy) {
cannam@226 1185 ++copy->refs;
cannam@226 1186 }
cannam@226 1187 return copy;
cannam@226 1188 }
cannam@226 1189
cannam@226 1190 static inline bool
cannam@226 1191 sord_add_to_index(SordModel* sord, const SordNode** tup, SordOrder order)
cannam@226 1192 {
cannam@226 1193 return !zix_btree_insert(sord->indices[order], tup);
cannam@226 1194 }
cannam@226 1195
cannam@226 1196 bool
cannam@226 1197 sord_add(SordModel* sord, const SordQuad tup)
cannam@226 1198 {
cannam@226 1199 SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
cannam@226 1200 if (!tup[0] || !tup[1] || !tup[2]) {
cannam@226 1201 error(sord->world, SERD_ERR_BAD_ARG,
cannam@226 1202 "attempt to add quad with NULL field\n");
cannam@226 1203 return false;
cannam@226 1204 } else if (sord->n_iters > 0) {
cannam@226 1205 error(sord->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n");
cannam@226 1206 }
cannam@226 1207
cannam@226 1208 const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad));
cannam@226 1209 memcpy(quad, tup, sizeof(SordQuad));
cannam@226 1210
cannam@226 1211 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
cannam@226 1212 if (sord->indices[i] && (i < GSPO || tup[3])) {
cannam@226 1213 if (!sord_add_to_index(sord, quad, (SordOrder)i)) {
cannam@226 1214 assert(i == 0); // Assuming index coherency
cannam@226 1215 free(quad);
cannam@226 1216 return false; // Quad already stored, do nothing
cannam@226 1217 }
cannam@226 1218 }
cannam@226 1219 }
cannam@226 1220
cannam@226 1221 for (int i = 0; i < TUP_LEN; ++i)
cannam@226 1222 sord_add_quad_ref(sord, tup[i], (SordQuadIndex)i);
cannam@226 1223
cannam@226 1224 ++sord->n_quads;
cannam@226 1225 return true;
cannam@226 1226 }
cannam@226 1227
cannam@226 1228 void
cannam@226 1229 sord_remove(SordModel* sord, const SordQuad tup)
cannam@226 1230 {
cannam@226 1231 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
cannam@226 1232 if (sord->n_iters > 0) {
cannam@226 1233 error(sord->world, SERD_ERR_BAD_ARG, "remove with iterator\n");
cannam@226 1234 }
cannam@226 1235
cannam@226 1236 SordNode* quad = NULL;
cannam@226 1237 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
cannam@226 1238 if (sord->indices[i] && (i < GSPO || tup[3])) {
cannam@226 1239 if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, NULL)) {
cannam@226 1240 assert(i == 0); // Assuming index coherency
cannam@226 1241 return; // Quad not found, do nothing
cannam@226 1242 }
cannam@226 1243 }
cannam@226 1244 }
cannam@226 1245
cannam@226 1246 free(quad);
cannam@226 1247
cannam@226 1248 for (int i = 0; i < TUP_LEN; ++i)
cannam@226 1249 sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
cannam@226 1250
cannam@226 1251 --sord->n_quads;
cannam@226 1252 }
cannam@226 1253
cannam@226 1254 SerdStatus
cannam@226 1255 sord_erase(SordModel* sord, SordIter* iter)
cannam@226 1256 {
cannam@226 1257 if (sord->n_iters > 1) {
cannam@226 1258 error(sord->world, SERD_ERR_BAD_ARG, "erased with many iterators\n");
cannam@226 1259 return SERD_ERR_BAD_ARG;
cannam@226 1260 }
cannam@226 1261
cannam@226 1262 SordQuad tup;
cannam@226 1263 sord_iter_get(iter, tup);
cannam@226 1264
cannam@226 1265 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
cannam@226 1266
cannam@226 1267 SordNode* quad = NULL;
cannam@226 1268 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
cannam@226 1269 if (sord->indices[i] && (i < GSPO || tup[3])) {
cannam@226 1270 if (zix_btree_remove(sord->indices[i], tup, (void**)&quad,
cannam@226 1271 i == iter->order ? &iter->cur : NULL)) {
cannam@226 1272 return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL;
cannam@226 1273 }
cannam@226 1274 }
cannam@226 1275 }
cannam@226 1276 iter->end = zix_btree_iter_is_end(iter->cur);
cannam@226 1277 sord_iter_scan_next(iter);
cannam@226 1278
cannam@226 1279 free(quad);
cannam@226 1280
cannam@226 1281 for (int i = 0; i < TUP_LEN; ++i)
cannam@226 1282 sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i);
cannam@226 1283
cannam@226 1284 --sord->n_quads;
cannam@226 1285 return SERD_SUCCESS;
cannam@226 1286 }