Mercurial > hg > piper-cpp
changeset 247:8a031eb9a25f
Merge branch 'output-type-uri'
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Thu, 15 Jun 2017 09:52:01 +0100 |
parents | db929669e7d3 (current diff) 86d4310c2a62 (diff) |
children | cb51adadccd5 |
files | |
diffstat | 50 files changed, 13771 insertions(+), 306 deletions(-) [+] |
line wrap: on
line diff
--- a/.travis.yml Mon May 22 08:57:02 2017 +0100 +++ b/.travis.yml Thu Jun 15 09:52:01 2017 +0100 @@ -14,7 +14,7 @@ addons: apt: packages: - - qt5-default + - qt5-default libsord-dev before_install: - ( cd ../ ; git clone https://github.com/piper-audio/piper ) @@ -22,7 +22,7 @@ - ( cd ../ ; git clone https://github.com/sandstorm-io/capnproto ) - ( cd ../capnproto/c++ ; ./setup-autotools.sh && autoreconf -i ) - ( cd ../capnproto/c++ ; ./configure && make && sudo make install ) - - if [[ "$TRAVIS_OS_NAME" = "osx" ]] ; then brew install qt5 ; fi + - if [[ "$TRAVIS_OS_NAME" = "osx" ]] ; then brew install qt5 sord ; fi - if [[ "$TRAVIS_OS_NAME" = "osx" ]] ; then export PATH=$PATH:/usr/local/opt/qt5/bin ; fi - ( cd ../vamp-plugin-sdk ; if [[ "$TRAVIS_OS_NAME" = "osx" ]] ; then make -f build/Makefile.osx sdkstatic plugins ; else ./configure --disable-programs && make sdkstatic plugins; fi )
--- a/Makefile Mon May 22 08:57:02 2017 +0100 +++ b/Makefile Thu Jun 15 09:52:01 2017 +0100 @@ -2,15 +2,18 @@ VAMPSDK_DIR := ../vamp-plugin-sdk PIPER_DIR := ../piper -INCFLAGS := -Iext -I$(VAMPSDK_DIR) -I. -I/usr/local/include -CXXFLAGS := -Wall -Wextra -Werror -g3 -std=c++11 $(INCFLAGS) +INCFLAGS := -Iext -Iext/sord -Iext/serd -I$(VAMPSDK_DIR) -I. -I/usr/local/include -#LDFLAGS := -L$(VAMPSDK_DIR) -L/usr/local/lib -lvamp-hostsdk -lcapnp -lkj -LDFLAGS := $(VAMPSDK_DIR)/libvamp-hostsdk.a -lcapnp -lkj +OPTFLAGS := -O3 + +CFLAGS := -Wall $(OPTFLAGS) $(INCFLAGS) +CXXFLAGS := -Wall -Wextra -Werror -Wno-error=unused-parameter -std=c++11 $(OPTFLAGS) $(INCFLAGS) + +LDFLAGS := -L$(VAMPSDK_DIR) -L/usr/local/lib -lvamp-hostsdk -lcapnp -lkj LDFLAGS += -ldl -COMMON_OBJS := ext/json11/json11.o vamp-capnp/piper.capnp.o +COMMON_OBJS := ext/json11/json11.o ext/sord/sord-single.o vamp-capnp/piper.capnp.o TEST_SRCS := test/main.cpp test/vamp-client/tst_PluginStub.cpp TEST_OBJS := $(TEST_SRCS:.cpp=.o) @@ -33,9 +36,9 @@ vamp-capnp/piper.capnp.o: vamp-capnp/piper.capnp.c++ c++ $(CXXFLAGS) $(INCFLAGS) -c $< -o $@ -vamp-capnp/piper.capnp.h: vamp-capnp/piper.capnp.c++ +vamp-capnp/piper.capnp.c++: vamp-capnp/piper.capnp.h -vamp-capnp/piper.capnp.c++: $(PIPER_DIR)/capnp/piper.capnp +vamp-capnp/piper.capnp.h: $(PIPER_DIR)/capnp/piper.capnp capnpc --src-prefix=$(PIPER_DIR)/capnp -oc++:vamp-capnp $< test: all @@ -57,7 +60,10 @@ # DO NOT DELETE vamp-capnp/piper-capnp.o: vamp-capnp/piper.capnp.c++ vamp-capnp/piper.capnp.h -vamp-server/convert.o: vamp-json/VampJson.h vamp-support/PluginStaticData.h +vamp-server/convert.o: vamp-json/VampJson.h +vamp-server/convert.o: vamp-support/StaticOutputDescriptor.h +vamp-server/convert.o: vamp-support/PluginStaticData.h +vamp-server/convert.o: vamp-support/StaticOutputDescriptor.h vamp-server/convert.o: vamp-support/PluginConfiguration.h vamp-server/convert.o: vamp-support/RequestResponse.h vamp-server/convert.o: vamp-support/PluginStaticData.h @@ -74,7 +80,9 
@@ vamp-server/convert.o: vamp-support/PluginHandleMapper.h vamp-server/convert.o: vamp-support/PreservingPluginOutputIdMapper.h vamp-server/simple-server.o: vamp-json/VampJson.h +vamp-server/simple-server.o: vamp-support/StaticOutputDescriptor.h vamp-server/simple-server.o: vamp-support/PluginStaticData.h +vamp-server/simple-server.o: vamp-support/StaticOutputDescriptor.h vamp-server/simple-server.o: vamp-support/PluginConfiguration.h vamp-server/simple-server.o: vamp-support/RequestResponse.h vamp-server/simple-server.o: vamp-support/PluginStaticData.h @@ -92,6 +100,7 @@ vamp-server/simple-server.o: vamp-support/AssignedPluginHandleMapper.h vamp-server/simple-server.o: vamp-support/DefaultPluginOutputIdMapper.h vamp-server/simple-server.o: vamp-support/LoaderRequests.h +vamp-server/simple-server.o: vamp-support/StaticOutputRdf.h ext/json11/json11.o: ext/json11/json11.hpp ext/json11/test.o: ext/json11/json11.hpp test/vamp-client/tst_PluginStub.o: vamp-client/Loader.h @@ -102,6 +111,7 @@ test/vamp-client/tst_PluginStub.o: vamp-support/PluginConfiguration.h test/vamp-client/tst_PluginStub.o: vamp-client/PiperVampPlugin.h test/vamp-client/tst_PluginStub.o: vamp-support/PluginStaticData.h +test/vamp-client/tst_PluginStub.o: vamp-support/StaticOutputDescriptor.h test/vamp-client/tst_PluginStub.o: vamp-client/PluginClient.h vamp-client/qt/test.o: vamp-client/qt/ProcessQtTransport.h vamp-client/qt/test.o: vamp-client/SynchronousTransport.h @@ -114,6 +124,7 @@ vamp-client/qt/test.o: vamp-client/PluginClient.h vamp-client/qt/test.o: vamp-client/PiperVampPlugin.h vamp-client/qt/test.o: vamp-support/PluginStaticData.h +vamp-client/qt/test.o: vamp-support/StaticOutputDescriptor.h vamp-client/qt/test.o: vamp-support/PluginConfiguration.h vamp-client/qt/test.o: vamp-client/SynchronousTransport.h vamp-client/qt/test.o: vamp-support/AssignedPluginHandleMapper.h
--- a/README.md Mon May 22 08:57:02 2017 +0100 +++ b/README.md Thu Jun 15 09:52:01 2017 +0100 @@ -4,8 +4,6 @@ Supporting code in C++ for the Piper audio feature extractor protocol, primarily for making Vamp plugins work with the Piper protocol. -_This is pre-1.0 code and the API may change at any time_ - ## Contents * code to adapt Piper messages to the classes used in the Vamp SDK
--- a/ext/json11/CMakeLists.txt Mon May 22 08:57:02 2017 +0100 +++ b/ext/json11/CMakeLists.txt Thu Jun 15 09:52:01 2017 +0100 @@ -1,21 +1,57 @@ -project(json11) - cmake_minimum_required(VERSION 2.8) +if (CMAKE_VERSION VERSION_LESS "3") + project(json11 CXX) +else() + cmake_policy(SET CMP0048 NEW) + project(json11 VERSION 1.0.0 LANGUAGES CXX) +endif() enable_testing() -add_definitions( - -std=c++11 - -fno-rtti - -fno-exceptions - -Wall - -Wextra - -Werror) +option(JSON11_BUILD_TESTS "Build unit tests" OFF) +option(JSON11_ENABLE_DR1467_CANARY "Enable canary test for DR 1467" OFF) -set(json11_SRCS json11.cpp) +if(CMAKE_VERSION VERSION_LESS "3") + add_definitions(-std=c++11) +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() -add_library(json11 STATIC ${json11_SRCS}) +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX /usr) +endif() -add_test(json11_test json11_test) +add_library(json11 json11.cpp) +target_include_directories(json11 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_compile_options(json11 + PRIVATE -fPIC -fno-rtti -fno-exceptions -Wall) -add_executable(json11_test ${json11_SRCS} test.cpp) +# Set warning flags, which may vary per platform +include(CheckCXXCompilerFlag) +set(_possible_warnings_flags /W4 /WX -Wextra -Werror) +foreach(_warning_flag in ${_possible_warnings_flags}) + CHECK_CXX_COMPILER_FLAG(_warning_flag _flag_supported) + if(${_flag_supported}) + target_compile_options(json11 PRIVATE ${_warning_flag}) + endif() +endforeach() + +configure_file("json11.pc.in" "json11.pc" @ONLY) + +if (JSON11_BUILD_TESTS) + + # enable test for DR1467, described here: https://llvm.org/bugs/show_bug.cgi?id=23812 + if(JSON11_ENABLE_DR1467_CANARY) + add_definitions(-D JSON11_ENABLE_DR1467_CANARY=1) + else() + add_definitions(-D JSON11_ENABLE_DR1467_CANARY=0) + endif() + + add_executable(json11_test test.cpp) + target_link_libraries(json11_test json11) +endif() + +install(TARGETS json11 DESTINATION 
lib/${CMAKE_LIBRARY_ARCHITECTURE}) +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/json11.hpp" DESTINATION include/${CMAKE_LIBRARY_ARCHITECTURE}) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/json11.pc" DESTINATION lib/${CMAKE_LIBRARY_ARCHITECTURE}/pkgconfig)
--- a/ext/json11/Makefile Mon May 22 08:57:02 2017 +0100 +++ b/ext/json11/Makefile Thu Jun 15 09:52:01 2017 +0100 @@ -1,5 +1,13 @@ +# Environment variable to enable or disable code which demonstrates the behavior change +# in Xcode 7 / Clang 3.7, introduced by DR1467 and described here: +# https://llvm.org/bugs/show_bug.cgi?id=23812 +# Defaults to on in order to act as a warning to anyone who's unaware of the issue. +ifneq ($(JSON11_ENABLE_DR1467_CANARY),) +CANARY_ARGS = -DJSON11_ENABLE_DR1467_CANARY=$(JSON11_ENABLE_DR1467_CANARY) +endif + test: json11.cpp json11.hpp test.cpp - $(CXX) -O -std=c++11 json11.cpp test.cpp -o test -fno-rtti -fno-exceptions + $(CXX) $(CANARY_ARGS) -O -std=c++11 json11.cpp test.cpp -o test -fno-rtti -fno-exceptions clean: if [ -e test ]; then rm test; fi
--- a/ext/json11/json11.cpp Mon May 22 08:57:02 2017 +0100 +++ b/ext/json11/json11.cpp Thu Jun 15 09:52:01 2017 +0100 @@ -37,11 +37,20 @@ using std::initializer_list; using std::move; +/* Helper for representing null - just a do-nothing struct, plus comparison + * operators so the helpers in JsonValue work. We can't use nullptr_t because + * it may not be orderable. + */ +struct NullStruct { + bool operator==(NullStruct) const { return true; } + bool operator<(NullStruct) const { return false; } +}; + /* * * * * * * * * * * * * * * * * * * * * Serialization */ -static void dump(std::nullptr_t, string &out) { +static void dump(NullStruct, string &out) { out += "null"; } @@ -208,9 +217,9 @@ explicit JsonObject(Json::object &&value) : Value(move(value)) {} }; -class JsonNull final : public Value<Json::NUL, std::nullptr_t> { +class JsonNull final : public Value<Json::NUL, NullStruct> { public: - JsonNull() : Value(nullptr) {} + JsonNull() : Value({}) {} }; /* * * * * * * * * * * * * * * * * * * * @@ -291,6 +300,8 @@ */ bool Json::operator== (const Json &other) const { + if (m_ptr == other.m_ptr) + return true; if (m_ptr->type() != other.m_ptr->type()) return false; @@ -298,6 +309,8 @@ } bool Json::operator< (const Json &other) const { + if (m_ptr == other.m_ptr) + return false; if (m_ptr->type() != other.m_ptr->type()) return m_ptr->type() < other.m_ptr->type(); @@ -326,11 +339,12 @@ return (x >= lower && x <= upper); } +namespace { /* JsonParser * * Object that tracks all state of an in-progress parse. 
*/ -struct JsonParser { +struct JsonParser final { /* State */ @@ -374,38 +388,31 @@ if (str[i] == '/') { i++; if (i == str.size()) - return fail("unexpected end of input inside comment", 0); + return fail("unexpected end of input after start of comment", false); if (str[i] == '/') { // inline comment i++; - if (i == str.size()) - return fail("unexpected end of input inside inline comment", 0); - // advance until next line - while (str[i] != '\n') { + // advance until next line, or end of input + while (i < str.size() && str[i] != '\n') { i++; - if (i == str.size()) - return fail("unexpected end of input inside inline comment", 0); } comment_found = true; } else if (str[i] == '*') { // multiline comment i++; if (i > str.size()-2) - return fail("unexpected end of input inside multi-line comment", 0); + return fail("unexpected end of input inside multi-line comment", false); // advance until closing tokens while (!(str[i] == '*' && str[i+1] == '/')) { i++; if (i > str.size()-2) return fail( - "unexpected end of input inside multi-line comment", 0); + "unexpected end of input inside multi-line comment", false); } i += 2; - if (i == str.size()) - return fail( - "unexpected end of input inside multi-line comment", 0); comment_found = true; } else - return fail("malformed comment", 0); + return fail("malformed comment", false); } return comment_found; } @@ -420,6 +427,7 @@ bool comment_found = false; do { comment_found = consume_comment(); + if (failed) return; consume_whitespace(); } while(comment_found); @@ -433,8 +441,9 @@ */ char get_next_token() { consume_garbage(); + if (failed) return (char)0; if (i == str.size()) - return fail("unexpected end of input", 0); + return fail("unexpected end of input", (char)0); return str[i++]; } @@ -508,7 +517,7 @@ if (esc.length() < 4) { return fail("bad \\u escape: " + esc, ""); } - for (int j = 0; j < 4; j++) { + for (size_t j = 0; j < 4; j++) { if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F') && !in_range(esc[j], 
'0', '9')) return fail("bad \\u escape: " + esc, ""); @@ -718,6 +727,7 @@ return fail("expected value, got " + esc(ch)); } }; +}//namespace { Json Json::parse(const string &in, string &err, JsonParse strategy) { JsonParser parser { in, 0, err, false, strategy }; @@ -725,6 +735,8 @@ // Check for any trailing garbage parser.consume_garbage(); + if (parser.failed) + return Json(); if (parser.i != in.size()) return parser.fail("unexpected trailing " + esc(in[parser.i])); @@ -733,15 +745,22 @@ // Documented in json11.hpp vector<Json> Json::parse_multi(const string &in, + std::string::size_type &parser_stop_pos, string &err, JsonParse strategy) { JsonParser parser { in, 0, err, false, strategy }; - + parser_stop_pos = 0; vector<Json> json_vec; while (parser.i != in.size() && !parser.failed) { json_vec.push_back(parser.parse_json(0)); + if (parser.failed) + break; + // Check for another object parser.consume_garbage(); + if (parser.failed) + break; + parser_stop_pos = parser.i; } return json_vec; }
--- a/ext/json11/json11.hpp Mon May 22 08:57:02 2017 +0100 +++ b/ext/json11/json11.hpp Thu Jun 15 09:52:01 2017 +0100 @@ -56,6 +56,18 @@ #include <memory> #include <initializer_list> +#ifdef _MSC_VER + #if _MSC_VER <= 1800 // VS 2013 + #ifndef noexcept + #define noexcept throw() + #endif + + #ifndef snprintf + #define snprintf _snprintf_s + #endif + #endif +#endif + namespace json11 { enum JsonParse { @@ -165,9 +177,18 @@ // Parse multiple objects, concatenated or separated by whitespace static std::vector<Json> parse_multi( const std::string & in, + std::string::size_type & parser_stop_pos, std::string & err, JsonParse strategy = JsonParse::STANDARD); + static inline std::vector<Json> parse_multi( + const std::string & in, + std::string & err, + JsonParse strategy = JsonParse::STANDARD) { + std::string::size_type parser_stop_pos; + return parse_multi(in, parser_stop_pos, err, strategy); + } + bool operator== (const Json &rhs) const; bool operator< (const Json &rhs) const; bool operator!= (const Json &rhs) const { return !(*this == rhs); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/json11/json11.pc.in Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,9 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +libdir=${prefix}/lib/@CMAKE_LIBRARY_ARCHITECTURE@ +includedir=${prefix}/include/@CMAKE_LIBRARY_ARCHITECTURE@ + +Name: @PROJECT_NAME@ +Description: json11 is a tiny JSON library for C++11, providing JSON parsing and serialization. +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -ljson11 +Cflags: -I${includedir}
--- a/ext/json11/test.cpp Mon May 22 08:57:02 2017 +0100 +++ b/ext/json11/test.cpp Thu Jun 15 09:52:01 2017 +0100 @@ -1,19 +1,55 @@ +/* + * Define JSON11_TEST_CUSTOM_CONFIG to 1 if you want to build this tester into + * your own unit-test framework rather than a stand-alone program. By setting + * The values of the variables included below, you can insert your own custom + * code into this file as it builds, in order to make it into a test case for + * your favorite framework. + */ +#if !JSON11_TEST_CUSTOM_CONFIG +#define JSON11_TEST_CPP_PREFIX_CODE +#define JSON11_TEST_CPP_SUFFIX_CODE +#define JSON11_TEST_STANDALONE_MAIN 1 +#define JSON11_TEST_CASE(name) static void name() +#define JSON11_TEST_ASSERT(b) assert(b) +#ifdef NDEBUG +#undef NDEBUG//at now assert will work even in Release build +#endif +#endif // JSON11_TEST_CUSTOM_CONFIG + +/* + * Enable or disable code which demonstrates the behavior change in Xcode 7 / Clang 3.7, + * introduced by DR1467 and described here: https://github.com/dropbox/json11/issues/86 + * Defaults to off since it doesn't appear the standards committee is likely to act + * on this, so it needs to be considered normal behavior. + */ +#ifndef JSON11_ENABLE_DR1467_CANARY +#define JSON11_ENABLE_DR1467_CANARY 0 +#endif + +/* + * Beginning of standard source file, which makes use of the customizations above. + */ +#include <cassert> #include <string> #include <cstdio> #include <cstring> #include <iostream> #include <sstream> #include "json11.hpp" -#include <cassert> #include <list> #include <set> #include <unordered_map> +#include <algorithm> +#include <type_traits> + +// Insert user-defined prefix code (includes, function declarations, etc) +// to set up a custom test suite +JSON11_TEST_CPP_PREFIX_CODE using namespace json11; using std::string; // Check that Json has the properties we want. 
-#include <type_traits> #define CHECK_TRAIT(x) static_assert(std::x::value, #x) CHECK_TRAIT(is_nothrow_constructible<Json>); CHECK_TRAIT(is_nothrow_default_constructible<Json>); @@ -23,7 +59,197 @@ CHECK_TRAIT(is_nothrow_move_assignable<Json>); CHECK_TRAIT(is_nothrow_destructible<Json>); -void parse_from_stdin() { +JSON11_TEST_CASE(json11_test) { + const string simple_test = + R"({"k1":"v1", "k2":42, "k3":["a",123,true,false,null]})"; + + string err; + const auto json = Json::parse(simple_test, err); + + std::cout << "k1: " << json["k1"].string_value() << "\n"; + std::cout << "k3: " << json["k3"].dump() << "\n"; + + for (auto &k : json["k3"].array_items()) { + std::cout << " - " << k.dump() << "\n"; + } + + string comment_test = R"({ + // comment /* with nested comment */ + "a": 1, + // comment + // continued + "b": "text", + /* multi + line + comment + // line-comment-inside-multiline-comment + */ + // and single-line comment + // and single-line comment /* multiline inside single line */ + "c": [1, 2, 3] + // and single-line comment at end of object + })"; + + string err_comment; + auto json_comment = Json::parse( + comment_test, err_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(!json_comment.is_null()); + JSON11_TEST_ASSERT(err_comment.empty()); + + comment_test = "{\"a\": 1}//trailing line comment"; + json_comment = Json::parse( + comment_test, err_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(!json_comment.is_null()); + JSON11_TEST_ASSERT(err_comment.empty()); + + comment_test = "{\"a\": 1}/*trailing multi-line comment*/"; + json_comment = Json::parse( + comment_test, err_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(!json_comment.is_null()); + JSON11_TEST_ASSERT(err_comment.empty()); + + string failing_comment_test = "{\n/* unterminated comment\n\"a\": 1,\n}"; + string err_failing_comment; + auto json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + 
JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + failing_comment_test = "{\n/* unterminated trailing comment }"; + json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + failing_comment_test = "{\n/ / bad comment }"; + json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + failing_comment_test = "{// bad comment }"; + json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + failing_comment_test = "{\n\"a\": 1\n}/"; + json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + failing_comment_test = "{/* bad\ncomment *}"; + json_failing_comment = Json::parse( + failing_comment_test, err_failing_comment, JsonParse::COMMENTS); + JSON11_TEST_ASSERT(json_failing_comment.is_null()); + JSON11_TEST_ASSERT(!err_failing_comment.empty()); + + std::list<int> l1 { 1, 2, 3 }; + std::vector<int> l2 { 1, 2, 3 }; + std::set<int> l3 { 1, 2, 3 }; + JSON11_TEST_ASSERT(Json(l1) == Json(l2)); + JSON11_TEST_ASSERT(Json(l2) == Json(l3)); + + std::map<string, string> m1 { { "k1", "v1" }, { "k2", "v2" } }; + std::unordered_map<string, string> m2 { { "k1", "v1" }, { "k2", "v2" } }; + JSON11_TEST_ASSERT(Json(m1) == Json(m2)); + + // Json literals + const Json obj = Json::object({ + { "k1", "v1" }, + { "k2", 42.0 }, + { "k3", Json::array({ "a", 123.0, true, false, nullptr }) }, + }); + + std::cout << "obj: " << obj.dump() 
<< "\n"; + JSON11_TEST_ASSERT(obj.dump() == "{\"k1\": \"v1\", \"k2\": 42, \"k3\": [\"a\", 123, true, false, null]}"); + + JSON11_TEST_ASSERT(Json("a").number_value() == 0); + JSON11_TEST_ASSERT(Json("a").string_value() == "a"); + JSON11_TEST_ASSERT(Json().number_value() == 0); + + JSON11_TEST_ASSERT(obj == json); + JSON11_TEST_ASSERT(Json(42) == Json(42.0)); + JSON11_TEST_ASSERT(Json(42) != Json(42.1)); + + const string unicode_escape_test = + R"([ "blah\ud83d\udca9blah\ud83dblah\udca9blah\u0000blah\u1234" ])"; + + const char utf8[] = "blah" "\xf0\x9f\x92\xa9" "blah" "\xed\xa0\xbd" "blah" + "\xed\xb2\xa9" "blah" "\0" "blah" "\xe1\x88\xb4"; + + Json uni = Json::parse(unicode_escape_test, err); + JSON11_TEST_ASSERT(uni[0].string_value().size() == (sizeof utf8) - 1); + JSON11_TEST_ASSERT(std::memcmp(uni[0].string_value().data(), utf8, sizeof utf8) == 0); + + // Demonstrates the behavior change in Xcode 7 / Clang 3.7, introduced by DR1467 + // and described here: https://llvm.org/bugs/show_bug.cgi?id=23812 + if (JSON11_ENABLE_DR1467_CANARY) { + Json nested_array = Json::array { Json::array { 1, 2, 3 } }; + JSON11_TEST_ASSERT(nested_array.is_array()); + JSON11_TEST_ASSERT(nested_array.array_items().size() == 1); + JSON11_TEST_ASSERT(nested_array.array_items()[0].is_array()); + JSON11_TEST_ASSERT(nested_array.array_items()[0].array_items().size() == 3); + } + + { + const std::string good_json = R"( {"k1" : "v1"})"; + const std::string bad_json1 = good_json + " {"; + const std::string bad_json2 = good_json + R"({"k2":"v2", "k3":[)"; + struct TestMultiParse { + std::string input; + std::string::size_type expect_parser_stop_pos; + size_t expect_not_empty_elms_count; + Json expect_parse_res; + } tests[] = { + {" {", 0, 0, {}}, + {good_json, good_json.size(), 1, Json(std::map<string, string>{ { "k1", "v1" } })}, + {bad_json1, good_json.size() + 1, 1, Json(std::map<string, string>{ { "k1", "v1" } })}, + {bad_json2, good_json.size(), 1, Json(std::map<string, string>{ { "k1", 
"v1" } })}, + {"{}", 2, 1, Json::object{}}, + }; + for (const auto &tst : tests) { + std::string::size_type parser_stop_pos; + std::string err; + auto res = Json::parse_multi(tst.input, parser_stop_pos, err); + JSON11_TEST_ASSERT(parser_stop_pos == tst.expect_parser_stop_pos); + JSON11_TEST_ASSERT( + (size_t)std::count_if(res.begin(), res.end(), + [](const Json& j) { return !j.is_null(); }) + == tst.expect_not_empty_elms_count); + if (!res.empty()) { + JSON11_TEST_ASSERT(tst.expect_parse_res == res[0]); + } + } + } + + Json my_json = Json::object { + { "key1", "value1" }, + { "key2", false }, + { "key3", Json::array { 1, 2, 3 } }, + }; + std::string json_obj_str = my_json.dump(); + std::cout << "json_obj_str: " << json_obj_str << "\n"; + JSON11_TEST_ASSERT(json_obj_str == "{\"key1\": \"value1\", \"key2\": false, \"key3\": [1, 2, 3]}"); + + class Point { + public: + int x; + int y; + Point (int x, int y) : x(x), y(y) {} + Json to_json() const { return Json::array { x, y }; } + }; + + std::vector<Point> points = { { 1, 2 }, { 10, 20 }, { 100, 200 } }; + std::string points_json = Json(points).dump(); + std::cout << "points_json: " << points_json << "\n"; + JSON11_TEST_ASSERT(points_json == "[[1, 2], [10, 20], [100, 200]]"); +} + +#if JSON11_TEST_STANDALONE_MAIN + +static void parse_from_stdin() { string buf; string line; while (std::getline(std::cin, line)) { @@ -45,161 +271,11 @@ return 0; } - const string simple_test = - R"({"k1":"v1", "k2":42, "k3":["a",123,true,false,null]})"; + json11_test(); +} - string err; - auto json = Json::parse(simple_test, err); +#endif // JSON11_TEST_STANDALONE_MAIN - std::cout << "k1: " << json["k1"].string_value() << "\n"; - std::cout << "k3: " << json["k3"].dump() << "\n"; - - for (auto &k : json["k3"].array_items()) { - std::cout << " - " << k.dump() << "\n"; - } - - const string comment_test = R"({ - // comment /* with nested comment */ - "a": 1, - // comment - // continued - "b": "text", - /* multi - line - comment */ - // and 
single-line comment - "c": [1, 2, 3] - })"; - - string err_comment; - auto json_comment = Json::parse( - comment_test, err_comment, JsonParse::COMMENTS); - if (!err_comment.empty()) { - printf("Failed: %s\n", err_comment.c_str()); - } else { - printf("Result: %s\n", json_comment.dump().c_str()); - } - - string failing_comment_test = R"({ - /* bad comment - "a": 1, - })"; - - string err_failing_comment; - auto json_failing_comment = Json::parse( - failing_comment_test, err_failing_comment, JsonParse::COMMENTS); - if (!err_failing_comment.empty()) { - printf("Failed: %s\n", err_failing_comment.c_str()); - } else { - printf("Result: %s\n", json_failing_comment.dump().c_str()); - } - - failing_comment_test = R"({ - / / bad comment })"; - - json_failing_comment = Json::parse( - failing_comment_test, err_failing_comment, JsonParse::COMMENTS); - if (!err_failing_comment.empty()) { - printf("Failed: %s\n", err_failing_comment.c_str()); - } else { - printf("Result: %s\n", json_failing_comment.dump().c_str()); - } - - failing_comment_test = R"({// bad comment })"; - - json_failing_comment = Json::parse( - failing_comment_test, err_failing_comment, JsonParse::COMMENTS); - if (!err_failing_comment.empty()) { - printf("Failed: %s\n", err_failing_comment.c_str()); - } else { - printf("Result: %s\n", json_failing_comment.dump().c_str()); - } - - failing_comment_test = R"({ - "a": 1 - }/)"; - - json_failing_comment = Json::parse( - failing_comment_test, err_failing_comment, JsonParse::COMMENTS); - if (!err_failing_comment.empty()) { - printf("Failed: %s\n", err_failing_comment.c_str()); - } else { - printf("Result: %s\n", json_failing_comment.dump().c_str()); - } - - failing_comment_test = R"({/* bad - comment *})"; - - json_failing_comment = Json::parse( - failing_comment_test, err_failing_comment, JsonParse::COMMENTS); - if (!err_failing_comment.empty()) { - printf("Failed: %s\n", err_failing_comment.c_str()); - } else { - printf("Result: %s\n", 
json_failing_comment.dump().c_str()); - } - - std::list<int> l1 { 1, 2, 3 }; - std::vector<int> l2 { 1, 2, 3 }; - std::set<int> l3 { 1, 2, 3 }; - assert(Json(l1) == Json(l2)); - assert(Json(l2) == Json(l3)); - - std::map<string, string> m1 { { "k1", "v1" }, { "k2", "v2" } }; - std::unordered_map<string, string> m2 { { "k1", "v1" }, { "k2", "v2" } }; - assert(Json(m1) == Json(m2)); - - // Json literals - Json obj = Json::object({ - { "k1", "v1" }, - { "k2", 42.0 }, - { "k3", Json::array({ "a", 123.0, true, false, nullptr }) }, - }); - - std::cout << "obj: " << obj.dump() << "\n"; - - assert(Json("a").number_value() == 0); - assert(Json("a").string_value() == "a"); - assert(Json().number_value() == 0); - - assert(obj == json); - assert(Json(42) == Json(42.0)); - assert(Json(42) != Json(42.1)); - - const string unicode_escape_test = - R"([ "blah\ud83d\udca9blah\ud83dblah\udca9blah\u0000blah\u1234" ])"; - - const char utf8[] = "blah" "\xf0\x9f\x92\xa9" "blah" "\xed\xa0\xbd" "blah" - "\xed\xb2\xa9" "blah" "\0" "blah" "\xe1\x88\xb4"; - - Json uni = Json::parse(unicode_escape_test, err); - assert(uni[0].string_value().size() == (sizeof utf8) - 1); - assert(std::memcmp(uni[0].string_value().data(), utf8, sizeof utf8) == 0); - - // Demonstrates the behavior change in Xcode 7 / Clang 3.7 described - // here: https://llvm.org/bugs/show_bug.cgi?id=23812 - Json nested_array = Json::array { Json::array { 1, 2, 3 } }; - assert(nested_array.is_array()); - assert(nested_array.array_items().size() == 1); - assert(nested_array.array_items()[0].is_array()); - assert(nested_array.array_items()[0].array_items().size() == 3); - - Json my_json = Json::object { - { "key1", "value1" }, - { "key2", false }, - { "key3", Json::array { 1, 2, 3 } }, - }; - std::string json_str = my_json.dump(); - printf("%s\n", json_str.c_str()); - - class Point { - public: - int x; - int y; - Point (int x, int y) : x(x), y(y) {} - Json to_json() const { return Json::array { x, y }; } - }; - - std::vector<Point> 
points = { { 1, 2 }, { 10, 20 }, { 100, 200 } }; - std::string points_json = Json(points).dump(); - printf("%s\n", points_json.c_str()); -} +// Insert user-defined suffix code (function definitions, etc) +// to set up a custom test suite +JSON11_TEST_CPP_SUFFIX_CODE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/COPYING Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,13 @@ +Copyright 2011-2017 David Robillard <http://drobilla.net> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/README Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,9 @@ +Serd +==== + +Serd is a lightweight C library for RDF syntax which supports reading and +writing Turtle, NTriples, TriG, and NQuads. +For more information, see <http://drobilla.net/software/serd>. + + -- David Robillard <d@drobilla.net> +
/* ext/serd/serd/serd.h */
/*
  Copyright 2011-2017 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

/**
   @file serd.h API for Serd, a lightweight RDF syntax library.
*/

#ifndef SERD_SERD_H
#define SERD_SERD_H

#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#ifdef SERD_SHARED
#    ifdef _WIN32
#        define SERD_LIB_IMPORT __declspec(dllimport)
#        define SERD_LIB_EXPORT __declspec(dllexport)
#    else
#        define SERD_LIB_IMPORT __attribute__((visibility("default")))
#        define SERD_LIB_EXPORT __attribute__((visibility("default")))
#    endif
#    ifdef SERD_INTERNAL
#        define SERD_API SERD_LIB_EXPORT
#    else
#        define SERD_API SERD_LIB_IMPORT
#    endif
#else
#    define SERD_API
#endif

#ifdef __cplusplus
extern "C" {
#else
#    include <stdbool.h>
#endif

/**
   @defgroup serd Serd
   A lightweight RDF syntax library.
   @{
*/

/**
   Environment.

   Represents the state required to resolve a CURIE or relative URI, e.g. the
   base URI and set of namespace prefixes at a particular point.
*/
typedef struct SerdEnvImpl SerdEnv;

/**
   RDF reader.

   Parses RDF by calling user-provided sink functions as input is consumed
   (much like an XML SAX parser).
*/
typedef struct SerdReaderImpl SerdReader;

/**
   RDF writer.

   Provides a number of functions to allow writing RDF syntax out to some
   stream.  These functions are deliberately compatible with the sink functions
   used by SerdReader, so a reader can be directly connected to a writer to
   re-serialise a document with minimal overhead.
*/
typedef struct SerdWriterImpl SerdWriter;

/**
   Return status code.
*/
typedef enum {
    SERD_SUCCESS,         /**< No error */
    SERD_FAILURE,         /**< Non-fatal failure */
    SERD_ERR_UNKNOWN,     /**< Unknown error */
    SERD_ERR_BAD_SYNTAX,  /**< Invalid syntax */
    SERD_ERR_BAD_ARG,     /**< Invalid argument */
    SERD_ERR_NOT_FOUND,   /**< Not found */
    SERD_ERR_ID_CLASH,    /**< Encountered clashing blank node IDs */
    SERD_ERR_BAD_CURIE,   /**< Invalid CURIE (e.g. prefix does not exist) */
    SERD_ERR_INTERNAL     /**< Unexpected internal error (should not happen) */
} SerdStatus;

/**
   RDF syntax type.
*/
typedef enum {
    /**
       Turtle - Terse RDF Triple Language (UTF-8).
       @see <a href="http://www.w3.org/TeamSubmission/turtle/">Turtle</a>
    */
    SERD_TURTLE = 1,

    /**
       NTriples - Line-based RDF triples (ASCII).
       @see <a href="http://www.w3.org/TR/rdf-testcases#ntriples">NTriples</a>
    */
    SERD_NTRIPLES = 2,

    /**
       NQuads - Line-based RDF quads (UTF-8).
       @see <a href="https://www.w3.org/TR/n-quads/">NQuads</a>
    */
    SERD_NQUADS = 3,

    /**
       TRiG - Terse RDF quads (UTF-8).
       @see <a href="https://www.w3.org/TR/trig/">Trig</a>
    */
    SERD_TRIG = 4
} SerdSyntax;

/**
   Flags indicating inline abbreviation information for a statement.
*/
typedef enum {
    SERD_EMPTY_S      = 1 << 1,  /**< Empty blank node subject */
    SERD_EMPTY_O      = 1 << 2,  /**< Empty blank node object */
    SERD_ANON_S_BEGIN = 1 << 3,  /**< Start of anonymous subject */
    SERD_ANON_O_BEGIN = 1 << 4,  /**< Start of anonymous object */
    SERD_ANON_CONT    = 1 << 5,  /**< Continuation of anonymous node */
    SERD_LIST_S_BEGIN = 1 << 6,  /**< Start of list subject */
    SERD_LIST_O_BEGIN = 1 << 7,  /**< Start of list object */
    SERD_LIST_CONT    = 1 << 8   /**< Continuation of list */
} SerdStatementFlag;

/**
   Bitwise OR of SerdStatementFlag values.
*/
typedef uint32_t SerdStatementFlags;

/**
   Type of a syntactic RDF node.

   This is more precise than the type of an abstract RDF node.  An abstract
   node is either a resource, literal, or blank.  In syntax there are two ways
   to refer to a resource (by URI or CURIE) and two ways to refer to a blank
   (by ID or anonymously).  Anonymous (inline) blank nodes are expressed using
   SerdStatementFlags rather than this type.
*/
typedef enum {
    /**
       The type of a nonexistent node.

       This type is useful as a sentinel, but is never emitted by the reader.
    */
    SERD_NOTHING = 0,

    /**
       Literal value.

       A literal optionally has either a language, or a datatype (not both).
    */
    SERD_LITERAL = 1,

    /**
       URI (absolute or relative).

       Value is an unquoted URI string, which is either a relative reference
       with respect to the current base URI (e.g. "foo/bar"), or an absolute
       URI (e.g. "http://example.org/foo").
       @see <a href="http://tools.ietf.org/html/rfc3986">RFC3986</a>.
    */
    SERD_URI = 2,

    /**
       CURIE, a shortened URI.

       Value is an unquoted CURIE string relative to the current environment,
       e.g. "rdf:type".
       @see <a href="http://www.w3.org/TR/curie">CURIE Syntax 1.0</a>
    */
    SERD_CURIE = 3,

    /**
       A blank node.

       Value is a blank node ID, e.g. "id3", which is meaningful only within
       this serialisation.
       @see <a href="http://www.w3.org/TeamSubmission/turtle#nodeID">Turtle
       <tt>nodeID</tt></a>
    */
    SERD_BLANK = 4
} SerdType;

/**
   Flags indicating certain string properties relevant to serialisation.
*/
typedef enum {
    SERD_HAS_NEWLINE = 1,      /**< Contains line breaks ('\\n' or '\\r') */
    SERD_HAS_QUOTE   = 1 << 1  /**< Contains quotes ('"') */
} SerdNodeFlag;

/**
   Bitwise OR of SerdNodeFlag values.
*/
typedef uint32_t SerdNodeFlags;

/**
   A syntactic RDF node.
*/
typedef struct {
    const uint8_t* buf;      /**< Value string */
    size_t         n_bytes;  /**< Size in bytes (not including null) */
    size_t         n_chars;  /**< Length in characters (not including null)*/
    SerdNodeFlags  flags;    /**< Node flags (e.g. string properties) */
    SerdType       type;     /**< Node type */
} SerdNode;

/**
   An unterminated string fragment.
*/
typedef struct {
    const uint8_t* buf;  /**< Start of chunk */
    size_t         len;  /**< Length of chunk in bytes */
} SerdChunk;

/**
   An error description.
*/
typedef struct {
    SerdStatus     status;    /**< Error code */
    const uint8_t* filename;  /**< File where error was encountered, or NULL */
    unsigned       line;      /**< Line where error was encountered, or 0 */
    unsigned       col;       /**< Column where error was encountered */
    const char*    fmt;       /**< Message format string (printf style) */
    va_list*       args;      /**< Arguments for fmt */
} SerdError;

/**
   A parsed URI.

   This struct directly refers to chunks in other strings, it does not own any
   memory itself.  Thus, URIs can be parsed and/or resolved against a base URI
   in-place without allocating memory.
*/
typedef struct {
    SerdChunk scheme;     /**< Scheme */
    SerdChunk authority;  /**< Authority */
    SerdChunk path_base;  /**< Path prefix if relative */
    SerdChunk path;       /**< Path suffix */
    SerdChunk query;      /**< Query */
    SerdChunk fragment;   /**< Fragment */
} SerdURI;

/**
   Syntax style options.

   The style of the writer output can be controlled by ORing together
   values from this enumeration.  Note that some options are only supported
   for some syntaxes (e.g. NTriples does not support abbreviation and is
   always ASCII).
*/
typedef enum {
    SERD_STYLE_ABBREVIATED = 1,       /**< Abbreviate triples when possible. */
    SERD_STYLE_ASCII       = 1 << 1,  /**< Escape all non-ASCII characters. */
    SERD_STYLE_RESOLVED    = 1 << 2,  /**< Resolve URIs against base URI. */
    SERD_STYLE_CURIED      = 1 << 3,  /**< Shorten URIs into CURIEs. */
    SERD_STYLE_BULK        = 1 << 4   /**< Write output in pages. */
} SerdStyle;

/**
   @name String Utilities
   @{
*/

/**
   Return a string describing a status code.
*/
SERD_API
const uint8_t*
serd_strerror(SerdStatus status);

/**
   Measure a UTF-8 string.
   @return Length of `str` in characters (not including the terminating null).
   @param str A null-terminated UTF-8 string.
   @param n_bytes (Output) Set to the size of `str` in bytes (not including
   the terminating null).
   @param flags (Output) Set to the applicable flags.
*/
SERD_API
size_t
serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags);

/**
   Parse a string to a double.

   The API of this function is identical to the standard C strtod function,
   except this function is locale-independent and always matches the lexical
   format used in the Turtle grammar (the decimal point is always ".").
*/
SERD_API
double
serd_strtod(const char* str, char** endptr);

/**
   Decode a base64 string.
   This function can be used to deserialise a blob node created with
   serd_node_new_blob().

   @param str Base64 string to decode.
   @param len The length of `str`.
   @param size Set to the size of the returned blob in bytes.
   @return A newly allocated blob which must be freed with free().
*/
SERD_API
void*
serd_base64_decode(const uint8_t* str, size_t len, size_t* size);

/**
   @}
   @name URI
   @{
*/

static const SerdURI SERD_URI_NULL = {
    {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}
};

/**
   Return the local path for `uri`, or NULL if `uri` is not a file URI.
   Note this (inappropriately named) function only removes the file scheme if
   necessary, and returns `uri` unmodified if it is an absolute path.  Percent
   encoding and other issues are not handled, to properly convert a file URI to
   a path, use serd_file_uri_parse().
*/
SERD_API
const uint8_t*
serd_uri_to_path(const uint8_t* uri);

/**
   Get the unescaped path and hostname from a file URI.
   @param uri A file URI.
   @param hostname If non-NULL, set to the hostname, if present.
   @return The path component of the URI.

   The returned path and `*hostname` must be freed with free().
*/
SERD_API
uint8_t*
serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname);

/**
   Return true iff `utf8` starts with a valid URI scheme.
*/
SERD_API
bool
serd_uri_string_has_scheme(const uint8_t* utf8);

/**
   Parse `utf8`, writing result to `out`.
*/
SERD_API
SerdStatus
serd_uri_parse(const uint8_t* utf8, SerdURI* out);

/**
   Set `out` to `uri` resolved against `base`.
*/
SERD_API
void
serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out);

/**
   Function to detect I/O stream errors.

   Identical semantics to `ferror`.

   @return Non-zero if `stream` has encountered an error.
*/
typedef int (*SerdStreamErrorFunc)(void* stream);

/**
   Source function for raw string input.

   Identical semantics to `fread`, but may set errno for more informative error
   reporting than supported by SerdStreamErrorFunc.

   @param buf Output buffer.
   @param size Size of a single element of data in bytes (always 1).
   @param nmemb Number of elements to read.
   @param stream Stream to read from (FILE* for fread).
   @return Number of elements (bytes) read.
*/
typedef size_t (*SerdSource)(void*  buf,
                             size_t size,
                             size_t nmemb,
                             void*  stream);

/**
   Sink function for raw string output.
*/
typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream);

/**
   Serialise `uri` with a series of calls to `sink`.
*/
SERD_API
size_t
serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream);

/**
   Serialise `uri` relative to `base` with a series of calls to `sink`.

   The `uri` is written as a relative URI iff it is a child of `base` and
   `root`.  The optional `root` parameter must be a prefix of `base` and can
   be used to keep up-references ("../") within a certain namespace.
*/
SERD_API
size_t
serd_uri_serialise_relative(const SerdURI* uri,
                            const SerdURI* base,
                            const SerdURI* root,
                            SerdSink       sink,
                            void*          stream);

/**
   @}
   @name Node
   @{
*/

static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, 0, SERD_NOTHING };

/**
   Make a (shallow) node from `str`.

   This measures, but does not copy, `str`.  No memory is allocated.
*/
SERD_API
SerdNode
serd_node_from_string(SerdType type, const uint8_t* str);

/**
   Make a deep copy of `node`.

   @return a node that the caller must free with serd_node_free().
*/
SERD_API
SerdNode
serd_node_copy(const SerdNode* node);

/**
   Return true iff `a` is equal to `b`.
*/
SERD_API
bool
serd_node_equals(const SerdNode* a, const SerdNode* b);

/**
   Simple wrapper for serd_node_new_uri() to resolve a URI node.
*/
SERD_API
SerdNode
serd_node_new_uri_from_node(const SerdNode* uri_node,
                            const SerdURI*  base,
                            SerdURI*        out);

/**
   Simple wrapper for serd_node_new_uri() to resolve a URI string.
*/
SERD_API
SerdNode
serd_node_new_uri_from_string(const uint8_t* str,
                              const SerdURI* base,
                              SerdURI*       out);

/**
   Create a new file URI node from a file system path and optional hostname.

   Backslashes in Windows paths will be converted and '%' will always be
   percent encoded.  If `escape` is true, all other invalid characters will be
   percent encoded as well.

   If `path` is relative, `hostname` is ignored.
   If `out` is not NULL, it will be set to the parsed URI.
*/
SERD_API
SerdNode
serd_node_new_file_uri(const uint8_t* path,
                       const uint8_t* hostname,
                       SerdURI*       out,
                       bool           escape);

/**
   Create a new node by serialising `uri` into a new string.

   @param uri The URI to serialise.

   @param base Base URI to resolve `uri` against (or NULL for no resolution).

   @param out Set to the parsing of the new URI (i.e. points only to
   memory owned by the new returned node).
*/
SERD_API
SerdNode
serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out);

/**
   Create a new node by serialising `uri` into a new relative URI.

   @param uri The URI to serialise.

   @param base Base URI to make `uri` relative to, if possible.

   @param root Root URI for resolution (see serd_uri_serialise_relative()).

   @param out Set to the parsing of the new URI (i.e. points only to
   memory owned by the new returned node).
*/
SERD_API
SerdNode
serd_node_new_relative_uri(const SerdURI* uri,
                           const SerdURI* base,
                           const SerdURI* root,
                           SerdURI*       out);

/**
   Create a new node by serialising `d` into an xsd:decimal string.

   The resulting node will always contain a `.', start with a digit, and end
   with a digit (i.e. will have a leading and/or trailing `0' if necessary).
   It will never be in scientific notation.  A maximum of `frac_digits` digits
   will be written after the decimal point, but trailing zeros will
   automatically be omitted (except one if `d` is a round integer).

   Note that about 16 and 8 fractional digits are required to precisely
   represent a double and float, respectively.

   @param d The value for the new node.
   @param frac_digits The maximum number of digits after the decimal place.
*/
SERD_API
SerdNode
serd_node_new_decimal(double d, unsigned frac_digits);

/**
   Create a new node by serialising `i` into an xsd:integer string.
*/
SERD_API
SerdNode
serd_node_new_integer(int64_t i);

/**
   Create a node by serialising `buf` into an xsd:base64Binary string.
   This function can be used to make a serialisable node out of arbitrary
   binary data, which can be decoded using serd_base64_decode().

   @param buf Raw binary input data.
   @param size Size of `buf`.
   @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045.
*/
SERD_API
SerdNode
serd_node_new_blob(const void* buf, size_t size, bool wrap_lines);

/**
   Free any data owned by `node`.

   Note that if `node` is itself dynamically allocated (which is not the case
   for nodes created internally by serd), it will not be freed.
*/
SERD_API
void
serd_node_free(SerdNode* node);

/**
   @}
   @name Event Handlers
   @{
*/

/**
   Sink (callback) for errors.

   @param handle Handle for user data.
   @param error Error description.
*/
typedef SerdStatus (*SerdErrorSink)(void*            handle,
                                    const SerdError* error);

/**
   Sink (callback) for base URI changes.

   Called whenever the base URI of the serialisation changes.
*/
typedef SerdStatus (*SerdBaseSink)(void*           handle,
                                   const SerdNode* uri);

/**
   Sink (callback) for namespace definitions.

   Called whenever a prefix is defined in the serialisation.
*/
typedef SerdStatus (*SerdPrefixSink)(void*           handle,
                                     const SerdNode* name,
                                     const SerdNode* uri);

/**
   Sink (callback) for statements.

   Called for every RDF statement in the serialisation.
*/
typedef SerdStatus (*SerdStatementSink)(void*              handle,
                                        SerdStatementFlags flags,
                                        const SerdNode*    graph,
                                        const SerdNode*    subject,
                                        const SerdNode*    predicate,
                                        const SerdNode*    object,
                                        const SerdNode*    object_datatype,
                                        const SerdNode*    object_lang);

/**
   Sink (callback) for anonymous node end markers.

   This is called to indicate that the anonymous node `node` will no longer
   be referred to by any future statements (i.e. the anonymous serialisation
   of the node is finished).
*/
typedef SerdStatus (*SerdEndSink)(void*           handle,
                                  const SerdNode* node);

/**
   @}
   @name Environment
   @{
*/

/**
   Create a new environment.
*/
SERD_API
SerdEnv*
serd_env_new(const SerdNode* base_uri);

/**
   Free `env`.
*/
SERD_API
void
serd_env_free(SerdEnv* env);

/**
   Get the current base URI.
*/
SERD_API
const SerdNode*
serd_env_get_base_uri(const SerdEnv* env,
                      SerdURI*       out);

/**
   Set the current base URI.
*/
SERD_API
SerdStatus
serd_env_set_base_uri(SerdEnv*        env,
                      const SerdNode* uri);

/**
   Set a namespace prefix.
*/
SERD_API
SerdStatus
serd_env_set_prefix(SerdEnv*        env,
                    const SerdNode* name,
                    const SerdNode* uri);

/**
   Set a namespace prefix, with the name and URI given as plain strings.
*/
SERD_API
SerdStatus
serd_env_set_prefix_from_strings(SerdEnv*       env,
                                 const uint8_t* name,
                                 const uint8_t* uri);

/**
   Qualify `uri` into a CURIE if possible.
*/
SERD_API
bool
serd_env_qualify(const SerdEnv*  env,
                 const SerdNode* uri,
                 SerdNode*       prefix,
                 SerdChunk*      suffix);

/**
   Expand `curie`.
*/
SERD_API
SerdStatus
serd_env_expand(const SerdEnv*  env,
                const SerdNode* curie,
                SerdChunk*      uri_prefix,
                SerdChunk*      uri_suffix);

/**
   Expand `node`, which must be a CURIE or URI, to a full URI.
*/
SERD_API
SerdNode
serd_env_expand_node(const SerdEnv*  env,
                     const SerdNode* node);

/**
   Call `func` for each prefix defined in `env`.
*/
SERD_API
void
serd_env_foreach(const SerdEnv* env,
                 SerdPrefixSink func,
                 void*          handle);

/**
   @}
   @name Reader
   @{
*/

/**
   Create a new RDF reader.
*/
SERD_API
SerdReader*
serd_reader_new(SerdSyntax        syntax,
                void*             handle,
                void              (*free_handle)(void*),
                SerdBaseSink      base_sink,
                SerdPrefixSink    prefix_sink,
                SerdStatementSink statement_sink,
                SerdEndSink       end_sink);

/**
   Enable or disable strict parsing.

   The reader is non-strict (lax) by default, which will tolerate URIs with
   invalid characters.  Setting strict will fail when parsing such files.  An
   error is printed for invalid input in either case.
*/
SERD_API
void
serd_reader_set_strict(SerdReader* reader, bool strict);

/**
   Set a function to be called when errors occur during reading.

   The `error_sink` will be called with `handle` as its first argument.  If
   no error function is set, errors are printed to stderr in GCC style.
*/
SERD_API
void
serd_reader_set_error_sink(SerdReader*   reader,
                           SerdErrorSink error_sink,
                           void*         handle);

/**
   Return the `handle` passed to serd_reader_new().
*/
SERD_API
void*
serd_reader_get_handle(const SerdReader* reader);

/**
   Set a prefix to be added to all blank node identifiers.

   This is useful when multiple files are to be parsed into the same output
   (e.g. a store, or other files).  Since Serd preserves blank node IDs, this
   could cause conflicts where two non-equivalent blank nodes are merged,
   resulting in corrupt data.  By setting a unique blank node prefix for each
   parsed file, this can be avoided, while preserving blank node names.
*/
SERD_API
void
serd_reader_add_blank_prefix(SerdReader*    reader,
                             const uint8_t* prefix);

/**
   Set the URI of the default graph.

   If this is set, the reader will emit quads with the graph set to the given
   node for any statements that are not in a named graph (which is currently
   all of them since Serd currently does not support any graph syntaxes).
*/
SERD_API
void
serd_reader_set_default_graph(SerdReader*     reader,
                              const SerdNode* graph);

/**
   Read a file at a given `uri`.
*/
SERD_API
SerdStatus
serd_reader_read_file(SerdReader*    reader,
                      const uint8_t* uri);

/**
   Start an incremental read from a file handle.

   Iff `bulk` is true, `file` will be read a page at a time.  This is more
   efficient, but uses a page of memory and means that an entire page of input
   must be ready before any callbacks will fire.  To react as soon as input
   arrives, set `bulk` to false.
*/
SERD_API
SerdStatus
serd_reader_start_stream(SerdReader*    me,
                         FILE*          file,
                         const uint8_t* name,
                         bool           bulk);

/**
   Start an incremental read from a user-specified source.

   The `read_func` is guaranteed to only be called for `page_size` elements
   with size 1 (i.e. `page_size` bytes).
*/
SERD_API
SerdStatus
serd_reader_start_source_stream(SerdReader*         me,
                                SerdSource          read_func,
                                SerdStreamErrorFunc error_func,
                                void*               stream,
                                const uint8_t*      name,
                                size_t              page_size);

/**
   Read a single "chunk" of data during an incremental read.

   This function will read a single top level description, and return.  This
   may be a directive, statement, or several statements; essentially it reads
   until a '.' is encountered.  This is particularly useful for reading
   directly from a pipe or socket.
*/
SERD_API
SerdStatus
serd_reader_read_chunk(SerdReader* me);

/**
   Finish an incremental read from a file handle.
*/
SERD_API
SerdStatus
serd_reader_end_stream(SerdReader* me);

/**
   Read `file`.
*/
SERD_API
SerdStatus
serd_reader_read_file_handle(SerdReader*    reader,
                             FILE*          file,
                             const uint8_t* name);

/**
   Read a user-specified byte source.
*/
SERD_API
SerdStatus
serd_reader_read_source(SerdReader*         reader,
                        SerdSource          source,
                        SerdStreamErrorFunc error,
                        void*               stream,
                        const uint8_t*      name,
                        size_t              page_size);

/**
   Read `utf8`.
*/
SERD_API
SerdStatus
serd_reader_read_string(SerdReader* me, const uint8_t* utf8);

/**
   Free `reader`.
*/
SERD_API
void
serd_reader_free(SerdReader* reader);

/**
   @}
   @name Writer
   @{
*/

/**
   Create a new RDF writer.
*/
SERD_API
SerdWriter*
serd_writer_new(SerdSyntax     syntax,
                SerdStyle      style,
                SerdEnv*       env,
                const SerdURI* base_uri,
                SerdSink       sink,
                void*          stream);

/**
   Free `writer`.
*/
SERD_API
void
serd_writer_free(SerdWriter* writer);

/**
   Return the env used by `writer`.
*/
SERD_API
SerdEnv*
serd_writer_get_env(SerdWriter* writer);

/**
   A convenience sink function for writing to a FILE*.

   This function can be used as a SerdSink when writing to a FILE*.  The
   `stream` parameter must be a FILE* opened for writing.
*/
SERD_API
size_t
serd_file_sink(const void* buf, size_t len, void* stream);

/**
   A convenience sink function for writing to a string.

   This function can be used as a SerdSink to write to a SerdChunk which is
   resized as necessary with realloc().  The `stream` parameter must point to
   an initialized SerdChunk.  When the write is finished, the string should be
   retrieved with serd_chunk_sink_finish().
*/
SERD_API
size_t
serd_chunk_sink(const void* buf, size_t len, void* stream);

/**
   Finish a serialisation to a chunk with serd_chunk_sink().

   The returned string is the result of the serialisation, which is NULL
   terminated (by this function) and owned by the caller.
*/
SERD_API
uint8_t*
serd_chunk_sink_finish(SerdChunk* stream);

/**
   Set a function to be called when errors occur during writing.

   The `error_sink` will be called with `handle` as its first argument.  If
   no error function is set, errors are printed to stderr.
*/
SERD_API
void
serd_writer_set_error_sink(SerdWriter*   writer,
                           SerdErrorSink error_sink,
                           void*         handle);

/**
   Set a prefix to be removed from matching blank node identifiers.
*/
SERD_API
void
serd_writer_chop_blank_prefix(SerdWriter*    writer,
                              const uint8_t* prefix);

/**
   Set the current output base URI (and emit directive if applicable).

   Note this function can be safely casted to SerdBaseSink.
*/
SERD_API
SerdStatus
serd_writer_set_base_uri(SerdWriter*     writer,
                         const SerdNode* uri);

/**
   Set the current root URI.

   The root URI should be a prefix of the base URI.  The path of the root URI
   is the highest path any relative up-reference can refer to.  For example,
   with root <file:///foo/root> and base <file:///foo/root/base>,
   <file:///foo/root> will be written as <../>, but <file:///foo> will be
   written non-relatively as <file:///foo>.  If the root is not explicitly set,
   it defaults to the base URI, so no up-references will be created at all.
*/
SERD_API
SerdStatus
serd_writer_set_root_uri(SerdWriter*     writer,
                         const SerdNode* uri);

/**
   Set a namespace prefix (and emit directive if applicable).

   Note this function can be safely casted to SerdPrefixSink.
*/
SERD_API
SerdStatus
serd_writer_set_prefix(SerdWriter*     writer,
                       const SerdNode* name,
                       const SerdNode* uri);

/**
   Write a statement.

   Note this function can be safely casted to SerdStatementSink.
*/
SERD_API
SerdStatus
serd_writer_write_statement(SerdWriter*        writer,
                            SerdStatementFlags flags,
                            const SerdNode*    graph,
                            const SerdNode*    subject,
                            const SerdNode*    predicate,
                            const SerdNode*    object,
                            const SerdNode*    object_datatype,
                            const SerdNode*    object_lang);

/**
   Mark the end of an anonymous node's description.

   Note this function can be safely casted to SerdEndSink.
*/
SERD_API
SerdStatus
serd_writer_end_anon(SerdWriter*     writer,
                     const SerdNode* node);

/**
   Finish a write.
*/
SERD_API
SerdStatus
serd_writer_finish(SerdWriter* writer);

/**
   @}
   @}
*/

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* SERD_SERD_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/byte_source.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,108 @@ +/* + Copyright 2011-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +static inline SerdStatus +serd_byte_source_page(SerdByteSource* source) +{ + source->read_head = 0; + size_t n_read = source->read_func( + source->file_buf, 1, source->page_size, source->stream); + if (n_read == 0) { + source->file_buf[0] = '\0'; + return (source->error_func(source->stream) + ? 
SERD_ERR_UNKNOWN : SERD_FAILURE); + } else if (n_read < source->page_size) { + source->file_buf[n_read] = '\0'; + } + return SERD_SUCCESS; +} + +SerdStatus +serd_byte_source_open_source(SerdByteSource* source, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + size_t page_size) +{ + memset(source, '\0', sizeof(*source)); + source->stream = stream; + source->from_stream = true; + source->page_size = page_size; + source->error_func = error_func; + source->read_func = read_func; + + if (page_size > 1) { + source->file_buf = (uint8_t*)serd_bufalloc(page_size); + source->read_buf = source->file_buf; + memset(source->file_buf, '\0', page_size); + } else { + source->read_buf = &source->read_byte; + } + + return SERD_SUCCESS; +} + +SerdStatus +serd_byte_source_prepare(SerdByteSource* source) +{ + if (!source->prepared) { + source->prepared = true; + if (source->page_size > 1) { + return serd_byte_source_page(source); + } else if (source->from_stream) { + return serd_byte_source_advance(source); + } + } + return SERD_SUCCESS; +} + +SerdStatus +serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8) +{ + memset(source, '\0', sizeof(*source)); + source->read_buf = utf8; + source->prepared = true; + return SERD_SUCCESS; +} + +SerdStatus +serd_byte_source_close(SerdByteSource* source) +{ + if (source->page_size > 1) { + free(source->file_buf); + } + memset(source, '\0', sizeof(*source)); + return SERD_SUCCESS; +} + +SerdStatus +serd_byte_source_advance(SerdByteSource* source) +{ + const bool paging = source->page_size > 1; + SerdStatus st = SERD_SUCCESS; + if (source->from_stream && !paging) { + if (source->read_func(&source->read_byte, 1, 1, source->stream) == 0) { + return (source->error_func(source->stream) + ? SERD_ERR_UNKNOWN : SERD_FAILURE); + } + } else if (++source->read_head == source->page_size && paging) { + st = serd_byte_source_page(source); + } + + return st; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/env.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,271 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <stdlib.h> +#include <string.h> + +typedef struct { + SerdNode name; + SerdNode uri; +} SerdPrefix; + +struct SerdEnvImpl { + SerdPrefix* prefixes; + size_t n_prefixes; + SerdNode base_uri_node; + SerdURI base_uri; +}; + +SERD_API +SerdEnv* +serd_env_new(const SerdNode* base_uri) +{ + SerdEnv* env = (SerdEnv*)calloc(sizeof(struct SerdEnvImpl), 1); + if (env && base_uri) { + serd_env_set_base_uri(env, base_uri); + } + return env; +} + +SERD_API +void +serd_env_free(SerdEnv* env) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + serd_node_free(&env->prefixes[i].name); + serd_node_free(&env->prefixes[i].uri); + } + free(env->prefixes); + serd_node_free(&env->base_uri_node); + free(env); +} + +SERD_API +const SerdNode* +serd_env_get_base_uri(const SerdEnv* env, + SerdURI* out) +{ + if (out) { + *out = env->base_uri; + } + return &env->base_uri_node; +} + +SERD_API +SerdStatus +serd_env_set_base_uri(SerdEnv* env, + const SerdNode* uri_node) +{ + if (!env || !uri_node) { + return SERD_ERR_BAD_ARG; + } + + // Resolve base URI and create 
a new node and URI for it + SerdURI base_uri; + SerdNode base_uri_node = serd_node_new_uri_from_node( + uri_node, &env->base_uri, &base_uri); + + if (base_uri_node.buf) { + // Replace the current base URI + serd_node_free(&env->base_uri_node); + env->base_uri_node = base_uri_node; + env->base_uri = base_uri; + return SERD_SUCCESS; + } + return SERD_ERR_BAD_ARG; +} + +static inline SerdPrefix* +serd_env_find(const SerdEnv* env, + const uint8_t* name, + size_t name_len) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_name = &env->prefixes[i].name; + if (prefix_name->n_bytes == name_len) { + if (!memcmp(prefix_name->buf, name, name_len)) { + return &env->prefixes[i]; + } + } + } + return NULL; +} + +static void +serd_env_add(SerdEnv* env, + const SerdNode* name, + const SerdNode* uri) +{ + SerdPrefix* const prefix = serd_env_find(env, name->buf, name->n_bytes); + if (prefix) { + SerdNode old_prefix_uri = prefix->uri; + prefix->uri = serd_node_copy(uri); + serd_node_free(&old_prefix_uri); + } else { + env->prefixes = (SerdPrefix*)realloc( + env->prefixes, (++env->n_prefixes) * sizeof(SerdPrefix)); + env->prefixes[env->n_prefixes - 1].name = serd_node_copy(name); + env->prefixes[env->n_prefixes - 1].uri = serd_node_copy(uri); + } +} + +SERD_API +SerdStatus +serd_env_set_prefix(SerdEnv* env, + const SerdNode* name, + const SerdNode* uri_node) +{ + if (!name->buf || uri_node->type != SERD_URI) { + return SERD_ERR_BAD_ARG; + } else if (serd_uri_string_has_scheme(uri_node->buf)) { + // Set prefix to absolute URI + serd_env_add(env, name, uri_node); + } else { + // Resolve relative URI and create a new node and URI for it + SerdURI abs_uri; + SerdNode abs_uri_node = serd_node_new_uri_from_node( + uri_node, &env->base_uri, &abs_uri); + + // Set prefix to resolved (absolute) URI + serd_env_add(env, name, &abs_uri_node); + serd_node_free(&abs_uri_node); + } + return SERD_SUCCESS; +} + +SERD_API +SerdStatus 
+serd_env_set_prefix_from_strings(SerdEnv* env, + const uint8_t* name, + const uint8_t* uri) +{ + const SerdNode name_node = serd_node_from_string(SERD_LITERAL, name); + const SerdNode uri_node = serd_node_from_string(SERD_URI, uri); + + return serd_env_set_prefix(env, &name_node, &uri_node); +} + +static inline bool +is_nameChar(const uint8_t c) +{ + return is_alpha(c) || is_digit(c) || c == '_'; +} + +/** + Return true iff `buf` is a valid prefixed name suffix. + TODO: This is more strict than it should be. +*/ +static inline bool +is_name(const uint8_t* buf, size_t len) +{ + for (size_t i = 0; i < len; ++i) { + if (!is_nameChar(buf[i])) { + return false; + } + } + return true; +} + +SERD_API +bool +serd_env_qualify(const SerdEnv* env, + const SerdNode* uri, + SerdNode* prefix_name, + SerdChunk* suffix) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_uri = &env->prefixes[i].uri; + if (uri->n_bytes >= prefix_uri->n_bytes) { + if (!strncmp((const char*)uri->buf, + (const char*)prefix_uri->buf, + prefix_uri->n_bytes)) { + *prefix_name = env->prefixes[i].name; + suffix->buf = uri->buf + prefix_uri->n_bytes; + suffix->len = uri->n_bytes - prefix_uri->n_bytes; + if (is_name(suffix->buf, suffix->len)) { + return true; + } + } + } + } + return false; +} + +SERD_API +SerdStatus +serd_env_expand(const SerdEnv* env, + const SerdNode* qname, + SerdChunk* uri_prefix, + SerdChunk* uri_suffix) +{ + const uint8_t* const colon = (const uint8_t*)memchr( + qname->buf, ':', qname->n_bytes + 1); + if (!colon) { + return SERD_ERR_BAD_ARG; // Invalid qname + } + + const size_t name_len = colon - qname->buf; + const SerdPrefix* const prefix = serd_env_find(env, qname->buf, name_len); + if (prefix) { + uri_prefix->buf = prefix->uri.buf; + uri_prefix->len = prefix->uri.n_bytes; + uri_suffix->buf = colon + 1; + uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 1; + return SERD_SUCCESS; + } + return SERD_ERR_NOT_FOUND; +} + +SERD_API +SerdNode 
+serd_env_expand_node(const SerdEnv* env, + const SerdNode* node) +{ + switch (node->type) { + case SERD_CURIE: { + SerdChunk prefix; + SerdChunk suffix; + if (serd_env_expand(env, node, &prefix, &suffix)) { + return SERD_NODE_NULL; + } + const size_t len = prefix.len + suffix.len; + uint8_t* buf = (uint8_t*)malloc(len + 1); + SerdNode ret = { buf, len, 0, 0, SERD_URI }; + snprintf((char*)buf, len + 1, "%s%s", prefix.buf, suffix.buf); + ret.n_chars = serd_strlen(buf, NULL, NULL); + return ret; + } + case SERD_URI: { + SerdURI ignored; + return serd_node_new_uri_from_node(node, &env->base_uri, &ignored); + } + default: + return SERD_NODE_NULL; + } +} + +SERD_API +void +serd_env_foreach(const SerdEnv* env, + SerdPrefixSink func, + void* handle) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + func(handle, &env->prefixes[i].name, &env->prefixes[i].uri); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/node.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,384 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <stdlib.h> +#include <string.h> + +#include <math.h> +#include <float.h> + +#ifdef _WIN32 +# ifndef isnan +# define isnan(x) _isnan(x) +# endif +# ifndef isinf +# define isinf(x) (!_finite(x)) +# endif +#endif + +SERD_API +SerdNode +serd_node_from_string(SerdType type, const uint8_t* buf) +{ + if (!buf) { + return SERD_NODE_NULL; + } + + uint32_t flags = 0; + size_t buf_n_bytes = 0; + const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes, &flags); + SerdNode ret = { buf, buf_n_bytes, buf_n_chars, flags, type }; + return ret; +} + +SERD_API +SerdNode +serd_node_copy(const SerdNode* node) +{ + if (!node || !node->buf) { + return SERD_NODE_NULL; + } + + SerdNode copy = *node; + uint8_t* buf = (uint8_t*)malloc(copy.n_bytes + 1); + memcpy(buf, node->buf, copy.n_bytes + 1); + copy.buf = buf; + return copy; +} + +SERD_API +bool +serd_node_equals(const SerdNode* a, const SerdNode* b) +{ + return (a == b) + || (a->type == b->type + && a->n_bytes == b->n_bytes + && a->n_chars == b->n_chars + && ((a->buf == b->buf) || !memcmp((const 
char*)a->buf, + (const char*)b->buf, + a->n_bytes + 1))); +} + +static size_t +serd_uri_string_length(const SerdURI* uri) +{ + size_t len = uri->path_base.len; + +#define ADD_LEN(field, n_delims) \ + if ((field).len) { len += (field).len + (n_delims); } + + ADD_LEN(uri->path, 1); // + possible leading `/' + ADD_LEN(uri->scheme, 1); // + trailing `:' + ADD_LEN(uri->authority, 2); // + leading `//' + ADD_LEN(uri->query, 1); // + leading `?' + ADD_LEN(uri->fragment, 1); // + leading `#' + + return len + 2; // + 2 for authority `//' +} + +static size_t +string_sink(const void* buf, size_t len, void* stream) +{ + uint8_t** ptr = (uint8_t**)stream; + memcpy(*ptr, buf, len); + *ptr += len; + return len; +} + +SERD_API +SerdNode +serd_node_new_uri_from_node(const SerdNode* uri_node, + const SerdURI* base, + SerdURI* out) +{ + return (uri_node->type == SERD_URI && uri_node->buf) + ? serd_node_new_uri_from_string(uri_node->buf, base, out) + : SERD_NODE_NULL; +} + +SERD_API +SerdNode +serd_node_new_uri_from_string(const uint8_t* str, + const SerdURI* base, + SerdURI* out) +{ + if (!str || str[0] == '\0') { + // Empty URI => Base URI, or nothing if no base is given + return base ? 
serd_node_new_uri(base, NULL, out) : SERD_NODE_NULL; + } + + SerdURI uri; + serd_uri_parse(str, &uri); + return serd_node_new_uri(&uri, base, out); // Resolve/Serialise +} + +static inline bool +is_uri_path_char(const uint8_t c) +{ + if (is_alpha(c) || is_digit(c)) { + return true; + } + switch (c) { + case '-': case '.': case '_': case '~': // unreserved + case ':': case '@': // pchar + case '/': // separator + // sub-delims + case '!': case '$': case '&': case '\'': case '(': case ')': + case '*': case '+': case ',': case ';': case '=': + return true; + default: + return false; + } +} + +SERD_API +SerdNode +serd_node_new_file_uri(const uint8_t* path, + const uint8_t* hostname, + SerdURI* out, + bool escape) +{ + const size_t path_len = strlen((const char*)path); + const size_t hostname_len = hostname ? strlen((const char*)hostname) : 0; + const bool evil = is_windows_path(path); + size_t uri_len = 0; + uint8_t* uri = NULL; + + if (path[0] == '/' || is_windows_path(path)) { + uri_len = strlen("file://") + hostname_len + evil; + uri = (uint8_t*)malloc(uri_len + 1); + snprintf((char*)uri, uri_len + 1, "file://%s%s", + hostname ? (const char*)hostname : "", + evil ? 
"/" : ""); + } + + SerdChunk chunk = { uri, uri_len }; + for (size_t i = 0; i < path_len; ++i) { + if (evil && path[i] == '\\') { + serd_chunk_sink("/", 1, &chunk); + } else if (path[i] == '%') { + serd_chunk_sink("%%", 2, &chunk); + } else if (!escape || is_uri_path_char(path[i])) { + serd_chunk_sink(path + i, 1, &chunk); + } else { + char escape_str[4] = { '%', 0, 0, 0 }; + snprintf(escape_str + 1, sizeof(escape_str) - 1, "%X", path[i]); + serd_chunk_sink(escape_str, 3, &chunk); + } + } + serd_chunk_sink_finish(&chunk); + + if (out) { + serd_uri_parse(chunk.buf, out); + } + + return serd_node_from_string(SERD_URI, chunk.buf); +} + +SERD_API +SerdNode +serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) +{ + SerdURI abs_uri = *uri; + if (base) { + serd_uri_resolve(uri, base, &abs_uri); + } + + const size_t len = serd_uri_string_length(&abs_uri); + uint8_t* buf = (uint8_t*)malloc(len + 1); + SerdNode node = { buf, 0, 0, 0, SERD_URI }; + uint8_t* ptr = buf; + const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); + + buf[actual_len] = '\0'; + node.n_bytes = actual_len; + node.n_chars = serd_strlen(buf, NULL, NULL); + + if (out) { + serd_uri_parse(buf, out); // TODO: cleverly avoid double parse + } + + return node; +} + +SERD_API +SerdNode +serd_node_new_relative_uri(const SerdURI* uri, + const SerdURI* base, + const SerdURI* root, + SerdURI* out) +{ + const size_t uri_len = serd_uri_string_length(uri); + const size_t base_len = serd_uri_string_length(base); + uint8_t* buf = (uint8_t*)malloc(uri_len + base_len + 1); + SerdNode node = { buf, 0, 0, 0, SERD_URI }; + uint8_t* ptr = buf; + const size_t actual_len = serd_uri_serialise_relative( + uri, base, root, string_sink, &ptr); + + buf[actual_len] = '\0'; + node.n_bytes = actual_len; + node.n_chars = serd_strlen(buf, NULL, NULL); + + if (out) { + serd_uri_parse(buf, out); // TODO: cleverly avoid double parse + } + + return node; +} + +static inline unsigned +serd_digits(double 
abs) +{ + const double lg = ceil(log10(floor(abs) + 1.0)); + return lg < 1.0 ? 1U : (unsigned)lg; +} + +SERD_API +SerdNode +serd_node_new_decimal(double d, unsigned frac_digits) +{ + if (isnan(d) || isinf(d)) { + return SERD_NODE_NULL; + } + + const double abs_d = fabs(d); + const unsigned int_digits = serd_digits(abs_d); + char* buf = (char*)calloc(int_digits + frac_digits + 3, 1); + SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + const double int_part = floor(abs_d); + + // Point s to decimal point location + char* s = buf + int_digits; + if (d < 0.0) { + *buf = '-'; + ++s; + } + + // Write integer part (right to left) + char* t = s - 1; + uint64_t dec = (uint64_t)int_part; + do { + *t-- = '0' + (dec % 10); + } while ((dec /= 10) > 0); + + *s++ = '.'; + + // Write fractional part (right to left) + double frac_part = fabs(d - int_part); + if (frac_part < DBL_EPSILON) { + *s++ = '0'; + node.n_bytes = node.n_chars = (s - buf); + } else { + uint64_t frac = frac_part * pow(10.0, (int)frac_digits) + 0.5; + s += frac_digits - 1; + unsigned i = 0; + + // Skip trailing zeros + for (; i < frac_digits - 1 && !(frac % 10); ++i, --s, frac /= 10) {} + + node.n_bytes = node.n_chars = (s - buf) + 1; + + // Write digits from last trailing zero to decimal point + for (; i < frac_digits; ++i) { + *s-- = '0' + (frac % 10); + frac /= 10; + } + } + + return node; +} + +SERD_API +SerdNode +serd_node_new_integer(int64_t i) +{ + int64_t abs_i = (i < 0) ? -i : i; + const unsigned digits = serd_digits(abs_i); + char* buf = (char*)calloc(digits + 2, 1); + SerdNode node = { (const uint8_t*)buf, 0, 0, 0, SERD_LITERAL }; + + // Point s to the end + char* s = buf + digits - 1; + if (i < 0) { + *buf = '-'; + ++s; + } + + node.n_bytes = node.n_chars = (s - buf) + 1; + + // Write integer part (right to left) + do { + *s-- = '0' + (abs_i % 10); + } while ((abs_i /= 10) > 0); + + return node; +} + +/** + Base64 encoding table. 
+ @see <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC3986 S3</a>. +*/ +static const uint8_t b64_map[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + Encode 3 raw bytes to 4 base64 characters. +*/ +static inline void +encode_chunk(uint8_t out[4], const uint8_t in[3], size_t n_in) +{ + out[0] = b64_map[in[0] >> 2]; + out[1] = b64_map[((in[0] & 0x03) << 4) | ((in[1] & 0xF0) >> 4)]; + out[2] = ((n_in > 1) + ? (b64_map[((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6)]) + : (uint8_t)'='); + out[3] = ((n_in > 2) ? b64_map[in[2] & 0x3F] : (uint8_t)'='); +} + +SERD_API +SerdNode +serd_node_new_blob(const void* buf, size_t size, bool wrap_lines) +{ + const size_t len = ((size + 2) / 3) * 4 + (wrap_lines ? (size / 57) : 0); + uint8_t* str = (uint8_t*)calloc(1, len + 2); + SerdNode node = { str, len, len, 0, SERD_LITERAL }; + for (size_t i = 0, j = 0; i < size; i += 3, j += 4) { + uint8_t in[4] = { 0, 0, 0, 0 }; + size_t n_in = MIN(3, size - i); + memcpy(in, (const uint8_t*)buf + i, n_in); + + if (wrap_lines && i > 0 && (i % 57) == 0) { + str[j++] = '\n'; + node.flags |= SERD_HAS_NEWLINE; + } + + encode_chunk(str + j, in, n_in); + } + return node; +} + +SERD_API +void +serd_node_free(SerdNode* node) +{ + if (node && node->buf) { + free((uint8_t*)node->buf); + node->buf = NULL; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/reader.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,1943 @@ +/* + Copyright 2011-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NS_XSD "http://www.w3.org/2001/XMLSchema#" +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +#define TRY_THROW(exp) if (!(exp)) goto except; +#define TRY_RET(exp) if (!(exp)) return 0; + +#ifdef SERD_STACK_CHECK +# define SERD_STACK_ASSERT_TOP(reader, ref) \ + assert(ref == reader->allocs[reader->n_allocs - 1]); +#else +# define SERD_STACK_ASSERT_TOP(reader, ref) +#endif + +typedef struct { + const uint8_t* filename; + unsigned line; + unsigned col; +} Cursor; + +typedef uint32_t uchar; + +/* Reference to a node in the stack (we can not use pointers since the + stack may be reallocated, invalidating any pointers to elements). 
+*/ +typedef size_t Ref; + +typedef struct { + Ref graph; + Ref subject; + Ref predicate; + Ref object; + Ref datatype; + Ref lang; + SerdStatementFlags* flags; +} ReadContext; + +struct SerdReaderImpl { + void* handle; + void (*free_handle)(void* ptr); + SerdBaseSink base_sink; + SerdPrefixSink prefix_sink; + SerdStatementSink statement_sink; + SerdEndSink end_sink; + SerdErrorSink error_sink; + void* error_handle; + Ref rdf_first; + Ref rdf_rest; + Ref rdf_nil; + SerdNode default_graph; + SerdByteSource source; + SerdStack stack; + SerdSyntax syntax; + unsigned next_id; + Cursor cur; + SerdStatus status; + uint8_t* buf; + uint8_t* bprefix; + size_t bprefix_len; + bool strict; ///< True iff strict parsing + bool eof; + bool seen_genid; +#ifdef SERD_STACK_CHECK + Ref* allocs; ///< Stack of push offsets + size_t n_allocs; ///< Number of stack pushes +#endif +}; + +static inline bool +supports_fancy_literals(const SerdReader* reader) +{ + return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG; +} + +static inline bool +supports_relative_iris(const SerdReader* reader) +{ + return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG; +} + +static int +r_err(SerdReader* reader, SerdStatus st, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + const SerdError e = { + st, reader->cur.filename, reader->cur.line, reader->cur.col, fmt, &args + }; + serd_error(reader->error_sink, reader->error_handle, &e); + va_end(args); + return 0; +} + +/** fread-like wrapper for getc (which is faster). 
*/ +static size_t +serd_file_read_byte(void* buf, size_t size, size_t nmemb, void* stream) +{ + const int c = getc((FILE*)stream); + if (c == EOF) { + *((uint8_t*)buf) = 0; + return 0; + } + *((uint8_t*)buf) = (uint8_t)c; + return 1; +} + +static inline uint8_t +peek_byte(SerdReader* reader) +{ + return serd_byte_source_peek(&reader->source); +} + +static inline uint8_t +eat_byte_safe(SerdReader* reader, const uint8_t byte) +{ + assert(peek_byte(reader) == byte); + switch (byte) { + case '\0': reader->eof = (byte != '\0'); break; + case '\n': ++reader->cur.line; reader->cur.col = 0; break; + default: ++reader->cur.col; + } + + reader->status = serd_byte_source_advance(&reader->source); + return byte; +} + +static inline uint8_t +eat_byte_check(SerdReader* reader, const uint8_t byte) +{ + const uint8_t c = peek_byte(reader); + if (c != byte) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "expected `%c', not `%c'\n", byte, c); + } + return eat_byte_safe(reader, byte); +} + +static inline bool +eat_string(SerdReader* reader, const char* str, unsigned n) +{ + bool bad = false; + for (unsigned i = 0; i < n; ++i) { + bad |= eat_byte_check(reader, ((const uint8_t*)str)[i]); + } + return bad; +} + +static Ref +push_node_padded(SerdReader* reader, size_t maxlen, + SerdType type, const char* str, size_t n_bytes) +{ + void* mem = serd_stack_push_aligned( + &reader->stack, sizeof(SerdNode) + maxlen + 1, sizeof(SerdNode)); + + SerdNode* const node = (SerdNode*)mem; + node->n_bytes = node->n_chars = n_bytes; + node->flags = 0; + node->type = type; + node->buf = NULL; + + uint8_t* buf = (uint8_t*)(node + 1); + memcpy(buf, str, n_bytes + 1); + +#ifdef SERD_STACK_CHECK + reader->allocs = realloc( + reader->allocs, sizeof(uint8_t*) * (++reader->n_allocs)); + reader->allocs[reader->n_allocs - 1] = ((uint8_t*)mem - reader->stack.buf); +#endif + return (uint8_t*)node - reader->stack.buf; +} + +static Ref +push_node(SerdReader* reader, SerdType type, const char* str, size_t n_bytes) +{ 
+ return push_node_padded(reader, n_bytes, type, str, n_bytes); +} + +static inline SerdNode* +deref(SerdReader* reader, const Ref ref) +{ + if (ref) { + SerdNode* node = (SerdNode*)(reader->stack.buf + ref); + node->buf = (uint8_t*)node + sizeof(SerdNode); + return node; + } + return NULL; +} + +static inline void +push_byte(SerdReader* reader, Ref ref, const uint8_t c) +{ + SERD_STACK_ASSERT_TOP(reader, ref); + uint8_t* const s = serd_stack_push(&reader->stack, 1); + SerdNode* const node = (SerdNode*)(reader->stack.buf + ref); + ++node->n_bytes; + if (!(c & 0x80)) { // Starts with 0 bit, start of new character + ++node->n_chars; + } + *(s - 1) = c; + *s = '\0'; +} + +static inline void +push_replacement(SerdReader* reader, Ref dest) +{ + push_byte(reader, dest, 0xEF); + push_byte(reader, dest, 0xBF); + push_byte(reader, dest, 0xBD); +} + +static Ref +pop_node(SerdReader* reader, Ref ref) +{ + if (ref && ref != reader->rdf_first && ref != reader->rdf_rest + && ref != reader->rdf_nil) { +#ifdef SERD_STACK_CHECK + SERD_STACK_ASSERT_TOP(reader, ref); + --reader->n_allocs; +#endif + SerdNode* const node = deref(reader, ref); + uint8_t* const top = reader->stack.buf + reader->stack.size; + serd_stack_pop_aligned(&reader->stack, top - (uint8_t*)node); + } + return 0; +} + +static inline bool +emit_statement(SerdReader* reader, ReadContext ctx, Ref o, Ref d, Ref l) +{ + SerdNode* graph = deref(reader, ctx.graph); + if (!graph && reader->default_graph.buf) { + graph = &reader->default_graph; + } + bool ret = !reader->statement_sink || + !reader->statement_sink( + reader->handle, *ctx.flags, graph, + deref(reader, ctx.subject), deref(reader, ctx.predicate), + deref(reader, o), deref(reader, d), deref(reader, l)); + *ctx.flags &= SERD_ANON_CONT|SERD_LIST_CONT; // Preserve only cont flags + return ret; +} + +static bool +read_collection(SerdReader* reader, ReadContext ctx, Ref* dest); + +static bool +read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* 
ate_dot); + +static inline uint8_t +read_HEX(SerdReader* reader) +{ + const uint8_t c = peek_byte(reader); + if (is_digit(c) || in_range(c, 'A', 'F') || in_range(c, 'a', 'f')) { + return eat_byte_safe(reader, c); + } else { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid hexadecimal digit `%c'\n", c); + } +} + +// Read UCHAR escape, initial \ is already eaten by caller +static inline bool +read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code) +{ + const uint8_t b = peek_byte(reader); + unsigned length = 0; + switch (b) { + case 'U': + length = 8; + break; + case 'u': + length = 4; + break; + default: + return false; + } + eat_byte_safe(reader, b); + + uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + for (unsigned i = 0; i < length; ++i) { + if (!(buf[i] = read_HEX(reader))) { + return false; + } + } + + uint32_t code; + sscanf((const char*)buf, "%X", &code); + + unsigned size = 0; + if (code < 0x00000080) { + size = 1; + } else if (code < 0x00000800) { + size = 2; + } else if (code < 0x00010000) { + size = 3; + } else if (code < 0x00110000) { + size = 4; + } else { + r_err(reader, SERD_ERR_BAD_SYNTAX, + "unicode character 0x%X out of range\n", code); + push_replacement(reader, dest); + *char_code = 0xFFFD; + return true; + } + + // Build output in buf + // (Note # of bytes = # of leading 1 bits in first byte) + uint32_t c = code; + switch (size) { + case 4: + buf[3] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= (16 << 12); // set bit 4 + case 3: + buf[2] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= (32 << 6); // set bit 5 + case 2: + buf[1] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= 0xC0; // set bits 6 and 7 + case 1: + buf[0] = (uint8_t)c; + } + + for (unsigned i = 0; i < size; ++i) { + push_byte(reader, dest, buf[i]); + } + *char_code = code; + return true; +} + +// Read ECHAR escape, initial \ is already eaten by caller +static inline bool +read_ECHAR(SerdReader* reader, Ref dest, SerdNodeFlags* flags) +{ + const uint8_t c = 
peek_byte(reader); + switch (c) { + case 't': + eat_byte_safe(reader, 't'); + push_byte(reader, dest, '\t'); + return true; + case 'b': + eat_byte_safe(reader, 'b'); + push_byte(reader, dest, '\b'); + return true; + case 'n': + *flags |= SERD_HAS_NEWLINE; + eat_byte_safe(reader, 'n'); + push_byte(reader, dest, '\n'); + return true; + case 'r': + *flags |= SERD_HAS_NEWLINE; + eat_byte_safe(reader, 'r'); + push_byte(reader, dest, '\r'); + return true; + case 'f': + eat_byte_safe(reader, 'f'); + push_byte(reader, dest, '\f'); + return true; + case '\\': case '"': case '\'': + push_byte(reader, dest, eat_byte_safe(reader, c)); + return true; + default: + return false; + } +} + +static inline SerdStatus +bad_char(SerdReader* reader, Ref dest, const char* fmt, uint8_t c) +{ + r_err(reader, SERD_ERR_BAD_SYNTAX, fmt, c); + push_replacement(reader, dest); + + // Skip bytes until the next start byte + for (uint8_t b = peek_byte(reader); (b & 0x80);) { + eat_byte_safe(reader, b); + b = peek_byte(reader); + } + + return SERD_SUCCESS; +} + +static SerdStatus +read_utf8_character(SerdReader* reader, Ref dest, uint8_t c) +{ + unsigned size = 1; + if ((c & 0xE0) == 0xC0) { // Starts with `110' + size = 2; + } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' + size = 3; + } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' + size = 4; + } else { + return bad_char(reader, dest, "invalid UTF-8 start 0x%X\n", c); + } + + char bytes[4]; + bytes[0] = c; + + // Check character validity + for (unsigned i = 1; i < size; ++i) { + if (((bytes[i] = peek_byte(reader)) & 0x80) == 0) { + return bad_char(reader, dest, "invalid UTF-8 continuation 0x%X\n", + bytes[i]); + } + eat_byte_safe(reader, bytes[i]); + } + + // Emit character + for (unsigned i = 0; i < size; ++i) { + push_byte(reader, dest, bytes[i]); + } + return SERD_SUCCESS; +} + +// Read one character (possibly multi-byte) +// The first byte, c, has already been eaten by caller +static inline SerdStatus 
+read_character(SerdReader* reader, Ref dest, SerdNodeFlags* flags, uint8_t c) +{ + if (!(c & 0x80)) { + switch (c) { + case 0xA: case 0xD: + *flags |= SERD_HAS_NEWLINE; + break; + case '"': case '\'': + *flags |= SERD_HAS_QUOTE; + break; + } + push_byte(reader, dest, c); + return SERD_SUCCESS; + } else { + return read_utf8_character(reader, dest, c); + } +} + +// [10] comment ::= '#' ( [^#xA #xD] )* +static void +read_comment(SerdReader* reader) +{ + eat_byte_safe(reader, '#'); + uint8_t c; + while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && c) { + eat_byte_safe(reader, c); + } +} + +// [24] ws ::= #x9 | #xA | #xD | #x20 | comment +static inline bool +read_ws(SerdReader* reader) +{ + const uint8_t c = peek_byte(reader); + switch (c) { + case 0x9: case 0xA: case 0xD: case 0x20: + eat_byte_safe(reader, c); + return true; + case '#': + read_comment(reader); + return true; + default: + return false; + } +} + +static inline bool +read_ws_star(SerdReader* reader) +{ + while (read_ws(reader)) {} + return true; +} + +static inline bool +peek_delim(SerdReader* reader, const char delim) +{ + read_ws_star(reader); + return peek_byte(reader) == delim; +} + +static inline bool +eat_delim(SerdReader* reader, const char delim) +{ + if (peek_delim(reader, delim)) { + eat_byte_safe(reader, delim); + return read_ws_star(reader); + } + return false; +} + +// STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE +// Initial triple quotes are already eaten by caller +static Ref +read_STRING_LITERAL_LONG(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) +{ + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + while (true) { + const uint8_t c = peek_byte(reader); + uint32_t code; + switch (c) { + case '\\': + eat_byte_safe(reader, c); + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid escape `\\%c'\n", peek_byte(reader)); + return pop_node(reader, ref); + } + break; + default: + if (c == q) { + 
eat_byte_safe(reader, q); + const uint8_t q2 = eat_byte_safe(reader, peek_byte(reader)); + const uint8_t q3 = peek_byte(reader); + if (q2 == q && q3 == q) { // End of string + eat_byte_safe(reader, q3); + return ref; + } else { + *flags |= SERD_HAS_QUOTE; + push_byte(reader, ref, c); + read_character(reader, ref, flags, q2); + } + } else { + read_character(reader, ref, flags, eat_byte_safe(reader, c)); + } + } + } + return ref; +} + +// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE +// Initial quote is already eaten by caller +static Ref +read_STRING_LITERAL(SerdReader* reader, SerdNodeFlags* flags, uint8_t q) +{ + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + while (true) { + const uint8_t c = peek_byte(reader); + uint32_t code; + switch (c) { + case '\n': case '\r': + r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n"); + return pop_node(reader, ref); + case '\\': + eat_byte_safe(reader, c); + if (!read_ECHAR(reader, ref, flags) && + !read_UCHAR(reader, ref, &code)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid escape `\\%c'\n", peek_byte(reader)); + return pop_node(reader, ref); + } + break; + default: + if (c == q) { + eat_byte_check(reader, q); + return ref; + } else { + read_character(reader, ref, flags, eat_byte_safe(reader, c)); + } + } + } + eat_byte_check(reader, q); + return ref; +} + +static Ref +read_String(SerdReader* reader, SerdNodeFlags* flags) +{ + const uint8_t q1 = peek_byte(reader); + eat_byte_safe(reader, q1); + + const uint8_t q2 = peek_byte(reader); + if (q2 != q1) { // Short string (not triple quoted) + return read_STRING_LITERAL(reader, flags, q1); + } + + eat_byte_safe(reader, q2); + const uint8_t q3 = peek_byte(reader); + if (q3 != q1) { // Empty short string ("" or '') + return push_node(reader, SERD_LITERAL, "", 0); + } + + if (!supports_fancy_literals(reader)) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support long literals\n"); + } + + eat_byte_safe(reader, q3); + return 
read_STRING_LITERAL_LONG(reader, flags, q1); +} + +static bool +read_PN_CHARS_BASE(SerdReader* reader, Ref dest) +{ + const uint8_t c = peek_byte(reader); + if ((c & 0x80)) { // Multi-byte character + return !read_utf8_character(reader, dest, eat_byte_safe(reader, c)); + } + if (is_alpha(c)) { + push_byte(reader, dest, eat_byte_safe(reader, c)); + return true; + } + return false; +} + +static bool +read_PN_CHARS(SerdReader* reader, Ref dest) +{ + const uint8_t c = peek_byte(reader); + if ((c & 0x80)) { // Multi-byte character + return !read_utf8_character(reader, dest, eat_byte_safe(reader, c)); + } + + if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') { + push_byte(reader, dest, eat_byte_safe(reader, c)); + return true; + } + return false; +} + +static bool +read_PERCENT(SerdReader* reader, Ref dest) +{ + push_byte(reader, dest, eat_byte_safe(reader, '%')); + const uint8_t h1 = read_HEX(reader); + const uint8_t h2 = read_HEX(reader); + if (h1 && h2) { + push_byte(reader, dest, h1); + push_byte(reader, dest, h2); + return true; + } + return false; +} + +static SerdStatus +read_PLX(SerdReader* reader, Ref dest) +{ + uint8_t c = peek_byte(reader); + switch (c) { + case '%': + if (!read_PERCENT(reader, dest)) { + return SERD_ERR_BAD_SYNTAX; + } + return SERD_SUCCESS; + case '\\': + eat_byte_safe(reader, c); + if (is_alpha(c = peek_byte(reader))) { + // Escapes like \u \n etc. 
are not supported + return SERD_ERR_BAD_SYNTAX; + } else { + // Allow escaping of pretty much any other character + push_byte(reader, dest, eat_byte_safe(reader, c)); + return SERD_SUCCESS; + } + default: + return SERD_FAILURE; + } +} + +static SerdStatus +read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot) +{ + uint8_t c = peek_byte(reader); + SerdStatus st; + switch (c) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': case ':': case '_': + push_byte(reader, dest, eat_byte_safe(reader, c)); + break; + default: + if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { + return st; + } else if (st != SERD_SUCCESS && !read_PN_CHARS_BASE(reader, dest)) { + return SERD_FAILURE; + } + } + + while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ';')* + if (c == '.' || c == ':') { + push_byte(reader, dest, eat_byte_safe(reader, c)); + } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) { + return st; + } else if (st != SERD_SUCCESS && !read_PN_CHARS(reader, dest)) { + break; + } + } + + SerdNode* const n = deref(reader, dest); + if (n->buf[n->n_bytes - 1] == '.') { + // Ate trailing dot, pop it from stack/node and inform caller + --n->n_bytes; + serd_stack_pop(&reader->stack, 1); + *ate_dot = true; + } + + return SERD_SUCCESS; +} + +// Read the remainder of a PN_PREFIX after some initial characters +static SerdStatus +read_PN_PREFIX_tail(SerdReader* reader, Ref dest) +{ + uint8_t c; + while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* + if (c == '.') { + push_byte(reader, dest, eat_byte_safe(reader, c)); + } else if (!read_PN_CHARS(reader, dest)) { + break; + } + } + + const SerdNode* const n = deref(reader, dest); + if (n->buf[n->n_bytes - 1] == '.' 
&& !read_PN_CHARS(reader, dest)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n"); + return SERD_ERR_BAD_SYNTAX; + } + + return SERD_SUCCESS; +} + +static SerdStatus +read_PN_PREFIX(SerdReader* reader, Ref dest) +{ + if (read_PN_CHARS_BASE(reader, dest)) { + return read_PN_PREFIX_tail(reader, dest); + } + return SERD_FAILURE; +} + +static Ref +read_LANGTAG(SerdReader* reader) +{ + uint8_t c = peek_byte(reader); + if (!is_alpha(c)) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c); + } + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + push_byte(reader, ref, eat_byte_safe(reader, c)); + while ((c = peek_byte(reader)) && is_alpha(c)) { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } + while (peek_byte(reader) == '-') { + push_byte(reader, ref, eat_byte_safe(reader, '-')); + while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } + } + return ref; +} + +typedef enum { PREFIX, GOOD, BAD} SchemeState; + +static inline bool +check_scheme(SerdReader* reader, uint8_t c, SchemeState* state) +{ + if (!supports_relative_iris(reader) && *state == PREFIX) { + if (c == ':') { + *state = GOOD; + } else if (!isalpha(c)) { + *state = BAD; + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "syntax does not support relative IRIs\n"); + } + } + return true; +} + +static Ref +read_IRIREF(SerdReader* reader) +{ + TRY_RET(eat_byte_check(reader, '<')); + Ref ref = push_node(reader, SERD_URI, "", 0); + SchemeState scheme = PREFIX; + uint32_t code; + while (true) { + const uint8_t c = peek_byte(reader); + if (!check_scheme(reader, c, &scheme)) { + return pop_node(reader, ref); + } + switch (c) { + case '"': case '<': case '^': case '`': case '{': case '|': case '}': + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid IRI character `%c'\n", c); + return pop_node(reader, ref); + case '>': + eat_byte_safe(reader, c); + return ref; + case '\\': + eat_byte_safe(reader, c); + if 
(!read_UCHAR(reader, ref, &code)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n"); + return pop_node(reader, ref); + } + switch (code) { + case 0: case ' ': case '<': case '>': + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid escaped IRI character %X %c\n", code, code); + return pop_node(reader, ref); + } + break; + default: + if (c <= 0x20) { + if (isprint(c)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid IRI character `%c' (escape %%%02X)\n", c, c); + } else { + r_err(reader, SERD_ERR_BAD_SYNTAX, + "invalid IRI character (escape %%%02X)\n", c, c); + } + if (reader->strict) { + return pop_node(reader, ref); + } + push_byte(reader, ref, eat_byte_safe(reader, c)); + } else { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } + } + } +} + +static bool +read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot) +{ + if (read_prefix && read_PN_PREFIX(reader, dest) > SERD_FAILURE) { + return false; + } else if (peek_byte(reader) != ':') { + return false; + } + + push_byte(reader, dest, eat_byte_safe(reader, ':')); + return read_PN_LOCAL(reader, dest, ate_dot) <= SERD_FAILURE; +} + +static bool +read_0_9(SerdReader* reader, Ref str, bool at_least_one) +{ + unsigned count = 0; + for (uint8_t c; is_digit((c = peek_byte(reader))); ++count) { + push_byte(reader, str, eat_byte_safe(reader, c)); + } + if (at_least_one && count == 0) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n"); + } + return count; +} + +static bool +read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot) +{ + #define XSD_DECIMAL NS_XSD "decimal" + #define XSD_DOUBLE NS_XSD "double" + #define XSD_INTEGER NS_XSD "integer" + Ref ref = push_node(reader, SERD_LITERAL, "", 0); + uint8_t c = peek_byte(reader); + bool has_decimal = false; + if (c == '-' || c == '+') { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } + if ((c = peek_byte(reader)) == '.') { + has_decimal = true; + // decimal case 2 (e.g. 
'.0' or `-.0' or `+.0') + push_byte(reader, ref, eat_byte_safe(reader, c)); + TRY_THROW(read_0_9(reader, ref, true)); + } else { + // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... + TRY_THROW(is_digit(c)); + read_0_9(reader, ref, true); + if ((c = peek_byte(reader)) == '.') { + has_decimal = true; + + // Annoyingly, dot can be end of statement, so tentatively eat + eat_byte_safe(reader, c); + c = peek_byte(reader); + if (!is_digit(c) && c != 'e' && c != 'E') { + *dest = ref; + *ate_dot = true; // Force caller to deal with stupid grammar + return true; // Next byte is not a number character, done + } + + push_byte(reader, ref, '.'); + read_0_9(reader, ref, false); + } + } + c = peek_byte(reader); + if (c == 'e' || c == 'E') { + // double + push_byte(reader, ref, eat_byte_safe(reader, c)); + switch ((c = peek_byte(reader))) { + case '+': case '-': + push_byte(reader, ref, eat_byte_safe(reader, c)); + default: break; + } + TRY_THROW(read_0_9(reader, ref, true)); + *datatype = push_node(reader, SERD_URI, + XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1); + } else if (has_decimal) { + *datatype = push_node(reader, SERD_URI, + XSD_DECIMAL, sizeof(XSD_DECIMAL) - 1); + } else { + *datatype = push_node(reader, SERD_URI, + XSD_INTEGER, sizeof(XSD_INTEGER) - 1); + } + *dest = ref; + return true; +except: + pop_node(reader, *datatype); + pop_node(reader, ref); + return false; +} + +static bool +read_iri(SerdReader* reader, Ref* dest, bool* ate_dot) +{ + switch (peek_byte(reader)) { + case '<': + *dest = read_IRIREF(reader); + return true; + default: + *dest = push_node(reader, SERD_CURIE, "", 0); + return read_PrefixedName(reader, *dest, true, ate_dot); + } +} + +static bool +read_literal(SerdReader* reader, Ref* dest, + Ref* datatype, Ref* lang, SerdNodeFlags* flags, bool* ate_dot) +{ + Ref str = read_String(reader, flags); + if (!str) { + return false; + } + + switch (peek_byte(reader)) { + case '@': + eat_byte_safe(reader, '@'); + TRY_THROW(*lang = 
read_LANGTAG(reader)); + break; + case '^': + eat_byte_safe(reader, '^'); + eat_byte_check(reader, '^'); + TRY_THROW(read_iri(reader, datatype, ate_dot)); + break; + } + *dest = str; + return true; +except: + *datatype = pop_node(reader, *datatype); + *lang = pop_node(reader, *lang); + pop_node(reader, str); + return false; +} + +inline static bool +is_token_end(uint8_t c) +{ + switch (c) { + case 0x9: case 0xA: case 0xD: case 0x20: case '\0': + case '#': case '.': case ';': case '<': + return true; + default: + return false; + } +} + +static bool +read_verb(SerdReader* reader, Ref* dest) +{ + if (peek_byte(reader) == '<') { + return (*dest = read_IRIREF(reader)); + } else { + /* Either a qname, or "a". Read the prefix first, and if it is in fact + "a", produce that instead. + */ + *dest = push_node(reader, SERD_CURIE, "", 0); + SerdNode* node = deref(reader, *dest); + const SerdStatus st = read_PN_PREFIX(reader, *dest); + bool ate_dot = false; + if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && + is_token_end(peek_byte(reader))) { + pop_node(reader, *dest); + return (*dest = push_node(reader, SERD_URI, NS_RDF "type", 47)); + } else if (st > SERD_FAILURE || + !read_PrefixedName(reader, *dest, false, &ate_dot) || + ate_dot) { + return (*dest = pop_node(reader, *dest)); + } else { + return true; + } + } + return false; +} + +static Ref +read_BLANK_NODE_LABEL(SerdReader* reader, bool* ate_dot) +{ + eat_byte_safe(reader, '_'); + eat_byte_check(reader, ':'); + Ref ref = push_node(reader, SERD_BLANK, + reader->bprefix ? 
(char*)reader->bprefix : "", + reader->bprefix_len); + + uint8_t c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9]) + if (is_digit(c) || c == '_') { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } else if (!read_PN_CHARS(reader, ref)) { + r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start character\n"); + return pop_node(reader, ref); + } + + while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')* + if (c == '.') { + push_byte(reader, ref, eat_byte_safe(reader, c)); + } else if (!read_PN_CHARS(reader, ref)) { + break; + } + } + + SerdNode* n = deref(reader, ref); + if (n->buf[n->n_bytes - 1] == '.' && !read_PN_CHARS(reader, ref)) { + // Ate trailing dot, pop it from stack/node and inform caller + --n->n_bytes; + serd_stack_pop(&reader->stack, 1); + *ate_dot = true; + } + + if (reader->syntax == SERD_TURTLE) { + if (is_digit(n->buf[reader->bprefix_len + 1])) { + if ((n->buf[reader->bprefix_len]) == 'b') { + ((char*)n->buf)[reader->bprefix_len] = 'B'; // Prevent clash + reader->seen_genid = true; + } else if (reader->seen_genid && + n->buf[reader->bprefix_len] == 'B') { + r_err(reader, SERD_ERR_ID_CLASH, + "found both `b' and `B' blank IDs, prefix required\n"); + return pop_node(reader, ref); + } + } + } + return ref; +} + +static void +set_blank_id(SerdReader* reader, Ref ref, size_t buf_size) +{ + SerdNode* node = deref(reader, ref); + const char* prefix = reader->bprefix ? 
(const char*)reader->bprefix : ""; + node->n_bytes = node->n_chars = snprintf( + (char*)node->buf, buf_size, "%sb%u", prefix, reader->next_id++); +} + +static size_t +genid_size(SerdReader* reader) +{ + return reader->bprefix_len + 1 + 10 + 1; // + "b" + UINT32_MAX + \0 +} + +static Ref +blank_id(SerdReader* reader) +{ + Ref ref = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); + set_blank_id(reader, ref, genid_size(reader)); + return ref; +} + +static Ref +read_blankName(SerdReader* reader) +{ + eat_byte_safe(reader, '='); + if (eat_byte_check(reader, '=') != '=') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n"); + } + + Ref subject = 0; + bool ate_dot = false; + read_ws_star(reader); + read_iri(reader, &subject, &ate_dot); + return subject; +} + +static bool +read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest) +{ + const SerdStatementFlags old_flags = *ctx.flags; + bool empty; + eat_byte_safe(reader, '['); + if ((empty = peek_delim(reader, ']'))) { + *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O; + } else { + *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN; + if (peek_delim(reader, '=')) { + if (!(*dest = read_blankName(reader)) || + !eat_delim(reader, ';')) { + return false; + } + } + } + + if (!*dest) { + *dest = blank_id(reader); + } + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx, *dest, 0, 0)); + } + + ctx.subject = *dest; + if (!empty) { + *ctx.flags &= ~(SERD_LIST_CONT); + if (!subject) { + *ctx.flags |= SERD_ANON_CONT; + } + bool ate_dot_in_list = false; + read_predicateObjectList(reader, ctx, &ate_dot_in_list); + if (ate_dot_in_list) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' 
inside blank\n"); + } + read_ws_star(reader); + if (reader->end_sink) { + reader->end_sink(reader->handle, deref(reader, *dest)); + } + *ctx.flags = old_flags; + } + return (eat_byte_check(reader, ']') == ']'); +} + +/* If emit is true: recurses, calling statement_sink for every statement + encountered, and leaves stack in original calling state (i.e. pops + everything it pushes). */ +static bool +read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot) +{ + static const char* const XSD_BOOLEAN = NS_XSD "boolean"; + static const size_t XSD_BOOLEAN_LEN = 40; + +#ifndef NDEBUG + const size_t orig_stack_size = reader->stack.size; +#endif + + bool ret = false; + bool simple = (ctx->subject != 0); + SerdNode* node = NULL; + Ref o = 0; + Ref datatype = 0; + Ref lang = 0; + uint32_t flags = 0; + const uint8_t c = peek_byte(reader); + if (!supports_fancy_literals(reader)) { + switch (c) { + case '"': case ':': case '<': case '_': break; + default: return r_err(reader, SERD_ERR_BAD_SYNTAX, + "expected: ':', '<', or '_'\n"); + } + } + switch (c) { + case '\0': + case ')': + return false; + case '[': + simple = false; + TRY_THROW(ret = read_anon(reader, *ctx, false, &o)); + break; + case '(': + simple = false; + TRY_THROW(ret = read_collection(reader, *ctx, &o)); + break; + case '_': + TRY_THROW(ret = (o = read_BLANK_NODE_LABEL(reader, ate_dot))); + break; + case '<': case ':': + TRY_THROW(ret = read_iri(reader, &o, ate_dot)); + break; + case '+': case '-': case '.': case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': case '8': case '9': + TRY_THROW(ret = read_number(reader, &o, &datatype, ate_dot)); + break; + case '\"': + case '\'': + TRY_THROW(ret = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot)); + break; + default: + /* Either a boolean literal, or a qname. Read the prefix first, and if + it is in fact a "true" or "false" literal, produce that instead. 
+ */ + node = deref(reader, o = push_node(reader, SERD_CURIE, "", 0)); + while (read_PN_CHARS_BASE(reader, o)) {} + if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) || + (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) { + node->type = SERD_LITERAL; + datatype = push_node( + reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN); + ret = true; + } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) { + ret = false; + } else { + ret = read_PrefixedName(reader, o, false, ate_dot); + } + } + + if (simple && o) { + deref(reader, o)->flags = flags; + } + + if (ret && emit && simple) { + ret = emit_statement(reader, *ctx, o, datatype, lang); + } else if (ret && !emit) { + ctx->object = o; + ctx->datatype = datatype; + ctx->lang = lang; + return true; + } + +except: + pop_node(reader, lang); + pop_node(reader, datatype); + pop_node(reader, o); +#ifndef NDEBUG + assert(reader->stack.size == orig_stack_size); +#endif + return ret; +} + +static bool +read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) +{ + TRY_RET(read_object(reader, &ctx, true, ate_dot)); + while (!*ate_dot && eat_delim(reader, ',')) { + TRY_RET(read_object(reader, &ctx, true, ate_dot)); + } + return true; +} + +static bool +read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot) +{ + uint8_t c; + while (true) { + TRY_THROW(read_verb(reader, &ctx.predicate)); + read_ws_star(reader); + + TRY_THROW(read_objectList(reader, ctx, ate_dot)); + ctx.predicate = pop_node(reader, ctx.predicate); + if (*ate_dot) { + return true; + } + + bool ate_semi = false; + do { + read_ws_star(reader); + switch (c = peek_byte(reader)) { + case 0: + return false; + case '.': case ']': case '}': + return true; + case ';': + eat_byte_safe(reader, c); + ate_semi = true; + } + } while (c == ';'); + + if (!ate_semi) { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing ';' or '.'\n"); + } + } + + pop_node(reader, ctx.predicate); + return true; +except: + pop_node(reader, ctx.predicate); 
+ return false; +} + +static bool +end_collection(SerdReader* reader, ReadContext ctx, Ref n1, Ref n2, bool ret) +{ + pop_node(reader, n2); + pop_node(reader, n1); + *ctx.flags &= ~SERD_LIST_CONT; + return ret && (eat_byte_safe(reader, ')') == ')'); +} + +static bool +read_collection(SerdReader* reader, ReadContext ctx, Ref* dest) +{ + eat_byte_safe(reader, '('); + bool end = peek_delim(reader, ')'); + *dest = end ? reader->rdf_nil : blank_id(reader); + if (ctx.subject) { + // subject predicate _:head + *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN); + TRY_RET(emit_statement(reader, ctx, *dest, 0, 0)); + *ctx.flags |= SERD_LIST_CONT; + } else { + *ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN); + } + + if (end) { + return end_collection(reader, ctx, 0, 0, true); + } + + /* The order of node allocation here is necessarily not in stack order, + so we create two nodes and recycle them throughout. */ + Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0); + Ref n2 = 0; + Ref node = n1; + Ref rest = 0; + + ctx.subject = *dest; + while (!(end = peek_delim(reader, ')'))) { + // _:node rdf:first object + ctx.predicate = reader->rdf_first; + bool ate_dot = false; + if (!read_object(reader, &ctx, true, &ate_dot) || ate_dot) { + return end_collection(reader, ctx, n1, n2, false); + } + + if (!(end = peek_delim(reader, ')'))) { + /* Give rest a new ID. Done as late as possible to ensure it is + used and > IDs generated by read_object above. */ + if (!rest) { + rest = n2 = blank_id(reader); // First pass, push + } else { + set_blank_id(reader, rest, genid_size(reader)); + } + } + + // _:node rdf:rest _:rest + *ctx.flags |= SERD_LIST_CONT; + ctx.predicate = reader->rdf_rest; + TRY_RET(emit_statement(reader, ctx, + (end ? 
reader->rdf_nil : rest), 0, 0)); + + ctx.subject = rest; // _:node = _:rest + rest = node; // _:rest = (old)_:node + node = ctx.subject; // invariant + } + + return end_collection(reader, ctx, n1, n2, true); +} + +static Ref +read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, char* s_type) +{ + bool ate_dot = false; + switch ((*s_type = peek_byte(reader))) { + case '[': + read_anon(reader, ctx, true, dest); + break; + case '(': + read_collection(reader, ctx, dest); + break; + case '_': + *dest = read_BLANK_NODE_LABEL(reader, &ate_dot); + break; + default: + TRY_RET(read_iri(reader, dest, &ate_dot)); + } + return ate_dot ? pop_node(reader, *dest) : *dest; +} + +static Ref +read_labelOrSubject(SerdReader* reader, ReadContext ctx) +{ + Ref subject = 0; + bool ate_dot = false; + switch (peek_byte(reader)) { + case '[': + eat_byte_safe(reader, '['); + read_ws_star(reader); + TRY_RET(eat_byte_check(reader, ']')); + return blank_id(reader); + case '_': + return read_BLANK_NODE_LABEL(reader, &ate_dot); + default: + read_iri(reader, &subject, &ate_dot); + } + return subject; +} + +static bool +read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot) +{ + bool ret = false; + if (ctx.subject) { + read_ws_star(reader); + switch (peek_byte(reader)) { + case '.': + *ate_dot = eat_byte_safe(reader, '.'); + return false; + case '}': + return false; + } + ret = read_predicateObjectList(reader, ctx, ate_dot); + } + ctx.subject = ctx.predicate = 0; + return ret; +} + +static bool +read_base(SerdReader* reader, bool sparql, bool token) +{ + if (token) { + TRY_RET(eat_string(reader, "base", 4)); + } + + Ref uri; + read_ws_star(reader); + TRY_RET(uri = read_IRIREF(reader)); + if (reader->base_sink) { + reader->base_sink(reader->handle, deref(reader, uri)); + } + pop_node(reader, uri); + + read_ws_star(reader); + if (!sparql) { + return eat_byte_check(reader, '.'); + } else if (peek_byte(reader) == '.') { + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "full stop after 
SPARQL BASE\n"); + } + return true; +} + +static bool +read_prefixID(SerdReader* reader, bool sparql, bool token) +{ + if (token) { + TRY_RET(eat_string(reader, "prefix", 6)); + } + + read_ws_star(reader); + bool ret = true; + Ref name = push_node(reader, SERD_LITERAL, "", 0); + if (read_PN_PREFIX(reader, name) > SERD_FAILURE) { + return pop_node(reader, name); + } + + if (eat_byte_check(reader, ':') != ':') { + return pop_node(reader, name); + } + + read_ws_star(reader); + const Ref uri = read_IRIREF(reader); + if (!uri) { + pop_node(reader, name); + return false; + } + + if (reader->prefix_sink) { + ret = !reader->prefix_sink(reader->handle, + deref(reader, name), + deref(reader, uri)); + } + pop_node(reader, uri); + pop_node(reader, name); + if (!sparql) { + read_ws_star(reader); + return eat_byte_check(reader, '.'); + } + return ret; +} + +static bool +read_directive(SerdReader* reader) +{ + const bool sparql = peek_byte(reader) != '@'; + if (!sparql) { + eat_byte_safe(reader, '@'); + switch (peek_byte(reader)) { + case 'B': case 'P': + return r_err(reader, SERD_ERR_BAD_SYNTAX, + "uppercase directive\n"); + } + } + + switch (peek_byte(reader)) { + case 'B': case 'b': return read_base(reader, sparql, true); + case 'P': case 'p': return read_prefixID(reader, sparql, true); + default: + return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n"); + } + + return true; +} + +static bool +read_wrappedGraph(SerdReader* reader, ReadContext* ctx) +{ + bool ate_dot = false; + char s_type = 0; + TRY_RET(eat_byte_check(reader, '{')); + read_ws_star(reader); + while (peek_byte(reader) != '}') { + ctx->subject = 0; + Ref subj = read_subject(reader, *ctx, &ctx->subject, &s_type); + if (!subj || + (!read_triples(reader, *ctx, &ate_dot) && s_type != '[')) { + return false; + } + pop_node(reader, subj); + read_ws_star(reader); + if (peek_byte(reader) == '.') { + eat_byte_safe(reader, '.'); + } + read_ws_star(reader); + } + return eat_byte_check(reader, '}'); +} + +static 
int +tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n) +{ + SerdNode* node = deref(reader, ref); + if (!node || node->n_bytes != n) { + return -1; + } + const char* s1 = (const char*)node->buf; + const char* s2 = tok; + for (; n > 0 && *s2; s1++, s2++, --n) { + if (toupper(*s1) != toupper(*s2)) { + return ((*(uint8_t*)s1 < *(uint8_t*)s2) ? -1 : +1); + } + } + return 0; +} + +static bool +read_statement(SerdReader* reader) +{ + SerdStatementFlags flags = 0; + ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; + Ref subj = 0; + bool ate_dot = false; + char s_type = false; + bool ret = true; + read_ws_star(reader); + switch (peek_byte(reader)) { + case '\0': + reader->eof = true; + return reader->status <= SERD_FAILURE; + case '@': + TRY_RET(read_directive(reader)); + read_ws_star(reader); + break; + case '{': + if (reader->syntax == SERD_TRIG) { + TRY_RET(read_wrappedGraph(reader, &ctx)); + read_ws_star(reader); + } else { + return r_err(reader, SERD_ERR_BAD_SYNTAX, "graph in Turtle\n"); + } + break; + default: + subj = read_subject(reader, ctx, &ctx.subject, &s_type); + if (!tokcmp(reader, ctx.subject, "base", 4)) { + ret = read_base(reader, true, false); + } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) { + ret = read_prefixID(reader, true, false); + } else if (!tokcmp(reader, ctx.subject, "graph", 5)) { + read_ws_star(reader); + TRY_RET((ctx.graph = read_labelOrSubject(reader, ctx))); + read_ws_star(reader); + TRY_RET(read_wrappedGraph(reader, &ctx)); + read_ws_star(reader); + } else if (read_ws_star(reader) && peek_byte(reader) == '{') { + if (s_type == '(' || (s_type == '[' && !*ctx.flags)) { + return false; // invalid graph with complex label + } + ctx.graph = subj; + ctx.subject = subj = 0; + TRY_RET(read_wrappedGraph(reader, &ctx)); + read_ws_star(reader); + } else if (!subj) { + ret = r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n"); + } else if (!read_triples(reader, ctx, &ate_dot)) { + ret = (s_type == '['); + } else if (!ate_dot) { + 
read_ws_star(reader); + ret = (eat_byte_check(reader, '.') == '.'); + } + pop_node(reader, subj); + break; + } + return ret; +} + +static bool +read_turtleDoc(SerdReader* reader) +{ + while (!reader->eof) { + TRY_RET(read_statement(reader)); + } + return reader->status <= SERD_FAILURE; +} + +static bool +read_trigDoc(SerdReader* reader) +{ + while (!reader->eof) { + TRY_RET(read_statement(reader)); + } + return reader->status <= SERD_FAILURE; +} + +static bool +read_nquadsDoc(SerdReader* reader) +{ + while (!reader->eof) { + SerdStatementFlags flags = 0; + ReadContext ctx = { 0, 0, 0, 0, 0, 0, &flags }; + bool ate_dot = false; + char s_type = false; + read_ws_star(reader); + if (peek_byte(reader) == '\0') { + reader->eof = true; + break; + } + + // subject predicate object + if (!(ctx.subject = read_subject(reader, ctx, &ctx.subject, &s_type)) || + !read_ws_star(reader) || + !(ctx.predicate = read_IRIREF(reader)) || + !read_ws_star(reader) || + !read_object(reader, &ctx, false, &ate_dot)) { + return false; + } + + if (!ate_dot) { // graphLabel? + TRY_RET(read_ws_star(reader)); + switch (peek_byte(reader)) { + case '.': + break; + case '_': + ctx.graph = read_BLANK_NODE_LABEL(reader, &ate_dot); + break; + default: + if (!(ctx.graph = read_IRIREF(reader))) { + return false; + } + } + + // Terminating '.' 
+ TRY_RET(read_ws_star(reader)); + eat_byte_check(reader, '.'); + } + + TRY_RET(emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang)); + pop_node(reader, ctx.graph); + pop_node(reader, ctx.lang); + pop_node(reader, ctx.datatype); + pop_node(reader, ctx.object); + } + return reader->status <= SERD_FAILURE; +} + +static bool +read_doc(SerdReader* reader) +{ + switch (reader->syntax) { + case SERD_NQUADS: return read_nquadsDoc(reader); + case SERD_TRIG: return read_trigDoc(reader); + default: return read_turtleDoc(reader); + } +} + +SERD_API +SerdReader* +serd_reader_new(SerdSyntax syntax, + void* handle, + void (*free_handle)(void*), + SerdBaseSink base_sink, + SerdPrefixSink prefix_sink, + SerdStatementSink statement_sink, + SerdEndSink end_sink) +{ + const Cursor cur = { NULL, 0, 0 }; + SerdReader* me = (SerdReader*)calloc(1, sizeof(SerdReader)); + me->handle = handle; + me->free_handle = free_handle; + me->base_sink = base_sink; + me->prefix_sink = prefix_sink; + me->statement_sink = statement_sink; + me->end_sink = end_sink; + me->default_graph = SERD_NODE_NULL; + me->stack = serd_stack_new(SERD_PAGE_SIZE); + me->syntax = syntax; + me->cur = cur; + me->next_id = 1; + + me->rdf_first = push_node(me, SERD_URI, NS_RDF "first", 48); + me->rdf_rest = push_node(me, SERD_URI, NS_RDF "rest", 47); + me->rdf_nil = push_node(me, SERD_URI, NS_RDF "nil", 46); + + return me; +} + +SERD_API +void +serd_reader_set_strict(SerdReader* reader, bool strict) +{ + reader->strict = strict; +} + +SERD_API +void +serd_reader_set_error_sink(SerdReader* reader, + SerdErrorSink error_sink, + void* error_handle) +{ + reader->error_sink = error_sink; + reader->error_handle = error_handle; +} + +SERD_API +void +serd_reader_free(SerdReader* reader) +{ + pop_node(reader, reader->rdf_nil); + pop_node(reader, reader->rdf_rest); + pop_node(reader, reader->rdf_first); + serd_node_free(&reader->default_graph); + +#ifdef SERD_STACK_CHECK + free(reader->allocs); +#endif + 
free(reader->stack.buf); + free(reader->bprefix); + if (reader->free_handle) { + reader->free_handle(reader->handle); + } + free(reader); +} + +SERD_API +void* +serd_reader_get_handle(const SerdReader* reader) +{ + return reader->handle; +} + +SERD_API +void +serd_reader_add_blank_prefix(SerdReader* reader, + const uint8_t* prefix) +{ + free(reader->bprefix); + reader->bprefix_len = 0; + reader->bprefix = NULL; + if (prefix) { + reader->bprefix_len = strlen((const char*)prefix); + reader->bprefix = (uint8_t*)malloc(reader->bprefix_len + 1); + memcpy(reader->bprefix, prefix, reader->bprefix_len + 1); + } +} + +SERD_API +void +serd_reader_set_default_graph(SerdReader* reader, + const SerdNode* graph) +{ + serd_node_free(&reader->default_graph); + reader->default_graph = serd_node_copy(graph); +} + +SERD_API +SerdStatus +serd_reader_read_file(SerdReader* reader, + const uint8_t* uri) +{ + uint8_t* const path = serd_file_uri_parse(uri, NULL); + if (!path) { + return SERD_ERR_BAD_ARG; + } + + FILE* fd = serd_fopen((const char*)path, "r"); + if (!fd) { + free(path); + return SERD_ERR_UNKNOWN; + } + + SerdStatus ret = serd_reader_read_file_handle(reader, fd, path); + fclose(fd); + free(path); + return ret; +} + +static bool +skip_bom(SerdReader* me) +{ + if (peek_byte(me) == 0xEF) { + eat_byte_safe(me, 0xEF); + if (eat_byte_check(me, 0xBB) != 0xBB || + eat_byte_check(me, 0xBF) != 0xBF) { + return r_err(me, SERD_ERR_BAD_SYNTAX, "corrupt byte order mark\n"); + } + } + + return true; +} + +SERD_API +SerdStatus +serd_reader_start_stream(SerdReader* me, + FILE* file, + const uint8_t* name, + bool bulk) +{ + return serd_reader_start_source_stream( + me, + bulk ? (SerdSource)fread : serd_file_read_byte, + (SerdStreamErrorFunc)ferror, + file, + name, + bulk ? 
SERD_PAGE_SIZE : 1); +} + +SERD_API +SerdStatus +serd_reader_start_source_stream(SerdReader* me, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + const uint8_t* name, + size_t page_size) +{ + const Cursor cur = { name, 1, 1 }; + me->cur = cur; + + return serd_byte_source_open_source( + &me->source, read_func, error_func, stream, page_size); +} + +static SerdStatus +serd_reader_prepare(SerdReader* me) +{ + me->eof = false; + if ((me->status = serd_byte_source_prepare(&me->source))) { + r_err(me, me->status, "read error: %s\n", strerror(errno)); + } else if (!skip_bom(me)) { + me->status = SERD_ERR_BAD_SYNTAX; + } + return me->status; +} + +SERD_API +SerdStatus +serd_reader_read_chunk(SerdReader* me) +{ + SerdStatus st = SERD_SUCCESS; + if (!me->source.prepared) { + if ((st = serd_reader_prepare(me))) { + return st; + } + } else if (me->eof) { + me->eof = false; + if ((st = serd_byte_source_advance(&me->source))) { + return st; + } + } + return read_statement(me) ? 
SERD_SUCCESS : SERD_FAILURE; +} + +SERD_API +SerdStatus +serd_reader_end_stream(SerdReader* me) +{ + return serd_byte_source_close(&me->source); +} + +SERD_API +SerdStatus +serd_reader_read_file_handle(SerdReader* me, FILE* file, const uint8_t* name) +{ + return serd_reader_read_source( + me, (SerdSource)fread, (SerdStreamErrorFunc)ferror, + file, name, SERD_PAGE_SIZE); +} + +SERD_API +SerdStatus +serd_reader_read_source(SerdReader* me, + SerdSource source, + SerdStreamErrorFunc error, + void* stream, + const uint8_t* name, + size_t page_size) +{ + SerdStatus st = serd_reader_start_source_stream( + me, source, error, stream, name, page_size); + + if ((st = serd_reader_prepare(me))) { + serd_reader_end_stream(me); + return st; + } else if (!read_doc(me)) { + serd_reader_end_stream(me); + return SERD_ERR_UNKNOWN; + } + + return serd_reader_end_stream(me); +} + +SERD_API +SerdStatus +serd_reader_read_string(SerdReader* me, const uint8_t* utf8) +{ + const Cursor cur = { (const uint8_t*)"(string)", 1, 1 }; + + serd_byte_source_open_string(&me->source, utf8); + me->cur = cur; + me->eof = false; + + SerdStatus st = serd_reader_prepare(me); + if (!st) { + st = read_doc(me) ? SERD_SUCCESS : SERD_ERR_UNKNOWN; + } + + serd_byte_source_close(&me->source); + + return st; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/serd_internal.h Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,384 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SERD_INTERNAL_H +#define SERD_INTERNAL_H + +#define _POSIX_C_SOURCE 200809L /* for posix_memalign and posix_fadvise */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "serd/serd.h" +#include "serd_config.h" + +#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) +# include <fcntl.h> +#endif + +#define SERD_PAGE_SIZE 4096 + +#ifndef MIN +# define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +/* File and Buffer Utilities */ + +static inline FILE* +serd_fopen(const char* path, const char* mode) +{ + FILE* fd = fopen((const char*)path, mode); + if (!fd) { + fprintf(stderr, "Error opening file %s (%s)\n", path, strerror(errno)); + return NULL; + } +#if defined(HAVE_POSIX_FADVISE) && defined(HAVE_FILENO) + posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + return fd; +} + +static inline void* +serd_bufalloc(size_t size) +{ +#ifdef HAVE_POSIX_MEMALIGN + void* ptr; + const int ret = posix_memalign(&ptr, SERD_PAGE_SIZE, size); + return ret ? 
NULL : ptr; +#else + return malloc(size); +#endif +} + +/* Byte source */ + +typedef struct { + SerdSource read_func; ///< Read function (e.g. fread) + SerdStreamErrorFunc error_func; ///< Error function (e.g. ferror) + void* stream; ///< Stream (e.g. FILE) + size_t page_size; ///< Number of bytes to read at a time + uint8_t* file_buf; ///< Buffer iff reading pages from a file + const uint8_t* read_buf; ///< Pointer to file_buf or read_byte + size_t read_head; ///< Offset into read_buf + uint8_t read_byte; ///< 1-byte 'buffer' used when not paging + bool from_stream; ///< True iff reading from `stream` + bool prepared; ///< True iff prepared for reading +} SerdByteSource; + +SerdStatus +serd_byte_source_open_file(SerdByteSource* source, + FILE* file, + bool bulk); + +SerdStatus +serd_byte_source_open_string(SerdByteSource* source, const uint8_t* utf8); + +SerdStatus +serd_byte_source_open_source(SerdByteSource* source, + SerdSource read_func, + SerdStreamErrorFunc error_func, + void* stream, + size_t page_size); + +SerdStatus +serd_byte_source_close(SerdByteSource* source); + +SerdStatus +serd_byte_source_prepare(SerdByteSource* source); + +static inline uint8_t +serd_byte_source_peek(SerdByteSource* source) +{ + assert(source->prepared); + return source->read_buf[source->read_head]; +} + +SerdStatus +serd_byte_source_advance(SerdByteSource* source); + +/* Stack */ + +/** A dynamic stack in memory. */ +typedef struct { + uint8_t* buf; ///< Stack memory + size_t buf_size; ///< Allocated size of buf (>= size) + size_t size; ///< Conceptual size of stack in buf +} SerdStack; + +/** An offset to start the stack at. Note 0 is reserved for NULL. 
*/ +#define SERD_STACK_BOTTOM sizeof(void*) + +static inline SerdStack +serd_stack_new(size_t size) +{ + SerdStack stack; + stack.buf = (uint8_t*)malloc(size); + stack.buf_size = size; + stack.size = SERD_STACK_BOTTOM; + return stack; +} + +static inline bool +serd_stack_is_empty(SerdStack* stack) +{ + return stack->size <= SERD_STACK_BOTTOM; +} + +static inline void +serd_stack_free(SerdStack* stack) +{ + free(stack->buf); + stack->buf = NULL; + stack->buf_size = 0; + stack->size = 0; +} + +static inline uint8_t* +serd_stack_push(SerdStack* stack, size_t n_bytes) +{ + const size_t new_size = stack->size + n_bytes; + if (stack->buf_size < new_size) { + stack->buf_size *= 2; + stack->buf = (uint8_t*)realloc(stack->buf, stack->buf_size); + } + uint8_t* const ret = (stack->buf + stack->size); + stack->size = new_size; + return ret; +} + +static inline void +serd_stack_pop(SerdStack* stack, size_t n_bytes) +{ + assert(stack->size >= n_bytes); + stack->size -= n_bytes; +} + +static inline void* +serd_stack_push_aligned(SerdStack* stack, size_t n_bytes, size_t align) +{ + // Push one byte to ensure space for a pad count + serd_stack_push(stack, 1); + + // Push padding if necessary + const uint8_t pad = align - stack->size % align; + if (pad > 0) { + serd_stack_push(stack, pad); + } + + // Set top of stack to pad count so we can properly pop later + stack->buf[stack->size - 1] = pad; + + // Push requested space at aligned location + return serd_stack_push(stack, n_bytes); +} + +static inline void +serd_stack_pop_aligned(SerdStack* stack, size_t n_bytes) +{ + // Pop requested space down to aligned location + serd_stack_pop(stack, n_bytes); + + // Get amount of padding from top of stack + const uint8_t pad = stack->buf[stack->size - 1]; + + // Pop padding and pad count + serd_stack_pop(stack, pad + 1); +} + +/* Byte Sink */ + +typedef struct SerdByteSinkImpl { + SerdSink sink; + void* stream; + uint8_t* buf; + size_t size; + size_t block_size; +} SerdByteSink; + +static 
inline SerdByteSink +serd_byte_sink_new(SerdSink sink, void* stream, size_t block_size) +{ + SerdByteSink bsink; + bsink.sink = sink; + bsink.stream = stream; + bsink.size = 0; + bsink.block_size = block_size; + bsink.buf = ((block_size > 1) + ? (uint8_t*)serd_bufalloc(block_size) + : NULL); + return bsink; +} + +static inline void +serd_byte_sink_flush(SerdByteSink* bsink) +{ + if (bsink->block_size > 1 && bsink->size > 0) { + bsink->sink(bsink->buf, bsink->size, bsink->stream); + bsink->size = 0; + } +} + +static inline void +serd_byte_sink_free(SerdByteSink* bsink) +{ + serd_byte_sink_flush(bsink); + free(bsink->buf); + bsink->buf = NULL; +} + +static inline size_t +serd_byte_sink_write(const void* buf, size_t len, SerdByteSink* bsink) +{ + if (len == 0) { + return 0; + } else if (bsink->block_size == 1) { + return bsink->sink(buf, len, bsink->stream); + } + + const size_t orig_len = len; + while (len) { + const size_t space = bsink->block_size - bsink->size; + const size_t n = MIN(space, len); + + // Write as much as possible into the remaining buffer space + memcpy(bsink->buf + bsink->size, buf, n); + bsink->size += n; + buf = (const uint8_t*)buf + n; + len -= n; + + // Flush page if buffer is full + if (bsink->size == bsink->block_size) { + bsink->sink(bsink->buf, bsink->block_size, bsink->stream); + bsink->size = 0; + } + } + return orig_len; +} + +/* Character utilities */ + +/** Return true if `c` lies within [`min`...`max`] (inclusive) */ +static inline bool +in_range(const uint8_t c, const uint8_t min, const uint8_t max) +{ + return (c >= min && c <= max); +} + +/** RFC2234: ALPHA := %x41-5A / %x61-7A ; A-Z / a-z */ +static inline bool +is_alpha(const uint8_t c) +{ + return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z'); +} + +/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */ +static inline bool +is_digit(const uint8_t c) +{ + return in_range(c, '0', '9'); +} + +static inline bool +is_space(const char c) +{ + switch (c) { + case ' ': case '\f': case '\n': case 
'\r': case '\t': case '\v': + return true; + default: + return false; + } +} + +static inline bool +is_base64(const uint8_t c) +{ + return is_alpha(c) || is_digit(c) || c == '+' || c == '/' || c == '='; +} + +static inline bool +is_windows_path(const uint8_t* path) +{ + return is_alpha(path[0]) && (path[1] == ':' || path[1] == '|') + && (path[2] == '/' || path[2] == '\\'); +} + +/* URI utilities */ + +static inline bool +chunk_equals(const SerdChunk* a, const SerdChunk* b) +{ + return a->len == b->len + && !strncmp((const char*)a->buf, (const char*)b->buf, a->len); +} + +static inline size_t +uri_path_len(const SerdURI* uri) +{ + return uri->path_base.len + uri->path.len; +} + +static inline uint8_t +uri_path_at(const SerdURI* uri, size_t i) +{ + if (i < uri->path_base.len) { + return uri->path_base.buf[i]; + } else { + return uri->path.buf[i - uri->path_base.len]; + } +} + +/** Return true iff `uri` is within the base of `root` */ +static inline bool +uri_is_under(const SerdURI* uri, const SerdURI* root) +{ + if (!root || !root->scheme.len || + !chunk_equals(&root->scheme, &uri->scheme) || + !chunk_equals(&root->authority, &uri->authority)) { + return false; + } + + bool differ = false; + const size_t path_len = uri_path_len(uri); + const size_t root_len = uri_path_len(root); + for (size_t i = 0; i < path_len && i < root_len; ++i) { + if (uri_path_at(uri, i) != uri_path_at(root, i)) { + differ = true; + } + if (differ && uri_path_at(root, i) == '/') { + return false; + } + } + + return true; +} + +/* Error reporting */ + +static inline void +serd_error(SerdErrorSink error_sink, void* handle, const SerdError* e) +{ + if (error_sink) { + error_sink(handle, e); + } else { + fprintf(stderr, "error: %s:%u:%u: ", e->filename, e->line, e->col); + vfprintf(stderr, e->fmt, *e->args); + } +} + +#endif // SERD_INTERNAL_H
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/serdi.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,270 @@ +/* + Copyright 2011-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#define SERDI_ERROR(msg) fprintf(stderr, "serdi: " msg); +#define SERDI_ERRORF(fmt, ...) fprintf(stderr, "serdi: " fmt, __VA_ARGS__); + +static int +print_version(void) +{ + printf("serdi " SERD_VERSION " <http://drobilla.net/software/serd>\n"); + printf("Copyright 2011-2017 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... 
INPUT [BASE_URI]\n", name); + fprintf(os, "Read and write RDF syntax.\n"); + fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -b Fast bulk output for large serialisations.\n"); + fprintf(os, " -c PREFIX Chop PREFIX from matching blank node IDs.\n"); + fprintf(os, " -e Eat input one character at a time.\n"); + fprintf(os, " -f Keep full URIs in input (don't qualify).\n"); + fprintf(os, " -h Display this help and exit.\n"); + fprintf(os, " -i SYNTAX Input syntax: turtle/ntriples/trig/nquads.\n"); + fprintf(os, " -l Lax (non-strict) parsing.\n"); + fprintf(os, " -o SYNTAX Output syntax: turtle/ntriples/nquads.\n"); + fprintf(os, " -p PREFIX Add PREFIX to blank node IDs.\n"); + fprintf(os, " -q Suppress all output except data.\n"); + fprintf(os, " -r ROOT_URI Keep relative URIs within ROOT_URI.\n"); + fprintf(os, " -s INPUT Parse INPUT as string (terminates options).\n"); + fprintf(os, " -v Display version information and exit.\n"); + return error ? 1 : 0; +} + +static bool +set_syntax(SerdSyntax* syntax, const char* name) +{ + if (!strcmp(name, "turtle")) { + *syntax = SERD_TURTLE; + } else if (!strcmp(name, "ntriples")) { + *syntax = SERD_NTRIPLES; + } else if (!strcmp(name, "nquads")) { + *syntax = SERD_NQUADS; + } else if (!strcmp(name, "trig")) { + *syntax = SERD_TRIG; + } else { + SERDI_ERRORF("unknown syntax `%s'\n", name); + return false; + } + return true; +} + +static int +missing_arg(const char* name, char opt) +{ + SERDI_ERRORF("option requires an argument -- '%c'\n", opt); + return print_usage(name, true); +} + +static SerdStatus +quiet_error_sink(void* handle, const SerdError* e) +{ + return SERD_SUCCESS; +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + FILE* in_fd = NULL; + SerdSyntax input_syntax = SERD_TURTLE; + SerdSyntax output_syntax = SERD_NTRIPLES; + bool from_file = true; + bool bulk_read = true; + bool bulk_write = false; + bool full_uris = false; + bool lax 
= false; + bool quiet = false; + const uint8_t* in_name = NULL; + const uint8_t* add_prefix = NULL; + const uint8_t* chop_prefix = NULL; + const uint8_t* root_uri = NULL; + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_name = (const uint8_t*)"(stdin)"; + in_fd = stdin; + break; + } else if (argv[a][1] == 'b') { + bulk_write = true; + } else if (argv[a][1] == 'e') { + bulk_read = false; + } else if (argv[a][1] == 'f') { + full_uris = true; + } else if (argv[a][1] == 'h') { + return print_usage(argv[0], false); + } else if (argv[a][1] == 'l') { + lax = true; + } else if (argv[a][1] == 'q') { + quiet = true; + } else if (argv[a][1] == 'v') { + return print_version(); + } else if (argv[a][1] == 's') { + in_name = (const uint8_t*)"(string)"; + from_file = false; + ++a; + break; + } else if (argv[a][1] == 'i') { + if (++a == argc) { + return missing_arg(argv[0], 'i'); + } else if (!set_syntax(&input_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'o') { + if (++a == argc) { + return missing_arg(argv[0], 'o'); + } else if (!set_syntax(&output_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'p') { + if (++a == argc) { + return missing_arg(argv[0], 'p'); + } + add_prefix = (const uint8_t*)argv[a]; + } else if (argv[a][1] == 'c') { + if (++a == argc) { + return missing_arg(argv[0], 'c'); + } + chop_prefix = (const uint8_t*)argv[a]; + } else if (argv[a][1] == 'r') { + if (++a == argc) { + return missing_arg(argv[0], 'r'); + } + root_uri = (const uint8_t*)argv[a]; + } else { + SERDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + return print_usage(argv[0], true); + } + } + + if (a == argc) { + SERDI_ERROR("missing input\n"); + return 1; + } + + const uint8_t* input = (const uint8_t*)argv[a++]; + if (from_file) { + in_name = in_name ? 
in_name : input; + if (!in_fd) { + input = serd_uri_to_path(in_name); + if (!input || !(in_fd = serd_fopen((const char*)input, "r"))) { + return 1; + } + } + } + + SerdURI base_uri = SERD_URI_NULL; + SerdNode base = SERD_NODE_NULL; + if (a < argc) { // Base URI given on command line + base = serd_node_new_uri_from_string( + (const uint8_t*)argv[a], NULL, &base_uri); + } else if (from_file && in_fd != stdin) { // Use input file URI + base = serd_node_new_file_uri(input, NULL, &base_uri, true); + } + + FILE* out_fd = stdout; + SerdEnv* env = serd_env_new(&base); + + int output_style = 0; + if (output_syntax == SERD_NTRIPLES || output_syntax == SERD_NQUADS) { + output_style |= SERD_STYLE_ASCII; + } else if (output_syntax == SERD_TURTLE) { + output_style |= SERD_STYLE_ABBREVIATED; + if (!full_uris) { + output_style |= SERD_STYLE_CURIED; + } + } + + if ((input_syntax == SERD_TURTLE || input_syntax == SERD_TRIG) || + (output_style & SERD_STYLE_CURIED)) { + // Base URI may change and/or we're abbreviating URIs, so must resolve + output_style |= SERD_STYLE_RESOLVED; + } + + if (bulk_write) { + output_style |= SERD_STYLE_BULK; + } + + SerdWriter* writer = serd_writer_new( + output_syntax, (SerdStyle)output_style, + env, &base_uri, serd_file_sink, out_fd); + + SerdReader* reader = serd_reader_new( + input_syntax, writer, NULL, + (SerdBaseSink)serd_writer_set_base_uri, + (SerdPrefixSink)serd_writer_set_prefix, + (SerdStatementSink)serd_writer_write_statement, + (SerdEndSink)serd_writer_end_anon); + + serd_reader_set_strict(reader, !lax); + if (quiet) { + serd_reader_set_error_sink(reader, quiet_error_sink, NULL); + serd_writer_set_error_sink(writer, quiet_error_sink, NULL); + } + + SerdNode root = serd_node_from_string(SERD_URI, root_uri); + serd_writer_set_root_uri(writer, &root); + serd_writer_chop_blank_prefix(writer, chop_prefix); + serd_reader_add_blank_prefix(reader, add_prefix); + + SerdStatus status = SERD_SUCCESS; + if (!from_file) { + status = 
serd_reader_read_string(reader, input); + } else if (bulk_read) { + status = serd_reader_read_file_handle(reader, in_fd, in_name); + } else { + status = serd_reader_start_stream(reader, in_fd, in_name, false); + while (!status) { + status = serd_reader_read_chunk(reader); + } + serd_reader_end_stream(reader); + } + + serd_reader_free(reader); + serd_writer_finish(writer); + serd_writer_free(writer); + serd_env_free(env); + serd_node_free(&base); + + if (from_file) { + fclose(in_fd); + } + + if (fclose(out_fd)) { + perror("serdi: write error"); + status = SERD_ERR_UNKNOWN; + } + + return (status > SERD_FAILURE) ? 1 : 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/string.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,168 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <math.h> + +SERD_API +const uint8_t* +serd_strerror(SerdStatus st) +{ + switch (st) { + case SERD_SUCCESS: return (const uint8_t*)"Success"; + case SERD_FAILURE: return (const uint8_t*)"Non-fatal failure"; + case SERD_ERR_UNKNOWN: return (const uint8_t*)"Unknown error"; + case SERD_ERR_BAD_SYNTAX: return (const uint8_t*)"Invalid syntax"; + case SERD_ERR_BAD_ARG: return (const uint8_t*)"Invalid argument"; + case SERD_ERR_NOT_FOUND: return (const uint8_t*)"Not found"; + case SERD_ERR_ID_CLASH: return (const uint8_t*)"Blank node ID clash"; + case SERD_ERR_BAD_CURIE: return (const uint8_t*)"Invalid CURIE"; + case SERD_ERR_INTERNAL: return (const uint8_t*)"Internal error"; + } + return (const uint8_t*)"Unknown error"; // never reached +} + +SERD_API +size_t +serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags) +{ + size_t n_chars = 0; + size_t i = 0; + SerdNodeFlags f = 0; + for (; str[i]; ++i) { + if ((str[i] & 0xC0) != 0x80) { + // Does not start with `10', start of a new character + ++n_chars; + switch (str[i]) { + 
case '\r': case '\n': + f |= SERD_HAS_NEWLINE; + break; + case '"': + f |= SERD_HAS_QUOTE; + } + } + } + if (n_bytes) { + *n_bytes = i; + } + if (flags) { + *flags = f; + } + return n_chars; +} + +static inline double +read_sign(const char** sptr) +{ + double sign = 1.0; + switch (**sptr) { + case '-': sign = -1.0; + case '+': ++(*sptr); + default: return sign; + } +} + +SERD_API +double +serd_strtod(const char* str, char** endptr) +{ + double result = 0.0; + + // Point s at the first non-whitespace character + const char* s = str; + while (is_space(*s)) { ++s; } + + // Read leading sign if necessary + const double sign = read_sign(&s); + + // Parse integer part + for (; is_digit(*s); ++s) { + result = (result * 10.0) + (*s - '0'); + } + + // Parse fractional part + if (*s == '.') { + double denom = 10.0; + for (++s; is_digit(*s); ++s) { + result += (*s - '0') / denom; + denom *= 10.0; + } + } + + // Parse exponent + if (*s == 'e' || *s == 'E') { + ++s; + double expt = 0.0; + double expt_sign = read_sign(&s); + for (; is_digit(*s); ++s) { + expt = (expt * 10.0) + (*s - '0'); + } + result *= pow(10, expt * expt_sign); + } + + if (endptr) { + *endptr = (char*)s; + } + + return result * sign; +} + +/** + Base64 decoding table. + This is indexed by encoded characters and returns the numeric value used + for decoding, shifted up by 47 to be in the range of printable ASCII. + A '$' is a placeholder for characters not in the base64 alphabet. +*/ +static const char b64_unmap[] = + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$m$$$ncdefghijkl$$$$$$" + "$/0123456789:;<=>?@ABCDEFGH$$$$$$IJKLMNOPQRSTUVWXYZ[\\]^_`ab$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"; + +static inline uint8_t unmap(const uint8_t in) { return b64_unmap[in] - 47; } + +/** + Decode 4 base64 characters to 3 raw bytes. 
+*/ +static inline size_t +decode_chunk(const uint8_t in[4], uint8_t out[3]) +{ + out[0] = (uint8_t)(((unmap(in[0]) << 2)) | unmap(in[1]) >> 4); + out[1] = (uint8_t)(((unmap(in[1]) << 4) & 0xF0) | unmap(in[2]) >> 2); + out[2] = (uint8_t)(((unmap(in[2]) << 6) & 0xC0) | unmap(in[3])); + return 1 + (in[2] != '=') + ((in[2] != '=') && (in[3] != '=')); +} + +SERD_API +void* +serd_base64_decode(const uint8_t* str, size_t len, size_t* size) +{ + void* buf = malloc((len * 3) / 4 + 2); + *size = 0; + for (size_t i = 0, j = 0; i < len; j += 3) { + uint8_t in[] = "===="; + size_t n_in = 0; + for (; i < len && n_in < 4; ++n_in) { + for (; i < len && !is_base64(str[i]); ++i) {} // Skip junk + in[n_in] = str[i++]; + } + if (n_in > 1) { + *size += decode_chunk(in, (uint8_t*)buf + j); + } + } + return buf; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/uri.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,504 @@ +/* + Copyright 2011-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "serd_internal.h" + +#include <stdlib.h> +#include <string.h> + +// #define URI_DEBUG 1 + +SERD_API +const uint8_t* +serd_uri_to_path(const uint8_t* uri) +{ + const uint8_t* path = uri; + if (!is_windows_path(uri) && serd_uri_string_has_scheme(uri)) { + if (strncmp((const char*)uri, "file:", 5)) { + fprintf(stderr, "Non-file URI `%s'\n", uri); + return NULL; + } else if (!strncmp((const char*)uri, "file://localhost/", 17)) { + path = uri + 16; + } else if (!strncmp((const char*)uri, "file://", 7)) { + path = uri + 7; + } else { + fprintf(stderr, "Invalid file URI `%s'\n", uri); + return NULL; + } + if (is_windows_path(path + 1)) { + ++path; // Special case for terrible Windows file URIs + } + } + return path; +} + +SERD_API +uint8_t* +serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname) +{ + const uint8_t* path = uri; + if (hostname) { + *hostname = NULL; + } + if (!strncmp((const char*)uri, "file://", 7)) { + const uint8_t* auth = uri + 7; + if (*auth == '/') { // No hostname + path = auth; + } else { // Has hostname + if (!(path = (const 
uint8_t*)strchr((const char*)auth, '/'))) { + return NULL; + } + if (hostname) { + *hostname = (uint8_t*)calloc(1, path - auth + 1); + memcpy(*hostname, auth, path - auth); + } + } + } + + if (is_windows_path(path + 1)) { + ++path; + } + + SerdChunk chunk = { NULL, 0 }; + for (const uint8_t* s = path; *s; ++s) { + if (*s == '%') { + if (*(s + 1) == '%') { + serd_chunk_sink("%", 1, &chunk); + ++s; + } else if (is_digit(*(s + 1)) && is_digit(*(s + 2))) { + const uint8_t code[3] = { *(s + 1), *(s + 2), 0 }; + uint32_t num; + sscanf((const char*)code, "%X", &num); + const uint8_t c = num; + serd_chunk_sink(&c, 1, &chunk); + s += 2; + } else { + s += 2; // Junk escape, ignore + } + } else { + serd_chunk_sink(s, 1, &chunk); + } + } + return serd_chunk_sink_finish(&chunk); +} + +SERD_API +bool +serd_uri_string_has_scheme(const uint8_t* utf8) +{ + // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + if (!utf8 || !is_alpha(utf8[0])) { + return false; // Invalid scheme initial character, URI is relative + } + for (uint8_t c; (c = *++utf8) != '\0';) { + switch (c) { + case ':': + return true; // End of scheme + case '+': case '-': case '.': + break; // Valid scheme character, continue + default: + if (!is_alpha(c) && !is_digit(c)) { + return false; // Invalid scheme character + } + } + } + + return false; +} + +#ifdef URI_DEBUG +static void +serd_uri_dump(const SerdURI* uri, FILE* file) +{ +#define PRINT_PART(range, name) \ + if (range.buf) { \ + fprintf(stderr, " " name " = "); \ + fwrite((range).buf, 1, (range).len, stderr); \ + fprintf(stderr, "\n"); \ + } + + PRINT_PART(uri->scheme, "scheme "); + PRINT_PART(uri->authority, "authority"); + PRINT_PART(uri->path_base, "path_base"); + PRINT_PART(uri->path, "path "); + PRINT_PART(uri->query, "query "); + PRINT_PART(uri->fragment, "fragment "); +} +#endif + +SERD_API +SerdStatus +serd_uri_parse(const uint8_t* utf8, SerdURI* uri) +{ + *uri = SERD_URI_NULL; + + const uint8_t* ptr = utf8; + + /* See 
http://tools.ietf.org/html/rfc3986#section-3 + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + */ + + /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ + if (is_alpha(*ptr)) { + for (uint8_t c = *++ptr; true; c = *++ptr) { + switch (c) { + case '\0': case '/': case '?': case '#': + ptr = utf8; + goto path; // Relative URI (starts with path by definition) + case ':': + uri->scheme.buf = utf8; + uri->scheme.len = (ptr++) - utf8; + goto maybe_authority; // URI with scheme + case '+': case '-': case '.': + continue; + default: + if (is_alpha(c) || is_digit(c)) { + continue; + } + } + } + } + + /* S3.2: The authority component is preceded by a double slash ("//") + and is terminated by the next slash ("/"), question mark ("?"), + or number sign ("#") character, or by the end of the URI. + */ +maybe_authority: + if (*ptr == '/' && *(ptr + 1) == '/') { + ptr += 2; + uri->authority.buf = ptr; + for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '/': goto path; + case '?': goto query; + case '#': goto fragment; + default: + ++uri->authority.len; + } + } + } + + /* RFC3986 S3.3: The path is terminated by the first question mark ("?") + or number sign ("#") character, or by the end of the URI. + */ +path: + switch (*ptr) { + case '?': goto query; + case '#': goto fragment; + case '\0': goto end; + default: break; + } + uri->path.buf = ptr; + uri->path.len = 0; + for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '?': goto query; + case '#': goto fragment; + default: + ++uri->path.len; + } + } + + /* RFC3986 S3.4: The query component is indicated by the first question + mark ("?") character and terminated by a number sign ("#") character + or by the end of the URI. 
+ */ +query: + if (*ptr == '?') { + uri->query.buf = ++ptr; + for (uint8_t c; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '#': + goto fragment; + default: + ++uri->query.len; + } + } + } + + /* RFC3986 S3.5: A fragment identifier component is indicated by the + presence of a number sign ("#") character and terminated by the end + of the URI. + */ +fragment: + if (*ptr == '#') { + uri->fragment.buf = ptr; + while (*ptr++ != '\0') { + ++uri->fragment.len; + } + } + +end: + #ifdef URI_DEBUG + fprintf(stderr, "PARSE URI <%s>\n", utf8); + serd_uri_dump(uri, stderr); + fprintf(stderr, "\n"); + #endif + + return SERD_SUCCESS; +} + +/** + Remove leading dot components from `path`. + See http://tools.ietf.org/html/rfc3986#section-5.2.3 + @param up Set to the number of up-references (e.g. "../") trimmed + @return A pointer to the new start of `path` +*/ +static const uint8_t* +remove_dot_segments(const uint8_t* path, size_t len, size_t* up) +{ + const uint8_t* begin = path; + const uint8_t* const end = path + len; + + *up = 0; + while (begin < end) { + switch (begin[0]) { + case '.': + switch (begin[1]) { + case '/': + begin += 2; // Chop leading "./" + break; + case '.': + switch (begin[2]) { + case '\0': + ++*up; + begin += 2; // Chop input ".." + break; + case '/': + ++*up; + begin += 3; // Chop leading "../" + break; + default: + return begin; + } + break; + case '\0': + ++begin; // Chop input "." 
(and fall-through) + default: + return begin; + } + break; + case '/': + switch (begin[1]) { + case '.': + switch (begin[2]) { + case '/': + begin += 2; // Leading "/./" => "/" + break; + case '.': + switch (begin[3]) { + case '/': + ++*up; + begin += 3; // Leading "/../" => "/" + } + break; + default: + return begin; + } + } // else fall through + default: + return begin; // Finished chopping dot components + } + } + + return begin; +} + +/// Merge `base` and `path` in-place +static void +merge(SerdChunk* base, SerdChunk* path) +{ + size_t up; + const uint8_t* begin = remove_dot_segments(path->buf, path->len, &up); + const uint8_t* end = path->buf + path->len; + + if (base->len) { + // Find the up'th last slash + const uint8_t* base_last = (base->buf + base->len - 1); + ++up; + do { + if (*base_last == '/') { + --up; + } + } while (up > 0 && (--base_last > base->buf)); + + // Set path prefix + base->len = base_last - base->buf + 1; + } + + // Set path suffix + path->buf = begin; + path->len = end - begin; +} + +/// See http://tools.ietf.org/html/rfc3986#section-5.2.2 +SERD_API +void +serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) +{ + if (!base->scheme.len) { + *t = *r; // Don't resolve against non-absolute URIs + return; + } + + t->path_base.buf = NULL; + t->path_base.len = 0; + if (r->scheme.len) { + *t = *r; + } else { + if (r->authority.len) { + t->authority = r->authority; + t->path = r->path; + t->query = r->query; + } else { + t->path = r->path; + if (!r->path.len) { + t->path_base = base->path; + if (r->query.len) { + t->query = r->query; + } else { + t->query = base->query; + } + } else { + if (r->path.buf[0] != '/') { + t->path_base = base->path; + } + merge(&t->path_base, &t->path); + t->query = r->query; + } + t->authority = base->authority; + } + t->scheme = base->scheme; + t->fragment = r->fragment; + } + + #ifdef URI_DEBUG + fprintf(stderr, "## RESOLVE URI\n# BASE\n"); + serd_uri_dump(base, stderr); + fprintf(stderr, "# URI\n"); 
+ serd_uri_dump(r, stderr); + fprintf(stderr, "# RESULT\n"); + serd_uri_dump(t, stderr); + fprintf(stderr, "\n"); + #endif +} + +/** Write the path of `uri` starting at index `i` */ +static size_t +write_path_tail(SerdSink sink, void* stream, const SerdURI* uri, size_t i) +{ + size_t len = 0; + if (i < uri->path_base.len) { + len += sink(uri->path_base.buf + i, uri->path_base.len - i, stream); + } + if (uri->path.buf) { + if (i < uri->path_base.len) { + len += sink(uri->path.buf, uri->path.len, stream); + } else { + const size_t j = (i - uri->path_base.len); + len += sink(uri->path.buf + j, uri->path.len - j, stream); + } + } + return len; +} + +/** Write the path of `uri` relative to the path of `base`. */ +static size_t +write_rel_path(SerdSink sink, + void* stream, + const SerdURI* uri, + const SerdURI* base) +{ + const size_t path_len = uri_path_len(uri); + const size_t base_len = uri_path_len(base); + const size_t min_len = (path_len < base_len) ? path_len : base_len; + + // Find the last separator common to both paths + size_t last_shared_sep = 0; + size_t i = 0; + for (; i < min_len && uri_path_at(uri, i) == uri_path_at(base, i); ++i) { + if (uri_path_at(uri, i) == '/') { + last_shared_sep = i; + } + } + + if (i == path_len && i == base_len) { // Paths are identical + return 0; + } else if (last_shared_sep == 0) { // No common components + return write_path_tail(sink, stream, uri, 0); + } + + // Find the number of up references ("..") required + size_t up = 0; + for (size_t s = last_shared_sep + 1; s < base_len; ++s) { + if (uri_path_at(base, s) == '/') { + ++up; + } + } + + // Write up references + size_t len = 0; + for (size_t u = 0; u < up; ++u) { + len += sink("../", 3, stream); + } + + // Write suffix + return len += write_path_tail(sink, stream, uri, last_shared_sep + 1); +} + +/// See http://tools.ietf.org/html/rfc3986#section-5.3 +SERD_API +size_t +serd_uri_serialise_relative(const SerdURI* uri, + const SerdURI* base, + const SerdURI* root, + 
SerdSink sink, + void* stream) +{ + size_t len = 0; + const bool relative = uri_is_under(uri, root ? root : base); + if (relative) { + len = write_rel_path(sink, stream, uri, base); + } + if (!relative || (!len && base->query.buf)) { + if (uri->scheme.buf) { + len += sink(uri->scheme.buf, uri->scheme.len, stream); + len += sink(":", 1, stream); + } + if (uri->authority.buf) { + len += sink("//", 2, stream); + len += sink(uri->authority.buf, uri->authority.len, stream); + } + len += write_path_tail(sink, stream, uri, 0); + } + if (uri->query.buf) { + len += sink("?", 1, stream); + len += sink(uri->query.buf, uri->query.len, stream); + } + if (uri->fragment.buf) { + // Note uri->fragment.buf includes the leading `#' + len += sink(uri->fragment.buf, uri->fragment.len, stream); + } + return len; +} + +/// See http://tools.ietf.org/html/rfc3986#section-5.3 +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) +{ + return serd_uri_serialise_relative(uri, NULL, NULL, sink, stream); +}
/* Changeset diff header (kept for provenance): --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/serd/src/writer.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,985 @@ */
/*
  Copyright 2011-2017 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include "serd_internal.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Namespace URI prefixes for the RDF and XML Schema vocabularies */
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_XSD "http://www.w3.org/2001/XMLSchema#"

/** A graph/subject/predicate triple of nodes describing a writer position. */
typedef struct {
	SerdNode graph;
	SerdNode subject;
	SerdNode predicate;
} WriteContext;

/** Empty context: every node is zeroed and typed SERD_NOTHING. */
static const WriteContext WRITE_CONTEXT_NULL = {
	{ 0, 0, 0, 0, SERD_NOTHING },
	{ 0, 0, 0, 0, SERD_NOTHING },
	{ 0, 0, 0, 0, SERD_NOTHING }
};

/**
   Kinds of separator the writer can emit between tokens.
   NOTE(review): the enumerator order presumably has to stay in step with the
   SepRule table defined after this enum -- confirm before reordering.
*/
typedef enum {
	SEP_NONE,
	SEP_END_S,       ///< End of a subject ('.')
	SEP_END_P,       ///< End of a predicate (';')
	SEP_END_O,       ///< End of an object (',')
	SEP_S_P,         ///< Between a subject and predicate (whitespace)
	SEP_P_O,         ///< Between a predicate and object (whitespace)
	SEP_ANON_BEGIN,  ///< Start of anonymous node ('[')
	SEP_ANON_END,    ///< End of anonymous node (']')
	SEP_LIST_BEGIN,  ///< Start of list ('(')
	SEP_LIST_SEP,    ///< List separator (whitespace)
	SEP_LIST_END,    ///< End of list (')')
	SEP_GRAPH_BEGIN, ///< Start of graph ('{')
	SEP_GRAPH_END    ///< End of graph ('}')
} Sep;

+typedef struct { + const char* str; ///< Sep string + uint8_t len; ///< Length of sep string + uint8_t space_before; ///< Newline before sep + uint8_t space_after_node; ///< Newline after sep if after node + uint8_t space_after_sep; ///< Newline after sep if after sep +} SepRule; + +static const SepRule rules[] = { + { NULL, 0, 0, 0, 0 }, + { " .\n\n", 4, 0, 0, 0 }, + { " ;", 2, 0, 1, 1 }, + { " ,", 2, 0, 1, 0 }, + { NULL, 0, 0, 1, 0 }, + { " ", 1, 0, 0, 0 }, + { "[", 1, 0, 1, 1 }, + { "]", 1, 1, 0, 0 }, + { "(", 1, 0, 0, 0 }, + { NULL, 1, 0, 1, 0 }, + { ")", 1, 1, 0, 0 }, + { " {", 2, 0, 1, 1 }, + { " }", 2, 0, 1, 1 }, + { "\n", 1, 0, 1, 0 } +}; + +struct SerdWriterImpl { + SerdSyntax syntax; + SerdStyle style; + SerdEnv* env; + SerdNode root_node; + SerdURI root_uri; + SerdURI base_uri; + SerdStack anon_stack; + SerdByteSink byte_sink; + SerdErrorSink error_sink; + void* error_handle; + WriteContext context; + SerdNode list_subj; + unsigned list_depth; + unsigned indent; + uint8_t* bprefix; + size_t bprefix_len; + Sep last_sep; + bool empty; +}; + +typedef enum { + WRITE_STRING, + WRITE_LONG_STRING +} TextContext; + +static bool +supports_abbrev(const SerdWriter* writer) +{ + return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG; +} + +static void +w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...) +{ + /* TODO: This results in errors with no file information, which is not + helpful when re-serializing a file (particularly for "undefined + namespace prefix" errors. The statement sink API needs to be changed to + add a Cursor parameter so the source can notify the writer of the + statement origin for better error reporting. 
*/ + + va_list args; + va_start(args, fmt); + const SerdError e = { st, NULL, 0, 0, fmt, &args }; + serd_error(writer->error_sink, writer->error_handle, &e); + va_end(args); +} + +static inline WriteContext* +anon_stack_top(SerdWriter* writer) +{ + assert(!serd_stack_is_empty(&writer->anon_stack)); + return (WriteContext*)(writer->anon_stack.buf + + writer->anon_stack.size - sizeof(WriteContext)); +} + +static void +copy_node(SerdNode* dst, const SerdNode* src) +{ + if (src) { + dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1); + dst->n_bytes = src->n_bytes; + dst->n_chars = src->n_chars; + dst->flags = src->flags; + dst->type = src->type; + memcpy((char*)dst->buf, src->buf, src->n_bytes + 1); + } else { + dst->type = SERD_NOTHING; + } +} + +static inline size_t +sink(const void* buf, size_t len, SerdWriter* writer) +{ + return serd_byte_sink_write(buf, len, &writer->byte_sink); +} + +// Parse a UTF-8 character, set *size to the length, and return the code point +static inline uint32_t +parse_utf8_char(SerdWriter* writer, const uint8_t* utf8, size_t* size) +{ + uint32_t c = 0; + if ((utf8[0] & 0x80) == 0) { // Starts with `0' + *size = 1; + c = utf8[0]; + } else if ((utf8[0] & 0xE0) == 0xC0) { // Starts with `110' + *size = 2; + c = utf8[0] & 0x1F; + } else if ((utf8[0] & 0xF0) == 0xE0) { // Starts with `1110' + *size = 3; + c = utf8[0] & 0x0F; + } else if ((utf8[0] & 0xF8) == 0xF0) { // Starts with `11110' + *size = 4; + c = utf8[0] & 0x07; + } else { + w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]); + *size = 0; + return 0; + } + + size_t i = 0; + uint8_t in = utf8[i++]; + +#define READ_BYTE() \ + in = utf8[i++] & 0x3F; \ + c = (c << 6) | in; + + switch (*size) { + case 4: READ_BYTE(); + case 3: READ_BYTE(); + case 2: READ_BYTE(); + } + + return c; +} + +// Write a single character, as an escape for single byte characters +// (Caller prints any single byte characters that don't need escaping) +static size_t 
+write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size) +{ + const uint8_t replacement_char[] = { 0xEF, 0xBF, 0xBD }; + char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + const uint8_t in = utf8[0]; + + uint32_t c = parse_utf8_char(writer, utf8, size); + switch (*size) { + case 0: + w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", in); + return sink(replacement_char, sizeof(replacement_char), writer); + case 1: + snprintf(escape, sizeof(escape), "\\u%04X", in); + return sink(escape, 6, writer); + default: + break; + } + + if (!(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 character directly to UTF-8 output + return sink(utf8, *size, writer); + } + + if (c <= 0xFFFF) { + snprintf(escape, sizeof(escape), "\\u%04X", c); + return sink(escape, 6, writer); + } else { + snprintf(escape, sizeof(escape), "\\U%08X", c); + return sink(escape, 10, writer); + } +} + +static inline bool +uri_must_escape(const uint8_t c) +{ + switch (c) { + case ' ': case '"': case '<': case '>': case '\\': + case '^': case '`': case '{': case '|': case '}': + return true; + default: + return !in_range(c, 0x20, 0x7E); + } +} + +static size_t +write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes;) { + size_t j = i; // Index of next character that must be escaped + for (; j < n_bytes; ++j) { + if (uri_must_escape(utf8[j])) { + break; + } + } + + // Bulk write all characters up to this special one + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + // Write UTF-8 character + size_t size = 0; + len += write_character(writer, utf8 + i, &size); + i += size; + } + return len; +} + +static bool +lname_must_escape(const uint8_t c) +{ + /* This arbitrary list of characters, most of which have nothing to do with + Turtle, must be handled as special cases here because the RDF and SPARQL + WGs are apparently intent on making the once elegant Turtle a baroque 
+ and inconsistent mess, throwing elegance and extensibility completely + out the window for no good reason. + + Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped + in local names, so they are not escaped here. */ + + switch (c) { + case '\'': case '!': case '#': case '$': case '%': case '&': + case '(': case ')': case '*': case '+': case ',': case '/': + case ';': case '=': case '?': case '@': case '~': + return true; + } + return false; +} + +static size_t +write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes; ++i) { + size_t j = i; // Index of next character that must be escaped + for (; j < n_bytes; ++j) { + if (lname_must_escape(utf8[j])) { + break; + } + } + + // Bulk write all characters up to this special one + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + // Write escape + len += sink("\\", 1, writer); + len += sink(&utf8[i], 1, writer); + } + return len; +} + +static size_t +write_text(SerdWriter* writer, TextContext ctx, + const uint8_t* utf8, size_t n_bytes) +{ + size_t len = 0; + for (size_t i = 0; i < n_bytes;) { + // Fast bulk write for long strings of printable ASCII + size_t j = i; + for (; j < n_bytes; ++j) { + if (utf8[j] == '\\' || utf8[j] == '"' + || (!in_range(utf8[j], 0x20, 0x7E))) { + break; + } + } + + len += sink(&utf8[i], j - i, writer); + if ((i = j) == n_bytes) { + break; // Reached end + } + + uint8_t in = utf8[i++]; + if (ctx == WRITE_LONG_STRING) { + switch (in) { + case '\\': len += sink("\\\\", 2, writer); continue; + case '\b': len += sink("\\b", 2, writer); continue; + case '\n': case '\r': case '\t': case '\f': + len += sink(&in, 1, writer); // Write character as-is + continue; + case '\"': + if (i == n_bytes) { // '"' at string end + len += sink("\\\"", 2, writer); + } else { + len += sink(&in, 1, writer); + } + continue; + default: break; + } + } else if (ctx == WRITE_STRING) { + 
switch (in) { + case '\\': len += sink("\\\\", 2, writer); continue; + case '\n': len += sink("\\n", 2, writer); continue; + case '\r': len += sink("\\r", 2, writer); continue; + case '\t': len += sink("\\t", 2, writer); continue; + case '"': len += sink("\\\"", 2, writer); continue; + default: break; + } + if (writer->syntax == SERD_TURTLE) { + switch (in) { + case '\b': len += sink("\\b", 2, writer); continue; + case '\f': len += sink("\\f", 2, writer); continue; + } + } + } + + size_t size = 0; + len += write_character(writer, utf8 + i - 1, &size); + + if (size == 0) { + return len; + } + + i += size - 1; + } + return len; +} + +static size_t +uri_sink(const void* buf, size_t len, void* stream) +{ + return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len); +} + +static void +write_newline(SerdWriter* writer) +{ + sink("\n", 1, writer); + for (unsigned i = 0; i < writer->indent; ++i) { + sink("\t", 1, writer); + } +} + +static void +write_sep(SerdWriter* writer, const Sep sep) +{ + const SepRule* rule = &rules[sep]; + if (rule->space_before) { + write_newline(writer); + } + if (rule->str) { + sink(rule->str, rule->len, writer); + } + if ( (writer->last_sep && rule->space_after_sep) + || (!writer->last_sep && rule->space_after_node)) { + write_newline(writer); + } else if (writer->last_sep && rule->space_after_node) { + sink(" ", 1, writer); + } + writer->last_sep = sep; +} + +static SerdStatus +reset_context(SerdWriter* writer, bool graph) +{ + if (graph) { + writer->context.graph.type = SERD_NOTHING; + } + writer->context.subject.type = SERD_NOTHING; + writer->context.predicate.type = SERD_NOTHING; + writer->empty = false; + return SERD_SUCCESS; +} + +static SerdStatus +free_context(SerdWriter* writer) +{ + serd_node_free(&writer->context.graph); + serd_node_free(&writer->context.subject); + serd_node_free(&writer->context.predicate); + return reset_context(writer, true); +} + +typedef enum { + FIELD_NONE, + FIELD_SUBJECT, + FIELD_PREDICATE, + 
FIELD_OBJECT, + FIELD_GRAPH +} Field; + +static bool +is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags) +{ + return (supports_abbrev(writer) && + ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) || + (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN)))); +} + +static bool +write_node(SerdWriter* writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang, + Field field, + SerdStatementFlags flags) +{ + SerdChunk uri_prefix; + SerdNode prefix; + SerdChunk suffix; + bool has_scheme; + switch (node->type) { + case SERD_BLANK: + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + write_sep(writer, SEP_ANON_BEGIN); + } else if (supports_abbrev(writer) + && (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN))) { + assert(writer->list_depth == 0); + copy_node(&writer->list_subj, node); + ++writer->list_depth; + ++writer->indent; + write_sep(writer, SEP_LIST_BEGIN); + } else if (supports_abbrev(writer) + && (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN))) { + ++writer->indent; + ++writer->list_depth; + write_sep(writer, SEP_LIST_BEGIN); + } else if (supports_abbrev(writer) + && ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) + || (field == FIELD_OBJECT && (flags & SERD_EMPTY_O)))) { + sink("[]", 2, writer); + } else { + sink("_:", 2, writer); + if (writer->bprefix && !strncmp((const char*)node->buf, + (const char*)writer->bprefix, + writer->bprefix_len)) { + sink(node->buf + writer->bprefix_len, + node->n_bytes - writer->bprefix_len, + writer); + } else { + sink(node->buf, node->n_bytes, writer); + } + } + break; + case SERD_CURIE: + switch (writer->syntax) { + case SERD_NTRIPLES: + case SERD_NQUADS: + if (serd_env_expand(writer->env, node, &uri_prefix, &suffix)) { + w_err(writer, SERD_ERR_BAD_CURIE, + "undefined namespace prefix `%s'\n", node->buf); + return false; + } + sink("<", 1, writer); + write_uri(writer, uri_prefix.buf, uri_prefix.len); + write_uri(writer, suffix.buf, 
suffix.len); + sink(">", 1, writer); + break; + case SERD_TURTLE: + case SERD_TRIG: + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + write_sep(writer, SEP_ANON_BEGIN); + sink("== ", 3, writer); + } + write_lname(writer, node->buf, node->n_bytes); + if (is_inline_start(writer, field, flags)) { + sink(" ;", 2, writer); + write_newline(writer); + } + } + break; + case SERD_LITERAL: + if (supports_abbrev(writer) && datatype && datatype->buf) { + const char* type_uri = (const char*)datatype->buf; + if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && ( + !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") || + !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) { + sink(node->buf, node->n_bytes, writer); + break; + } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && + !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") && + strchr((const char*)node->buf, '.') && + node->buf[node->n_bytes - 1] != '.') { + /* xsd:decimal literals without trailing digits, e.g. "5.", can + not be written bare in Turtle. We could add a 0 which is + prettier, but changes the text and breaks round tripping. 
+ */ + sink(node->buf, node->n_bytes, writer); + break; + } + } + if (supports_abbrev(writer) + && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) { + sink("\"\"\"", 3, writer); + write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes); + sink("\"\"\"", 3, writer); + } else { + sink("\"", 1, writer); + write_text(writer, WRITE_STRING, node->buf, node->n_bytes); + sink("\"", 1, writer); + } + if (lang && lang->buf) { + sink("@", 1, writer); + sink(lang->buf, lang->n_bytes, writer); + } else if (datatype && datatype->buf) { + sink("^^", 2, writer); + write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags); + } + break; + case SERD_URI: + if (is_inline_start(writer, field, flags)) { + ++writer->indent; + write_sep(writer, SEP_ANON_BEGIN); + sink("== ", 3, writer); + } + has_scheme = serd_uri_string_has_scheme(node->buf); + if (field == FIELD_PREDICATE && supports_abbrev(writer) + && !strcmp((const char*)node->buf, NS_RDF "type")) { + sink("a", 1, writer); + break; + } else if (supports_abbrev(writer) + && !strcmp((const char*)node->buf, NS_RDF "nil")) { + sink("()", 2, writer); + break; + } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) && + serd_env_qualify(writer->env, node, &prefix, &suffix)) { + write_uri(writer, prefix.buf, prefix.n_bytes); + sink(":", 1, writer); + write_uri(writer, suffix.buf, suffix.len); + break; + } + sink("<", 1, writer); + if (writer->style & SERD_STYLE_RESOLVED) { + SerdURI in_base_uri, uri, abs_uri; + serd_env_get_base_uri(writer->env, &in_base_uri); + serd_uri_parse(node->buf, &uri); + serd_uri_resolve(&uri, &in_base_uri, &abs_uri); + bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri); + SerdURI* root = rooted ? 
&writer->root_uri : & writer->base_uri; + if (!uri_is_under(&abs_uri, root) || + writer->syntax == SERD_NTRIPLES || + writer->syntax == SERD_NQUADS) { + serd_uri_serialise(&abs_uri, uri_sink, writer); + } else { + serd_uri_serialise_relative( + &uri, &writer->base_uri, root, uri_sink, writer); + } + } else { + write_uri(writer, node->buf, node->n_bytes); + } + sink(">", 1, writer); + if (is_inline_start(writer, field, flags)) { + sink(" ;", 2, writer); + write_newline(writer); + } + default: + break; + } + writer->last_sep = SEP_NONE; + return true; +} + +static inline bool +is_resource(const SerdNode* node) +{ + return node->type > SERD_LITERAL; +} + +static void +write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred) +{ + write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags); + write_sep(writer, SEP_P_O); + copy_node(&writer->context.predicate, pred); +} + +static bool +write_list_obj(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!strcmp((const char*)object->buf, NS_RDF "nil")) { + --writer->indent; + write_sep(writer, SEP_LIST_END); + return true; + } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) { + write_sep(writer, SEP_LIST_SEP); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + return false; +} + +SERD_API +SerdStatus +serd_writer_write_statement(SerdWriter* writer, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!subject || !predicate || !object + || !subject->buf || !predicate->buf || !object->buf + || !is_resource(subject) || !is_resource(predicate)) { + return SERD_ERR_BAD_ARG; + } + +#define TRY(write_result) \ + if (!write_result) { \ + return SERD_ERR_UNKNOWN; \ + } + + switch (writer->syntax) { + case SERD_NTRIPLES: + 
case SERD_NQUADS: + TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags)); + sink(" ", 1, writer); + TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags)); + sink(" ", 1, writer); + TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags)); + if (writer->syntax == SERD_NQUADS && graph) { + sink(" ", 1, writer); + TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + } + sink(" .\n", 3, writer); + return SERD_SUCCESS; + default: + break; + } + + if ((graph && !serd_node_equals(graph, &writer->context.graph)) || + (!graph && writer->context.graph.type)) { + writer->indent = 0; + if (writer->context.subject.type) { + write_sep(writer, SEP_END_S); + } + if (writer->context.graph.type) { + write_sep(writer, SEP_GRAPH_END); + } + + reset_context(writer, true); + if (graph) { + TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags)); + ++writer->indent; + write_sep(writer, SEP_GRAPH_BEGIN); + copy_node(&writer->context.graph, graph); + } + } + + if ((flags & SERD_LIST_CONT)) { + if (write_list_obj(writer, flags, predicate, object, datatype, lang)) { + // Reached end of list + if (--writer->list_depth == 0 && writer->list_subj.type) { + reset_context(writer, false); + serd_node_free(&writer->context.subject); + writer->context.subject = writer->list_subj; + writer->list_subj = SERD_NODE_NULL; + } + return SERD_SUCCESS; + } + } else if (serd_node_equals(subject, &writer->context.subject)) { + if (serd_node_equals(predicate, &writer->context.predicate)) { + // Abbreviate S P + if (!(flags & SERD_ANON_O_BEGIN)) { + ++writer->indent; + } + write_sep(writer, SEP_END_O); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + if (!(flags & SERD_ANON_O_BEGIN)) { + --writer->indent; + } + } else { + // Abbreviate S + Sep sep = writer->context.predicate.type ? 
SEP_END_P : SEP_S_P; + write_sep(writer, sep); + write_pred(writer, flags, predicate); + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + } else { + // No abbreviation + if (writer->context.subject.type) { + assert(writer->indent > 0); + --writer->indent; + if (serd_stack_is_empty(&writer->anon_stack)) { + write_sep(writer, SEP_END_S); + } + } else if (!writer->empty) { + write_sep(writer, SEP_S_P); + } + + if (!(flags & SERD_ANON_CONT)) { + write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags); + ++writer->indent; + write_sep(writer, SEP_S_P); + } else { + ++writer->indent; + } + + reset_context(writer, false); + copy_node(&writer->context.subject, subject); + + if (!(flags & SERD_LIST_S_BEGIN)) { + write_pred(writer, flags, predicate); + } + + write_node(writer, object, datatype, lang, FIELD_OBJECT, flags); + } + + if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) { + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + WriteContext new_context = { + serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL }; + if ((flags & SERD_ANON_S_BEGIN)) { + new_context.predicate = serd_node_copy(predicate); + } + writer->context = new_context; + } else { + copy_node(&writer->context.graph, graph); + copy_node(&writer->context.subject, subject); + copy_node(&writer->context.predicate, predicate); + } + + return SERD_SUCCESS; +} + +SERD_API +SerdStatus +serd_writer_end_anon(SerdWriter* writer, + const SerdNode* node) +{ + if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) { + return SERD_SUCCESS; + } + if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) { + w_err(writer, SERD_ERR_UNKNOWN, + "unexpected end of anonymous node\n"); + return SERD_ERR_UNKNOWN; + } + --writer->indent; + write_sep(writer, SEP_ANON_END); + free_context(writer); + writer->context = *anon_stack_top(writer); + serd_stack_pop(&writer->anon_stack, 
sizeof(WriteContext)); + const bool is_subject = serd_node_equals(node, &writer->context.subject); + if (is_subject) { + copy_node(&writer->context.subject, node); + writer->context.predicate.type = SERD_NOTHING; + } + return SERD_SUCCESS; +} + +SERD_API +SerdStatus +serd_writer_finish(SerdWriter* writer) +{ + if (writer->context.subject.type) { + sink(" .\n", 3, writer); + } + if (writer->context.graph.type) { + sink("}\n", 2, writer); + } + serd_byte_sink_flush(&writer->byte_sink); + writer->indent = 0; + return free_context(writer); +} + +SERD_API +SerdWriter* +serd_writer_new(SerdSyntax syntax, + SerdStyle style, + SerdEnv* env, + const SerdURI* base_uri, + SerdSink ssink, + void* stream) +{ + const WriteContext context = WRITE_CONTEXT_NULL; + SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter)); + writer->syntax = syntax; + writer->style = style; + writer->env = env; + writer->root_node = SERD_NODE_NULL; + writer->root_uri = SERD_URI_NULL; + writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; + writer->anon_stack = serd_stack_new(sizeof(WriteContext)); + writer->context = context; + writer->list_subj = SERD_NODE_NULL; + writer->empty = true; + writer->byte_sink = serd_byte_sink_new( + ssink, stream, (style & SERD_STYLE_BULK) ? 
SERD_PAGE_SIZE : 1); + return writer; +} + +SERD_API +void +serd_writer_set_error_sink(SerdWriter* writer, + SerdErrorSink error_sink, + void* error_handle) +{ + writer->error_sink = error_sink; + writer->error_handle = error_handle; +} + +SERD_API +void +serd_writer_chop_blank_prefix(SerdWriter* writer, + const uint8_t* prefix) +{ + free(writer->bprefix); + writer->bprefix_len = 0; + writer->bprefix = NULL; + if (prefix) { + writer->bprefix_len = strlen((const char*)prefix); + writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1); + memcpy(writer->bprefix, prefix, writer->bprefix_len + 1); + } +} + +SERD_API +SerdStatus +serd_writer_set_base_uri(SerdWriter* writer, + const SerdNode* uri) +{ + if (!serd_env_set_base_uri(writer->env, uri)) { + serd_env_get_base_uri(writer->env, &writer->base_uri); + + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (writer->context.graph.type || writer->context.subject.type) { + sink(" .\n\n", 4, writer); + reset_context(writer, true); + } + sink("@base <", 7, writer); + sink(uri->buf, uri->n_bytes, writer); + sink("> .\n", 4, writer); + } + writer->indent = 0; + return reset_context(writer, true); + } + return SERD_ERR_UNKNOWN; +} + +SERD_API +SerdStatus +serd_writer_set_root_uri(SerdWriter* writer, + const SerdNode* uri) +{ + serd_node_free(&writer->root_node); + if (uri && uri->buf) { + writer->root_node = serd_node_copy(uri); + serd_uri_parse(uri->buf, &writer->root_uri); + } else { + writer->root_node = SERD_NODE_NULL; + writer->root_uri = SERD_URI_NULL; + } + return SERD_SUCCESS; +} + +SERD_API +SerdStatus +serd_writer_set_prefix(SerdWriter* writer, + const SerdNode* name, + const SerdNode* uri) +{ + if (!serd_env_set_prefix(writer->env, name, uri)) { + if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) { + if (writer->context.graph.type || writer->context.subject.type) { + sink(" .\n\n", 4, writer); + reset_context(writer, true); + } + sink("@prefix ", 8, writer); + 
sink(name->buf, name->n_bytes, writer); + sink(": <", 3, writer); + write_uri(writer, uri->buf, uri->n_bytes); + sink("> .\n", 4, writer); + } + writer->indent = 0; + return reset_context(writer, true); + } + return SERD_ERR_UNKNOWN; +} + +SERD_API +void +serd_writer_free(SerdWriter* writer) +{ + serd_writer_finish(writer); + serd_stack_free(&writer->anon_stack); + free(writer->bprefix); + serd_byte_sink_free(&writer->byte_sink); + serd_node_free(&writer->root_node); + free(writer); +} + +SERD_API +SerdEnv* +serd_writer_get_env(SerdWriter* writer) +{ + return writer->env; +} + +SERD_API +size_t +serd_file_sink(const void* buf, size_t len, void* stream) +{ + return fwrite(buf, 1, len, (FILE*)stream); +} + +SERD_API +size_t +serd_chunk_sink(const void* buf, size_t len, void* stream) +{ + SerdChunk* chunk = (SerdChunk*)stream; + chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len); + memcpy((uint8_t*)chunk->buf + chunk->len, buf, len); + chunk->len += len; + return len; +} + +SERD_API +uint8_t* +serd_chunk_sink_finish(SerdChunk* stream) +{ + serd_chunk_sink("", 1, stream); + return (uint8_t*)stream->buf; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/COPYING Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,13 @@ +Copyright 2011-2016 David Robillard <http://drobilla.net> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/README Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,8 @@ +Sord +==== + +Sord is a lightweight C library for storing RDF statements in memory. +For more information, see <http://drobilla.net/software/sord>. + + -- David Robillard <d@drobilla.net> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/sord/sord.h Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,659 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/** + @file sord.h API for Sord, a lightweight RDF model library. +*/ + +#ifndef SORD_SORD_H +#define SORD_SORD_H + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include "serd/serd.h" + +#ifdef SORD_SHARED +# ifdef _WIN32 +# define SORD_LIB_IMPORT __declspec(dllimport) +# define SORD_LIB_EXPORT __declspec(dllexport) +# else +# define SORD_LIB_IMPORT __attribute__((visibility("default"))) +# define SORD_LIB_EXPORT __attribute__((visibility("default"))) +# endif +# ifdef SORD_INTERNAL +# define SORD_API SORD_LIB_EXPORT +# else +# define SORD_API SORD_LIB_IMPORT +# endif +#else +# define SORD_API +#endif + +#ifdef __cplusplus +extern "C" { +#else +# include <stdbool.h> +#endif + +/** + @defgroup sord Sord + A lightweight RDF model library. + + Sord stores RDF (subject predicate object context) quads, where the context + may be omitted (to represent triples in the default graph). + @{ +*/ + +/** + Sord World. + The World represents all library state, including interned strings. +*/ +typedef struct SordWorldImpl SordWorld; + +/** + Sord Model. 
+ + A model is an indexed set of Quads (i.e. it can contain several RDF + graphs). It may be searched using various patterns depending on which + indices are enabled. +*/ +typedef struct SordModelImpl SordModel; + +/** + Model Inserter. + + An inserter is used for writing statements to a model using the Serd sink + interface. This makes it simple to write to a model directly using a + SerdReader, or any other code that writes statements to a SerdStatementSink. +*/ +typedef struct SordInserterImpl SordInserter; + +/** + Model Iterator. +*/ +typedef struct SordIterImpl SordIter; + +/** + RDF Node. + A Node is a component of a Quad. Nodes may be URIs, blank nodes, or + (in the case of quad objects only) string literals. Literal nodes may + have an associated language or datatype (but not both). +*/ +typedef struct SordNodeImpl SordNode; + +/** + Quad of nodes (a statement), or a quad pattern. + + Nodes are ordered (S P O G). The ID of the default graph is 0. +*/ +typedef const SordNode* SordQuad[4]; + +/** + Index into a SordQuad. +*/ +typedef enum { + SORD_SUBJECT = 0, /**< Subject */ + SORD_PREDICATE = 1, /**< Predicate ("key") */ + SORD_OBJECT = 2, /**< Object ("value") */ + SORD_GRAPH = 3 /**< Graph ("context") */ +} SordQuadIndex; + +/** + Type of a node. +*/ +typedef enum { + SORD_URI = 1, /**< URI */ + SORD_BLANK = 2, /**< Blank node identifier */ + SORD_LITERAL = 3 /**< Literal (string with optional lang or datatype) */ +} SordNodeType; + +/** + Indexing option. +*/ +typedef enum { + SORD_SPO = 1, /**< Subject, Predicate, Object */ + SORD_SOP = 1 << 1, /**< Subject, Object, Predicate */ + SORD_OPS = 1 << 2, /**< Object, Predicate, Subject */ + SORD_OSP = 1 << 3, /**< Object, Subject, Predicate */ + SORD_PSO = 1 << 4, /**< Predicate, Subject, Object */ + SORD_POS = 1 << 5 /**< Predicate, Object, Subject */ +} SordIndexOption; + +/** + @name World + @{ +*/ + +/** + Create a new Sord World. + It is safe to use multiple worlds in one process, though no data + (e.g. 
+ nodes) can be shared between worlds, and this should be avoided if + possible for performance reasons. +*/ +SORD_API +SordWorld* +sord_world_new(void); + +/** + Free `world`. +*/ +SORD_API +void +sord_world_free(SordWorld* world); + +/** + Set a function to be called when errors occur. + + The `error_sink` will be called with `handle` as its first argument. If + no error function is set, errors are printed to stderr. +*/ +SORD_API +void +sord_world_set_error_sink(SordWorld* world, + SerdErrorSink error_sink, + void* handle); + +/** + @} + @name Node + @{ +*/ + +/** + Get a URI node from a string. + + Note this function measures `uri`, which is a common bottleneck. + Use sord_node_from_serd_node() instead if `uri` is already measured. +*/ +SORD_API +SordNode* +sord_new_uri(SordWorld* world, const uint8_t* uri); + +/** + Get a URI node from a relative URI string. +*/ +SORD_API +SordNode* +sord_new_relative_uri(SordWorld* world, + const uint8_t* str, + const uint8_t* base_uri); + +/** + Get a blank node from a string. + + Note this function measures `str`, which is a common bottleneck. + Use sord_node_from_serd_node() instead if `str` is already measured. +*/ +SORD_API +SordNode* +sord_new_blank(SordWorld* world, const uint8_t* str); + +/** + Get a literal node from a string. + + Note this function measures `str`, which is a common bottleneck. + Use sord_node_from_serd_node() instead if `str` is already measured. +*/ +SORD_API +SordNode* +sord_new_literal(SordWorld* world, + SordNode* datatype, + const uint8_t* str, + const char* lang); + +/** + Copy a node (obtain a reference). + + Note that since nodes are interned and reference counted, this does not + actually create a deep copy of `node`. +*/ +SORD_API +SordNode* +sord_node_copy(const SordNode* node); + +/** + Free a node (drop a reference). +*/ +SORD_API +void +sord_node_free(SordWorld* world, SordNode* node); + +/** + Return the type of a node (SORD_URI, SORD_BLANK, or SORD_LITERAL). 
+*/ +SORD_API +SordNodeType +sord_node_get_type(const SordNode* node); + +/** + Return the string value of a node. +*/ +SORD_API +const uint8_t* +sord_node_get_string(const SordNode* node); + +/** + Return the string value of a node, and set `bytes` to its length in bytes. +*/ +SORD_API +const uint8_t* +sord_node_get_string_counted(const SordNode* node, size_t* bytes); + +/** + Return the string value of a node, and set `bytes` to its length in bytes, + and `chars` to its length in characters. +*/ +SORD_API +const uint8_t* +sord_node_get_string_measured(const SordNode* node, + size_t* bytes, + size_t* chars); + +/** + Return the language of a literal node (or NULL). +*/ +SORD_API +const char* +sord_node_get_language(const SordNode* node); + +/** + Return the datatype URI of a literal node (or NULL). +*/ +SORD_API +SordNode* +sord_node_get_datatype(const SordNode* node); + +/** + Return the flags (string attributes) of a node. +*/ +SORD_API +SerdNodeFlags +sord_node_get_flags(const SordNode* node); + +/** + Return true iff node can be serialised as an inline object. + + More specifically, this returns true iff the node is the object field + of exactly one statement, and therefore can be inlined since it needn't + be referred to by name. +*/ +SORD_API +bool +sord_node_is_inline_object(const SordNode* node); + +/** + Return true iff `a` is equal to `b`. + + Note this is much faster than comparing the node's strings. +*/ +SORD_API +bool +sord_node_equals(const SordNode* a, + const SordNode* b); + +/** + Return a SordNode as a SerdNode. + + The returned node is shared and must not be freed or modified. +*/ +SORD_API +const SerdNode* +sord_node_to_serd_node(const SordNode* node); + +/** + Create a new SordNode from a SerdNode. + + The returned node must be freed using sord_node_free(). 
+*/ +SORD_API +SordNode* +sord_node_from_serd_node(SordWorld* world, + SerdEnv* env, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang); + +/** + @} + @name Model + @{ +*/ + +/** + Create a new model. + + @param world The world in which to make this model. + + @param indices SordIndexOption flags (e.g. SORD_SPO|SORD_OPS). Be sure to + enable an index where the most significant node(s) are not variables in your + queries (e.g. to make (? P O) queries, enable either SORD_OPS or SORD_POS). + + @param graphs If true, store (and index) graph contexts. +*/ +SORD_API +SordModel* +sord_new(SordWorld* world, + unsigned indices, + bool graphs); + +/** + Close and free `model`. +*/ +SORD_API +void +sord_free(SordModel* model); + +/** + Get the world associated with `model`. +*/ +SORD_API +SordWorld* +sord_get_world(SordModel* model); + +/** + Return the number of nodes stored in `world`. + + Nodes are included in this count iff they are a part of a quad in `world`. +*/ +SORD_API +size_t +sord_num_nodes(const SordWorld* world); + +/** + Return the number of quads stored in `model`. +*/ +SORD_API +size_t +sord_num_quads(const SordModel* model); + +/** + Return an iterator to the start of `model`. +*/ +SORD_API +SordIter* +sord_begin(const SordModel* model); + +/** + Search for statements by a quad pattern. + @return an iterator to the first match, or NULL if no matches found. +*/ +SORD_API +SordIter* +sord_find(SordModel* model, const SordQuad pat); + +/** + Search for statements by nodes. + @return an iterator to the first match, or NULL if no matches found. +*/ +SORD_API +SordIter* +sord_search(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g); +/** + Search for a single node that matches a pattern. + Exactly one of `s`, `p`, `o` must be NULL. + This function is mainly useful for predicates that only have one value. + The returned node must be freed using sord_node_free(). 
+ @return the first matching node, or NULL if no matches are found. +*/ +SORD_API +SordNode* +sord_get(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g); + +/** + Return true iff a statement exists. +*/ +SORD_API +bool +sord_ask(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g); + +/** + Return the number of matching statements. +*/ +SORD_API +uint64_t +sord_count(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g); + +/** + Check if `model` contains a triple pattern. + + @return true if `model` contains a match for `pat`, otherwise false. +*/ +SORD_API +bool +sord_contains(SordModel* model, const SordQuad pat); + +/** + Add a quad to a model. + + Calling this function invalidates all iterators on `model`. + + @return true on success, false, on error. +*/ +SORD_API +bool +sord_add(SordModel* model, const SordQuad quad); + +/** + Remove a quad from a model. + + Calling this function invalidates all iterators on `model`. To remove quads + while iterating, use sord_erase() instead. +*/ +SORD_API +void +sord_remove(SordModel* model, const SordQuad quad); + +/** + Remove a quad from a model via an iterator. + + Calling this function invalidates all iterators on `model` except `iter`. + + @param model The model which `iter` points to. + @param iter Iterator to the element to erase, which is incremented to the + next value on return. +*/ +SORD_API +SerdStatus +sord_erase(SordModel* model, SordIter* iter); + +/** + @} + @name Inserter + @{ +*/ + +/** + Create an inserter for writing statements to a model. +*/ +SORD_API +SordInserter* +sord_inserter_new(SordModel* model, + SerdEnv* env); + +/** + Free an inserter. +*/ +SORD_API +void +sord_inserter_free(SordInserter* inserter); + +/** + Set the current base URI for writing to the model. + + Note this function can be safely casted to SerdBaseSink. 
+*/ +SORD_API +SerdStatus +sord_inserter_set_base_uri(SordInserter* inserter, + const SerdNode* uri); + +/** + Set a namespace prefix for writing to the model. + + Note this function can be safely casted to SerdPrefixSink. +*/ +SORD_API +SerdStatus +sord_inserter_set_prefix(SordInserter* inserter, + const SerdNode* name, + const SerdNode* uri); + +/** + Write a statement to the model. + + Note this function can be safely casted to SerdStatementSink. +*/ +SORD_API +SerdStatus +sord_inserter_write_statement(SordInserter* inserter, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); + +/** + @} + @name Iteration + @{ +*/ + +/** + Set `quad` to the quad pointed to by `iter`. +*/ +SORD_API +void +sord_iter_get(const SordIter* iter, SordQuad quad); + +/** + Return a field of the quad pointed to by `iter`. + + Returns NULL if `iter` is NULL or is at the end. +*/ +SORD_API +const SordNode* +sord_iter_get_node(const SordIter* iter, SordQuadIndex index); + +/** + Return the store pointed to by `iter`. +*/ +SORD_API +const SordModel* +sord_iter_get_model(SordIter* iter); + +/** + Increment `iter` to point to the next statement. +*/ +SORD_API +bool +sord_iter_next(SordIter* iter); + +/** + Return true iff `iter` is at the end of its range. +*/ +SORD_API +bool +sord_iter_end(const SordIter* iter); + +/** + Free `iter`. +*/ +SORD_API +void +sord_iter_free(SordIter* iter); + +/** + @} + @name Utilities + @{ +*/ + +/** + Match two quads (using ID comparison only). + + This function is a straightforward and fast equivalence match with wildcard + support (ID 0 is a wildcard). It does not actually read node data. + @return true iff `x` and `y` match. +*/ +SORD_API +bool +sord_quad_match(const SordQuad x, const SordQuad y); + +/** + @} + @name Serialisation + @{ +*/ + +/** + Return a reader that will read into `model`. 
+*/ +SORD_API +SerdReader* +sord_new_reader(SordModel* model, + SerdEnv* env, + SerdSyntax syntax, + SordNode* graph); + +/** + Write a model to a writer. +*/ +SORD_API +bool +sord_write(SordModel* model, + SerdWriter* writer, + SordNode* graph); + +/** + Write a range to a writer. + + This increments `iter` to its end, then frees it. +*/ +SORD_API +bool +sord_write_iter(SordIter* iter, + SerdWriter* writer); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SORD_SORD_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/sord/sordmm.hpp Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,650 @@ +/* + Copyright 2011-2013 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/** + @file sordmm.hpp + Public Sord C++ API. +*/ + +#ifndef SORD_SORDMM_HPP +#define SORD_SORDMM_HPP + +#include <cassert> +#include <cstring> +#include <cstdlib> +#include <iostream> +#include <set> +#include <string> +#include <sstream> + +#include "serd/serd.h" +#include "sord/sord.h" + +#define SORD_NS_XSD "http://www.w3.org/2001/XMLSchema#" + +namespace Sord { + +/** Utility base class to prevent copying. */ +class Noncopyable { +protected: + Noncopyable() {} + ~Noncopyable() {} +private: + Noncopyable(const Noncopyable&); + const Noncopyable& operator=(const Noncopyable&); +}; + +/** C++ wrapper for a Sord object. */ +template <typename T> +class Wrapper { +public: + inline Wrapper(T c_obj = NULL) : _c_obj(c_obj) {} + + inline T c_obj() { return _c_obj; } + inline const T c_obj() const { return _c_obj; } + +protected: + T _c_obj; +}; + +/** Collection of RDF namespaces with prefixes. 
*/ +class Namespaces : public Wrapper<SerdEnv*> { +public: + Namespaces() : Wrapper<SerdEnv*>(serd_env_new(NULL)) {} + ~Namespaces() { serd_env_free(_c_obj); } + + static inline SerdNode string_to_node(SerdType type, const std::string& s) { + SerdNode ret = { + (const uint8_t*)s.c_str(), s.length(), s.length(), 0, type }; + return ret; + } + + inline void add(const std::string& name, + const std::string& uri) { + const SerdNode name_node = string_to_node(SERD_LITERAL, name); + const SerdNode uri_node = string_to_node(SERD_URI, uri); + serd_env_set_prefix(_c_obj, &name_node, &uri_node); + } + + inline std::string qualify(std::string uri) const { + const SerdNode uri_node = string_to_node(SERD_URI, uri); + SerdNode prefix; + SerdChunk suffix; + if (serd_env_qualify(_c_obj, &uri_node, &prefix, &suffix)) { + std::string ret((const char*)prefix.buf, prefix.n_bytes); + ret.append(":").append((const char*)suffix.buf, suffix.len); + return ret; + } + return uri; + } + + inline std::string expand(const std::string& curie) const { + assert(curie.find(":") != std::string::npos); + SerdNode curie_node = string_to_node(SERD_CURIE, curie); + SerdChunk uri_prefix; + SerdChunk uri_suffix; + if (!serd_env_expand(_c_obj, &curie_node, &uri_prefix, &uri_suffix)) { + std::string ret((const char*)uri_prefix.buf, uri_prefix.len); + ret.append((const char*)uri_suffix.buf, uri_suffix.len); + return ret; + } + std::cerr << "CURIE `" << curie << "' has unknown prefix." << std::endl; + return curie; + } +}; + +/** Sord library state. 
*/ +class World : public Noncopyable, public Wrapper<SordWorld*> { +public: + inline World() + : _next_blank_id(0) + { + _c_obj = sord_world_new(); + } + + inline ~World() { + sord_world_free(_c_obj); + } + + inline uint64_t blank_id() { return _next_blank_id++; } + + inline void add_prefix(const std::string& prefix, const std::string& uri) { + _prefixes.add(prefix, uri); + } + + inline const Namespaces& prefixes() const { return _prefixes; } + inline SordWorld* world() { return _c_obj; } + +private: + Namespaces _prefixes; + std::set<std::string> _blank_ids; + uint64_t _next_blank_id; +}; + +/** An RDF Node (resource, literal, etc) + */ +class Node : public Wrapper<SordNode*> { +public: + enum Type { + UNKNOWN = 0, + URI = SORD_URI, + BLANK = SORD_BLANK, + LITERAL = SORD_LITERAL + }; + + inline Node() : Wrapper<SordNode*>(NULL), _world(NULL) {} + + inline Node(World& world, Type t, const std::string& s); + inline Node(World& world); + inline Node(World& world, const SordNode* node); + inline Node(World& world, SordNode* node, bool copy=false); + inline Node(const Node& other); + inline ~Node(); + + inline Type type() const { + return _c_obj ? (Type)sord_node_get_type(_c_obj) : UNKNOWN; + } + + inline const SordNode* get_node() const { return _c_obj; } + inline SordNode* get_node() { return _c_obj; } + + const SerdNode* to_serd_node() { + return sord_node_to_serd_node(_c_obj); + } + + inline bool is_valid() const { return type() != UNKNOWN; } + + inline bool operator<(const Node& other) const { + if (type() != other.type()) { + return type() < other.type(); + } else { + return to_string() < other.to_string(); + } + } + + Node& operator=(const Node& other) { + if (&other != this) { + if (_c_obj) { + sord_node_free(_world->c_obj(), _c_obj); + } + _world = other._world; + _c_obj = other._c_obj ? 
sord_node_copy(other._c_obj) : NULL; + } + return *this; + } + + inline bool operator==(const Node& other) const { + return sord_node_equals(_c_obj, other._c_obj); + } + + inline const uint8_t* to_u_string() const; + inline const char* to_c_string() const; + inline std::string to_string() const; + + inline bool is_literal_type(const char* type_uri) const; + + inline bool is_uri() const { return _c_obj && type() == URI; } + inline bool is_blank() const { return _c_obj && type() == BLANK; } + inline bool is_int() const { return is_literal_type(SORD_NS_XSD "integer"); } + inline bool is_float() const { return is_literal_type(SORD_NS_XSD "decimal"); } + inline bool is_bool() const { return is_literal_type(SORD_NS_XSD "boolean"); } + + inline int to_int() const; + inline float to_float() const; + inline bool to_bool() const; + + inline static Node blank_id(World& world, const std::string base="b") { + const uint64_t num = world.blank_id(); + std::ostringstream ss; + ss << base << num; + return Node(world, Node::BLANK, ss.str()); + } + +private: + World* _world; +}; + +inline std::ostream& +operator<<(std::ostream& os, const Node& node) +{ + return os << node.to_string(); +} + +class URI : public Node { +public: + inline URI(World& world, const std::string& s) + : Node(world, Node::URI, s) {} + inline URI(World& world, const std::string& s, const std::string& base) + : Node(world, sord_new_relative_uri(world.world(), + (const uint8_t*)s.c_str(), + (const uint8_t*)base.c_str())) + {} +}; + +class Curie : public Node { +public: + inline Curie(World& world, const std::string& s) + : Node(world, Node::URI, world.prefixes().expand(s)) {} +}; + +class Literal : public Node { +public: + inline Literal(World& world, const std::string& s) + : Node(world, Node::LITERAL, s) {} + + static inline Node decimal(World& world, double d, unsigned frac_digits) { + const SerdNode val = serd_node_new_decimal(d, 7); + const SerdNode type = serd_node_from_string( + SERD_URI, (const 
uint8_t*)SORD_NS_XSD "decimal"); + + return Node( + world, + sord_node_from_serd_node( + world.c_obj(), world.prefixes().c_obj(), &val, &type, NULL), + false); + } + + static inline Node integer(World& world, int64_t i) { + const SerdNode val = serd_node_new_integer(i); + const SerdNode type = serd_node_from_string( + SERD_URI, (const uint8_t*)SORD_NS_XSD "integer"); + + return Node( + world, + sord_node_from_serd_node( + world.c_obj(), world.prefixes().c_obj(), &val, &type, NULL), + false); + } +}; + +inline +Node::Node(World& world, Type type, const std::string& s) + : _world(&world) +{ + switch (type) { + case URI: + _c_obj = sord_new_uri( + world.world(), (const unsigned char*)s.c_str()); + break; + case LITERAL: + _c_obj = sord_new_literal( + world.world(), NULL, (const unsigned char*)s.c_str(), NULL); + break; + case BLANK: + _c_obj = sord_new_blank( + world.world(), (const unsigned char*)s.c_str()); + break; + default: + _c_obj = NULL; + } + + assert(this->type() == type); +} + +inline +Node::Node(World& world) + : _world(&world) +{ + Node me = blank_id(world); + *this = me; +} + +inline +Node::Node(World& world, const SordNode* node) + : _world(&world) +{ + _c_obj = sord_node_copy(node); +} + +inline +Node::Node(World& world, SordNode* node, bool copy) + : _world(&world) +{ + _c_obj = copy ? sord_node_copy(node) : node; +} + +inline +Node::Node(const Node& other) + : Wrapper<SordNode*>() + , _world(other._world) +{ + if (_world) { + _c_obj = other._c_obj ? sord_node_copy(other._c_obj) : NULL; + } + + assert((!_c_obj && !other._c_obj) || to_string() == other.to_string()); +} + +inline +Node::~Node() +{ + if (_world) { + sord_node_free(_world->c_obj(), _c_obj); + } +} + +inline std::string +Node::to_string() const +{ + return _c_obj ? 
(const char*)sord_node_get_string(_c_obj) : ""; +} + +inline const char* +Node::to_c_string() const +{ + return (const char*)sord_node_get_string(_c_obj); +} + +inline const uint8_t* +Node::to_u_string() const +{ + return sord_node_get_string(_c_obj); +} + +inline bool +Node::is_literal_type(const char* type_uri) const +{ + if (_c_obj && sord_node_get_type(_c_obj) == SORD_LITERAL) { + const SordNode* datatype = sord_node_get_datatype(_c_obj); + if (datatype && !strcmp((const char*)sord_node_get_string(datatype), + type_uri)) + return true; + } + return false; +} + +inline int +Node::to_int() const +{ + assert(is_int()); + char* endptr; + return strtol((const char*)sord_node_get_string(_c_obj), &endptr, 10); +} + +inline float +Node::to_float() const +{ + assert(is_float()); + return serd_strtod((const char*)sord_node_get_string(_c_obj), NULL); +} + +inline bool +Node::to_bool() const +{ + assert(is_bool()); + return !strcmp((const char*)sord_node_get_string(_c_obj), "true"); +} + +struct Iter : public Wrapper<SordIter*> { + inline Iter(World& world, SordIter* c_obj) + : Wrapper<SordIter*>(c_obj), _world(world) {} + inline ~Iter() { sord_iter_free(_c_obj); } + inline bool end() const { return sord_iter_end(_c_obj); } + inline bool next() const { return sord_iter_next(_c_obj); } + inline Iter& operator++() { + assert(!end()); + next(); + return *this; + } + inline const Node get_subject() const { + SordQuad quad; + sord_iter_get(_c_obj, quad); + return Node(_world, quad[SORD_SUBJECT]); + } + inline const Node get_predicate() const { + SordQuad quad; + sord_iter_get(_c_obj, quad); + return Node(_world, quad[SORD_PREDICATE]); + } + inline const Node get_object() const { + SordQuad quad; + sord_iter_get(_c_obj, quad); + return Node(_world, quad[SORD_OBJECT]); + } + World& _world; +}; + +/** An RDF Model (collection of triples). 
+ */ +class Model : public Noncopyable, public Wrapper<SordModel*> { +public: + inline Model(World& world, + const std::string& base_uri, + unsigned indices = (SORD_SPO | SORD_OPS), + bool graphs = true); + + inline ~Model(); + + inline const Node& base_uri() const { return _base; } + + size_t num_quads() const { return sord_num_quads(_c_obj); } + + inline void load_file(SerdEnv* env, + SerdSyntax syntax, + const std::string& uri, + const std::string& base_uri=""); + + inline void load_string(SerdEnv* env, + SerdSyntax syntax, + const char* str, + size_t len, + const std::string& base_uri); + + inline SerdStatus write_to_file( + const std::string& uri, + SerdSyntax syntax = SERD_TURTLE, + SerdStyle style = (SerdStyle)(SERD_STYLE_ABBREVIATED + |SERD_STYLE_CURIED + |SERD_STYLE_RESOLVED)); + + inline std::string write_to_string( + const std::string& base_uri, + SerdSyntax syntax = SERD_TURTLE, + SerdStyle style = (SerdStyle)(SERD_STYLE_ABBREVIATED + |SERD_STYLE_CURIED + |SERD_STYLE_RESOLVED)); + + inline void add_statement(const Node& subject, + const Node& predicate, + const Node& object); + + inline Iter find(const Node& subject, + const Node& predicate, + const Node& object); + + inline Node get(const Node& subject, + const Node& predicate, + const Node& object); + + inline World& world() const { return _world; } + +private: + World& _world; + Node _base; +}; + +/** Create an empty in-memory RDF model. 
+ */ +inline +Model::Model(World& world, + const std::string& base_uri, + unsigned indices, + bool graphs) + : _world(world) + , _base(world, Node::URI, base_uri) +{ + _c_obj = sord_new(_world.world(), indices, graphs); +} + +inline void +Model::load_string(SerdEnv* env, + SerdSyntax syntax, + const char* str, + size_t len, + const std::string& base_uri) +{ + SerdReader* reader = sord_new_reader(_c_obj, env, syntax, NULL); + serd_reader_read_string(reader, (const uint8_t*)str); + serd_reader_free(reader); +} + +inline Model::~Model() +{ + sord_free(_c_obj); +} + +inline void +Model::load_file(SerdEnv* env, + SerdSyntax syntax, + const std::string& data_uri, + const std::string& base_uri) +{ + uint8_t* path = serd_file_uri_parse((const uint8_t*)data_uri.c_str(), NULL); + if (!path) { + fprintf(stderr, "Failed to parse file URI <%s>\n", data_uri.c_str()); + return; + } + + // FIXME: blank prefix parameter? + SerdReader* reader = sord_new_reader(_c_obj, env, syntax, NULL); + serd_reader_read_file(reader, path); + serd_reader_free(reader); + free(path); +} + +inline SerdStatus +Model::write_to_file(const std::string& uri, SerdSyntax syntax, SerdStyle style) +{ + uint8_t* path = serd_file_uri_parse((const uint8_t*)uri.c_str(), NULL); + if (!path) { + fprintf(stderr, "Failed to parse file URI <%s>\n", uri.c_str()); + return SERD_ERR_BAD_ARG; + } + + FILE* const fd = fopen((const char*)path, "w"); + if (!fd) { + fprintf(stderr, "Failed to open file %s\n", path); + free(path); + return SERD_ERR_UNKNOWN; + } + free(path); + + SerdURI base_uri = SERD_URI_NULL; + if (serd_uri_parse((const uint8_t*)uri.c_str(), &base_uri)) { + fprintf(stderr, "Invalid base URI <%s>\n", uri.c_str()); + fclose(fd); + return SERD_ERR_BAD_ARG; + } + + SerdWriter* writer = serd_writer_new(syntax, + style, + _world.prefixes().c_obj(), + &base_uri, + serd_file_sink, + fd); + + serd_env_foreach(_world.prefixes().c_obj(), + (SerdPrefixSink)serd_writer_set_prefix, + writer); + + sord_write(_c_obj, 
writer, 0); + serd_writer_free(writer); + fclose(fd); + + return SERD_SUCCESS; +} + +static size_t +string_sink(const void* buf, size_t len, void* stream) +{ + std::string* str = (std::string*)stream; + str->append((const char*)buf, len); + return len; +} + +inline std::string +Model::write_to_string(const std::string& base_uri_str, + SerdSyntax syntax, + SerdStyle style) +{ + SerdURI base_uri = SERD_URI_NULL; + if (serd_uri_parse((const uint8_t*)base_uri_str.c_str(), &base_uri)) { + fprintf(stderr, "Invalid base URI <%s>\n", base_uri_str.c_str()); + return ""; + } + + std::string ret; + + SerdWriter* writer = serd_writer_new(syntax, + style, + _world.prefixes().c_obj(), + &base_uri, + string_sink, + &ret); + + const SerdNode base_uri_node = serd_node_from_string( + SERD_URI, (const uint8_t*)base_uri_str.c_str()); + serd_writer_set_base_uri(writer, &base_uri_node); + + serd_env_foreach(_world.prefixes().c_obj(), + (SerdPrefixSink)serd_writer_set_prefix, + writer); + + sord_write(_c_obj, writer, 0); + + serd_writer_free(writer); + return ret; +} + +inline void +Model::add_statement(const Node& subject, + const Node& predicate, + const Node& object) +{ + SordQuad quad = { subject.c_obj(), + predicate.c_obj(), + object.c_obj(), + NULL }; + + sord_add(_c_obj, quad); +} + +inline Iter +Model::find(const Node& subject, + const Node& predicate, + const Node& object) +{ + SordQuad quad = { subject.c_obj(), + predicate.c_obj(), + object.c_obj(), + NULL }; + + return Iter(_world, sord_find(_c_obj, quad)); +} + +inline Node +Model::get(const Node& subject, + const Node& predicate, + const Node& object) +{ + SordNode* c_node = sord_get( + _c_obj, subject.c_obj(), predicate.c_obj(), object.c_obj(), NULL); + return Node(_world, c_node, false); +} + +} // namespace Sord + +#endif // SORD_SORDMM_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sord.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,1286 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +// C99 +#include <assert.h> +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define ZIX_INLINE +#include "zix/digest.c" +#include "zix/hash.c" +#include "zix/btree.c" + +#include "sord_config.h" +#include "sord_internal.h" + +#define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__) + +#ifdef SORD_DEBUG_ITER +# define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__) +#else +# define SORD_ITER_LOG(...) +#endif +#ifdef SORD_DEBUG_SEARCH +# define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__) +#else +# define SORD_FIND_LOG(...) +#endif +#ifdef SORD_DEBUG_WRITE +# define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__) +#else +# define SORD_WRITE_LOG(...) +#endif + +#define NUM_ORDERS 12 +#define STATEMENT_LEN 3 +#define TUP_LEN STATEMENT_LEN + 1 +#define DEFAULT_ORDER SPO +#define DEFAULT_GRAPH_ORDER GSPO + +#define TUP_FMT "(%s %s %s %s)" +#define TUP_FMT_ELEM(e) ((e) ? 
sord_node_get_string(e) : (const uint8_t*)"*") +#define TUP_FMT_ARGS(t) \ + TUP_FMT_ELEM((t)[0]), \ + TUP_FMT_ELEM((t)[1]), \ + TUP_FMT_ELEM((t)[2]), \ + TUP_FMT_ELEM((t)[3]) + +#define TUP_S 0 +#define TUP_P 1 +#define TUP_O 2 +#define TUP_G 3 + +/** Triple ordering */ +typedef enum { + SPO, ///< Subject, Predicate, Object + SOP, ///< Subject, Object, Predicate + OPS, ///< Object, Predicate, Subject + OSP, ///< Object, Subject, Predicate + PSO, ///< Predicate, Subject, Object + POS, ///< Predicate, Object, Subject + GSPO, ///< Graph, Subject, Predicate, Object + GSOP, ///< Graph, Subject, Object, Predicate + GOPS, ///< Graph, Object, Predicate, Subject + GOSP, ///< Graph, Object, Subject, Predicate + GPSO, ///< Graph, Predicate, Subject, Object + GPOS ///< Graph, Predicate, Object, Subject +} SordOrder; + +#ifdef SORD_DEBUG_SEARCH +/** String name of each ordering (array indexed by SordOrder) */ +static const char* const order_names[NUM_ORDERS] = { + "spo", "sop", "ops", "osp", "pso", "pos", + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" +}; +#endif + +/** + Quads of indices for each order, from most to least significant + (array indexed by SordOrder) +*/ +static const int orderings[NUM_ORDERS][TUP_LEN] = { + { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP + { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP + { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS + { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP + { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP + { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS +}; + +/** World */ +struct SordWorldImpl { + ZixHash* nodes; + SerdErrorSink error_sink; + void* error_handle; +}; + +/** Store */ +struct SordModelImpl { + SordWorld* world; + + /** Index for each possible triple ordering (may or may not exist). + * Each index is a tree of SordQuad with the appropriate ordering. 
+ */ + ZixBTree* indices[NUM_ORDERS]; + + size_t n_quads; + size_t n_iters; +}; + +/** Mode for searching or iteration */ +typedef enum { + ALL, ///< Iterate over entire store + SINGLE, ///< Iteration over a single element (exact search) + RANGE, ///< Iterate over range with equal prefix + FILTER_RANGE, ///< Iterate over range with equal prefix, filtering + FILTER_ALL ///< Iterate to end of store, filtering +} SearchMode; + +/** Iterator over some range of a store */ +struct SordIterImpl { + const SordModel* sord; ///< Model being iterated over + ZixBTreeIter* cur; ///< Current DB cursor + SordQuad pat; ///< Pattern (in ordering order) + SordOrder order; ///< Store order (which index) + SearchMode mode; ///< Iteration mode + int n_prefix; ///< Prefix for RANGE and FILTER_RANGE + bool end; ///< True iff reached end + bool skip_graphs; ///< Iteration should ignore graphs +}; + +static uint32_t +sord_node_hash(const void* n) +{ + const SordNode* node = (const SordNode*)n; + uint32_t hash = zix_digest_start(); + hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes); + hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type)); + if (node->node.type == SERD_LITERAL) { + hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit)); + } + return hash; +} + +static bool +sord_node_hash_equal(const void* a, const void* b) +{ + const SordNode* a_node = (const SordNode*)a; + const SordNode* b_node = (const SordNode*)b; + return (a_node == b_node) + || ((a_node->node.type == b_node->node.type) && + (a_node->node.type != SERD_LITERAL || + (a_node->meta.lit.datatype == b_node->meta.lit.datatype && + !strncmp(a_node->meta.lit.lang, + b_node->meta.lit.lang, + sizeof(a_node->meta.lit.lang)))) && + (serd_node_equals(&a_node->node, &b_node->node))); +} + +static void +error(SordWorld* world, SerdStatus st, const char* fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + const SerdError e = { st, NULL, 0, 0, fmt, &args }; + if (world->error_sink) { + world->error_sink(world->error_handle, &e); + } else { + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + } + va_end(args); +} + +SordWorld* +sord_world_new(void) +{ + SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld)); + world->error_sink = NULL; + world->error_handle = NULL; + + world->nodes = zix_hash_new( + sord_node_hash, sord_node_hash_equal, sizeof(SordNode)); + + return world; +} + +static void +free_node_entry(void* value, void* user_data) +{ + SordNode* node = (SordNode*)value; + if (node->node.type == SERD_LITERAL) { + sord_node_free((SordWorld*)user_data, node->meta.lit.datatype); + } + free((uint8_t*)node->node.buf); +} + +void +sord_world_free(SordWorld* world) +{ + zix_hash_foreach(world->nodes, free_node_entry, world); + zix_hash_free(world->nodes); + free(world); +} + +void +sord_world_set_error_sink(SordWorld* world, + SerdErrorSink error_sink, + void* handle) +{ + world->error_sink = error_sink; + world->error_handle = handle; +} + +/** Compare nodes, considering NULL a wildcard match. 
*/ +static inline int +sord_node_compare(const SordNode* a, const SordNode* b) +{ + if (a == b || !a || !b) { + return 0; // Exact or wildcard match + } else if (a->node.type != b->node.type) { + return a->node.type - b->node.type; + } + + int cmp = 0; + switch (a->node.type) { + case SERD_URI: + case SERD_BLANK: + return strcmp((const char*)a->node.buf, (const char*)b->node.buf); + case SERD_LITERAL: + cmp = strcmp((const char*)sord_node_get_string(a), + (const char*)sord_node_get_string(b)); + if (cmp == 0) { + // Note: Can't use sord_node_compare here since it does wildcards + if (!a->meta.lit.datatype || !b->meta.lit.datatype) { + cmp = a->meta.lit.datatype - b->meta.lit.datatype; + } else { + cmp = strcmp((const char*)a->meta.lit.datatype->node.buf, + (const char*)b->meta.lit.datatype->node.buf); + } + } + if (cmp == 0) { + cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang); + } + default: + break; + } + return cmp; +} + +bool +sord_node_equals(const SordNode* a, const SordNode* b) +{ + return a == b; // Nodes are interned +} + +/** Return true iff IDs are equivalent, or one is a wildcard */ +static inline bool +sord_id_match(const SordNode* a, const SordNode* b) +{ + return !a || !b || (a == b); +} + +static inline bool +sord_quad_match_inline(const SordQuad x, const SordQuad y) +{ + return sord_id_match(x[0], y[0]) + && sord_id_match(x[1], y[1]) + && sord_id_match(x[2], y[2]) + && sord_id_match(x[3], y[3]); +} + +bool +sord_quad_match(const SordQuad x, const SordQuad y) +{ + return sord_quad_match_inline(x, y); +} + +/** + Compare two quad IDs lexicographically. + NULL IDs (equal to 0) are treated as wildcards, always less than every + other possible ID, except itself. 
+*/ +static int +sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data) +{ + const int* const ordering = (const int*)user_data; + const SordNode*const*const x = (const SordNode*const*)x_ptr; + const SordNode*const*const y = (const SordNode*const*)y_ptr; + + for (int i = 0; i < TUP_LEN; ++i) { + const int idx = ordering[i]; + const int cmp = sord_node_compare(x[idx], y[idx]); + if (cmp) { + return cmp; + } + } + + return 0; +} + +static inline bool +sord_iter_forward(SordIter* iter) +{ + if (!iter->skip_graphs) { + zix_btree_iter_increment(iter->cur); + return zix_btree_iter_is_end(iter->cur); + } + + SordNode** key = (SordNode**)zix_btree_get(iter->cur); + const SordQuad initial = { key[0], key[1], key[2], key[3] }; + zix_btree_iter_increment(iter->cur); + while (!zix_btree_iter_is_end(iter->cur)) { + key = (SordNode**)zix_btree_get(iter->cur); + for (int i = 0; i < 3; ++i) + if (key[i] != initial[i]) + return false; + + zix_btree_iter_increment(iter->cur); + } + + return true; +} + +/** + Seek forward as necessary until `iter` points at a match. + @return true iff iterator reached end of valid range. +*/ +static inline bool +sord_iter_seek_match(SordIter* iter) +{ + for (iter->end = true; + !zix_btree_iter_is_end(iter->cur); + sord_iter_forward(iter)) { + const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur); + if (sord_quad_match_inline(key, iter->pat)) + return (iter->end = false); + } + return true; +} + +/** + Seek forward as necessary until `iter` points at a match, or the prefix + no longer matches iter->pat. + @return true iff iterator reached end of valid range. 
+*/ +static inline bool +sord_iter_seek_match_range(SordIter* iter) +{ + assert(!iter->end); + + do { + const SordNode** key = (const SordNode**)zix_btree_get(iter->cur); + + if (sord_quad_match_inline(key, iter->pat)) + return false; // Found match + + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!sord_id_match(key[idx], iter->pat[idx])) { + iter->end = true; // Reached end of valid range + return true; + } + } + } while (!sord_iter_forward(iter)); + + return (iter->end = true); // Reached end +} + +static SordIter* +sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat, + SordOrder order, SearchMode mode, int n_prefix) +{ + SordIter* iter = (SordIter*)malloc(sizeof(SordIter)); + iter->sord = sord; + iter->cur = cur; + iter->order = order; + iter->mode = mode; + iter->n_prefix = n_prefix; + iter->end = false; + iter->skip_graphs = order < GSPO; + for (int i = 0; i < TUP_LEN; ++i) { + iter->pat[i] = pat[i]; + } + + switch (iter->mode) { + case ALL: + case SINGLE: + case RANGE: + assert( + sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur), + iter->pat)); + break; + case FILTER_RANGE: + sord_iter_seek_match_range(iter); + break; + case FILTER_ALL: + sord_iter_seek_match(iter); + break; + } + +#ifdef SORD_DEBUG_ITER + SordQuad value; + sord_iter_get(iter, value); + SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n", + (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value), + iter->end, iter->skip_graphs); +#endif + + ++((SordModel*)sord)->n_iters; + return iter; +} + +const SordModel* +sord_iter_get_model(SordIter* iter) +{ + return iter->sord; +} + +void +sord_iter_get(const SordIter* iter, SordQuad id) +{ + SordNode** key = (SordNode**)zix_btree_get(iter->cur); + for (int i = 0; i < TUP_LEN; ++i) { + id[i] = key[i]; + } +} + +const SordNode* +sord_iter_get_node(const SordIter* iter, SordQuadIndex index) +{ + return (!sord_iter_end(iter) + ? 
((SordNode**)zix_btree_get(iter->cur))[index] + : NULL); +} + +static bool +sord_iter_scan_next(SordIter* iter) +{ + if (iter->end) { + return true; + } + + const SordNode** key; + if (!iter->end) { + switch (iter->mode) { + case ALL: + // At the end if the cursor is (assigned above) + break; + case SINGLE: + iter->end = true; + SORD_ITER_LOG("%p reached single end\n", (void*)iter); + break; + case RANGE: + SORD_ITER_LOG("%p range next\n", (void*)iter); + // At the end if the MSNs no longer match + key = (const SordNode**)zix_btree_get(iter->cur); + assert(key); + for (int i = 0; i < iter->n_prefix; ++i) { + const int idx = orderings[iter->order][i]; + if (!sord_id_match(key[idx], iter->pat[idx])) { + iter->end = true; + SORD_ITER_LOG("%p reached non-match end\n", (void*)iter); + break; + } + } + break; + case FILTER_RANGE: + // Seek forward to next match, stopping if prefix changes + sord_iter_seek_match_range(iter); + break; + case FILTER_ALL: + // Seek forward to next match + sord_iter_seek_match(iter); + break; + } + } else { + SORD_ITER_LOG("%p reached index end\n", (void*)iter); + } + + if (iter->end) { + SORD_ITER_LOG("%p Reached end\n", (void*)iter); + return true; + } else { +#ifdef SORD_DEBUG_ITER + SordQuad tup; + sord_iter_get(iter, tup); + SORD_ITER_LOG("%p Increment to " TUP_FMT "\n", + (void*)iter, TUP_FMT_ARGS(tup)); +#endif + return false; + } +} + +bool +sord_iter_next(SordIter* iter) +{ + if (iter->end) { + return true; + } + + iter->end = sord_iter_forward(iter); + return sord_iter_scan_next(iter); +} + +bool +sord_iter_end(const SordIter* iter) +{ + return !iter || iter->end; +} + +void +sord_iter_free(SordIter* iter) +{ + SORD_ITER_LOG("%p Free\n", (void*)iter); + if (iter) { + --((SordModel*)iter->sord)->n_iters; + zix_btree_iter_free(iter->cur); + free(iter); + } +} + +/** + Return true iff `sord` has an index for `order`. 
+ If `graphs` is true, `order` will be modified to be the + corresponding order with a G prepended (so G will be the MSN). +*/ +static inline bool +sord_has_index(SordModel* sord, SordOrder* order, int* n_prefix, bool graphs) +{ + if (graphs) { + *order = (SordOrder)(*order + GSPO); + *n_prefix += 1; + } + + return sord->indices[*order]; +} + +/** + Return the best available index for a pattern. + @param pat Pattern in standard (S P O G) order + @param mode Set to the (best) iteration mode for iterating over results + @param n_prefix Set to the length of the range prefix + (for `mode` == RANGE and `mode` == FILTER_RANGE) +*/ +static inline SordOrder +sord_best_index(SordModel* sord, + const SordQuad pat, + SearchMode* mode, + int* n_prefix) +{ + const bool graph_search = (pat[TUP_G] != 0); + + const unsigned sig + = (pat[0] ? 1 : 0) * 0x100 + + (pat[1] ? 1 : 0) * 0x010 + + (pat[2] ? 1 : 0) * 0x001; + + SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 }; + +#define PAT_CASE(sig, m, g0, g1, np) \ + case sig: \ + *mode = m; \ + good[0] = g0; \ + good[1] = g1; \ + *n_prefix = np; \ + break + + // Good orderings that don't require filtering + *mode = RANGE; + *n_prefix = 0; + switch (sig) { + case 0x000: + assert(graph_search); + *mode = RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + case 0x111: + *mode = SINGLE; + return graph_search ? 
DEFAULT_GRAPH_ORDER : DEFAULT_ORDER; + + PAT_CASE(0x001, RANGE, OPS, OSP, 1); + PAT_CASE(0x010, RANGE, POS, PSO, 1); + PAT_CASE(0x011, RANGE, OPS, POS, 2); + PAT_CASE(0x100, RANGE, SPO, SOP, 1); + PAT_CASE(0x101, RANGE, SOP, OSP, 2); + PAT_CASE(0x110, RANGE, SPO, PSO, 2); + } + + if (*mode == RANGE) { + if (sord_has_index(sord, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + // Not so good orderings that require filtering, but can + // still be constrained to a range + switch (sig) { + PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1); + PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1); + // SPO is always present, so 0x110 is never reached here + default: break; + } + + if (*mode == FILTER_RANGE) { + if (sord_has_index(sord, &good[0], n_prefix, graph_search)) { + return good[0]; + } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) { + return good[1]; + } + } + + if (graph_search) { + *mode = FILTER_RANGE; + *n_prefix = 1; + return DEFAULT_GRAPH_ORDER; + } else { + *mode = FILTER_ALL; + return DEFAULT_ORDER; + } +} + +SordModel* +sord_new(SordWorld* world, unsigned indices, bool graphs) +{ + SordModel* sord = (SordModel*)malloc(sizeof(struct SordModelImpl)); + sord->world = world; + sord->n_quads = 0; + sord->n_iters = 0; + + for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) { + const int* const ordering = orderings[i]; + const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)]; + + if (indices & (1 << i)) { + sord->indices[i] = zix_btree_new( + sord_quad_compare, (void*)ordering, NULL); + if (graphs) { + sord->indices[i + (NUM_ORDERS / 2)] = zix_btree_new( + sord_quad_compare, (void*)g_ordering, NULL); + } else { + sord->indices[i + (NUM_ORDERS / 2)] = NULL; + } + } else { + sord->indices[i] = NULL; + sord->indices[i + (NUM_ORDERS / 2)] = NULL; + } + } + + if (!sord->indices[DEFAULT_ORDER]) { + sord->indices[DEFAULT_ORDER] = zix_btree_new( + 
sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL); + } + if (graphs && !sord->indices[DEFAULT_GRAPH_ORDER]) { + sord->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new( + sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL); + } + + return sord; +} + +static void +sord_node_free_internal(SordWorld* world, SordNode* node) +{ + assert(node->refs == 0); + + // Cache pointer to buffer to free after node removal and destruction + const uint8_t* const buf = node->node.buf; + + // Remove node from hash (which frees the node) + if (zix_hash_remove(world->nodes, node)) { + error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n"); + } + + // Free buffer + free((uint8_t*)buf); +} + +static void +sord_add_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i) +{ + if (node) { + assert(node->refs > 0); + ++((SordNode*)node)->refs; + if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) { + ++((SordNode*)node)->meta.res.refs_as_obj; + } + } +} + +static void +sord_drop_quad_ref(SordModel* sord, const SordNode* node, SordQuadIndex i) +{ + if (!node) { + return; + } + + assert(node->refs > 0); + if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) { + assert(node->meta.res.refs_as_obj > 0); + --((SordNode*)node)->meta.res.refs_as_obj; + } + if (--((SordNode*)node)->refs == 0) { + sord_node_free_internal(sord_get_world(sord), (SordNode*)node); + } +} + +void +sord_free(SordModel* sord) +{ + if (!sord) + return; + + // Free nodes + SordQuad tup; + SordIter* i = sord_begin(sord); + for (; !sord_iter_end(i); sord_iter_next(i)) { + sord_iter_get(i, tup); + for (int t = 0; t < TUP_LEN; ++t) { + sord_drop_quad_ref(sord, tup[t], (SordQuadIndex)t); + } + } + sord_iter_free(i); + + // Free quads + ZixBTreeIter* t = zix_btree_begin(sord->indices[DEFAULT_ORDER]); + for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) { + free(zix_btree_get(t)); + } + zix_btree_iter_free(t); + + // Free indices + for (unsigned o = 0; o < NUM_ORDERS; ++o) + 
if (sord->indices[o]) + zix_btree_free(sord->indices[o]); + + free(sord); +} + +SordWorld* +sord_get_world(SordModel* sord) +{ + return sord->world; +} + +size_t +sord_num_quads(const SordModel* sord) +{ + return sord->n_quads; +} + +size_t +sord_num_nodes(const SordWorld* world) +{ + return zix_hash_size(world->nodes); +} + +SordIter* +sord_begin(const SordModel* sord) +{ + if (sord_num_quads(sord) == 0) { + return NULL; + } else { + ZixBTreeIter* cur = zix_btree_begin(sord->indices[DEFAULT_ORDER]); + SordQuad pat = { 0, 0, 0, 0 }; + return sord_iter_new(sord, cur, pat, DEFAULT_ORDER, ALL, 0); + } +} + +SordIter* +sord_find(SordModel* sord, const SordQuad pat) +{ + if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) + return sord_begin(sord); + + SearchMode mode; + int n_prefix; + const SordOrder index_order = sord_best_index(sord, pat, &mode, &n_prefix); + + SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n", + TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix); + + if (pat[0] && pat[1] && pat[2] && pat[3]) + mode = SINGLE; // No duplicate quads (Sord is a set) + + ZixBTree* const db = sord->indices[index_order]; + ZixBTreeIter* cur = NULL; + zix_btree_lower_bound(db, pat, &cur); + if (zix_btree_iter_is_end(cur)) { + SORD_FIND_LOG("No match found\n"); + zix_btree_iter_free(cur); + return NULL; + } + const SordNode** const key = (const SordNode**)zix_btree_get(cur); + if (!key || ( (mode == RANGE || mode == SINGLE) + && !sord_quad_match_inline(pat, key) )) { + SORD_FIND_LOG("No match found\n"); + zix_btree_iter_free(cur); + return NULL; + } + + return sord_iter_new(sord, cur, pat, index_order, mode, n_prefix); +} + +SordIter* +sord_search(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordQuad pat = { s, p, o, g }; + return sord_find(model, pat); +} + +SordNode* +sord_get(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + if ((bool)s 
+ (bool)p + (bool)o != 2) { + return NULL; + } + + SordIter* i = sord_search(model, s, p, o, g); + SordNode* ret = NULL; + if (!s) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT)); + } else if (!p) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE)); + } else if (!o) { + ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT)); + } + + sord_iter_free(i); + return ret; +} + +bool +sord_ask(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordQuad pat = { s, p, o, g }; + return sord_contains(model, pat); +} + +uint64_t +sord_count(SordModel* model, + const SordNode* s, + const SordNode* p, + const SordNode* o, + const SordNode* g) +{ + SordIter* i = sord_search(model, s, p, o, g); + uint64_t n = 0; + for (; !sord_iter_end(i); sord_iter_next(i)) { + ++n; + } + sord_iter_free(i); + return n; +} + +bool +sord_contains(SordModel* sord, const SordQuad pat) +{ + SordIter* iter = sord_find(sord, pat); + bool ret = (iter != NULL); + sord_iter_free(iter); + return ret; +} + +static uint8_t* +sord_strndup(const uint8_t* str, size_t len) +{ + uint8_t* dup = (uint8_t*)malloc(len + 1); + memcpy(dup, str, len + 1); + return dup; +} + +SordNodeType +sord_node_get_type(const SordNode* node) +{ + switch (node->node.type) { + case SERD_URI: + return SORD_URI; + case SERD_BLANK: + return SORD_BLANK; + default: + return SORD_LITERAL; + } + SORD_UNREACHABLE(); +} + +const uint8_t* +sord_node_get_string(const SordNode* node) +{ + return node->node.buf; +} + +const uint8_t* +sord_node_get_string_counted(const SordNode* node, size_t* bytes) +{ + *bytes = node->node.n_bytes; + return node->node.buf; +} + +const uint8_t* +sord_node_get_string_measured(const SordNode* node, + size_t* bytes, + size_t* chars) +{ + *bytes = node->node.n_bytes; + *chars = node->node.n_chars; + return node->node.buf; +} + +const char* +sord_node_get_language(const SordNode* node) +{ + if (node->node.type != SERD_LITERAL || 
!node->meta.lit.lang[0]) { + return NULL; + } + return node->meta.lit.lang; +} + +SordNode* +sord_node_get_datatype(const SordNode* node) +{ + return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL; +} + +SerdNodeFlags +sord_node_get_flags(const SordNode* node) +{ + return node->node.flags; +} + +bool +sord_node_is_inline_object(const SordNode* node) +{ + return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1); +} + +static SordNode* +sord_insert_node(SordWorld* world, const SordNode* key, bool copy) +{ + SordNode* node = NULL; + ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node); + switch (st) { + case ZIX_STATUS_EXISTS: + ++node->refs; + break; + case ZIX_STATUS_SUCCESS: + assert(node->refs == 1); + if (copy) { + node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes); + } + if (node->node.type == SERD_LITERAL) { + node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype); + } + return node; + default: + error(world, SERD_ERR_INTERNAL, + "error inserting node `%s'\n", key->node.buf); + } + + if (!copy) { + // Free the buffer we would have copied if a new node was created + free((uint8_t*)key->node.buf); + } + + return node; +} + +static SordNode* +sord_new_uri_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars, bool copy) +{ + if (!serd_uri_string_has_scheme(str)) { + error(world, SERD_ERR_BAD_ARG, + "attempt to map invalid URI `%s'\n", str); + return NULL; // Can't intern relative URIs + } + + const SordNode key = { + { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } } + }; + + return sord_insert_node(world, &key, copy); +} + +SordNode* +sord_new_uri(SordWorld* world, const uint8_t* str) +{ + const SerdNode node = serd_node_from_string(SERD_URI, str); + return sord_new_uri_counted(world, str, node.n_bytes, node.n_chars, true); +} + +SordNode* +sord_new_relative_uri(SordWorld* world, + const uint8_t* str, + const uint8_t* base_str) +{ + if 
(serd_uri_string_has_scheme(str)) { + return sord_new_uri(world, str); + } + SerdURI buri = SERD_URI_NULL; + SerdNode base = serd_node_new_uri_from_string(base_str, NULL, &buri); + SerdNode node = serd_node_new_uri_from_string(str, &buri, NULL); + + SordNode* ret = sord_new_uri_counted( + world, node.buf, node.n_bytes, node.n_chars, false); + + serd_node_free(&base); + return ret; +} + +static SordNode* +sord_new_blank_counted(SordWorld* world, const uint8_t* str, + size_t n_bytes, size_t n_chars) +{ + const SordNode key = { + { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } } + }; + + return sord_insert_node(world, &key, true); +} + +SordNode* +sord_new_blank(SordWorld* world, const uint8_t* str) +{ + const SerdNode node = serd_node_from_string(SERD_URI, str); + return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars); +} + +static SordNode* +sord_new_literal_counted(SordWorld* world, + SordNode* datatype, + const uint8_t* str, + size_t n_bytes, + size_t n_chars, + SerdNodeFlags flags, + const char* lang) +{ + SordNode key = { + { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } } + }; + key.meta.lit.datatype = sord_node_copy(datatype); + memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang)); + if (lang) { + strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang)); + } + + return sord_insert_node(world, &key, true); +} + +SordNode* +sord_new_literal(SordWorld* world, SordNode* datatype, + const uint8_t* str, const char* lang) +{ + SerdNodeFlags flags = 0; + size_t n_bytes = 0; + size_t n_chars = serd_strlen(str, &n_bytes, &flags); + return sord_new_literal_counted(world, datatype, + str, n_bytes, n_chars, flags, + lang); +} + +SordNode* +sord_node_from_serd_node(SordWorld* world, + SerdEnv* env, + const SerdNode* sn, + const SerdNode* datatype, + const SerdNode* lang) +{ + if (!sn) { + return NULL; + } + + SordNode* datatype_node = NULL; + SordNode* ret = NULL; + switch (sn->type) { + case SERD_NOTHING: + return NULL; + case 
SERD_LITERAL: + datatype_node = sord_node_from_serd_node( + world, env, datatype, NULL, NULL), + ret = sord_new_literal_counted( + world, + datatype_node, + sn->buf, + sn->n_bytes, + sn->n_chars, + sn->flags, + lang ? (const char*)lang->buf : NULL); + sord_node_free(world, datatype_node); + return ret; + case SERD_URI: + if (serd_uri_string_has_scheme(sn->buf)) { + return sord_new_uri_counted( + world, sn->buf, sn->n_bytes, sn->n_chars, true); + } else { + SerdURI base_uri; + serd_env_get_base_uri(env, &base_uri); + SerdURI abs_uri; + SerdNode abs_uri_node = serd_node_new_uri_from_node( + sn, &base_uri, &abs_uri); + ret = sord_new_uri_counted(world, + abs_uri_node.buf, + abs_uri_node.n_bytes, + abs_uri_node.n_chars, + true); + serd_node_free(&abs_uri_node); + return ret; + } + case SERD_CURIE: { + SerdChunk uri_prefix; + SerdChunk uri_suffix; + if (serd_env_expand(env, sn, &uri_prefix, &uri_suffix)) { + error(world, SERD_ERR_BAD_CURIE, + "failed to expand CURIE `%s'\n", sn->buf); + return NULL; + } + const size_t uri_len = uri_prefix.len + uri_suffix.len; + uint8_t* buf = (uint8_t*)malloc(uri_len + 1); + memcpy(buf, uri_prefix.buf, uri_prefix.len); + memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len); + buf[uri_len] = '\0'; + ret = sord_new_uri_counted( + world, buf, uri_len, serd_strlen(buf, NULL, NULL), false); + return ret; + } + case SERD_BLANK: + return sord_new_blank_counted(world, sn->buf, sn->n_bytes, sn->n_chars); + } + return NULL; +} + +const SerdNode* +sord_node_to_serd_node(const SordNode* node) +{ + return node ? 
&node->node : &SERD_NODE_NULL; +} + +void +sord_node_free(SordWorld* world, SordNode* node) +{ + if (!node) { + return; + } else if (node->refs == 0) { + error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n"); + } else if (--node->refs == 0) { + sord_node_free_internal(world, node); + } +} + +SordNode* +sord_node_copy(const SordNode* node) +{ + SordNode* copy = (SordNode*)node; + if (copy) { + ++copy->refs; + } + return copy; +} + +static inline bool +sord_add_to_index(SordModel* sord, const SordNode** tup, SordOrder order) +{ + return !zix_btree_insert(sord->indices[order], tup); +} + +bool +sord_add(SordModel* sord, const SordQuad tup) +{ + SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + if (!tup[0] || !tup[1] || !tup[2]) { + error(sord->world, SERD_ERR_BAD_ARG, + "attempt to add quad with NULL field\n"); + return false; + } else if (sord->n_iters > 0) { + error(sord->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n"); + } + + const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad)); + memcpy(quad, tup, sizeof(SordQuad)); + + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (sord->indices[i] && (i < GSPO || tup[3])) { + if (!sord_add_to_index(sord, quad, (SordOrder)i)) { + assert(i == 0); // Assuming index coherency + free(quad); + return false; // Quad already stored, do nothing + } + } + } + + for (int i = 0; i < TUP_LEN; ++i) + sord_add_quad_ref(sord, tup[i], (SordQuadIndex)i); + + ++sord->n_quads; + return true; +} + +void +sord_remove(SordModel* sord, const SordQuad tup) +{ + SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + if (sord->n_iters > 0) { + error(sord->world, SERD_ERR_BAD_ARG, "remove with iterator\n"); + } + + SordNode* quad = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (sord->indices[i] && (i < GSPO || tup[3])) { + if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, NULL)) { + assert(i == 0); // Assuming index coherency + return; // Quad not found, do nothing + } + } + } + + 
free(quad); + + for (int i = 0; i < TUP_LEN; ++i) + sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i); + + --sord->n_quads; +} + +SerdStatus +sord_erase(SordModel* sord, SordIter* iter) +{ + if (sord->n_iters > 1) { + error(sord->world, SERD_ERR_BAD_ARG, "erased with many iterators\n"); + return SERD_ERR_BAD_ARG; + } + + SordQuad tup; + sord_iter_get(iter, tup); + + SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup)); + + SordNode* quad = NULL; + for (unsigned i = 0; i < NUM_ORDERS; ++i) { + if (sord->indices[i] && (i < GSPO || tup[3])) { + if (zix_btree_remove(sord->indices[i], tup, (void**)&quad, + i == iter->order ? &iter->cur : NULL)) { + return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL; + } + } + } + iter->end = zix_btree_iter_is_end(iter->cur); + sord_iter_scan_next(iter); + + free(quad); + + for (int i = 0; i < TUP_LEN; ++i) + sord_drop_quad_ref(sord, tup[i], (SordQuadIndex)i); + + --sord->n_quads; + return SERD_SUCCESS; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sord_internal.h Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,52 @@ +/* + Copyright 2011-2015 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef SORD_SORD_INTERNAL_H +#define SORD_SORD_INTERNAL_H + +#include <stddef.h> +#include <stdint.h> + +#include "sord/sord.h" + +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +# define SORD_UNREACHABLE() __builtin_unreachable() +#else +# define SORD_UNREACHABLE() assert(false) +#endif + +/** Resource node metadata */ +typedef struct { + size_t refs_as_obj; ///< References as a quad object +} SordResourceMetadata; + +/** Literal node metadata */ +typedef struct { + SordNode* datatype; ///< Optional literal data type URI + char lang[16]; ///< Optional language tag +} SordLiteralMetadata; + +/** Node */ +struct SordNodeImpl { + SerdNode node; ///< Serd node + size_t refs; ///< Reference count (# of containing quads) + union { + SordResourceMetadata res; + SordLiteralMetadata lit; + } meta; +}; + +#endif /* SORD_SORD_INTERNAL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sord_test.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,761 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sord/sord.h" + +static const int DIGITS = 3; +static const unsigned n_objects_per = 2; + +static int n_expected_errors = 0; + +typedef struct { + SordQuad query; + int expected_num_results; +} QueryTest; + +#define USTR(s) ((const uint8_t*)(s)) + +static SordNode* +uri(SordWorld* world, int num) +{ + if (num == 0) + return 0; + + char str[] = "eg:000"; + char* uri_num = str + 3; // First `0' + snprintf(uri_num, DIGITS + 1, "%0*d", DIGITS, num); + return sord_new_uri(world, (const uint8_t*)str); +} + +static int +test_fail(const char* fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + return 1; +} + +static int +generate(SordWorld* world, + SordModel* sord, + size_t n_quads, + SordNode* graph) +{ + fprintf(stderr, "Generating %zu (S P *) quads with %u objects each\n", + n_quads, n_objects_per); + + for (size_t i = 0; i < n_quads; ++i) { + int num = (i * n_objects_per) + 1; + + SordNode* ids[2 + n_objects_per]; + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + ids[j] = uri(world, num++); + } + + for (unsigned j = 0; j < n_objects_per; ++j) { + SordQuad tup = { ids[0], ids[1], ids[2 + j], graph }; + if (!sord_add(sord, tup)) { + return test_fail("Fail: Failed to add quad\n"); + } + } + + for (unsigned j = 0; j < 2 + n_objects_per; ++j) { + sord_node_free(world, ids[j]); + } + } + + // Add some literals + + // (98 4 "hello") and (98 4 "hello"^^<5>) + SordQuad tup = { 0, 0, 0, 0 }; + tup[0] = uri(world, 98); + tup[1] = uri(world, 4); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add typed literal\n"); + } + + // (96 4 "hello"^^<4>) and (96 4 "hello"^^<5>) + tup[0] = uri(world, 96); + tup[1] = uri(world, 4); + tup[2] = sord_new_literal(world, uri(world, 4), USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, uri(world, 5), USTR("hello"), NULL); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add typed literal\n"); + } + + // (94 5 "hello") and (94 5 "hello"@en-gb) + tup[0] = uri(world, 94); + tup[1] = uri(world, 5); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, NULL, 
USTR("hello"), "en-gb"); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add literal with language\n"); + } + + // (92 6 "hello"@en-us) and (92 5 "hello"@en-gb) + tup[0] = uri(world, 92); + tup[1] = uri(world, 6); + tup[2] = sord_new_literal(world, 0, USTR("hello"), "en-us"); + tup[3] = graph; + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, NULL, USTR("hello"), "en-gb"); + if (!sord_add(sord, tup)) { + return test_fail("Failed to add literal with language\n"); + } + + sord_node_free(world, (SordNode*)tup[0]); + sord_node_free(world, (SordNode*)tup[2]); + tup[0] = uri(world, 14); + tup[2] = sord_new_literal(world, 0, USTR("bonjour"), "fr"); + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, 0, USTR("salut"), "fr"); + sord_add(sord, tup); + + // Attempt to add some duplicates + if (sord_add(sord, tup)) { + return test_fail("Fail: Successfully added duplicate quad\n"); + } + if (sord_add(sord, tup)) { + return test_fail("Fail: Successfully added duplicate quad\n"); + } + + // Add a blank node subject + sord_node_free(world, (SordNode*)tup[0]); + tup[0] = sord_new_blank(world, USTR("ablank")); + sord_add(sord, tup); + + sord_node_free(world, (SordNode*)tup[1]); + sord_node_free(world, (SordNode*)tup[2]); + tup[1] = uri(world, 6); + tup[2] = uri(world, 7); + sord_add(sord, tup); + sord_node_free(world, (SordNode*)tup[0]); + sord_node_free(world, (SordNode*)tup[1]); + sord_node_free(world, (SordNode*)tup[2]); + + return EXIT_SUCCESS; +} + +#define TUP_FMT "(%6s %6s %6s)" +#define TUP_FMT_ARGS(t) \ + ((t)[0] ? sord_node_get_string((t)[0]) : USTR("*")), \ + ((t)[1] ? sord_node_get_string((t)[1]) : USTR("*")), \ + ((t)[2] ? 
sord_node_get_string((t)[2]) : USTR("*")) + +static int +test_read(SordWorld* world, SordModel* sord, SordNode* g, + const size_t n_quads) +{ + int ret = EXIT_SUCCESS; + + SordQuad id; + + SordIter* iter = sord_begin(sord); + if (sord_iter_get_model(iter) != sord) { + return test_fail("Fail: Iterator has incorrect sord pointer\n"); + } + + for (; !sord_iter_end(iter); sord_iter_next(iter)) + sord_iter_get(iter, id); + + // Attempt to increment past end + if (!sord_iter_next(iter)) { + return test_fail("Fail: Successfully incremented past end\n"); + } + + sord_iter_free(iter); + + const uint8_t* s = USTR("hello"); + SordNode* plain_hello = sord_new_literal(world, 0, s, NULL); + SordNode* type4_hello = sord_new_literal(world, uri(world, 4), s, NULL); + SordNode* type5_hello = sord_new_literal(world, uri(world, 5), s, NULL); + SordNode* gb_hello = sord_new_literal(world, NULL, s, "en-gb"); + SordNode* us_hello = sord_new_literal(world, NULL, s, "en-us"); + +#define NUM_PATTERNS 18 + + QueryTest patterns[NUM_PATTERNS] = { + { { 0, 0, 0 }, (int)(n_quads * n_objects_per) + 12 }, + { { uri(world, 1), 0, 0 }, 2 }, + { { uri(world, 9), uri(world, 9), uri(world, 9) }, 0 }, + { { uri(world, 1), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 3), uri(world, 4), uri(world, 0) }, 2 }, + { { uri(world, 0), uri(world, 2), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 1), uri(world, 0), uri(world, 0) }, 2 }, + { { uri(world, 1), uri(world, 0), uri(world, 4) }, 1 }, + { { uri(world, 0), uri(world, 2), uri(world, 0) }, 2 }, + { { uri(world, 98), uri(world, 4), plain_hello }, 1 }, + { { uri(world, 98), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type4_hello }, 1 }, + { { uri(world, 96), uri(world, 4), type5_hello }, 1 }, + { { uri(world, 94), uri(world, 5), plain_hello }, 1 }, + { { uri(world, 94), uri(world, 5), gb_hello }, 1 }, + { { uri(world, 92), uri(world, 6), gb_hello }, 1 }, + { { uri(world, 
92), uri(world, 6), us_hello }, 1 } }; + + SordQuad match = { uri(world, 1), uri(world, 2), uri(world, 4), g }; + if (!sord_contains(sord, match)) { + return test_fail("Fail: No match for " TUP_FMT "\n", + TUP_FMT_ARGS(match)); + } + + SordQuad nomatch = { uri(world, 1), uri(world, 2), uri(world, 9), g }; + if (sord_contains(sord, nomatch)) { + return test_fail("Fail: False match for " TUP_FMT "\n", + TUP_FMT_ARGS(nomatch)); + } + + if (sord_get(sord, NULL, NULL, uri(world, 3), g)) { + return test_fail("Fail: Get *,*,3 succeeded\n"); + } else if (!sord_node_equals( + sord_get(sord, uri(world, 1), uri(world, 2), NULL, g), + uri(world, 3))) { + return test_fail("Fail: Get 1,2,* != 3\n"); + } else if (!sord_node_equals( + sord_get(sord, uri(world, 1), NULL, uri(world, 3), g), + uri(world, 2))) { + return test_fail("Fail: Get 1,*,3 != 2\n"); + } else if (!sord_node_equals( + sord_get(sord, NULL, uri(world, 2), uri(world, 3), g), + uri(world, 1))) { + return test_fail("Fail: Get *,2,3 != 1\n"); + } + + for (unsigned i = 0; i < NUM_PATTERNS; ++i) { + QueryTest test = patterns[i]; + SordQuad pat = { test.query[0], test.query[1], test.query[2], g }; + fprintf(stderr, "Query " TUP_FMT "... 
", TUP_FMT_ARGS(pat)); + + iter = sord_find(sord, pat); + int num_results = 0; + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + ++num_results; + if (!sord_quad_match(pat, id)) { + sord_iter_free(iter); + return test_fail( + "Fail: Query result " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(id)); + } + } + sord_iter_free(iter); + if (num_results != test.expected_num_results) { + return test_fail("Fail: Expected %d results, got %d\n", + test.expected_num_results, num_results); + } + fprintf(stderr, "OK (%u matches)\n", test.expected_num_results); + } + + // Query blank node subject + SordQuad pat = { sord_new_blank(world, USTR("ablank")), 0, 0 }; + if (!pat[0]) { + return test_fail("Blank node subject lost\n"); + } + fprintf(stderr, "Query " TUP_FMT "... ", TUP_FMT_ARGS(pat)); + iter = sord_find(sord, pat); + int num_results = 0; + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + ++num_results; + if (!sord_quad_match(pat, id)) { + sord_iter_free(iter); + return test_fail( + "Fail: Query result " TUP_FMT " does not match pattern\n", + TUP_FMT_ARGS(id)); + } + } + fprintf(stderr, "OK\n"); + sord_node_free(world, (SordNode*)pat[0]); + sord_iter_free(iter); + if (num_results != 2) { + return test_fail("Blank node subject query failed\n"); + } + + // Test nested queries + fprintf(stderr, "Nested Queries... 
"); + const SordNode* last_subject = 0; + iter = sord_search(sord, NULL, NULL, NULL, NULL); + for (; !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, id); + if (id[0] == last_subject) + continue; + + SordQuad subpat = { id[0], 0, 0 }; + SordIter* subiter = sord_find(sord, subpat); + uint64_t num_sub_results = 0; + if (sord_iter_get_node(subiter, SORD_SUBJECT) != id[0]) { + return test_fail("Fail: Incorrect initial submatch\n"); + } + for (; !sord_iter_end(subiter); sord_iter_next(subiter)) { + SordQuad subid; + sord_iter_get(subiter, subid); + if (!sord_quad_match(subpat, subid)) { + sord_iter_free(iter); + sord_iter_free(subiter); + return test_fail( + "Fail: Nested query result does not match pattern\n"); + } + ++num_sub_results; + } + sord_iter_free(subiter); + if (num_sub_results != n_objects_per) { + return test_fail( + "Fail: Nested query " TUP_FMT " failed" + " (%d results, expected %d)\n", + TUP_FMT_ARGS(subpat), num_sub_results, n_objects_per); + } + + uint64_t count = sord_count(sord, id[0], 0, 0, 0); + if (count != num_sub_results) { + return test_fail("Fail: Query " TUP_FMT " sord_count() %d" + "does not match result count %d\n", + TUP_FMT_ARGS(subpat), count, num_sub_results); + } + + last_subject = id[0]; + } + fprintf(stderr, "OK\n\n"); + sord_iter_free(iter); + + return ret; +} + +static SerdStatus +unexpected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "unexpected error: "); + vfprintf(stderr, error->fmt, *error->args); + return SERD_SUCCESS; +} + +static SerdStatus +expected_error(void* handle, const SerdError* error) +{ + fprintf(stderr, "expected error: "); + vfprintf(stderr, error->fmt, *error->args); + ++n_expected_errors; + return SERD_SUCCESS; +} + +static int +finished(SordWorld* world, SordModel* sord, int status) +{ + sord_free(sord); + sord_world_free(world); + return status; +} + +int +main(int argc, char** argv) +{ + static const size_t n_quads = 300; + + sord_free(NULL); // Shouldn't crash + + 
SordWorld* world = sord_world_new(); + + + // Attempt to create invalid URI + fprintf(stderr, "expected "); + SordNode* bad_uri = sord_new_uri(world, USTR("noscheme")); + if (bad_uri) { + return test_fail("Successfully created invalid URI \"noscheme\"\n"); + } + sord_node_free(world, bad_uri); + + sord_world_set_error_sink(world, expected_error, NULL); + + // Attempt to create invalid CURIE + SerdNode base = serd_node_from_string(SERD_URI, USTR("http://example.org/")); + SerdEnv* env = serd_env_new(&base); + SerdNode sbadns = serd_node_from_string(SERD_CURIE, USTR("badns:")); + SordNode* badns = sord_node_from_serd_node(world, env, &sbadns, NULL, NULL); + if (badns) { + return test_fail("Successfully created CURIE with bad namespace\n"); + } + sord_node_free(world, badns); + serd_env_free(env); + + // Attempt to create node from garbage + SerdNode junk = SERD_NODE_NULL; + junk.type = (SerdType)1234; + if (sord_node_from_serd_node(world, env, &junk, NULL, NULL)) { + return test_fail("Successfully created node from garbage serd node\n"); + } + + // Attempt to create NULL node + SordNode* nil_node = sord_node_from_serd_node( + world, NULL, &SERD_NODE_NULL, NULL, NULL); + if (nil_node) { + return test_fail("Successfully created NULL node\n"); + } + sord_node_free(world, nil_node); + + // Attempt to double-free a node + SordNode* garbage = sord_new_uri(world, USTR("urn:garbage")); + sord_node_free(world, garbage); + sord_world_set_error_sink(world, expected_error, NULL); + sord_node_free(world, garbage); + sord_world_set_error_sink(world, unexpected_error, NULL); + if (n_expected_errors != 2) { + return test_fail("Successfully freed node twice\n"); + } + + sord_world_set_error_sink(world, unexpected_error, NULL); + + // Check node flags are set properly + SordNode* with_newline = sord_new_literal(world, NULL, USTR("a\nb"), NULL); + if (!(sord_node_get_flags(with_newline) & SERD_HAS_NEWLINE)) { + return test_fail("Newline flag not set\n"); + } + SordNode* with_quote = 
sord_new_literal(world, NULL, USTR("a\"b"), NULL); + if (!(sord_node_get_flags(with_quote) & SERD_HAS_QUOTE)) { + return test_fail("Quote flag not set\n"); + } + + // Create with minimal indexing + SordModel* sord = sord_new(world, SORD_SPO, false); + generate(world, sord, n_quads, NULL); + + if (test_read(world, sord, NULL, n_quads)) { + sord_free(sord); + sord_world_free(world); + return EXIT_FAILURE; + } + + // Check adding tuples with NULL fields fails + sord_world_set_error_sink(world, expected_error, NULL); + const size_t initial_num_quads = sord_num_quads(sord); + SordQuad tup = { 0, 0, 0, 0}; + if (sord_add(sord, tup)) { + return test_fail("Added NULL tuple\n"); + } + tup[0] = uri(world, 1); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with NULL P and O\n"); + } + tup[1] = uri(world, 2); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with NULL O\n"); + } + + if (sord_num_quads(sord) != initial_num_quads) { + return test_fail("Num quads %zu != %zu\n", + sord_num_quads(sord), initial_num_quads); + } + + // Check adding tuples with an active iterator fails + SordIter* iter = sord_begin(sord); + tup[2] = uri(world, 3); + if (sord_add(sord, tup)) { + return test_fail("Added tuple with active iterator\n"); + } + + // Check removing tuples with several active iterator fails + SordIter* iter2 = sord_begin(sord); + if (!sord_erase(sord, iter)) { + return test_fail("Erased tuple with several active iterators\n"); + } + n_expected_errors = 0; + sord_remove(sord, tup); + if (n_expected_errors != 1) { + return test_fail("Removed tuple with several active iterators\n"); + } + sord_iter_free(iter); + sord_iter_free(iter2); + + sord_world_set_error_sink(world, unexpected_error, NULL); + + // Check interning merges equivalent values + SordNode* uri_id = sord_new_uri(world, USTR("http://example.org")); + SordNode* blank_id = sord_new_blank(world, USTR("testblank")); + SordNode* lit_id = sord_new_literal(world, uri_id, USTR("hello"), NULL); + if 
(sord_node_get_type(uri_id) != SORD_URI) { + return test_fail("URI node has incorrect type\n"); + } else if (sord_node_get_type(blank_id) != SORD_BLANK) { + return test_fail("Blank node has incorrect type\n"); + } else if (sord_node_get_type(lit_id) != SORD_LITERAL) { + return test_fail("Literal node has incorrect type\n"); + } + + const size_t initial_num_nodes = sord_num_nodes(world); + + SordNode* uri_id2 = sord_new_uri(world, USTR("http://example.org")); + SordNode* blank_id2 = sord_new_blank(world, USTR("testblank")); + SordNode* lit_id2 = sord_new_literal(world, uri_id, USTR("hello"), NULL); + if (uri_id2 != uri_id || !sord_node_equals(uri_id2, uri_id)) { + fprintf(stderr, "Fail: URI interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (blank_id2 != blank_id + || !sord_node_equals(blank_id2, blank_id)) { + fprintf(stderr, "Fail: Blank node interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (lit_id2 != lit_id || !sord_node_equals(lit_id2, lit_id)) { + fprintf(stderr, "Fail: Literal interning failed (duplicates)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + if (sord_num_nodes(world) != initial_num_nodes) { + return test_fail("Num nodes %zu != %zu\n", + sord_num_nodes(world), initial_num_nodes); + } + + const uint8_t ni_hao[] = { 0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD }; + SordNode* chello = sord_new_literal(world, NULL, ni_hao, "cmn"); + + // Test literal length + size_t n_bytes; + size_t n_chars; + const uint8_t* str = sord_node_get_string_counted(lit_id2, &n_bytes); + if (strcmp((const char*)str, "hello")) { + return test_fail("Literal node corrupt\n"); + } else if (n_bytes != strlen("hello")) { + return test_fail("ASCII literal byte count incorrect\n"); + } + + str = sord_node_get_string_measured(lit_id2, &n_bytes, &n_chars); + if (n_bytes != strlen("hello") || n_chars != strlen("hello")) { + return test_fail("ASCII literal measured length incorrect\n"); + } + + str = 
sord_node_get_string_measured(chello, &n_bytes, &n_chars); + if (n_bytes != 6) { + return test_fail("Multi-byte literal byte count incorrect\n"); + } else if (n_chars != 2) { + return test_fail("Multi-byte literal character count incorrect\n"); + } + + // Check interning doesn't clash non-equivalent values + SordNode* uri_id3 = sord_new_uri(world, USTR("http://example.orgX")); + SordNode* blank_id3 = sord_new_blank(world, USTR("testblankX")); + SordNode* lit_id3 = sord_new_literal(world, uri_id, USTR("helloX"), NULL); + if (uri_id3 == uri_id || sord_node_equals(uri_id3, uri_id)) { + fprintf(stderr, "Fail: URI interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (blank_id3 == blank_id || sord_node_equals(blank_id3, blank_id)) { + fprintf(stderr, "Fail: Blank node interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } else if (lit_id3 == lit_id || sord_node_equals(lit_id3, lit_id)) { + fprintf(stderr, "Fail: Literal interning failed (clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Check literal interning + SordNode* lit4 = sord_new_literal(world, NULL, USTR("hello"), NULL); + SordNode* lit5 = sord_new_literal(world, uri_id2, USTR("hello"), NULL); + SordNode* lit6 = sord_new_literal(world, NULL, USTR("hello"), "en-ca"); + if (lit4 == lit5 || sord_node_equals(lit4, lit5) + || lit4 == lit6 || sord_node_equals(lit4, lit6) + || lit5 == lit6 || sord_node_equals(lit5, lit6)) { + fprintf(stderr, "Fail: Literal interning failed (type/lang clash)\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Check relative URI construction + SordNode* reluri = sord_new_relative_uri( + world, USTR("a/b"), USTR("http://example.org/")); + if (strcmp((const char*)sord_node_get_string(reluri), + "http://example.org/a/b")) { + fprintf(stderr, "Fail: Bad relative URI constructed: <%s>\n", + sord_node_get_string(reluri)); + return finished(world, sord, EXIT_FAILURE); + } + SordNode* reluri2 = 
sord_new_relative_uri( + world, USTR("http://drobilla.net/"), USTR("http://example.org/")); + if (strcmp((const char*)sord_node_get_string(reluri2), + "http://drobilla.net/")) { + fprintf(stderr, "Fail: Bad relative URI constructed: <%s>\n", + sord_node_get_string(reluri)); + return finished(world, sord, EXIT_FAILURE); + } + + // Check comparison with NULL + sord_node_free(world, uri_id); + sord_node_free(world, blank_id); + sord_node_free(world, lit_id); + sord_node_free(world, uri_id2); + sord_node_free(world, blank_id2); + sord_node_free(world, lit_id2); + sord_node_free(world, uri_id3); + sord_node_free(world, blank_id3); + sord_node_free(world, lit_id3); + sord_free(sord); + + static const char* const index_names[6] = { + "spo", "sop", "ops", "osp", "pso", "pos" + }; + + for (int i = 0; i < 6; ++i) { + sord = sord_new(world, (1 << i), false); + printf("Testing Index `%s'\n", index_names[i]); + generate(world, sord, n_quads, 0); + if (test_read(world, sord, 0, n_quads)) + return finished(world, sord, EXIT_FAILURE); + sord_free(sord); + } + + static const char* const graph_index_names[6] = { + "gspo", "gsop", "gops", "gosp", "gpso", "gpos" + }; + + for (int i = 0; i < 6; ++i) { + sord = sord_new(world, (1 << i), true); + printf("Testing Index `%s'\n", graph_index_names[i]); + SordNode* graph = uri(world, 42); + generate(world, sord, n_quads, graph); + if (test_read(world, sord, graph, n_quads)) + return finished(world, sord, EXIT_FAILURE); + sord_free(sord); + } + + // Test removing + sord = sord_new(world, SORD_SPO, true); + tup[0] = uri(world, 1); + tup[1] = uri(world, 2); + tup[2] = sord_new_literal(world, 0, USTR("hello"), NULL); + tup[3] = 0; + sord_add(sord, tup); + if (!sord_ask(sord, tup[0], tup[1], tup[2], tup[3])) { + fprintf(stderr, "Failed to add tuple\n"); + return finished(world, sord, EXIT_FAILURE); + } + sord_node_free(world, (SordNode*)tup[2]); + tup[2] = sord_new_literal(world, 0, USTR("hi"), NULL); + sord_add(sord, tup); + sord_remove(sord, 
tup); + if (sord_num_quads(sord) != 1) { + fprintf(stderr, "Remove failed (%zu quads, expected 1)\n", + sord_num_quads(sord)); + return finished(world, sord, EXIT_FAILURE); + } + + iter = sord_find(sord, tup); + if (!sord_iter_end(iter)) { + fprintf(stderr, "Found removed tuple\n"); + return finished(world, sord, EXIT_FAILURE); + } + sord_iter_free(iter); + + // Test double remove (silent success) + sord_remove(sord, tup); + + // Load a couple graphs + SordNode* graph42 = uri(world, 42); + SordNode* graph43 = uri(world, 43); + generate(world, sord, 1, graph42); + generate(world, sord, 1, graph43); + + // Remove one graph via iterator + SerdStatus st; + iter = sord_search(sord, NULL, NULL, NULL, graph43); + while (!sord_iter_end(iter)) { + if ((st = sord_erase(sord, iter))) { + fprintf(stderr, "Remove by iterator failed (%s)\n", + serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + } + sord_iter_free(iter); + + // Erase the first tuple (an element in the default graph) + iter = sord_begin(sord); + if (sord_erase(sord, iter)) { + return test_fail("Failed to erase begin iterator on non-empty model\n"); + } + sord_iter_free(iter); + + // Ensure only the other graph is left + SordQuad quad; + SordQuad pat = { 0, 0, 0, graph42 }; + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + sord_iter_get(iter, quad); + if (!sord_quad_match(quad, pat)) { + fprintf(stderr, "Graph removal via iteration failed\n"); + return finished(world, sord, EXIT_FAILURE); + } + } + sord_iter_free(iter); + + // Load file into two separate graphs + sord_free(sord); + sord = sord_new(world, SORD_SPO, true); + env = serd_env_new(&base); + SordNode* graph1 = sord_new_uri(world, USTR("http://example.org/graph1")); + SordNode* graph2 = sord_new_uri(world, USTR("http://example.org/graph2")); + SerdReader* reader = sord_new_reader(sord, env, SERD_TURTLE, graph1); + if ((st = serd_reader_read_string(reader, USTR("<s> <p> <o> .")))) { + fprintf(stderr, "Failed 
to read string (%s)\n", serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + serd_reader_free(reader); + reader = sord_new_reader(sord, env, SERD_TURTLE, graph2); + if ((st = serd_reader_read_string(reader, USTR("<s> <p> <o> .")))) { + fprintf(stderr, "Failed to re-read string (%s)\n", serd_strerror(st)); + return finished(world, sord, EXIT_FAILURE); + } + serd_reader_free(reader); + serd_env_free(env); + + // Ensure we only see triple once + size_t n_triples = 0; + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + fprintf(stderr, "%s %s %s %s\n", + sord_node_get_string(sord_iter_get_node(iter, SORD_SUBJECT)), + sord_node_get_string(sord_iter_get_node(iter, SORD_PREDICATE)), + sord_node_get_string(sord_iter_get_node(iter, SORD_OBJECT)), + sord_node_get_string(sord_iter_get_node(iter, SORD_GRAPH))); + + ++n_triples; + } + sord_iter_free(iter); + if (n_triples != 1) { + fprintf(stderr, "Found duplicate triple\n"); + return finished(world, sord, EXIT_FAILURE); + } + + // Test SPO iteration on an SOP indexed store + sord_free(sord); + sord = sord_new(world, SORD_SOP, false); + generate(world, sord, 1, graph42); + for (iter = sord_begin(sord); !sord_iter_end(iter); sord_iter_next(iter)) { + ++n_triples; + } + sord_iter_free(iter); + + return finished(world, sord, EXIT_SUCCESS); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sord_validate.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,788 @@ +/* + Copyright 2012-2017 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#define _BSD_SOURCE 1 // for realpath +#define _DEFAULT_SOURCE 1 // for realpath + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +# include <windows.h> +#endif + +#include "serd/serd.h" +#include "sord/sord.h" +#include "sord_config.h" + +#ifdef HAVE_PCRE +# include <pcre.h> +#endif + +#define USTR(s) ((const uint8_t*)s) + +#define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/" +#define NS_owl (const uint8_t*)"http://www.w3.org/2002/07/owl#" +#define NS_rdf (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#" +#define NS_xsd (const uint8_t*)"http://www.w3.org/2001/XMLSchema#" + +typedef struct { + SordNode* foaf_Document; + SordNode* owl_AnnotationProperty; + SordNode* owl_Class; + SordNode* owl_DatatypeProperty; + SordNode* owl_FunctionalProperty; + SordNode* owl_InverseFunctionalProperty; + SordNode* owl_ObjectProperty; + SordNode* owl_OntologyProperty; + SordNode* owl_Restriction; + SordNode* owl_Thing; + SordNode* owl_cardinality; + 
SordNode* owl_equivalentClass; + SordNode* owl_maxCardinality; + SordNode* owl_minCardinality; + SordNode* owl_onDatatype; + SordNode* owl_onProperty; + SordNode* owl_someValuesFrom; + SordNode* owl_withRestrictions; + SordNode* rdf_PlainLiteral; + SordNode* rdf_Property; + SordNode* rdf_first; + SordNode* rdf_rest; + SordNode* rdf_type; + SordNode* rdfs_Class; + SordNode* rdfs_Literal; + SordNode* rdfs_Resource; + SordNode* rdfs_domain; + SordNode* rdfs_label; + SordNode* rdfs_range; + SordNode* rdfs_subClassOf; + SordNode* xsd_anyURI; + SordNode* xsd_decimal; + SordNode* xsd_double; + SordNode* xsd_maxInclusive; + SordNode* xsd_minInclusive; + SordNode* xsd_pattern; + SordNode* xsd_string; +} URIs; + +int n_errors = 0; +int n_restrictions = 0; +bool one_line_errors = false; + +static int +print_version(void) +{ + printf("sord_validate " SORD_VERSION + " <http://drobilla.net/software/sord>\n"); + printf("Copyright 2012-2017 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name); + fprintf(os, "Validate RDF data\n\n"); + fprintf(os, " -h Display this help and exit\n"); + fprintf(os, " -l Print errors on a single line.\n"); + fprintf(os, " -v Display version information and exit\n"); + fprintf(os, + "Validate RDF data. This is a simple validator which checks\n" + "that all used properties are actually defined. It does not do\n" + "any fancy file retrieval, the files passed on the command line\n" + "are the only data that is read. In other words, you must pass\n" + "the definition of all vocabularies used on the command line.\n"); + return error ? 
1 : 0; +} + +static uint8_t* +absolute_path(const uint8_t* path) +{ +#ifdef _WIN32 + char* out = (char*)malloc(MAX_PATH); + GetFullPathName((const char*)path, MAX_PATH, out, NULL); + return (uint8_t*)out; +#else + return (uint8_t*)realpath((const char*)path, NULL); +#endif +} + +static int +errorf(const SordQuad quad, const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + + const char* sep = one_line_errors ? "\t" : "\n "; + fprintf(stderr, "%s%s%s%s%s%s\n", + sep, (const char*)sord_node_get_string(quad[SORD_SUBJECT]), + sep, (const char*)sord_node_get_string(quad[SORD_PREDICATE]), + sep, (const char*)sord_node_get_string(quad[SORD_OBJECT])); + + ++n_errors; + return 1; +} + +static bool +is_descendant_of(SordModel* model, + const URIs* uris, + const SordNode* child, + const SordNode* parent, + const SordNode* pred) +{ + if (!child) { + return false; + } else if (sord_node_equals(child, parent) || + sord_ask(model, child, uris->owl_equivalentClass, parent, NULL)) { + return true; + } + + SordIter* i = sord_search(model, child, pred, NULL, NULL); + for (; !sord_iter_end(i); sord_iter_next(i)) { + const SordNode* o = sord_iter_get_node(i, SORD_OBJECT); + if (sord_node_equals(child, o)) { + continue; // Weird class is explicitly a descendent of itself + } + if (is_descendant_of(model, uris, o, parent, pred)) { + sord_iter_free(i); + return true; + } + } + sord_iter_free(i); + + return false; +} + +static bool +regexp_match(const uint8_t* pat, const char* str) +{ +#ifdef HAVE_PCRE + // Append a $ to the pattern so we only match if the entire string matches + const size_t len = strlen((const char*)pat); + char* const regx = (char*)malloc(len + 2); + memcpy(regx, pat, len); + regx[len] = '$'; + regx[len + 1] = '\0'; + + const char* err; + int erroffset; + pcre* re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL); + free(regx); + if (!re) { + fprintf(stderr, "Error in 
pattern `%s' at offset %d (%s)\n", + pat, erroffset, err); + return false; + } + + const bool ret = pcre_exec(re, NULL, str, strlen(str), 0, 0, NULL, 0) >= 0; + pcre_free(re); + return ret; +#endif // HAVE_PCRE + return true; +} + +static int +bound_cmp(SordModel* model, + const URIs* uris, + const SordNode* literal, + const SordNode* type, + const SordNode* bound) +{ + const char* str = (const char*)sord_node_get_string(literal); + const char* bound_str = (const char*)sord_node_get_string(bound); + const bool is_numeric = + is_descendant_of(model, uris, type, uris->xsd_decimal, uris->owl_onDatatype) || + is_descendant_of(model, uris, type, uris->xsd_double, uris->owl_onDatatype); + + if (is_numeric) { + const double fbound = serd_strtod(bound_str, NULL); + const double fliteral = serd_strtod(str, NULL); + return ((fliteral < fbound) ? -1 : + (fliteral > fbound) ? 1 : + 0); + } else { + return strcmp(str, bound_str); + } +} + +static bool +check_restriction(SordModel* model, + const URIs* uris, + const SordNode* literal, + const SordNode* type, + const SordNode* restriction) +{ + size_t len = 0; + const char* str = (const char*)sord_node_get_string_counted(literal, &len); + + // Check xsd:pattern + SordIter* p = sord_search(model, restriction, uris->xsd_pattern, 0, 0); + if (p) { + const SordNode* pat = sord_iter_get_node(p, SORD_OBJECT); + if (!regexp_match(sord_node_get_string(pat), str)) { + fprintf(stderr, "`%s' does not match <%s> pattern `%s'\n", + sord_node_get_string(literal), + sord_node_get_string(type), + sord_node_get_string(pat)); + sord_iter_free(p); + return false; + } + sord_iter_free(p); + ++n_restrictions; + } + + // Check xsd:minInclusive + SordIter* l = sord_search(model, restriction, uris->xsd_minInclusive, 0, 0); + if (l) { + const SordNode* lower = sord_iter_get_node(l, SORD_OBJECT); + if (bound_cmp(model, uris, literal, type, lower) < 0) { + fprintf(stderr, "`%s' is not >= <%s> minimum `%s'\n", + sord_node_get_string(literal), + 
sord_node_get_string(type), + sord_node_get_string(lower)); + sord_iter_free(l); + return false; + } + sord_iter_free(l); + ++n_restrictions; + } + + // Check xsd:maxInclusive + SordIter* u = sord_search(model, restriction, uris->xsd_maxInclusive, 0, 0); + if (u) { + const SordNode* upper = sord_iter_get_node(u, SORD_OBJECT); + if (bound_cmp(model, uris, literal, type, upper) > 0) { + fprintf(stderr, "`%s' is not <= <%s> maximum `%s'\n", + sord_node_get_string(literal), + sord_node_get_string(type), + sord_node_get_string(upper)); + sord_iter_free(u); + return false; + } + sord_iter_free(u); + ++n_restrictions; + } + + return true; // Unknown restriction, be quietly tolerant +} + +static bool +literal_is_valid(SordModel* model, + const URIs* uris, + const SordQuad quad, + const SordNode* literal, + const SordNode* type) +{ + if (!type) { + return true; + } + + /* Check that literal data is related to required type. We don't do a + strict subtype check here because e.g. an xsd:decimal might be a valid + xsd:unsignedInt, which the pattern checks will verify, but if the + literal type is not related to the required type at all + (e.g. xsd:decimal and xsd:string) there is a problem. 
*/ + const SordNode* datatype = sord_node_get_datatype(literal); + if (datatype && datatype != type) { + if (!is_descendant_of( + model, uris, + datatype, type, uris->owl_onDatatype) && + !is_descendant_of( + model, uris, + type, datatype, uris->owl_onDatatype) && + !(sord_node_equals(datatype, uris->xsd_decimal) && + is_descendant_of( + model, uris, + type, uris->xsd_double, uris->owl_onDatatype))) { + errorf(quad, + "Literal `%s' datatype <%s> is not compatible with <%s>\n", + sord_node_get_string(literal), + sord_node_get_string(datatype), + sord_node_get_string(type)); + return false; + } + } + + // Find restrictions list + SordIter* rs = sord_search(model, type, uris->owl_withRestrictions, 0, 0); + if (sord_iter_end(rs)) { + return true; // No restrictions + } + + // Walk list, checking each restriction + const SordNode* head = sord_iter_get_node(rs, SORD_OBJECT); + while (head) { + SordIter* f = sord_search(model, head, uris->rdf_first, 0, 0); + if (!f) { + break; // Reached end of restrictions list without failure + } + + // Check this restriction + const bool good = check_restriction( + model, uris, literal, type, sord_iter_get_node(f, SORD_OBJECT)); + sord_iter_free(f); + + if (!good) { + sord_iter_free(rs); + return false; // Failed, literal is invalid + } + + // Seek to next list node + SordIter* n = sord_search(model, head, uris->rdf_rest, 0, 0); + head = n ? 
sord_iter_get_node(n, SORD_OBJECT) : NULL; + sord_iter_free(n); + } + + sord_iter_free(rs); + + SordIter* s = sord_search(model, type, uris->owl_onDatatype, 0, 0); + if (s) { + const SordNode* super = sord_iter_get_node(s, SORD_OBJECT); + const bool good = literal_is_valid( + model, uris, quad, literal, super); + sord_iter_free(s); + return good; // Match iff literal also matches supertype + } + + return true; // Matches top level type +} + +static bool +check_type(SordModel* model, + const URIs* uris, + const SordQuad quad, + const SordNode* node, + const SordNode* type) +{ + if (sord_node_equals(type, uris->rdfs_Resource) || + sord_node_equals(type, uris->owl_Thing)) { + return true; + } + + if (sord_node_get_type(node) == SORD_LITERAL) { + if (sord_node_equals(type, uris->rdfs_Literal)) { + return true; + } else if (sord_node_equals(type, uris->rdf_PlainLiteral)) { + return !sord_node_get_language(node); + } else { + return literal_is_valid(model, uris, quad, node, type); + } + } else if (sord_node_get_type(node) == SORD_URI) { + if (sord_node_equals(type, uris->foaf_Document)) { + return true; // Questionable... + } else if (is_descendant_of( + model, uris, + type, uris->xsd_anyURI, uris->owl_onDatatype)) { + /* Type is any URI and this is a URI, so pass. Restrictions on + anyURI subtypes are not currently checked (very uncommon). 
*/ + return true; // Type is anyURI, and this is a URI + } else { + SordIter* t = sord_search(model, node, uris->rdf_type, NULL, NULL); + for (; !sord_iter_end(t); sord_iter_next(t)) { + if (is_descendant_of(model, uris, + sord_iter_get_node(t, SORD_OBJECT), + type, + uris->rdfs_subClassOf)) { + sord_iter_free(t); + return true; + } + } + sord_iter_free(t); + return false; + } + } else { + return true; // Blanks often lack explicit types, ignore + } + + return false; +} + +static uint64_t +count_non_blanks(SordIter* i, SordQuadIndex field) +{ + uint64_t n = 0; + for (; !sord_iter_end(i); sord_iter_next(i)) { + const SordNode* node = sord_iter_get_node(i, field); + if (sord_node_get_type(node) != SORD_BLANK) { + ++n; + } + } + return n; +} + +static int +check_properties(SordModel* model, URIs* uris) +{ + int st = 0; + SordIter* i = sord_begin(model); + for (; !sord_iter_end(i); sord_iter_next(i)) { + SordQuad quad; + sord_iter_get(i, quad); + + const SordNode* subj = quad[SORD_SUBJECT]; + const SordNode* pred = quad[SORD_PREDICATE]; + const SordNode* obj = quad[SORD_OBJECT]; + + bool is_any_property = false; + SordIter* t = sord_search(model, pred, uris->rdf_type, NULL, NULL); + for (; !sord_iter_end(t); sord_iter_next(t)) { + if (is_descendant_of(model, uris, + sord_iter_get_node(t, SORD_OBJECT), + uris->rdf_Property, + uris->rdfs_subClassOf)) { + is_any_property = true; + break; + } + } + sord_iter_free(t); + + const bool is_ObjectProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_ObjectProperty, 0); + const bool is_FunctionalProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_FunctionalProperty, 0); + const bool is_InverseFunctionalProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_InverseFunctionalProperty, 0); + const bool is_DatatypeProperty = sord_ask( + model, pred, uris->rdf_type, uris->owl_DatatypeProperty, 0); + + if (!is_any_property) { + st = errorf(quad, "Use of undefined property"); + } + + if (!sord_ask(model, pred, 
uris->rdfs_label, NULL, NULL)) { + st = errorf(quad, "Property <%s> has no label", + sord_node_get_string(pred)); + } + + if (is_DatatypeProperty && + sord_node_get_type(obj) != SORD_LITERAL) { + st = errorf(quad, "Datatype property with non-literal value"); + } + + if (is_ObjectProperty && + sord_node_get_type(obj) == SORD_LITERAL) { + st = errorf(quad, "Object property with literal value"); + } + + if (is_FunctionalProperty) { + SordIter* o = sord_search(model, subj, pred, NULL, NULL); + const uint64_t n = count_non_blanks(o, SORD_OBJECT); + if (n > 1) { + st = errorf(quad, "Functional property with %u objects", n); + } + sord_iter_free(o); + } + + if (is_InverseFunctionalProperty) { + SordIter* s = sord_search(model, NULL, pred, obj, NULL); + const unsigned n = count_non_blanks(s, SORD_SUBJECT); + if (n > 1) { + st = errorf( + quad, "Inverse functional property with %u subjects", n); + } + sord_iter_free(s); + } + + if (sord_node_equals(pred, uris->rdf_type) && + !sord_ask(model, obj, uris->rdf_type, uris->rdfs_Class, NULL) && + !sord_ask(model, obj, uris->rdf_type, uris->owl_Class, NULL)) { + st = errorf(quad, "Type is not a rdfs:Class or owl:Class"); + } + + if (sord_node_get_type(obj) == SORD_LITERAL && + !literal_is_valid(model, uris, quad, + obj, sord_node_get_datatype(obj))) { + st = errorf(quad, "Literal does not match datatype"); + } + + SordIter* r = sord_search(model, pred, uris->rdfs_range, NULL, NULL); + for (; !sord_iter_end(r); sord_iter_next(r)) { + const SordNode* range = sord_iter_get_node(r, SORD_OBJECT); + if (!check_type(model, uris, quad, obj, range)) { + st = errorf(quad, "Object not in range <%s>\n", + sord_node_get_string(range)); + } + } + sord_iter_free(r); + + SordIter* d = sord_search(model, pred, uris->rdfs_domain, NULL, NULL); + if (d) { + const SordNode* domain = sord_iter_get_node(d, SORD_OBJECT); + if (!check_type(model, uris, quad, subj, domain)) { + st = errorf(quad, "Subject not in domain <%s>", + 
sord_node_get_string(domain)); + } + sord_iter_free(d); + } + } + sord_iter_free(i); + + return st; +} + +static int +check_instance(SordModel* model, + const URIs* uris, + const SordNode* restriction, + const SordQuad quad) +{ + const SordNode* instance = quad[SORD_SUBJECT]; + int st = 0; + + const SordNode* prop = sord_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + return 0; + } + + const unsigned values = sord_count(model, instance, prop, NULL, NULL); + + // Check exact cardinality + const SordNode* card = sord_get( + model, restriction, uris->owl_cardinality, NULL, NULL); + if (card) { + const unsigned c = atoi((const char*)sord_node_get_string(card)); + if (values != c) { + st = errorf(quad, "Property %s on %s has %u != %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, c); + } + } + + // Check minimum cardinality + const SordNode* minCard = sord_get( + model, restriction, uris->owl_minCardinality, NULL, NULL); + if (minCard) { + const unsigned m = atoi((const char*)sord_node_get_string(minCard)); + if (values < m) { + st = errorf(quad, "Property %s on %s has %u < %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, m); + } + } + + // Check maximum cardinality + const SordNode* maxCard = sord_get( + model, restriction, uris->owl_maxCardinality, NULL, NULL); + if (maxCard) { + const unsigned m = atoi((const char*)sord_node_get_string(maxCard)); + if (values < m) { + st = errorf(quad, "Property %s on %s has %u > %u values", + sord_node_get_string(prop), + sord_node_get_string(instance), + values, m); + } + } + + // Check someValuesFrom + SordIter* sf = sord_search( + model, restriction, uris->owl_someValuesFrom, NULL, NULL); + if (sf) { + const SordNode* type = sord_iter_get_node(sf, SORD_OBJECT); + + SordIter* v = sord_search(model, instance, prop, NULL, NULL); + bool found = false; + for (; !sord_iter_end(v); sord_iter_next(v)) { + const SordNode* value = 
sord_iter_get_node(v, SORD_OBJECT); + if (check_type(model, uris, quad, value, type)) { + found = true; + break; + } + } + if (!found) { + st = errorf(quad, "%s has no <%s> values of type <%s>\n", + sord_node_get_string(instance), + sord_node_get_string(prop), + sord_node_get_string(type)); + } + sord_iter_free(v); + } + sord_iter_free(sf); + + return st; +} + +static int +check_class_instances(SordModel* model, + const URIs* uris, + const SordNode* restriction, + const SordNode* klass) +{ + // Check immediate instances of this class + SordIter* i = sord_search(model, NULL, uris->rdf_type, klass, NULL); + for (; !sord_iter_end(i); sord_iter_next(i)) { + SordQuad quad; + sord_iter_get(i, quad); + check_instance(model, uris, restriction, quad); + } + sord_iter_free(i); + + // Check instances of all subclasses recursively + SordIter* s = sord_search(model, NULL, uris->rdfs_subClassOf, klass, NULL); + for (; !sord_iter_end(s); sord_iter_next(s)) { + const SordNode* subklass = sord_iter_get_node(s, SORD_SUBJECT); + check_class_instances(model, uris, restriction, subklass); + } + sord_iter_free(s); + + return 0; +} + +static int +check_instances(SordModel* model, const URIs* uris) +{ + int st = 0; + SordIter* r = sord_search( + model, NULL, uris->rdf_type, uris->owl_Restriction, NULL); + for (; !sord_iter_end(r); sord_iter_next(r)) { + const SordNode* restriction = sord_iter_get_node(r, SORD_SUBJECT); + const SordNode* prop = sord_get( + model, restriction, uris->owl_onProperty, NULL, NULL); + if (!prop) { + continue; + } + + SordIter* c = sord_search( + model, NULL, uris->rdfs_subClassOf, restriction, NULL); + for (; !sord_iter_end(c); sord_iter_next(c)) { + const SordNode* klass = sord_iter_get_node(c, SORD_SUBJECT); + check_class_instances(model, uris, restriction, klass); + } + sord_iter_free(c); + } + sord_iter_free(r); + + return st; +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + int a = 1; + for (; a < argc 
&& argv[a][0] == '-'; ++a) { + if (argv[a][1] == 'l') { + one_line_errors = true; + } else if (argv[a][1] == 'v') { + return print_version(); + } else { + fprintf(stderr, "%s: Unknown option `%s'\n", argv[0], argv[a]); + return print_usage(argv[0], true); + } + } + + SordWorld* world = sord_world_new(); + SordModel* model = sord_new(world, SORD_SPO|SORD_OPS, false); + SerdEnv* env = serd_env_new(&SERD_NODE_NULL); + SerdReader* reader = sord_new_reader(model, env, SERD_TURTLE, NULL); + + for (; a < argc; ++a) { + const uint8_t* input = (const uint8_t*)argv[a]; + uint8_t* in_path = absolute_path(serd_uri_to_path(input)); + + if (!in_path) { + fprintf(stderr, "Skipping file %s\n", input); + continue; + } + + SerdURI base_uri; + SerdNode base_uri_node = serd_node_new_file_uri( + in_path, NULL, &base_uri, true); + + serd_env_set_base_uri(env, &base_uri_node); + const SerdStatus st = serd_reader_read_file(reader, in_path); + if (st) { + fprintf(stderr, "error reading %s: %s\n", + in_path, serd_strerror(st)); + } + + serd_node_free(&base_uri_node); + free(in_path); + } + serd_reader_free(reader); + serd_env_free(env); + +#define URI(prefix, suffix) \ + uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix) + + URIs uris; + URI(foaf, Document); + URI(owl, AnnotationProperty); + URI(owl, Class); + URI(owl, DatatypeProperty); + URI(owl, FunctionalProperty); + URI(owl, InverseFunctionalProperty); + URI(owl, ObjectProperty); + URI(owl, OntologyProperty); + URI(owl, Restriction); + URI(owl, Thing); + URI(owl, cardinality); + URI(owl, equivalentClass); + URI(owl, maxCardinality); + URI(owl, minCardinality); + URI(owl, onDatatype); + URI(owl, onProperty); + URI(owl, someValuesFrom); + URI(owl, withRestrictions); + URI(rdf, PlainLiteral); + URI(rdf, Property); + URI(rdf, first); + URI(rdf, rest); + URI(rdf, type); + URI(rdfs, Class); + URI(rdfs, Literal); + URI(rdfs, Resource); + URI(rdfs, domain); + URI(rdfs, label); + URI(rdfs, range); + URI(rdfs, subClassOf); + 
URI(xsd, anyURI); + URI(xsd, decimal); + URI(xsd, double); + URI(xsd, maxInclusive); + URI(xsd, minInclusive); + URI(xsd, pattern); + URI(xsd, string); + +#ifndef HAVE_PCRE + fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n"); +#endif + + const int prop_st = check_properties(model, &uris); + const int inst_st = check_instances(model, &uris); + + printf("Found %d errors among %d files (checked %d restrictions)\n", + n_errors, argc - 1, n_restrictions); + + sord_free(model); + sord_world_free(world); + return prop_st || inst_st; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sordi.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,209 @@ +/* + Copyright 2011-2016 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +# include <windows.h> +#endif + +#include "serd/serd.h" +#include "sord/sord.h" +#include "sord_config.h" + +#define SORDI_ERROR(msg) fprintf(stderr, "sordi: " msg); +#define SORDI_ERRORF(fmt, ...) fprintf(stderr, "sordi: " fmt, __VA_ARGS__); + +typedef struct { + SerdWriter* writer; + SerdEnv* env; + SerdNode base_uri_node; + SerdURI base_uri; + SordModel* sord; +} State; + +static int +print_version(void) +{ + printf("sordi " SORD_VERSION " <http://drobilla.net/software/sord>\n"); + printf("Copyright 2011-2016 David Robillard <http://drobilla.net>.\n" + "License: <http://www.opensource.org/licenses/isc>\n" + "This is free software; you are free to change and redistribute it." + "\nThere is NO WARRANTY, to the extent permitted by law.\n"); + return 0; +} + +static int +print_usage(const char* name, bool error) +{ + FILE* const os = error ? stderr : stdout; + fprintf(os, "%s", error ? "\n" : ""); + fprintf(os, "Usage: %s [OPTION]... 
INPUT [BASE_URI]\n", name); + fprintf(os, "Load and re-serialise RDF data.\n"); + fprintf(os, "Use - for INPUT to read from standard input.\n\n"); + fprintf(os, " -h Display this help and exit\n"); + fprintf(os, " -i SYNTAX Input syntax (`turtle' or `ntriples')\n"); + fprintf(os, " -o SYNTAX Output syntax (`turtle' or `ntriples')\n"); + fprintf(os, " -s INPUT Parse INPUT as string (terminates options)\n"); + fprintf(os, " -v Display version information and exit\n"); + return error ? 1 : 0; +} + +static bool +set_syntax(SerdSyntax* syntax, const char* name) +{ + if (!strcmp(name, "turtle")) { + *syntax = SERD_TURTLE; + } else if (!strcmp(name, "ntriples")) { + *syntax = SERD_NTRIPLES; + } else { + SORDI_ERRORF("unknown syntax `%s'\n", name); + return false; + } + return true; +} + +int +main(int argc, char** argv) +{ + if (argc < 2) { + return print_usage(argv[0], true); + } + + FILE* in_fd = NULL; + SerdSyntax input_syntax = SERD_TURTLE; + SerdSyntax output_syntax = SERD_NTRIPLES; + bool from_file = true; + const uint8_t* in_name = NULL; + int a = 1; + for (; a < argc && argv[a][0] == '-'; ++a) { + if (argv[a][1] == '\0') { + in_name = (const uint8_t*)"(stdin)"; + in_fd = stdin; + break; + } else if (argv[a][1] == 'h') { + return print_usage(argv[0], false); + } else if (argv[a][1] == 'v') { + return print_version(); + } else if (argv[a][1] == 's') { + in_name = (const uint8_t*)"(string)"; + from_file = false; + ++a; + break; + } else if (argv[a][1] == 'i') { + if (++a == argc) { + SORDI_ERROR("option requires an argument -- 'i'\n\n"); + return print_usage(argv[0], true); + } + if (!set_syntax(&input_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else if (argv[a][1] == 'o') { + if (++a == argc) { + SORDI_ERROR("option requires an argument -- 'o'\n\n"); + return print_usage(argv[0], true); + } + if (!set_syntax(&output_syntax, argv[a])) { + return print_usage(argv[0], true); + } + } else { + SORDI_ERRORF("invalid option -- '%s'\n", argv[a] + 1); + 
return print_usage(argv[0], true); + } + } + + if (a == argc) { + SORDI_ERROR("missing input\n"); + return print_usage(argv[0], true); + } + + const uint8_t* input = (const uint8_t*)argv[a++]; + if (from_file) { + in_name = in_name ? in_name : input; + if (!in_fd) { + input = serd_uri_to_path(in_name); + if (!input || !(in_fd = fopen((const char*)input, "rb"))) { + return 1; + } + } + } + + SerdURI base_uri = SERD_URI_NULL; + SerdNode base = SERD_NODE_NULL; + if (a < argc) { // Base URI given on command line + base = serd_node_new_uri_from_string( + (const uint8_t*)argv[a], NULL, &base_uri); + } else if (from_file && in_fd != stdin) { // Use input file URI + base = serd_node_new_file_uri(input, NULL, &base_uri, true); + } + + SordWorld* world = sord_world_new(); + SordModel* sord = sord_new(world, SORD_SPO|SORD_OPS, false); + SerdEnv* env = serd_env_new(&base); + SerdReader* reader = sord_new_reader(sord, env, input_syntax, NULL); + + SerdStatus status = (from_file) + ? serd_reader_read_file_handle(reader, in_fd, in_name) + : serd_reader_read_string(reader, input); + + serd_reader_free(reader); + + FILE* out_fd = stdout; + SerdEnv* write_env = serd_env_new(&base); + + int output_style = SERD_STYLE_RESOLVED; + if (output_syntax == SERD_NTRIPLES) { + output_style |= SERD_STYLE_ASCII; + } else { + output_style |= SERD_STYLE_CURIED | SERD_STYLE_ABBREVIATED; + } + + SerdWriter* writer = serd_writer_new( + output_syntax, + (SerdStyle)output_style, + write_env, &base_uri, serd_file_sink, stdout); + + // Write @prefix directives + serd_env_foreach(env, + (SerdPrefixSink)serd_writer_set_prefix, + writer); + + // Write statements + sord_write(sord, writer, NULL); + + serd_writer_finish(writer); + serd_writer_free(writer); + + serd_env_free(env); + serd_env_free(write_env); + serd_node_free(&base); + + sord_free(sord); + sord_world_free(world); + + if (from_file) { + fclose(in_fd); + } + + if (fclose(out_fd)) { + perror("sordi: write error"); + status = SERD_ERR_UNKNOWN; + } 
+ + return (status > SERD_FAILURE) ? 1 : 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/sordmm_test.cpp Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,25 @@ +/* + Copyright 2011 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "sord/sordmm.hpp" + +int +main(int argc, char** argv) +{ + Sord::World world; + Sord::Model model(world, "http://example.org/"); + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/syntax.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,207 @@ +/* + Copyright 2011-2015 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "serd/serd.h" + +#include "sord_config.h" +#include "sord_internal.h" + +struct SordInserterImpl { + SordModel* model; + SerdEnv* env; +}; + +SordInserter* +sord_inserter_new(SordModel* model, + SerdEnv* env) +{ + SordInserter* inserter = (SordInserter*)malloc(sizeof(SordInserter)); + inserter->model = model; + inserter->env = env; + return inserter; +} + +void +sord_inserter_free(SordInserter* inserter) +{ + free(inserter); +} + +SerdStatus +sord_inserter_set_base_uri(SordInserter* inserter, + const SerdNode* uri_node) +{ + return serd_env_set_base_uri(inserter->env, uri_node); +} + +SerdStatus +sord_inserter_set_prefix(SordInserter* inserter, + const SerdNode* name, + const SerdNode* uri_node) +{ + return serd_env_set_prefix(inserter->env, name, uri_node); +} + +SerdStatus +sord_inserter_write_statement(SordInserter* inserter, + SerdStatementFlags flags, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, 
+ const SerdNode* object_lang) +{ + SordWorld* world = sord_get_world(inserter->model); + SerdEnv* env = inserter->env; + + SordNode* g = sord_node_from_serd_node(world, env, graph, NULL, NULL); + SordNode* s = sord_node_from_serd_node(world, env, subject, NULL, NULL); + SordNode* p = sord_node_from_serd_node(world, env, predicate, NULL, NULL); + SordNode* o = sord_node_from_serd_node(world, env, object, + object_datatype, object_lang); + + if (!s || !p || !o) { + return SERD_ERR_BAD_ARG; + } + + const SordQuad tup = { s, p, o, g }; + sord_add(inserter->model, tup); + + sord_node_free(world, o); + sord_node_free(world, p); + sord_node_free(world, s); + sord_node_free(world, g); + + return SERD_SUCCESS; +} + +SORD_API +SerdReader* +sord_new_reader(SordModel* model, + SerdEnv* env, + SerdSyntax syntax, + SordNode* graph) +{ + SordInserter* inserter = sord_inserter_new(model, env); + + SerdReader* reader = serd_reader_new( + syntax, inserter, (void (*)(void*))sord_inserter_free, + (SerdBaseSink)sord_inserter_set_base_uri, + (SerdPrefixSink)sord_inserter_set_prefix, + (SerdStatementSink)sord_inserter_write_statement, + NULL); + + if (graph) { + serd_reader_set_default_graph(reader, sord_node_to_serd_node(graph)); + } + + return reader; +} + +static SerdStatus +write_statement(SordModel* sord, + SerdWriter* writer, + SordQuad tup, + SerdStatementFlags flags) +{ + const SordNode* s = tup[SORD_SUBJECT]; + const SordNode* p = tup[SORD_PREDICATE]; + const SordNode* o = tup[SORD_OBJECT]; + const SordNode* d = sord_node_get_datatype(o); + const SerdNode* ss = sord_node_to_serd_node(s); + const SerdNode* sp = sord_node_to_serd_node(p); + const SerdNode* so = sord_node_to_serd_node(o); + const SerdNode* sd = sord_node_to_serd_node(d); + + const char* lang_str = sord_node_get_language(o); + size_t lang_len = lang_str ? 
strlen(lang_str) : 0; + SerdNode language = SERD_NODE_NULL; + if (lang_str) { + language.type = SERD_LITERAL; + language.n_bytes = lang_len; + language.n_chars = lang_len; + language.buf = (const uint8_t*)lang_str; + }; + + // TODO: Subject abbreviation + + if (sord_node_is_inline_object(s) && !(flags & SERD_ANON_CONT)) { + return SERD_SUCCESS; + } + + SerdStatus st = SERD_SUCCESS; + if (sord_node_is_inline_object(o)) { + SordQuad sub_pat = { o, 0, 0, 0 }; + SordIter* sub_iter = sord_find(sord, sub_pat); + + SerdStatementFlags start_flags = flags + | ((sub_iter) ? SERD_ANON_O_BEGIN : SERD_EMPTY_O); + + st = serd_writer_write_statement( + writer, start_flags, NULL, ss, sp, so, sd, &language); + + if (!st && sub_iter) { + flags |= SERD_ANON_CONT; + for (; !st && !sord_iter_end(sub_iter); sord_iter_next(sub_iter)) { + SordQuad sub_tup; + sord_iter_get(sub_iter, sub_tup); + st = write_statement(sord, writer, sub_tup, flags); + } + sord_iter_free(sub_iter); + serd_writer_end_anon(writer, so); + } + } else { + st = serd_writer_write_statement( + writer, flags, NULL, ss, sp, so, sd, &language); + } + + return st; +} + +bool +sord_write(SordModel* model, + SerdWriter* writer, + SordNode* graph) +{ + SordQuad pat = { 0, 0, 0, graph }; + SordIter* iter = sord_find(model, pat); + return sord_write_iter(iter, writer); +} + +bool +sord_write_iter(SordIter* iter, + SerdWriter* writer) +{ + if (!iter) { + return false; + } + + SordModel* model = (SordModel*)sord_iter_get_model(iter); + SerdStatus st = SERD_SUCCESS; + for (; !st && !sord_iter_end(iter); sord_iter_next(iter)) { + SordQuad tup; + sord_iter_get(iter, tup); + st = write_statement(model, writer, tup, 0); + } + sord_iter_free(iter); + + return !st; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/zix/btree.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,738 @@ +/* + Copyright 2011-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "btree.h" + +// #define ZIX_BTREE_DEBUG 1 + +#define ZIX_BTREE_PAGE_SIZE 4096 +#define ZIX_BTREE_NODE_SPACE (ZIX_BTREE_PAGE_SIZE - 2 * sizeof(uint16_t)) +#define ZIX_BTREE_LEAF_VALS ((ZIX_BTREE_NODE_SPACE / sizeof(void*)) - 1) +#define ZIX_BTREE_INODE_VALS (ZIX_BTREE_LEAF_VALS / 2) + +struct ZixBTreeImpl { + ZixBTreeNode* root; + ZixDestroyFunc destroy; + ZixComparator cmp; + void* cmp_data; + size_t size; + unsigned height; ///< Number of levels, i.e. 
root only has height 1 +}; + +struct ZixBTreeNodeImpl { + uint16_t is_leaf; + uint16_t n_vals; + // On 64-bit we rely on some padding here to get page-sized nodes + void* vals[ZIX_BTREE_INODE_VALS]; // ZIX_BTREE_LEAF_VALS for leaves + ZixBTreeNode* children[ZIX_BTREE_INODE_VALS + 1]; // Nonexistent for leaves +}; + +typedef struct { + ZixBTreeNode* node; + unsigned index; +} ZixBTreeIterFrame; + +struct ZixBTreeIterImpl { + unsigned level; ///< Current level in stack + ZixBTreeIterFrame stack[]; ///< Position stack +}; + +#ifdef ZIX_BTREE_DEBUG + +ZIX_PRIVATE void +print_node(const ZixBTreeNode* n, const char* prefix) +{ + printf("%s[", prefix); + for (uint16_t v = 0; v < n->n_vals; ++v) { + printf(" %lu", (uintptr_t)n->vals[v]); + } + printf(" ]\n"); +} + +ZIX_PRIVATE void +print_tree(const ZixBTreeNode* parent, const ZixBTreeNode* node, int level) +{ + if (node) { + if (!parent) { + printf("TREE {\n"); + } + for (int i = 0; i < level + 1; ++i) { + printf(" "); + } + print_node(node, ""); + if (!node->is_leaf) { + for (uint16_t i = 0; i < node->n_vals + 1; ++i) { + print_tree(node, node->children[i], level + 1); + } + } + if (!parent) { + printf("}\n"); + } + } +} + +#endif // ZIX_BTREE_DEBUG + +ZIX_PRIVATE ZixBTreeNode* +zix_btree_node_new(const bool leaf) +{ + assert(sizeof(ZixBTreeNode) == ZIX_BTREE_PAGE_SIZE); + ZixBTreeNode* node = (ZixBTreeNode*)malloc(sizeof(ZixBTreeNode)); + if (node) { + node->is_leaf = leaf; + node->n_vals = 0; + } + return node; +} + +ZIX_API ZixBTree* +zix_btree_new(const ZixComparator cmp, + void* const cmp_data, + const ZixDestroyFunc destroy) +{ + ZixBTree* t = (ZixBTree*)malloc(sizeof(ZixBTree)); + if (t) { + t->root = zix_btree_node_new(true); + t->destroy = destroy; + t->cmp = cmp; + t->cmp_data = cmp_data; + t->size = 0; + t->height = 1; + if (!t->root) { + free(t); + return NULL; + } + } + return t; +} + +ZIX_PRIVATE void +zix_btree_free_rec(ZixBTree* const t, ZixBTreeNode* const n) +{ + if (n) { + if (t->destroy) { + for 
(uint16_t i = 0; i < n->n_vals; ++i) { + t->destroy(n->vals[i]); + } + } + if (!n->is_leaf) { + for (uint16_t i = 0; i < n->n_vals + 1; ++i) { + zix_btree_free_rec(t, n->children[i]); + } + } + free(n); + } +} + +ZIX_API void +zix_btree_free(ZixBTree* const t) +{ + if (t) { + zix_btree_free_rec(t, t->root); + free(t); + } +} + +ZIX_API size_t +zix_btree_size(const ZixBTree* const t) +{ + return t->size; +} + +ZIX_PRIVATE uint16_t +zix_btree_max_vals(const ZixBTreeNode* const node) +{ + return node->is_leaf ? ZIX_BTREE_LEAF_VALS : ZIX_BTREE_INODE_VALS; +} + +ZIX_PRIVATE uint16_t +zix_btree_min_vals(const ZixBTreeNode* const node) +{ + return ((zix_btree_max_vals(node) + 1) / 2) - 1; +} + +/** Shift pointers in `array` of length `n` right starting at `i`. */ +ZIX_PRIVATE void +zix_btree_ainsert(void** const array, + const uint16_t n, + const uint16_t i, + void* const e) +{ + memmove(array + i + 1, array + i, (n - i) * sizeof(e)); + array[i] = e; +} + +/** Erase element `i` in `array` of length `n` and return erased element. */ +ZIX_PRIVATE void* +zix_btree_aerase(void** const array, const uint16_t n, const uint16_t i) +{ + void* const ret = array[i]; + memmove(array + i, array + i + 1, (n - i) * sizeof(ret)); + return ret; +} + +/** Split lhs, the i'th child of `n`, into two nodes. 
*/ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_split_child(ZixBTreeNode* const n, + const uint16_t i, + ZixBTreeNode* const lhs) +{ + assert(lhs->n_vals == zix_btree_max_vals(lhs)); + assert(n->n_vals < ZIX_BTREE_INODE_VALS); + assert(i < n->n_vals + 1); + assert(n->children[i] == lhs); + + const uint16_t max_n_vals = zix_btree_max_vals(lhs); + ZixBTreeNode* rhs = zix_btree_node_new(lhs->is_leaf); + if (!rhs) { + return NULL; + } + + // LHS and RHS get roughly half, less the middle value which moves up + lhs->n_vals = max_n_vals / 2; + rhs->n_vals = max_n_vals - lhs->n_vals - 1; + + // Copy large half of values from LHS to new RHS node + memcpy(rhs->vals, + lhs->vals + lhs->n_vals + 1, + rhs->n_vals * sizeof(void*)); + + // Copy large half of children from LHS to new RHS node + if (!lhs->is_leaf) { + memcpy(rhs->children, + lhs->children + lhs->n_vals + 1, + (rhs->n_vals + 1) * sizeof(ZixBTreeNode*)); + } + + // Move middle value up to parent + zix_btree_ainsert(n->vals, n->n_vals, i, lhs->vals[lhs->n_vals]); + + // Insert new RHS node in parent at position i + zix_btree_ainsert((void**)n->children, ++n->n_vals, i + 1, rhs); + + return rhs; +} + +/** Find the first value in `n` that is not less than `e` (lower bound). 
*/ +ZIX_PRIVATE uint16_t +zix_btree_node_find(const ZixBTree* const t, + const ZixBTreeNode* const n, + const void* const e, + bool* const equal) +{ + uint16_t first = 0; + uint16_t len = n->n_vals; + while (len > 0) { + const uint16_t half = len >> 1; + const uint16_t i = first + half; + const int cmp = t->cmp(n->vals[i], e, t->cmp_data); + if (cmp == 0) { + *equal = true; + len = half; // Keep searching for wildcard matches + } else if (cmp < 0) { + const uint16_t chop = half + 1; + first += chop; + len -= chop; + } else { + len = half; + } + } + assert(!*equal || t->cmp(n->vals[first], e, t->cmp_data) == 0); + return first; +} + +ZIX_API ZixStatus +zix_btree_insert(ZixBTree* const t, void* const e) +{ + ZixBTreeNode* parent = NULL; // Parent of n + ZixBTreeNode* n = t->root; // Current node + uint16_t i = 0; // Index of n in parent + while (n) { + if (n->n_vals == zix_btree_max_vals(n)) { + // Node is full, split to ensure there is space for a leaf split + if (!parent) { + // Root is full, grow tree upwards + if (!(parent = zix_btree_node_new(false))) { + return ZIX_STATUS_NO_MEM; + } + t->root = parent; + parent->children[0] = n; + ++t->height; + } + + ZixBTreeNode* const rhs = zix_btree_split_child(parent, i, n); + if (!rhs) { + return ZIX_STATUS_NO_MEM; + } + + const int cmp = t->cmp(parent->vals[i], e, t->cmp_data); + if (cmp == 0) { + return ZIX_STATUS_EXISTS; + } else if (cmp < 0) { + // Move to new RHS + n = rhs; + ++i; + } + } + + assert(!parent || parent->children[i] == n); + + bool equal = false; + i = zix_btree_node_find(t, n, e, &equal); + if (equal) { + return ZIX_STATUS_EXISTS; + } else if (!n->is_leaf) { + // Descend to child node left of value + parent = n; + n = n->children[i]; + } else { + // Insert into internal node + zix_btree_ainsert(n->vals, n->n_vals++, i, e); + break; + } + } + + ++t->size; + + return ZIX_STATUS_SUCCESS; +} + +ZIX_PRIVATE ZixBTreeIter* +zix_btree_iter_new(const ZixBTree* const t) +{ + const size_t s = t->height * 
sizeof(ZixBTreeIterFrame); + ZixBTreeIter* const i = (ZixBTreeIter*)malloc(sizeof(ZixBTreeIter) + s); + if (i) { + i->level = 0; + } + return i; +} + +ZIX_PRIVATE void +zix_btree_iter_set_frame(ZixBTreeIter* const ti, + ZixBTreeNode* const n, + const uint16_t i) +{ + if (ti) { + ti->stack[ti->level].node = n; + ti->stack[ti->level].index = i; + } +} + +ZIX_PRIVATE bool +zix_btree_node_is_minimal(ZixBTreeNode* const n) +{ + assert(n->n_vals >= zix_btree_min_vals(n)); + return n->n_vals == zix_btree_min_vals(n); +} + +/** Enlarge left child by stealing a value from its right sibling. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_rotate_left(ZixBTreeNode* const parent, const uint16_t i) +{ + ZixBTreeNode* const lhs = parent->children[i]; + ZixBTreeNode* const rhs = parent->children[i + 1]; + + // Move parent value to end of LHS + lhs->vals[lhs->n_vals++] = parent->vals[i]; + + // Move first child pointer from RHS to end of LHS + if (!lhs->is_leaf) { + lhs->children[lhs->n_vals] = (ZixBTreeNode*)zix_btree_aerase( + (void**)rhs->children, rhs->n_vals, 0); + } + + // Move first value in RHS to parent + parent->vals[i] = zix_btree_aerase(rhs->vals, --rhs->n_vals, 0); + + return lhs; +} + +/** Enlarge right child by stealing a value from its left sibling. */ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_rotate_right(ZixBTreeNode* const parent, const uint16_t i) +{ + ZixBTreeNode* const lhs = parent->children[i - 1]; + ZixBTreeNode* const rhs = parent->children[i]; + + // Prepend parent value to RHS + zix_btree_ainsert(rhs->vals, rhs->n_vals++, 0, parent->vals[i - 1]); + + // Move last child pointer from LHS and prepend to RHS + if (!lhs->is_leaf) { + zix_btree_ainsert((void**)rhs->children, + rhs->n_vals, + 0, + lhs->children[lhs->n_vals]); + } + + // Move last value from LHS to parent + parent->vals[i - 1] = lhs->vals[--lhs->n_vals]; + + return rhs; +} + +/** Move n[i] down, merge the left and right child, return the merged node. 
*/ +ZIX_PRIVATE ZixBTreeNode* +zix_btree_merge(ZixBTree* const t, ZixBTreeNode* const n, const uint16_t i) +{ + ZixBTreeNode* const lhs = n->children[i]; + ZixBTreeNode* const rhs = n->children[i + 1]; + + assert(zix_btree_node_is_minimal(n->children[i])); + assert(lhs->n_vals + rhs->n_vals < zix_btree_max_vals(lhs)); + + // Move parent value to end of LHS + lhs->vals[lhs->n_vals++] = zix_btree_aerase(n->vals, n->n_vals, i); + + // Erase corresponding child pointer (to RHS) in parent + zix_btree_aerase((void**)n->children, n->n_vals, i + 1); + + // Add everything from RHS to end of LHS + memcpy(lhs->vals + lhs->n_vals, rhs->vals, rhs->n_vals * sizeof(void*)); + if (!lhs->is_leaf) { + memcpy(lhs->children + lhs->n_vals, + rhs->children, + (rhs->n_vals + 1) * sizeof(void*)); + } + lhs->n_vals += rhs->n_vals; + + if (--n->n_vals == 0) { + // Root is now empty, replace it with its only child + assert(n == t->root); + t->root = lhs; + free(n); + } + + free(rhs); + return lhs; +} + +/** Remove and return the min value from the subtree rooted at `n`. */ +ZIX_PRIVATE void* +zix_btree_remove_min(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(n->children[0])) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(n->children[1])) { + // Child's right sibling has at least one key to steal + n = zix_btree_rotate_left(n, 0); + } else { + // Both child and right sibling are minimal, merge + n = zix_btree_merge(t, n, 0); + } + } else { + n = n->children[0]; + } + } + + return zix_btree_aerase(n->vals, --n->n_vals, 0); +} + +/** Remove and return the max value from the subtree rooted at `n`. 
*/ +ZIX_PRIVATE void* +zix_btree_remove_max(ZixBTree* const t, ZixBTreeNode* n) +{ + while (!n->is_leaf) { + if (zix_btree_node_is_minimal(n->children[n->n_vals])) { + // Leftmost child is minimal, must expand + if (!zix_btree_node_is_minimal(n->children[n->n_vals - 1])) { + // Child's left sibling has at least one key to steal + n = zix_btree_rotate_right(n, n->n_vals); + } else { + // Both child and left sibling are minimal, merge + n = zix_btree_merge(t, n, n->n_vals - 1); + } + } else { + n = n->children[n->n_vals]; + } + } + + return n->vals[--n->n_vals]; +} + +ZIX_API ZixStatus +zix_btree_remove(ZixBTree* const t, + const void* const e, + void** const out, + ZixBTreeIter** const next) +{ + ZixBTreeNode* n = t->root; + ZixBTreeIter* ti = NULL; + const bool user_iter = next && *next; + if (next) { + if (!*next && !(*next = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + ti = *next; + ti->level = 0; + } + + while (true) { + /* To remove in a single walk down, the tree is adjusted along the way + so that the current node always has at least one more value than the + minimum required in general. Thus, there is always room to remove + without adjusting on the way back up. 
*/ + assert(n == t->root || !zix_btree_node_is_minimal(n)); + + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + zix_btree_iter_set_frame(ti, n, i); + if (n->is_leaf) { + if (equal) { + // Found in leaf node + *out = zix_btree_aerase(n->vals, --n->n_vals, i); + if (ti && i == n->n_vals) { + if (i == 0) { + ti->stack[ti->level = 0].node = NULL; + } else { + --ti->stack[ti->level].index; + zix_btree_iter_increment(ti); + } + } + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Not found in leaf node, or tree + if (ti && !user_iter) { + zix_btree_iter_free(ti); + *next = NULL; + } + return ZIX_STATUS_NOT_FOUND; + } + } else if (equal) { + // Found in internal node + if (!zix_btree_node_is_minimal(n->children[i])) { + // Left child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_max(t, n->children[i]); + --t->size; + return ZIX_STATUS_SUCCESS; + } else if (!zix_btree_node_is_minimal(n->children[i + 1])) { + // Right child can remove without merge + *out = n->vals[i]; + n->vals[i] = zix_btree_remove_min(t, n->children[i + 1]); + --t->size; + return ZIX_STATUS_SUCCESS; + } else { + // Both preceding and succeeding child are minimal + n = zix_btree_merge(t, n, i); + } + } else { + // Not found in internal node, key is in/under children[i] + if (zix_btree_node_is_minimal(n->children[i])) { + if (i > 0 && !zix_btree_node_is_minimal(n->children[i - 1])) { + // Steal a key from child's left sibling + n = zix_btree_rotate_right(n, i); + } else if (i < n->n_vals && + !zix_btree_node_is_minimal(n->children[i + 1])) { + // Steal a key from child's right sibling + n = zix_btree_rotate_left(n, i); + } else { + // Both child's siblings are minimal, merge them + if (i < n->n_vals) { + n = zix_btree_merge(t, n, i); + } else { + n = zix_btree_merge(t, n, i - 1); + if (ti) { + --ti->stack[ti->level].index; + } + } + } + } else { + n = n->children[i]; + } + } + if (ti) { + ++ti->level; + } + } + + assert(false); // Not 
reached + return ZIX_STATUS_ERROR; +} + +ZIX_API ZixStatus +zix_btree_find(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + ZixBTreeNode* n = t->root; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + return ZIX_STATUS_SUCCESS; + } else if (n->is_leaf) { + break; + } else { + ++(*ti)->level; + n = n->children[i]; + } + } + + zix_btree_iter_free(*ti); + *ti = NULL; + return ZIX_STATUS_NOT_FOUND; +} + +ZIX_API ZixStatus +zix_btree_lower_bound(const ZixBTree* const t, + const void* const e, + ZixBTreeIter** const ti) +{ + if (!t) { + *ti = NULL; + return ZIX_STATUS_BAD_ARG; + } + + ZixBTreeNode* n = t->root; + bool found = false; + unsigned found_level = 0; + if (!(*ti = zix_btree_iter_new(t))) { + return ZIX_STATUS_NO_MEM; + } + + while (n) { + bool equal = false; + const uint16_t i = zix_btree_node_find(t, n, e, &equal); + + zix_btree_iter_set_frame(*ti, n, i); + + if (equal) { + found_level = (*ti)->level; + found = true; + } + + if (n->is_leaf) { + break; + } else { + ++(*ti)->level; + n = n->children[i]; + assert(n); + } + } + + const ZixBTreeIterFrame* const frame = &(*ti)->stack[(*ti)->level]; + if (frame->index == frame->node->n_vals) { + if (found) { + // Found on a previous level but went too far + (*ti)->level = found_level; + } else { + // Reached end (key is greater than everything in tree) + (*ti)->stack[0].node = NULL; + } + } + + return ZIX_STATUS_SUCCESS; +} + +ZIX_API void* +zix_btree_get(const ZixBTreeIter* const ti) +{ + const ZixBTreeIterFrame* const frame = &ti->stack[ti->level]; + assert(frame->index < frame->node->n_vals); + return frame->node->vals[frame->index]; +} + +ZIX_API ZixBTreeIter* +zix_btree_begin(const ZixBTree* const t) +{ + ZixBTreeIter* const i = zix_btree_iter_new(t); + if (!i) { + return NULL; + } else if (t->size == 0) { 
+ i->stack[0].node = NULL; + } else { + ZixBTreeNode* n = t->root; + i->stack[0].node = n; + i->stack[0].index = 0; + while (!n->is_leaf) { + n = n->children[0]; + ++i->level; + i->stack[i->level].node = n; + i->stack[i->level].index = 0; + } + } + return i; +} + +ZIX_API bool +zix_btree_iter_is_end(const ZixBTreeIter* const i) +{ + return !i || i->stack[0].node == NULL; +} + +ZIX_API void +zix_btree_iter_increment(ZixBTreeIter* const i) +{ + ZixBTreeIterFrame* f = &i->stack[i->level]; + if (f->node->is_leaf) { + // Leaf, move right + assert(f->index < f->node->n_vals); + if (++f->index == f->node->n_vals) { + // Reached end of leaf, move up + f = &i->stack[i->level]; + while (i->level > 0 && f->index == f->node->n_vals) { + f = &i->stack[--i->level]; + assert(f->index <= f->node->n_vals); + } + + if (f->index == f->node->n_vals) { + // Reached end of tree + assert(i->level == 0); + f->node = NULL; + f->index = 0; + } + } + } else { + // Internal node, move down to next child + assert(f->index < f->node->n_vals); + ZixBTreeNode* child = f->node->children[++f->index]; + + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + + // Move down and left until we hit a leaf + while (!f->node->is_leaf) { + child = f->node->children[0]; + f = &i->stack[++i->level]; + f->node = child; + f->index = 0; + } + } +} + +ZIX_API void +zix_btree_iter_free(ZixBTreeIter* const i) +{ + free(i); +}
/*
  Copyright 2011-2016 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef ZIX_BTREE_H
#define ZIX_BTREE_H

#include <stddef.h>

#include "common.h"

#ifdef __cplusplus
extern "C" {
#else
#  include <stdbool.h>
#endif

/**
   @addtogroup zix
   @{
   @name BTree
   @{
*/

/**
   A B-Tree (opaque; created with zix_btree_new()).
*/
typedef struct ZixBTreeImpl ZixBTree;

/**
   A B-Tree node (opaque).
*/
typedef struct ZixBTreeNodeImpl ZixBTreeNode;

/**
   An iterator over a B-Tree.

   Note that modifying the tree invalidates all iterators, so all iterators
   are const iterators.
*/
typedef struct ZixBTreeIterImpl ZixBTreeIter;

/**
   Create a new (empty) B-Tree.

   @param cmp Comparator used to order elements.
   @param cmp_data Opaque pointer associated with `cmp` (presumably passed to
   it as its user_data argument; confirm against the implementation).
   @param destroy Function to destroy an element.
*/
ZIX_API ZixBTree*
zix_btree_new(ZixComparator  cmp,
              void*          cmp_data,
              ZixDestroyFunc destroy);

/**
   Free `t`.
*/
ZIX_API void
zix_btree_free(ZixBTree* t);

/**
   Return the number of elements in `t`.
*/
ZIX_API size_t
zix_btree_size(const ZixBTree* t);

/**
   Insert the element `e` into `t`.
*/
ZIX_API ZixStatus
zix_btree_insert(ZixBTree* t, void* e);

/**
   Remove the value `e` from `t`.

   @param t Tree to remove from.

   @param e Value to remove.

   @param out Set to point to the removed pointer (which may not equal `e`).

   @param next If non-NULL, pointed to the value following `e`. If *next is
   also non-NULL, the iterator is reused, otherwise a new one is allocated. To
   reuse an iterator, no items may have been added since its creation.
*/
ZIX_API ZixStatus
zix_btree_remove(ZixBTree* t, const void* e, void** out, ZixBTreeIter** next);

/**
   Set `ti` to an element equal to `e` in `t`.
   If no such item exists, `ti` is set to NULL.

   @return ZIX_STATUS_SUCCESS if a match was found (the caller then owns the
   new iterator in `ti`), ZIX_STATUS_NOT_FOUND if there is no match, or
   ZIX_STATUS_NO_MEM if the iterator could not be allocated.
*/
ZIX_API ZixStatus
zix_btree_find(const ZixBTree* t, const void* e, ZixBTreeIter** ti);

/**
   Set `ti` to the smallest element in `t` that is not less than `e`.

   Wildcards are supported, so if the search key `e` compares equal to many
   values in the tree, `ti` will be set to the least such element. The search
   key `e` is always passed as the second argument to the comparator.

   If the search runs off the end without a match, `ti` is set to an end
   iterator (see zix_btree_iter_is_end()) and success is still returned.

   @return ZIX_STATUS_SUCCESS, ZIX_STATUS_BAD_ARG if `t` is NULL (in which
   case `ti` is set to NULL), or ZIX_STATUS_NO_MEM if the iterator could not
   be allocated.
*/
ZIX_API ZixStatus
zix_btree_lower_bound(const ZixBTree* t, const void* e, ZixBTreeIter** ti);

/**
   Return the data associated with the given tree item.

   `ti` must point at an element, not at the end of the tree.
*/
ZIX_API void*
zix_btree_get(const ZixBTreeIter* ti);

/**
   Return an iterator to the first (smallest) element in `t`.

   If `t` is empty, the returned iterator is an end iterator. Returns NULL if
   the iterator could not be allocated.

   The returned iterator must be freed with zix_btree_iter_free().
*/
ZIX_API ZixBTreeIter*
zix_btree_begin(const ZixBTree* t);

/**
   Return true iff `i` is an iterator to the end of its tree.

   A NULL `i` is also treated as the end.
*/
ZIX_API bool
zix_btree_iter_is_end(const ZixBTreeIter* i);

/**
   Increment `i` to point to the next element in the tree.

   `i` must not already be at the end of the tree.
*/
ZIX_API void
zix_btree_iter_increment(ZixBTreeIter* i);

/**
   Free `i`.
*/
ZIX_API void
zix_btree_iter_free(ZixBTreeIter* i);

/**
   @}
   @}
*/

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZIX_BTREE_H */
/*
  Copyright 2012 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef ZIX_COMMON_H
#define ZIX_COMMON_H

/**
   @addtogroup zix
   @{
*/

/*
  Linkage selection for the public (ZIX_API) and file-private (ZIX_PRIVATE)
  functions:
  - ZIX_SHARED: public symbols are exported when ZIX_INTERNAL is defined and
    imported otherwise.
  - ZIX_INLINE: everything is `static inline`.
  - neither: public symbols have plain external linkage.
*/
/** @cond */
#ifdef ZIX_SHARED
#  ifdef _WIN32
#    define ZIX_LIB_IMPORT __declspec(dllimport)
#    define ZIX_LIB_EXPORT __declspec(dllexport)
#  else
#    define ZIX_LIB_IMPORT __attribute__((visibility("default")))
#    define ZIX_LIB_EXPORT __attribute__((visibility("default")))
#  endif
#  ifdef ZIX_INTERNAL
#    define ZIX_API ZIX_LIB_EXPORT
#  else
#    define ZIX_API ZIX_LIB_IMPORT
#  endif
#  define ZIX_PRIVATE static
#elif defined(ZIX_INLINE)
#  define ZIX_API static inline
#  define ZIX_PRIVATE static inline
#else
#  define ZIX_API
#  define ZIX_PRIVATE static
#endif
/** @endcond */

#ifdef __cplusplus
extern "C" {
#else
#  include <stdbool.h>
#endif

/**
   Status code returned by zix functions.

   ZIX_STATUS_SUCCESS is the first enumerator and therefore zero, so callers
   can test for success with `!status`.
*/
typedef enum {
    ZIX_STATUS_SUCCESS,
    ZIX_STATUS_ERROR,
    ZIX_STATUS_NO_MEM,
    ZIX_STATUS_NOT_FOUND,
    ZIX_STATUS_EXISTS,
    ZIX_STATUS_BAD_ARG,
    ZIX_STATUS_BAD_PERMS,
} ZixStatus;

/**
   Function for comparing two elements.

   `user_data` is an opaque pointer supplied by whoever registered the
   comparator.
*/
typedef int (*ZixComparator)(const void* a, const void* b, void* user_data);

/**
   Function for testing equality of two elements.
*/
typedef bool (*ZixEqualFunc)(const void* a, const void* b);

/**
   Function to destroy an element.
*/
typedef void (*ZixDestroyFunc)(void* ptr);

/**
   @}
*/

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZIX_COMMON_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/zix/digest.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,57 @@ +/* + Copyright 2012-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "digest.h" + +#ifdef __SSE4_2__ +# include <smmintrin.h> +#endif + +ZIX_API uint32_t +zix_digest_start(void) +{ +#ifdef __SSE4_2__ + return 1; // CRC32 initial value +#else + return 5381; // DJB hash initial value +#endif +} + +ZIX_API uint32_t +zix_digest_add(uint32_t hash, const void* const buf, const size_t len) +{ + const uint8_t* str = (const uint8_t*)buf; +#ifdef __SSE4_2__ + // SSE 4.2 CRC32 + for (size_t i = 0; i < (len / sizeof(uint32_t)); ++i) { + hash = _mm_crc32_u32(hash, *(const uint32_t*)str); + str += sizeof(uint32_t); + } + if (len & sizeof(uint16_t)) { + hash = _mm_crc32_u16(hash, *(const uint16_t*)str); + str += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) { + hash = _mm_crc32_u8(hash, *(const uint8_t*)str); + } +#else + // Classic DJB hash + for (size_t i = 0; i < len; ++i) { + hash = (hash << 5) + hash + str[i]; + } +#endif + return hash; +}
/*
  Copyright 2012 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef ZIX_DIGEST_H
#define ZIX_DIGEST_H

#include <stddef.h>
#include <stdint.h>

#include "common.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
   Return the initial digest value to seed a sequence of zix_digest_add()
   calls.
*/
ZIX_API uint32_t
zix_digest_start(void);

/**
   Add `len` bytes from `buf` to the running digest `hash`.

   @param hash Current digest value.
   @param buf Data to add.
   @param len Size of `buf` in bytes.
   @return The updated digest value.
*/
ZIX_API uint32_t
zix_digest_add(uint32_t hash, const void* buf, const size_t len);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZIX_DIGEST_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ext/sord/src/zix/hash.c Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,232 @@ +/* + Copyright 2011-2014 David Robillard <http://drobilla.net> + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "hash.h" + +/** + Primes, each slightly less than twice its predecessor, and as far away + from powers of two as possible. 
+*/ +static const unsigned sizes[] = { + 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, + 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, + 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 0 +}; + +typedef struct ZixHashEntry { + struct ZixHashEntry* next; ///< Next entry in bucket + uint32_t hash; ///< Non-modulo hash value + // Value follows here (access with zix_hash_value) +} ZixHashEntry; + +struct ZixHashImpl { + ZixHashFunc hash_func; + ZixEqualFunc equal_func; + ZixHashEntry** buckets; + const unsigned* n_buckets; + size_t value_size; + unsigned count; +}; + +static inline void* +zix_hash_value(ZixHashEntry* entry) +{ + return entry + 1; +} + +ZIX_API ZixHash* +zix_hash_new(ZixHashFunc hash_func, + ZixEqualFunc equal_func, + size_t value_size) +{ + ZixHash* hash = (ZixHash*)malloc(sizeof(ZixHash)); + if (hash) { + hash->hash_func = hash_func; + hash->equal_func = equal_func; + hash->n_buckets = &sizes[0]; + hash->value_size = value_size; + hash->count = 0; + if (!(hash->buckets = (ZixHashEntry**)calloc(*hash->n_buckets, + sizeof(ZixHashEntry*)))) { + free(hash); + return NULL; + } + } + return hash; +} + +ZIX_API void +zix_hash_free(ZixHash* hash) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e;) { + ZixHashEntry* next = e->next; + free(e); + e = next; + } + } + + free(hash->buckets); + free(hash); +} + +ZIX_API size_t +zix_hash_size(const ZixHash* hash) +{ + return hash->count; +} + +static inline void +insert_entry(ZixHashEntry** bucket, ZixHashEntry* entry) +{ + entry->next = *bucket; + *bucket = entry; +} + +static inline ZixStatus +rehash(ZixHash* hash, unsigned new_n_buckets) +{ + ZixHashEntry** new_buckets = (ZixHashEntry**)calloc( + new_n_buckets, sizeof(ZixHashEntry*)); + if (!new_buckets) { + return ZIX_STATUS_NO_MEM; + } + + const unsigned old_n_buckets = *hash->n_buckets; + for (unsigned b = 0; b < 
old_n_buckets; ++b) { + for (ZixHashEntry* e = hash->buckets[b]; e;) { + ZixHashEntry* const next = e->next; + const unsigned h = e->hash % new_n_buckets; + insert_entry(&new_buckets[h], e); + e = next; + } + } + + free(hash->buckets); + hash->buckets = new_buckets; + + return ZIX_STATUS_SUCCESS; +} + +static inline ZixHashEntry* +find_entry(const ZixHash* hash, + const void* key, + const unsigned h, + const unsigned h_nomod) +{ + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (e->hash == h_nomod && hash->equal_func(zix_hash_value(e), key)) { + return e; + } + } + return NULL; +} + +ZIX_API const void* +zix_hash_find(const ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const unsigned h = h_nomod % *hash->n_buckets; + ZixHashEntry* const entry = find_entry(hash, value, h, h_nomod); + return entry ? zix_hash_value(entry) : 0; +} + +ZIX_API ZixStatus +zix_hash_insert(ZixHash* hash, const void* value, const void** inserted) +{ + unsigned h_nomod = hash->hash_func(value); + unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry* elem = find_entry(hash, value, h, h_nomod); + if (elem) { + assert(elem->hash == h_nomod); + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_EXISTS; + } + + elem = (ZixHashEntry*)malloc(sizeof(ZixHashEntry) + hash->value_size); + if (!elem) { + return ZIX_STATUS_NO_MEM; + } + elem->next = NULL; + elem->hash = h_nomod; + memcpy(elem + 1, value, hash->value_size); + + const unsigned next_n_buckets = *(hash->n_buckets + 1); + if (next_n_buckets != 0 && (hash->count + 1) >= next_n_buckets) { + if (!rehash(hash, next_n_buckets)) { + h = h_nomod % *(++hash->n_buckets); + } + } + + insert_entry(&hash->buckets[h], elem); + ++hash->count; + if (inserted) { + *inserted = zix_hash_value(elem); + } + return ZIX_STATUS_SUCCESS; +} + +ZIX_API ZixStatus +zix_hash_remove(ZixHash* hash, const void* value) +{ + const unsigned h_nomod = hash->hash_func(value); + const 
unsigned h = h_nomod % *hash->n_buckets; + + ZixHashEntry** next_ptr = &hash->buckets[h]; + for (ZixHashEntry* e = hash->buckets[h]; e; e = e->next) { + if (h_nomod == e->hash && + hash->equal_func(zix_hash_value(e), value)) { + *next_ptr = e->next; + free(e); + return ZIX_STATUS_SUCCESS; + } + next_ptr = &e->next; + } + + if (hash->n_buckets != sizes) { + const unsigned prev_n_buckets = *(hash->n_buckets - 1); + if (hash->count - 1 <= prev_n_buckets) { + if (!rehash(hash, prev_n_buckets)) { + --hash->n_buckets; + } + } + } + + --hash->count; + return ZIX_STATUS_NOT_FOUND; +} + +ZIX_API void +zix_hash_foreach(ZixHash* hash, + ZixHashVisitFunc f, + void* user_data) +{ + for (unsigned b = 0; b < *hash->n_buckets; ++b) { + ZixHashEntry* bucket = hash->buckets[b]; + for (ZixHashEntry* e = bucket; e; e = e->next) { + f(zix_hash_value(e), user_data); + } + } +}
/*
  Copyright 2011-2015 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef ZIX_HASH_H
#define ZIX_HASH_H

#include <stddef.h>
#include <stdint.h>

#include "common.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
   @addtogroup zix
   @{
   @name Hash
   @{
*/

/**
   A hash table (opaque; created with zix_hash_new()).
*/
typedef struct ZixHashImpl ZixHash;

/**
   Function for computing the hash of an element.
*/
typedef uint32_t (*ZixHashFunc)(const void* value);

/**
   Function to visit a hash element.
*/
typedef void (*ZixHashVisitFunc)(void* value,
                                 void* user_data);

/**
   Create a new hash table.

   To minimize space overhead, unlike many hash tables this stores a single
   value, not a key and a value. Any size of value can be stored, but all the
   values in the hash table must be the same size, and the values must be safe
   to copy with memcpy. To get key:value behaviour, simply insert a struct
   with a key and value into the hash.

   @param hash_func The hashing function.
   @param equal_func A function to test value equality.
   @param value_size The size of the values to be stored.
   @return The new table, or NULL if allocation fails.
*/
ZIX_API ZixHash*
zix_hash_new(ZixHashFunc  hash_func,
             ZixEqualFunc equal_func,
             size_t       value_size);

/**
   Free `hash`.
*/
ZIX_API void
zix_hash_free(ZixHash* hash);

/**
   Return the number of elements in `hash`.
*/
ZIX_API size_t
zix_hash_size(const ZixHash* hash);

/**
   Insert an item into `hash`.

   If no matching value is found, ZIX_STATUS_SUCCESS will be returned, and
   `inserted` will be pointed to the copy of `value` made in the new hash
   node.

   If a matching value already exists, ZIX_STATUS_EXISTS will be returned, and
   `inserted` will be pointed to the existing value.

   @param hash The hash table.
   @param value The value to be inserted.
   @param inserted The copy of `value` in the hash table (may be NULL if the
   caller does not need it).
   @return ZIX_STATUS_SUCCESS, ZIX_STATUS_EXISTS, or ZIX_STATUS_NO_MEM.
*/
ZIX_API ZixStatus
zix_hash_insert(ZixHash*     hash,
                const void*  value,
                const void** inserted);

/**
   Remove an item from `hash`.

   @param hash The hash table.
   @param value The value to remove.
   @return ZIX_STATUS_SUCCESS or ZIX_STATUS_NOT_FOUND.
*/
ZIX_API ZixStatus
zix_hash_remove(ZixHash*    hash,
                const void* value);

/**
   Search for an item in `hash`.

   @param hash The hash table.
   @param value The value to search for.
   @return A pointer to the matching value stored in the table, or NULL if
   there is no match.
*/
ZIX_API const void*
zix_hash_find(const ZixHash* hash,
              const void*    value);

/**
   Call `f` on each value in `hash`.

   @param hash The hash table.
   @param f The function to call on each value.
   @param user_data The user_data parameter passed to `f`.
*/
ZIX_API void
zix_hash_foreach(ZixHash*         hash,
                 ZixHashVisitFunc f,
                 void*            user_data);

/**
   @}
   @}
*/

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZIX_HASH_H */
--- a/vamp-capnp/VampnProto.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-capnp/VampnProto.h Thu Jun 15 09:52:01 2017 +0100 @@ -4,7 +4,7 @@ Piper C++ Centre for Digital Music, Queen Mary, University of London. - Copyright 2015-2016 QMUL. + Copyright 2015-2017 QMUL. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -131,6 +131,12 @@ } static void + buildStaticOutputDescriptor(piper::StaticOutputDescriptor::Builder &b, + const StaticOutputDescriptor &sd) { + b.setTypeURI(sd.typeURI); + } + + static void buildConfiguredOutputDescriptor(piper::ConfiguredOutputDescriptor::Builder &b, const Vamp::Plugin::OutputDescriptor &od) { @@ -163,13 +169,23 @@ static void buildOutputDescriptor(piper::OutputDescriptor::Builder &b, - const Vamp::Plugin::OutputDescriptor &od) { + const Vamp::Plugin::OutputDescriptor &od, + const StaticOutputDescriptor &sd) { auto basic = b.initBasic(); buildBasicDescriptor(basic, od); auto configured = b.initConfigured(); buildConfiguredOutputDescriptor(configured, od); + + auto statc = b.initStatic(); + buildStaticOutputDescriptor(statc, sd); + } + + static void + readStaticOutputDescriptor(StaticOutputDescriptor &sd, + const piper::StaticOutputDescriptor::Reader &r) { + sd.typeURI = r.getTypeURI(); } static void @@ -205,9 +221,11 @@ static void readOutputDescriptor(Vamp::Plugin::OutputDescriptor &od, + StaticOutputDescriptor &sd, const piper::OutputDescriptor::Reader &r) { readBasicDescriptor(od, r.getBasic()); + readStaticOutputDescriptor(sd, r.getStatic()); readConfiguredOutputDescriptor(od, r.getConfigured()); } @@ -414,6 +432,17 @@ auto od = olist[i]; buildBasicDescriptor(od, vouts[i]); } + + const auto &vstatic = d.staticOutputInfo; + auto slist = b.initStaticOutputInfo(unsigned(vstatic.size())); + int i = 0; + for (const auto &vi: vstatic) { + auto spair = slist[i]; + spair.setOutput(vi.first); + auto sdata = spair.initStatic(); + sdata.setTypeURI(vi.second.typeURI); + 
++i; + } } static void @@ -460,6 +489,14 @@ readBasicDescriptor(b, o); d.basicOutputInfo.push_back(b); } + + d.staticOutputInfo.clear(); + auto sp = r.getStaticOutputInfo(); + for (auto s: sp) { + std::string id = s.getOutput(); + std::string typeURI = s.getStatic().getTypeURI(); + d.staticOutputInfo[id] = { typeURI }; + } } static void @@ -640,10 +677,19 @@ b.setHandle(pmapper.pluginToHandle(cr.plugin)); auto olist = b.initOutputs(unsigned(cr.outputs.size())); + for (int i = 0; i < int(cr.outputs.size()); ++i) { + + auto id = cr.outputs[i].identifier; + StaticOutputDescriptor sd; + if (cr.staticOutputInfo.find(id) != cr.staticOutputInfo.end()) { + sd = cr.staticOutputInfo.at(id); + } + auto od = olist[i]; - buildOutputDescriptor(od, cr.outputs[i]); + buildOutputDescriptor(od, cr.outputs[i], sd); } + auto framing = b.initFraming(); framing.setStepSize(cr.framing.stepSize); framing.setBlockSize(cr.framing.blockSize); @@ -656,11 +702,16 @@ cr.plugin = pmapper.handleToPlugin(r.getHandle()); cr.outputs.clear(); + cr.staticOutputInfo.clear(); auto oo = r.getOutputs(); for (const auto &o: oo) { Vamp::Plugin::OutputDescriptor desc; - readOutputDescriptor(desc, o); + StaticOutputDescriptor sd; + readOutputDescriptor(desc, sd, o); cr.outputs.push_back(desc); + if (sd.typeURI != "") { + cr.staticOutputInfo[desc.identifier] = { sd.typeURI }; + } } cr.framing.stepSize = r.getFraming().getStepSize(); cr.framing.blockSize = r.getFraming().getBlockSize();
--- a/vamp-capnp/piper.capnp.c++ Mon May 22 08:57:02 2017 +0100 +++ b/vamp-capnp/piper.capnp.c++ Thu Jun 15 09:52:01 2017 +0100 @@ -287,6 +287,51 @@ }; #endif // !CAPNP_LITE CAPNP_DEFINE_ENUM(SampleType_eca23e4a04bdfeb2, eca23e4a04bdfeb2); +static const ::capnp::_::AlignedData<34> b_a88eab3ec4264cba = { + { 0, 0, 0, 0, 5, 0, 6, 0, + 186, 76, 38, 196, 62, 171, 142, 168, + 12, 0, 0, 0, 1, 0, 0, 0, + 6, 146, 153, 76, 196, 198, 177, 196, + 1, 0, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 21, 0, 0, 0, 26, 1, 0, 0, + 37, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 33, 0, 0, 0, 63, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 112, 105, 112, 101, 114, 46, 99, 97, + 112, 110, 112, 58, 83, 116, 97, 116, + 105, 99, 79, 117, 116, 112, 117, 116, + 68, 101, 115, 99, 114, 105, 112, 116, + 111, 114, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 1, 0, + 4, 0, 0, 0, 3, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 13, 0, 0, 0, 66, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 3, 0, 1, 0, + 20, 0, 0, 0, 2, 0, 1, 0, + 116, 121, 112, 101, 85, 82, 73, 0, + 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, } +}; +::capnp::word const* const bp_a88eab3ec4264cba = b_a88eab3ec4264cba.words; +#if !CAPNP_LITE +static const uint16_t m_a88eab3ec4264cba[] = {0}; +static const uint16_t i_a88eab3ec4264cba[] = {0}; +const ::capnp::_::RawSchema s_a88eab3ec4264cba = { + 0xa88eab3ec4264cba, b_a88eab3ec4264cba.words, 34, nullptr, m_a88eab3ec4264cba, + 0, 1, i_a88eab3ec4264cba, nullptr, nullptr, { &s_a88eab3ec4264cba, nullptr, nullptr, 0, 0, nullptr } +}; +#endif // !CAPNP_LITE static const ::capnp::_::AlignedData<215> b_b2d0c825aac8249c = { { 0, 0, 0, 0, 5, 0, 6, 0, 156, 36, 200, 170, 37, 200, 208, 178, @@ -516,17 +561,17 @@ 1, 12, i_b2d0c825aac8249c, nullptr, nullptr, { &s_b2d0c825aac8249c, nullptr, nullptr, 
0, 0, nullptr } }; #endif // !CAPNP_LITE -static const ::capnp::_::AlignedData<49> b_902c6065e1be824a = { +static const ::capnp::_::AlignedData<64> b_902c6065e1be824a = { { 0, 0, 0, 0, 5, 0, 6, 0, 74, 130, 190, 225, 101, 96, 44, 144, 12, 0, 0, 0, 1, 0, 0, 0, 6, 146, 153, 76, 196, 198, 177, 196, - 2, 0, 7, 0, 0, 0, 0, 0, + 3, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 234, 0, 0, 0, 33, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 29, 0, 0, 0, 119, 0, 0, 0, + 29, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 105, 112, 101, 114, 46, 99, 97, @@ -534,21 +579,28 @@ 117, 116, 68, 101, 115, 99, 114, 105, 112, 116, 111, 114, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 8, 0, 0, 0, 3, 0, 4, 0, + 12, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 41, 0, 0, 0, 50, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 36, 0, 0, 0, 3, 0, 1, 0, - 48, 0, 0, 0, 2, 0, 1, 0, + 69, 0, 0, 0, 50, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 3, 0, 1, 0, + 76, 0, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 45, 0, 0, 0, 90, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 44, 0, 0, 0, 3, 0, 1, 0, - 56, 0, 0, 0, 2, 0, 1, 0, + 73, 0, 0, 0, 90, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 72, 0, 0, 0, 3, 0, 1, 0, + 84, 0, 0, 0, 2, 0, 1, 0, + 2, 0, 0, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 81, 0, 0, 0, 58, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 76, 0, 0, 0, 3, 0, 1, 0, + 88, 0, 0, 0, 2, 0, 1, 0, 98, 97, 115, 105, 99, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 63, 37, 44, 34, 99, 202, 145, 180, @@ -565,19 +617,28 @@ 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 115, 116, 97, 116, 105, 99, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 186, 76, 38, 196, 62, 171, 142, 168, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }; ::capnp::word const* const 
bp_902c6065e1be824a = b_902c6065e1be824a.words; #if !CAPNP_LITE static const ::capnp::_::RawSchema* const d_902c6065e1be824a[] = { + &s_a88eab3ec4264cba, &s_b2d0c825aac8249c, &s_b491ca63222c253f, }; -static const uint16_t m_902c6065e1be824a[] = {0, 1}; -static const uint16_t i_902c6065e1be824a[] = {0, 1}; +static const uint16_t m_902c6065e1be824a[] = {0, 1, 2}; +static const uint16_t i_902c6065e1be824a[] = {0, 1, 2}; const ::capnp::_::RawSchema s_902c6065e1be824a = { - 0x902c6065e1be824a, b_902c6065e1be824a.words, 49, d_902c6065e1be824a, m_902c6065e1be824a, - 2, 2, i_902c6065e1be824a, nullptr, nullptr, { &s_902c6065e1be824a, nullptr, nullptr, 0, 0, nullptr } + 0x902c6065e1be824a, b_902c6065e1be824a.words, 64, d_902c6065e1be824a, m_902c6065e1be824a, + 3, 3, i_902c6065e1be824a, nullptr, nullptr, { &s_902c6065e1be824a, nullptr, nullptr, 0, 0, nullptr } }; #endif // !CAPNP_LITE static const ::capnp::_::AlignedData<27> b_f50fb3b9c1bf75f4 = { @@ -618,109 +679,119 @@ }; #endif // !CAPNP_LITE CAPNP_DEFINE_ENUM(InputDomain_f50fb3b9c1bf75f4, f50fb3b9c1bf75f4); -static const ::capnp::_::AlignedData<221> b_b83ac85463e6caa1 = { +static const ::capnp::_::AlignedData<245> b_b83ac85463e6caa1 = { { 0, 0, 0, 0, 5, 0, 6, 0, 161, 202, 230, 99, 84, 200, 58, 184, 12, 0, 0, 0, 1, 0, 2, 0, 6, 146, 153, 76, 196, 198, 177, 196, - 8, 0, 7, 0, 0, 0, 0, 0, + 9, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 2, 1, 0, 0, - 33, 0, 0, 0, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 29, 0, 0, 0, 167, 2, 0, 0, + 33, 0, 0, 0, 23, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 41, 0, 0, 0, 223, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 105, 112, 101, 114, 46, 99, 97, 112, 110, 112, 58, 69, 120, 116, 114, 97, 99, 116, 111, 114, 83, 116, 97, 116, 105, 99, 68, 97, 116, 97, 0, - 0, 0, 0, 0, 1, 0, 1, 0, - 48, 0, 0, 0, 3, 0, 4, 0, + 4, 0, 0, 0, 1, 0, 1, 0, + 28, 110, 231, 31, 55, 160, 201, 196, + 1, 0, 0, 0, 58, 0, 0, 0, + 83, 79, 80, 97, 105, 114, 0, 0, + 52, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 65, 1, 0, 0, 34, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 60, 1, 0, 0, 3, 0, 1, 0, - 72, 1, 0, 0, 2, 0, 1, 0, + 93, 1, 0, 0, 34, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 88, 1, 0, 0, 3, 0, 1, 0, + 100, 1, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 69, 1, 0, 0, 50, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 64, 1, 0, 0, 3, 0, 1, 0, - 76, 1, 0, 0, 2, 0, 1, 0, + 97, 1, 0, 0, 50, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 92, 1, 0, 0, 3, 0, 1, 0, + 104, 1, 0, 0, 2, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 73, 1, 0, 0, 50, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 68, 1, 0, 0, 3, 0, 1, 0, - 80, 1, 0, 0, 2, 0, 1, 0, + 101, 1, 0, 0, 50, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 96, 1, 0, 0, 3, 0, 1, 0, + 108, 1, 0, 0, 2, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 77, 1, 0, 0, 58, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 72, 1, 0, 0, 3, 0, 1, 0, - 84, 1, 0, 0, 2, 0, 1, 0, + 105, 1, 0, 0, 58, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 100, 1, 0, 0, 3, 0, 1, 0, + 112, 1, 0, 0, 2, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 81, 1, 0, 0, 66, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 76, 1, 0, 0, 3, 0, 1, 0, - 88, 1, 0, 0, 2, 0, 1, 0, + 109, 1, 0, 0, 66, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 104, 1, 0, 0, 3, 0, 1, 0, + 116, 1, 0, 0, 2, 0, 1, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 85, 1, 0, 0, 74, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 84, 1, 0, 0, 3, 0, 1, 0, - 112, 1, 0, 0, 2, 0, 1, 0, + 113, 1, 0, 0, 74, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 112, 1, 0, 0, 3, 0, 1, 0, + 140, 1, 0, 0, 2, 0, 1, 0, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 109, 1, 0, 0, 130, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 108, 1, 0, 0, 3, 0, 1, 0, - 120, 1, 0, 0, 2, 0, 1, 0, + 137, 1, 0, 0, 130, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 136, 1, 0, 0, 3, 0, 1, 
0, + 148, 1, 0, 0, 2, 0, 1, 0, 7, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 117, 1, 0, 0, 130, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 116, 1, 0, 0, 3, 0, 1, 0, - 128, 1, 0, 0, 2, 0, 1, 0, + 145, 1, 0, 0, 130, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 144, 1, 0, 0, 3, 0, 1, 0, + 156, 1, 0, 0, 2, 0, 1, 0, 8, 0, 0, 0, 5, 0, 0, 0, 0, 0, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 125, 1, 0, 0, 90, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 124, 1, 0, 0, 3, 0, 1, 0, - 152, 1, 0, 0, 2, 0, 1, 0, + 153, 1, 0, 0, 90, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 152, 1, 0, 0, 3, 0, 1, 0, + 180, 1, 0, 0, 2, 0, 1, 0, 9, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 149, 1, 0, 0, 74, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 148, 1, 0, 0, 3, 0, 1, 0, - 176, 1, 0, 0, 2, 0, 1, 0, + 177, 1, 0, 0, 74, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 176, 1, 0, 0, 3, 0, 1, 0, + 204, 1, 0, 0, 2, 0, 1, 0, 10, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 173, 1, 0, 0, 98, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 172, 1, 0, 0, 3, 0, 1, 0, - 184, 1, 0, 0, 2, 0, 1, 0, + 201, 1, 0, 0, 98, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 200, 1, 0, 0, 3, 0, 1, 0, + 212, 1, 0, 0, 2, 0, 1, 0, 11, 0, 0, 0, 7, 0, 0, 0, 0, 0, 1, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 181, 1, 0, 0, 130, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 180, 1, 0, 0, 3, 0, 1, 0, - 208, 1, 0, 0, 2, 0, 1, 0, + 209, 1, 0, 0, 130, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 208, 1, 0, 0, 3, 0, 1, 0, + 236, 1, 0, 0, 2, 0, 1, 0, + 12, 0, 0, 0, 8, 0, 0, 0, + 0, 0, 1, 0, 12, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 233, 1, 0, 0, 138, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 236, 1, 0, 0, 3, 0, 1, 0, + 8, 2, 0, 0, 2, 0, 1, 0, 107, 101, 121, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -839,20 +910,98 @@ 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 115, 116, 97, 116, 105, 99, 79, 117, + 116, 112, 117, 116, 73, 110, 102, 111, + 0, 0, 0, 0, 0, 0, 
0, 0, + 14, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 1, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 28, 110, 231, 31, 55, 160, 201, 196, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 14, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }; ::capnp::word const* const bp_b83ac85463e6caa1 = b_b83ac85463e6caa1.words; #if !CAPNP_LITE static const ::capnp::_::RawSchema* const d_b83ac85463e6caa1[] = { &s_b491ca63222c253f, + &s_c4c9a0371fe76e1c, &s_f50fb3b9c1bf75f4, &s_f8fda10bef70a97d, }; -static const uint16_t m_b83ac85463e6caa1[] = {1, 11, 5, 10, 0, 2, 7, 6, 8, 9, 3, 4}; -static const uint16_t i_b83ac85463e6caa1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; +static const uint16_t m_b83ac85463e6caa1[] = {1, 11, 5, 10, 0, 2, 7, 6, 8, 9, 3, 12, 4}; +static const uint16_t i_b83ac85463e6caa1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; const ::capnp::_::RawSchema s_b83ac85463e6caa1 = { - 0xb83ac85463e6caa1, b_b83ac85463e6caa1.words, 221, d_b83ac85463e6caa1, m_b83ac85463e6caa1, - 3, 12, i_b83ac85463e6caa1, nullptr, nullptr, { &s_b83ac85463e6caa1, nullptr, nullptr, 0, 0, nullptr } + 0xb83ac85463e6caa1, b_b83ac85463e6caa1.words, 245, d_b83ac85463e6caa1, m_b83ac85463e6caa1, + 4, 13, i_b83ac85463e6caa1, nullptr, nullptr, { &s_b83ac85463e6caa1, nullptr, nullptr, 0, 0, nullptr } +}; +#endif // !CAPNP_LITE +static const ::capnp::_::AlignedData<49> b_c4c9a0371fe76e1c = { + { 0, 0, 0, 0, 5, 0, 6, 0, + 28, 110, 231, 31, 55, 160, 201, 196, + 32, 0, 0, 0, 1, 0, 0, 0, + 161, 202, 230, 99, 84, 200, 58, 184, + 2, 0, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 21, 0, 0, 0, 58, 1, 0, 0, + 37, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 33, 0, 0, 0, 119, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 112, 105, 112, 101, 114, 46, 99, 97, + 112, 110, 112, 58, 69, 120, 116, 114, + 97, 99, 116, 111, 114, 83, 116, 97, + 116, 105, 99, 68, 97, 116, 97, 46, + 83, 79, 80, 97, 105, 114, 0, 0, + 0, 0, 0, 0, 1, 
0, 1, 0, + 8, 0, 0, 0, 3, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 41, 0, 0, 0, 58, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 36, 0, 0, 0, 3, 0, 1, 0, + 48, 0, 0, 0, 2, 0, 1, 0, + 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 45, 0, 0, 0, 58, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 40, 0, 0, 0, 3, 0, 1, 0, + 52, 0, 0, 0, 2, 0, 1, 0, + 111, 117, 116, 112, 117, 116, 0, 0, + 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 115, 116, 97, 116, 105, 99, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 186, 76, 38, 196, 62, 171, 142, 168, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, } +}; +::capnp::word const* const bp_c4c9a0371fe76e1c = b_c4c9a0371fe76e1c.words; +#if !CAPNP_LITE +static const ::capnp::_::RawSchema* const d_c4c9a0371fe76e1c[] = { + &s_a88eab3ec4264cba, +}; +static const uint16_t m_c4c9a0371fe76e1c[] = {0, 1}; +static const uint16_t i_c4c9a0371fe76e1c[] = {0, 1}; +const ::capnp::_::RawSchema s_c4c9a0371fe76e1c = { + 0xc4c9a0371fe76e1c, b_c4c9a0371fe76e1c.words, 49, d_c4c9a0371fe76e1c, m_c4c9a0371fe76e1c, + 1, 2, i_c4c9a0371fe76e1c, nullptr, nullptr, { &s_c4c9a0371fe76e1c, nullptr, nullptr, 0, 0, nullptr } }; #endif // !CAPNP_LITE static const ::capnp::_::AlignedData<47> b_84d515888a427d07 = { @@ -2794,6 +2943,14 @@ constexpr ::capnp::_::RawSchema const* ParameterDescriptor::_capnpPrivate::schema; #endif // !CAPNP_LITE +// StaticOutputDescriptor +constexpr uint16_t StaticOutputDescriptor::_capnpPrivate::dataWordSize; +constexpr uint16_t StaticOutputDescriptor::_capnpPrivate::pointerCount; +#if !CAPNP_LITE +constexpr ::capnp::Kind StaticOutputDescriptor::_capnpPrivate::kind; +constexpr ::capnp::_::RawSchema const* StaticOutputDescriptor::_capnpPrivate::schema; +#endif // !CAPNP_LITE + 
// ConfiguredOutputDescriptor constexpr uint16_t ConfiguredOutputDescriptor::_capnpPrivate::dataWordSize; constexpr uint16_t ConfiguredOutputDescriptor::_capnpPrivate::pointerCount; @@ -2818,6 +2975,14 @@ constexpr ::capnp::_::RawSchema const* ExtractorStaticData::_capnpPrivate::schema; #endif // !CAPNP_LITE +// ExtractorStaticData::SOPair +constexpr uint16_t ExtractorStaticData::SOPair::_capnpPrivate::dataWordSize; +constexpr uint16_t ExtractorStaticData::SOPair::_capnpPrivate::pointerCount; +#if !CAPNP_LITE +constexpr ::capnp::Kind ExtractorStaticData::SOPair::_capnpPrivate::kind; +constexpr ::capnp::_::RawSchema const* ExtractorStaticData::SOPair::_capnpPrivate::schema; +#endif // !CAPNP_LITE + // RealTime constexpr uint16_t RealTime::_capnpPrivate::dataWordSize; constexpr uint16_t RealTime::_capnpPrivate::pointerCount;
--- a/vamp-capnp/piper.capnp.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-capnp/piper.capnp.h Thu Jun 15 09:52:01 2017 +0100 @@ -6,7 +6,7 @@ #include <capnp/generated-header-support.h> -#if CAPNP_VERSION != 6000 +#if CAPNP_VERSION != 7000 #error "Version mismatch between generated code and library headers. You must use the same version of the Cap'n Proto compiler and library." #endif @@ -23,6 +23,7 @@ VARIABLE_SAMPLE_RATE, }; CAPNP_DECLARE_ENUM(SampleType, eca23e4a04bdfeb2); +CAPNP_DECLARE_SCHEMA(a88eab3ec4264cba); CAPNP_DECLARE_SCHEMA(b2d0c825aac8249c); CAPNP_DECLARE_SCHEMA(902c6065e1be824a); CAPNP_DECLARE_SCHEMA(f50fb3b9c1bf75f4); @@ -32,6 +33,7 @@ }; CAPNP_DECLARE_ENUM(InputDomain, f50fb3b9c1bf75f4); CAPNP_DECLARE_SCHEMA(b83ac85463e6caa1); +CAPNP_DECLARE_SCHEMA(c4c9a0371fe76e1c); CAPNP_DECLARE_SCHEMA(84d515888a427d07); CAPNP_DECLARE_SCHEMA(c6f3f05f2bc614ba); CAPNP_DECLARE_SCHEMA(d6a172208c9a1760); @@ -104,6 +106,21 @@ typedef ::capnp::schemas::SampleType_eca23e4a04bdfeb2 SampleType; +struct StaticOutputDescriptor { + StaticOutputDescriptor() = delete; + + class Reader; + class Builder; + class Pipeline; + + struct _capnpPrivate { + CAPNP_DECLARE_STRUCT_HEADER(a88eab3ec4264cba, 0, 1) + #if !CAPNP_LITE + static constexpr ::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; } + #endif // !CAPNP_LITE + }; +}; + struct ConfiguredOutputDescriptor { ConfiguredOutputDescriptor() = delete; @@ -127,7 +144,7 @@ class Pipeline; struct _capnpPrivate { - CAPNP_DECLARE_STRUCT_HEADER(902c6065e1be824a, 0, 2) + CAPNP_DECLARE_STRUCT_HEADER(902c6065e1be824a, 0, 3) #if !CAPNP_LITE static constexpr ::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; } #endif // !CAPNP_LITE @@ -142,9 +159,25 @@ class Reader; class Builder; class Pipeline; + struct SOPair; struct _capnpPrivate { - CAPNP_DECLARE_STRUCT_HEADER(b83ac85463e6caa1, 2, 8) + CAPNP_DECLARE_STRUCT_HEADER(b83ac85463e6caa1, 2, 9) + #if !CAPNP_LITE + static constexpr 
::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; } + #endif // !CAPNP_LITE + }; +}; + +struct ExtractorStaticData::SOPair { + SOPair() = delete; + + class Reader; + class Builder; + class Pipeline; + + struct _capnpPrivate { + CAPNP_DECLARE_STRUCT_HEADER(c4c9a0371fe76e1c, 0, 2) #if !CAPNP_LITE static constexpr ::capnp::_::RawBrandedSchema const* brand() { return &schema->defaultBrand; } #endif // !CAPNP_LITE @@ -792,6 +825,87 @@ }; #endif // !CAPNP_LITE +class StaticOutputDescriptor::Reader { +public: + typedef StaticOutputDescriptor Reads; + + Reader() = default; + inline explicit Reader(::capnp::_::StructReader base): _reader(base) {} + + inline ::capnp::MessageSize totalSize() const { + return _reader.totalSize().asPublic(); + } + +#if !CAPNP_LITE + inline ::kj::StringTree toString() const { + return ::capnp::_::structString(_reader, *_capnpPrivate::brand()); + } +#endif // !CAPNP_LITE + + inline bool hasTypeURI() const; + inline ::capnp::Text::Reader getTypeURI() const; + +private: + ::capnp::_::StructReader _reader; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; + template <typename, ::capnp::Kind> + friend struct ::capnp::_::PointerHelpers; + template <typename, ::capnp::Kind> + friend struct ::capnp::List; + friend class ::capnp::MessageBuilder; + friend class ::capnp::Orphanage; +}; + +class StaticOutputDescriptor::Builder { +public: + typedef StaticOutputDescriptor Builds; + + Builder() = delete; // Deleted to discourage incorrect usage. + // You can explicitly initialize to nullptr instead. 
+ inline Builder(decltype(nullptr)) {} + inline explicit Builder(::capnp::_::StructBuilder base): _builder(base) {} + inline operator Reader() const { return Reader(_builder.asReader()); } + inline Reader asReader() const { return *this; } + + inline ::capnp::MessageSize totalSize() const { return asReader().totalSize(); } +#if !CAPNP_LITE + inline ::kj::StringTree toString() const { return asReader().toString(); } +#endif // !CAPNP_LITE + + inline bool hasTypeURI(); + inline ::capnp::Text::Builder getTypeURI(); + inline void setTypeURI( ::capnp::Text::Reader value); + inline ::capnp::Text::Builder initTypeURI(unsigned int size); + inline void adoptTypeURI(::capnp::Orphan< ::capnp::Text>&& value); + inline ::capnp::Orphan< ::capnp::Text> disownTypeURI(); + +private: + ::capnp::_::StructBuilder _builder; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; + friend class ::capnp::Orphanage; + template <typename, ::capnp::Kind> + friend struct ::capnp::_::PointerHelpers; +}; + +#if !CAPNP_LITE +class StaticOutputDescriptor::Pipeline { +public: + typedef StaticOutputDescriptor Pipelines; + + inline Pipeline(decltype(nullptr)): _typeless(nullptr) {} + inline explicit Pipeline(::capnp::AnyPointer::Pipeline&& typeless) + : _typeless(kj::mv(typeless)) {} + +private: + ::capnp::AnyPointer::Pipeline _typeless; + friend class ::capnp::PipelineHook; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; +}; +#endif // !CAPNP_LITE + class ConfiguredOutputDescriptor::Reader { public: typedef ConfiguredOutputDescriptor Reads; @@ -957,6 +1071,9 @@ inline bool hasConfigured() const; inline ::piper::ConfiguredOutputDescriptor::Reader getConfigured() const; + inline bool hasStatic() const; + inline ::piper::StaticOutputDescriptor::Reader getStatic() const; + private: ::capnp::_::StructReader _reader; template <typename, ::capnp::Kind> @@ -999,6 +1116,13 @@ inline void adoptConfigured(::capnp::Orphan< ::piper::ConfiguredOutputDescriptor>&& 
value); inline ::capnp::Orphan< ::piper::ConfiguredOutputDescriptor> disownConfigured(); + inline bool hasStatic(); + inline ::piper::StaticOutputDescriptor::Builder getStatic(); + inline void setStatic( ::piper::StaticOutputDescriptor::Reader value); + inline ::piper::StaticOutputDescriptor::Builder initStatic(); + inline void adoptStatic(::capnp::Orphan< ::piper::StaticOutputDescriptor>&& value); + inline ::capnp::Orphan< ::piper::StaticOutputDescriptor> disownStatic(); + private: ::capnp::_::StructBuilder _builder; template <typename, ::capnp::Kind> @@ -1019,6 +1143,7 @@ inline ::piper::Basic::Pipeline getBasic(); inline ::piper::ConfiguredOutputDescriptor::Pipeline getConfigured(); + inline ::piper::StaticOutputDescriptor::Pipeline getStatic(); private: ::capnp::AnyPointer::Pipeline _typeless; friend class ::capnp::PipelineHook; @@ -1076,6 +1201,9 @@ inline bool hasBasicOutputInfo() const; inline ::capnp::List< ::piper::Basic>::Reader getBasicOutputInfo() const; + inline bool hasStaticOutputInfo() const; + inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Reader getStaticOutputInfo() const; + private: ::capnp::_::StructReader _reader; template <typename, ::capnp::Kind> @@ -1174,6 +1302,13 @@ inline void adoptBasicOutputInfo(::capnp::Orphan< ::capnp::List< ::piper::Basic>>&& value); inline ::capnp::Orphan< ::capnp::List< ::piper::Basic>> disownBasicOutputInfo(); + inline bool hasStaticOutputInfo(); + inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Builder getStaticOutputInfo(); + inline void setStaticOutputInfo( ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Reader value); + inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Builder initStaticOutputInfo(unsigned int size); + inline void adoptStaticOutputInfo(::capnp::Orphan< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>&& value); + inline ::capnp::Orphan< ::capnp::List< ::piper::ExtractorStaticData::SOPair>> disownStaticOutputInfo(); + private: 
::capnp::_::StructBuilder _builder; template <typename, ::capnp::Kind> @@ -1201,6 +1336,98 @@ }; #endif // !CAPNP_LITE +class ExtractorStaticData::SOPair::Reader { +public: + typedef SOPair Reads; + + Reader() = default; + inline explicit Reader(::capnp::_::StructReader base): _reader(base) {} + + inline ::capnp::MessageSize totalSize() const { + return _reader.totalSize().asPublic(); + } + +#if !CAPNP_LITE + inline ::kj::StringTree toString() const { + return ::capnp::_::structString(_reader, *_capnpPrivate::brand()); + } +#endif // !CAPNP_LITE + + inline bool hasOutput() const; + inline ::capnp::Text::Reader getOutput() const; + + inline bool hasStatic() const; + inline ::piper::StaticOutputDescriptor::Reader getStatic() const; + +private: + ::capnp::_::StructReader _reader; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; + template <typename, ::capnp::Kind> + friend struct ::capnp::_::PointerHelpers; + template <typename, ::capnp::Kind> + friend struct ::capnp::List; + friend class ::capnp::MessageBuilder; + friend class ::capnp::Orphanage; +}; + +class ExtractorStaticData::SOPair::Builder { +public: + typedef SOPair Builds; + + Builder() = delete; // Deleted to discourage incorrect usage. + // You can explicitly initialize to nullptr instead. 
+ inline Builder(decltype(nullptr)) {} + inline explicit Builder(::capnp::_::StructBuilder base): _builder(base) {} + inline operator Reader() const { return Reader(_builder.asReader()); } + inline Reader asReader() const { return *this; } + + inline ::capnp::MessageSize totalSize() const { return asReader().totalSize(); } +#if !CAPNP_LITE + inline ::kj::StringTree toString() const { return asReader().toString(); } +#endif // !CAPNP_LITE + + inline bool hasOutput(); + inline ::capnp::Text::Builder getOutput(); + inline void setOutput( ::capnp::Text::Reader value); + inline ::capnp::Text::Builder initOutput(unsigned int size); + inline void adoptOutput(::capnp::Orphan< ::capnp::Text>&& value); + inline ::capnp::Orphan< ::capnp::Text> disownOutput(); + + inline bool hasStatic(); + inline ::piper::StaticOutputDescriptor::Builder getStatic(); + inline void setStatic( ::piper::StaticOutputDescriptor::Reader value); + inline ::piper::StaticOutputDescriptor::Builder initStatic(); + inline void adoptStatic(::capnp::Orphan< ::piper::StaticOutputDescriptor>&& value); + inline ::capnp::Orphan< ::piper::StaticOutputDescriptor> disownStatic(); + +private: + ::capnp::_::StructBuilder _builder; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; + friend class ::capnp::Orphanage; + template <typename, ::capnp::Kind> + friend struct ::capnp::_::PointerHelpers; +}; + +#if !CAPNP_LITE +class ExtractorStaticData::SOPair::Pipeline { +public: + typedef SOPair Pipelines; + + inline Pipeline(decltype(nullptr)): _typeless(nullptr) {} + inline explicit Pipeline(::capnp::AnyPointer::Pipeline&& typeless) + : _typeless(kj::mv(typeless)) {} + + inline ::piper::StaticOutputDescriptor::Pipeline getStatic(); +private: + ::capnp::AnyPointer::Pipeline _typeless; + friend class ::capnp::PipelineHook; + template <typename, ::capnp::Kind> + friend struct ::capnp::ToDynamic_; +}; +#endif // !CAPNP_LITE + class RealTime::Reader { public: typedef RealTime Reads; @@ -3839,6 +4066,40 
@@ ::capnp::bounded<2>() * ::capnp::POINTERS)); } +inline bool StaticOutputDescriptor::Reader::hasTypeURI() const { + return !_reader.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); +} +inline bool StaticOutputDescriptor::Builder::hasTypeURI() { + return !_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); +} +inline ::capnp::Text::Reader StaticOutputDescriptor::Reader::getTypeURI() const { + return ::capnp::_::PointerHelpers< ::capnp::Text>::get(_reader.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} +inline ::capnp::Text::Builder StaticOutputDescriptor::Builder::getTypeURI() { + return ::capnp::_::PointerHelpers< ::capnp::Text>::get(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} +inline void StaticOutputDescriptor::Builder::setTypeURI( ::capnp::Text::Reader value) { + ::capnp::_::PointerHelpers< ::capnp::Text>::set(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), value); +} +inline ::capnp::Text::Builder StaticOutputDescriptor::Builder::initTypeURI(unsigned int size) { + return ::capnp::_::PointerHelpers< ::capnp::Text>::init(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), size); +} +inline void StaticOutputDescriptor::Builder::adoptTypeURI( + ::capnp::Orphan< ::capnp::Text>&& value) { + ::capnp::_::PointerHelpers< ::capnp::Text>::adopt(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), kj::mv(value)); +} +inline ::capnp::Orphan< ::capnp::Text> StaticOutputDescriptor::Builder::disownTypeURI() { + return ::capnp::_::PointerHelpers< ::capnp::Text>::disown(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} + inline bool ConfiguredOutputDescriptor::Reader::hasUnit() const { return !_reader.getPointerField( ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); @@ -4131,6 +4392,45 @@ ::capnp::bounded<1>() * ::capnp::POINTERS)); } +inline bool OutputDescriptor::Reader::hasStatic() 
const { + return !_reader.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS).isNull(); +} +inline bool OutputDescriptor::Builder::hasStatic() { + return !_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS).isNull(); +} +inline ::piper::StaticOutputDescriptor::Reader OutputDescriptor::Reader::getStatic() const { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::get(_reader.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS)); +} +inline ::piper::StaticOutputDescriptor::Builder OutputDescriptor::Builder::getStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::get(_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS)); +} +#if !CAPNP_LITE +inline ::piper::StaticOutputDescriptor::Pipeline OutputDescriptor::Pipeline::getStatic() { + return ::piper::StaticOutputDescriptor::Pipeline(_typeless.getPointerField(2)); +} +#endif // !CAPNP_LITE +inline void OutputDescriptor::Builder::setStatic( ::piper::StaticOutputDescriptor::Reader value) { + ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::set(_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS), value); +} +inline ::piper::StaticOutputDescriptor::Builder OutputDescriptor::Builder::initStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::init(_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS)); +} +inline void OutputDescriptor::Builder::adoptStatic( + ::capnp::Orphan< ::piper::StaticOutputDescriptor>&& value) { + ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::adopt(_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS), kj::mv(value)); +} +inline ::capnp::Orphan< ::piper::StaticOutputDescriptor> OutputDescriptor::Builder::disownStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::disown(_builder.getPointerField( + ::capnp::bounded<2>() * ::capnp::POINTERS)); +} + inline bool 
ExtractorStaticData::Reader::hasKey() const { return !_reader.getPointerField( ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); @@ -4472,6 +4772,113 @@ ::capnp::bounded<7>() * ::capnp::POINTERS)); } +inline bool ExtractorStaticData::Reader::hasStaticOutputInfo() const { + return !_reader.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS).isNull(); +} +inline bool ExtractorStaticData::Builder::hasStaticOutputInfo() { + return !_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS).isNull(); +} +inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Reader ExtractorStaticData::Reader::getStaticOutputInfo() const { + return ::capnp::_::PointerHelpers< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>::get(_reader.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS)); +} +inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Builder ExtractorStaticData::Builder::getStaticOutputInfo() { + return ::capnp::_::PointerHelpers< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>::get(_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS)); +} +inline void ExtractorStaticData::Builder::setStaticOutputInfo( ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Reader value) { + ::capnp::_::PointerHelpers< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>::set(_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS), value); +} +inline ::capnp::List< ::piper::ExtractorStaticData::SOPair>::Builder ExtractorStaticData::Builder::initStaticOutputInfo(unsigned int size) { + return ::capnp::_::PointerHelpers< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>::init(_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS), size); +} +inline void ExtractorStaticData::Builder::adoptStaticOutputInfo( + ::capnp::Orphan< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>&& value) { + ::capnp::_::PointerHelpers< ::capnp::List< 
::piper::ExtractorStaticData::SOPair>>::adopt(_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS), kj::mv(value)); +} +inline ::capnp::Orphan< ::capnp::List< ::piper::ExtractorStaticData::SOPair>> ExtractorStaticData::Builder::disownStaticOutputInfo() { + return ::capnp::_::PointerHelpers< ::capnp::List< ::piper::ExtractorStaticData::SOPair>>::disown(_builder.getPointerField( + ::capnp::bounded<8>() * ::capnp::POINTERS)); +} + +inline bool ExtractorStaticData::SOPair::Reader::hasOutput() const { + return !_reader.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); +} +inline bool ExtractorStaticData::SOPair::Builder::hasOutput() { + return !_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS).isNull(); +} +inline ::capnp::Text::Reader ExtractorStaticData::SOPair::Reader::getOutput() const { + return ::capnp::_::PointerHelpers< ::capnp::Text>::get(_reader.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} +inline ::capnp::Text::Builder ExtractorStaticData::SOPair::Builder::getOutput() { + return ::capnp::_::PointerHelpers< ::capnp::Text>::get(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} +inline void ExtractorStaticData::SOPair::Builder::setOutput( ::capnp::Text::Reader value) { + ::capnp::_::PointerHelpers< ::capnp::Text>::set(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), value); +} +inline ::capnp::Text::Builder ExtractorStaticData::SOPair::Builder::initOutput(unsigned int size) { + return ::capnp::_::PointerHelpers< ::capnp::Text>::init(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), size); +} +inline void ExtractorStaticData::SOPair::Builder::adoptOutput( + ::capnp::Orphan< ::capnp::Text>&& value) { + ::capnp::_::PointerHelpers< ::capnp::Text>::adopt(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS), kj::mv(value)); +} +inline ::capnp::Orphan< ::capnp::Text> 
ExtractorStaticData::SOPair::Builder::disownOutput() { + return ::capnp::_::PointerHelpers< ::capnp::Text>::disown(_builder.getPointerField( + ::capnp::bounded<0>() * ::capnp::POINTERS)); +} + +inline bool ExtractorStaticData::SOPair::Reader::hasStatic() const { + return !_reader.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS).isNull(); +} +inline bool ExtractorStaticData::SOPair::Builder::hasStatic() { + return !_builder.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS).isNull(); +} +inline ::piper::StaticOutputDescriptor::Reader ExtractorStaticData::SOPair::Reader::getStatic() const { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::get(_reader.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS)); +} +inline ::piper::StaticOutputDescriptor::Builder ExtractorStaticData::SOPair::Builder::getStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::get(_builder.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS)); +} +#if !CAPNP_LITE +inline ::piper::StaticOutputDescriptor::Pipeline ExtractorStaticData::SOPair::Pipeline::getStatic() { + return ::piper::StaticOutputDescriptor::Pipeline(_typeless.getPointerField(1)); +} +#endif // !CAPNP_LITE +inline void ExtractorStaticData::SOPair::Builder::setStatic( ::piper::StaticOutputDescriptor::Reader value) { + ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::set(_builder.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS), value); +} +inline ::piper::StaticOutputDescriptor::Builder ExtractorStaticData::SOPair::Builder::initStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::init(_builder.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS)); +} +inline void ExtractorStaticData::SOPair::Builder::adoptStatic( + ::capnp::Orphan< ::piper::StaticOutputDescriptor>&& value) { + ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::adopt(_builder.getPointerField( + 
::capnp::bounded<1>() * ::capnp::POINTERS), kj::mv(value)); +} +inline ::capnp::Orphan< ::piper::StaticOutputDescriptor> ExtractorStaticData::SOPair::Builder::disownStatic() { + return ::capnp::_::PointerHelpers< ::piper::StaticOutputDescriptor>::disown(_builder.getPointerField( + ::capnp::bounded<1>() * ::capnp::POINTERS)); +} + inline ::int32_t RealTime::Reader::getSec() const { return _reader.getDataField< ::int32_t>( ::capnp::bounded<0>() * ::capnp::ELEMENTS);
--- a/vamp-json/VampJson.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-json/VampJson.h Thu Jun 15 09:52:01 2017 +0100 @@ -4,7 +4,7 @@ Piper C++ Centre for Digital Music, Queen Mary, University of London. - Copyright 2015-2016 QMUL. + Copyright 2015-2017 QMUL. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -47,6 +47,7 @@ #include <vamp-hostsdk/Plugin.h> #include <vamp-hostsdk/PluginLoader.h> +#include "vamp-support/StaticOutputDescriptor.h" #include "vamp-support/PluginStaticData.h" #include "vamp-support/PluginConfiguration.h" #include "vamp-support/RequestResponse.h" @@ -216,6 +217,15 @@ } static json11::Json + fromStaticOutputDescriptor(const StaticOutputDescriptor &sd) { + json11::Json::object jo; + if (sd.typeURI != "") { + jo["typeURI"] = sd.typeURI; + } + return json11::Json(jo); + } + + static json11::Json fromConfiguredOutputDescriptor(const Vamp::Plugin::OutputDescriptor &desc) { json11::Json::object jo { { "unit", desc.unit }, @@ -238,13 +248,28 @@ } static json11::Json - fromOutputDescriptor(const Vamp::Plugin::OutputDescriptor &desc) { + fromOutputDescriptor(const Vamp::Plugin::OutputDescriptor &desc, + const StaticOutputDescriptor &sd) { json11::Json::object jo { { "basic", fromBasicDescriptor(desc) }, + { "static", fromStaticOutputDescriptor(sd) }, { "configured", fromConfiguredOutputDescriptor(desc) } }; return json11::Json(jo); } + + static StaticOutputDescriptor + toStaticOutputDescriptor(json11::Json j, std::string &err) { + + StaticOutputDescriptor sd; + if (!j.is_object()) { + err = "object expected for static output descriptor"; + return sd; + } + + sd.typeURI = j["typeURI"].string_value(); + return sd; + } static Vamp::Plugin::OutputDescriptor toConfiguredOutputDescriptor(json11::Json j, std::string &err) { @@ -296,7 +321,7 @@ return od; } - static Vamp::Plugin::OutputDescriptor + static std::pair<Vamp::Plugin::OutputDescriptor, StaticOutputDescriptor> 
toOutputDescriptor(json11::Json j, std::string &err) { Vamp::Plugin::OutputDescriptor od; @@ -311,7 +336,13 @@ toBasicDescriptor(j["basic"], od, err); if (failed(err)) return {}; - return od; + StaticOutputDescriptor sd; + if (j["static"] != json11::Json()) { + sd = toStaticOutputDescriptor(j["static"], err); + if (failed(err)) return {}; + } + + return { od, sd }; } static json11::Json @@ -569,10 +600,36 @@ auto vouts = d.basicOutputInfo; for (auto &o: vouts) outinfo.push_back(fromBasicDescriptor(o)); jo["basicOutputInfo"] = outinfo; - + + json11::Json::object statinfo; + auto souts = d.staticOutputInfo; + for (auto &s: souts) { + statinfo[s.first] = fromStaticOutputDescriptor(s.second); + } + jo["staticOutputInfo"] = statinfo; + return json11::Json(jo); } + static StaticOutputInfo + toStaticOutputInfo(json11::Json j, std::string &err) { + + if (j == json11::Json()) return {}; + + if (!j.is_object()) { + err = "object expected for static output info"; + return {}; + } + + StaticOutputInfo sinfo; + auto items = j.object_items(); + for (auto i: items) { + sinfo[i.first] = toStaticOutputDescriptor(i.second, err); + if (failed(err)) return {}; + } + return sinfo; + } + static PluginStaticData toPluginStaticData(json11::Json j, std::string &err) { @@ -596,6 +653,7 @@ } else if (!j["rights"].is_null() && !j["rights"].is_string()) { + err = "string expected for rights"; } else if (!j["category"].is_null() && @@ -618,10 +676,10 @@ err = "string expected for inputDomain"; - } else if (!j["basicOutputInfo"].is_null() && - !j["basicOutputInfo"].is_array()) { + } else if (!j["staticOutputInfo"].is_null() && + !j["staticOutputInfo"].is_object()) { - err = "array expected for basicOutputInfo"; + err = "object expected for staticOutputInfo"; } else { @@ -670,7 +728,12 @@ if (failed(err)) return {}; psd.basicOutputInfo.push_back(b); } - + + StaticOutputInfo sinfo = + toStaticOutputInfo(j["staticOutputInfo"], err); + if (failed(err)) return {}; + psd.staticOutputInfo = sinfo; + 
return psd; } @@ -968,7 +1031,12 @@ json11::Json::array outs; for (auto &d: cr.outputs) { - outs.push_back(fromOutputDescriptor(d)); + auto id = d.identifier; + StaticOutputDescriptor sd; + if (cr.staticOutputInfo.find(id) != cr.staticOutputInfo.end()) { + sd = cr.staticOutputInfo.at(id); + } + outs.push_back(fromOutputDescriptor(d, sd)); } jo["outputList"] = outs; @@ -1001,8 +1069,10 @@ cr.plugin = pmapper.handleToPlugin(j["handle"].int_value()); for (const auto &o: j["outputList"].array_items()) { - cr.outputs.push_back(toOutputDescriptor(o, err)); + auto odpair = toOutputDescriptor(o, err); if (failed(err)) return {}; + cr.outputs.push_back(odpair.first); + cr.staticOutputInfo[odpair.first.identifier] = odpair.second; } cr.framing.stepSize = int(round(j["framing"]["stepSize"].number_value()));
--- a/vamp-server/simple-server.cpp Mon May 22 08:57:02 2017 +0100 +++ b/vamp-server/simple-server.cpp Thu Jun 15 09:52:01 2017 +0100 @@ -662,6 +662,9 @@ if (debug) { cerr << myname << " " << pid << ": waiting for format: " << format << endl; + if (format == "json") { + cerr << myname << " " << pid << ": (to test the server, try {\"method\": \"list\"})" << endl; + } } while (true) {
--- a/vamp-server/test.sh Mon May 22 08:57:02 2017 +0100 +++ b/vamp-server/test.sh Thu Jun 15 09:52:01 2017 +0100 @@ -30,6 +30,12 @@ expected_less_strict="$tmpdir/expected-less-strict" obtained="$tmpdir/obtained" +fail() { + local msg="$1" + echo " !! $msg!" 1>&2 + exit 1 +} + validate() { local file="$1" local schemaname="$2" @@ -37,7 +43,7 @@ echo " * validating against schema $schemaname... " 1>&2 jsonschema -i "$file" "$schemadir/$schemaname.json" 1>&2 && \ echo " -> validated against schema $schemaname" 1>&2 || \ - echo " !! failed to validate $schemaname!" 1>&2 + fail "failed to validate $schemaname" else echo "(schema directory $schemadir not found, skipping validation)" 1>&2 fi @@ -74,9 +80,9 @@ # Expected output, apart from the plugin list which seems a bit # fragile to check here cat > "$expected" <<EOF -{"id": 6, "jsonrpc": "2.0", "method": "load", "result": {"defaultConfiguration": {"channelCount": 1, "framing": {"blockSize": 1024, "stepSize": 1024}, "parameterValues": {"sensitivity": 40, "threshold": 3}}, "handle": 1, "staticData": {"basic": {"description": "Detect percussive note onsets by identifying broadband energy rises", "identifier": "percussiononsets", "name": "Simple Percussion Onset Detector"}, "basicOutputInfo": [{"description": "Percussive note onset locations", "identifier": "onsets", "name": "Onsets"}, {"description": "Broadband energy rise detection function", "identifier": "detectionfunction", "name": "Detection Function"}], "category": ["Time", "Onsets"], "inputDomain": "TimeDomain", "key": "vamp-example-plugins:percussiononsets", "maker": "Vamp SDK Example Plugins", "maxChannelCount": 1, "minChannelCount": 1, "parameters": [{"basic": {"description": "Energy rise within a frequency bin necessary to count toward broadband total", "identifier": "threshold", "name": "Energy rise threshold"}, "defaultValue": 3, "extents": {"max": 20, "min": 0}, "unit": "dB", "valueNames": []}, {"basic": {"description": "Sensitivity of peak detector applied 
to broadband detection function", "identifier": "sensitivity", "name": "Sensitivity"}, "defaultValue": 40, "extents": {"max": 100, "min": 0}, "unit": "%", "valueNames": []}], "programs": [], "rights": "Code copyright 2006 Queen Mary, University of London, after Dan Barry et al 2005. Freely redistributable (BSD license)", "version": 2}}} +{"id": 6, "jsonrpc": "2.0", "method": "load", "result": {"defaultConfiguration": {"channelCount": 1, "framing": {"blockSize": 1024, "stepSize": 1024}, "parameterValues": {"sensitivity": 40, "threshold": 3}}, "handle": 1, "staticData": {"basic": {"description": "Detect percussive note onsets by identifying broadband energy rises", "identifier": "percussiononsets", "name": "Simple Percussion Onset Detector"}, "basicOutputInfo": [{"description": "Percussive note onset locations", "identifier": "onsets", "name": "Onsets"}, {"description": "Broadband energy rise detection function", "identifier": "detectionfunction", "name": "Detection Function"}], "category": ["Time", "Onsets"], "inputDomain": "TimeDomain", "key": "vamp-example-plugins:percussiononsets", "maker": "Vamp SDK Example Plugins", "maxChannelCount": 1, "minChannelCount": 1, "parameters": [{"basic": {"description": "Energy rise within a frequency bin necessary to count toward broadband total", "identifier": "threshold", "name": "Energy rise threshold"}, "defaultValue": 3, "extents": {"max": 20, "min": 0}, "unit": "dB", "valueNames": []}, {"basic": {"description": "Sensitivity of peak detector applied to broadband detection function", "identifier": "sensitivity", "name": "Sensitivity"}, "defaultValue": 40, "extents": {"max": 100, "min": 0}, "unit": "%", "valueNames": []}], "programs": [], "rights": "Code copyright 2006 Queen Mary, University of London, after Dan Barry et al 2005. 
Freely redistributable (BSD license)", "staticOutputInfo": {"detectionfunction": {"typeURI": "http://purl.org/ontology/af/OnsetDetectionFunction"}, "onsets": {"typeURI": "http://purl.org/ontology/af/Onset"}}, "version": 2}}} {"error": {"code": 0, "message": "error in process request: plugin has not been configured"}, "jsonrpc": "2.0", "method": "process"} -{"id": "weevil", "jsonrpc": "2.0", "method": "configure", "result": {"framing": {"blockSize": 8, "stepSize": 8}, "handle": 1, "outputList": [{"basic": {"description": "Percussive note onset locations", "identifier": "onsets", "name": "Onsets"}, "configured": {"binCount": 0, "binNames": [], "hasDuration": false, "sampleRate": 44100, "sampleType": "VariableSampleRate", "unit": ""}}, {"basic": {"description": "Broadband energy rise detection function", "identifier": "detectionfunction", "name": "Detection Function"}, "configured": {"binCount": 1, "binNames": [""], "hasDuration": false, "quantizeStep": 1, "sampleRate": 86.1328125, "sampleType": "FixedSampleRate", "unit": ""}}]}} +{"id": "weevil", "jsonrpc": "2.0", "method": "configure", "result": {"framing": {"blockSize": 8, "stepSize": 8}, "handle": 1, "outputList": [{"basic": {"description": "Percussive note onset locations", "identifier": "onsets", "name": "Onsets"}, "configured": {"binCount": 0, "binNames": [], "hasDuration": false, "sampleRate": 44100, "sampleType": "VariableSampleRate", "unit": ""}, "static": {}}, {"basic": {"description": "Broadband energy rise detection function", "identifier": "detectionfunction", "name": "Detection Function"}, "configured": {"binCount": 1, "binNames": [""], "hasDuration": false, "quantizeStep": 1, "sampleRate": 86.1328125, "sampleType": "FixedSampleRate", "unit": ""}, "static": {}}]}} {"error": {"code": 0, "message": "error in configure request: unknown plugin handle supplied to configure"}, "id": 9, "jsonrpc": "2.0", "method": "configure"} {"jsonrpc": "2.0", "method": "process", "result": {"features": {}, "handle": 1}} 
{"jsonrpc": "2.0", "method": "finish", "result": {"features": {"detectionfunction": [{"featureValues": [0], "timestamp": {"n": 11609977, "s": 0}}]}, "handle": 1}}
--- a/vamp-support/LoaderRequests.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-support/LoaderRequests.h Thu Jun 15 09:52:01 2017 +0100 @@ -39,6 +39,7 @@ #include "PluginStaticData.h" #include "PluginConfiguration.h" +#include "StaticOutputRdf.h" #include <vamp-hostsdk/PluginLoader.h> @@ -73,8 +74,10 @@ Vamp::Plugin *p = loader->loadPlugin(key, 44100, 0); if (!p) continue; auto category = loader->getPluginCategory(key); - response.available.push_back - (PluginStaticData::fromPlugin(key, category, p)); + PluginStaticData psd = + PluginStaticData::fromPlugin(key, category, p); + psd.staticOutputInfo = StaticOutputRdf().loadStaticOutputInfo(key); + response.available.push_back(psd); delete p; } @@ -99,6 +102,9 @@ (req.pluginKey, loader->getPluginCategory(req.pluginKey), plugin); + + response.staticData.staticOutputInfo = + StaticOutputRdf().loadStaticOutputInfo(req.pluginKey); int defaultChannels = 0; if (plugin->getMinChannelCount() == plugin->getMaxChannelCount()) {
--- a/vamp-support/PluginStaticData.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-support/PluginStaticData.h Thu Jun 15 09:52:01 2017 +0100 @@ -4,7 +4,7 @@ Piper C++ Centre for Digital Music, Queen Mary, University of London. - Copyright 2006-2016 Chris Cannam and QMUL. + Copyright 2006-2017 Chris Cannam and QMUL. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation @@ -37,6 +37,8 @@ #include <vamp-hostsdk/Plugin.h> +#include "StaticOutputDescriptor.h" + namespace piper_vamp { /** @@ -62,7 +64,7 @@ std::string description; }; typedef std::vector<Basic> BasicList; - + PluginStaticData() : // invalid static data by default pluginVersion(0), minChannelCount(0), maxChannelCount(0), inputDomain(Vamp::Plugin::TimeDomain) { } @@ -72,13 +74,18 @@ std::string maker; std::string copyright; int pluginVersion; - std::vector<std::string> category; + std::vector<std::string> category; // not found in the plugin, may + // come from accompanying + // metadata size_t minChannelCount; size_t maxChannelCount; Vamp::PluginBase::ParameterList parameters; Vamp::PluginBase::ProgramList programs; Vamp::Plugin::InputDomain inputDomain; BasicList basicOutputInfo; + StaticOutputInfo staticOutputInfo; // not found in the plugin, may + // come from accompanying + // (RDF?) metadata static PluginStaticData fromPlugin(std::string pluginKey,
--- a/vamp-support/RequestResponse.h Mon May 22 08:57:02 2017 +0100 +++ b/vamp-support/RequestResponse.h Thu Jun 15 09:52:01 2017 +0100 @@ -203,6 +203,8 @@ Vamp::Plugin *plugin; Vamp::Plugin::OutputList outputs; + StaticOutputInfo staticOutputInfo; // stuff not in Plugin::OutputDescriptor + Framing framing; };
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

/* vamp-support/StaticOutputDescriptor.h (file added in this changeset) */

/*
    Piper C++

    Centre for Digital Music, Queen Mary, University of London.
    Copyright 2006-2017 Chris Cannam and QMUL.

    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation
    files (the "Software"), to deal in the Software without
    restriction, including without limitation the rights to use, copy,
    modify, merge, publish, distribute, sublicense, and/or sell copies
    of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

    Except as contained in this notice, the names of the Centre for
    Digital Music; Queen Mary, University of London; and Chris Cannam
    shall not be used in advertising or otherwise to promote the sale,
    use or other dealings in this Software without prior written
    authorization.
*/

#ifndef PIPER_STATIC_OUTPUT_DESCRIPTOR_H
#define PIPER_STATIC_OUTPUT_DESCRIPTOR_H

#include <string>
#include <map>

namespace piper_vamp {

/**
 * Per-output metadata that is held separately from the plugin's own
 * output descriptor. At present the only such property is a type
 * URI; it is left empty when nothing is known about the output.
 */
struct StaticOutputDescriptor
{
    /// URI naming the type of thing the output produces, or empty.
    std::string typeURI;
};

/**
 * A set of static output descriptors for one plugin, looked up by a
 * string key (the output's identifier, in the code that fills it).
 */
using StaticOutputInfo = std::map<std::string, StaticOutputDescriptor>;

} // namespace piper_vamp

#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vamp-support/StaticOutputRdf.h Thu Jun 15 09:52:01 2017 +0100 @@ -0,0 +1,314 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Piper C++ + + An API for audio analysis and feature extraction plugins. + + Centre for Digital Music, Queen Mary, University of London. + Copyright 2006-2017 Chris Cannam and QMUL. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + Except as contained in this notice, the names of the Centre for + Digital Music; Queen Mary, University of London; and Chris Cannam + shall not be used in advertising or otherwise to promote the sale, + use or other dealings in this Software without prior written + authorization. +*/ + +#ifndef PIPER_STATIC_OUTPUT_RDF_H +#define PIPER_STATIC_OUTPUT_RDF_H + +#include "StaticOutputDescriptor.h" + +#include <vamp-hostsdk/PluginLoader.h> + +#include <sord/sord.h> + +#include <mutex> + +namespace piper_vamp { + +//!!! 
todo: better (+ optional) error reporting; check whether file +//!!! exists before parsing it to avoid spurious error messages; +//!!! refactoring + +class StaticOutputRdf +{ +public: + StaticOutputRdf() : + m_world(sord_world_new()) + {} + + ~StaticOutputRdf() { + sord_world_free(m_world); + } + + StaticOutputInfo loadStaticOutputInfo(Vamp::HostExt::PluginLoader::PluginKey + pluginKey) { + + StaticOutputInfo info; + SordModel *model = sord_new(m_world, SORD_SPO|SORD_OPS|SORD_POS, false); + if (loadRdf(model, candidateRdfFilesFor(pluginKey))) { + loadStaticOutputInfoFromModel(model, pluginKey, info); + } + sord_free(model); + return info; + } + +private: + SordWorld *m_world; + + bool loadRdf(SordModel *targetModel, std::vector<std::string> filenames) { + for (auto f: filenames) { + if (loadRdfFile(targetModel, f)) { + return true; + } + } + return false; + } + + bool loadRdfFile(SordModel *targetModel, std::string filename) { + std::string base = "file://" + filename; + SerdURI bu; + if (serd_uri_parse((const uint8_t *)base.c_str(), &bu) != + SERD_SUCCESS) { + std::cerr << "Failed to parse base URI " << base << std::endl; + return false; + } + SerdNode bn = serd_node_from_string(SERD_URI, + (const uint8_t *)base.c_str()); + SerdEnv *env = serd_env_new(&bn); + SerdReader *reader = sord_new_reader(targetModel, env, SERD_TURTLE, 0); + SerdStatus rv = serd_reader_read_file + (reader, (const uint8_t *)filename.c_str()); + bool success = (rv == SERD_SUCCESS); + if (!success) { + // We are asking Serd to parse the file without having + // checked whether it actually exists or not (in order to + // avoid duplicating ugly platform/encoding-specific stuff + // in this file). 
So don't bleat if the file is simply not + // found, but only if there's a real parse error + if (rv != SERD_ERR_NOT_FOUND && + rv != SERD_ERR_UNKNOWN) { + std::cerr << "Failed to import RDF from " << filename + << ": " << serd_strerror(rv) << std::endl; + } + } + serd_reader_free(reader); + serd_env_free(env); + return success; + } + + std::vector<std::string> candidateRdfFilesFor(Vamp::HostExt:: + PluginLoader::PluginKey key) { + + std::string library = Vamp::HostExt::PluginLoader::getInstance()-> + getLibraryPathForPlugin(key); + + auto li = library.rfind('.'); + if (li == std::string::npos) return {}; + auto withoutSuffix = library.substr(0, li); + + std::vector<std::string> suffixes { "n3", "N3", "ttl", "TTL" }; + std::vector<std::string> candidates; + + for (auto suffix : suffixes) { + candidates.push_back(withoutSuffix + "." + suffix); + } + + return candidates; + } + + void + loadStaticOutputInfoFromModel(SordModel *model, + std::string pluginKey, + StaticOutputInfo &info) { + + // we want to find a graph like + // + // :plugin a vamp:Plugin + // :plugin vamp:identifier "pluginId" + // :library vamp:available_plugin :plugin + // :library vamp:identifier "libraryId" + // :plugin vamp:output :output1 + // :plugin vamp:output :output2 + // :plugin vamp:output :output3 + // :output1 vamp:computes_event_type :event + // :output2 vamp:computes_feature :feature + // :output3 vamp:computes_signal_type :signal + // + // in which pluginKey == libraryId + ":" + pluginId + + std::string libraryId, pluginId; + decomposePluginKey(pluginKey, libraryId, pluginId); + + typedef const uint8_t *S; + + SordNode *a = sord_new_uri + (m_world, S("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")); + + SordNode *pluginType = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/Plugin")); + + SordNode *identProp = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/identifier")); + SordNode *availProp = sord_new_uri + (m_world, 
S("http://purl.org/ontology/vamp/available_plugin")); + SordNode *outputProp = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/output")); + + SordNode *computesEventProp = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/computes_event_type")); + SordNode *computesFeatureProp = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/computes_feature")); + SordNode *computesSignalProp = sord_new_uri + (m_world, S("http://purl.org/ontology/vamp/computes_signal_type")); + + SordIter *pluginItr = 0; + + for (pluginItr = sord_search(model, 0, a, pluginType, 0); + !sord_iter_end(pluginItr); + sord_iter_next(pluginItr)) { + + const SordNode *pluginNode = + sord_iter_get_node(pluginItr, SORD_SUBJECT); + + SordNode *pluginIdNode = + sord_get(model, pluginNode, identProp, 0, 0); + + if (!pluginIdNode || + sord_node_get_type(pluginIdNode) != SORD_LITERAL || + (const char *)sord_node_get_string(pluginIdNode) != pluginId) { + // This is a plugin node, but it's not the plugin node + // we're looking for. (We have to check both the type + // property, vamp:Plugin, and the identifier, + // vamp:identifier, because the identifier is just a + // string and it's possible it could be used for an + // output or parameter rather than just a plugin.) 
+ continue; + } + + SordNode *libraryNode = + sord_get(model, 0, availProp, pluginNode, 0); + + if (!libraryNode) { + std::cerr << "Plugin is not listed as being in a library, " + << "skipping library id check" << std::endl; + } else { + SordNode *libIdNode = + sord_get(model, libraryNode, identProp, 0, 0); + if (!libIdNode || + sord_node_get_type(libIdNode) != SORD_LITERAL || + (const char *)sord_node_get_string(libIdNode) != libraryId) { + std::cerr << "Ignoring plugin in wrong library" << std::endl; + continue; + } + } + + SordIter *outputItr = 0; + + for (outputItr = sord_search(model, pluginNode, outputProp, 0, 0); + !sord_iter_end(outputItr); + sord_iter_next(outputItr)) { + + const SordNode *outputNode = + sord_iter_get_node(outputItr, SORD_OBJECT); + + SordNode *outputIdNode = + sord_get(model, outputNode, identProp, 0, 0); + + if (!outputIdNode || + sord_node_get_type(outputIdNode) != SORD_LITERAL || + !sord_node_get_string(outputIdNode)) { + std::cerr << "Ignoring output with no id" << std::endl; + continue; + } + + std::string outputId = + (const char *)sord_node_get_string(outputIdNode); + + SordIter *propItr = 0; + + for (propItr = sord_search(model, outputNode, 0, 0, 0); + !sord_iter_end(propItr); + sord_iter_next(propItr)) { + + const SordNode *propNode = + sord_iter_get_node(propItr, SORD_PREDICATE); + + if (sord_node_equals(propNode, computesEventProp) || + sord_node_equals(propNode, computesFeatureProp) || + sord_node_equals(propNode, computesSignalProp)) { + + const SordNode *computesNode = + sord_iter_get_node(propItr, SORD_OBJECT); + + if (sord_node_get_type(computesNode) != SORD_URI || + !sord_node_get_string(computesNode)) { + std::cerr << "Ignoring non-URI computes node" + << std::endl; + continue; + } + + std::string typeURI = + (const char *)sord_node_get_string(computesNode); + + std::cerr << "Found type <" << typeURI + << "> for output \"" << outputId + << "\" of plugin \"" << pluginId + << "\" in library " << libraryId + << std::endl; + 
+ StaticOutputDescriptor desc; + desc.typeURI = typeURI; + info[outputId] = desc; + + break; // only interested in one "computes" property + } + } + + sord_iter_free(propItr); + } + + sord_iter_free(outputItr); + } + + sord_iter_free(pluginItr); + } + + bool decomposePluginKey(std::string pluginKey, + std::string &libraryId, + std::string &pluginId) { + auto i = pluginKey.find(':'); + if (i == std::string::npos || i == 0 || i + 1 == pluginKey.length()) { + return false; + } + libraryId = pluginKey.substr(0, i); + pluginId = pluginKey.substr(i + 1); + return true; + } + +}; + +} + +#endif