From a7accd02bade9ba84ae05b206247f615fbe1e10d Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 28 May 2026 12:19:02 +0530 Subject: [PATCH 1/2] add arrow csr dst col name --- src_cpp/include/node_arrow_test_utils.h | 13 ++ src_cpp/main.cpp | 2 + src_cpp/node_arrow_test_utils.cpp | 194 ++++++++++++++++++++++++ src_cpp/node_connection.cpp | 58 ++++--- src_js/connection.js | 53 +++++-- src_js/lbug.d.ts | 10 +- test/test.js | 1 + test/test_arrow_memory_backed_table.js | 103 +++++++++++++ 8 files changed, 403 insertions(+), 31 deletions(-) create mode 100644 src_cpp/include/node_arrow_test_utils.h create mode 100644 src_cpp/node_arrow_test_utils.cpp create mode 100644 test/test_arrow_memory_backed_table.js diff --git a/src_cpp/include/node_arrow_test_utils.h b/src_cpp/include/node_arrow_test_utils.h new file mode 100644 index 0000000..ce4f58a --- /dev/null +++ b/src_cpp/include/node_arrow_test_utils.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +// Creates Arrow C Data Interface structures for CSR rel-table tests. +// info[0] (optional string): destination column name (default: "to") +// Returns a plain object: { nodeSchemaPtr, nodeArrayPtr, +// indicesSchemaPtr, indicesArrayPtr, +// indptrSchemaPtr, indptrArrayPtr } +// where each value is an N-API External wrapping a heap-allocated ArrowSchema/ArrowArray. +// Ownership is held by the External; it is transferred to Ladybug when the pointer is passed to +// createArrowRelTableSync. +Napi::Value CreateArrowCSRTestData(const Napi::CallbackInfo& info); diff --git a/src_cpp/main.cpp b/src_cpp/main.cpp index ed8f21a..b2efb14 100644 --- a/src_cpp/main.cpp +++ b/src_cpp/main.cpp @@ -1,3 +1,4 @@ +#include "include/node_arrow_test_utils.h" #include "include/node_connection.h" #include "include/node_database.h" #include "include/node_query_result.h" @@ -8,6 +9,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) { NodeDatabase::Init(env, exports); NodePreparedStatement::Init(env, exports); NodeQueryResult::Init(env, exports); + exports.Set("createArrowCSRTestData", Napi::Function::New(env, CreateArrowCSRTestData)); return exports; } diff --git a/src_cpp/node_arrow_test_utils.cpp b/src_cpp/node_arrow_test_utils.cpp new file mode 100644 index 0000000..f49d551 --- /dev/null +++ b/src_cpp/node_arrow_test_utils.cpp @@ -0,0 +1,194 @@ +#include "include/node_arrow_test_utils.h" + +#include +#include +#include +#include +#include + +#include "common/arrow/arrow.h" +#include + +namespace { + +// Fill a leaf (non-struct) ArrowSchema in place. +static void fillLeafSchema(ArrowSchema* s, const char* format, const char* name) { + s->format = format; + s->name = name; + s->metadata = nullptr; + s->flags = ARROW_FLAG_NULLABLE; + s->n_children = 0; + s->children = nullptr; + s->dictionary = nullptr; + s->release = [](ArrowSchema* s) { s->release = nullptr; }; + s->private_data = nullptr; +} + +// Fill a struct ArrowSchema in place. `children` must already be malloc'd (n_fields pointers). +static void fillStructSchema(ArrowSchema* s, ArrowSchema** children, int64_t nFields) { + s->format = "+s"; + s->name = nullptr; + s->metadata = nullptr; + s->flags = 0; + s->n_children = nFields; + s->children = children; + s->dictionary = nullptr; + s->private_data = nullptr; + s->release = [](ArrowSchema* s) { + if (s->children) { + for (int64_t i = 0; i < s->n_children; ++i) { + if (s->children[i] && s->children[i]->release) { + s->children[i]->release(s->children[i]); + } + free(s->children[i]); + } + free(s->children); + } + s->release = nullptr; + }; +} + +// Allocate a leaf ArrowArray on the heap and fill it with typed data. +template +static ArrowArray* makeLeafArray(const std::vector& data) { + auto* a = static_cast(malloc(sizeof(ArrowArray))); + auto* buf = malloc(data.size() * sizeof(T)); + memcpy(buf, data.data(), data.size() * sizeof(T)); + a->length = static_cast(data.size()); + a->null_count = 0; + a->offset = 0; + a->n_buffers = 2; + a->n_children = 0; + a->buffers = static_cast(malloc(sizeof(void*) * 2)); + a->buffers[0] = nullptr; + a->buffers[1] = buf; + a->children = nullptr; + a->dictionary = nullptr; + a->private_data = nullptr; + a->release = [](ArrowArray* a) { + if (a->buffers) { + free(const_cast(a->buffers[1])); + free(const_cast(a->buffers)); + } + a->release = nullptr; + }; + return a; +} + +// Allocate a struct ArrowArray on the heap with pre-built children. +static ArrowArray* makeStructArray(int64_t length, ArrowArray** children, int64_t nChildren) { + auto* a = static_cast(malloc(sizeof(ArrowArray))); + a->length = length; + a->null_count = 0; + a->offset = 0; + a->n_buffers = 1; + a->n_children = nChildren; + a->buffers = static_cast(malloc(sizeof(void*))); + a->buffers[0] = nullptr; + a->children = children; + a->dictionary = nullptr; + a->private_data = nullptr; + a->release = [](ArrowArray* a) { + if (a->children) { + for (int64_t i = 0; i < a->n_children; ++i) { + if (a->children[i] && a->children[i]->release) { + a->children[i]->release(a->children[i]); + } + free(a->children[i]); + } + free(a->children); + } + if (a->buffers) { + free(const_cast(a->buffers)); + } + a->release = nullptr; + }; + return a; +} + +// Wrap a heap-allocated ArrowSchema (new'd) in an N-API External. +// The finalizer calls release (if set) then deletes the struct. +static Napi::External toSchemaExternal(Napi::Env env, ArrowSchema* s) { + return Napi::External::New(env, s, [](Napi::Env, ArrowSchema* s) { + if (s->release) { + s->release(s); + } + delete s; + }); +} + +// Wrap a heap-allocated ArrowArray (malloc'd) in an N-API External. +// The finalizer calls release (if set) then frees the struct. +static Napi::External toArrayExternal(Napi::Env env, ArrowArray* a) { + return Napi::External::New(env, a, [](Napi::Env, ArrowArray* a) { + if (a->release) { + a->release(a); + } + free(a); + }); +} + +} // namespace + +Napi::Value CreateArrowCSRTestData(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + + std::string dstColName = "to"; + if (info.Length() > 0 && info[0].IsString()) { + dstColName = info[0].As().Utf8Value(); + } + + // ----- Node table: struct { id: int64 } with 3 rows [0, 1, 2] ----- + auto* nodeSchema = new ArrowSchema; + auto** nc = static_cast(malloc(sizeof(ArrowSchema*))); + nc[0] = static_cast(malloc(sizeof(ArrowSchema))); + fillLeafSchema(nc[0], "l", "id"); + fillStructSchema(nodeSchema, nc, 1); + + auto** na = static_cast(malloc(sizeof(ArrowArray*))); + na[0] = makeLeafArray({0, 1, 2}); + auto* nodeArray = makeStructArray(3, na, 1); + + // ----- CSR indices: struct { : uint64, weight: int64 } ----- + // Edges: person0→person1 (w=10), person0→person2 (w=20), person1→person2 (w=30) + auto* indicesSchema = new ArrowSchema; + auto** ic = static_cast(malloc(sizeof(ArrowSchema*) * 2)); + ic[0] = static_cast(malloc(sizeof(ArrowSchema))); + ic[1] = static_cast(malloc(sizeof(ArrowSchema))); + // dstColName is dynamic — strdup it and store in private_data so the child release frees it. + char* nameBuf = strdup(dstColName.c_str()); + fillLeafSchema(ic[0], "L", nameBuf); + ic[0]->private_data = nameBuf; + ic[0]->release = [](ArrowSchema* s) { + free(s->private_data); + s->release = nullptr; + }; + fillLeafSchema(ic[1], "l", "weight"); + fillStructSchema(indicesSchema, ic, 2); + + auto** ia = static_cast(malloc(sizeof(ArrowArray*) * 2)); + ia[0] = makeLeafArray({1, 2, 2}); + ia[1] = makeLeafArray({10, 20, 30}); + auto* indicesArray = makeStructArray(3, ia, 2); + + // ----- CSR indptr: struct { v: uint64 } ----- + // Offsets: person0 → edges[0,2), person1 → edges[2,3), person2 → edges[3,3) + auto* indptrSchema = new ArrowSchema; + auto** pc = static_cast(malloc(sizeof(ArrowSchema*))); + pc[0] = static_cast(malloc(sizeof(ArrowSchema))); + fillLeafSchema(pc[0], "L", "v"); + fillStructSchema(indptrSchema, pc, 1); + + auto** pa = static_cast(malloc(sizeof(ArrowArray*))); + pa[0] = makeLeafArray({0, 2, 3, 3}); + auto* indptrArray = makeStructArray(4, pa, 1); + + auto result = Napi::Object::New(env); + result.Set("nodeSchemaPtr", toSchemaExternal(env, nodeSchema)); + result.Set("nodeArrayPtr", toArrayExternal(env, nodeArray)); + result.Set("indicesSchemaPtr", toSchemaExternal(env, indicesSchema)); + result.Set("indicesArrayPtr", toArrayExternal(env, indicesArray)); + result.Set("indptrSchemaPtr", toSchemaExternal(env, indptrSchema)); + result.Set("indptrArrayPtr", toArrayExternal(env, indptrArray)); + return result; +} diff --git a/src_cpp/node_connection.cpp b/src_cpp/node_connection.cpp index 9f8ed92..d8c7698 100644 --- a/src_cpp/node_connection.cpp +++ b/src_cpp/node_connection.cpp @@ -46,8 +46,8 @@ std::vector TakeArrowArrays(const Napi::Value& value, uint64_ auto arrayPointers = value.As(); wrappers.reserve(arrayPointers.Length()); for (auto i = 0u; i < arrayPointers.Length(); ++i) { - wrappers.push_back(TakeArrowArray( - GetPointerArgument(arrayPointers.Get(i), "ArrowArray"))); + wrappers.push_back( + TakeArrowArray(GetPointerArgument(arrayPointers.Get(i), "ArrowArray"))); } return wrappers; } @@ -69,7 +69,8 @@ void AdoptArrowQueryResult(Napi::Env env, NodeQueryResult* nodeQueryResult, Napi::Error::New(env, result->getErrorMessage()).ThrowAsJavaScriptException(); return; } - nodeQueryResult->AdoptQueryResult(std::move(result), std::move(connection), std::move(database)); + nodeQueryResult->AdoptQueryResult(std::move(result), std::move(connection), + std::move(database)); } } // namespace @@ -221,9 +222,8 @@ Napi::Value NodeConnection::QueryAsync(const Napi::CallbackInfo& info) { auto statement = info[0].As().Utf8Value(); auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[1].As()); auto callback = info[2].As(); - auto asyncWorker = - new ConnectionQueryAsyncWorker( - callback, connection, database, statement, nodeQueryResult, info[3]); + auto asyncWorker = new ConnectionQueryAsyncWorker(callback, connection, database, statement, + nodeQueryResult, info[3]); asyncWorker->Queue(); return info.Env().Undefined(); } @@ -235,8 +235,8 @@ Napi::Value NodeConnection::QueryArrowAsync(const Napi::CallbackInfo& info) { auto chunkSize = info[1].As().Int64Value(); auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[2].As()); auto callback = info[3].As(); - auto asyncWorker = new ConnectionQueryArrowAsyncWorker( - callback, connection, database, statement, chunkSize, nodeQueryResult); + auto asyncWorker = new ConnectionQueryArrowAsyncWorker(callback, connection, database, + statement, chunkSize, nodeQueryResult); asyncWorker->Queue(); return info.Env().Undefined(); } @@ -269,8 +269,8 @@ Napi::Value NodeConnection::CreateArrowTableSync(const Napi::CallbackInfo& info) try { auto result = lbug::ArrowTableSupport::createViewFromArrowTable(*connection, tableName, TakeArrowSchema(schema), TakeArrowArrays(info[2], numArrays)); - AdoptArrowQueryResult( - env, nodeQueryResult, std::move(result.queryResult), connection, database); + AdoptArrowQueryResult(env, nodeQueryResult, std::move(result.queryResult), connection, + database); } catch (const std::exception& exc) { Napi::Error::New(env, std::string(exc.what())).ThrowAsJavaScriptException(); } @@ -283,14 +283,38 @@ Napi::Value NodeConnection::CreateArrowRelTableSync(const Napi::CallbackInfo& in auto tableName = info[0].As().Utf8Value(); auto srcTableName = info[1].As().Utf8Value(); auto dstTableName = info[2].As().Utf8Value(); - auto* schema = GetPointerArgument(info[3], "schema"); - auto numArrays = info[5].As().Int64Value(); - auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[6].As()); try { - auto result = lbug::ArrowTableSupport::createRelTableFromArrowTable(*connection, tableName, - srcTableName, dstTableName, TakeArrowSchema(schema), TakeArrowArrays(info[4], numArrays)); - AdoptArrowQueryResult( - env, nodeQueryResult, std::move(result.queryResult), connection, database); + if (info.Length() == 11) { + // CSR mode: info[3]=indicesSchemaPtr, info[4]=indicesArraysPtr, + // info[5]=numIndicesArrays, + // info[6]=indptrSchemaPtr, info[7]=indptrArraysPtr, info[8]=numIndptrArrays, + // info[9]=dstColName, info[10]=nodeQueryResult + auto* indicesSchema = GetPointerArgument(info[3], "indicesSchema"); + auto numIndicesArrays = info[5].As().Uint32Value(); + auto* indptrSchema = GetPointerArgument(info[6], "indptrSchema"); + auto numIndptrArrays = info[8].As().Uint32Value(); + auto dstColName = info[9].As().Utf8Value(); + auto nodeQueryResult = + Napi::ObjectWrap::Unwrap(info[10].As()); + auto result = lbug::ArrowTableSupport::createRelTableFromArrowCSR(*connection, + tableName, srcTableName, dstTableName, TakeArrowSchema(indicesSchema), + TakeArrowArrays(info[4], numIndicesArrays), TakeArrowSchema(indptrSchema), + TakeArrowArrays(info[7], numIndptrArrays), dstColName); + AdoptArrowQueryResult(env, nodeQueryResult, std::move(result.queryResult), connection, + database); + } else { + // Flat mode: info[3]=schemaPtr, info[4]=arraysPtr, info[5]=numArrays, + // info[6]=nodeQueryResult + auto* schema = GetPointerArgument(info[3], "schema"); + auto numArrays = info[5].As().Uint32Value(); + auto nodeQueryResult = + Napi::ObjectWrap::Unwrap(info[6].As()); + auto result = lbug::ArrowTableSupport::createRelTableFromArrowTable(*connection, + tableName, srcTableName, dstTableName, TakeArrowSchema(schema), + TakeArrowArrays(info[4], numArrays)); + AdoptArrowQueryResult(env, nodeQueryResult, std::move(result.queryResult), connection, + database); + } } catch (const std::exception& exc) { Napi::Error::New(env, std::string(exc.what())).ThrowAsJavaScriptException(); } diff --git a/src_js/connection.js b/src_js/connection.js index ccbce2b..f83712d 100644 --- a/src_js/connection.js +++ b/src_js/connection.js @@ -398,10 +398,22 @@ class Connection { /** * Create an Arrow memory-backed relationship table from Arrow C Data Interface pointers. - * The Arrow table must contain endpoint columns named "from" and "to". + * For flat layout (default): the Arrow table must contain endpoint columns named "from" and "to". + * For CSR layout: pass indptrSchemaPtr and indptrArraysPtr; dstColName names the destination column. * Ownership of the schema and arrays is transferred to Ladybug. */ - createArrowRelTableSync(tableName, srcTableName, dstTableName, schemaPtr, arraysPtr, numArrays = 1) { + createArrowRelTableSync( + tableName, + srcTableName, + dstTableName, + schemaPtr, + arraysPtr, + numArrays = 1, + indptrSchemaPtr = null, + indptrArraysPtr = null, + numIndptrArrays = 1, + dstColName = "to" + ) { if (typeof tableName !== "string") { throw new Error("tableName must be a string."); } @@ -413,15 +425,34 @@ class Connection { } const connection = this._getConnectionSync(); const nodeQueryResult = new LbugNative.NodeQueryResult(); - connection.createArrowRelTableSync( - tableName, - srcTableName, - dstTableName, - schemaPtr, - arraysPtr, - numArrays, - nodeQueryResult - ); + if (indptrSchemaPtr != null) { + if (typeof dstColName !== "string") { + throw new Error("dstColName must be a string."); + } + connection.createArrowRelTableSync( + tableName, + srcTableName, + dstTableName, + schemaPtr, + arraysPtr, + numArrays, + indptrSchemaPtr, + indptrArraysPtr, + numIndptrArrays, + dstColName, + nodeQueryResult + ); + } else { + connection.createArrowRelTableSync( + tableName, + srcTableName, + dstTableName, + schemaPtr, + arraysPtr, + numArrays, + nodeQueryResult + ); + } return new QueryResult(this, nodeQueryResult); } diff --git a/src_js/lbug.d.ts b/src_js/lbug.d.ts index 88760b7..ddd1d52 100644 --- a/src_js/lbug.d.ts +++ b/src_js/lbug.d.ts @@ -325,7 +325,8 @@ export class Connection { /** * Create an Arrow memory-backed relationship table from Arrow C Data Interface pointers. - * The Arrow table must contain endpoint columns named "from" and "to". + * For flat layout (default): the Arrow table must contain endpoint columns named "from" and "to". + * For CSR layout: pass indptrSchemaPtr and indptrArraysPtr; dstColName names the destination column. * Ownership of schemaPtr and arraysPtr is transferred to Ladybug. */ createArrowRelTableSync( @@ -334,8 +335,11 @@ export class Connection { dstTableName: string, schemaPtr: NativePointer, arraysPtr: NativePointer | NativePointer[], - numArrays?: number - ): QueryResult; + numArrays?: number, + indptrSchemaPtr?: NativePointer | null, + indptrArraysPtr?: NativePointer | NativePointer[] | null, + numIndptrArrays?: number, + dstColName?: string): QueryResult; /** * Drop an Arrow memory-backed table and unregister its Arrow data. diff --git a/test/test.js b/test/test.js index a107a55..4cc99d2 100644 --- a/test/test.js +++ b/test/test.js @@ -14,6 +14,7 @@ describe("lbug", () => { importTest("Connection", "./test_connection.js"); importTest("Query result", "./test_query_result.js"); importTest("Arrow query", "./test_arrow_query.js"); + importTest("Arrow memory-backed table", "./test_arrow_memory_backed_table.js"); importTest("Data types", "./test_data_type.js"); importTest("Query parameters", "./test_parameter.js"); importTest("Concurrent query execution", "./test_concurrency.js"); diff --git a/test/test_arrow_memory_backed_table.js b/test/test_arrow_memory_backed_table.js new file mode 100644 index 0000000..ba898a2 --- /dev/null +++ b/test/test_arrow_memory_backed_table.js @@ -0,0 +1,103 @@ +const { assert } = require("chai"); +const path = require("path"); +const os = require("os"); +const fs = require("fs"); + +// Access the native module directly for test helpers +const LbugNative = require("../build/lbug_native.js"); + +let arrowDb, arrowConn; + +before(function () { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "lbug-arrow-test-")); + const dbPath = path.join(tmpDir, "arrow_test.lbdb"); + arrowDb = new lbug.Database(dbPath, 1 << 27); + arrowConn = new lbug.Connection(arrowDb, 2); +}); + +describe("createArrowRelTableSync CSR", function () { + it("should throw when dstColName is not a string in CSR mode", function () { + const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, + indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData("to"); + + arrowConn.createArrowTableSync("person_err", nodeSchemaPtr, nodeArrayPtr, 1); + + assert.throws(() => { + arrowConn.createArrowRelTableSync( + "knows_err", "person_err", "person_err", + indicesSchemaPtr, indicesArrayPtr, 1, + indptrSchemaPtr, indptrArrayPtr, 1, + 42 /* not a string */ + ); + }, /dstColName must be a string/); + }); + + it("should create a CSR rel table with the default dstColName", function () { + const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, + indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData("to"); + + arrowConn.createArrowTableSync("person_default", nodeSchemaPtr, nodeArrayPtr, 1); + + // Use default dstColName "to" (omit last optional param) + arrowConn.createArrowRelTableSync( + "knows_default", "person_default", "person_default", + indicesSchemaPtr, indicesArrayPtr, 1, + indptrSchemaPtr, indptrArrayPtr, 1 + ); + + // Verify edges: person0→person1 (w=10), person0→person2 (w=20), person1→person2 (w=30) + const rows = arrowConn.querySync( + "MATCH (a:person_default)-[r:knows_default]->(b:person_default) " + + "RETURN a.id, r.weight, b.id ORDER BY a.id, b.id" + ).getAllSync(); + + assert.deepEqual( + rows.map(r => [Number(r["a.id"]), Number(r["r.weight"]), Number(r["b.id"])]), + [[0, 10, 1], [0, 20, 2], [1, 30, 2]] + ); + }); + + it("should create a CSR rel table with a custom dstColName", function () { + const dstColName = "destination"; + const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, + indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData(dstColName); + + arrowConn.createArrowTableSync("person_custom", nodeSchemaPtr, nodeArrayPtr, 1); + + arrowConn.createArrowRelTableSync( + "knows_custom", "person_custom", "person_custom", + indicesSchemaPtr, indicesArrayPtr, 1, + indptrSchemaPtr, indptrArrayPtr, 1, + dstColName + ); + + const rows = arrowConn.querySync( + "MATCH (a:person_custom)-[r:knows_custom]->(b:person_custom) " + + "RETURN a.id, r.weight, b.id ORDER BY a.id, b.id" + ).getAllSync(); + + assert.deepEqual( + rows.map(r => [Number(r["a.id"]), Number(r["r.weight"]), Number(r["b.id"])]), + [[0, 10, 1], [0, 20, 2], [1, 30, 2]] + ); + }); + + it("should fall through to flat mode when no CSR params are passed", function () { + // Fresh data since each call to createArrowTableSync transfers ownership + const d = LbugNative.createArrowCSRTestData("to"); + + arrowConn.createArrowTableSync("person_flat_test", d.nodeSchemaPtr, d.nodeArrayPtr, 1); + + // Flat path requires "from"/"to" columns in the Arrow batch. + // Passing a batch with only "id" should trigger a Ladybug error (confirming flat path is used). + const d2 = LbugNative.createArrowCSRTestData("to"); + assert.throws(() => { + arrowConn.createArrowRelTableSync( + "knows_flat_test", "person_flat_test", "person_flat_test", + d2.nodeSchemaPtr, d2.nodeArrayPtr, 1 + // no CSR params → flat path + ); + }); + }); +}); + From 9e342f4151ea2f9da10584d65113ce22baa17277 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Thu, 28 May 2026 21:54:30 +0530 Subject: [PATCH 2/2] fix tests --- .github/workflows/ci.yml | 3 ++ CMakeLists.txt | 15 ++++++++- src_cpp/main.cpp | 7 +++- src_cpp/node_connection.cpp | 27 +++++++++++++--- src_js/connection.js | 6 ++++ test/test_arrow_memory_backed_table.js | 44 ++++++++++++++++++++------ 6 files changed, 86 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 739cf9d..5e62f39 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -109,6 +109,8 @@ jobs: - name: Build Node.js native module (Windows) if: ${{ matrix.platform == 'win32' }} shell: cmd + env: + EXTRA_CMAKE_FLAGS: -DLBUG_NODEJS_ENABLE_TEST_EXPORTS=ON run: | powershell.exe -Command "Add-MpPreference -ExclusionPath '${{ github.workspace }}'" call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" @@ -124,6 +126,7 @@ jobs: CMAKE_OSX_ARCHITECTURES: ${{ matrix.mac_env && matrix.arch || '' }} CMAKE_C_COMPILER_LAUNCHER: ccache CMAKE_CXX_COMPILER_LAUNCHER: ccache + EXTRA_CMAKE_FLAGS: -DLBUG_NODEJS_ENABLE_TEST_EXPORTS=ON - name: Run Node.js tests working-directory: tools/nodejs_api diff --git a/CMakeLists.txt b/CMakeLists.txt index a185fdc..aa1bb5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,7 +104,16 @@ get_filename_component(NODE_ADDON_API_INCLUDE_PATH ./node_modules/node-addon-api include_directories(${CMAKE_JS_INC}) include_directories(${NODE_ADDON_API_INCLUDE_PATH}) -file(GLOB CPP_SOURCE_FILES ./src_cpp/*) +option(LBUG_NODEJS_ENABLE_TEST_EXPORTS + "Export test-only native helpers (e.g. createArrowCSRTestData). Enable for dev/test builds only; never ship in production." + OFF) + +file(GLOB CPP_SOURCE_FILES ./src_cpp/*.cpp) + +if(NOT LBUG_NODEJS_ENABLE_TEST_EXPORTS) + list(REMOVE_ITEM CPP_SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src_cpp/node_arrow_test_utils.cpp") +endif() + file(GLOB JS_SOURCE_FILES ./src_js/*.js ./src_js/*.mjs @@ -178,3 +187,7 @@ target_link_libraries(lbugjs PRIVATE lbug ${NODEJS_LBUG_LINK_DEPS} ${CMAKE_JS_LI if(LBUG_NODEJS_EXTRA_LINK_LIBS) target_link_libraries(lbugjs PRIVATE ${LBUG_NODEJS_EXTRA_LINK_LIBS}) endif() +if(LBUG_NODEJS_ENABLE_TEST_EXPORTS) + target_compile_definitions(lbugjs PRIVATE LBUG_ENABLE_TEST_EXPORTS) + message(STATUS "lbugjs: test-only exports enabled (LBUG_NODEJS_ENABLE_TEST_EXPORTS=ON)") +endif() diff --git a/src_cpp/main.cpp b/src_cpp/main.cpp index b2efb14..135db52 100644 --- a/src_cpp/main.cpp +++ b/src_cpp/main.cpp @@ -1,15 +1,20 @@ -#include "include/node_arrow_test_utils.h" #include "include/node_connection.h" #include "include/node_database.h" #include "include/node_query_result.h" #include +#ifdef LBUG_ENABLE_TEST_EXPORTS +#include "include/node_arrow_test_utils.h" +#endif + Napi::Object InitAll(Napi::Env env, Napi::Object exports) { NodeConnection::Init(env, exports); NodeDatabase::Init(env, exports); NodePreparedStatement::Init(env, exports); NodeQueryResult::Init(env, exports); +#ifdef LBUG_ENABLE_TEST_EXPORTS exports.Set("createArrowCSRTestData", Napi::Function::New(env, CreateArrowCSRTestData)); +#endif return exports; } diff --git a/src_cpp/node_connection.cpp b/src_cpp/node_connection.cpp index d8c7698..45f381e 100644 --- a/src_cpp/node_connection.cpp +++ b/src_cpp/node_connection.cpp @@ -10,6 +10,25 @@ namespace { +// Maximum number of Arrow arrays accepted in a single batch call. +// Guards against integer-wrap DoS when a raw pointer + count is used. +static constexpr int64_t kMaxArrowBatchCount = 65536; + +// Reads a batch-count argument, throwing if it is not a number, not a positive +// integer, or exceeds kMaxArrowBatchCount. +static uint64_t ValidateArrayCount(const Napi::Value& value, const char* name) { + if (!value.IsNumber()) { + throw std::runtime_error(std::string(name) + " must be a number."); + } + auto count = value.As().Int64Value(); + if (count <= 0 || count > kMaxArrowBatchCount) { + throw std::runtime_error(std::string(name) + + " must be a positive integer no greater than " + + std::to_string(kMaxArrowBatchCount) + "."); + } + return static_cast(count); +} + template T* GetPointerArgument(const Napi::Value& value, const char* name) { if (value.IsBigInt()) { @@ -264,7 +283,7 @@ Napi::Value NodeConnection::CreateArrowTableSync(const Napi::CallbackInfo& info) Napi::HandleScope scope(env); auto tableName = info[0].As().Utf8Value(); auto* schema = GetPointerArgument(info[1], "schema"); - auto numArrays = info[3].As().Int64Value(); + auto numArrays = ValidateArrayCount(info[3], "numArrays"); auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[4].As()); try { auto result = lbug::ArrowTableSupport::createViewFromArrowTable(*connection, tableName, @@ -290,9 +309,9 @@ Napi::Value NodeConnection::CreateArrowRelTableSync(const Napi::CallbackInfo& in // info[6]=indptrSchemaPtr, info[7]=indptrArraysPtr, info[8]=numIndptrArrays, // info[9]=dstColName, info[10]=nodeQueryResult auto* indicesSchema = GetPointerArgument(info[3], "indicesSchema"); - auto numIndicesArrays = info[5].As().Uint32Value(); + auto numIndicesArrays = ValidateArrayCount(info[5], "numIndicesArrays"); auto* indptrSchema = GetPointerArgument(info[6], "indptrSchema"); - auto numIndptrArrays = info[8].As().Uint32Value(); + auto numIndptrArrays = ValidateArrayCount(info[8], "numIndptrArrays"); auto dstColName = info[9].As().Utf8Value(); auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[10].As()); @@ -306,7 +325,7 @@ Napi::Value NodeConnection::CreateArrowRelTableSync(const Napi::CallbackInfo& in // Flat mode: info[3]=schemaPtr, info[4]=arraysPtr, info[5]=numArrays, // info[6]=nodeQueryResult auto* schema = GetPointerArgument(info[3], "schema"); - auto numArrays = info[5].As().Uint32Value(); + auto numArrays = ValidateArrayCount(info[5], "numArrays"); auto nodeQueryResult = Napi::ObjectWrap::Unwrap(info[6].As()); auto result = lbug::ArrowTableSupport::createRelTableFromArrowTable(*connection, diff --git a/src_js/connection.js b/src_js/connection.js index f83712d..61f6ef4 100644 --- a/src_js/connection.js +++ b/src_js/connection.js @@ -426,6 +426,12 @@ class Connection { const connection = this._getConnectionSync(); const nodeQueryResult = new LbugNative.NodeQueryResult(); if (indptrSchemaPtr != null) { + if (indptrArraysPtr == null) { + throw new Error("indptrArraysPtr must be provided in CSR mode."); + } + if (!Array.isArray(indptrArraysPtr) && (!Number.isSafeInteger(numIndptrArrays) || numIndptrArrays <= 0)) { + throw new Error("numIndptrArrays must be a positive integer."); + } if (typeof dstColName !== "string") { throw new Error("dstColName must be a string."); } diff --git a/test/test_arrow_memory_backed_table.js b/test/test_arrow_memory_backed_table.js index ba898a2..f1e0f8b 100644 --- a/test/test_arrow_memory_backed_table.js +++ b/test/test_arrow_memory_backed_table.js @@ -6,6 +6,10 @@ const fs = require("fs"); // Access the native module directly for test helpers const LbugNative = require("../build/lbug_native.js"); +// createArrowCSRTestData is only compiled in when the addon is built with +// LBUG_NODEJS_ENABLE_TEST_EXPORTS=ON. Skip the whole suite otherwise. +const hasTestHelpers = typeof LbugNative.createArrowCSRTestData === "function"; + let arrowDb, arrowConn; before(function () { @@ -15,12 +19,23 @@ before(function () { arrowConn = new lbug.Connection(arrowDb, 2); }); +after(function () { + if (arrowConn) arrowConn.closeSync(); + if (arrowDb) arrowDb.closeSync(); +}); + describe("createArrowRelTableSync CSR", function () { + before(function () { + if (!hasTestHelpers) { + this.skip(); + } + }); it("should throw when dstColName is not a string in CSR mode", function () { const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData("to"); - arrowConn.createArrowTableSync("person_err", nodeSchemaPtr, nodeArrayPtr, 1); + const createPersonResult = arrowConn.createArrowTableSync("person_err", nodeSchemaPtr, nodeArrayPtr, 1); + createPersonResult.close(); assert.throws(() => { arrowConn.createArrowRelTableSync( @@ -36,20 +51,24 @@ describe("createArrowRelTableSync CSR", function () { const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData("to"); - arrowConn.createArrowTableSync("person_default", nodeSchemaPtr, nodeArrayPtr, 1); + const createPersonResult = arrowConn.createArrowTableSync("person_default", nodeSchemaPtr, nodeArrayPtr, 1); + createPersonResult.close(); // Use default dstColName "to" (omit last optional param) - arrowConn.createArrowRelTableSync( + const createRelResult = arrowConn.createArrowRelTableSync( "knows_default", "person_default", "person_default", indicesSchemaPtr, indicesArrayPtr, 1, indptrSchemaPtr, indptrArrayPtr, 1 ); + createRelResult.close(); // Verify edges: person0→person1 (w=10), person0→person2 (w=20), person1→person2 (w=30) - const rows = arrowConn.querySync( + const qr = arrowConn.querySync( "MATCH (a:person_default)-[r:knows_default]->(b:person_default) " + "RETURN a.id, r.weight, b.id ORDER BY a.id, b.id" - ).getAllSync(); + ); + const rows = qr.getAllSync(); + qr.close(); assert.deepEqual( rows.map(r => [Number(r["a.id"]), Number(r["r.weight"]), Number(r["b.id"])]), @@ -62,19 +81,23 @@ describe("createArrowRelTableSync CSR", function () { const { nodeSchemaPtr, nodeArrayPtr, indicesSchemaPtr, indicesArrayPtr, indptrSchemaPtr, indptrArrayPtr } = LbugNative.createArrowCSRTestData(dstColName); - arrowConn.createArrowTableSync("person_custom", nodeSchemaPtr, nodeArrayPtr, 1); + const createPersonResult = arrowConn.createArrowTableSync("person_custom", nodeSchemaPtr, nodeArrayPtr, 1); + createPersonResult.close(); - arrowConn.createArrowRelTableSync( + const createRelResult = arrowConn.createArrowRelTableSync( "knows_custom", "person_custom", "person_custom", indicesSchemaPtr, indicesArrayPtr, 1, indptrSchemaPtr, indptrArrayPtr, 1, dstColName ); + createRelResult.close(); - const rows = arrowConn.querySync( + const qr = arrowConn.querySync( "MATCH (a:person_custom)-[r:knows_custom]->(b:person_custom) " + "RETURN a.id, r.weight, b.id ORDER BY a.id, b.id" - ).getAllSync(); + ); + const rows = qr.getAllSync(); + qr.close(); assert.deepEqual( rows.map(r => [Number(r["a.id"]), Number(r["r.weight"]), Number(r["b.id"])]), @@ -86,7 +109,8 @@ describe("createArrowRelTableSync CSR", function () { // Fresh data since each call to createArrowTableSync transfers ownership const d = LbugNative.createArrowCSRTestData("to"); - arrowConn.createArrowTableSync("person_flat_test", d.nodeSchemaPtr, d.nodeArrayPtr, 1); + const createPersonResult = arrowConn.createArrowTableSync("person_flat_test", d.nodeSchemaPtr, d.nodeArrayPtr, 1); + createPersonResult.close(); // Flat path requires "from"/"to" columns in the Arrow batch. // Passing a batch with only "id" should trigger a Ladybug error (confirming flat path is used).