From b0adfc8cd3769a425784ee55094fcad2c911d23b Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Thu, 11 Jun 2026 17:50:05 +0800 Subject: [PATCH 1/6] Fix C++ TsFile writer flush offsets for multi-GB files. Sync file_base_offset after flush, reset the write stream and consumer, and add tree/table 4GB regression tests. --- cpp/src/common/allocator/byte_stream.h | 4 + cpp/src/file/tsfile_io_writer.cc | 6 +- cpp/src/reader/aligned_chunk_reader.cc | 2 +- cpp/test/CMakeLists.txt | 1 + cpp/test/reader/large_file_test_common.h | 63 ++++++++ .../table_view/large_file_table_test.cc | 148 ++++++++++++++++++ .../reader/tree_view/large_file_tree_test.cc | 129 +++++++++++++++ cpp/test/reader/tsfile_reader_test.cc | 62 -------- cpp/test/writer/tsfile_writer_test.cc | 51 ++++-- 9 files changed, 390 insertions(+), 76 deletions(-) create mode 100644 cpp/test/reader/large_file_test_common.h create mode 100644 cpp/test/reader/table_view/large_file_table_test.cc create mode 100644 cpp/test/reader/tree_view/large_file_tree_test.cc diff --git a/cpp/src/common/allocator/byte_stream.h b/cpp/src/common/allocator/byte_stream.h index 36db0e8d9..435a1f6fd 100644 --- a/cpp/src/common/allocator/byte_stream.h +++ b/cpp/src/common/allocator/byte_stream.h @@ -543,6 +543,10 @@ class ByteStream { Consumer(const ByteStream& bs) : host_(bs) { ASSERT(bs.head_.enable_atomic()); + reset(); + } + + void reset() { cur_ = nullptr; read_offset_within_cur_page_ = 0; total_end_offset_ = 0; diff --git a/cpp/src/file/tsfile_io_writer.cc b/cpp/src/file/tsfile_io_writer.cc index 21086da61..42d99feda 100644 --- a/cpp/src/file/tsfile_io_writer.cc +++ b/cpp/src/file/tsfile_io_writer.cc @@ -891,7 +891,11 @@ int TsFileIOWriter::flush_stream_to_file() { } } - write_stream_.purge_prev_pages(); + if (IS_SUCC(ret)) { + file_base_offset_ = file_->get_position(); + write_stream_.reset(); + write_stream_consumer_.reset(); + } return ret; } diff --git a/cpp/src/reader/aligned_chunk_reader.cc b/cpp/src/reader/aligned_chunk_reader.cc index d79bc7811..49c469547 100644 --- a/cpp/src/reader/aligned_chunk_reader.cc +++ b/cpp/src/reader/aligned_chunk_reader.cc @@ -319,7 +319,7 @@ int AlignedChunkReader::read_from_file_and_rewrap( int ret = E_OK; const int DEFAULT_READ_SIZE = 4096; // may use page_size + page_header_size char* file_data_buf = in_stream_.get_wrapped_buf(); - int offset = chunk_meta->offset_of_chunk_header_ + chunk_visit_offset; + int64_t offset = chunk_meta->offset_of_chunk_header_ + chunk_visit_offset; int read_size = (want_size < DEFAULT_READ_SIZE ? DEFAULT_READ_SIZE : want_size); if (file_data_buf_size < read_size || diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 4d325635f..513cbd5ca 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -150,6 +150,7 @@ set(LIB_TSFILE_SDK_DIR ${PROJECT_BINARY_DIR}/lib) message("LIB_TSFILE_SDK_DIR: ${LIB_TSFILE_SDK_DIR}") include_directories( + ${CMAKE_CURRENT_SOURCE_DIR}/reader ${LIBRARY_INCLUDE_DIR} ${THIRD_PARTY_INCLUDE} ) diff --git a/cpp/test/reader/large_file_test_common.h b/cpp/test/reader/large_file_test_common.h new file mode 100644 index 000000000..60522e94d --- /dev/null +++ b/cpp/test/reader/large_file_test_common.h @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TEST_LARGE_FILE_TEST_COMMON_H +#define TEST_LARGE_FILE_TEST_COMMON_H + +#include + +#include +#include +#include + +namespace large_file_test { + +constexpr int64_t kTargetFileSize = + static_cast(4) * 1024 * 1024 * 1024; +constexpr int64_t kMinAcceptableFileSize = + static_cast(3800) * 1024 * 1024; +constexpr int64_t kStartTime = 1622505600000LL; +constexpr uint32_t kTabletRows = 50000; +constexpr int64_t kFlushRows = 1000000; + +inline int64_t GetFileSize(const std::string& path) { + struct stat s; + if (stat(path.c_str(), &s) != 0) { + return -1; + } + return static_cast(s.st_size); +} + +inline std::string RandomSuffix(int length = 10) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(0, 61); + const std::string chars = + "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::string out; + out.reserve(length); + for (int i = 0; i < length; ++i) { + out += chars[dis(gen)]; + } + return out; +} + +} // namespace large_file_test + +#endif // TEST_LARGE_FILE_TEST_COMMON_H diff --git a/cpp/test/reader/table_view/large_file_table_test.cc b/cpp/test/reader/table_view/large_file_table_test.cc new file mode 100644 index 000000000..0f1edd716 --- /dev/null +++ b/cpp/test/reader/table_view/large_file_table_test.cc @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include +#include + +#include "common/schema.h" +#include "common/tablet.h" +#include "file/write_file.h" +#include "large_file_test_common.h" +#include "reader/table_result_set.h" +#include "reader/tsfile_reader.h" +#include "writer/tsfile_table_writer.h" + +using namespace common; +using namespace storage; +using namespace large_file_test; + +namespace { + +constexpr char kTableName[] = "large_table"; +constexpr char kDeviceTag[] = "device"; +constexpr char kValueField[] = "value"; + +bool VerifyTableRecord(TsFileReader& reader, int64_t record_index) { + const int64_t timestamp = kStartTime + record_index * 1000; + ResultSet* tmp_result_set = nullptr; + int ret = reader.query(kTableName, {kValueField}, timestamp, timestamp + 1, + tmp_result_set); + if (ret != E_OK || tmp_result_set == nullptr) { + return false; + } + + auto* table_result_set = static_cast(tmp_result_set); + bool has_next = false; + ret = table_result_set->next(has_next); + if (ret != E_OK || !has_next) { + reader.destroy_query_data_set(table_result_set); + return false; + } + + const int64_t read_time = table_result_set->get_value(1); + const int64_t read_value = table_result_set->get_value(2); + reader.destroy_query_data_set(table_result_set); + return read_time == timestamp && read_value == record_index; +} + +TableSchema* CreateTableSchema() { + std::vector measurement_schemas; + measurement_schemas.push_back( + new MeasurementSchema(kDeviceTag, TSDataType::STRING, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + measurement_schemas.push_back( + new MeasurementSchema(kValueField, TSDataType::INT64, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + std::vector column_categories = {ColumnCategory::TAG, + ColumnCategory::FIELD}; + return new TableSchema(kTableName, measurement_schemas, column_categories); +} + +} // namespace + +class LargeFileTableTest : public ::testing::Test { + protected: + void SetUp() override { + libtsfile_init(); + file_name_ = "large_file_table_test_" + RandomSuffix() + ".tsfile"; + remove(file_name_.c_str()); + } + + void TearDown() override { + remove(file_name_.c_str()); + libtsfile_destroy(); + } + + std::string file_name_; +}; + +TEST_F(LargeFileTableTest, LargeFile4GB_TableWriteAndRead) { + std::unique_ptr table_schema(CreateTableSchema()); + + WriteFile write_file; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + ASSERT_EQ(write_file.create(file_name_, flags, 0666), E_OK); + + TsFileTableWriter table_writer(&write_file, table_schema.get()); + const std::vector column_names = {kDeviceTag, kValueField}; + const std::vector data_types = {TSDataType::STRING, + TSDataType::INT64}; + + int64_t total_rows = 0; + while (GetFileSize(file_name_) < kTargetFileSize) { + Tablet tablet(column_names, data_types, kTabletRows); + tablet.set_table_name(kTableName); + for (uint32_t row = 0; row < kTabletRows; ++row) { + const int64_t record_index = total_rows + row; + tablet.add_timestamp(row, kStartTime + record_index * 1000); + tablet.add_value(row, kDeviceTag, "device0"); + tablet.add_value(row, kValueField, record_index); + } + ASSERT_EQ(table_writer.write_table(tablet), E_OK); + total_rows += kTabletRows; + if (total_rows % kFlushRows == 0) { + ASSERT_EQ(table_writer.flush(), E_OK); + } + } + + ASSERT_EQ(table_writer.flush(), E_OK); + ASSERT_EQ(table_writer.close(), E_OK); + ASSERT_EQ(write_file.close(), E_OK); + + const int64_t final_size = GetFileSize(file_name_); + ASSERT_GE(final_size, kMinAcceptableFileSize); + + TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + + const std::vector check_indexes = {0, total_rows / 2, + total_rows - 1}; + for (int64_t index : check_indexes) { + ASSERT_TRUE(VerifyTableRecord(reader, index)) << "index=" << index; + } + + ASSERT_EQ(reader.close(), E_OK); +} diff --git a/cpp/test/reader/tree_view/large_file_tree_test.cc b/cpp/test/reader/tree_view/large_file_tree_test.cc new file mode 100644 index 000000000..1fb63159b --- /dev/null +++ b/cpp/test/reader/tree_view/large_file_tree_test.cc @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include +#include + +#include "common/schema.h" +#include "common/tablet.h" +#include "large_file_test_common.h" +#include "reader/qds_without_timegenerator.h" +#include "reader/tsfile_reader.h" +#include "writer/tsfile_writer.h" + +using namespace common; +using namespace storage; +using namespace large_file_test; + +namespace { + +bool VerifyTreeRecord(TsFileReader& reader, int64_t record_index) { + std::vector select_list = {"device1.temperature"}; + const int64_t timestamp = kStartTime + record_index * 1000; + ResultSet* tmp_qds = nullptr; + int ret = reader.query(select_list, timestamp, timestamp + 1, tmp_qds); + if (ret != E_OK || tmp_qds == nullptr) { + return false; + } + + auto* qds = static_cast(tmp_qds); + bool has_next = false; + ret = qds->next(has_next); + if (ret != E_OK || !has_next) { + reader.destroy_query_data_set(qds); + return false; + } + + const int64_t read_time = qds->get_value(1); + const int64_t read_value = qds->get_value(2); + reader.destroy_query_data_set(qds); + return read_time == timestamp && read_value == record_index; +} + +} // namespace + +class LargeFileTreeTest : public ::testing::Test { + protected: + void SetUp() override { + libtsfile_init(); + file_name_ = "large_file_tree_test_" + RandomSuffix() + ".tsfile"; + remove(file_name_.c_str()); + } + + void TearDown() override { + remove(file_name_.c_str()); + libtsfile_destroy(); + } + + std::string file_name_; +}; + +TEST_F(LargeFileTreeTest, LargeFile4GB_TreeWriteAndRead) { + TsFileWriter writer; + int flags = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + ASSERT_EQ(writer.open(file_name_, flags, 0666), E_OK); + writer.register_timeseries( + "device1", + MeasurementSchema("temperature", TSDataType::INT64, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)); + + std::vector schema = { + MeasurementSchema("temperature", TSDataType::INT64, TSEncoding::PLAIN, + CompressionType::UNCOMPRESSED)}; + auto schema_ptr = std::make_shared>(schema); + + int64_t total_rows = 0; + while (GetFileSize(file_name_) < kTargetFileSize) { + Tablet tablet("device1", schema_ptr, kTabletRows); + for (uint32_t row = 0; row < kTabletRows; ++row) { + const int64_t record_index = total_rows + row; + tablet.add_timestamp(row, kStartTime + record_index * 1000); + tablet.add_value(row, 0, record_index); + } + ASSERT_EQ(writer.write_tablet(tablet), E_OK); + total_rows += kTabletRows; + if (total_rows % kFlushRows == 0) { + ASSERT_EQ(writer.flush(), E_OK); + } + } + + ASSERT_EQ(writer.flush(), E_OK); + ASSERT_EQ(writer.close(), E_OK); + + const int64_t final_size = GetFileSize(file_name_); + ASSERT_GE(final_size, kMinAcceptableFileSize); + + TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + + const std::vector check_indexes = {0, total_rows / 2, + total_rows - 1}; + for (int64_t index : check_indexes) { + ASSERT_TRUE(VerifyTreeRecord(reader, index)) << "index=" << index; + } + + ASSERT_EQ(reader.close(), E_OK); +} diff --git a/cpp/test/reader/tsfile_reader_test.cc b/cpp/test/reader/tsfile_reader_test.cc index 45261cf45..08cda6e31 100644 --- a/cpp/test/reader/tsfile_reader_test.cc +++ b/cpp/test/reader/tsfile_reader_test.cc @@ -395,65 +395,3 @@ TEST_F(TsFileReaderTest, GetTimeseriesMetadataTableModelTypeAndDeviceFilter) { reader.close(); } - -static const int64_t kLargeFileNumRecords = 300000000; -static const int64_t kLargeFileFlushBatch = 100000; - -TEST_F(TsFileReaderTest, - DISABLED_LargeFileNoEncodingNoCompression_WriteAndRead) { - std::string device_path = "device1"; - std::string measurement_name = "temperature"; - common::TSDataType data_type = common::TSDataType::INT64; - common::TSEncoding encoding = common::TSEncoding::PLAIN; - common::CompressionType compression_type = - common::CompressionType::UNCOMPRESSED; - - tsfile_writer_->register_timeseries( - device_path, storage::MeasurementSchema(measurement_name, data_type, - encoding, compression_type)); - - const int64_t start_time = 1622505600000LL; - for (int64_t i = 0; i < kLargeFileNumRecords; ++i) { - TsRecord record(start_time + i * 1000, device_path); - record.add_point(measurement_name, static_cast(i)); - ASSERT_EQ(tsfile_writer_->write_record(record), E_OK); - if ((i + 1) % kLargeFileFlushBatch == 0) { - ASSERT_EQ(tsfile_writer_->flush(), E_OK); - } - } - ASSERT_EQ(tsfile_writer_->flush(), E_OK); - ASSERT_EQ(tsfile_writer_->close(), E_OK); - - std::vector select_list = {"device1.temperature"}; - const int64_t end_time = start_time + (kLargeFileNumRecords - 1) * 1000 + 1; - - storage::TsFileReader reader; - int ret = reader.open(file_name_); - ASSERT_EQ(ret, common::E_OK); - - storage::ResultSet* tmp_qds = nullptr; - ret = reader.query(select_list, start_time, end_time, tmp_qds); - ASSERT_EQ(ret, common::E_OK); - ASSERT_NE(tmp_qds, nullptr); - - auto* qds = static_cast(tmp_qds); - std::shared_ptr meta = qds->get_metadata(); - ASSERT_NE(meta, nullptr); - ASSERT_EQ(meta->get_column_type(1), INT64); - ASSERT_EQ(meta->get_column_type(2), INT64); - - int64_t row_count = 0; - bool has_next = false; - - while (true) { - ret = qds->next(has_next); - ASSERT_EQ(ret, common::E_OK); - if (!has_next) break; - row_count++; - } - - ASSERT_EQ(row_count, kLargeFileNumRecords); - - reader.destroy_query_data_set(qds); - reader.close(); -} diff --git a/cpp/test/writer/tsfile_writer_test.cc b/cpp/test/writer/tsfile_writer_test.cc index 3c6d15165..139761380 100644 --- a/cpp/test/writer/tsfile_writer_test.cc +++ b/cpp/test/writer/tsfile_writer_test.cc @@ -262,24 +262,51 @@ TEST_F(TsFileWriterTest, RegisterTimeSeries) { ASSERT_EQ(tsfile_writer_->close(), E_OK); } -TEST_F(TsFileWriterTest, WriteMultipleRecords) { +TEST_F(TsFileWriterTest, MultiFlushWriteAndRead) { std::string device_path = "device1"; std::string measurement_name = "temperature"; - common::TSDataType data_type = common::TSDataType::INT32; - common::TSEncoding encoding = common::TSEncoding::PLAIN; - common::CompressionType compression_type = - common::CompressionType::UNCOMPRESSED; tsfile_writer_->register_timeseries( - device_path, storage::MeasurementSchema(measurement_name, data_type, - encoding, compression_type)); - - for (int i = 0; i < 50000; ++i) { - TsRecord record(1622505600000 + i * 1000, device_path); - record.add_point(measurement_name, (int32_t)i); + device_path, + storage::MeasurementSchema(measurement_name, common::TSDataType::INT64, + common::TSEncoding::PLAIN, + common::CompressionType::UNCOMPRESSED)); + + const int64_t start_time = 1622505600000LL; + const int row_count = 200000; + for (int i = 0; i < row_count; ++i) { + TsRecord record(start_time + i * 1000, device_path); + record.add_point(measurement_name, static_cast(i)); ASSERT_EQ(tsfile_writer_->write_record(record), E_OK); - ASSERT_EQ(tsfile_writer_->flush(), E_OK); + if ((i + 1) % 10000 == 0) { + ASSERT_EQ(tsfile_writer_->flush(), E_OK); + } } + ASSERT_EQ(tsfile_writer_->flush(), E_OK); ASSERT_EQ(tsfile_writer_->close(), E_OK); + + storage::TsFileReader reader; + ASSERT_EQ(reader.open(file_name_), E_OK); + std::vector select_list = {"device1.temperature"}; + storage::ResultSet* tmp_qds = nullptr; + ASSERT_EQ(reader.query(select_list, start_time, + start_time + (row_count - 1) * 1000 + 1, tmp_qds), + E_OK); + auto* qds = static_cast(tmp_qds); + + int64_t read_rows = 0; + bool has_next = false; + while (true) { + ASSERT_EQ(qds->next(has_next), E_OK); + if (!has_next) { + break; + } + ASSERT_EQ(qds->get_value(1), start_time + read_rows * 1000); + ASSERT_EQ(qds->get_value(2), read_rows); + ++read_rows; + } + ASSERT_EQ(read_rows, row_count); + reader.destroy_query_data_set(qds); + ASSERT_EQ(reader.close(), E_OK); } #if defined(ENABLE_ZLIB) && defined(ENABLE_SNAPPY) && defined(ENABLE_LZ4) && \ From 3e39a5013b0cb158903e51b3f40064687fef443d Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 12 Jun 2026 10:24:22 +0800 Subject: [PATCH 2/6] Add Python 4GB large-file tests and fix MSVC test includes. Mirror C++ tree/table ~4GB regression coverage in pytest and drop POSIX-only headers from large-file gtests. --- .../table_view/large_file_table_test.cc | 2 - .../reader/tree_view/large_file_tree_test.cc | 2 - python/tests/conftest.py | 24 ++++ python/tests/large_file_test_common.py | 39 +++++++ .../tests/table_view/test_large_file_table.py | 104 ++++++++++++++++++ .../tests/tree_view/test_large_file_tree.py | 101 +++++++++++++++++ 6 files changed, 268 insertions(+), 4 deletions(-) create mode 100644 python/tests/conftest.py create mode 100644 python/tests/large_file_test_common.py create mode 100644 python/tests/table_view/test_large_file_table.py create mode 100644 python/tests/tree_view/test_large_file_tree.py diff --git a/cpp/test/reader/table_view/large_file_table_test.cc b/cpp/test/reader/table_view/large_file_table_test.cc index 0f1edd716..adac286e8 100644 --- a/cpp/test/reader/table_view/large_file_table_test.cc +++ b/cpp/test/reader/table_view/large_file_table_test.cc @@ -17,9 +17,7 @@ * under the License. */ -#include #include -#include #include #include diff --git a/cpp/test/reader/tree_view/large_file_tree_test.cc b/cpp/test/reader/tree_view/large_file_tree_test.cc index 1fb63159b..dd698eac9 100644 --- a/cpp/test/reader/tree_view/large_file_tree_test.cc +++ b/cpp/test/reader/tree_view/large_file_tree_test.cc @@ -17,9 +17,7 @@ * under the License. */ -#include #include -#include #include #include diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 000000000..9f6288c2f --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import sys +from pathlib import Path + +_TESTS_DIR = Path(__file__).resolve().parent +if str(_TESTS_DIR) not in sys.path: + sys.path.insert(0, str(_TESTS_DIR)) diff --git a/python/tests/large_file_test_common.py b/python/tests/large_file_test_common.py new file mode 100644 index 000000000..815f17055 --- /dev/null +++ b/python/tests/large_file_test_common.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import os +import random +import string + +TARGET_FILE_SIZE = 4 * 1024 * 1024 * 1024 +MIN_ACCEPTABLE_FILE_SIZE = 3800 * 1024 * 1024 +START_TIME = 1622505600000 +TABLET_ROWS = 50000 +FLUSH_ROWS = 1_000_000 + + +def get_file_size(path: str) -> int: + try: + return os.path.getsize(path) + except OSError: + return -1 + + +def random_suffix(length: int = 10) -> str: + chars = string.ascii_letters + string.digits + return "".join(random.choice(chars) for _ in range(length)) diff --git a/python/tests/table_view/test_large_file_table.py b/python/tests/table_view/test_large_file_table.py new file mode 100644 index 000000000..a4d68f4bc --- /dev/null +++ b/python/tests/table_view/test_large_file_table.py @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import os + +from tsfile import ColumnCategory, ColumnSchema, Tablet +from tsfile import TableSchema, TSDataType, TsFileReader, TsFileTableWriter + +from large_file_test_common import ( + FLUSH_ROWS, + MIN_ACCEPTABLE_FILE_SIZE, + START_TIME, + TABLET_ROWS, + TARGET_FILE_SIZE, + get_file_size, + random_suffix, +) + +TABLE_NAME = "large_table" +DEVICE_TAG = "device" +VALUE_FIELD = "value" + + +def _fill_table_tablet(tablet: Tablet, total_rows: int) -> None: + row_range = range(TABLET_ROWS) + tablet.timestamp_list[:TABLET_ROWS] = [ + START_TIME + (total_rows + row) * 1000 for row in row_range + ] + tablet.data_list[0][:TABLET_ROWS] = ["device0"] * TABLET_ROWS + tablet.data_list[1][:TABLET_ROWS] = [total_rows + row for row in row_range] + + +def _verify_table_record(reader: TsFileReader, record_index: int) -> bool: + timestamp = START_TIME + record_index * 1000 + with reader.query_table( + TABLE_NAME, [VALUE_FIELD], timestamp, timestamp + 1 + ) as result: + if not result.next(): + return False + return ( + result.get_value_by_index(1) == timestamp + and result.get_value_by_index(2) == record_index + ) + + +def test_large_file_4gb_table_write_and_read(): + file_name = f"large_file_table_test_{random_suffix()}.tsfile" + if os.path.exists(file_name): + os.remove(file_name) + + schema = TableSchema( + TABLE_NAME, + [ + ColumnSchema(DEVICE_TAG, TSDataType.STRING, ColumnCategory.TAG), + ColumnSchema(VALUE_FIELD, TSDataType.INT64, ColumnCategory.FIELD), + ], + ) + + try: + with TsFileTableWriter(file_name, schema) as writer: + tablet = Tablet( + [DEVICE_TAG, VALUE_FIELD], + [TSDataType.STRING, TSDataType.INT64], + TABLET_ROWS, + ) + tablet.set_table_name(TABLE_NAME) + + total_rows = 0 + while get_file_size(file_name) < TARGET_FILE_SIZE: + _fill_table_tablet(tablet, total_rows) + writer.write_table(tablet) + total_rows += TABLET_ROWS + if total_rows % FLUSH_ROWS == 0: + writer.flush() + + writer.flush() + + final_size = get_file_size(file_name) + assert final_size >= MIN_ACCEPTABLE_FILE_SIZE + + reader = TsFileReader(file_name) + try: + for index in (0, total_rows // 2, total_rows - 1): + assert _verify_table_record(reader, index), f"index={index}" + finally: + reader.close() + finally: + if os.path.exists(file_name): + os.remove(file_name) diff --git a/python/tests/tree_view/test_large_file_tree.py b/python/tests/tree_view/test_large_file_tree.py new file mode 100644 index 000000000..b80b85c19 --- /dev/null +++ b/python/tests/tree_view/test_large_file_tree.py @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import os + +from tsfile import Compressor, Tablet, TSEncoding, TSDataType, TimeseriesSchema +from tsfile import TsFileReader, TsFileWriter + +from large_file_test_common import ( + FLUSH_ROWS, + MIN_ACCEPTABLE_FILE_SIZE, + START_TIME, + TABLET_ROWS, + TARGET_FILE_SIZE, + get_file_size, + random_suffix, +) + + +def _fill_tree_tablet(tablet: Tablet, total_rows: int) -> None: + row_range = range(TABLET_ROWS) + tablet.timestamp_list[:TABLET_ROWS] = [ + START_TIME + (total_rows + row) * 1000 for row in row_range + ] + tablet.data_list[0][:TABLET_ROWS] = [total_rows + row for row in row_range] + + +def _verify_tree_record(reader: TsFileReader, record_index: int) -> bool: + timestamp = START_TIME + record_index * 1000 + result = reader.query_timeseries( + "device1", ["temperature"], timestamp, timestamp + 1 + ) + try: + if not result.next(): + return False + return ( + result.get_value_by_index(1) == timestamp + and result.get_value_by_index(2) == record_index + ) + finally: + result.close() + + +def test_large_file_4gb_tree_write_and_read(): + file_name = f"large_file_tree_test_{random_suffix()}.tsfile" + if os.path.exists(file_name): + os.remove(file_name) + + try: + writer = TsFileWriter(file_name) + writer.register_timeseries( + "device1", + TimeseriesSchema( + "temperature", + TSDataType.INT64, + TSEncoding.PLAIN, + Compressor.UNCOMPRESSED, + ), + ) + + tablet = Tablet(["temperature"], [TSDataType.INT64], TABLET_ROWS) + tablet.set_table_name("device1") + + total_rows = 0 + while get_file_size(file_name) < TARGET_FILE_SIZE: + _fill_tree_tablet(tablet, total_rows) + writer.write_tablet(tablet) + total_rows += TABLET_ROWS + if total_rows % FLUSH_ROWS == 0: + writer.flush() + + writer.flush() + writer.close() + + final_size = get_file_size(file_name) + assert final_size >= MIN_ACCEPTABLE_FILE_SIZE + + reader = TsFileReader(file_name) + try: + for index in (0, total_rows // 2, total_rows - 1): + assert _verify_tree_record(reader, index), f"index={index}" + finally: + reader.close() + finally: + if os.path.exists(file_name): + os.remove(file_name) From 8e774d1ed8c8e4bf5107463c69cf0c6ca841c73e Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 12 Jun 2026 10:31:18 +0800 Subject: [PATCH 3/6] Merge large-file test helpers into conftest.py. Keep shared constants and utilities in one pytest entry point for tree/table 4GB tests. --- python/tests/conftest.py | 21 ++++++++++ python/tests/large_file_test_common.py | 39 ------------------- .../tests/table_view/test_large_file_table.py | 2 +- .../tests/tree_view/test_large_file_tree.py | 2 +- 4 files changed, 23 insertions(+), 41 deletions(-) delete mode 100644 python/tests/large_file_test_common.py diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 9f6288c2f..300ba48d1 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -16,9 +16,30 @@ # under the License. # +import os +import random +import string import sys from pathlib import Path _TESTS_DIR = Path(__file__).resolve().parent if str(_TESTS_DIR) not in sys.path: sys.path.insert(0, str(_TESTS_DIR)) + +TARGET_FILE_SIZE = 4 * 1024 * 1024 * 1024 +MIN_ACCEPTABLE_FILE_SIZE = 3800 * 1024 * 1024 +START_TIME = 1622505600000 +TABLET_ROWS = 50000 +FLUSH_ROWS = 1_000_000 + + +def get_file_size(path: str) -> int: + try: + return os.path.getsize(path) + except OSError: + return -1 + + +def random_suffix(length: int = 10) -> str: + chars = string.ascii_letters + string.digits + return "".join(random.choice(chars) for _ in range(length)) diff --git a/python/tests/large_file_test_common.py b/python/tests/large_file_test_common.py deleted file mode 100644 index 815f17055..000000000 --- a/python/tests/large_file_test_common.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import os -import random -import string - -TARGET_FILE_SIZE = 4 * 1024 * 1024 * 1024 -MIN_ACCEPTABLE_FILE_SIZE = 3800 * 1024 * 1024 -START_TIME = 1622505600000 -TABLET_ROWS = 50000 -FLUSH_ROWS = 1_000_000 - - -def get_file_size(path: str) -> int: - try: - return os.path.getsize(path) - except OSError: - return -1 - - -def random_suffix(length: int = 10) -> str: - chars = string.ascii_letters + string.digits - return "".join(random.choice(chars) for _ in range(length)) diff --git a/python/tests/table_view/test_large_file_table.py b/python/tests/table_view/test_large_file_table.py index a4d68f4bc..1660a3d36 100644 --- a/python/tests/table_view/test_large_file_table.py +++ b/python/tests/table_view/test_large_file_table.py @@ -21,7 +21,7 @@ from tsfile import ColumnCategory, ColumnSchema, Tablet from tsfile import TableSchema, TSDataType, TsFileReader, TsFileTableWriter -from large_file_test_common import ( +from conftest import ( FLUSH_ROWS, MIN_ACCEPTABLE_FILE_SIZE, START_TIME, diff --git a/python/tests/tree_view/test_large_file_tree.py b/python/tests/tree_view/test_large_file_tree.py index b80b85c19..b1df1c62b 100644 --- a/python/tests/tree_view/test_large_file_tree.py +++ b/python/tests/tree_view/test_large_file_tree.py @@ -21,7 +21,7 @@ from tsfile import Compressor, Tablet, TSEncoding, TSDataType, TimeseriesSchema from tsfile import TsFileReader, TsFileWriter -from large_file_test_common import ( +from conftest import ( FLUSH_ROWS, MIN_ACCEPTABLE_FILE_SIZE, START_TIME, From f8046b7c8f6c9a0aad63102311f2dadddb7fa7c3 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 12 Jun 2026 16:48:59 +0800 Subject: [PATCH 4/6] Fix Windows large-file truncate and stat handling in C++. --- cpp/src/file/write_file.cc | 2 +- cpp/test/reader/large_file_test_common.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/src/file/write_file.cc b/cpp/src/file/write_file.cc index b6fbd6e44..227520b71 100644 --- a/cpp/src/file/write_file.cc +++ b/cpp/src/file/write_file.cc @@ -131,7 +131,7 @@ int WriteFile::close() { int WriteFile::truncate(int64_t size) { ASSERT(fd_ > 0); #ifdef _WIN32 - if (_chsize_s(fd_, static_cast(size)) != 0) { + if (_chsize_s(fd_, size) != 0) { return E_FILE_WRITE_ERR; } #else diff --git a/cpp/test/reader/large_file_test_common.h b/cpp/test/reader/large_file_test_common.h index 60522e94d..364634188 100644 --- a/cpp/test/reader/large_file_test_common.h +++ b/cpp/test/reader/large_file_test_common.h @@ -20,7 +20,12 @@ #ifndef TEST_LARGE_FILE_TEST_COMMON_H #define TEST_LARGE_FILE_TEST_COMMON_H +#ifdef _WIN32 #include +#include +#else +#include +#endif #include #include @@ -37,11 +42,19 @@ constexpr uint32_t kTabletRows = 50000; constexpr int64_t kFlushRows = 1000000; inline int64_t GetFileSize(const std::string& path) { +#ifdef _WIN32 + struct __stat64 s; + if (_stat64(path.c_str(), &s) != 0) { + return -1; + } + return static_cast(s.st_size); +#else struct stat s; if (stat(path.c_str(), &s) != 0) { return -1; } return static_cast(s.st_size); +#endif } inline std::string RandomSuffix(int length = 10) { From 8080af5e3f710c8ab3cda086cbcfa6549b9c25d3 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 15 Jun 2026 16:08:39 +0800 Subject: [PATCH 5/6] fix msvc compilation --- cpp/src/cwrapper/tsfile_cwrapper.cc | 9 +-------- cpp/src/file/read_file.cc | 16 +++++++++++++++- cpp/src/writer/tsfile_writer.cc | 1 - .../reader/table_view/large_file_table_test.cc | 2 +- .../reader/tree_view/large_file_tree_test.cc | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 07b363aeb..0934981f9 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -56,14 +56,7 @@ int ArrowStructToTablet(const char* table_name, const ArrowArray* in_array, extern "C" { #endif -static bool is_init = false; - -void init_tsfile_config() { - if (!is_init) { - common::init_common(); - is_init = true; - } -} +void init_tsfile_config() { storage::libtsfile_init(); } uint8_t get_global_time_encoding() { return common::get_global_time_encoding(); diff --git a/cpp/src/file/read_file.cc b/cpp/src/file/read_file.cc index 8aab78ca6..d9902ddb9 100644 --- a/cpp/src/file/read_file.cc +++ b/cpp/src/file/read_file.cc @@ -49,7 +49,11 @@ void ReadFile::close() { int ReadFile::open(const std::string& file_path) { int ret = E_OK; file_path_ = file_path; - fd_ = ::open(file_path_.c_str(), O_RDONLY); + int flags = O_RDONLY; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + fd_ = ::open(file_path_.c_str(), flags); if (fd_ < 0) { std::cerr << "open file " << file_path << " error: " << strerror(errno) << " (errno " << errno << ")" << std::endl; @@ -66,8 +70,13 @@ int ReadFile::open(const std::string& file_path) { } int ReadFile::get_file_size(int64_t& file_size) { +#ifdef _WIN32 + struct __stat64 s; + if (_fstat64(fd_, &s) < 0) { +#else struct stat s; if (fstat(fd_, &s) < 0) { +#endif LOGE("fstat error, file_path=" << file_path_.c_str() << "fd=" << fd_ << "errno" << errno); return E_FILE_STAT_ERR; @@ -114,8 +123,13 @@ int ReadFile::read(int64_t offset, char* buf, int32_t buf_size, int ret = E_OK; read_len = 0; while (read_len < buf_size) { +#ifdef _WIN32 + ssize_t pread_size = ::pread(fd_, buf + read_len, buf_size - read_len, + static_cast(offset + read_len)); +#else ssize_t pread_size = ::pread(fd_, buf + read_len, buf_size - read_len, static_cast(offset + read_len)); +#endif if (pread_size < 0) { ret = E_FILE_READ_ERR; ////log_err("tsfile reader error, file_path=%s, errno=%d", diff --git a/cpp/src/writer/tsfile_writer.cc b/cpp/src/writer/tsfile_writer.cc index 3170a3160..bc3398d98 100644 --- a/cpp/src/writer/tsfile_writer.cc +++ b/cpp/src/writer/tsfile_writer.cc @@ -44,7 +44,6 @@ bool g_s_is_inited = false; } int libtsfile_init() { - libtsfile::g_s_is_inited = false; if (libtsfile::g_s_is_inited) { return E_OK; } diff --git a/cpp/test/reader/table_view/large_file_table_test.cc b/cpp/test/reader/table_view/large_file_table_test.cc index adac286e8..19945179e 100644 --- a/cpp/test/reader/table_view/large_file_table_test.cc +++ b/cpp/test/reader/table_view/large_file_table_test.cc @@ -94,7 +94,7 @@ class LargeFileTableTest : public ::testing::Test { std::string file_name_; }; -TEST_F(LargeFileTableTest, LargeFile4GB_TableWriteAndRead) { +TEST_F(LargeFileTableTest, DISABLED_LargeFile4GB_TableWriteAndRead) { std::unique_ptr table_schema(CreateTableSchema()); WriteFile write_file; diff --git a/cpp/test/reader/tree_view/large_file_tree_test.cc b/cpp/test/reader/tree_view/large_file_tree_test.cc index dd698eac9..8a05b957c 100644 --- a/cpp/test/reader/tree_view/large_file_tree_test.cc +++ b/cpp/test/reader/tree_view/large_file_tree_test.cc @@ -76,7 +76,7 @@ class LargeFileTreeTest : public ::testing::Test { std::string file_name_; }; -TEST_F(LargeFileTreeTest, LargeFile4GB_TreeWriteAndRead) { +TEST_F(LargeFileTreeTest, DISABLED_LargeFile4GB_TreeWriteAndRead) { TsFileWriter writer; int flags = O_WRONLY | O_CREAT | O_TRUNC; #ifdef _WIN32 From 79a47b840bb2f35909a57cd8c128c33da93cc993 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 15 Jun 2026 16:18:51 +0800 Subject: [PATCH 6/6] removed unused test --- python/tests/conftest.py | 45 -------- .../tests/table_view/test_large_file_table.py | 104 ------------------ .../tests/tree_view/test_large_file_tree.py | 101 ----------------- 3 files changed, 250 deletions(-) delete mode 100644 python/tests/conftest.py delete mode 100644 python/tests/table_view/test_large_file_table.py delete mode 100644 python/tests/tree_view/test_large_file_tree.py diff --git a/python/tests/conftest.py b/python/tests/conftest.py deleted file mode 100644 index 300ba48d1..000000000 --- a/python/tests/conftest.py +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import os -import random -import string -import sys -from pathlib import Path - -_TESTS_DIR = Path(__file__).resolve().parent -if str(_TESTS_DIR) not in sys.path: - sys.path.insert(0, str(_TESTS_DIR)) - -TARGET_FILE_SIZE = 4 * 1024 * 1024 * 1024 -MIN_ACCEPTABLE_FILE_SIZE = 3800 * 1024 * 1024 -START_TIME = 1622505600000 -TABLET_ROWS = 50000 -FLUSH_ROWS = 1_000_000 - - -def get_file_size(path: str) -> int: - try: - return os.path.getsize(path) - except OSError: - return -1 - - -def random_suffix(length: int = 10) -> str: - chars = string.ascii_letters + string.digits - return "".join(random.choice(chars) for _ in range(length)) diff --git a/python/tests/table_view/test_large_file_table.py b/python/tests/table_view/test_large_file_table.py deleted file mode 100644 index 1660a3d36..000000000 --- a/python/tests/table_view/test_large_file_table.py +++ /dev/null @@ -1,104 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import os - -from tsfile import ColumnCategory, ColumnSchema, Tablet -from tsfile import TableSchema, TSDataType, TsFileReader, TsFileTableWriter - -from conftest import ( - FLUSH_ROWS, - MIN_ACCEPTABLE_FILE_SIZE, - START_TIME, - TABLET_ROWS, - TARGET_FILE_SIZE, - get_file_size, - random_suffix, -) - -TABLE_NAME = "large_table" -DEVICE_TAG = "device" -VALUE_FIELD = "value" - - -def _fill_table_tablet(tablet: Tablet, total_rows: int) -> None: - row_range = range(TABLET_ROWS) - tablet.timestamp_list[:TABLET_ROWS] = [ - START_TIME + (total_rows + row) * 1000 for row in row_range - ] - tablet.data_list[0][:TABLET_ROWS] = ["device0"] * TABLET_ROWS - tablet.data_list[1][:TABLET_ROWS] = [total_rows + row for row in row_range] - - -def _verify_table_record(reader: TsFileReader, record_index: int) -> bool: - timestamp = START_TIME + record_index * 1000 - with reader.query_table( - TABLE_NAME, [VALUE_FIELD], timestamp, timestamp + 1 - ) as result: - if not result.next(): - return False - return ( - result.get_value_by_index(1) == timestamp - and result.get_value_by_index(2) == record_index - ) - - -def test_large_file_4gb_table_write_and_read(): - file_name = f"large_file_table_test_{random_suffix()}.tsfile" - if os.path.exists(file_name): - os.remove(file_name) - - schema = TableSchema( - TABLE_NAME, - [ - ColumnSchema(DEVICE_TAG, TSDataType.STRING, ColumnCategory.TAG), - ColumnSchema(VALUE_FIELD, TSDataType.INT64, ColumnCategory.FIELD), - ], - ) - - try: - with TsFileTableWriter(file_name, schema) as writer: - tablet = Tablet( - [DEVICE_TAG, VALUE_FIELD], - [TSDataType.STRING, TSDataType.INT64], - TABLET_ROWS, - ) - tablet.set_table_name(TABLE_NAME) - - total_rows = 0 - while get_file_size(file_name) < TARGET_FILE_SIZE: - _fill_table_tablet(tablet, total_rows) - writer.write_table(tablet) - total_rows += TABLET_ROWS - if total_rows % FLUSH_ROWS == 0: - writer.flush() - - writer.flush() - - final_size = get_file_size(file_name) - assert final_size >= MIN_ACCEPTABLE_FILE_SIZE - - reader = TsFileReader(file_name) - try: - for index in (0, total_rows // 2, total_rows - 1): - assert _verify_table_record(reader, index), f"index={index}" - finally: - reader.close() - finally: - if os.path.exists(file_name): - os.remove(file_name) diff --git a/python/tests/tree_view/test_large_file_tree.py b/python/tests/tree_view/test_large_file_tree.py deleted file mode 100644 index b1df1c62b..000000000 --- a/python/tests/tree_view/test_large_file_tree.py +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import os - -from tsfile import Compressor, Tablet, TSEncoding, TSDataType, TimeseriesSchema -from tsfile import TsFileReader, TsFileWriter - -from conftest import ( - FLUSH_ROWS, - MIN_ACCEPTABLE_FILE_SIZE, - START_TIME, - TABLET_ROWS, - TARGET_FILE_SIZE, - get_file_size, - random_suffix, -) - - -def _fill_tree_tablet(tablet: Tablet, total_rows: int) -> None: - row_range = range(TABLET_ROWS) - tablet.timestamp_list[:TABLET_ROWS] = [ - START_TIME + (total_rows + row) * 1000 for row in row_range - ] - tablet.data_list[0][:TABLET_ROWS] = [total_rows + row for row in row_range] - - -def _verify_tree_record(reader: TsFileReader, record_index: int) -> bool: - timestamp = START_TIME + record_index * 1000 - result = reader.query_timeseries( - "device1", ["temperature"], timestamp, timestamp + 1 - ) - try: - if not result.next(): - return False - return ( - result.get_value_by_index(1) == timestamp - and result.get_value_by_index(2) == record_index - ) - finally: - result.close() - - -def test_large_file_4gb_tree_write_and_read(): - file_name = f"large_file_tree_test_{random_suffix()}.tsfile" - if os.path.exists(file_name): - os.remove(file_name) - - try: - writer = TsFileWriter(file_name) - writer.register_timeseries( - "device1", - TimeseriesSchema( - "temperature", - TSDataType.INT64, - TSEncoding.PLAIN, - Compressor.UNCOMPRESSED, - ), - ) - - tablet = Tablet(["temperature"], [TSDataType.INT64], TABLET_ROWS) - tablet.set_table_name("device1") - - total_rows = 0 - while get_file_size(file_name) < TARGET_FILE_SIZE: - _fill_tree_tablet(tablet, total_rows) - writer.write_tablet(tablet) - total_rows += TABLET_ROWS - if total_rows % FLUSH_ROWS == 0: - writer.flush() - - writer.flush() - writer.close() - - final_size = get_file_size(file_name) - assert final_size >= MIN_ACCEPTABLE_FILE_SIZE - - reader = TsFileReader(file_name) - try: - for index in (0, total_rows // 2, total_rows - 1): - assert _verify_tree_record(reader, index), f"index={index}" - finally: - reader.close() - finally: - if os.path.exists(file_name): - os.remove(file_name)