From 182e5fe9ff33f8f1e185b8a95aa5f637916d2937 Mon Sep 17 00:00:00 2001 From: "xuweixin.rex" Date: Mon, 1 Jun 2026 16:53:39 +0800 Subject: [PATCH 1/3] Fix oversized BYTE_ARRAY parquet data pages --- bolt/dwio/parquet/arrow/ColumnWriter.cpp | 172 ++++++++++++++++-- .../tests/writer/ParquetWriterTest.cpp | 117 ++++++++++++ 2 files changed, 274 insertions(+), 15 deletions(-) diff --git a/bolt/dwio/parquet/arrow/ColumnWriter.cpp b/bolt/dwio/parquet/arrow/ColumnWriter.cpp index 10480f34a..c3df79f8a 100644 --- a/bolt/dwio/parquet/arrow/ColumnWriter.cpp +++ b/bolt/dwio/parquet/arrow/ColumnWriter.cpp @@ -36,8 +36,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -93,6 +96,17 @@ using util::CodecOptions; namespace { +int32_t CheckPageHeaderSize(std::string_view sizeName, int64_t size) { + if (size < 0 || size > std::numeric_limits::max()) { + throw ParquetException( + std::string(sizeName), + " page size cannot be represented in a Parquet PageHeader int32 " + "field: ", + size); + } + return static_cast(size); +} + // Visitor that extracts the value buffer from a FlatArray at a given offset. struct ValueBufferSlicer { template @@ -350,25 +364,30 @@ class SerializedPageWriter : public PageWriter { dict_page_header.__set_is_sorted(page.is_sorted()); const uint8_t* output_data_buffer = compressed_data->data(); - int32_t output_data_len = static_cast(compressed_data->size()); + int64_t output_data_len = compressed_data->size(); + const int32_t uncompressed_page_size = + CheckPageHeaderSize("Uncompressed dictionary", uncompressed_size); + int32_t compressed_page_size = + CheckPageHeaderSize("Compressed dictionary", output_data_len); if (data_encryptor_.get()) { UpdateEncryption(encryption::kDictionaryPage); PARQUET_THROW_NOT_OK(encryption_buffer_->Resize( - data_encryptor_->CiphertextSizeDelta() + output_data_len, false)); + data_encryptor_->CiphertextSizeDelta() + compressed_page_size, + false)); output_data_len = data_encryptor_->Encrypt( compressed_data->data(), - output_data_len, + compressed_page_size, encryption_buffer_->mutable_data()); output_data_buffer = encryption_buffer_->data(); + compressed_page_size = + CheckPageHeaderSize("Compressed dictionary", output_data_len); } format::PageHeader page_header; page_header.__set_type(format::PageType::DICTIONARY_PAGE); - page_header.__set_uncompressed_page_size( - static_cast(uncompressed_size)); - page_header.__set_compressed_page_size( - static_cast(output_data_len)); + page_header.__set_uncompressed_page_size(uncompressed_page_size); + page_header.__set_compressed_page_size(compressed_page_size); page_header.__set_dictionary_page_header(dict_page_header); if (page_checksum_verification_) { uint32_t crc32 = @@ -452,24 +471,29 @@ class SerializedPageWriter : public PageWriter { const int64_t uncompressed_size = page.uncompressed_size(); std::shared_ptr compressed_data = page.buffer(); const uint8_t* output_data_buffer = compressed_data->data(); - int32_t output_data_len = static_cast(compressed_data->size()); + int64_t output_data_len = compressed_data->size(); + const int32_t uncompressed_page_size = + CheckPageHeaderSize("Uncompressed data", uncompressed_size); + int32_t compressed_page_size = + CheckPageHeaderSize("Compressed data", output_data_len); if (data_encryptor_.get()) { PARQUET_THROW_NOT_OK(encryption_buffer_->Resize( - data_encryptor_->CiphertextSizeDelta() + output_data_len, false)); + data_encryptor_->CiphertextSizeDelta() + compressed_page_size, + false)); UpdateEncryption(encryption::kDataPage); output_data_len = data_encryptor_->Encrypt( compressed_data->data(), - output_data_len, + compressed_page_size, encryption_buffer_->mutable_data()); output_data_buffer = encryption_buffer_->data(); + compressed_page_size = + CheckPageHeaderSize("Compressed data", output_data_len); } format::PageHeader page_header; - page_header.__set_uncompressed_page_size( - static_cast(uncompressed_size)); - page_header.__set_compressed_page_size( - static_cast(output_data_len)); + page_header.__set_uncompressed_page_size(uncompressed_page_size); + page_header.__set_compressed_page_size(compressed_page_size); if (page_checksum_verification_) { uint32_t crc32 = @@ -2754,8 +2778,56 @@ Status TypedColumnWriterImpl::WriteArrowDense( ARROW_UNSUPPORTED(); } + constexpr int64_t kDataPageSizeSlack = 64L * 1024 * 1024; + const int64_t dataPageByteLimit = std::min( + data_pagesize_, std::numeric_limits::max() - kDataPageSizeSlack); + + auto valueLength = [&](int64_t index) { + if (::arrow::is_binary_like(array.type_id())) { + return static_cast( + checked_cast(array).value_length(index)); + } + DCHECK(::arrow::is_large_binary_like(array.type_id())); + return static_cast( + checked_cast(array).value_length( + index)); + }; + + auto valueRangeByteLength = [&](int64_t start, int64_t count) { + if (::arrow::is_binary_like(array.type_id())) { + const auto& binaryArray = + checked_cast(array); + return static_cast( + binaryArray.value_offset(start + count) - + binaryArray.value_offset(start)); + } + DCHECK(::arrow::is_large_binary_like(array.type_id())); + const auto& largeBinaryArray = + checked_cast(array); + return static_cast( + largeBinaryArray.value_offset(start + count) - + largeBinaryArray.value_offset(start)); + }; + + auto hasSpacedValue = [&](int64_t levelIndex) { + if (def_levels == nullptr || level_info_.def_level == 0) { + return true; + } + return def_levels[levelIndex] >= level_info_.repeated_ancestor_def_level; + }; + + auto hasNonNullValue = [&](int64_t levelIndex, int64_t valueIndex) { + if (def_levels != nullptr && + def_levels[levelIndex] != level_info_.def_level) { + return false; + } + return array.IsValid(valueIndex); + }; + int64_t value_offset = 0; - auto WriteChunk = [&](int64_t offset, int64_t batch_size, bool check_page) { + auto WriteSubChunk = [&](int64_t offset, + int64_t batch_size, + bool check_page) { int64_t batch_num_values = 0; int64_t batch_num_spaced_values = 0; int64_t null_count = 0; @@ -2790,6 +2862,76 @@ Status TypedColumnWriterImpl::WriteArrowDense( value_offset += batch_num_spaced_values; }; + auto WriteChunk = [&](int64_t offset, int64_t batch_size, bool check_page) { + const bool split_by_byte_size = + check_page && !IsDictionaryEncoding(current_encoder_->encoding()); + if (!split_by_byte_size) { + WriteSubChunk(offset, batch_size, check_page); + return; + } + + if (def_levels == nullptr || level_info_.def_level == 0) { + const int64_t batch_encoded_bytes = + valueRangeByteLength(value_offset, batch_size) + + batch_size * static_cast(sizeof(uint32_t)); + if (current_encoder_->EstimatedDataEncodedSize() + batch_encoded_bytes <= + dataPageByteLimit) { + WriteSubChunk(offset, batch_size, check_page); + return; + } + } + + int64_t local_offset = offset; + int64_t remaining = batch_size; + while (remaining > 0) { + int64_t subchunk_levels = 0; + int64_t subchunk_spaced_values = 0; + int64_t subchunk_encoded_bytes = 0; + + while (subchunk_levels < remaining) { + const int64_t level_index = local_offset + subchunk_levels; + const bool can_break_before_level = + !pages_change_on_record_boundaries() || rep_levels == nullptr || + rep_levels[level_index] == 0; + + int64_t value_bytes = 0; + if (hasSpacedValue(level_index)) { + const int64_t value_index = value_offset + subchunk_spaced_values; + if (hasNonNullValue(level_index, value_index)) { + value_bytes = static_cast(sizeof(uint32_t)) + + valueLength(value_index); + } + } + + if (check_page && split_by_byte_size && can_break_before_level && + current_encoder_->EstimatedDataEncodedSize() + + subchunk_encoded_bytes + value_bytes > + dataPageByteLimit) { + if (subchunk_levels == 0) { + if (num_buffered_values_ > 0) { + AddDataPage(); + } + } else { + break; + } + } + + if (hasSpacedValue(level_index)) { + ++subchunk_spaced_values; + } + subchunk_encoded_bytes += value_bytes; + ++subchunk_levels; + } + + if (subchunk_levels == 0) { + throw ParquetException("Unable to split BYTE_ARRAY write chunk"); + } + WriteSubChunk(local_offset, subchunk_levels, check_page); + local_offset += subchunk_levels; + remaining -= subchunk_levels; + } + }; + PARQUET_CATCH_NOT_OK(DoInBatches( def_levels, rep_levels, diff --git a/bolt/dwio/parquet/tests/writer/ParquetWriterTest.cpp b/bolt/dwio/parquet/tests/writer/ParquetWriterTest.cpp index 35e0e9381..b3bf0085e 100644 --- a/bolt/dwio/parquet/tests/writer/ParquetWriterTest.cpp +++ b/bolt/dwio/parquet/tests/writer/ParquetWriterTest.cpp @@ -675,6 +675,123 @@ TEST_F(ParquetWriterTest, columnPageSize) { ASSERT_EQ(4, chunk2PageEncodingStats[0].count); // data page num } +TEST_F(ParquetWriterTest, byteArrayPageSizeSplitsLargeVarcharPages) { + std::string c0{"c0"}; + auto schema = ROW({c0}, {VARCHAR()}); + const vector_size_t kRows = 20; + std::string parquetPath = tempPath_->path + "/varcharPageSize.parquet"; + + auto data = makeRowVector({makeFlatVector( + kRows, + [](auto row) { + return std::string(40, static_cast('a' + row % 26)); + }, + [](auto row) { return row % 5 == 0; })}); + + vp::WriterOptions writerOptions{}; + writerOptions.enableDictionary = false; + writerOptions.columnDataPageSizeMap[c0] = 100; + auto writer = createLocalWriter(parquetPath, schema, writerOptions); + writer->write(data); + writer->close(); + + assertRead(parquetPath, kRows, schema, data); + + auto reader = createLocalParquetReader(parquetPath); + auto chunkPageEncodingStats = + reader->fileMetaData().rowGroup(0).columnChunk(0).pageEncodingStats(); + ASSERT_EQ(1, chunkPageEncodingStats.size()); + ASSERT_GT(chunkPageEncodingStats[0].count, 1); +} + +TEST_F(ParquetWriterTest, byteArrayPageSizeSplitsLargeVarbinaryPages) { + std::string c0{"c0"}; + auto schema = ROW({c0}, {VARBINARY()}); + const vector_size_t kRows = 20; + std::string parquetPath = tempPath_->path + "/varbinaryPageSize.parquet"; + + auto data = makeRowVector({makeFlatVector( + kRows, + [](auto row) { + return std::string(40, static_cast('a' + row % 26)); + }, + [](auto row) { return row % 7 == 0; }, + VARBINARY())}); + + vp::WriterOptions writerOptions{}; + writerOptions.enableDictionary = false; + writerOptions.columnDataPageSizeMap[c0] = 100; + auto writer = createLocalWriter(parquetPath, schema, writerOptions); + writer->write(data); + writer->close(); + + assertRead(parquetPath, kRows, schema, data); + + auto reader = createLocalParquetReader(parquetPath); + auto chunkPageEncodingStats = + reader->fileMetaData().rowGroup(0).columnChunk(0).pageEncodingStats(); + ASSERT_EQ(1, chunkPageEncodingStats.size()); + ASSERT_GT(chunkPageEncodingStats[0].count, 1); +} + +TEST_F(ParquetWriterTest, byteArrayPageSizeSplitsAfterDictionaryFallback) { + std::string c0{"c0"}; + auto schema = ROW({c0}, {VARCHAR()}); + const vector_size_t kDictionaryRows = 4; + const vector_size_t kPlainRows = 20; + std::string parquetPath = + tempPath_->path + "/dictionaryFallbackPageSize.parquet"; + + auto dictionaryData = + makeRowVector({makeFlatVector(kDictionaryRows, [](auto row) { + return std::string("dict-") + std::to_string(row) + "-" + + std::string(40, static_cast('a' + row % 26)); + })}); + auto plainData = makeRowVector({makeFlatVector( + kPlainRows, + [](auto row) { + return std::string("value-") + std::to_string(row) + "-" + + std::string(40, static_cast('a' + row % 26)); + }, + [](auto row) { return row % 6 == 0; })}); + auto mergedData = + BaseVector::create(schema, kDictionaryRows + kPlainRows, pool_.get()); + mergedData->copy(dictionaryData.get(), 0, 0, dictionaryData->size()); + mergedData->copy( + plainData.get(), dictionaryData->size(), 0, plainData->size()); + + vp::WriterOptions writerOptions{}; + writerOptions.enableDictionary = true; + writerOptions.columnDictionaryPageSizeLimitMap[c0] = 64; + writerOptions.columnDataPageSizeMap[c0] = 100; + auto writer = createLocalWriter(parquetPath, schema, writerOptions); + writer->write(dictionaryData); + writer->write(plainData); + writer->close(); + + assertRead(parquetPath, kDictionaryRows + kPlainRows, schema, mergedData); + + auto reader = createLocalParquetReader(parquetPath); + auto chunkPageEncodingStats = + reader->fileMetaData().rowGroup(0).columnChunk(0).pageEncodingStats(); + int32_t dictionaryPageCount = 0; + int32_t dataPageCount = 0; + int32_t plainDataPageCount = 0; + for (const auto& stats : chunkPageEncodingStats) { + if (stats.page_type == thrift::PageType::DICTIONARY_PAGE) { + dictionaryPageCount += stats.count; + } else if (stats.page_type == thrift::PageType::DATA_PAGE) { + dataPageCount += stats.count; + if (stats.encoding == thrift::Encoding::PLAIN) { + plainDataPageCount += stats.count; + } + } + } + ASSERT_EQ(1, dictionaryPageCount); + ASSERT_GT(dataPageCount, 1); + ASSERT_GT(plainDataPageCount, 1); +} + TEST_F(ParquetWriterTest, arrowPool) { const size_t kRows = 4 * 1024; auto type = getType(); From a9d1bee0a393d1244632de072b5cea123cb4af96 Mon Sep 17 00:00:00 2001 From: "xuweixin.rex" Date: Tue, 2 Jun 2026 16:18:46 +0800 Subject: [PATCH 2/3] Optimize parquet writer page size checks --- bolt/dwio/parquet/arrow/ColumnWriter.cpp | 68 +++++++++++-------- .../tests/writer/ParquetWriterBenchmark.cpp | 60 ++++++++++++++-- 2 files changed, 94 insertions(+), 34 deletions(-) diff --git a/bolt/dwio/parquet/arrow/ColumnWriter.cpp b/bolt/dwio/parquet/arrow/ColumnWriter.cpp index c3df79f8a..7c00f9c72 100644 --- a/bolt/dwio/parquet/arrow/ColumnWriter.cpp +++ b/bolt/dwio/parquet/arrow/ColumnWriter.cpp @@ -96,13 +96,20 @@ using util::CodecOptions; namespace { -int32_t CheckPageHeaderSize(std::string_view sizeName, int64_t size) { - if (size < 0 || size > std::numeric_limits::max()) { - throw ParquetException( - std::string(sizeName), - " page size cannot be represented in a Parquet PageHeader int32 " - "field: ", - size); +[[noreturn]] void ThrowPageHeaderSizeError( + std::string_view sizeName, + int64_t size) { + throw ParquetException( + std::string(sizeName), + " page size cannot be represented in a Parquet PageHeader int32 " + "field: ", + size); +} + +inline int32_t CheckPageHeaderSize(std::string_view sizeName, int64_t size) { + if (ARROW_PREDICT_FALSE( + size < 0 || size > std::numeric_limits::max())) { + ThrowPageHeaderSizeError(sizeName, size); } return static_cast(size); } @@ -667,12 +674,13 @@ class SerializedPageWriter : public PageWriter { void UpdateEncryption(int8_t module_type) { switch (module_type) { case encryption::kColumnMetaData: { - meta_encryptor_->UpdateAad(encryption::CreateModuleAad( - meta_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + meta_encryptor_->UpdateAad( + encryption::CreateModuleAad( + meta_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } case encryption::kDataPage: { @@ -686,21 +694,23 @@ class SerializedPageWriter : public PageWriter { break; } case encryption::kDictionaryPageHeader: { - meta_encryptor_->UpdateAad(encryption::CreateModuleAad( - meta_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + meta_encryptor_->UpdateAad( + encryption::CreateModuleAad( + meta_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } case encryption::kDictionaryPage: { - data_encryptor_->UpdateAad(encryption::CreateModuleAad( - data_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + data_encryptor_->UpdateAad( + encryption::CreateModuleAad( + data_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } default: @@ -1940,8 +1950,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, if (array->data()->offset > 0) { RETURN_NOT_OK(util::VisitArrayInline(*array, &slicer, &buffers[1])); } - return ::arrow::MakeArray(std::make_shared( - array->type(), array->length(), std::move(buffers), new_null_count)); + return ::arrow::MakeArray( + std::make_shared( + array->type(), + array->length(), + std::move(buffers), + new_null_count)); } void WriteLevelsSpaced( diff --git a/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp b/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp index 2d7fb63e0..b04ecabce 100644 --- a/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp +++ b/bolt/dwio/parquet/tests/writer/ParquetWriterBenchmark.cpp @@ -24,6 +24,8 @@ #include "bolt/dwio/parquet/writer/Writer.h" #include "bolt/exec/tests/utils/TempDirectoryPath.h" +#include + #include #include using namespace bytedance::bolt; @@ -65,7 +67,7 @@ class ParquetWriterBenchmark { ~ParquetWriterBenchmark() {} - void writeToFile( + uint64_t writeToFile( const std::vector& batches, bool /*forRowGroupSkip*/) { for (auto& batch : batches) { @@ -73,9 +75,10 @@ class ParquetWriterBenchmark { } writer_->flush(); writer_->close(); + return std::filesystem::file_size(fileFolder_->path + "/" + fileName_); } - void writeSingleColumn( + uint64_t writeSingleColumn( const std::string& columnName, const TypePtr& type, uint8_t nullsRateX100, @@ -89,7 +92,7 @@ class ParquetWriterBenchmark { .withNullsForField(Subfield(columnName), nullsRateX100) .build(); suspender.dismiss(); - writeToFile(*batches, true); + return writeToFile(*batches, true); } private: @@ -105,16 +108,18 @@ class ParquetWriterBenchmark { }; void run( - uint32_t, + uint32_t iterations, const std::string& columnName, const TypePtr& type, uint8_t nullsRateX100, uint32_t batchSize, bool disableDictionary) { RowTypePtr rowType = ROW({columnName}, {type}); - ParquetWriterBenchmark benchmark(disableDictionary, rowType); - BIGINT()->toString(); - benchmark.writeSingleColumn(columnName, type, nullsRateX100, batchSize); + for (uint32_t i = 0; i < iterations; ++i) { + ParquetWriterBenchmark benchmark(disableDictionary, rowType); + folly::doNotOptimizeAway(benchmark.writeSingleColumn( + columnName, type, nullsRateX100, batchSize)); + } } #define PARQUET_BENCHMARKS_NULLS(_type_, _name_, _null_) \ @@ -131,6 +136,41 @@ void run( #define PARQUET_BENCHMARKS(_type_, _name_) \ PARQUET_BENCHMARKS_NULLS(_type_, _name_, 20) +// Benchmarks targeting the BYTE_ARRAY non-dictionary write path that is +// affected by the oversized parquet data page fix. The data sizes here are +// well below the int32 page-size limit, so these benchmarks measure the +// overhead introduced for the common (non-oversized) case. +#define PARQUET_BENCHMARKS_NULLS_NO_DICT(_type_, _name_, _null_) \ + BENCHMARK_NAMED_PARAM( \ + run, \ + _name_##_batch_4k_no_dict_null##_null_, \ + #_name_, \ + _type_, \ + _null_, \ + 4096, \ + true); \ + BENCHMARK_NAMED_PARAM( \ + run, \ + _name_##_batch_32k_no_dict_null##_null_, \ + #_name_, \ + _type_, \ + _null_, \ + 32768, \ + true); \ + BENCHMARK_NAMED_PARAM( \ + run, \ + _name_##_batch_256k_no_dict_null##_null_, \ + #_name_, \ + _type_, \ + _null_, \ + 262144, \ + true); \ + BENCHMARK_DRAW_LINE(); + +#define PARQUET_BENCHMARKS_NO_DICT(_type_, _name_) \ + PARQUET_BENCHMARKS_NULLS_NO_DICT(_type_, _name_, 0) \ + PARQUET_BENCHMARKS_NULLS_NO_DICT(_type_, _name_, 20) + PARQUET_BENCHMARKS(VARCHAR(), Varchar); PARQUET_BENCHMARKS(BIGINT(), BigInt); PARQUET_BENCHMARKS(DOUBLE(), Double); @@ -139,6 +179,12 @@ PARQUET_BENCHMARKS(DECIMAL(38, 3), LongDecimalType); PARQUET_BENCHMARKS(MAP(BIGINT(), BIGINT()), Map); PARQUET_BENCHMARKS(ARRAY(BIGINT()), List); +// Plain-encoded VARCHAR exercises the new BYTE_ARRAY page-splitting code path +// added by the oversized-page fix. The nested ARRAY case additionally +// drives the per-level loop branch (def_levels != nullptr && def_level > 0). +PARQUET_BENCHMARKS_NO_DICT(VARCHAR(), VarcharPlain); +PARQUET_BENCHMARKS_NO_DICT(ARRAY(VARCHAR()), VarcharListPlain); + // TODO: Add all data types int main(int argc, char** argv) { From 46f7b08aba8c8b15863338eca1bfdcddf782b1af Mon Sep 17 00:00:00 2001 From: "xuweixin.rex" Date: Tue, 2 Jun 2026 16:45:52 +0800 Subject: [PATCH 3/3] format code --- bolt/dwio/parquet/arrow/ColumnWriter.cpp | 47 ++++++++++-------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/bolt/dwio/parquet/arrow/ColumnWriter.cpp b/bolt/dwio/parquet/arrow/ColumnWriter.cpp index 7c00f9c72..0b2b4e7c2 100644 --- a/bolt/dwio/parquet/arrow/ColumnWriter.cpp +++ b/bolt/dwio/parquet/arrow/ColumnWriter.cpp @@ -674,13 +674,12 @@ class SerializedPageWriter : public PageWriter { void UpdateEncryption(int8_t module_type) { switch (module_type) { case encryption::kColumnMetaData: { - meta_encryptor_->UpdateAad( - encryption::CreateModuleAad( - meta_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + meta_encryptor_->UpdateAad(encryption::CreateModuleAad( + meta_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } case encryption::kDataPage: { @@ -694,23 +693,21 @@ class SerializedPageWriter : public PageWriter { break; } case encryption::kDictionaryPageHeader: { - meta_encryptor_->UpdateAad( - encryption::CreateModuleAad( - meta_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + meta_encryptor_->UpdateAad(encryption::CreateModuleAad( + meta_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } case encryption::kDictionaryPage: { - data_encryptor_->UpdateAad( - encryption::CreateModuleAad( - data_encryptor_->file_aad(), - module_type, - row_group_ordinal_, - column_ordinal_, - kNonPageOrdinal)); + data_encryptor_->UpdateAad(encryption::CreateModuleAad( + data_encryptor_->file_aad(), + module_type, + row_group_ordinal_, + column_ordinal_, + kNonPageOrdinal)); break; } default: @@ -1950,12 +1947,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, if (array->data()->offset > 0) { RETURN_NOT_OK(util::VisitArrayInline(*array, &slicer, &buffers[1])); } - return ::arrow::MakeArray( - std::make_shared( - array->type(), - array->length(), - std::move(buffers), - new_null_count)); + return ::arrow::MakeArray(std::make_shared( + array->type(), array->length(), std::move(buffers), new_null_count)); } void WriteLevelsSpaced(