From ed61d6649aec62b4620b8e0072bc304a219d18fd Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Tue, 31 Dec 2024 15:50:09 +0800 Subject: [PATCH] [Fix](multi-catalog) Fix column mutate() crash by replacing it with assume_mutable(). (#46151) ### What problem does this PR solve? Problem Summary: ``` warning: Unable to find libthread_db matching inferior's thread library, thread debugging will not be available. Core was generated by `/mnt/doris/be/lib/doris_be'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x0000000000000000 in ?? () [Current thread is 1 (LWP 3923404)] (gdb) bt #0 0x0000000000000000 in ?? () #1 0x000055f44f97dda7 in COW::release_ref (this=0x7f6bf7d07cc8) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/common/cow.h:99 #2 COW::intrusive_ptr::~intrusive_ptr (this=0x7f6b792f9670) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/common/cow.h:133 #3 doris::vectorized::ScalarColumnReader::_read_nested_column (this=this@entry=0x7f6be31f8900, doris_column=..., type= std::shared_ptr (use count 1, weak count 0) = {...}, select_vector=..., batch_size=, batch_size@entry=4064, read_rows=0x7f6b792f9ad8, eof=0x7f6b792f9af0, is_dict_filter=, align_rows=) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:447 #4 0x000055f44f97e1fc in doris::vectorized::ScalarColumnReader::read_column_data (this=0x7f6be31f8900, doris_column=..., type=std::shared_ptr (use count 1, weak count 0) = {...}, select_vector=..., batch_size=4064, read_rows=0x7f6b792f9ad8, eof=0x7f6b792f9af0, is_dict_filter=) at 
/home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:616 #6 0x000055f44f975460 in doris::vectorized::RowGroupReader::_read_column_data (this=this@entry=0x7f6cf83dd180, block=block@entry=0x7f6bbcc66938, columns=..., batch_size=4064, read_rows=read_rows@entry=0x7f6b792f9ee0, batch_eof=0x7f6cf83d71f0, select_vector=...) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:426 #7 0x000055f44f972682 in doris::vectorized::RowGroupReader::next_batch (this=0x7f6cf83dd180, block=0x7f6bbcc66938, batch_size=140099571418880, read_rows=0x7f6b792f9ee0, batch_eof=0x7f6f06724610) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:321 #8 0x000055f44f9141f8 in doris::vectorized::ParquetReader::get_next_block (this=0x7f6cf83d7000, block=0x7f6bbcc66938, read_rows=0x7f6b792f9ee0, eof=0x7f6bbcc66f88) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_reader.cpp:576 #9 0x000055f450aba6f4 in doris::vectorized::IcebergTableReader::get_next_block (this=0x7f6bb5611180, block=0x7f6bbcc66938, read_rows=0x7f6b792f9ee0, eof=0x7f6bbcc66f88) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/table/iceberg_reader.cpp:138 #10 0x000055f450aaa351 in doris::vectorized::VFileScanner::_get_block_wrapped (this=0x7f6bbcc66800, state=, block=0x7f6bbcc66938, eof=0x7f6b792fa2f7) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vfile_scanner.cpp:359 #11 0x000055f450aa9ecc in doris::vectorized::VFileScanner::_get_block_impl (this=0x0, state=0xffffffffffffa770, block=0x8c3de0, eof=0x7f6b79300700) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vfile_scanner.cpp:301 #12 0x000055f450b41e9c in doris::vectorized::VScanner::get_block (this=this@entry=0x7f6bbcc66800, state=state@entry=0x7f6f06724000, block=block@entry=0x7f6bbcc66938, eof=eof@entry=0x7f6b792fa2f7) at 
/home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vscanner.cpp:133 #13 0x000055f450b41977 in doris::vectorized::VScanner::get_block_after_projects (this=0x7f6bbcc66800, state=0x7f6f06724000, block=0x7f6cf8394b80, eos=0x7f6b792fa2f7) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vscanner.cpp:96 #14 0x000055f450a941ff in doris::vectorized::ScannerScheduler::_scanner_scan (ctx=std::shared_ptr (use count 10, weak count 1) = {...}, scan_task=std::shared_ptr (use count 2, weak count 0) = {...}) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:289 #15 0x000055f450a94b73 in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const (this=) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:180 #16 doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}::operator()() const (this=0x7f70ccc56ee0) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:179 #17 std::__invoke_impl, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&>(std::__invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&) (__f=...) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61 #18 std::__invoke_r, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::{lambda()#1}&) (__fn=...) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111 #19 std::_Function_handler, std::shared_ptr)::$_1::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...) 
at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 #20 0x000055f44ca858c8 in doris::ThreadPool::dispatch_thread (this=0x7f70ba259200) at /home/zcp/repo_center/doris_enterprise/doris/be/src/util/threadpool.cpp:543 #21 0x000055f44ca7ad91 in std::function::operator()() const (this=0x7f6bf7d07cc0) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560 #22 doris::Thread::supervise_thread (arg=0x7f6fa185a020) at /home/zcp/repo_center/doris_enterprise/doris/be/src/util/thread.cpp:498 #23 0x00007f70ebf78e25 in ?? () #24 0x0000000000000000 in ?? () ``` --- be/src/vec/exec/format/orc/vorc_reader.cpp | 7 ++++--- .../format/parquet/vparquet_column_reader.cpp | 16 ++++++++-------- .../format/parquet/vparquet_group_reader.cpp | 7 ++++--- be/src/vec/exec/scan/new_es_scanner.cpp | 2 +- be/src/vec/exec/scan/vfile_scanner.cpp | 9 ++++----- be/src/vec/exec/scan/vmeta_scanner.cpp | 2 +- 6 files changed, 22 insertions(+), 21 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 4d41830668960c..26e41afe3c901a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1211,8 +1211,8 @@ Status OrcReader::_fill_missing_columns( for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null - auto nullable_column = reinterpret_cast( - (*std::move(block->get_by_name(kv.first).column)).mutate().get()); + auto mutable_column = block->get_by_name(kv.first).column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(rows); } else { // fill with default value @@ -1226,8 +1226,9 @@ Status OrcReader::_fill_missing_columns( // call resize because the first column of _src_block_ptr may not be filled by reader, // so _src_block_ptr->rows() may return wrong result, 
cause the column created by `ctx->execute()` // has only one row. - std::move(*block->get_by_position(result_column_id).column).mutate()->resize(rows); auto result_column_ptr = block->get_by_position(result_column_id).column; + auto mutable_column = result_column_ptr->assume_mutable(); + mutable_column->resize(rows); // result_column_ptr maybe a ColumnConst, convert it to a normal column result_column_ptr = result_column_ptr->convert_to_full_column_if_const(); auto origin_column_type = block->get_by_name(kv.first).type; diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index d11b3153b4917c..207b917666b33c 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -679,8 +679,8 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; if (doris_column->is_nullable()) { - auto* nullable_column = reinterpret_cast( - (*std::move(doris_column)).mutate().get()); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); null_map_ptr = &nullable_column->get_null_map_data(); data_column = nullable_column->get_nested_column_ptr(); } else { @@ -730,8 +730,8 @@ Status MapColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& t MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; if (doris_column->is_nullable()) { - auto* nullable_column = reinterpret_cast( - (*std::move(doris_column)).mutate().get()); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); null_map_ptr = &nullable_column->get_null_map_data(); data_column = nullable_column->get_nested_column_ptr(); } else { @@ -799,8 +799,8 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr MutableColumnPtr 
data_column; NullMap* null_map_ptr = nullptr; if (doris_column->is_nullable()) { - auto* nullable_column = reinterpret_cast( - (*std::move(doris_column)).mutate().get()); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); null_map_ptr = &nullable_column->get_null_map_data(); data_column = nullable_column->get_nested_column_ptr(); } else { @@ -880,8 +880,8 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr auto& doris_field = doris_struct.get_column_ptr(idx); auto& doris_type = const_cast(doris_struct_type->get_element(idx)); DCHECK(doris_type->is_nullable()); - auto* nullable_column = reinterpret_cast( - (*std::move(doris_field)).mutate().get()); + auto mutable_column = doris_field->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_null_elements(missing_column_sz); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 770ed1f02ac6c0..a18626066b1811 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -684,8 +684,8 @@ Status RowGroupReader::_fill_missing_columns( for (auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null - auto nullable_column = reinterpret_cast( - (*std::move(block->get_by_name(kv.first).column)).mutate().get()); + auto mutable_column = block->get_by_name(kv.first).column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(rows); } else { // fill with default value @@ -699,8 +699,9 @@ Status RowGroupReader::_fill_missing_columns( // call resize because the first column of _src_block_ptr may not be filled by reader, // so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()` // has only one 
row. - std::move(*block->get_by_position(result_column_id).column).mutate()->resize(rows); auto result_column_ptr = block->get_by_position(result_column_id).column; + auto mutable_column = result_column_ptr->assume_mutable(); + mutable_column->resize(rows); // result_column_ptr maybe a ColumnConst, convert it to a normal column result_column_ptr = result_column_ptr->convert_to_full_column_if_const(); auto origin_column_type = block->get_by_name(kv.first).type; diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index fae83854be0910..b19b009b314b53 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -132,7 +132,7 @@ Status NewEsScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo columns.resize(column_size); for (auto i = 0; i < column_size; i++) { if (mem_reuse) { - columns[i] = std::move(*block->get_by_position(i).column).mutate(); + columns[i] = block->get_by_position(i).column->assume_mutable(); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index b07fbc057213e7..5b96b5561fba1c 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -492,8 +492,8 @@ Status VFileScanner::_fill_missing_columns(size_t rows) { for (auto& kv : _missing_col_descs) { if (kv.second == nullptr) { // no default column, fill with null - auto nullable_column = reinterpret_cast( - (*std::move(_src_block_ptr->get_by_name(kv.first).column)).mutate().get()); + auto mutable_column = _src_block_ptr->get_by_name(kv.first).column->assume_mutable(); + auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(rows); } else { // fill with default value @@ -507,10 +507,9 @@ Status VFileScanner::_fill_missing_columns(size_t rows) { // call resize because the first column of _src_block_ptr may 
not be filled by reader, // so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()` // has only one row. - std::move(*_src_block_ptr->get_by_position(result_column_id).column) - .mutate() - ->resize(rows); auto result_column_ptr = _src_block_ptr->get_by_position(result_column_id).column; + auto mutable_column = result_column_ptr->assume_mutable(); + mutable_column->resize(rows); // result_column_ptr maybe a ColumnConst, convert it to a normal column result_column_ptr = result_column_ptr->convert_to_full_column_if_const(); auto origin_column_type = _src_block_ptr->get_by_name(kv.first).type; diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index db0256728741c7..33e8c5e003d7ca 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -96,7 +96,7 @@ Status VMetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo columns.resize(column_size); for (auto i = 0; i < column_size; i++) { if (mem_reuse) { - columns[i] = std::move(*block->get_by_position(i).column).mutate(); + columns[i] = block->get_by_position(i).column->assume_mutable(); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); }