Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 15 additions & 46 deletions be/src/core/column/column_variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2488,64 +2488,33 @@ bool ColumnVariant::try_insert_many_defaults_from_nested(const Subcolumns::NodeP
return true;
}

/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
/// and replaces all scalars or nested arrays to @replacement at that level.
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
public:
FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
: replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}

template <PrimitiveType T>
Field apply(const typename PrimitiveTypeTraits<T>::CppType& x) const {
if constexpr (T == TYPE_ARRAY) {
if (num_dimensions_to_keep == 0) {
return replacement;
}

const size_t size = x.size();
Array res(size);
for (size_t i = 0; i < size; ++i) {
res[i] = apply_visitor(
FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
}
return Field::create_field<TYPE_ARRAY>(res);
} else {
return replacement;
}
}

private:
const Field& replacement;
size_t num_dimensions_to_keep;
};

bool ColumnVariant::try_insert_default_from_nested(const Subcolumns::NodePtr& entry) const {
const auto* leaf = get_leaf_of_the_same_nested(entry);
if (!leaf) {
return false;
}

auto last_field = leaf->data.get_last_field();
if (last_field.is_null()) {
size_t leaf_size = leaf->data.size();
if (leaf_size == 0) {
return false;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes behavior for nullable nested siblings. Before this patch, try_insert_default_from_nested() called get_last_field() and returned false when the donor leaf row was NULL, so the caller fell back to entry->data.insert_default(). After the refactor we always cut the last donor row and rebuild a default-shaped subcolumn from it, which means a row like items.tags = null can now synthesize a nested sibling value for a missing items.id instead of keeping that sibling NULL too. That is a user-visible semantic change in the JSON/variant parse path, not just a refactor. Please preserve the old null guard here or add an explicit test showing the new behavior is intended.

}

size_t leaf_num_dimensions = leaf->data.get_dimensions();
size_t entry_num_dimensions = entry->data.get_dimensions();

if (entry_num_dimensions > leaf_num_dimensions) {
throw doris::Exception(
ErrorCode::INVALID_ARGUMENT,
"entry_num_dimensions > leaf_num_dimensions, entry_num_dimensions={}, "
"leaf_num_dimensions={}",
entry_num_dimensions, leaf_num_dimensions);
// Preserve the original null guard: if the donor leaf's last row is NULL,
// fall back to insert_default() so that a missing sibling stays NULL too.
if (leaf->data.is_null_at(leaf_size - 1)) {
return false;
}

auto default_scalar = entry->data.get_least_common_type()->get_default();
FieldInfo field_info = {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes the single-row path from inserting a typed default Field into the existing entry column to rebuilding a temporary subcolumn from FieldInfo. That is not equivalent for metadata-carrying types. Here field_info only keeps scalar_type_id and num_dimensions; precision and scale stay at their defaults. If a missing nested sibling is ARRAY<DECIMAL(10,2)> and we hit this path through ColumnVariant::try_insert() or insert_from(), clone_with_default_values() reaches create_array_of_type(TYPE_DECIMAL..., 0, 0) and throws from create_decimal(). The old implementation used entry->data.get_least_common_type()->get_default() and inserted that typed field directly, so it preserved the decimal metadata. Please preserve the full type metadata here before switching the single-row path over to clone_with_default_values().

.scalar_type_id = entry->data.least_common_type.get_base_type_id(),
.num_dimensions = entry->data.get_dimensions(),
};

auto default_field = apply_visitor(
FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field);
entry->data.insert(std::move(default_field));
// Reuse the same column-level approach as try_insert_many_defaults_from_nested:
// cut the last row from leaf, replace scalar values with defaults, keep array structure.
auto new_subcolumn = leaf->data.cut(leaf_size - 1, 1).clone_with_default_values(field_info);
entry->data.insert_range_from(new_subcolumn, 0, 1);
ENABLE_CHECK_CONSISTENCY(this);
return true;
}

Expand Down
29 changes: 0 additions & 29 deletions be/src/exec/common/variant_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1821,35 +1821,6 @@ static void append_field_to_binary_chars(const Field& field, ColumnString::Chars
field.get_type());
}
}
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
/// and replaces all scalars or nested arrays to @replacement at that level.
class FieldVisitorReplaceScalars : public StaticVisitor<Field> {
public:
FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_)
: replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {}
template <PrimitiveType T>
Field operator()(const typename PrimitiveTypeTraits<T>::CppType& x) const {
if constexpr (T == TYPE_ARRAY) {
if (num_dimensions_to_keep == 0) {
return replacement;
}
const size_t size = x.size();
Array res(size);
for (size_t i = 0; i < size; ++i) {
res[i] = apply_visitor(
FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]);
}
return Field::create_field<TYPE_ARRAY>(res);
} else {
return replacement;
}
}

private:
const Field& replacement;
size_t num_dimensions_to_keep;
};

template <typename ParserImpl>
void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
JSONDataParser<ParserImpl>* parser, const ParseConfig& config) {
Expand Down
24 changes: 0 additions & 24 deletions be/test/core/column/column_variant_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3082,30 +3082,6 @@ TEST_F(ColumnVariantTest, get_field_info_all_types) {
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The patch removes the only direct test around the deleted transformation logic, but it does not add any replacement coverage for the behavior that changed in try_insert_default_from_nested(). The existing try_insert_default_from_nested test only exercises the call and never asserts the inserted row contents/nullability. Please add a targeted test for the blocking case above: donor nested sibling row is NULL, target sibling path is missing, and we must still fall back to insert_default() rather than materializing a synthesized nested row.

}

TEST_F(ColumnVariantTest, field_visitor) {
// Test replacing scalar values in a flat array
{
Array array;
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(1)));
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(2)));
array.push_back(Field::create_field<TYPE_BIGINT>(Int64(3)));

Field field = Field::create_field<TYPE_ARRAY>(std::move(array));
Field replacement = Field::create_field<TYPE_BIGINT>(Int64(42));
Field result = apply_visitor(FieldVisitorReplaceScalars(replacement, 0), field);

EXPECT_EQ(result.get<TYPE_BIGINT>(), 42);

Field replacement1 = Field::create_field<TYPE_BIGINT>(Int64(42));
Field result1 = apply_visitor(FieldVisitorReplaceScalars(replacement, 1), field);

EXPECT_EQ(result1.get<TYPE_ARRAY>().size(), 3);
EXPECT_EQ(result1.get<TYPE_ARRAY>()[0].get<TYPE_BIGINT>(), 42);
EXPECT_EQ(result1.get<TYPE_ARRAY>()[1].get<TYPE_BIGINT>(), 42);
EXPECT_EQ(result1.get<TYPE_ARRAY>()[2].get<TYPE_BIGINT>(), 42);
}
}

TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
// Test insert_range_from
{
Expand Down
Loading