Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.nereids.exceptions.UnboundException;
import org.apache.doris.nereids.trees.TreeNode;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Variable;
import org.apache.doris.nereids.types.DataType;

import com.google.common.collect.ImmutableList;
Expand All @@ -46,23 +47,58 @@ default void checkLegalityBeforeTypeCoercion() {}
@Developing
default void checkLegalityAfterRewrite() {}

/**
* getArguments.
*/
default List<Expression> getArguments() {
return children();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unwrapping Variable here only fixes callers that already use getArgument*() / getArguments*(). A lot of function legality/signature code still reads raw child() nodes before the later VariableToLiteral analyzer rule runs, so the same user-variable bug is still present there. Concrete example: SELECT UTC_TIMESTAMP(@p) with @p = 3 still fails, because visitUnboundVariable() returns Variable(realExpression=literal), visitUnboundFunction() builds UtcTimestamp(Variable), and then TypeCoercionUtils.processBoundFunction() immediately calls UtcTimestamp.checkLegalityBeforeTypeCoercion(), which rejects !child(0).isLiteral(). The same pattern exists in utc_time, uniform, width_bucket, etc. I think this needs to be fixed earlier in the bind/type-check path, or those raw-child checks need to be updated as well; otherwise this patch only fixes the window_funnel-style subset of functions.

ImmutableList.Builder<Expression> arguments = ImmutableList.builder();
for (Expression arg : children()) {
if (arg instanceof Variable && ((Variable) arg).getRealExpression() != null) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only fixes callers that go through getArguments*(), but BindExpression still binds functions before the later VariableToLiteral analyzer pass runs. That means any function that validates raw child() nodes still hits the same timing bug. A concrete reachable example is SET @p = 3; SELECT utc_time(@p) (also utc_timestamp(@p) and char(@cs, 65)): ExpressionAnalyzer.visitUnboundFunction() immediately calls TypeCoercionUtils.processBoundFunction(), and those implementations still check child(0).isLiteral() / instanceof StringLikeLiteral, so they reject the Variable wrapper before this helper is ever consulted. So this fixes helper-based callers like window_funnel, but not the general Variable->literal timing issue described in the PR.

arguments.add(((Variable) arg).getRealExpression());
} else {
arguments.add(arg);
}
}
return arguments.build();
}

/**
* getArgument.
*/
default Expression getArgument(int index) {
return child(index);
Expression arg = child(index);
if (arg instanceof Variable && ((Variable) arg).getRealExpression() != null) {
return ((Variable) arg).getRealExpression();
} else {
return arg;
}
}

/**
* getArgumentsTypes.
*/
default List<DataType> getArgumentsTypes() {
return getArguments()
.stream()
.map(Expression::getDataType)
.collect(ImmutableList.toImmutableList());
ImmutableList.Builder<DataType> dataTypes = ImmutableList.builder();
for (Expression arg : children()) {
if (arg instanceof Variable && ((Variable) arg).getRealExpression() != null) {
dataTypes.add(((Variable) arg).getRealExpression().getDataType());
} else {
dataTypes.add(arg.getDataType());
}
}
return dataTypes.build();
}

/**
* getArgumentType.
*/
default DataType getArgumentType(int index) {
return child(index).getDataType();
Expression arg = child(index);
if (arg instanceof Variable && ((Variable) arg).getRealExpression() != null) {
return ((Variable) arg).getRealExpression().getDataType();
} else {
return arg.getDataType();
}
}

default DataType getDataType() throws UnboundException {
Expand Down
19 changes: 17 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.analysis.DecimalLiteral;
import org.apache.doris.analysis.FloatLiteral;
import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LargeIntLiteral;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.analysis.RedirectStatus;
Expand Down Expand Up @@ -88,6 +89,7 @@
import org.xnio.StreamConnection;

import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocking: this LARGEINT arm is unreachable in the actual user-variable flow. SetUserDefinedVarOp.run() stores values via value.toLegacyLiteral(), and a Nereids LargeIntLiteral becomes org.apache.doris.analysis.LargeIntLiteral, not IntLiteral. That means SET @v = 9223372036854775808 still falls through to the final else here and is converted to a string literal instead of a Nereids LargeIntLiteral. Please handle legacy LargeIntLiteral explicitly (or reuse Literal.fromLegacyLiteral(literalExpr, literalExpr.getType()) for the numeric branches).`

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still widens TINYINT/SMALLINT user variables to INTEGER, so exact-width signatures remain broken. SET @p = 3 stores a TINYINT via LogicalPlanBuilder.visitIntegerLiteral() and TinyIntLiteral.toLegacyLiteral(), but this branch still returns Literal.of((int) ...), so SELECT CURTIME(@p) is analyzed as current_time(INT) and fails against the existing current_time(TINYINT) signature in CurrentTime. If the goal is to preserve the original integer type, the TINYINT and SMALLINT cases need explicit byte/short handling too.

Expand Down Expand Up @@ -587,10 +589,23 @@ public void setUserVar(String name, LiteralExpr value) {
LiteralExpr literalExpr = userVars.get(varName);
if (literalExpr instanceof BoolLiteral) {
return Literal.of(((BoolLiteral) literalExpr).getValue());
} else if (literalExpr instanceof IntLiteral) {
} else if (literalExpr instanceof IntLiteral || literalExpr instanceof LargeIntLiteral) {
// the value in the IntLiteral should be int, but now is long in old planner literalExpr
// so type coercion to generate right new planner int Literal
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fix still misses the forwarded-FE path. getLiteralForUserVar() now relies on the stored legacy literal keeping its original primitive type, but FEOpExecutor.getForwardUserVariables() serializes all of TINYINT/SMALLINT/INT/BIGINT as INT_LITERAL, and ConnectProcessor.getLiteralExprFromThrift() reconstructs that as new IntLiteral(value) while ignoring node.type. So SET @v = CAST(1 AS BIGINT) executed through an observer/follower still degrades to a TINYINT before this method runs. ExprToThriftVisitor already sends msg.type, so the real fix needs to preserve that type when rebuilding the legacy IntLiteral; otherwise the integer-typing bug remains on forwarded queries.

return Literal.of((int) ((IntLiteral) literalExpr).getValue());
switch (literalExpr.getType().getPrimitiveType()) {
case LARGEINT:
return Literal.of(new BigInteger(literalExpr.getStringValue()));
case BIGINT:
return Literal.of(((IntLiteral) literalExpr).getValue());
case INT:
return Literal.of((int) ((IntLiteral) literalExpr).getValue());
case SMALLINT:
return Literal.of((short) ((IntLiteral) literalExpr).getValue());
case TINYINT:
return Literal.of((byte) ((IntLiteral) literalExpr).getValue());
default:
return Literal.of((int) ((IntLiteral) literalExpr).getValue());
}
} else if (literalExpr instanceof FloatLiteral) {
return Literal.of(((FloatLiteral) literalExpr).getValue());
} else if (literalExpr instanceof DecimalLiteral) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.analysis;

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LargeIntLiteral;
import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.LargeIntType;
import org.apache.doris.nereids.types.SmallIntType;
import org.apache.doris.nereids.types.TinyIntType;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.qe.ConnectContext;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/** Tests for user variable handling in expression analysis. */
public class UserVariableAnalysisTest {

@Test
public void testUserVarIntegerType() {
ConnectContext ctx = MemoTestUtils.createConnectContext();
// set user var @a = TINY_INT_MAX (tiny int)
ctx.setUserVar("a", new IntLiteral(Byte.MAX_VALUE));
// set user var @a = SMALL_INT_MAX (small int)
ctx.setUserVar("b", new IntLiteral(Short.MAX_VALUE));
// set user var @b = Long.MAX_VALUE (bigint)
ctx.setUserVar("c", new IntLiteral(Integer.MAX_VALUE));
// set user var @b = Long.MAX_VALUE (bigint)
ctx.setUserVar("d", new IntLiteral(Long.MAX_VALUE));
// set user var @b = Long.MAX_VALUE (bigint)
ctx.setUserVar("e", new LargeIntLiteral(LargeIntLiteral.LARGE_INT_MAX));

Assertions.assertEquals(TinyIntType.INSTANCE, ConnectContext.get().getLiteralForUserVar("a").getDataType());
Assertions.assertEquals(SmallIntType.INSTANCE, ConnectContext.get().getLiteralForUserVar("b").getDataType());
Assertions.assertEquals(IntegerType.INSTANCE, ConnectContext.get().getLiteralForUserVar("c").getDataType());
Assertions.assertEquals(BigIntType.INSTANCE, ConnectContext.get().getLiteralForUserVar("d").getDataType());
Assertions.assertEquals(LargeIntType.INSTANCE, ConnectContext.get().getLiteralForUserVar("e").getDataType());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions;

import org.apache.doris.nereids.analyzer.UnboundVariable.VariableType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Variable;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.List;

/**
* Tests for ExpressionTrait behaviors when children are `Variable`.
*/
public class ExpressionTraitTest {

static class DummyFunction extends Expression {
protected DummyFunction(List<Expression> children) {
super(children);
}

protected DummyFunction(Expression... children) {
super(children);
}

@Override
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return null;
}

@Override
public Expression withChildren(List<Expression> children) {
return new DummyFunction(children);
}

@Override
protected String computeToSql() {
return "dummy";
}

@Override
public boolean nullable() {
return false;
}
}

@Test
public void testVariableWithRealExpression() {
IntegerLiteral lit = new IntegerLiteral(42);
Variable var = new Variable("v", VariableType.USER, lit);

DummyFunction func = new DummyFunction(var);

List<Expression> args = func.getArguments();
Assertions.assertEquals(1, args.size());
Assertions.assertEquals(lit, args.get(0));

Assertions.assertEquals(lit, func.getArgument(0));

List<DataType> types = func.getArgumentsTypes();
Assertions.assertEquals(1, types.size());
Assertions.assertEquals(lit.getDataType(), types.get(0));

Assertions.assertEquals(lit.getDataType(), func.getArgumentType(0));
}

@Test
public void testVariableWithoutRealExpression() {
IntegerLiteral lit = new IntegerLiteral(100);
// create a Variable but override getRealExpression to simulate a missing real expression
Variable var = new Variable("v", VariableType.USER, lit) {
@Override
public Expression getRealExpression() {
return null;
}
};

DummyFunction func = new DummyFunction(var);

List<Expression> args = func.getArguments();
Assertions.assertEquals(1, args.size());
// when Variable.getRealExpression() returns null, ExpressionTrait should return the Variable itself
Assertions.assertSame(var, args.get(0));

Assertions.assertSame(var, func.getArgument(0));

List<DataType> types = func.getArgumentsTypes();
Assertions.assertEquals(1, types.size());
// fallback to variable.getDataType()
Assertions.assertEquals(var.getDataType(), types.get(0));

Assertions.assertEquals(var.getDataType(), func.getArgumentType(0));
}
}
3 changes: 3 additions & 0 deletions regression-test/data/query_p0/set/test_user_var.out
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ true false
-- !select7 --
1

-- !select_user_var --
2

28 changes: 28 additions & 0 deletions regression-test/suites/query_p0/set/test_user_var.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,32 @@ suite("test_user_var") {
notContains "CAST"
}

multi_sql """
drop table if exists table_50_undef_partitions2_keys3_properties4_distributed_by54;
create table table_50_undef_partitions2_keys3_properties4_distributed_by54 (
col_date_undef_signed date null ,
col_int_undef_signed int null ,
col_datetime_undef_signed datetime null ,
pk int
) engine=olap
DUPLICATE KEY(col_date_undef_signed)
PARTITION BY RANGE(col_date_undef_signed) (
PARTITION p0 VALUES LESS THAN ('2023-12-11'),
PARTITION p1 VALUES LESS THAN ('2023-12-15'),
PARTITION p2 VALUES LESS THAN ('2023-12-16'),
PARTITION p3 VALUES LESS THAN ('2023-12-17'),
PARTITION p4 VALUES LESS THAN ('2024-01-18'),
PARTITION p5 VALUES LESS THAN ('2026-02-18'),
PARTITION p100 VALUES LESS THAN ('9999-12-31')
)
distributed by hash(pk) buckets 10
properties("replication_num" = "1");
insert into table_50_undef_partitions2_keys3_properties4_distributed_by54(pk,col_int_undef_signed,col_date_undef_signed,col_datetime_undef_signed) values (0,3,'2023-12-09','2004-12-17 12:35:40'),(1,null,'2023-12-15','2007-03-09 11:14:00'),(2,9,'2023-12-15','2005-01-10 12:14:20'),(3,1,null,'2012-08-13 07:51:37'),(4,5,'2023-12-12','2008-06-09 05:14:31'),(5,5,null,'2003-12-12 03:46:03'),(6,4,'2012-11-08','2013-05-17 19:32:15'),(7,5,'2023-12-15','2013-11-11 05:02:08'),(8,7,'2023-12-15','2001-07-01 20:59:59'),(9,9,'2023-12-14','2007-05-11 16:10:53'),(10,null,'2023-12-17','2014-12-22 08:07:07'),(11,9,null,'2019-12-26 12:33:12'),(12,null,'2023-12-12','2018-02-04 20:16:03'),(13,1,'2023-12-17','2012-12-23 09:29:23'),(14,null,'2023-12-14','2002-01-23 18:40:57'),(15,0,'2023-12-13','2005-03-20 23:57:50'),(16,7,'2023-12-15','2005-04-22 08:56:50'),(17,8,'2023-12-18','2019-06-27 11:46:05'),(18,0,'2023-12-17','2000-10-16 00:48:54'),(19,null,'2023-12-16','2010-02-18 12:33:42'),(20,9,'2023-12-16','2011-06-23 14:58:11'),(21,null,null,'2009-09-22 23:43:05'),(22,6,'2023-12-14','2004-01-12 10:08:48'),(23,3,null,'2008-08-23 17:12:45'),(24,2,'2023-12-16','2013-10-14 01:57:45'),(25,0,null,'2012-12-05 06:31:29'),(26,8,'2023-12-13','2014-04-03 17:49:28'),(27,null,'2023-12-11','2014-04-07 19:37:58'),(28,1,'2023-12-18','2014-09-18 14:09:04'),(29,8,'2023-12-14','2004-09-18 16:57:38'),(30,null,'2023-12-13','2014-10-10 01:44:04'),(31,9,'2023-12-15','2017-09-24 12:33:16'),(32,2,'2023-12-16','2013-11-15 17:12:36'),(33,4,'2005-04-24','2003-03-06 08:47:44'),(34,6,'2023-12-16','2001-05-21 10:40:16'),(35,2,'2023-12-17','2001-06-02 02:34:54'),(36,4,'2023-12-14','2010-03-06 08:46:49'),(37,4,null,'2000-04-08 07:56:25'),(38,8,'2023-12-10','2018-03-24 22:17:31'),(39,0,'2023-12-11','2009-12-13 17:40:47'),(40,9,'2023-12-16','2014-05-04 01:17:18'),(41,5,'2017-02-23','2005-04-10 15:34:29'),(42,2,'2023-12-15','2013-12-16 10:12:23'),(43,6,'2023-12-12','2000-07-28 14:30:17'),(44,7,'2023-12-11','2002-03-24 11:43:04'),(45,7,'2023-12-09','2012-07-25 19:06:56'),(46,null,'2005-12-05','2018-10-07 21:28:38'),(47,1,'2023-12-15','2019-06-15 23:13:02'),(48,4,null,'2012-10-23 02:55:35'),(49,5,'2023-12-17','2009-09-07 11:50:21');
SET @v0 = 2502512601;
SET @v1 = 'default';
SET @v2 = '2025-06-18';
SET @v3 = '2023-12-17';
"""
qt_select_user_var """SELECT WINDOW_FUNNEL(@v0, @v1, col_datetime_undef_signed, col_date_undef_signed != @v2, col_date_undef_signed > @v3) FROM table_50_undef_partitions2_keys3_properties4_distributed_by54 ;"""
}
Loading