Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,14 @@ private BitSet addJoin(LogicalJoin<?, ?> join,
}

private BitSet addFilter(LogicalFilter<?> filter, Pair<BitSet, Long> childEdgeNodes) {
// Record the nodes actually used by the filter predicates when building the graph.
// slotToNodeMap already follows project aliases through addAlias(), so filters on alias
// slots still point back to the original base nodes. Slot-free predicates, e.g. 1 = 0,
// affect the whole child subtree and must not be treated as unrelated to every node.
long inputNodes = filter.getInputSlots().isEmpty()
? childEdgeNodes.second : calNodeMap(filter.getInputSlots());
FilterEdge edge = new FilterEdge(filter, filterEdges.size(), childEdgeNodes.first, childEdgeNodes.second,
childEdgeNodes.second);
childEdgeNodes.second, inputNodes);
filterEdges.add(edge);
BitSet bitSet = new BitSet();
bitSet.set(edge.getIndex());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@
*/
public class FilterEdge extends Edge {
private final LogicalFilter<? extends Plan> filter;
private final long inputNodes;

public FilterEdge(LogicalFilter<? extends Plan> filter, int index,
BitSet childEdges, long subTreeNodes, long childRequireNodes) {
BitSet childEdges, long subTreeNodes, long childRequireNodes, long inputNodes) {
super(index, childEdges, new BitSet(), subTreeNodes, childRequireNodes, 0L);
this.filter = filter;
this.inputNodes = inputNodes;
}

@Override
Expand All @@ -48,7 +50,12 @@ public List<? extends Expression> getExpressions() {
return filter.getExpressions();
}

public long getInputNodes() {
return inputNodes;
}

public FilterEdge clear() {
return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes());
return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes(),
inputNodes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ private ComparisonResult isLogicCompatible() {
.forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions()));
Sets.difference(getViewFilterEdgeSet(), Sets.newHashSet(queryToViewFilterEdge.values()))
.stream()
.filter(e -> !LongBitmap.isOverlap(e.getReferenceNodes(), shouldEliminateViewNodesMap))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use e.getReferenceNodes()

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use e.getReferenceNodes()

FilterEdge.getReferenceNodes() is the whole filter child subtree, not the nodes actually referenced by the filter predicate. In HyperGraph.addFilter() we build the edge with subTreeNodes, so for a filter above t1 LEFT JOIN t2, getReferenceNodes() contains both sides even if the predicate only uses preserved-side columns, e.g. WHERE t1.id > 1. Using it here would therefore over-block safe elimination. getInputNodes() is narrower: it tracks the actual predicate lineage from input slots (including projected aliases), and only falls back to the whole subtree for slot-free predicates such as 1 = 0, where the filter really affects the entire child subtree. That is the dependency we want for this safety check.

.filter(e -> !LongBitmap.isOverlap(e.getInputNodes(), shouldEliminateViewNodesMap))
.forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions()));

return buildComparisonRes();
Expand Down Expand Up @@ -197,6 +197,16 @@ private boolean canEliminatePrimaryByForeign(long primaryNodes, long foreignNode
return JoinUtils.canEliminateByFk(joinEdge.getJoin(), primary, foreign);
}

/**
 * MV-comparison-local variant of the "eliminate left outer join by unique right side" check.
 *
 * @param joinEdge the LOJ edge whose right side is a candidate for elimination
 * @param rightPlan the reconstructed plan for the right side of the join
 * @return true when the join's null-rejecting hash keys exist and the right-side
 *         keys are unique on {@code rightPlan}; false otherwise
 */
private boolean canEliminateViewByLeft(JoinEdge joinEdge, Plan rightPlan) {
// MV comparison can validate residual predicates separately. Keep this relaxation local instead of
// changing JoinUtils.canEliminateByLeft(), which is shared by standalone rewrite rules.
Pair<Set<Slot>, Set<Slot>> njHashKeys = joinEdge.getJoin().extractNullRejectHashKeys();
// No null-rejecting equi hash keys means we cannot reason about uniqueness — reject.
if (njHashKeys == null) {
return false;
}
// Unique right-side keys guarantee the LOJ preserves left rows exactly once.
return rightPlan.getLogicalProperties().getTrait().isUnique(njHashKeys.second);
}

private boolean canEliminateViewEdge(JoinEdge joinEdge) {
// eliminate by unique
if (joinEdge.getJoinType().isLeftOuterJoin() && joinEdge.isRightSimple()) {
Expand All @@ -205,12 +215,11 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) {
if (LongBitmap.getCardinality(eliminatedRight) != 1) {
return false;
}
Plan rigthPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots());
if (rigthPlan == null) {
Plan rightPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots());
if (rightPlan == null) {
return false;
}
boolean couldEliminateByLeft = JoinUtils.canEliminateByLeft(joinEdge.getJoin(),
rigthPlan.getLogicalProperties().getTrait());
boolean couldEliminateByLeft = canEliminateViewByLeft(joinEdge, rightPlan);
// if eliminated successfully, should refresh the eliminateViewNodesMap
if (couldEliminateByLeft) {
this.reservedShouldEliminatedViewNodes =
Expand Down Expand Up @@ -254,21 +263,31 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) {
}

private boolean tryEliminateNodesAndEdge() {
boolean hasFilterEdgeAbove = viewHyperGraph.getFilterEdges().stream()
.filter(e -> LongBitmap.getCardinality(e.getReferenceNodes()) == 1)
.anyMatch(e -> LongBitmap.isSubset(e.getReferenceNodes(), shouldEliminateViewNodesMap));
if (hasFilterEdgeAbove) {
// If there is some filter edge above the eliminated node, we should rebuild a plan
// Right now, just reject it.
return false;
}
long allCanEliminateNodes = 0;
for (JoinEdge joinEdge : viewHyperGraph.getJoinEdges()) {
long canEliminateSideNodes = getCanEliminateSideNodes(joinEdge);
allCanEliminateNodes = LongBitmap.or(allCanEliminateNodes, canEliminateSideNodes);
if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes)
&& !canEliminateViewEdge(joinEdge)) {
return false;
long shouldEliminateNodesOnCurrentEdge =
LongBitmap.newBitmapIntersect(canEliminateSideNodes, reservedShouldEliminatedViewNodes);
if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes)) {
// For FilterEdge, leftChildEdges records join edges below the filter child. Non-empty means
// the filter is above a join result, e.g. WHERE after LEFT JOIN, and may filter preserved rows.
// Empty means the filter stays inside a base/nullable-side subtree, which is safe for LOJ.
Comment thread
foxtail463 marked this conversation as resolved.
boolean hasUnsafeFilterEdgeOnCurrentEliminatedNode =
LongBitmap.getCardinality(shouldEliminateNodesOnCurrentEdge) > 0
&& viewHyperGraph.getFilterEdges().stream()
.filter(e -> e.getExpressions().stream().anyMatch(expr -> !ExpressionUtils.isInferred(expr)))
.filter(e -> LongBitmap.isOverlap(
e.getInputNodes(), shouldEliminateNodesOnCurrentEdge))
.anyMatch(e -> joinEdge.getJoinType().isInnerJoin() || !e.getLeftChildEdges().isEmpty());
// Inner join elimination is unsafe with any filter on the removed side. For left join,
// filters inside the nullable side are OK, but filters above the join can filter left rows.
if (hasUnsafeFilterEdgeOnCurrentEliminatedNode) {
return false;
}
if (!canEliminateViewEdge(joinEdge)) {
return false;
}
}
}
// check all can eliminateNodes contains all should eliminate nodes, to avoid some nodes can not be eliminated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,105 @@ void testLOJWithUK() throws Exception {
dropConstraint("alter table T2 drop constraint uk");
}

// Positive case: a view with "T1 LEFT OUTER JOIN T2 ON T1.id = T2.id AND T2.id = 1"
// (unique key on T2.id plus an extra join conjunct inside the ON clause) must still be
// logically compatible with a plain "select * from T1", with no compensating view
// expressions required.
@Test
void testLOJWithUKAndOtherJoinConjuncts() throws Exception {
connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION");
CascadesContext c1 = createCascadesContext(
"select * from T1",
connectContext
);
Plan p1 = PlanChecker.from(c1)
.analyze()
.rewrite()
.getPlan().child(0);
addConstraint("alter table T2 add constraint uk_other_join_conjunct unique (id)");
// try/finally so the constraint is dropped even when an assertion fails.
try {
CascadesContext c2 = createCascadesContext(
"select * from T1 left outer join T2 "
+ "on T1.id = T2.id and T2.id = 1",
connectContext
);
Plan p2 = PlanChecker.from(c2)
.analyze()
.rewrite()
.applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER)
.getAllPlan().get(0).child(0);
HyperGraph h1 = HyperGraph.builderForMv(p1).build();
HyperGraph h2 = HyperGraph.builderForMv(p2).build();
ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1));
// The comparison must succeed and need no residual view predicates.
Assertions.assertFalse(res.isInvalid());
Assertions.assertTrue(res.getViewExpressions().isEmpty());
} finally {
dropConstraint("alter table T2 drop constraint uk_other_join_conjunct");
}
}

@Test
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added tests only cover rejection paths plus the helper method. They never assert the positive regression described in the PR body: a unique right side with otherJoinConjuncts inside the LEFT JOIN ... ON ... clause should now match SELECT ... FROM t1.

Because this patch is specifically about relaxing otherJoinConjuncts, we still need a direct end-to-end regression here, e.g. the LEFT JOIN t2 ON t1.id = t2.id AND t2.id = 1 shape from the PR description.

// Negative case: "WHERE T2.id is null" sits ABOVE the left outer join and references the
// eliminated (nullable) side, so it can filter preserved T1 rows. Eliminating T2 would
// change results, hence the comparison must be reported invalid.
void testLOJWithUKAndFilterOnEliminatedNode() throws Exception {
connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION");
CascadesContext c1 = createCascadesContext(
"select T1.id from T1",
connectContext
);
Plan p1 = PlanChecker.from(c1)
.analyze()
.rewrite()
.getPlan().child(0);
addConstraint("alter table T2 add constraint uk_loj_filter unique (id)");
// try/finally so the constraint is dropped even when an assertion fails.
try {
CascadesContext c2 = createCascadesContext(
"select T1.id from T1 left outer join T2 "
+ "on T1.id = T2.id where T2.id is null",
connectContext
);
Plan p2 = PlanChecker.from(c2)
.analyze()
.rewrite()
.applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER)
.getAllPlan().get(0).child(0);
HyperGraph h1 = HyperGraph.builderForMv(p1).build();
HyperGraph h2 = HyperGraph.builderForMv(p2).build();
ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1));
// Elimination must be rejected: the filter depends on the removed side.
Assertions.assertTrue(res.isInvalid());
} finally {
dropConstraint("alter table T2 drop constraint uk_loj_filter");
}
}

// Negative case: a slot-free predicate ("where 1 = 0") inside the eliminated side of an
// inner PK-FK join empties that side entirely. Such a filter references no slots, so it
// must be treated as affecting the whole child subtree — PK-FK elimination is unsafe and
// the comparison must be invalid.
@Test
void testInnerJoinWithPKFKAndSlotFreeFilterOnEliminatedNode() throws Exception {
connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION");
CascadesContext c1 = createCascadesContext(
"select * from T1",
connectContext
);
Plan p1 = PlanChecker.from(c1)
.analyze()
.rewrite()
.getPlan().child(0);
addConstraint("alter table T2 add constraint pk_slot_free_filter primary key (id)");
addConstraint("alter table T1 add constraint fk_slot_free_filter foreign key (id) references T2(id)");
// try/finally so both constraints are dropped even when an assertion fails.
try {
CascadesContext c2 = createCascadesContext(
"select * from T1 inner join (select * from T2 where 1 = 0) T2 "
+ "on T1.id = T2.id",
connectContext
);
Plan p2 = PlanChecker.from(c2)
.analyze()
.rewrite()
.applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER)
.getAllPlan().get(0).child(0);
HyperGraph h1 = HyperGraph.builderForMv(p1).build();
HyperGraph h2 = HyperGraph.builderForMv(p2).build();
ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1));
// Elimination must be rejected: the slot-free filter empties the removed side.
Assertions.assertTrue(res.isInvalid());
} finally {
// Drop FK before the PK it references.
dropConstraint("alter table T1 drop constraint fk_slot_free_filter");
dropConstraint("alter table T2 drop constraint pk_slot_free_filter");
}
}

@Test
void testLOJWithPKFK() throws Exception {
connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION");
Expand Down Expand Up @@ -143,6 +242,36 @@ void testLOJWithPKFK() throws Exception {
dropConstraint("alter table T2 drop constraint pk");
}

// Negative case: the residual filter "T1.score > T2.score" spans BOTH join sides, so
// eliminating T2 via PK-FK would lose the predicate's T2 input — the comparison must be
// reported invalid.
@Test
void testInnerJoinWithPKFKAndMultiNodeResidualFilter() throws Exception {
    connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION");
    CascadesContext c1 = createCascadesContext(
            "select * from T1",
            connectContext
    );
    Plan p1 = PlanChecker.from(c1)
            .analyze()
            .rewrite()
            .getPlan().child(0);
    addConstraint("alter table T2 add constraint pk primary key (id)");
    addConstraint("alter table T1 add constraint fk foreign key (id) references T2(id)");
    // Fix: the original dropped only the PK and skipped cleanup on assertion failure,
    // leaking the constraints into later tests. Match the sibling tests' try/finally
    // pattern and drop BOTH constraints (FK first, since it references the PK).
    try {
        CascadesContext c2 = createCascadesContext(
                "select * from T1 inner join T2 "
                        + "on T1.id = T2.id where T1.score > T2.score",
                connectContext
        );
        Plan p2 = PlanChecker.from(c2)
                .analyze()
                .rewrite()
                .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER)
                .getAllPlan().get(0).child(0);
        HyperGraph h1 = HyperGraph.builderForMv(p1).build();
        HyperGraph h2 = HyperGraph.builderForMv(p2).build();
        ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1));
        // Elimination must be rejected: the filter needs slots from the removed side.
        Assertions.assertTrue(res.isInvalid());
    } finally {
        dropConstraint("alter table T1 drop constraint fk");
        dropConstraint("alter table T2 drop constraint pk");
    }
}

@Disabled
@Test
void testLOJWithPKFKAndUK1() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !join_elim_filter_edge --
1 Alice
2 Bob
3 Charlie
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package mv.join_elim_p_f_key
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// End-to-end regression: a query on t1 alone must be rewritten by an async MV whose
// definition contains "t1 LEFT JOIN t2 ON t1.id = t2.id AND t2.id = 1" — the unique key
// on t2.id lets the planner eliminate the join during MV matching.
suite("join_elim_filter_edge") {
String db = context.config.getDbNameByFile(context.file)
sql "use ${db}"
sql "SET enable_nereids_planner=true"
sql "SET enable_fallback_to_original_planner=false"
sql "SET enable_materialized_view_rewrite=true"
sql "SET enable_nereids_timeout = false"

// Clean slate: drop the MV before its base tables.
sql """DROP MATERIALIZED VIEW IF EXISTS mv_join_elim_filter_edge"""
sql """DROP TABLE IF EXISTS join_elim_filter_edge_t1"""
sql """DROP TABLE IF EXISTS join_elim_filter_edge_t2"""

sql """CREATE TABLE join_elim_filter_edge_t1 (
id INT,
name VARCHAR(50)
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES ("replication_allocation" = "tag.location.default: 1");"""

// t2 is a merge-on-write unique-key table so a UNIQUE constraint on id is valid.
sql """CREATE TABLE join_elim_filter_edge_t2 (
id INT,
value INT
)
UNIQUE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"enable_unique_key_merge_on_write" = "true"
);"""

// The unique constraint is what allows the LEFT JOIN in the MV to be eliminated.
sql """ALTER TABLE join_elim_filter_edge_t2 ADD CONSTRAINT uk_id UNIQUE (id)"""

sql """INSERT INTO join_elim_filter_edge_t1 VALUES
(1, 'Alice'),
(2, 'Bob'),
(3, 'Charlie');"""

// t2 intentionally has a non-matching row (id=4) and no row for id=3.
sql """INSERT INTO join_elim_filter_edge_t2 VALUES
(1, 10),
(2, -5),
(4, 20);"""

// Stats so the cost model considers the MV rewrite.
sql """ANALYZE TABLE join_elim_filter_edge_t1 WITH SYNC;"""
sql """ANALYZE TABLE join_elim_filter_edge_t2 WITH SYNC;"""
sql """ALTER TABLE join_elim_filter_edge_t1 MODIFY COLUMN name SET STATS ('row_count'='3');"""
sql """ALTER TABLE join_elim_filter_edge_t2 MODIFY COLUMN value SET STATS ('row_count'='3');"""

// MV definition includes an extra join conjunct (t2.id = 1) inside the ON clause.
create_async_mv(db, "mv_join_elim_filter_edge", """
SELECT
t1.id,
t1.name
FROM join_elim_filter_edge_t1 t1
LEFT JOIN join_elim_filter_edge_t2 t2
ON t1.id = t2.id AND t2.id = 1
""")

// The t1-only query must match the MV, and the results must equal plain t1 rows.
def querySql = """SELECT id, name FROM join_elim_filter_edge_t1"""
mv_rewrite_success_without_check_chosen(querySql, "mv_join_elim_filter_edge")
order_qt_join_elim_filter_edge """SELECT id, name FROM join_elim_filter_edge_t1 ORDER BY 1, 2"""
}
Loading