diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java index 6ccaa85e27f04a..f39732fa147fbf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperGraph.java @@ -573,8 +573,14 @@ private BitSet addJoin(LogicalJoin join, } private BitSet addFilter(LogicalFilter filter, Pair childEdgeNodes) { + // Record the nodes actually used by the filter predicates when building the graph. + // slotToNodeMap already follows project aliases through addAlias(), so filters on alias + // slots still point back to the original base nodes. Slot-free predicates, e.g. 1 = 0, + // affect the whole child subtree and must not be treated as unrelated to every node. + long inputNodes = filter.getInputSlots().isEmpty() + ? childEdgeNodes.second : calNodeMap(filter.getInputSlots()); FilterEdge edge = new FilterEdge(filter, filterEdges.size(), childEdgeNodes.first, childEdgeNodes.second, - childEdgeNodes.second); + childEdgeNodes.second, inputNodes); filterEdges.add(edge); BitSet bitSet = new BitSet(); bitSet.set(edge.getIndex()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java index 52c94abe3a16c9..813edaa7910b56 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/FilterEdge.java @@ -31,11 +31,13 @@ */ public class FilterEdge extends Edge { private final LogicalFilter filter; + private final long inputNodes; public FilterEdge(LogicalFilter filter, int index, - BitSet childEdges, long subTreeNodes, long childRequireNodes) { + BitSet childEdges, long subTreeNodes, long childRequireNodes, long inputNodes) { super(index, childEdges, new BitSet(), subTreeNodes, childRequireNodes, 0L); this.filter = filter; + this.inputNodes = inputNodes; } @Override @@ -48,7 +50,12 @@ public List getExpressions() { return filter.getExpressions(); } + public long getInputNodes() { + return inputNodes; + } + public FilterEdge clear() { - return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes()); + return new FilterEdge(filter, getIndex(), getLeftChildEdges(), getSubTreeNodes(), getLeftRequiredNodes(), + inputNodes); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java index 11f2e7c409da33..39c2cc4213c865 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java @@ -165,7 +165,7 @@ private ComparisonResult isLogicCompatible() { .forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions())); Sets.difference(getViewFilterEdgeSet(), Sets.newHashSet(queryToViewFilterEdge.values())) .stream() - .filter(e -> !LongBitmap.isOverlap(e.getReferenceNodes(), shouldEliminateViewNodesMap)) + .filter(e -> !LongBitmap.isOverlap(e.getInputNodes(), shouldEliminateViewNodesMap)) .forEach(e -> pullUpViewExprWithEdge.put(e, e.getExpressions())); return buildComparisonRes(); @@ -197,6 +197,16 @@ private boolean canEliminatePrimaryByForeign(long primaryNodes, long foreignNode return JoinUtils.canEliminateByFk(joinEdge.getJoin(), primary, foreign); } + private boolean canEliminateViewByLeft(JoinEdge joinEdge, Plan rightPlan) { + // MV comparison can validate residual predicates separately. Keep this relaxation local instead of + // changing JoinUtils.canEliminateByLeft(), which is shared by standalone rewrite rules. + Pair, Set> njHashKeys = joinEdge.getJoin().extractNullRejectHashKeys(); + if (njHashKeys == null) { + return false; + } + return rightPlan.getLogicalProperties().getTrait().isUnique(njHashKeys.second); + } + private boolean canEliminateViewEdge(JoinEdge joinEdge) { // eliminate by unique if (joinEdge.getJoinType().isLeftOuterJoin() && joinEdge.isRightSimple()) { @@ -205,12 +215,11 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) { if (LongBitmap.getCardinality(eliminatedRight) != 1) { return false; } - Plan rigthPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots()); - if (rigthPlan == null) { + Plan rightPlan = constructViewPlan(joinEdge.getRightExtendedNodes(), joinEdge.getRightInputSlots()); + if (rightPlan == null) { return false; } - boolean couldEliminateByLeft = JoinUtils.canEliminateByLeft(joinEdge.getJoin(), - rigthPlan.getLogicalProperties().getTrait()); + boolean couldEliminateByLeft = canEliminateViewByLeft(joinEdge, rightPlan); // if eliminated successfully, should refresh the eliminateViewNodesMap if (couldEliminateByLeft) { this.reservedShouldEliminatedViewNodes = @@ -254,21 +263,31 @@ private boolean canEliminateViewEdge(JoinEdge joinEdge) { } private boolean tryEliminateNodesAndEdge() { - boolean hasFilterEdgeAbove = viewHyperGraph.getFilterEdges().stream() - .filter(e -> LongBitmap.getCardinality(e.getReferenceNodes()) == 1) - .anyMatch(e -> LongBitmap.isSubset(e.getReferenceNodes(), shouldEliminateViewNodesMap)); - if (hasFilterEdgeAbove) { - // If there is some filter edge above the eliminated node, we should rebuild a plan - // Right now, just reject it. - return false; - } long allCanEliminateNodes = 0; for (JoinEdge joinEdge : viewHyperGraph.getJoinEdges()) { long canEliminateSideNodes = getCanEliminateSideNodes(joinEdge); allCanEliminateNodes = LongBitmap.or(allCanEliminateNodes, canEliminateSideNodes); - if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes) - && !canEliminateViewEdge(joinEdge)) { - return false; + long shouldEliminateNodesOnCurrentEdge = + LongBitmap.newBitmapIntersect(canEliminateSideNodes, reservedShouldEliminatedViewNodes); + if (LongBitmap.isOverlap(canEliminateSideNodes, reservedShouldEliminatedViewNodes)) { + // For FilterEdge, leftChildEdges records join edges below the filter child. Non-empty means + // the filter is above a join result, e.g. WHERE after LEFT JOIN, and may filter preserved rows. + // Empty means the filter stays inside a base/nullable-side subtree, which is safe for LOJ. + boolean hasUnsafeFilterEdgeOnCurrentEliminatedNode = + LongBitmap.getCardinality(shouldEliminateNodesOnCurrentEdge) > 0 + && viewHyperGraph.getFilterEdges().stream() + .filter(e -> e.getExpressions().stream().anyMatch(expr -> !ExpressionUtils.isInferred(expr))) + .filter(e -> LongBitmap.isOverlap( + e.getInputNodes(), shouldEliminateNodesOnCurrentEdge)) + .anyMatch(e -> joinEdge.getJoinType().isInnerJoin() || !e.getLeftChildEdges().isEmpty()); + // Inner join elimination is unsafe with any filter on the removed side. For left join, + // filters inside the nullable side are OK, but filters above the join can filter left rows. + if (hasUnsafeFilterEdgeOnCurrentEliminatedNode) { + return false; + } + if (!canEliminateViewEdge(joinEdge)) { + return false; + } } } // check all can eliminateNodes contains all should eliminate nodes, to avoid some nodes can not be eliminated diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java index 2bec767d46f041..20697e78912d20 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/EliminateJoinTest.java @@ -100,6 +100,105 @@ void testLOJWithUK() throws Exception { dropConstraint("alter table T2 drop constraint uk"); } + @Test + void testLOJWithUKAndOtherJoinConjuncts() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint uk_other_join_conjunct unique (id)"); + try { + CascadesContext c2 = createCascadesContext( + "select * from T1 left outer join T2 " + + "on T1.id = T2.id and T2.id = 1", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertFalse(res.isInvalid()); + Assertions.assertTrue(res.getViewExpressions().isEmpty()); + } finally { + dropConstraint("alter table T2 drop constraint uk_other_join_conjunct"); + } + } + + @Test + void testLOJWithUKAndFilterOnEliminatedNode() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select T1.id from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint uk_loj_filter unique (id)"); + try { + CascadesContext c2 = createCascadesContext( + "select T1.id from T1 left outer join T2 " + + "on T1.id = T2.id where T2.id is null", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + } finally { + dropConstraint("alter table T2 drop constraint uk_loj_filter"); + } + } + + @Test + void testInnerJoinWithPKFKAndSlotFreeFilterOnEliminatedNode() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint pk_slot_free_filter primary key (id)"); + addConstraint("alter table T1 add constraint fk_slot_free_filter foreign key (id) references T2(id)"); + try { + CascadesContext c2 = createCascadesContext( + "select * from T1 inner join (select * from T2 where 1 = 0) T2 " + + "on T1.id = T2.id", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + } finally { + dropConstraint("alter table T1 drop constraint fk_slot_free_filter"); + dropConstraint("alter table T2 drop constraint pk_slot_free_filter"); + } + } + @Test void testLOJWithPKFK() throws Exception { connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); @@ -143,6 +242,36 @@ void testLOJWithPKFK() throws Exception { dropConstraint("alter table T2 drop constraint pk"); } + @Test + void testInnerJoinWithPKFKAndMultiNodeResidualFilter() throws Exception { + connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); + CascadesContext c1 = createCascadesContext( + "select * from T1", + connectContext + ); + Plan p1 = PlanChecker.from(c1) + .analyze() + .rewrite() + .getPlan().child(0); + addConstraint("alter table T2 add constraint pk primary key (id)"); + addConstraint("alter table T1 add constraint fk foreign key (id) references T2(id)"); + CascadesContext c2 = createCascadesContext( + "select * from T1 inner join T2 " + + "on T1.id = T2.id where T1.score > T2.score", + connectContext + ); + Plan p2 = PlanChecker.from(c2) + .analyze() + .rewrite() + .applyExploration(RuleSet.BUSHY_TREE_JOIN_REORDER) + .getAllPlan().get(0).child(0); + HyperGraph h1 = HyperGraph.builderForMv(p1).build(); + HyperGraph h2 = HyperGraph.builderForMv(p2).build(); + ComparisonResult res = HyperGraphComparator.isLogicCompatible(h1, h2, constructContext(p1, p2, c1)); + Assertions.assertTrue(res.isInvalid()); + dropConstraint("alter table T2 drop constraint pk"); + } + @Disabled @Test void testLOJWithPKFKAndUK1() throws Exception { diff --git a/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out b/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out new file mode 100644 index 00000000000000..37037d7bad2818 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !join_elim_filter_edge -- +1 Alice +2 Bob +3 Charlie diff --git a/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy b/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy new file mode 100644 index 00000000000000..57f5c3abdb9cbb --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/join_elim_p_f_key/join_elim_filter_edge.groovy @@ -0,0 +1,79 @@ +package mv.join_elim_p_f_key +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("join_elim_filter_edge") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + + sql """DROP MATERIALIZED VIEW IF EXISTS mv_join_elim_filter_edge""" + sql """DROP TABLE IF EXISTS join_elim_filter_edge_t1""" + sql """DROP TABLE IF EXISTS join_elim_filter_edge_t2""" + + sql """CREATE TABLE join_elim_filter_edge_t1 ( + id INT, + name VARCHAR(50) + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1");""" + + sql """CREATE TABLE join_elim_filter_edge_t2 ( + id INT, + value INT + ) + UNIQUE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true" + );""" + + sql """ALTER TABLE join_elim_filter_edge_t2 ADD CONSTRAINT uk_id UNIQUE (id)""" + + sql """INSERT INTO join_elim_filter_edge_t1 VALUES + (1, 'Alice'), + (2, 'Bob'), + (3, 'Charlie');""" + + sql """INSERT INTO join_elim_filter_edge_t2 VALUES + (1, 10), + (2, -5), + (4, 20);""" + + sql """ANALYZE TABLE join_elim_filter_edge_t1 WITH SYNC;""" + sql """ANALYZE TABLE join_elim_filter_edge_t2 WITH SYNC;""" + sql """ALTER TABLE join_elim_filter_edge_t1 MODIFY COLUMN name SET STATS ('row_count'='3');""" + sql """ALTER TABLE join_elim_filter_edge_t2 MODIFY COLUMN value SET STATS ('row_count'='3');""" + + create_async_mv(db, "mv_join_elim_filter_edge", """ + SELECT + t1.id, + t1.name + FROM join_elim_filter_edge_t1 t1 + LEFT JOIN join_elim_filter_edge_t2 t2 + ON t1.id = t2.id AND t2.id = 1 + """) + + def querySql = """SELECT id, name FROM join_elim_filter_edge_t1""" + mv_rewrite_success_without_check_chosen(querySql, "mv_join_elim_filter_edge") + order_qt_join_elim_filter_edge """SELECT id, name FROM join_elim_filter_edge_t1 ORDER BY 1, 2""" +}