diff --git a/contrib/pax_storage/sql/cbdb_parallel.sql b/contrib/pax_storage/sql/cbdb_parallel.sql index 5bd48ea210d..6e5770e69fc 100644 --- a/contrib/pax_storage/sql/cbdb_parallel.sql +++ b/contrib/pax_storage/sql/cbdb_parallel.sql @@ -2,7 +2,7 @@ -- CBDB PARALLEL -- Test CBDB style parallel plan. -- GUCs shoule be set with local, do not disturb other parallel plans. --- Should not use force_parallel_mode as it will ignore plan and check results only. +-- Should not use debug_parallel_query as it will ignore plan and check results only. -- We want to check plan in this file! -- If there is need to do that, set it local inside a transaction. -- Set optimizer off in this file, ORCA parallel is not supported. @@ -31,7 +31,7 @@ -- 12 CdbLocusType_HashedWorkers -- -- -set force_parallel_mode = 0; +set debug_parallel_query = 0; set optimizer = off; create schema test_parallel; @@ -134,7 +134,7 @@ set local enable_parallel = on; create index on t1(c2); insert into t1 select i, i from generate_series(1, 1000000) i; analyze t1; -set local force_parallel_mode = 1; +set local debug_parallel_query = 1; set local enable_seqscan = off; explain(locus, costs off) select c2 from t1; -- results check @@ -691,7 +691,7 @@ abort; begin; set local optimizer=off; set local enable_parallel=on; -set local force_parallel_mode =1 ; +set local debug_parallel_query =1 ; set local min_parallel_table_scan_size = 0; create table semi_t1 (c1 integer) with(parallel_workers=2) distributed randomly; create table semi_t2 (c2 integer) with(parallel_workers=2) distributed randomly; @@ -765,5 +765,5 @@ drop schema test_parallel cascade; -- end_ignore reset gp_appendonly_insert_files; -reset force_parallel_mode; +reset debug_parallel_query; reset optimizer; diff --git a/contrib/pax_storage/sql/types.sql b/contrib/pax_storage/sql/types.sql index 04342d08717..fed01b7af9d 100644 --- a/contrib/pax_storage/sql/types.sql +++ b/contrib/pax_storage/sql/types.sql @@ -22,9 +22,9 @@ CREATE TABLE pax_test.all_typbyval_pg_types ( pg_lsn_col pg_lsn ) USING pax distributed by (id); -insert into pax_test.all_typbyval_pg_types values(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), -(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), -(1, true,'c',2,'cid',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'); +insert into pax_test.all_typbyval_pg_types values(1, true,'c',2,'0',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'0',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'), +(1, true,'c',2,'0',4.2,5,'2023-05-17 17:56:49',7,'2023-05-17 17:56:49',10,11.1111,12,'2023-05-17 17:56:49','2023-05-17 17:56:49', '16/0'); select * from pax_test.all_typbyval_pg_types; create table pax_test.all_typlen_lt_0_pg_type ( diff --git a/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out b/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out index aa87cea2a3e..25cb4991125 100644 --- a/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/DML_over_joins_optimizer.out @@ -3,6 +3,7 @@ -- ---------------------------------------------------------------------- create schema DML_over_joins; set search_path to DML_over_joins; +set optimizer_trace_fallback=on; -- ---------------------------------------------------------------------- -- Test: heap_motion1.sql -- ---------------------------------------------------------------------- @@ -23,7 +24,11 @@ insert into s select generate_series(1, 100), generate_series(1, 100) * 4; analyze r; analyze s; update r set b = r.b + 1 from s where r.a = s.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = r.b + 1 from s where r.a in (select a from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -40,7 +45,11 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 4 from s where r.b = s.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = b + 1 where b in (select b from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list delete from s using r where r.a = s.b; delete from r; delete from s; @@ -55,7 +64,9 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; explain update s set b = b + 1 where exists (select 1 from r where s.a = r.b); - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list + QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------- Update on s (cost=47.71..49.13 rows=0 width=0) -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=47.71..49.13 rows=33 width=52) @@ -74,8 +85,10 @@ explain update s set b = b + 1 where exists (select 1 from r where s.a = r.b); (14 rows) update s set b = b + 1 where exists (select 1 from r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list explain delete from s where exists (select 1 from r where s.a = r.b); - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------------------------------------------------- Delete on s (cost=0.00..865.66 rows=34 width=1) -> Hash Semi Join (cost=0.00..862.80 rows=34 width=18) @@ -108,7 +121,11 @@ create table s (a int8, b int8) distributed by (a); insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = r.b + 1 from s where r.a in (select a from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -125,7 +142,11 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 4 from s where r.b = s.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = b + 1 where b in (select b from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list delete from s using r where r.a = s.b; delete from r; delete from s; @@ -140,6 +161,8 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update s set b = b + 1 where exists (select 1 from r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list delete from s where exists (select 1 from r where s.a = r.b); -- ---------------------------------------------------------------------- -- Test: heap_motion3.sql @@ -159,7 +182,11 @@ create table s (a float4, b float4) distributed by (a); insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = r.b + 1 from s where r.a in (select a from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -176,7 +203,11 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 4 from s where r.b = s.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = b + 1 where b in (select b from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list delete from s using r where r.a = s.b; delete from r; delete from s; @@ -191,6 +222,8 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update s set b = b + 1 where exists (select 1 from r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list delete from s where exists (select 1 from r where s.a = r.b); -- ---------------------------------------------------------------------- -- Test: heap_motion4.sql @@ -210,7 +243,11 @@ create table s (a float(24), b float(24)) distributed by (a); insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = r.b + 1 from s where r.a in (select a from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -227,7 +264,11 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 4 from s where r.b = s.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = b + 1 where b in (select b from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list delete from s using r where r.a = s.b; delete from r; delete from s; @@ -242,6 +283,8 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update s set b = b + 1 where exists (select 1 from r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list delete from s where exists (select 1 from r where s.a = r.b); -- ---------------------------------------------------------------------- -- Test: heap_motion5.sql @@ -261,7 +304,11 @@ create table s (a float(53), b float(53)) distributed by (a); insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 1 from s where r.a = s.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = r.b + 1 from s where r.a in (select a from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list delete from r using s where r.a = s.a; delete from r; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; @@ -278,7 +325,11 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update r set b = r.b + 4 from s where r.b = s.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list update r set b = b + 1 where b in (select b from s); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list delete from s using r where r.a = s.b; delete from r; delete from s; @@ -293,6 +344,8 @@ delete from s; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 4; update s set b = b + 1 where exists (select 1 from r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 28 not found in project list delete from s where exists (select 1 from r where s.a = r.b); ------------------------------------------------------------ -- Update with Motion: @@ -317,12 +370,18 @@ update s set a = r.a from r where r.b = s.b; -- Statement contains correlated subquery ------------------------------------------------------------ update s set b = (select min(a) from r where b = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list delete from s where b = (select min(a) from r where b = s.b); ------------------------------------------------------------ -- Update partition key (requires moving tuples from one partition to another) ------------------------------------------------------------ update p set c = c + 1 where c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables update p set c = c + 1 where b in (select b from s) and c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, c, count(*) from p group by 1, 2; tableoid | c | count ---------------+---+------- @@ -356,12 +415,18 @@ update s set a = r.a from r where r.b = s.b; -- Statement contains correlated subquery ------------------------------------------------------------ update s set b = (select min(a) from r where b = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list delete from s where b = (select min(a) from r where b = s.b); ------------------------------------------------------------ -- Update partition key (requires moving tuples from one partition to another) ------------------------------------------------------------ update p set c = c + 1 where c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables update p set c = c + 1 where b in (select b from s where b = 36); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, c, count(*) from p group by 1, 2; tableoid | c | count ---------------+---+------- @@ -395,12 +460,18 @@ update s set a = r.a from r where r.b = s.b; -- Statement contains correlated subquery ------------------------------------------------------------ update s set b = (select min(a) from r where b = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list delete from s where b = (select min(a) from r where b = s.b); ------------------------------------------------------------ -- Update partition key (requires moving tuples from one partition to another) ------------------------------------------------------------ update p set c = c + 1 where c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables update p set c = c + 1 where b in (select b from s) and c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, c, count(*) from p group by 1, 2; tableoid | c | count ---------------+---+------- @@ -434,12 +505,18 @@ update s set a = r.a from r where r.b = s.b; -- Statement contains correlated subquery ------------------------------------------------------------ update s set b = (select min(a) from r where b = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list delete from s where b = (select min(a) from r where b = s.b); ------------------------------------------------------------ -- Update partition key (requires moving tuples from one partition to another) ------------------------------------------------------------ update p set c = c + 1 where c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables update p set c = c + 1 where b in (select b from s) and c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, c, count(*) from p group by 1, 2; tableoid | c | count ---------------+---+------- @@ -473,12 +550,18 @@ update s set a = r.a from r where r.b = s.b; -- Statement contains correlated subquery ------------------------------------------------------------ update s set b = (select min(a) from r where b = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list delete from s where b = (select min(a) from r where b = s.b); ------------------------------------------------------------ -- Update partition key (requires moving tuples from one partition to another) ------------------------------------------------------------ update p set c = c + 1 where c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables update p set c = c + 1 where b in (select b from s) and c = 0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, c, count(*) from p group by 1, 2; tableoid | c | count ---------------+---+------- @@ -514,14 +597,24 @@ insert into p select generate_series(1,10000), generate_series(1,10000) * 3, gen -- Motion on p, append node, hash agg ------------------------------------------------------------ update p set b = b + 1 where a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables delete from p where p.a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.b and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- Updates with motion: -- No motion, colocated distribution key ------------------------------------------------------------ delete from p where a in (select a from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.a and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- No motion of s ------------------------------------------------------------ @@ -569,14 +662,24 @@ insert into p select generate_series(1,10000), generate_series(1,10000) * 3, gen -- Motion on p, append node, hash agg ------------------------------------------------------------ update p set b = b + 1 where a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables delete from p where p.a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.b and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- Updates with motion: -- No motion, colocated distribution key ------------------------------------------------------------ delete from p where a in (select a from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.a and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- No motion of s ------------------------------------------------------------ @@ -624,14 +727,24 @@ insert into p select generate_series(1,10000), generate_series(1,10000) * 3, gen -- Motion on p, append node, hash agg ------------------------------------------------------------ update p set b = b + 1 where a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables delete from p where p.a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.b and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- Updates with motion: -- No motion, colocated distribution key ------------------------------------------------------------ delete from p where a in (select a from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.a and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- No motion of s ------------------------------------------------------------ @@ -679,14 +792,24 @@ insert into p select generate_series(1,10000), generate_series(1,10000) * 3, gen -- Motion on p, append node, hash agg ------------------------------------------------------------ update p set b = b + 1 where a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables delete from p where p.a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.b and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- Updates with motion: -- No motion, colocated distribution key ------------------------------------------------------------ delete from p where a in (select a from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.a and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- No motion of s ------------------------------------------------------------ @@ -734,14 +857,24 @@ insert into p select generate_series(1,10000), generate_series(1,10000) * 3, gen -- Motion on p, append node, hash agg ------------------------------------------------------------ update p set b = b + 1 where a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables delete from p where p.a in (select b from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.b and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- Updates with motion: -- No motion, colocated distribution key ------------------------------------------------------------ delete from p where a in (select a from r where a = p.c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables delete from p using r where p.a = r.a and r.a = p.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables ------------------------------------------------------------ -- No motion of s ------------------------------------------------------------ @@ -796,6 +929,8 @@ SELECT a,b,c FROM update_test ORDER BY a,c; (2 rows) UPDATE update_test SET a = DEFAULT, b = DEFAULT; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 14 not found in project list SELECT a,b,c FROM update_test ORDER BY a,c; a | b | c ----+---+----- @@ -805,6 +940,8 @@ SELECT a,b,c FROM update_test ORDER BY a,c; -- aliases for the UPDATE target table UPDATE update_test AS t SET b = 10 WHERE t.a = 10; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 13 not found in project list SELECT a,b,c FROM update_test ORDER BY a,c; a | b | c ----+----+----- @@ -813,6 +950,8 @@ SELECT a,b,c FROM update_test ORDER BY a,c; (2 rows) UPDATE update_test t SET b = t.b + 10 WHERE t.a = 10; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 13 not found in project list SELECT a,b,c FROM update_test ORDER BY a,c; a | b | c ----+----+----- @@ -822,6 +961,8 @@ SELECT a,b,c FROM update_test ORDER BY a,c; UPDATE update_test SET a=v.i FROM (VALUES(100, 20)) AS v(i, j) WHERE update_test.b = v.j; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 14 not found in project list SELECT a,b,c FROM update_test ORDER BY a,c; a | b | c -----+----+----- @@ -845,6 +986,8 @@ INSERT INTO t2 (id, data1, data2) VALUES (4, 3, 104); -- (depending upon exactly where the data is stored, which will vary depending -- upon the number of segments; in my case, I used only 2 segments). UPDATE t1 SET data2 = t2.data2 FROM t2 WHERE t1.data1 = t2.data1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 22 not found in project list SELECT * from t1; id | data1 | data2 ----+-------+------- @@ -970,6 +1113,11 @@ DECLARE region VARCHAR; tablename VARCHAR; BEGIN + -- Set trace fallback to off to stabilize the test. Issue is that ORCA can + -- fallback due to Query Parameter not supported in DXL. That is + -- non-deterministic based on plancache. This can be removed this after + -- ORCA implements Query Parameters. + set optimizer_trace_fallback=off; rowCount = $1; tablename = $2; FOR i IN 1 .. rowCount LOOP @@ -985,10 +1133,13 @@ BEGIN END IF; PERFORM insertIntoSales(tablename, i, region ); END LOOP; + set optimizer_trace_fallback=on; END; $$ LANGUAGE plpgsql; -- SELECT InsertManyIntoSales(20,'sales_par_CO'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1005,6 +1156,8 @@ delete from sales; insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 3; SELECT InsertManyIntoSales(40,'sales'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1030,6 +1183,8 @@ select sales.* from sales,s,r where sales.id = s.b and sales.month = r.b+1; (10 rows) update sales set month = month+1 from r,s where sales.id = s.b and sales.month = r.b+1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 34 not found in project list select sales.* from sales,s,r where sales.id = s.b and sales.month = r.b+2; id | year | month | day | region ----+------+-------+-----+----------- @@ -1055,6 +1210,8 @@ select sales.* from sales where id in (select s.b from s, r where s.a = r.b) and (4 rows) update sales set region = 'new_region' where id in (select s.b from s, r where s.a = r.b) and day in (select a from r); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 52 not found in project list select sales.* from sales where id in (select s.b from s, r where s.a = r.b) and day in (select a from r); id | year | month | day | region ----+------+-------+-----+------------ @@ -1107,6 +1264,8 @@ select r.* from r,s,sales where s.a = sales.day and sales.month = r.b; (6 rows) update r set b = r.b + 1 from s,sales where s.a = sales.day and sales.month = r.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 34 not found in project list select r.* from r,s,sales where s.a = sales.day and sales.month = r.b-1; a | b ---+---- @@ -1125,9 +1284,13 @@ select r.* from r,s,sales where s.a = sales.day and sales.month = r.b-1; delete from r; delete from s; delete from sales_par; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 3; SELECT InsertManyIntoSales(20,'sales_par'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1142,6 +1305,8 @@ select sales_par.* from sales_par where id in (select s.b from s, r where s.a = (2 rows) update sales_par set region = 'new_region' where id in (select s.b from s, r where s.a = r.b) and day in (select a from r); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select sales_par.* from sales_par where id in (select s.b from s, r where s.a = r.b) and day in (select a from r); id | year | month | day | region ----+------+-------+-----+------------ @@ -1160,6 +1325,8 @@ select sales_par.* from sales_par,s,r where sales_par.id = s.b and sales_par.mon (5 rows) update sales_par set month = month+1 from r,s where sales_par.id = s.b and sales_par.month = r.b+1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select sales_par.* from sales_par,s,r where sales_par.id = s.b and sales_par.month = r.b+2; id | year | month | day | region ----+------+-------+-----+------------ @@ -1178,6 +1345,8 @@ select sales_par.* from sales_par where id in (select s.b-1 from s,r where s.a = (2 rows) delete from sales_par where id in (select s.b-1 from s,r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables select sales_par.* from sales_par where id in (select s.b-1 from s,r where s.a = r.b); id | year | month | day | region ----+------+-------+-----+-------- @@ -1205,9 +1374,13 @@ select s.* from s, r,sales_par where s.a = r.b and s.b = sales_par.id; delete from r; delete from s; delete from sales_par2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables insert into r select generate_series(1, 10000), generate_series(1, 10000) * 3; insert into s select generate_series(1, 100), generate_series(1, 100) * 3; SELECT InsertManyIntoSales(20,'sales_par2'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1215,6 +1388,8 @@ SELECT InsertManyIntoSales(20,'sales_par2'); -- partition key select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2.month = r.b+1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables id | year | month | day | region ----+------+-------+-----+----------- 3 | 2005 | 4 | 4 | australia @@ -1225,7 +1400,11 @@ select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2 (5 rows) update sales_par2 set month = month+1 from r,s where sales_par2.id = s.b and sales_par2.month = r.b+1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2.month = r.b+2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables id | year | month | day | region ----+------+-------+-----+----------- 9 | 2004 | 11 | 10 | europe @@ -1237,7 +1416,11 @@ select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2 PREPARE plan0 as update sales_par2 set month = month+1 from r,s where sales_par2.id = s.b and sales_par2.month = r.b+2; EXECUTE plan0; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2.month = r.b+3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables id | year | month | day | region ----+------+-------+-----+----------- 9 | 2004 | 12 | 10 | europe @@ -1248,6 +1431,8 @@ select sales_par2.* from sales_par2,s,r where sales_par2.id = s.b and sales_par2 (5 rows) select sales_par2.* from sales_par2 where id in (select s.b-1 from s,r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables id | year | month | day | region ----+------+-------+-----+-------- 17 | 2005 | 6 | 18 | europe @@ -1255,13 +1440,19 @@ select sales_par2.* from sales_par2 where id in (select s.b-1 from s,r where s.a (2 rows) delete from sales_par2 where id in (select s.b-1 from s,r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select sales_par2.* from sales_par2 where id in (select s.b-1 from s,r where s.a = r.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables id | year | month | day | region ----+------+-------+-----+-------- (0 rows) -- heap table select s.* from s, r,sales_par2 where s.a = r.b and s.b = sales_par2.id; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ---+---- 3 | 9 @@ -1269,7 +1460,11 @@ select s.* from s, r,sales_par2 where s.a = r.b and s.b = sales_par2.id; (2 rows) delete from s using r,sales_par2 where s.a = r.b and s.b = sales_par2.id; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select s.* from s, r,sales_par2 where s.a = r.b and s.b = sales_par2.id; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ---+--- (0 rows) @@ -1288,6 +1483,8 @@ create table s_ao (a int, b int) with (appendonly = true) distributed by (a); insert into s_ao select generate_series(1, 100), generate_series(1, 100) * 3; -- heap table: delete -- select * from r where b in (select month-1 from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ---+--- 3 | 9 @@ -1296,19 +1493,27 @@ select * from r where b in (select month-1 from sales_par_CO, s_ao where sales_p (3 rows) delete from r where b in (select month-1 from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select * from r where b in (select month-1 from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ---+--- (0 rows) -- hdeap table: update: duplicate distribution key -- SELECT InsertManyIntoSales(20,'sales_par_CO'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- (1 row) select * from r where a in (select sales_par_CO.id from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ----+---- 6 | 18 @@ -1319,7 +1524,11 @@ select * from r where a in (select sales_par_CO.id from sales_par_CO, s_ao where (5 rows) update r set b = r.b + 1 where a in (select sales_par_CO.id from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select * from r where a in (select sales_par_CO.id from sales_par_CO, s_ao where sales_par_CO.id = s_ao.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ----+---- 15 | 46 @@ -1331,6 +1540,8 @@ select * from r where a in (select sales_par_CO.id from sales_par_CO, s_ao where -- heap table: delete: select * from r where a in (select month from sales_par_CO, s_ao, s where sales_par_CO.id = s_ao.b and s_ao.a = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ----+---- 7 | 21 @@ -1338,7 +1549,11 @@ select * from r where a in (select month from sales_par_CO, s_ao, s where sales_ (2 rows) delete from r where a in (select month from sales_par_CO, s_ao, s where sales_par_CO.id = s_ao.b and s_ao.a = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select * from r where a in (select month from sales_par_CO, s_ao, s where sales_par_CO.id = s_ao.b and s_ao.a = s.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b ---+--- (0 rows) @@ -1350,10 +1565,14 @@ select * from r where a in (select month from sales_par_CO, s_ao, s where sales_ delete from s; delete from m; delete from sales_par; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables insert into s select generate_series(1, 100), generate_series(1, 100) * 3; insert into s select generate_series(1, 10), generate_series(1, 10) * 4; insert into m select generate_series(1, 1000), generate_series(1, 1000) * 4; SELECT InsertManyIntoSales(20,'sales_par'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1387,6 +1606,8 @@ select * from s where a = 4 and a in (select b from m); (2 rows) update s set b = b + 1 where a = 4 and a in (select b from m); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list select * from s where a = 4 and a in (select b from m); a | b ---+---- @@ -1405,6 +1626,8 @@ select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) (4 rows) update sales_par set month = month+1 from s where sales_par.id in (s.b, s.b+1) and region = 'europe'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) and region='europe'; id | year | month | day | region ----+------+-------+-----+-------- @@ -1422,6 +1645,8 @@ select * from sales_par where region='asia' and id in (select b from s where a = (1 row) delete from sales_par where region='asia' and id in (select b from s where a = 1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables select * from sales_par where region='asia' and id in (select b from s where a = 1); id | year | month | day | region ----+------+-------+-----+-------- @@ -1434,6 +1659,8 @@ select * from sales_par where region='asia' and id in (select b from m where a = (1 row) delete from sales_par where region='asia' and id in (select b from m where a = 2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables select * from sales_par where region='asia' and id in (select b from m where a = 2); id | year | month | day | region ----+------+-------+-----+-------- @@ -1446,10 +1673,14 @@ select * from sales_par where region='asia' and id in (select b from m where a = delete from s; delete from m; delete from sales_par; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables insert into s select generate_series(1, 100), generate_series(1, 100) * 3; insert into s select generate_series(1, 10), generate_series(1, 10) * 4; insert into m select generate_series(1, 1000), generate_series(1, 1000) * 4; SELECT InsertManyIntoSales(20,'sales_par'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions insertmanyintosales --------------------- @@ -1485,6 +1716,8 @@ select * from s where a = 4 and a in (select b from m); PREPARE plan2 AS update s set b = b + 1 where a = 4 and a in (select b from m); EXECUTE plan2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list select * from s where a = 4 and a in (select b from m); a | b ---+---- @@ -1504,6 +1737,8 @@ select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) PREPARE plan3 AS update sales_par set month = month+1 from s where sales_par.id in (s.b, s.b+1) and region = 'europe'; EXECUTE plan3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select distinct sales_par.* from sales_par,s where sales_par.id in (s.b, s.b+1) and region='europe'; id | year | month | day | region ----+------+-------+-----+-------- @@ -1522,6 +1757,8 @@ select * from sales_par where region='asia' and id in (select b from s where a = PREPARE plan4 AS delete from sales_par where region='asia' and id in (select b from s where a = 1); EXECUTE plan4; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables select * from sales_par where region='asia' and id in (select b from s where a = 1); id | year | month | day | region ----+------+-------+-----+-------- @@ -1535,6 +1772,8 @@ select * from sales_par where region='asia' and id in (select b from m where a = PREPARE plan5 AS delete from sales_par where region='asia' and id in (select b from m where a = 2); EXECUTE plan5; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables select * from sales_par where region='asia' and id in (select b from m where a = 2); id | year | month | day | region ----+------+-------+-----+-------- @@ -1601,9 +1840,17 @@ analyze tab3; -- the `::regclass` way as it only matches the table in current search_path. set allow_system_table_mods=true; update pg_class set relpages = 10000 where oid='tab2'::regclass; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables update pg_class set reltuples = 100000000 where oid='tab2'::regclass; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables update pg_class set relpages = 100000000 where oid='tab3'::regclass; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables update pg_class set reltuples = 100000 where oid='tab3'::regclass; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables -- Planner: there is redistribute motion above tab1, however, we can also -- remove the explicit redistribute motion here because the final join -- co-locate with the result relation tab1. @@ -1630,6 +1877,224 @@ HINT: For non-partitioned tables, run analyze (). For Optimizer: GPORCA (16 rows) +-- ---------------------------------------------------------------------- +-- Test delete on partition table from join on another partition table +-- ---------------------------------------------------------------------- +drop table if exists part_eq_dis_1; +drop table if exists part_eq_dis_2; +create table part_eq_dis_1 (a int4, b int4) partition by range (a) (start(1) end(20) every(1), default partition extra); +create table part_eq_dis_2 (c int4, d int4) partition by range (c) (start(1) end(20) every(1), default partition extra); +insert into part_eq_dis_1 select generate_series(1,40), generate_series(1,40); +insert into part_eq_dis_2 select generate_series(1,40), generate_series(1,40); +drop table if exists part_neq_dis_1; +drop table if exists part_neq_dis_2; +create table part_neq_dis_1 (a int4, b int4) partition by range (b) (start(1) end(20) every(1), default partition extra); +create table part_neq_dis_2 (c int4, d int4) partition by range (d) (start(1) end(20) every(1), default partition extra); +insert into part_neq_dis_1 select generate_series(1,40), generate_series(1,40); +insert into part_neq_dis_2 select generate_series(1,40), generate_series(1,40); +-- T1 - distribution partitioned column, T2 - distributed partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where a = (select c from part_eq_dis_2 where c=1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where a = (select c from part_eq_dis_2 where c=21); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution partitioned column, T2 - non-distributed non-partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where a = (select d from part_eq_dis_2 where c=2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where a = (select d from part_eq_dis_2 where c=22); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution partitioned column, T2 - distributed non-partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where a = (select c from part_neq_dis_2 where c=3); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where a = (select c from part_neq_dis_2 where c=23); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution partitioned column, T2 - non-distributed partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where a = (select d from part_neq_dis_2 where c=4); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where a = (select d from part_neq_dis_2 where c=24); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution non-partitioned column, T2 - distributed partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where b = (select c from part_eq_dis_2 where c=5); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where b = (select c from part_eq_dis_2 where c=25); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution non-partitioned column, T2 - non-distributed non-partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where b = (select d from part_eq_dis_2 where c=6); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where b = (select d from part_eq_dis_2 where c=26); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution non-partitioned column, T2 - distributed non-partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where b = (select c from part_neq_dis_2 where c=7); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where b = (select c from part_neq_dis_2 where c=27); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution non-partitioned column, T2 - non-distributed partitioned column +-- a) non-default partition +delete from part_eq_dis_1 where b = (select d from part_neq_dis_2 where c=8); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_eq_dis_1 where b = (select d from part_neq_dis_2 where c=28); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution non-partitioned column, T2 - distributed partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where a = (select c from part_eq_dis_2 where c=9); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where a = (select c from part_eq_dis_2 where c=29); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution non-partitioned column, T2 - non-distributed non-partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where a = (select d from part_eq_dis_2 where c=10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where a = (select d from part_eq_dis_2 where c=30); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution non-partitioned column, T2 - non-distributed partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where a = (select d from part_neq_dis_2 where c=11); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where a = (select d from part_neq_dis_2 where c=31); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - distribution non-partitioned column, T2 - distributed non-partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where a = (select c from part_neq_dis_2 where c=12); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where a = (select c from part_neq_dis_2 where c=32); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution partitioned column, T2 - distributed partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where b = (select c from part_eq_dis_2 where c=13); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where b = (select c from part_eq_dis_2 where c=33); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution partitioned column, T2 - non-distributed non-partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where b = (select d from part_eq_dis_2 where c=14); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where b = (select d from part_eq_dis_2 where c=34); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution partitioned column, T2 - distributed non-partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where b = (select d from part_neq_dis_2 where c=15); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where b = (select d from part_neq_dis_2 where c=35); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- T1 - non-distribution partitioned column, T2 - non-distributed partitioned column +-- a) non-default partition +delete from part_neq_dis_1 where b = (select c from part_neq_dis_2 where c=16); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +-- b) default partition +delete from part_neq_dis_1 where b = (select c from part_neq_dis_2 where c=36); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +select * from part_eq_dis_1; + a | b +----+---- + 9 | 9 + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 + 29 | 29 + 30 | 30 + 31 | 31 + 32 | 32 + 33 | 33 + 34 | 34 + 35 | 35 + 36 | 36 + 37 | 37 + 38 | 38 + 39 | 39 + 40 | 40 +(24 rows) + +select * from part_neq_dis_1; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 37 | 37 + 38 | 38 + 39 | 39 + 40 | 40 +(24 rows) + +reset optimizer_trace_fallback; -- ---------------------------------------------------------------------- -- Test: teardown.sql -- ---------------------------------------------------------------------- diff --git a/contrib/pax_storage/src/test/regress/expected/aggregates_optimizer.out b/contrib/pax_storage/src/test/regress/expected/aggregates_optimizer.out index 2eb3d593464..813c1affc0b 100644 --- a/contrib/pax_storage/src/test/regress/expected/aggregates_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/aggregates_optimizer.out @@ -2,9 +2,20 @@ -- AGGREGATES -- -- start_ignore +SET optimizer_trace_fallback to on; -- end_ignore +-- directory paths are passed to us in environment variables +\getenv abs_srcdir PG_ABS_SRCDIR -- avoid bit-exact output here because operations may not be bit-exact. SET extra_float_digits = 0; +-- prepare some test data +CREATE TABLE aggtest ( + a int2, + b float4 +); +\set filename :abs_srcdir '/data/agg.data' +COPY aggtest FROM :'filename'; +ANALYZE aggtest; SELECT avg(four) AS avg_1 FROM onek; avg_1 -------------------- @@ -17,6 +28,30 @@ SELECT avg(a) AS avg_32 FROM aggtest WHERE a < 100; 32.6666666666666667 (1 row) +SELECT any_value(v) FROM (VALUES (1), (2), (3)) AS v (v); + any_value +----------- + 1 +(1 row) + +SELECT any_value(v) FROM (VALUES (NULL)) AS v (v); + any_value +----------- + +(1 row) + +SELECT any_value(v) FROM (VALUES (NULL), (1), (2)) AS v (v); + any_value +----------- + 1 +(1 row) + +SELECT any_value(v) FROM (VALUES (array['hello', 'world'])) AS v (v); + any_value +--------------- + {hello,world} +(1 row) + -- In 7.1, avg(float4) is computed using float8 arithmetic. -- Round the result to 3 digits to avoid platform-specific results. SELECT avg(b)::numeric(10,3) AS avg_107_943 FROM aggtest; @@ -26,6 +61,8 @@ SELECT avg(b)::numeric(10,3) AS avg_107_943 FROM aggtest; (1 row) SELECT avg(gpa) AS avg_3_4 FROM ONLY student; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause avg_3_4 --------- 3.4 @@ -50,6 +87,8 @@ SELECT sum(b) AS avg_431_773 FROM aggtest; (1 row) SELECT sum(gpa) AS avg_6_8 FROM ONLY student; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause avg_6_8 --------- 6.8 @@ -74,6 +113,8 @@ SELECT max(aggtest.b) AS max_324_78 FROM aggtest; (1 row) SELECT max(student.gpa) AS max_3_7 FROM student; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables max_3_7 --------- 3.7 @@ -663,7 +704,9 @@ from generate_series(1, 3) s1, lateral (select s2, sum(s1 + s2) sm from generate_series(1, 3) s2 group by s2) ss order by 1, 2; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: LATERAL + QUERY PLAN ------------------------------------------------------------------------ Sort Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) @@ -690,6 +733,8 @@ from generate_series(1, 3) s1, lateral (select s2, sum(s1 + s2) sm from generate_series(1, 3) s2 group by s2) ss order by 1, 2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: LATERAL s1 | s2 | sm ----+----+---- 1 | 1 | 2 @@ -707,7 +752,9 @@ explain (verbose, costs off) select array(select sum(x+y) s from generate_series(1,3) y group by y order by s) from generate_series(1,3) x; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery + QUERY PLAN ------------------------------------------------------------------- Function Scan on pg_catalog.generate_series x Output: (SubPlan 1) @@ -729,6 +776,8 @@ select array(select sum(x+y) s select array(select sum(x+y) s from generate_series(1,3) y group by y order by s) from generate_series(1,3) x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery array --------- {2,3,4} @@ -1204,6 +1253,8 @@ create index minmaxtest1i on minmaxtest1(f1); create index minmaxtest2i on minmaxtest2(f1 desc); create index minmaxtest3i on minmaxtest3(f1) where f1 is not null; insert into minmaxtest values(11), (12); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables insert into minmaxtest1 values(13), (14); insert into minmaxtest2 values(15), (16); insert into minmaxtest3 values(17), (18); @@ -1214,7 +1265,9 @@ analyze minmaxtest3; set enable_seqscan=off; explain (costs off) select min(f1), max(f1) from minmaxtest; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables + QUERY PLAN --------------------------------------------------------------------------------------------------- Result InitPlan 1 (returns $0) (slice1) @@ -1247,6 +1300,8 @@ explain (costs off) (28 rows) select min(f1), max(f1) from minmaxtest; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables min | max -----+----- 11 | 18 @@ -1256,7 +1311,9 @@ reset enable_seqscan; -- DISTINCT doesn't do anything useful here, but it shouldn't fail explain (costs off) select distinct min(f1), max(f1) from minmaxtest; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables + QUERY PLAN -------------------------------------------------------------- Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) @@ -1270,6 +1327,8 @@ explain (costs off) (9 rows) select distinct min(f1), max(f1) from minmaxtest; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables min | max -----+----- 11 | 18 @@ -1280,6 +1339,40 @@ NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table minmaxtest1 drop cascades to table minmaxtest2 drop cascades to table minmaxtest3 +-- DISTINCT can also trigger wrong answers with hash aggregation (bug #18465) +begin; +set local enable_sort = off; +explain (costs off) + select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1) + from int4_tbl t0; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int4_tbl t0 + SubPlan 1 + -> GroupAggregate + Group Key: min(t1.f1) + -> Aggregate + -> Result + Filter: (t1.f1 = t0.f1) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t1 + Optimizer: GPORCA +(12 rows) + +select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1) +from int4_tbl t0; + f1 | min +-------------+------------- + 0 | 0 + 123456 | 123456 + -123456 | -123456 + 2147483647 | 2147483647 + -2147483647 | -2147483647 +(5 rows) + +rollback; -- check for correct detection of nested-aggregate errors select max(min(unique1)) from tenk1; ERROR: aggregate function calls cannot be nested @@ -1372,7 +1465,9 @@ create temp table t1c () inherits (t1); NOTICE: table has parent, setting distribution columns to match parent table -- Ensure we don't remove any columns when t1 has a child table explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables + QUERY PLAN ------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -1385,7 +1480,9 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause + QUERY PLAN ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> HashAggregate @@ -1423,8 +1520,8 @@ drop table p_t1; -- -- Test GROUP BY matching of join columns that are type-coerced due to USING -- -create temp table t1(f1 int, f2 bigint); -create temp table t2(f1 bigint, f22 bigint); +create temp table t1(f1 int, f2 int); +create temp table t2(f1 bigint, f2 oid); select f1 from t1 left join t2 using (f1) group by f1; f1 ---- @@ -1445,7 +1542,205 @@ select t1.f1 from t1 left join t2 using (f1) group by f1; ERROR: column "t1.f1" must appear in the GROUP BY clause or be used in an aggregate function LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1; ^ +-- check case where we have to inject nullingrels into coerced join alias +select f1, count(*) from +t1 x(x0,x1) left join (t1 left join t2 using(f1)) on (x0 = 0) +group by f1; + f1 | count +----+------- +(0 rows) + +-- same, for a RelabelType coercion +select f2, count(*) from +t1 x(x0,x1) left join (t1 left join t2 using(f2)) on (x0 = 0) +group by f2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: CTranslatorQueryToDXL.cpp:4130: Failed assertion: ((((const Node*)(join_alias_node))->type) == T_Var) || ((((const Node*)(join_alias_node))->type) == T_FuncExpr) || ((((const Node*)(join_alias_node))->type) == T_CoalesceExpr) + f2 | count +----+------- +(0 rows) + drop table t1, t2; +-- +-- Test planner's selection of pathkeys for ORDER BY aggregates +-- +-- Ensure we order by four. This suits the most aggregate functions. +explain (costs off) +select sum(two order by two),max(four order by four), min(four order by four) +from tenk1; +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Ensure we order by two. It's a tie between ordering by two and four but +-- we tiebreak on the aggregate's position. +explain (costs off) +select + sum(two order by two), max(four order by four), + min(four order by four), max(two order by two) +from tenk1; +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Similar to above, but tiebreak on ordering by four +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two) +from tenk1; +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Ensure this one orders by ten since there are 3 aggregates that require ten +-- vs two that suit two and four. +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two), + sum(ten order by ten), min(ten order by ten), max(ten order by ten) +from tenk1; +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Try a case involving a GROUP BY clause where the GROUP BY column is also +-- part of an aggregate's ORDER BY clause. We want a sort order that works +-- for the GROUP BY along with the first and the last aggregate. +explain (costs off) +select + sum(unique1 order by ten, two), sum(unique1 order by four), + sum(unique1 order by two, four) +from tenk1 +group by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Hash aggregation with ORDER BY +QUERY PLAN +___________ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: ten + -> Sort + Sort Key: ten, two, four + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: ten + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Ensure that we never choose to provide presorted input to an Aggref with +-- a volatile function in the ORDER BY / DISTINCT clause. We want to ensure +-- these sorts are performed individually rather than at the query level. +explain (costs off) +select + sum(unique1 order by two), sum(unique1 order by four), + sum(unique1 order by four, two), sum(unique1 order by two, random()), + sum(unique1 order by two, random(), random() + 1) +from tenk1 +group by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Hash aggregation with ORDER BY + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: ten + -> Sort + Sort Key: ten, four, two + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: ten + -> Seq Scan on tenk1 + Optimizer: Postgres query optimizer +(9 rows) + +-- Ensure consecutive NULLs are properly treated as distinct from each other +select array_agg(distinct val) +from (select null as val from generate_series(1, 2)); + array_agg +----------- + {NULL} +(1 row) + +-- Ensure no ordering is requested when enable_presorted_aggregate is off +set enable_presorted_aggregate to off; +explain (costs off) +select sum(two order by two) from tenk1; +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +reset enable_presorted_aggregate; +-- +-- Test cases with FILTER clause +-- +-- Ensure we presort when the aggregate contains plain Vars +explain (costs off) +select sum(two order by two) filter (where two > 1) from tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER +QUERY PLAN +___________ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on tenk1 + Optimizer: Pivotal Optimizer (GPORCA) + +-- Ensure we presort for RelabelType'd Vars +explain (costs off) +select string_agg(distinct f1, ',') filter (where length(f1) > 1) +from varchar_tbl; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER +QUERY PLAN +___________ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on varchar_tbl + Optimizer: Pivotal Optimizer (GPORCA) + +-- Ensure we don't presort when the aggregate's argument contains an +-- explicit cast. +explain (costs off) +select string_agg(distinct f1::varchar(2), ',') filter (where length(f1) > 1) +from varchar_tbl; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER + QUERY PLAN +------------------------------------------------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> HashAggregate + Group Key: (((f1)::character varying(2))::text), ','::text + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (((f1)::character varying(2))::text), ','::text + -> Streaming HashAggregate + Group Key: ((f1)::character varying(2))::text, ','::text + -> Seq Scan on varchar_tbl + Optimizer: Postgres query optimizer +(11 rows) + -- -- Test combinations of DISTINCT and/or ORDER BY -- @@ -1508,6 +1803,8 @@ select array_agg(distinct a order by a desc nulls last) -- multi-arg aggs, strict/nonstrict, distinct/order by select aggfstr(a,b,c) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfstr --------------------------------------- {"(1,3,foo)","(2,2,bar)","(3,1,baz)"} @@ -1515,6 +1812,8 @@ select aggfstr(a,b,c) select aggfns(a,b,c) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ----------------------------------------------- {"(1,3,foo)","(0,,)","(2,2,bar)","(3,1,baz)"} @@ -1523,6 +1822,8 @@ select aggfns(a,b,c) select aggfstr(distinct a,b,c) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,3) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfstr --------------------------------------- {"(1,3,foo)","(2,2,bar)","(3,1,baz)"} @@ -1531,6 +1832,8 @@ select aggfstr(distinct a,b,c) select aggfns(distinct a,b,c) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,3) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ----------------------------------------------- {"(0,,)","(1,3,foo)","(2,2,bar)","(3,1,baz)"} @@ -1539,6 +1842,8 @@ select aggfns(distinct a,b,c) select aggfstr(distinct a,b,c order by b) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,3) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfstr --------------------------------------- {"(3,1,baz)","(2,2,bar)","(1,3,foo)"} @@ -1547,6 +1852,8 @@ select aggfstr(distinct a,b,c order by b) select aggfns(distinct a,b,c order by b) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,3) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ----------------------------------------------- {"(3,1,baz)","(2,2,bar)","(1,3,foo)","(0,,)"} @@ -1556,6 +1863,8 @@ select aggfns(distinct a,b,c order by b) select aggfns(distinct a,a,c order by c using ~<~,a) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ------------------------------------------------ {"(2,2,bar)","(3,3,baz)","(1,1,foo)","(0,0,)"} @@ -1564,6 +1873,8 @@ select aggfns(distinct a,a,c order by c using ~<~,a) select aggfns(distinct a,a,c order by c using ~<~) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ------------------------------------------------ {"(2,2,bar)","(3,3,baz)","(1,1,foo)","(0,0,)"} @@ -1572,6 +1883,8 @@ select aggfns(distinct a,a,c order by c using ~<~) select aggfns(distinct a,a,c order by a) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ------------------------------------------------ {"(0,0,)","(1,1,foo)","(2,2,bar)","(3,3,baz)"} @@ -1580,11 +1893,26 @@ select aggfns(distinct a,a,c order by a) select aggfns(distinct a,b,c order by a,c using ~<~,b) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns ----------------------------------------------- {"(0,,)","(1,3,foo)","(2,2,bar)","(3,1,baz)"} (1 row) +-- test a more complex permutation that has previous caused issues +select + string_agg(distinct 'a', ','), + sum(( + select sum(1) + from (values(1)) b(id) + where a.id = b.id +)) from unnest(array[1]) a(id); + string_agg | sum +------------+----- + a | 1 +(1 row) + -- check node I/O via view creation and usage, also deparsing logic -- start_ignore -- pg_get_viewdef() runs some internal queries on catalogs, and we don't want @@ -1602,7 +1930,7 @@ select * from agg_view1; select pg_get_viewdef('agg_view1'::regclass); pg_get_viewdef --------------------------------------------------------------------------------------------------------------------- - SELECT aggfns(v.a, v.b, v.c) AS aggfns + + SELECT aggfns(a, b, c) AS aggfns + FROM ( VALUES (1,3,'foo'::text), (0,NULL::integer,NULL::text), (2,2,'bar'::text), (3,1,'baz'::text)) v(a, b, c); (1 row) @@ -1654,7 +1982,7 @@ select * from agg_view1; select pg_get_viewdef('agg_view1'::regclass); pg_get_viewdef --------------------------------------------------------------------------------------------------------------------- - SELECT aggfns(v.a, v.b, v.c ORDER BY (v.b + 1)) AS aggfns + + SELECT aggfns(a, b, c ORDER BY (b + 1)) AS aggfns + FROM ( VALUES (1,3,'foo'::text), (0,NULL::integer,NULL::text), (2,2,'bar'::text), (3,1,'baz'::text)) v(a, b, c); (1 row) @@ -1670,7 +1998,7 @@ select * from agg_view1; select pg_get_viewdef('agg_view1'::regclass); pg_get_viewdef --------------------------------------------------------------------------------------------------------------------- - SELECT aggfns(v.a, v.a, v.c ORDER BY v.b) AS aggfns + + SELECT aggfns(a, a, c ORDER BY b) AS aggfns + FROM ( VALUES (1,3,'foo'::text), (0,NULL::integer,NULL::text), (2,2,'bar'::text), (3,1,'baz'::text)) v(a, b, c); (1 row) @@ -1686,7 +2014,7 @@ select * from agg_view1; select pg_get_viewdef('agg_view1'::regclass); pg_get_viewdef --------------------------------------------------------------------------------------------------------------------- - SELECT aggfns(v.a, v.b, v.c ORDER BY v.c USING ~<~ NULLS LAST) AS aggfns + + SELECT aggfns(a, b, c ORDER BY c USING ~<~ NULLS LAST) AS aggfns + FROM ( VALUES (1,3,'foo'::text), (0,NULL::integer,NULL::text), (2,2,'bar'::text), (3,1,'baz'::text)) v(a, b, c); (1 row) @@ -1780,6 +2108,8 @@ select string_agg(distinct f1::text, ',' order by f1::text) from varchar_tbl; - -- FILTER tests select min(unique1) filter (where unique1 > 100) from tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER min ----- 101 @@ -1787,6 +2117,8 @@ select min(unique1) filter (where unique1 > 100) from tenk1; select ten, sum(distinct four) filter (where four::text ~ '123') from onek a group by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | sum -----+----- 0 | @@ -1804,6 +2136,8 @@ group by ten; select ten, sum(distinct four) filter (where four > 10) from onek a group by ten having exists (select 1 from onek b where sum(distinct a.four) = b.four); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | sum -----+----- 0 | @@ -1815,6 +2149,8 @@ having exists (select 1 from onek b where sum(distinct a.four) = b.four); select max(foo COLLATE "C") filter (where (bar collate "POSIX") > '0') from (values ('a', 'b')) AS v(foo,bar); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation max ----- a @@ -1833,6 +2169,8 @@ from (values (2),(3)) t1(outer_c); -- inner query is aggregation query select (select count(*) filter (where outer_c <> 0) from (values (1)) t0(inner_c)) from (values (2),(3)) t1(outer_c); -- outer query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER count ------- 2 @@ -1841,6 +2179,8 @@ from (values (2),(3)) t1(outer_c); -- outer query is aggregation query select (select count(inner_c) filter (where outer_c <> 0) from (values (1)) t0(inner_c)) from (values (2),(3)) t1(outer_c); -- inner query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER count ------- 1 @@ -1851,6 +2191,8 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)) filter (where o.unique1 < 10)) from tenk1 o; -- outer query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER max ------ 9998 @@ -1859,6 +2201,8 @@ from tenk1 o; -- outer query is aggregation query -- subquery in FILTER clause (PostgreSQL extension) select sum(unique1) FILTER (WHERE unique1 IN (SELECT unique1 FROM onek where unique1 < 100)) FROM tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum ------ 4950 @@ -1868,6 +2212,10 @@ select sum(unique1) FILTER (WHERE select aggfns(distinct a,b,c order by a,c using ~<~,b) filter (where a > 1) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns --------------------------- {"(2,2,bar)","(3,1,baz)"} @@ -1903,14 +2251,14 @@ from generate_series(1,5) x, (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) group by p order by p; ERROR: sum is not an ordered-set aggregate, so it cannot have WITHIN GROUP -LINE 1: select p, sum() within group (order by x::float8) +LINE 1: select p, sum() within group (order by x::float8) -- error ^ select p, percentile_cont(p,p) -- error from generate_series(1,5) x, (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) group by p order by p; ERROR: WITHIN GROUP is required for ordered-set aggregate percentile_cont -LINE 1: select p, percentile_cont(p,p) +LINE 1: select p, percentile_cont(p,p) -- error ^ select percentile_cont(0.5) within group (order by b) from aggtest; percentile_cont @@ -1993,6 +2341,8 @@ from generate_series(1,6) x; (1 row) select ten, mode() within group (order by string4) from tenk1 group by ten order by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation ten | mode -----+-------- 0 | HHHHxx @@ -2017,6 +2367,8 @@ from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::tex -- check collation propagates up in suitable cases: select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX")) from (values ('fred'),('jim')) v(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_collation_for ------------------ "POSIX" @@ -2104,22 +2456,26 @@ select ten, -- start_ignore -- pg_get_viewdef() runs some internal queries on catalogs, and we don't want -- fallback notices about those. +reset optimizer_trace_fallback; -- end_ignore select pg_get_viewdef('aggordview1'); - pg_get_viewdef -------------------------------------------------------------------------------------------------------------------------------- - SELECT tenk1.ten, + - percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) AS p50, + - percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) FILTER (WHERE (tenk1.hundred = 1)) AS px,+ - rank(5, 'AZZZZ'::name, 50) WITHIN GROUP (ORDER BY tenk1.hundred, tenk1.string4 DESC, tenk1.hundred) AS rank + - FROM tenk1 + - GROUP BY tenk1.ten + - ORDER BY tenk1.ten; + pg_get_viewdef +------------------------------------------------------------------------------------------------------------------- + SELECT ten, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY thousand) AS p50, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY thousand) FILTER (WHERE (hundred = 1)) AS px,+ + rank(5, 'AZZZZ'::name, 50) WITHIN GROUP (ORDER BY hundred, string4 DESC, hundred) AS rank + + FROM tenk1 + + GROUP BY ten + + ORDER BY ten; (1 row) -- start_ignore +SET optimizer_trace_fallback to on; -- end_ignore select * from aggordview1 order by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | p50 | px | rank -----+-----+-----+------ 0 | 490 | | 101 @@ -2187,12 +2543,16 @@ select string_agg(v, decode('ee', 'hex')) from bytea_test_table; drop table bytea_test_table; -- FILTER tests select min(unique1) filter (where unique1 > 100) from tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER min ----- 101 (1 row) select sum(1/ten) filter (where ten > 0) from tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum ------ 1000 @@ -2200,6 +2560,8 @@ select sum(1/ten) filter (where ten > 0) from tenk1; select ten, sum(distinct four) filter (where four::text ~ '123') from onek a group by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | sum -----+----- 0 | @@ -2217,6 +2579,8 @@ group by ten; select ten, sum(distinct four) filter (where four > 10) from onek a group by ten having exists (select 1 from onek b where sum(distinct a.four) = b.four); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | sum -----+----- 0 | @@ -2228,6 +2592,8 @@ having exists (select 1 from onek b where sum(distinct a.four) = b.four); select max(foo COLLATE "C") filter (where (bar collate "POSIX") > '0') from (values ('a', 'b')) AS v(foo,bar); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation max ----- a @@ -2246,6 +2612,8 @@ from (values (2),(3)) t1(outer_c); -- inner query is aggregation query select (select count(*) filter (where outer_c <> 0) from (values (1)) t0(inner_c)) from (values (2),(3)) t1(outer_c); -- outer query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER count ------- 2 @@ -2254,6 +2622,8 @@ from (values (2),(3)) t1(outer_c); -- outer query is aggregation query select (select count(inner_c) filter (where outer_c <> 0) from (values (1)) t0(inner_c)) from (values (2),(3)) t1(outer_c); -- inner query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER count ------- 1 @@ -2264,6 +2634,8 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)) filter (where o.unique1 < 10)) from tenk1 o; -- outer query is aggregation query +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER max ------ 9998 @@ -2272,6 +2644,8 @@ from tenk1 o; -- outer query is aggregation query -- subquery in FILTER clause (PostgreSQL extension) select sum(unique1) FILTER (WHERE unique1 IN (SELECT unique1 FROM onek where unique1 < 100)) FROM tenk1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum ------ 4950 @@ -2281,6 +2655,10 @@ select sum(unique1) FILTER (WHERE select aggfns(distinct a,b,c order by a,c using ~<~,b) filter (where a > 1) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION aggfns --------------------------- {"(2,2,bar)","(3,1,baz)"} @@ -2288,12 +2666,16 @@ select aggfns(distinct a,b,c order by a,c using ~<~,b) filter (where a > 1) -- check handling of bare boolean Var in FILTER select max(0) filter (where b1) from bool_test; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER max ----- 0 (1 row) select (select max(0) filter (where b1)) from bool_test; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER max ----- 0 @@ -2346,14 +2728,14 @@ from generate_series(1,5) x, (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) group by p order by p; ERROR: sum is not an ordered-set aggregate, so it cannot have WITHIN GROUP -LINE 1: select p, sum() within group (order by x::float8) +LINE 1: select p, sum() within group (order by x::float8) -- error ^ select p, percentile_cont(p,p) -- error from generate_series(1,5) x, (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) group by p order by p; ERROR: WITHIN GROUP is required for ordered-set aggregate percentile_cont -LINE 1: select p, percentile_cont(p,p) +LINE 1: select p, percentile_cont(p,p) -- error ^ select percentile_cont(0.5) within group (order by b) from aggtest; percentile_cont @@ -2436,6 +2818,8 @@ from generate_series(1,6) x; (1 row) select ten, mode() within group (order by string4) from tenk1 group by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation ten | mode -----+-------- 0 | HHHHxx @@ -2460,6 +2844,8 @@ from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::tex -- check collation propagates up in suitable cases: select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX")) from (values ('fred'),('jim')) v(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_collation_for ------------------ "POSIX" @@ -2547,22 +2933,26 @@ select ten, -- start_ignore -- pg_get_viewdef() runs some internal queries on catalogs, and we don't want -- fallback notices about those. +reset optimizer_trace_fallback; -- end_ignore select pg_get_viewdef('aggordview1'); - pg_get_viewdef -------------------------------------------------------------------------------------------------------------------------------- - SELECT tenk1.ten, + - percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) AS p50, + - percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) FILTER (WHERE (tenk1.hundred = 1)) AS px,+ - rank(5, 'AZZZZ'::name, 50) WITHIN GROUP (ORDER BY tenk1.hundred, tenk1.string4 DESC, tenk1.hundred) AS rank + - FROM tenk1 + - GROUP BY tenk1.ten + - ORDER BY tenk1.ten; + pg_get_viewdef +------------------------------------------------------------------------------------------------------------------- + SELECT ten, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY thousand) AS p50, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY thousand) FILTER (WHERE (hundred = 1)) AS px,+ + rank(5, 'AZZZZ'::name, 50) WITHIN GROUP (ORDER BY hundred, string4 DESC, hundred) AS rank + + FROM tenk1 + + GROUP BY ten + + ORDER BY ten; (1 row) -- start_ignore +SET optimizer_trace_fallback to on; -- end_ignore select * from aggordview1 order by ten; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER ten | p50 | px | rank -----+-----+-----+------ 0 | 490 | | 101 @@ -2671,6 +3061,8 @@ create aggregate my_sum(int4) sfunc = avg_transfn, finalfunc = sum_finalfn ); +-- reset the plan cache, sometimes it would re-plan these prepared statements and log ORCA fallbacks +discard plans; -- aggregate state should be shared as aggs are the same. select my_avg(one),my_avg(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 @@ -2682,6 +3074,7 @@ NOTICE: avg_transfn called with 3 2 | 2 (1 row) +discard plans; -- aggregate state should be shared as transfn is the same for both aggs. select my_avg(one),my_sum(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 @@ -2693,6 +3086,7 @@ NOTICE: avg_transfn called with 3 2 | 4 (1 row) +discard plans; -- same as previous one, but with DISTINCT, which requires sorting the input. select my_avg(distinct one),my_sum(distinct one) from (values(1),(3),(1)) t(one); NOTICE: avg_transfn called with 1 @@ -2704,6 +3098,7 @@ NOTICE: avg_transfn called with 3 2 | 4 (1 row) +discard plans; -- shouldn't share states due to the distinctness not matching. select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 @@ -2715,8 +3110,11 @@ NOTICE: avg_transfn called with 3 2 | 4 (1 row) +discard plans; -- shouldn't share states due to the filter clause not matching. select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER NOTICE: avg_transfn called with 1 NOTICE: avg_transfn called with 3 NOTICE: avg_transfn called with 3 @@ -2725,6 +3123,7 @@ NOTICE: avg_transfn called with 3 3 | 4 (1 row) +discard plans; -- this should not share the state due to different input columns. select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two); NOTICE: avg_transfn called with 1 @@ -2787,6 +3186,8 @@ create aggregate my_avg_init2(int4) finalfunc = avg_finalfn, initcond = '(4,0)' ); +-- reset the plan cache, sometimes it would re-plan these prepared statements and log ORCA fallbacks +discard plans; -- state should be shared if INITCONDs are matching select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 @@ -2853,6 +3254,7 @@ create aggregate my_half_sum(int4) sfunc = sum_transfn, finalfunc = halfsum_finalfn ); +discard plans; -- Agg state should be shared even though my_sum has no finalfn select my_sum(one),my_half_sum(one) from (values(1),(2),(3),(4)) t(one); NOTICE: sum_transfn called with 1 @@ -2921,12 +3323,61 @@ CREATE AGGREGATE balk(int4) INITCOND = '0' ); -- force use of parallelism --- Pax not support parallel scan --- ALTER TABLE tenk1 set (parallel_workers = 4); --- SET LOCAL parallel_setup_cost=0; --- SET LOCAL max_parallel_workers_per_gather=4; --- EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1; --- SELECT balk(hundred) FROM tenk1; +ALTER TABLE tenk1 set (parallel_workers = 4); +SET LOCAL parallel_setup_cost=0; +SET LOCAL max_parallel_workers_per_gather=4; +EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1; + QUERY PLAN +------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on tenk1 +(5 rows) + +SELECT balk(hundred) FROM tenk1; + balk +------ + +(1 row) + +ROLLBACK; +-- test multiple usage of an aggregate whose finalfn returns a R/W datum +BEGIN; +CREATE FUNCTION rwagg_sfunc(x anyarray, y anyarray) RETURNS anyarray +LANGUAGE plpgsql IMMUTABLE AS $$ +BEGIN + RETURN array_fill(y[1], ARRAY[4]); +END; +$$; +CREATE FUNCTION rwagg_finalfunc(x anyarray) RETURNS anyarray +LANGUAGE plpgsql STRICT IMMUTABLE AS $$ +DECLARE + res x%TYPE; +BEGIN + -- assignment is essential for this test, it expands the array to R/W + res := array_fill(x[1], ARRAY[4]); + RETURN res; +END; +$$; +CREATE AGGREGATE rwagg(anyarray) ( + STYPE = anyarray, + SFUNC = rwagg_sfunc, + FINALFUNC = rwagg_finalfunc +); +CREATE FUNCTION eatarray(x real[]) RETURNS real[] +LANGUAGE plpgsql STRICT IMMUTABLE AS $$ +BEGIN + x[1] := x[1] + 1; + RETURN x; +END; +$$; +SELECT eatarray(rwagg(ARRAY[1.0::real])), eatarray(rwagg(ARRAY[1.0::real])); + eatarray | eatarray +-----------+----------- + {2,1,1,1} | {2,1,1,1} +(1 row) + ROLLBACK; -- test coverage for aggregate combine/serial/deserial functions BEGIN; @@ -2945,7 +3396,9 @@ FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1) u; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation + QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: variance(tenk1.unique1), sum((tenk1.unique1)::bigint), regr_count((tenk1.unique1)::double precision, (tenk1.unique1)::double precision) @@ -2971,6 +3424,8 @@ FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1) u; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation variance | sum | regr_count ----------------------+-----------+------------ 8333541.588539713493 | 199980000 | 40000 @@ -2984,7 +3439,9 @@ FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1) u; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation + QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: variance((tenk1.unique1)::bigint), avg((tenk1.unique1)::numeric) @@ -3010,6 +3467,8 @@ FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk1) u; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation variance | avg ----------------------+----------------------- 8333541.588539713493 | 4999.5000000000000000 @@ -3066,7 +3525,9 @@ set enable_memoize to off; explain (costs off) select 1 from tenk1 where (hundred, thousand) in (select twothousand, twothousand from onek); - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery + QUERY PLAN --------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Result @@ -3278,13 +3739,12 @@ drop table agg_hash_1; -- drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; --- fix github issue #12061 numsegments of general locus is not -1 on create_minmaxagg_path -/* - * On the arm platform, `Seq Scan` is executed frequently, resulting in unstable output. - */ -set enable_indexonlyscan = off; -explain analyze select count(*) from pg_class, (select count(*) >0 from (select count(*) from pg_class where relname like 't%')x)y; - QUERY PLAN +-- GitHub issue https://github.com/greenplum-db/gpdb/issues/12061 +-- numsegments of the general locus should be -1 on create_minmaxagg_path +explain analyze select count(*) from pg_class, (select count(*) > 0 from (select count(*) from pg_class where relnatts > 8) x) y; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables + QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- Aggregate (cost=10000000017.74..10000000017.75 rows=1 width=8) (actual time=0.142..0.142 rows=1 loops=1) -> Nested Loop (cost=10000000000.02..10000000016.44 rows=520 width=0) (actual time=0.016..0.114 rows=686 loops=1) @@ -3297,5 +3757,5 @@ explain analyze select count(*) from pg_class, (select count(*) >0 from (selec Memory used: 128000kB Optimizer: Postgres query optimizer Execution Time: 0.198 ms -(11 rows) +(10 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/alter_distribution_policy_optimizer.out b/contrib/pax_storage/src/test/regress/expected/alter_distribution_policy_optimizer.out index 0ae68140d9f..df30a5d8f05 100644 --- a/contrib/pax_storage/src/test/regress/expected/alter_distribution_policy_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/alter_distribution_policy_optimizer.out @@ -1,7 +1,3 @@ --- start_matchsubs --- m/cluster.c:\d+/ --- s/cluster.c:\d+/cluster.c:###/ --- end_matchsubs -- ALTER TABLE ... SET DISTRIBUTED BY -- This is the main interface for system expansion \set DATA values(1, 2), (2, 3), (3, 4) @@ -175,7 +171,7 @@ ERROR: permission denied: "pg_class" is a system catalog create table atsdb (i int, j text) distributed by (j); insert into atsdb select i, i::text from generate_series(1, 10) i; alter table atsdb set with(appendonly = true); -ERROR: PAX not allow swap relation files for different AM (cluster.c:1487) +ERROR: PAX not allow swap relation files for different AM (cluster.c:1535) select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_class.oid = 'atsdb'::regclass and relid = pg_class.oid; relname | ?column? | reloptions @@ -186,15 +182,15 @@ select * from atsdb; i | j ----+---- 1 | 1 + 9 | 9 + 10 | 10 + 2 | 2 + 3 | 3 4 | 4 7 | 7 - 2 | 2 5 | 5 - 8 | 8 - 3 | 3 6 | 6 - 9 | 9 - 10 | 10 + 8 | 8 (10 rows) drop table atsdb; @@ -209,106 +205,106 @@ alter table atsdb drop column i; select * from atsdb; j | t | n -----+-----+----- - 2 | 3 | 4 - 4 | 5 | 6 + 5 | 6 | 7 6 | 7 | 8 - 8 | 9 | 10 + 9 | 10 | 11 10 | 11 | 12 - 12 | 13 | 14 + 11 | 12 | 13 + 13 | 14 | 15 14 | 15 | 16 - 16 | 17 | 18 - 18 | 19 | 20 - 20 | 21 | 22 - 22 | 23 | 24 - 24 | 25 | 26 - 26 | 27 | 28 + 17 | 18 | 19 + 21 | 22 | 23 + 25 | 26 | 27 28 | 29 | 30 - 30 | 31 | 32 32 | 33 | 34 - 34 | 35 | 36 - 36 | 37 | 38 - 38 | 39 | 40 - 40 | 41 | 42 - 42 | 43 | 44 - 44 | 45 | 46 - 46 | 47 | 48 - 48 | 49 | 50 - 50 | 51 | 52 + 33 | 34 | 35 + 43 | 44 | 45 52 | 53 | 54 - 54 | 55 | 56 56 | 57 | 58 58 | 59 | 60 - 60 | 61 | 62 62 | 63 | 64 - 64 | 65 | 66 - 66 | 67 | 68 - 68 | 69 | 70 - 70 | 71 | 72 - 72 | 73 | 74 - 74 | 75 | 76 - 76 | 77 | 78 - 78 | 79 | 80 - 80 | 81 | 82 + 63 | 64 | 65 + 67 | 68 | 69 + 73 | 74 | 75 82 | 83 | 84 - 84 | 85 | 86 - 86 | 87 | 88 - 88 | 89 | 90 - 90 | 91 | 92 - 92 | 93 | 94 - 94 | 95 | 96 + 85 | 86 | 87 96 | 97 | 98 - 98 | 99 | 100 100 | 101 | 102 + 2 | 3 | 4 3 | 4 | 5 - 5 | 6 | 7 + 4 | 5 | 6 7 | 8 | 9 - 9 | 10 | 11 - 11 | 12 | 13 - 13 | 14 | 15 - 15 | 16 | 17 - 17 | 18 | 19 + 8 | 9 | 10 + 16 | 17 | 18 + 18 | 19 | 20 19 | 20 | 21 - 21 | 22 | 23 - 23 | 24 | 25 - 25 | 26 | 27 + 22 | 23 | 24 + 24 | 25 | 26 27 | 28 | 29 29 | 30 | 31 - 31 | 32 | 33 - 33 | 34 | 35 - 35 | 36 | 37 + 34 | 35 | 36 37 | 38 | 39 39 | 40 | 41 41 | 42 | 43 - 43 | 44 | 45 + 42 | 43 | 44 45 | 46 | 47 - 47 | 48 | 49 - 49 | 50 | 51 51 | 52 | 53 53 | 54 | 55 + 54 | 55 | 56 55 | 56 | 57 - 57 | 58 | 59 59 | 60 | 61 - 61 | 62 | 63 - 63 | 64 | 65 + 60 | 61 | 62 65 | 66 | 67 - 67 | 68 | 69 - 69 | 70 | 71 - 71 | 72 | 73 - 73 | 74 | 75 + 66 | 67 | 68 + 70 | 71 | 72 75 | 76 | 77 77 | 78 | 79 - 79 | 80 | 81 + 80 | 81 | 82 81 | 82 | 83 + 84 | 85 | 86 + 90 | 91 | 92 + 92 | 93 | 94 + 93 | 94 | 95 + 94 | 95 | 96 + 97 | 98 | 99 + 99 | 100 | 101 + 101 | 102 | 103 + 12 | 13 | 14 + 15 | 16 | 17 + 20 | 21 | 22 + 23 | 24 | 25 + 26 | 27 | 28 + 30 | 31 | 32 + 31 | 32 | 33 + 35 | 36 | 37 + 36 | 37 | 38 + 38 | 39 | 40 + 40 | 41 | 42 + 44 | 45 | 46 + 46 | 47 | 48 + 47 | 48 | 49 + 48 | 49 | 50 + 49 | 50 | 51 + 50 | 51 | 52 + 57 | 58 | 59 + 61 | 62 | 63 + 64 | 65 | 66 + 68 | 69 | 70 + 69 | 70 | 71 + 71 | 72 | 73 + 72 | 73 | 74 + 74 | 75 | 76 + 76 | 77 | 78 + 78 | 79 | 80 + 79 | 80 | 81 83 | 84 | 85 - 85 | 86 | 87 + 86 | 87 | 88 87 | 88 | 89 + 88 | 89 | 90 89 | 90 | 91 91 | 92 | 93 - 93 | 94 | 95 95 | 96 | 97 - 97 | 98 | 99 - 99 | 100 | 101 - 101 | 102 | 103 + 98 | 99 | 100 (100 rows) alter table atsdb set distributed by (t); @@ -320,7 +316,7 @@ select * from distcheck where rel = 'atsdb'; alter table atsdb drop column n; alter table atsdb set with(appendonly = true, compresslevel = 3); -ERROR: PAX not allow swap relation files for different AM (cluster.c:1487) +ERROR: PAX not allow swap relation files for different AM (cluster.c:1535) select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_class.oid = 'atsdb'::regclass and relid = pg_class.oid; relname | ?column? | reloptions @@ -337,105 +333,105 @@ select * from atsdb; j | t -----+----- 9 | 10 + 10 | 11 11 | 12 13 | 14 - 15 | 16 - 17 | 18 - 29 | 30 - 31 | 32 + 28 | 29 33 | 34 - 35 | 36 + 58 | 59 + 67 | 68 + 85 | 86 + 8 | 9 + 16 | 17 37 | 38 - 49 | 50 + 45 | 46 51 | 52 - 53 | 54 55 | 56 - 57 | 58 - 69 | 70 - 71 | 72 - 73 | 74 - 75 | 76 - 77 | 78 - 89 | 90 - 91 | 92 + 65 | 66 + 80 | 81 + 81 | 82 93 | 94 - 95 | 96 - 97 | 98 99 | 100 101 | 102 - 2 | 3 - 4 | 5 - 6 | 7 - 8 | 9 + 12 | 13 20 | 21 - 22 | 23 - 24 | 25 26 | 27 - 28 | 29 - 40 | 41 + 30 | 31 + 47 | 48 + 50 | 51 + 57 | 58 + 61 | 62 + 68 | 69 + 78 | 79 + 79 | 80 + 87 | 88 + 88 | 89 + 6 | 7 + 32 | 33 + 43 | 44 + 52 | 53 + 63 | 64 + 82 | 83 + 2 | 3 + 3 | 4 + 18 | 19 + 19 | 20 + 29 | 30 + 39 | 40 42 | 43 + 60 | 61 + 77 | 78 + 84 | 85 + 92 | 93 + 97 | 98 + 15 | 16 + 35 | 36 + 36 | 37 + 40 | 41 44 | 45 46 | 47 - 48 | 49 - 60 | 61 - 62 | 63 + 49 | 50 64 | 65 - 66 | 67 - 68 | 69 - 80 | 81 - 82 | 83 - 84 | 85 + 69 | 70 + 72 | 73 + 83 | 84 86 | 87 - 88 | 89 - 3 | 4 + 91 | 92 + 98 | 99 5 | 6 - 7 | 8 - 19 | 20 + 14 | 15 + 17 | 18 21 | 22 - 23 | 24 25 | 26 + 56 | 57 + 62 | 63 + 73 | 74 + 96 | 97 + 100 | 101 + 4 | 5 + 7 | 8 + 22 | 23 + 24 | 25 27 | 28 - 39 | 40 - 41 | 42 - 43 | 44 - 45 | 46 - 47 | 48 - 59 | 60 - 61 | 62 - 63 | 64 - 65 | 66 - 67 | 68 - 79 | 80 - 81 | 82 - 83 | 84 - 85 | 86 - 87 | 88 - 10 | 11 - 12 | 13 - 14 | 15 - 16 | 17 - 18 | 19 - 30 | 31 - 32 | 33 34 | 35 - 36 | 37 - 38 | 39 - 50 | 51 - 52 | 53 + 41 | 42 + 53 | 54 54 | 55 - 56 | 57 - 58 | 59 + 59 | 60 + 66 | 67 70 | 71 - 72 | 73 - 74 | 75 - 76 | 77 - 78 | 79 + 75 | 76 90 | 91 - 92 | 93 94 | 95 - 96 | 97 - 98 | 99 - 100 | 101 + 23 | 24 + 31 | 32 + 38 | 39 + 48 | 49 + 71 | 72 + 74 | 75 + 76 | 77 + 89 | 90 + 95 | 96 (100 rows) alter table atsdb set distributed by (j); @@ -454,106 +450,106 @@ select relname, segrelid != 0, reloptions from pg_class, pg_appendonly where pg_ select * from atsdb; j | t -----+----- - 14 | 15 - 15 | 16 - 31 | 32 - 33 | 34 - 36 | 37 - 37 | 38 - 48 | 49 - 52 | 53 - 70 | 71 - 84 | 85 - 85 | 86 - 2 | 3 - 16 | 17 - 29 | 30 - 34 | 35 - 49 | 50 - 68 | 69 - 71 | 72 - 83 | 84 + 9 | 10 + 10 | 11 + 11 | 12 13 | 14 - 17 | 18 28 | 29 - 30 | 31 - 35 | 36 - 50 | 51 - 51 | 52 + 33 | 34 + 58 | 59 + 67 | 68 + 85 | 86 + 6 | 7 + 32 | 33 + 43 | 44 + 52 | 53 63 | 64 - 69 | 70 - 72 | 73 - 86 | 87 + 82 | 83 + 5 | 6 + 14 | 15 + 17 | 18 + 21 | 22 + 25 | 26 + 56 | 57 + 62 | 63 + 73 | 74 + 96 | 97 + 100 | 101 + 2 | 3 3 | 4 - 6 | 7 18 | 19 - 20 | 21 + 19 | 20 + 29 | 30 + 39 | 40 42 | 43 - 53 | 54 - 73 | 74 - 76 | 77 + 60 | 61 77 | 78 - 88 | 89 - 90 | 91 - 91 | 92 - 5 | 6 - 19 | 20 - 22 | 23 - 38 | 39 - 40 | 41 - 41 | 42 - 54 | 55 - 55 | 56 - 74 | 75 - 87 | 88 + 84 | 85 + 92 | 93 97 | 98 4 | 5 7 | 8 - 21 | 22 - 39 | 40 - 56 | 57 - 57 | 58 - 75 | 76 - 89 | 90 - 96 | 97 - 12 | 13 - 23 | 24 - 26 | 27 + 22 | 23 + 24 | 25 27 | 28 - 43 | 44 - 58 | 59 - 60 | 61 - 65 | 66 - 82 | 83 - 93 | 94 - 99 | 100 - 8 | 9 - 9 | 10 - 25 | 26 - 32 | 33 - 46 | 47 - 47 | 48 + 34 | 35 + 41 | 42 + 53 | 54 + 54 | 55 59 | 60 - 62 | 63 - 64 | 65 - 78 | 79 + 66 | 67 + 70 | 71 + 75 | 76 + 90 | 91 + 94 | 95 + 8 | 9 + 16 | 17 + 37 | 38 + 45 | 46 + 51 | 52 + 55 | 56 + 65 | 66 80 | 81 81 | 82 - 92 | 93 - 94 | 95 - 95 | 96 - 100 | 101 + 93 | 94 + 99 | 100 101 | 102 - 10 | 11 - 11 | 12 - 24 | 25 - 44 | 45 - 45 | 46 + 12 | 13 + 20 | 21 + 26 | 27 + 30 | 31 + 47 | 48 + 50 | 51 + 57 | 58 61 | 62 - 66 | 67 - 67 | 68 + 68 | 69 + 78 | 79 79 | 80 + 87 | 88 + 88 | 89 + 15 | 16 + 35 | 36 + 36 | 37 + 40 | 41 + 44 | 45 + 46 | 47 + 49 | 50 + 64 | 65 + 69 | 70 + 72 | 73 + 83 | 84 + 86 | 87 + 91 | 92 98 | 99 + 23 | 24 + 31 | 32 + 38 | 39 + 48 | 49 + 71 | 72 + 74 | 75 + 76 | 77 + 89 | 90 + 95 | 96 (100 rows) -- validate parameters @@ -572,15 +568,10 @@ drop table atsdb; create table atsdb (i int, j int, t text, n numeric) distributed by (i, j); insert into atsdb select i, i+1, i+2, i+3 from generate_series(1, 20) i; alter table atsdb drop column i; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy select * from atsdb; j | t | n ----+----+---- - 2 | 3 | 4 - 9 | 10 | 11 - 11 | 12 | 13 - 12 | 13 | 14 - 21 | 22 | 23 4 | 5 | 6 7 | 8 | 9 8 | 9 | 10 @@ -596,6 +587,11 @@ select * from atsdb; 6 | 7 | 8 13 | 14 | 15 17 | 18 | 19 + 2 | 3 | 4 + 9 | 10 | 11 + 11 | 12 | 13 + 12 | 13 | 14 + 21 | 22 | 23 (20 rows) select * from distcheck where rel = 'atsdb'; @@ -611,20 +607,10 @@ create table atsdb (i int_new, j int, t text, n numeric) distributed by (i, j); insert into atsdb select i, i+1, i+2, i+3 from generate_series(1, 20) i; drop type int_new cascade; NOTICE: drop cascades to column i of table atsdb -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy select * from atsdb; j | t | n ----+----+---- - 4 | 5 | 6 - 7 | 8 | 9 - 8 | 9 | 10 - 10 | 11 | 12 - 14 | 15 | 16 - 15 | 16 | 17 - 16 | 17 | 18 - 18 | 19 | 20 - 19 | 20 | 21 - 20 | 21 | 22 3 | 4 | 5 5 | 6 | 7 6 | 7 | 8 @@ -635,6 +621,16 @@ select * from atsdb; 11 | 12 | 13 12 | 13 | 14 21 | 22 | 23 + 4 | 5 | 6 + 7 | 8 | 9 + 8 | 9 | 10 + 10 | 11 | 12 + 14 | 15 | 16 + 15 | 16 | 17 + 16 | 17 | 18 + 18 | 19 | 20 + 19 | 20 | 21 + 20 | 21 | 22 (20 rows) select * from distcheck where rel = 'atsdb'; @@ -741,7 +737,7 @@ select * from atsdb order by 1, 2, 3; drop table atsdb; -- check distribution correctly cascaded for inherited tables create table dropColumnCascade (a int, b int, e int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table dropColumnCascadeChild (c int) inherits (dropColumnCascade); NOTICE: table has parent, setting distribution columns to match parent table @@ -749,9 +745,9 @@ create table dropColumnCascadeAnother (d int) inherits (dropColumnCascadeChild); NOTICE: table has parent, setting distribution columns to match parent table insert into dropColumnCascadeAnother select i,i,i from generate_series(1,10)i; alter table dropColumnCascade drop column a; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy select * from distcheck where rel like 'dropcolumnicascade%'; rel | attname -----+--------- @@ -765,7 +761,7 @@ drop cascades to table dropcolumncascadeanother -- distribution should be set to randomly for base and inherited tables create domain int_new as int; create table dropColumnCascade (a int_new, b int, e int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table dropColumnCascadeChild (c int) inherits (dropColumnCascade); NOTICE: table has parent, setting distribution columns to match parent table @@ -777,9 +773,9 @@ NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to column a of table dropcolumncascade drop cascades to column a of table dropcolumncascadechild drop cascades to column a of table dropcolumncascadeanother -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy select * from distcheck where rel like 'dropcolumncascade%'; rel | attname -----+--------- @@ -791,13 +787,13 @@ DETAIL: drop cascades to table dropcolumncascadechild drop cascades to table dropcolumncascadeanother -- Test corner cases in dropping distkey as inherited columns create table p1 (f1 int, f2 int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table c1 (f1 int not null) inherits(p1); NOTICE: table has parent, setting distribution columns to match parent table NOTICE: merging column "f1" with inherited definition alter table p1 drop column f1; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy -- only p1 is randomly distributed, c1 is still distributed by c1.f1 select * from distcheck where rel in ('p1', 'c1'); rel | attname @@ -806,7 +802,7 @@ select * from distcheck where rel in ('p1', 'c1'); (1 row) alter table c1 drop column f1; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy -- both c1 and p1 randomly distributed select * from distcheck where rel in ('p1', 'c1'); rel | attname @@ -816,12 +812,12 @@ select * from distcheck where rel in ('p1', 'c1'); drop table p1 cascade; NOTICE: drop cascades to table c1 create table p1 (f1 int, f2 int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table c1 () inherits(p1); NOTICE: table has parent, setting distribution columns to match parent table alter table only p1 drop column f1; -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy -- only p1 is randomly distributed, c1 is still distributed by c1.f1 select * from distcheck where rel in ('p1', 'c1'); rel | attname @@ -835,13 +831,13 @@ NOTICE: drop cascades to table c1 -- distribution should be set to randomly for base and inherited tables create domain int_new as int; create table p1 (f1 int_new, f2 int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table c1 (f1 int_new not null) inherits(p1); NOTICE: table has parent, setting distribution columns to match parent table NOTICE: merging column "f1" with inherited definition create table p1_inh (f1 int_new, f2 int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table c1_inh () inherits(p1_inh); NOTICE: table has parent, setting distribution columns to match parent table @@ -851,10 +847,10 @@ DETAIL: drop cascades to column f1 of table p1 drop cascades to column f1 of table c1 drop cascades to column f1 of table p1_inh drop cascades to column f1 of table c1_inh -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy -NOTICE: dropping a column that is part of the distribution policy forces a random distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy -- all above tables set to randomly distributed select * from distcheck where rel in ('p1', 'c1'); rel | attname @@ -917,8 +913,7 @@ drop table test_add_drop_rename_column_change_datatype ; -- should run without error create table atsdb with (appendonly=true) as select * from generate_series(1,1000); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'generate_series' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. alter table only atsdb set with(reorganize=true) distributed by (generate_series); select count(*) from atsdb; count @@ -1262,16 +1257,16 @@ insert into mpp5738 select i, i+1, i+2, i from generate_series(1, 10) i; select * from mpp5738; a | b | c | d ----+----+----+---- + 2 | 3 | 4 | 2 3 | 4 | 5 | 3 4 | 5 | 6 | 4 - 5 | 6 | 7 | 5 - 6 | 7 | 8 | 6 7 | 8 | 9 | 7 - 1 | 2 | 3 | 1 - 2 | 3 | 4 | 2 8 | 9 | 10 | 8 + 5 | 6 | 7 | 5 + 6 | 7 | 8 | 6 9 | 10 | 11 | 9 10 | 11 | 12 | 10 + 1 | 2 | 3 | 1 (10 rows) alter table mpp5738 alter partition for (1) set with (appendonly=true); @@ -1280,15 +1275,15 @@ select * from mpp5738; a | b | c | d ----+----+----+---- 1 | 2 | 3 | 1 + 5 | 6 | 7 | 5 + 6 | 7 | 8 | 6 + 9 | 10 | 11 | 9 + 10 | 11 | 12 | 10 2 | 3 | 4 | 2 3 | 4 | 5 | 3 4 | 5 | 6 | 4 - 5 | 6 | 7 | 5 - 6 | 7 | 8 | 6 7 | 8 | 9 | 7 8 | 9 | 10 | 8 - 9 | 10 | 11 | 9 - 10 | 11 | 12 | 10 (10 rows) drop table mpp5738; @@ -1326,7 +1321,6 @@ drop table mpp5754; -- MPP-5918 create role atsdb; NOTICE: resource queue required -- using default resource queue "pg_default" --- pax table won't create toast table create table owner_test(i int, toast text) distributed randomly; alter table owner_test owner to atsdb; alter table owner_test set with (reorganize = true) distributed by (i); @@ -1539,7 +1533,7 @@ INSERT INTO tstab VALUES (1, 'foo'); CREATE TABLE alter_table_with_primary_key (a int primary key); ALTER TABLE alter_table_with_primary_key SET DISTRIBUTED RANDOMLY; ERROR: cannot set to DISTRIBUTED RANDOMLY because relation has primary Key -HINT: Drop the primary key first +HINT: Drop the primary key first. CREATE TABLE alter_table_with_unique_index (a int unique); ALTER TABLE alter_table_with_unique_index SET DISTRIBUTED RANDOMLY; ERROR: cannot set to DISTRIBUTED RANDOMLY because relation has unique index @@ -1768,7 +1762,7 @@ DROP TABLE t_reorganize_false; -- Check that AT SET DISTRIBUTED BY cannot be combined with other subcommands -- on the same table CREATE TABLE atsdby_multiple(i int, j int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ALTER TABLE atsdby_multiple SET DISTRIBUTED BY(j), ADD COLUMN k int; ERROR: cannot alter distribution with other subcommands for relation "atsdby_multiple" diff --git a/contrib/pax_storage/src/test/regress/expected/autostats.out b/contrib/pax_storage/src/test/regress/expected/autostats.out index 34c0670b6e8..efc61bdaa2d 100644 --- a/contrib/pax_storage/src/test/regress/expected/autostats.out +++ b/contrib/pax_storage/src/test/regress/expected/autostats.out @@ -8,6 +8,7 @@ -- m/^LOG: .*Feature not supported: Queries on master-only tables./ -- m/^LOG: .*Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables./ -- m/^LOG:.*ERROR,"PG exception raised"/ +-- m/^LOG: An exception was encountered during the execution of statement:/ -- end_matchignore set gp_autostats_mode=on_change; set gp_autostats_on_change_threshold=9; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out index 72f0a78988c..84c073ac20c 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_aggregate_optimizer.out @@ -1588,18 +1588,16 @@ select array_agg(a order by b desc nulls last) from aggordertest; create temp table mpp14125 as select repeat('a', a) a, a % 10 b from generate_series(1, 100)a; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. explain select string_agg(a, '') from mpp14125 group by b; - QUERY PLAN --------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.07 rows=10 width=8) - -> GroupAggregate (cost=0.00..431.07 rows=4 width=8) + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> HashAggregate (cost=0.00..431.00 rows=1 width=8) Group Key: b - -> Sort (cost=0.00..431.06 rows=34 width=55) - Sort Key: b - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.01 rows=34 width=55) - Hash Key: b - -> Seq Scan on mpp14125 (cost=0.00..431.00 rows=34 width=55) - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 -(9 rows) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) + Hash Key: b + -> Seq Scan on mpp14125 (cost=0.00..431.00 rows=1 width=12) + Optimizer: GPORCA +(7 rows) -- end MPP-14125 -- Test that integer AVG() aggregate is accurate with large values. We used to @@ -1625,29 +1623,29 @@ insert into int2vectortab values select distinct t from int2vectortab group by distkey, t; t ------- + 22 1 1 2 1 2 3 - 22 (4 rows) select t from int2vectortab union select t from int2vectortab; t ------- - 1 1 2 1 2 3 + 1 22 (4 rows) select count(*) over (partition by t) from int2vectortab; count ------- - 1 1 1 2 2 + 1 (5 rows) select count(distinct t) from int2vectortab; @@ -1735,32 +1733,32 @@ SELECT a.x, b.y, count(*) FROM pagg_tab1 a FULL JOIN pagg_tab2 b ON a.x = b.y GR -> Redistribute Motion 3:3 (slice3; segments: 3) Hash Key: b.y -> Seq Scan on pagg_tab2 b - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) SELECT a.x, b.y, count(*) FROM pagg_tab1 a FULL JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, b.y; x | y | count ----+----+------- - 26 | | 10 - 28 | | 10 - 4 | | 10 - | 15 | 10 - 6 | 6 | 100 - 8 | | 10 - 2 | | 10 - 22 | | 10 - 10 | | 10 - | 9 | 10 - | 21 | 10 - 0 | 0 | 100 - 14 | | 10 + 16 | | 10 12 | 12 | 100 + 24 | 24 | 100 + 20 | | 10 | 3 | 10 - 16 | | 10 - | 27 | 10 18 | 18 | 100 - 20 | | 10 - 24 | 24 | 100 + | 27 | 10 + 14 | | 10 + | 9 | 10 + 0 | 0 | 100 + | 21 | 10 + 10 | | 10 + 22 | | 10 + 2 | | 10 + 4 | | 10 + 8 | | 10 + | 15 | 10 + 28 | | 10 + 26 | | 10 + 6 | 6 | 100 (20 rows) -- @@ -1806,7 +1804,7 @@ select 1, median(col1) from group_by_const group by 1; -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (1) -> Seq Scan on group_by_const - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) select 1, median(col1) from group_by_const group by 1; @@ -1815,6 +1813,387 @@ select 1, median(col1) from group_by_const group by 1; 1 | 500.5 (1 row) +-- Test GROUP BY with a RelabelType +create table tx (c1 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into tx values('hello'); +EXPLAIN (COSTS OFF, VERBOSE ON) +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: (min(c1)) + -> HashAggregate + Output: min(c1) + Group Key: ((tx.c1)::character varying) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: c1, ((c1)::character varying) + Hash Key: ((c1)::character varying) + -> Seq Scan on bfv_aggregate.tx + Output: c1, c1 + Settings: optimizer = 'on' + Optimizer: GPORCA +(12 rows) + +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + min +------- + hello +(1 row) + +drop table tx; +-- ORCA should pick singlestage-agg plan when multistage-agg guc is true +-- and distribution type is universal/replicated +set optimizer_force_multistage_agg to on; +create table t1_replicated(a int, b int, c float, d float) distributed replicated; +create table t2_replicated(a int, b int) distributed replicated; +explain select distinct b from t1_replicated; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 rows=1 width=4) + -> GroupAggregate (cost=0.00..431.00 rows=3 width=4) + Group Key: b + -> Sort (cost=0.00..431.00 rows=3 width=4) + Sort Key: b + -> Seq Scan on t1_replicated (cost=0.00..431.00 rows=3 width=4) + Optimizer: GPORCA +(7 rows) + +explain select sum(a), avg(b) from t1_replicated; + QUERY PLAN +------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..431.00 rows=1 width=16) + -> Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 rows=1 width=16) + -> Partial Aggregate (cost=0.00..431.00 rows=3 width=16) + -> Seq Scan on t1_replicated (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(5 rows) + +explain select count(distinct b) from t1_replicated group by a; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 rows=1 width=8) + -> GroupAggregate (cost=0.00..431.00 rows=3 width=8) + Group Key: a + -> Sort (cost=0.00..431.00 rows=3 width=8) + Sort Key: a + -> Seq Scan on t1_replicated (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(7 rows) + +explain select a, sum(mc) from (select a, b, max(c) mc from t1_replicated group by a,b) t group by a; + QUERY PLAN +---------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..431.00 rows=1 width=12) + -> GroupAggregate (cost=0.00..431.00 rows=3 width=12) + Group Key: a + -> GroupAggregate (cost=0.00..431.00 rows=3 width=12) + Group Key: a, b + -> Sort (cost=0.00..431.00 rows=3 width=16) + Sort Key: a, b + -> Seq Scan on t1_replicated (cost=0.00..431.00 rows=3 width=16) + Optimizer: GPORCA +(9 rows) + +explain SELECT t1.a, sum(c) from t1_replicated as t1 join t2_replicated as t2 on t1.a = t2.a group by t1.a; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..862.00 rows=1 width=12) + -> HashAggregate (cost=0.00..862.00 rows=3 width=12) + Group Key: t1.a + -> Hash Join (cost=0.00..862.00 rows=3 width=12) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1_replicated t1 (cost=0.00..431.00 rows=3 width=12) + -> Hash (cost=431.00..431.00 rows=3 width=4) + -> Seq Scan on t2_replicated t2 (cost=0.00..431.00 rows=3 width=4) + Optimizer: GPORCA +(9 rows) + +explain select count(a) from t1_replicated where c < (select sum(b) from t2_replicated); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Finalize Aggregate (cost=0.00..1324032.42 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.42 rows=1 width=8) + -> Partial Aggregate (cost=0.00..1324032.42 rows=1 width=8) + -> Nested Loop (cost=0.00..1324032.42 rows=1 width=4) + Join Filter: (t1_replicated.c < ((sum(t2_replicated.b)))::double precision) + -> Seq Scan on t1_replicated (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=8) + -> Finalize Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 1:1 (slice3; segments: 1) (cost=0.00..431.00 rows=1 width=8) + -> Partial Aggregate (cost=0.00..431.00 rows=3 width=8) + -> Seq Scan on t2_replicated (cost=0.00..431.00 rows=3 width=4) + Optimizer: GPORCA +(13 rows) + +explain SELECT DISTINCT g%10 FROM generate_series(0, 100) g; + QUERY PLAN +----------------------------------------------------------------------------- + HashAggregate (cost=0.00..0.13 rows=1000 width=4) + Group Key: (generate_series % 10) + -> Function Scan on generate_series (cost=0.00..0.00 rows=1000 width=4) + Optimizer: GPORCA +(4 rows) + +explain select count(*) from generate_series(0, 100) g; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate (cost=0.00..0.00 rows=1 width=8) + -> Function Scan on generate_series (cost=0.00..0.00 rows=1000 width=1) + Optimizer: GPORCA +(3 rows) + +explain select g%10 as c1, sum(g::numeric)as c2, count(*) as c3 from generate_series(1, 99) g group by g%10; + QUERY PLAN +----------------------------------------------------------------------------- + HashAggregate (cost=0.00..0.15 rows=1000 width=20) + Group Key: (generate_series % 10) + -> Function Scan on generate_series (cost=0.00..0.00 rows=1000 width=4) + Optimizer: GPORCA +(4 rows) + +reset optimizer_force_multistage_agg; +-- Eliminate unuseful columns of targetlist in multistage-agg +create table ex1(a int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table ex2(a int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into ex1 select i,i,i from generate_series(1, 10) i; +insert into ex2 select i,i,i from generate_series(1, 10) i; +explain (verbose on, costs off) select ex2.b/2, sum(ex1.a) from ex1, (select a, coalesce(b, 1) b from ex2) ex2 where ex1.a = ex2.a group by ex2.b/2; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (((COALESCE(ex2.b, 1)) / 2)), (sum(ex1.a)) + -> GroupAggregate + Output: (((COALESCE(ex2.b, 1)) / 2)), sum(ex1.a) + Group Key: (((COALESCE(ex2.b, 1)) / 2)) + -> Sort + Output: ex1.a, (((COALESCE(ex2.b, 1)) / 2)) + Sort Key: (((COALESCE(ex2.b, 1)) / 2)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: ex1.a, (((COALESCE(ex2.b, 1)) / 2)) + Hash Key: (((COALESCE(ex2.b, 1)) / 2)) + -> Hash Join + Output: ex1.a, ((COALESCE(ex2.b, 1)) / 2) + Hash Cond: (ex2.a = ex1.a) + -> Seq Scan on bfv_aggregate.ex2 + Output: COALESCE(ex2.b, 1), ex2.a + -> Hash + Output: ex1.a + -> Seq Scan on bfv_aggregate.ex1 + Output: ex1.a + Settings: optimizer = 'on' + Optimizer: GPORCA +(22 rows) + +select ex2.b/2, sum(ex1.a) from ex1, (select a, coalesce(b, 1) b from ex2) ex2 where ex1.a = ex2.a group by ex2.b/2; + ?column? | sum +----------+----- + 2 | 9 + 3 | 13 + 4 | 17 + 0 | 1 + 1 | 5 + 5 | 10 +(6 rows) + +explain (verbose on, costs off) SELECT b/2, sum(b) * (b/2) FROM ex1 GROUP BY b/2; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: ((b / 2)), ((sum(b) * ((b / 2)))) + -> GroupAggregate + Output: ((b / 2)), (sum(b) * ((b / 2))) + Group Key: ((ex1.b / 2)) + -> Sort + Output: b, ((b / 2)) + Sort Key: ((ex1.b / 2)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, ((b / 2)) + Hash Key: ((b / 2)) + -> Seq Scan on bfv_aggregate.ex1 + Output: b, (b / 2) + Settings: optimizer = 'on' + Optimizer: GPORCA +(15 rows) + +SELECT b/2, sum(b) * (b/2) FROM ex1 GROUP BY b/2; + ?column? | ?column? +----------+---------- + 0 | 0 + 1 | 5 + 2 | 18 + 3 | 39 + 4 | 68 + 5 | 50 +(6 rows) + +-- Test if Motion is placed between the "group by clauses" +drop table if exists t; +create table t(a int, b int, c int) distributed by (a); +insert into t select 1, i, i from generate_series(1, 10)i; +insert into t select 1, i, i from generate_series(1, 10)i; +insert into t select 1, i, i from generate_series(1, 10)i; +insert into t select 1, i, i from generate_series(1, 10)i; +analyze t; +explain (costs off) select count(distinct(b)), gp_segment_id from t group by gp_segment_id; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: gp_segment_id + -> Sort + Sort Key: gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: gp_segment_id + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select count(distinct(b)), gp_segment_id from t group by gp_segment_id; + count | gp_segment_id +-------+--------------- + 10 | 1 +(1 row) + +-- The cost of multistage agg is higher than hash agg +set optimizer_force_multistage_agg to on; +explain (costs off) select count(distinct(b)), gp_segment_id from t group by gp_segment_id; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: gp_segment_id + -> Partial GroupAggregate + Group Key: gp_segment_id + -> Sort + Sort Key: gp_segment_id, b + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: gp_segment_id, b + -> Seq Scan on t + Optimizer: GPORCA +(13 rows) + +select count(distinct(b)), gp_segment_id from t group by gp_segment_id; + count | gp_segment_id +-------+--------------- + 10 | 1 +(1 row) + +reset optimizer_force_multistage_agg; +drop table t; +-- Test defferral keyword on primary/unique key +-- When the grouping columns include a key, the GbAgg operator can be transformed to a Select, +-- resulting in the dropping of grouping columns. However, it is important to note that if a primary +-- or unique key has the deferral keyword, ORCA should not optimize (drop grouping columns) in such cases. +drop table if exists t1, t2, t3, t4, t5, t6; +NOTICE: table "t2" does not exist, skipping +NOTICE: table "t3" does not exist, skipping +NOTICE: table "t4" does not exist, skipping +NOTICE: table "t5" does not exist, skipping +NOTICE: table "t6" does not exist, skipping +create table t1 (a int, b int, c int, primary key(a, b)); +create table t2 (a int, b int, c int, primary key(a, b) deferrable); +create table t3 (a int, b int, c int, primary key(a, b) deferrable initially deferred); +create table t4 (a int, b int, c int, unique(a, b)); +create table t5 (a int, b int, c int, unique(a, b) deferrable); +create table t6 (a int, b int, c int, unique(a, b) deferrable initially deferred); +explain (costs off) select * from t1 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + Optimizer: GPORCA +(3 rows) + +explain (costs off) select * from t2 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: a, b, c + -> Sort + Sort Key: a, b, c + -> Seq Scan on t2 + Optimizer: GPORCA +(7 rows) + +explain (costs off) select * from t3 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: a, b, c + -> Sort + Sort Key: a, b, c + -> Seq Scan on t3 + Optimizer: GPORCA +(7 rows) + +explain (costs off) select * from t4 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t4 + Optimizer: GPORCA +(3 rows) + +explain (costs off) select * from t5 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: a, b, c + -> Sort + Sort Key: a, b, c + -> Seq Scan on t5 + Optimizer: GPORCA +(7 rows) + +explain (costs off) select * from t6 group by a, b, c; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: a, b, c + -> Sort + Sort Key: a, b, c + -> Seq Scan on t6 + Optimizer: GPORCA +(7 rows) + +explain (costs off) with cte1 as (select * from t3 group by a, b, c) select * from cte1; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: a, b, c + -> Sort + Sort Key: a, b, c + -> Seq Scan on t3 + Optimizer: GPORCA +(7 rows) + +begin; +insert into t3 values (1, 1, 1), (1, 1, 1), (1, 2, 1), (1, 3, 1), (1, 2, 1); +select * from t3 group by a, b, c; + a | b | c +---+---+--- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 +(3 rows) + +commit; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion (seg1 127.0.0.1:7003 pid=1465172) +drop table t1, t2, t3, t4, t5, t6; -- CLEANUP set client_min_messages='warning'; drop schema bfv_aggregate cascade; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_cte.out b/contrib/pax_storage/src/test/regress/expected/bfv_cte.out index 2d2dc5ccb20..ddc12290541 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_cte.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_cte.out @@ -306,6 +306,8 @@ SELECT cup.* FROM SELECT sum(t.b) OVER(PARTITION BY t.a ) AS e FROM (select 1 as a, 2 as b from pg_class limit 1)foo,t ) as cup GROUP BY cup.e; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables e --- 2 @@ -342,7 +344,7 @@ select const_a, const_b, sum(n) -- test cte can not be the param for gp_dist_random -- so in set_cte_pathlist we do not neet to check forceDistRandom create table ttt(tc1 int,tc2 int) ; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Greenplum Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into ttt values(1,1); insert into ttt values(2,2); @@ -403,43 +405,49 @@ SELECT y.a, z.a FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y, (SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z WHERE y.b = z.b - 1; - QUERY PLAN ------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Hash Join - Hash Cond: (share0_ref2.b = (share0_ref1.b - 1)) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: share0_ref2.b - -> Hash Join - Hash Cond: (share0_ref2.a = sisc_t2.d) - -> Shared Scan (share slice:id 2:0) - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: sisc_t2.d - -> Seq Scan on sisc_t2 - -> Hash - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: (share0_ref1.b - 1) + -> Sequence + -> Shared Scan (share slice:id 1:0) + -> Seq Scan on sisc_t1 + -> Hash Join + Hash Cond: (share0_ref3.b = (share0_ref2.b - 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: share0_ref3.b -> Hash Join - Hash Cond: (share0_ref1.a = sisc_t2_1.d) - -> Shared Scan (share slice:id 4:0) - -> Seq Scan on sisc_t1 + Hash Cond: (share0_ref3.a = sisc_t2.d) + -> Shared Scan (share slice:id 2:0) -> Hash - -> Redistribute Motion 3:3 (slice5; segments: 3) - Hash Key: sisc_t2_1.d - -> Seq Scan on sisc_t2 sisc_t2_1 - Optimizer: Postgres query optimizer -(24 rows) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: sisc_t2.d + -> Seq Scan on sisc_t2 + -> Hash + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: (share0_ref2.b - 1) + -> Hash Join + Hash Cond: (share0_ref2.a = sisc_t2_1.d) + -> Shared Scan (share slice:id 4:0) + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: sisc_t2_1.d + -> Seq Scan on sisc_t2 sisc_t2_1 + Optimizer: GPORCA +(26 rows) -- On seg2, introduce a delay in SISC WRITER (slice1) so that the xslice shared state -- which is stored in shared memory is initialized by the SISC READER (slice2 or slice4) select gp_inject_fault('get_shareinput_reference_delay_writer', 'suspend', dbid) from gp_segment_configuration where content = 2 and role = 'p'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_inject_fault ----------------- Success: (1 row) select gp_inject_fault('get_shareinput_reference_done', 'skip', dbid) from gp_segment_configuration where content = 2 and role = 'p'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_inject_fault ----------------- Success: @@ -455,18 +463,24 @@ select gp_inject_fault('get_shareinput_reference_done', 'skip', dbid) from gp_se \! bash -c 'psql -X regression -c "set client_min_messages to log; set enable_parallel=off; set debug_shareinput_xslice to true; set optimizer_enable_motion_broadcast to off; WITH cte AS MATERIALIZED (SELECT * FROM sisc_t1) SELECT y.a, z.a FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y, (SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z WHERE y.b = z.b - 1;" &> /tmp/bfv_cte.out' & -- Wait for both SISC READERs to be initialized and squelched select gp_wait_until_triggered_fault('get_shareinput_reference_done', 1, dbid) from gp_segment_configuration where content = 2 and role = 'p'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_wait_until_triggered_fault ------------------------------- Success: (1 row) select gp_inject_fault('get_shareinput_reference_done', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_inject_fault ----------------- Success: (1 row) select gp_inject_fault('get_shareinput_reference_delay_writer', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_inject_fault ----------------- Success: @@ -474,6 +488,12 @@ select gp_inject_fault('get_shareinput_reference_delay_writer', 'reset', dbid) f -- Wait for the query to finish select wait_until_query_output_to_file('/tmp/bfv_cte.out'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions wait_until_query_output_to_file --------------------------------- @@ -495,13 +515,13 @@ select wait_until_query_output_to_file('/tmp/bfv_cte.out'); SET SET SET -LOG: SISC (shareid=0, slice=2): initialized xslice state (seg2 slice2 127.0.1.1:7004 pid=1049102) -LOG: SISC READER (shareid=0, slice=2): wrote notify_done (seg2 slice2 127.0.1.1:7004 pid=1049102) -LOG: SISC WRITER (shareid=0, slice=4): initializing because squelched (seg2 slice4 127.0.1.1:7004 pid=1049114) -LOG: SISC WRITER (shareid=0, slice=4): No tuplestore yet, creating tuplestore (seg2 slice4 127.0.1.1:7004 pid=1049114) -LOG: SISC WRITER (shareid=0, slice=4): wrote notify_ready (seg2 slice4 127.0.1.1:7004 pid=1049114) -LOG: SISC WRITER (shareid=0, slice=4): got DONE message from 1 readers (seg2 slice4 127.0.1.1:7004 pid=1049114) -LOG: SISC (shareid=0, slice=4): removed xslice state (seg2 slice4 127.0.1.1:7004 pid=1049114) +LOG: SISC (shareid=0, slice=2): initialized xslice state (seg2 slice2 127.0.0.1:7004 pid=1470401) +LOG: SISC READER (shareid=0, slice=2): wrote notify_done (seg2 slice2 127.0.0.1:7004 pid=1470401) +LOG: SISC READER (shareid=0, slice=4): wrote notify_done (seg2 slice4 127.0.0.1:7004 pid=1470413) +LOG: SISC WRITER (shareid=0, slice=1): No tuplestore yet, creating tuplestore (seg2 slice1 127.0.0.1:7004 pid=1470394) +LOG: SISC WRITER (shareid=0, slice=1): wrote notify_ready (seg2 slice1 127.0.0.1:7004 pid=1470394) +LOG: SISC WRITER (shareid=0, slice=1): got DONE message from 2 readers (seg2 slice1 127.0.0.1:7004 pid=1470394) +LOG: SISC (shareid=0, slice=1): removed xslice state (seg2 slice1 127.0.0.1:7004 pid=1470394) SET a | a ---+--- @@ -510,6 +530,8 @@ SET -- cleanup select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables gp_inject_fault_infinite -------------------------- Success: @@ -548,28 +570,27 @@ explain (analyze off, costs off, verbose off) with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep), cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x) select * from ( select 'col1' tblnm from cte1) a left join cte2 c on a.tblnm=c.tblnm; - QUERY PLAN ------------------------------------------------------------------ - Hash Left Join - Hash Cond: (('col1'::text) = ('col1'::text)) - -> Gather Motion 1:1 (slice1; segments: 1) - -> Subquery Scan on cte1 + QUERY PLAN +---------------------------------------------------------------- + Sequence + -> Shared Scan (share slice:id 0:0) + -> Gather Motion 1:1 (slice1; segments: 1) -> WindowAgg Partition By: rep.i -> Sort Sort Key: rep.i -> Seq Scan on rep - -> Hash - -> Aggregate - -> Gather Motion 1:1 (slice2; segments: 1) - -> Subquery Scan on cte1_1 - -> WindowAgg - Partition By: rep_1.i - -> Sort - Sort Key: rep_1.i - -> Seq Scan on rep rep_1 - Optimizer: Postgres-based planner -(19 rows) + -> Hash Left Join + Hash Cond: (('col1'::text) = ('col1'::text)) + -> Result + -> Shared Scan (share slice:id 0:0) + -> Hash + -> Result + Filter: (('col1'::text) = 'col1'::text) + -> Aggregate + -> Shared Scan (share slice:id 0:0) + Optimizer: GPORCA +(18 rows) with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep), cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x) @@ -595,23 +616,23 @@ with t1 as (select * from rep1), t2 as (select id, rc from rep2 where ri = 101991) select p.*from t1 p join t2 r on p.isc = r.rc join t2 r1 on p.iscd = r1.rc; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) - -> Hash Join - Hash Cond: ((rep1.iscd)::text = (r1.rc)::text) + -> Sequence + -> Shared Scan (share slice:id 1:1) + -> Seq Scan on rep2 + Filter: (ri = '101991'::numeric) -> Hash Join - Hash Cond: ((rep1.isc)::text = (r.rc)::text) - -> Seq Scan on rep1 + Hash Cond: ((rep1.isc)::text = (share1_ref2.rc)::text) + -> Hash Join + Hash Cond: ((rep1.iscd)::text = (share1_ref3.rc)::text) + -> Seq Scan on rep1 + -> Hash + -> Shared Scan (share slice:id 1:1) -> Hash - -> Subquery Scan on r - -> Seq Scan on rep2 - Filter: (ri = '101991'::numeric) - -> Hash - -> Subquery Scan on r1 - -> Seq Scan on rep2 rep2_1 - Filter: (ri = '101991'::numeric) - Optimizer: Postgres-based planner + -> Shared Scan (share slice:id 1:1) + Optimizer: GPORCA (15 rows) with @@ -635,7 +656,11 @@ NOTICE: table "t1" does not exist, skipping NOTICE: table "t2" does not exist, skipping NOTICE: table "rep" does not exist, skipping create table t1 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rep (a int, b int) distributed replicated; insert into t1 select 1, generate_series(1,10); insert into t2 select 1, generate_series(1,20); @@ -648,26 +673,40 @@ case when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) end as rep_cte_a from t1_cte join t2 on t1_cte.b = t2.b; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - InitPlan 1 (returns $0) (slice4) - -> Gather Motion 1:1 (slice5; segments: 1) - -> Seq Scan on rep - InitPlan 2 (returns $1) (slice6) - -> Gather Motion 1:1 (slice7; segments: 1) - -> Seq Scan on rep rep_1 - -> Hash Join - Hash Cond: (t1.b = t2.b) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t1.b - -> Seq Scan on t1 - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t2.b - -> Seq Scan on t2 - Optimizer: Postgres-based planner -(17 rows) + -> Sequence + -> Shared Scan (share slice:id 1:1) + -> Result + One-Time Filter: (gp_execution_segment() = 0) + -> Seq Scan on rep + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Hash Join + Hash Cond: (t1.b = t2.b) + -> Seq Scan on t1 + -> Hash + -> Broadcast Motion 3:3 (slice6; segments: 3) + -> Seq Scan on t2 + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice4) + -> WindowAgg + -> Gather Motion 3:1 (slice5; segments: 3) + -> Shared Scan (share slice:id 5:1) + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> WindowAgg + -> Gather Motion 3:1 (slice3; segments: 3) + -> Shared Scan (share slice:id 3:1) + Optimizer: GPORCA +(31 rows) with t1_cte as (select b from t1), rep_cte as (select a from rep) @@ -679,11 +718,11 @@ from t1_cte join t2 on t1_cte.b = t2.b; rep_cte_a ----------- 1 + 1 - 1 @@ -700,6 +739,8 @@ drop table t1, t2, rep; -- SIGSEGV during DXL to PlStmt translation. -- create table a_table(a smallint); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view cte_view as with t1 as (select a from a_table) select t1.a from t1 @@ -708,33 +749,57 @@ create view cte_view as -- to pick a plan that would crash before this fix set allow_system_table_mods=true; update pg_class set relpages = 1::int, reltuples = 12.0::real where relname = 'a_table'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables reset allow_system_table_mods; explain select * from a_table join cte_view on a_table.a = (select a from cte_view) where cte_view.a = 2024; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000000003.50..10000000005.68 rows=3 width=4) - InitPlan 2 (returns $1) (slice5) - -> Gather Motion 3:1 (slice6; segments: 3) (cost=1.20..2.30 rows=3 width=2) - -> Seq Scan on a_table a_table_3 (cost=0.00..1.05 rows=1 width=2) - Filter: (a = $0) - InitPlan 1 (returns $0) (slice7) - -> Gather Motion 3:1 (slice8; segments: 3) (cost=0.00..1.20 rows=12 width=2) - -> Seq Scan on a_table a_table_2 (cost=0.00..1.04 rows=4 width=2) - -> Nested Loop (cost=10000000001.20..10000000003.34 rows=1 width=4) - -> Seq Scan on a_table (cost=0.00..1.05 rows=1 width=2) - Filter: (a = $1) - -> Materialize (cost=1.20..2.28 rows=1 width=2) - -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=1.20..2.28 rows=1 width=2) - Hash Key: $1 - -> Result (cost=1.20..2.25 rows=1 width=2) - One-Time Filter: ($2 = 2024) - InitPlan 3 (returns $2) (slice3) - -> Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..1.20 rows=12 width=2) - -> Seq Scan on a_table a_table_4 (cost=0.00..1.04 rows=4 width=2) - -> Seq Scan on a_table a_table_1 (cost=0.00..1.05 rows=1 width=2) - Filter: (a = 2024) - Optimizer: Postgres-based planner -(22 rows) +NOTICE: One or more columns in the following table(s) do not have statistics: a_table +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2261454598.54 rows=1920 width=4) + -> Nested Loop (cost=0.00..2261454598.51 rows=640 width=4) + Join Filter: true + -> Result (cost=0.00..2206727.74 rows=134 width=2) + Filter: (a_table_1.a = ((SubPlan 1))) + -> Seq Scan on a_table a_table_1 (cost=0.00..2206727.72 rows=334 width=4) + SubPlan 1 + -> Materialize (cost=0.00..1293.00 rows=12 width=2) + -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=0.00..1293.00 rows=12 width=2) + -> Sequence (cost=0.00..1293.00 rows=4 width=2) + -> Shared Scan (share slice:id 5:1) (cost=0.00..431.00 rows=4 width=1) + -> Seq Scan on a_table a_table_2 (cost=0.00..431.00 rows=4 width=2) + -> Hash Join (cost=0.00..862.00 rows=4 width=2) + Hash Cond: (share1_ref3.a = share1_ref2.a) + -> Redistribute Motion 1:3 (slice6) (cost=0.00..431.00 rows=1 width=2) + Hash Key: share1_ref3.a + -> Assert (cost=0.00..431.00 rows=1 width=2) + Assert Cond: ((row_number() OVER (?)) = 1) + -> WindowAgg (cost=0.00..431.00 rows=12 width=10) + -> Gather Motion 3:1 (slice7; segments: 3) (cost=0.00..431.00 rows=12 width=2) + -> Shared Scan (share slice:id 7:1) (cost=0.00..431.00 rows=4 width=2) + -> Hash (cost=431.00..431.00 rows=4 width=2) + -> Shared Scan (share slice:id 5:1) (cost=0.00..431.00 rows=4 width=2) + -> Materialize (cost=0.00..1293.00 rows=5 width=2) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1293.00 rows=5 width=2) + -> Sequence (cost=0.00..1293.00 rows=2 width=2) + -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=1) + -> Seq Scan on a_table (cost=0.00..431.00 rows=4 width=2) + -> Hash Join (cost=0.00..862.00 rows=2 width=2) + Hash Cond: (share0_ref3.a = share0_ref2.a) + -> Redistribute Motion 1:3 (slice3) (cost=0.00..431.00 rows=1 width=2) + Hash Key: share0_ref3.a + -> Assert (cost=0.00..431.00 rows=1 width=2) + Assert Cond: ((row_number() OVER (?)) = 1) + -> WindowAgg (cost=0.00..431.00 rows=12 width=10) + -> Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..431.00 rows=12 width=2) + -> Shared Scan (share slice:id 4:0) (cost=0.00..431.00 rows=4 width=2) + -> Hash (cost=431.00..431.00 rows=2 width=2) + -> Result (cost=0.00..431.00 rows=2 width=2) + Filter: (share0_ref2.a = 2024) + -> Shared Scan (share slice:id 2:0) (cost=0.00..431.00 rows=4 width=2) + Optimizer: GPORCA +(42 rows) -- CTE tests with outer references. Ensure Orca produces an inlined plan in these cases rather than falling back to planner drop table if exists foo; @@ -747,22 +812,22 @@ insert into foo values (2); insert into jazz values (2); analyze foo,jazz; explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on foo SubPlan 1 -> Result - One-Time Filter: (foo.a = 2) - -> Nested Loop + -> Result + One-Time Filter: (foo.a = 2) -> Materialize -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Seq Scan on jazz - -> Materialize - -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on jazz jazz_1 - Optimizer: Postgres-based planner + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on jazz jazz_1 + -> Seq Scan on jazz + Optimizer: GPORCA (14 rows) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo; @@ -773,21 +838,21 @@ select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where -- outer ref in limit explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on foo SubPlan 1 - -> Limit - -> Nested Loop + -> Result + -> Limit -> Materialize -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Seq Scan on jazz - -> Materialize - -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) - -> Seq Scan on jazz jazz_1 - Optimizer: Postgres-based planner + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on jazz jazz_1 + -> Seq Scan on jazz + Optimizer: GPORCA (13 rows) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_dd_types_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_dd_types_optimizer.out index c8780686015..25c7af6c306 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_dd_types_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_dd_types_optimizer.out @@ -89,7 +89,7 @@ INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGL insert into direct_test_type_bpchar values('abs'); INFO: (slice 0) Dispatch command to SINGLE content INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content -insert into direct_test_type_bytea values('greenplum'); +insert into direct_test_type_bytea values('cloudberry'); INFO: (slice 0) Dispatch command to SINGLE content INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content insert into direct_test_type_cidr values('68.44.55.111'); @@ -161,11 +161,11 @@ INFO: (slice 1) Dispatch command to SINGLE content abs (1 row) -select * from direct_test_type_bytea where x = 'greenplum'; +select * from direct_test_type_bytea where x = 'cloudberry'; INFO: (slice 1) Dispatch command to SINGLE content - x ----------------------- - \x677265656e706c756d + x +------------------------ + \x636c6f75646265727279 (1 row) -- TODO: this currently not directly dispatched (AGL-1246) diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_index_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_index_optimizer.out index 83ddf72d69e..c0eb0a2b87d 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_index_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_index_optimizer.out @@ -680,12 +680,12 @@ SET optimizer_enable_tablescan=off; SET optimizer_enable_indexscan=off; SET optimizer_enable_indexonlyscan=on; EXPLAIN SELECT c, a FROM table_with_reversed_index WHERE a > 5; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.17..10000001173.28 rows=16267 width=36) - -> Index Only Scan using table_with_reversed_index_c_a_idx on table_with_reversed_index (cost=10000000000.17..10000000956.39 rows=5422 width=36) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.01 rows=1 width=12) + -> Index Only Scan using table_with_reversed_index_c_a_idx on table_with_reversed_index (cost=0.00..6.01 rows=1 width=12) Index Cond: (a > 5) - Optimizer: Postgres query optimizer + Optimizer: GPORCA (4 rows) SELECT c, a FROM table_with_reversed_index WHERE a > 5; @@ -699,3 +699,750 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_indexscan; RESET optimizer_enable_indexonlyscan; +-- +-- Test Hash indexes +-- +CREATE TABLE hash_tbl (a int, b int) DISTRIBUTED BY(a); +INSERT INTO hash_tbl select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl; +CREATE INDEX hash_idx1 ON hash_tbl USING hash(b); +-- Now check the results by turning on indexscan +SET enable_seqscan = ON; +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; +SET optimizer_enable_tablescan =ON; +SET optimizer_enable_indexscan = ON; +SET optimizer_enable_bitmapscan = OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl + Filter: (b = 3) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on hash_tbl + Filter: ((b = 3) AND (a = 3)) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl + Filter: ((b = 3) OR (b = 5)) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 3 | 3 + 5 | 5 +(2 rows) + +-- Now check the results by turning on bitmapscan +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; +SET optimizer_enable_tablescan =OFF; +SET optimizer_enable_indexscan = OFF; +SET optimizer_enable_bitmapscan = ON; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: GPORCA +(6 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: GPORCA +(7 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 5) + Optimizer: GPORCA +(9 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 3 | 3 + 5 | 5 +(2 rows) + +DROP INDEX hash_idx1; +DROP TABLE hash_tbl; +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; +RESET optimizer_enable_tablescan; +RESET optimizer_enable_indexscan; +RESET optimizer_enable_bitmapscan; +-- Test Hash indexes with AO tables +CREATE TABLE hash_tbl_ao (a int, b int) WITH (appendonly = true) DISTRIBUTED BY(a); +INSERT INTO hash_tbl_ao select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl_ao; +CREATE INDEX hash_idx2 ON hash_tbl_ao USING hash(b); +-- get results for comparison purposes +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: GPORCA +(6 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: GPORCA +(7 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl_ao + Filter: ((b = 3) OR (b = 5)) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + a | b +---+--- + 3 | 3 + 5 | 5 +(2 rows) + +-- Now check the results by turning off seqscan/tablescan +SET enable_seqscan = OFF; +SET optimizer_enable_tablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: GPORCA +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: GPORCA +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 5) + Optimizer: GPORCA +(9 rows) + +DROP INDEX hash_idx2; +DROP TABLE hash_tbl_ao; +RESET enable_seqscan; +RESET optimizer_enable_tablescan; +-- Test hash indexes with partition table +CREATE TABLE hash_prt_tbl (a int, b int) DISTRIBUTED BY(a) PARTITION BY RANGE(a) +(PARTITION p1 START (1) END (500) INCLUSIVE, +PARTITION p2 START(501) END (1000) INCLUSIVE); +INSERT INTO hash_prt_tbl select i,i FROM generate_series(1, 1000)i; +ANALYZE hash_prt_tbl; +CREATE INDEX hash_idx3 ON hash_prt_tbl USING hash(b); +-- Now check the results by turning off dynamictablescan/seqscan +SET enable_seqscan = OFF; +SET optimizer_enable_dynamictablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Bitmap Heap Scan on hash_prt_tbl + Number of partitions to scan: 2 (out of 2) + Recheck Cond: (b = 3) + Filter: (b = 3) + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 3) + Optimizer: GPORCA +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 and a=3; + QUERY PLAN +---------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Dynamic Bitmap Heap Scan on hash_prt_tbl + Number of partitions to scan: 1 (out of 2) + Recheck Cond: (b = 3) + Filter: ((b = 3) AND (a = 3)) + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 3) + Optimizer: GPORCA +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 or b=5; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Bitmap Heap Scan on hash_prt_tbl + Number of partitions to scan: 2 (out of 2) + Recheck Cond: ((b = 3) OR (b = 5)) + Filter: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 3) + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 5) + Optimizer: GPORCA +(11 rows) + +DROP INDEX hash_idx3; +DROP TABLE hash_prt_tbl; +RESET enable_seqscan; +RESET optimizer_enable_dynamictablescan; +-- +-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY only +-- +CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, hash_idx_col int); +CREATE INDEX bitmap_alt_idx1 on bitmap_alt using bitmap(bitmap_idx_col); +CREATE INDEX bitmap_alt_idx2 on bitmap_alt using btree(btree_idx_col); +CREATE INDEX bitmap_alt_idx3 on bitmap_alt using hash(hash_idx_col); +INSERT INTO bitmap_alt SELECT i, i, i, i from generate_series(1,10)i; +ANALYZE bitmap_alt; +-- ORCA should generate bitmap index scan plans for the following +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE bitmap_idx_col IN (3, 5); + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on bitmap_alt + Recheck Cond: (bitmap_idx_col = ANY ('{3,5}'::integer[])) + -> Bitmap Index Scan on bitmap_alt_idx1 + Index Cond: (bitmap_idx_col = ANY ('{3,5}'::integer[])) + Optimizer: GPORCA +(6 rows) + +SELECT * FROM bitmap_alt WHERE bitmap_idx_col IN (3, 5); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- + 3 | 3 | 3 | 3 + 5 | 5 | 5 | 5 +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5); + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bitmap_alt + Filter: (btree_idx_col = ANY ('{3,5}'::integer[])) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- + 3 | 3 | 3 | 3 + 5 | 5 | 5 | 5 +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE hash_idx_col IN (3, 5); + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on bitmap_alt + Recheck Cond: (hash_idx_col = ANY ('{3,5}'::integer[])) + -> Bitmap Index Scan on bitmap_alt_idx3 + Index Cond: (hash_idx_col = ANY ('{3,5}'::integer[])) + Optimizer: GPORCA +(6 rows) + +SELECT * FROM bitmap_alt WHERE hash_idx_col IN (3, 5); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- + 3 | 3 | 3 | 3 + 5 | 5 | 5 | 5 +(2 rows) + +-- ORCA should generate seq scan plans for the following +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE bitmap_idx_col=ALL(ARRAY[3, 5]); + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bitmap_alt + Filter: (bitmap_idx_col = ALL ('{3,5}'::integer[])) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM bitmap_alt WHERE bitmap_idx_col=ALL(ARRAY[3, 5]); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE btree_idx_col=ALL(ARRAY[3, 5]); + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bitmap_alt + Filter: (btree_idx_col = ALL ('{3,5}'::integer[])) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM bitmap_alt WHERE btree_idx_col=ALL(ARRAY[3, 5]); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); + QUERY PLAN +----------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bitmap_alt + Filter: (hash_idx_col = ALL ('{3,5}'::integer[])) + Optimizer: GPORCA +(4 rows) + +SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); + id | bitmap_idx_col | btree_idx_col | hash_idx_col +----+----------------+---------------+-------------- +(0 rows) + +-- +-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table +-- with multikey indexes only when predicate key is the first index key +-- (similar test for non-partitioned tables in create_index) +-- +CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1)) +PARTITION BY list(key2) +(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION other); +CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2); +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from generate_series(1,500)i; +explain (costs off) +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key1 + -> Sort + Sort Key: key1 + -> Dynamic Index Only Scan on multikey_idx on pt_with_multikey_index + Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[])) + Number of partitions to scan: 3 (out of 3) + Optimizer: GPORCA +(8 rows) + +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + key1 +-------- + KEY5 + KEY5 + KEY5 + KEY55 + KEY55 + KEY55 + KEY65 + KEY65 + KEY65 +(9 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key2 + -> Sort + Sort Key: key2 + -> Dynamic Seq Scan on pt_with_multikey_index + Number of partitions to scan: 2 (out of 3) + Filter: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY ('{R,G}'::bpchar[]))) + Optimizer: GPORCA +(8 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | G + 55 | KEY55 | R +(2 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + QUERY PLAN +------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key1 + -> Sort + Sort Key: key1 + -> Dynamic Seq Scan on pt_with_multikey_index + Number of partitions to scan: 1 (out of 3) + Filter: ((key1 = ANY ('{KEY55,KEY65}'::bpchar[])) AND (key2 = 'R'::bpchar)) + Optimizer: GPORCA +(8 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | R + 65 | KEY65 | R +(2 rows) + +-- +-- Enable the index only scan in append only table. +-- Note: expect ORCA to use seq scan rather than index only scan like planner, +-- because ORCA hasn't yet implemented index only scan for AO/CO tables. +-- +CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true); +CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b); +insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_ao where a < 100; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Aggregate (cost=0.00..6.01 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.01 rows=1 width=1) + -> Index Only Scan using bfv_index_only_ao_a_b on bfv_index_only_ao (cost=0.00..6.01 rows=1 width=1) + Index Cond: (a < 100) + Optimizer: GPORCA +(5 rows) + +select count(*) from bfv_index_only_ao where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_ao where a < 1000; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Aggregate (cost=0.00..6.01 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.01 rows=1 width=1) + -> Index Only Scan using bfv_index_only_ao_a_b on bfv_index_only_ao (cost=0.00..6.01 rows=1 width=1) + Index Cond: (a < 1000) + Optimizer: GPORCA +(5 rows) + +select count(*) from bfv_index_only_ao where a < 1000; + count +------- + 999 +(1 row) + +CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, orientation=column); +CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b); +insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_aocs where a < 100; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Aggregate (cost=0.00..6.01 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.01 rows=1 width=1) + -> Index Only Scan using bfv_index_only_aocs_a_b on bfv_index_only_aocs (cost=0.00..6.01 rows=1 width=1) + Index Cond: (a < 100) + Optimizer: GPORCA +(5 rows) + +select count(*) from bfv_index_only_aocs where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_aocs where a < 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Aggregate (cost=0.00..6.01 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.01 rows=1 width=1) + -> Index Only Scan using bfv_index_only_aocs_a_b on bfv_index_only_aocs (cost=0.00..6.01 rows=1 width=1) + Index Cond: (a < 1000) + Optimizer: GPORCA +(5 rows) + +select count(*) from bfv_index_only_aocs where a < 1000; + count +------- + 999 +(1 row) + +-- The following tests are to verify a fix that allows ORCA to +-- choose the bitmap index scan alternative when the predicate +-- is in the form of `value operator cast(column)`. The fix +-- converts the scalar comparison expression to the more common +-- form of `cast(column) operator value` in the preprocessor. +-- Each test includes two queries. One query's predicate has +-- the column on the left side, and the other has the column +-- on the right side. We expect the two queries to generate +-- identical plans with bitmap index scan. +-- Index only scan will probably be selected once index only +-- scan in enabled for AO tables in ORCA. To prevent retain +-- the bitmap scan alternative, turn off index only scan. +set optimizer_enable_indexonlyscan=off; +-- Test AO table +-- Index scan is disabled in AO table, so bitmap scan is the +-- most performant +create table ao_tbl ( + path_hash character varying(10) +) with (appendonly='true'); +create index ao_idx on ao_tbl using btree (path_hash); +insert into ao_tbl select 'abc' from generate_series(1,20) i; +analyze ao_tbl; +-- identical plans +explain select * from ao_tbl where path_hash = 'ABC'; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..387.96 rows=1 width=4) + -> Bitmap Heap Scan on ao_tbl (cost=0.00..387.96 rows=1 width=4) + Recheck Cond: ((path_hash)::text = 'ABC'::text) + -> Bitmap Index Scan on ao_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((path_hash)::text = 'ABC'::text) + Optimizer: GPORCA +(6 rows) + +explain select * from ao_tbl where 'ABC' = path_hash; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..387.96 rows=1 width=4) + -> Bitmap Heap Scan on ao_tbl (cost=0.00..387.96 rows=1 width=4) + Recheck Cond: ((path_hash)::text = 'ABC'::text) + -> Bitmap Index Scan on ao_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((path_hash)::text = 'ABC'::text) + Optimizer: GPORCA +(6 rows) + +-- Test AO partition table +-- Dynamic index scan is disabled in AO table, so dynamic bitmap +-- scan is the most performant +create table part_tbl ( + path_hash character varying(10) +) partition by list(path_hash) + (partition pics values('a') , + default partition other with (appendonly='true')); +create index part_idx on part_tbl using btree (path_hash); +insert into part_tbl select 'abc' from generate_series(1,20) i; +analyze part_tbl; +-- identical plans +explain select * from part_tbl where path_hash = 'ABC'; + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=1 width=4) + -> Dynamic Index Only Scan on part_idx on part_tbl (cost=0.00..6.00 rows=1 width=4) + Index Cond: (path_hash = 'ABC'::text) + Number of partitions to scan: 1 (out of 2) + Optimizer: GPORCA +(5 rows) + +explain select * from part_tbl where 'ABC' = path_hash; + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.00 rows=1 width=4) + -> Dynamic Index Only Scan on part_idx on part_tbl (cost=0.00..6.00 rows=1 width=4) + Index Cond: (path_hash = 'ABC'::text) + Number of partitions to scan: 1 (out of 2) + Optimizer: GPORCA +(5 rows) + +-- Test table indexed on two columns +-- Two indices allow ORCA to generate the bitmap scan alternative +create table two_idx_tbl (x varchar(10), y varchar(10)); +create index x_idx on two_idx_tbl using btree (x); +create index y_idx on two_idx_tbl using btree (y); +insert into two_idx_tbl select 'aa', 'bb' from generate_series(1,10000) i; +analyze two_idx_tbl; +-- encourage bitmap scan by discouraging index scan +set optimizer_enable_indexscan=off; +-- identical plans +explain select * from two_idx_tbl where x = 'cc' or y = 'dd'; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.06 rows=3 width=6) + -> Bitmap Heap Scan on two_idx_tbl (cost=0.00..431.06 rows=1 width=6) + Recheck Cond: (((x)::text = 'cc'::text) OR ((y)::text = 'dd'::text)) + -> BitmapOr (cost=0.00..0.00 rows=0 width=0) + -> Bitmap Index Scan on x_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((x)::text = 'cc'::text) + -> Bitmap Index Scan on y_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((y)::text = 'dd'::text) + Optimizer: GPORCA +(9 rows) + +explain select * from two_idx_tbl where 'cc' = x or 'dd' = y; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.06 rows=3 width=6) + -> Bitmap Heap Scan on two_idx_tbl (cost=0.00..431.06 rows=1 width=6) + Recheck Cond: (((x)::text = 'cc'::text) OR ((y)::text = 'dd'::text)) + -> BitmapOr (cost=0.00..0.00 rows=0 width=0) + -> Bitmap Index Scan on x_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((x)::text = 'cc'::text) + -> Bitmap Index Scan on y_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: ((y)::text = 'dd'::text) + Optimizer: GPORCA +(9 rows) + +RESET optimizer_enable_indexscan; +RESET optimizer_enable_indexonlyscan; +RESET enable_indexonlyscan; +RESET seq_page_cost; +-- Test IndexNLJoin on IndexOnlyScan in ORCA (both heap and AOCS table) +create table index_only_join_test (a int, b int) distributed by (a); +create table index_only_join_test_aocs (a int, b int) with (appendonly='true') distributed by (a); +create index index_only_join_test_a_idx on index_only_join_test(a); +create index index_only_join_test_b_idx on index_only_join_test(b) include (a); +create index index_only_join_test_aocs_a_idx on index_only_join_test_aocs(a); +create index index_only_join_test_aocs_b_idx on index_only_join_test_aocs(b) include (a); +insert into index_only_join_test select i,i from generate_series(1, 100)i; +insert into index_only_join_test_aocs select i,i from generate_series(1, 100)i; +analyze index_only_join_test; +analyze index_only_join_test_aocs; +set enable_nestloop to on; +set enable_seqscan to off; +set optimizer_enable_indexscan to off; +explain select t1.a from index_only_join_test t1, index_only_join_test t2 where t1.a = t2.a and t1.b < 10; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..60.65 rows=10 width=4) + -> Nested Loop (cost=0.00..60.65 rows=4 width=4) + Join Filter: true + -> Index Only Scan using index_only_join_test_b_idx on index_only_join_test t1 (cost=0.00..6.05 rows=4 width=4) + Index Cond: (b < 10) + -> Index Only Scan using index_only_join_test_a_idx on index_only_join_test t2 (cost=0.00..54.60 rows=1 width=1) + Index Cond: (a = t1.a) + Optimizer: GPORCA +(8 rows) + +reset optimizer_enable_indexscan; +explain select t1.a from index_only_join_test_aocs t1, index_only_join_test_aocs t2 where t1.a = t2.a and t1.b < 10; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..60.65 rows=10 width=4) + -> Nested Loop (cost=0.00..60.65 rows=4 width=4) + Join Filter: true + -> Index Only Scan using index_only_join_test_aocs_b_idx on index_only_join_test_aocs t1 (cost=0.00..6.05 rows=4 width=4) + Index Cond: (b < 10) + -> Index Only Scan using index_only_join_test_aocs_a_idx on index_only_join_test_aocs t2 (cost=0.00..54.60 rows=1 width=1) + Index Cond: (a = t1.a) + Optimizer: GPORCA +(8 rows) + +reset enable_nestloop; +reset enable_seqscan; +drop table index_only_join_test; +drop table index_only_join_test_aocs; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out index cc84f9983ff..4527dab51ce 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_joins_optimizer.out @@ -11,13 +11,16 @@ create table y (a int, b int, c int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into y (select * from x); -CREATE TABLE t1 (a int, b int); +CREATE TABLE t1 (a int, b int, c int not null); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE t2 (a int, b int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -INSERT INTO t1 VALUES (1,1),(2,1),(3,NULL); +CREATE TABLE t3 (a int not null, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO t1 VALUES (1,1,1),(2,1,2),(3,NULL,3); INSERT INTO t2 VALUES (2,3); CREATE FUNCTION func_x(x int) RETURNS int AS $$ BEGIN @@ -618,38 +621,347 @@ SELECT * from x left join y on True where func_x(y.a) > 0; (100 rows) SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM t2.b; - a | b | a | b ----+---+---+--- - 1 | 1 | | - 2 | 1 | 2 | 3 + a | b | c | a | b +---+---+---+---+--- + 1 | 1 | 1 | | + 2 | 1 | 2 | 2 | 3 (2 rows) SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS DISTINCT FROM NULL; - a | b | a | b ----+---+---+--- - 1 | 1 | | - 2 | 1 | 2 | 3 + a | b | c | a | b +---+---+---+---+--- + 1 | 1 | 1 | | + 2 | 1 | 2 | 2 | 3 (2 rows) SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS DISTINCT FROM NULL; - a | b | a | b ----+---+---+--- - 2 | 1 | 2 | 3 + a | b | c | a | b +---+---+---+---+--- + 2 | 1 | 2 | 2 | 3 (1 row) SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t2.b IS NOT DISTINCT FROM NULL; - a | b | a | b ----+---+---+--- - 1 | 1 | | - 3 | | | + a | b | c | a | b +---+---+---+---+--- + 1 | 1 | 1 | | + 3 | | 3 | | (2 rows) SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.a = t2.a WHERE t1.b IS NOT DISTINCT FROM NULL; - a | b | a | b ----+---+---+--- - 3 | | | + a | b | c | a | b +---+---+---+---+--- + 3 | | 3 | | (1 row) +--- Tests for LOJ with single predicate uses columns of outer child only +explain select t1.* from t1 left outer join t3 on t1.b=1; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.10 rows=1 width=12) + -> Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=12) + Join Filter: (t1.b = 1) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Optimizer: GPORCA +(8 rows) + +select t1.* from t1 left outer join t3 on t1.b=1; + a | b | c +---+---+--- + 1 | 1 | 1 + 2 | 1 | 2 + 3 | | 3 +(3 rows) + +explain select t1.* from t1 left outer join t3 on t1.c=1; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.10 rows=1 width=12) + -> Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=12) + Join Filter: (t1.c = 1) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Optimizer: GPORCA +(8 rows) + +select t1.* from t1 left outer join t3 on t1.c=1; + a | b | c +---+---+--- + 1 | 1 | 1 + 2 | 1 | 2 + 3 | | 3 +(3 rows) + +--- Tests for LOJ with null-filtering on self check conditions. +--- make sure that we dont optimize the equality checks of inner table of LOJ. +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=4) + -> Result (cost=0.00..1324032.37 rows=1 width=4) + Filter: ((t3.a = t3.a) IS NULL) + -> Nested Loop Left Join (cost=0.00..1324032.37 rows=1 width=8) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4) + Filter: (b > a) + Optimizer: GPORCA +(11 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.14 rows=2 width=4) + -> Nested Loop Left Join (cost=0.00..1324032.14 rows=1 width=4) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + Filter: (true IS NULL) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Filter: (b > a) + Optimizer: GPORCA +(10 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t1.c = t1.c IS NULL; + c +--- +(0 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.b=2; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.14 rows=1 width=4) + -> Nested Loop (cost=0.00..1324032.14 rows=1 width=4) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Filter: ((true IS NULL) AND (b = 2) AND (b > a)) + Optimizer: GPORCA +(9 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t3.a=2; + c +--- +(0 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.38 rows=1 width=4) + -> Result (cost=0.00..1324032.38 rows=1 width=4) + Filter: ((t3.a = t3.a) IS NULL) + -> Nested Loop Left Join (cost=0.00..1324032.38 rows=1 width=8) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + Filter: (b = 1) + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4) + Filter: (b > a) + Optimizer: GPORCA +(12 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL and t1.b=1; + c +--- + 2 + 1 +(2 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=4) + -> Result (cost=0.00..1324032.37 rows=1 width=4) + Filter: (((t3.a = t3.a) IS NULL) OR (t3.a IS NULL)) + -> Nested Loop Left Join (cost=0.00..1324032.37 rows=1 width=8) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4) + Filter: (b > a) + Optimizer: GPORCA +(11 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.a is NULL; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.61 rows=1 width=4) + -> Result (cost=0.00..1324032.61 rows=1 width=4) + Filter: (((t3.a = t3.a) IS NULL) OR (t3.b = 2)) + -> Nested Loop Left Join (cost=0.00..1324032.61 rows=1 width=12) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8) + Filter: (b > a) + Optimizer: GPORCA +(11 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t3.b=2; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.48 rows=1 width=4) + -> Result (cost=0.00..1324032.48 rows=1 width=4) + Filter: (((t3.a = t3.a) IS NULL) OR (t1.a = 1)) + -> Nested Loop Left Join (cost=0.00..1324032.48 rows=1 width=12) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=8) + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=4) + Filter: (b > a) + Optimizer: GPORCA +(11 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN t3 ON t3.b > t3.a WHERE t3.a = t3.a IS NULL or t1.a=1; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.13 rows=1 width=4) + -> Nested Loop Left Join (cost=0.00..1324032.13 rows=1 width=4) + Join Filter: ((((t1.a + t1.b)) = ((t1.a + t1.b))) IS NULL) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Optimizer: GPORCA +(8 rows) + +SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t.cc = t.cc IS NULL; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.19 rows=2 width=4) + -> Nested Loop Left Join (cost=0.00..1324032.19 rows=1 width=4) + Join Filter: true + -> Result (cost=0.00..431.00 rows=1 width=4) + Filter: ((((t1.a + t1.b)) = ((t1.a + t1.b))) IS NULL) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=1) + Filter: (a > b) + Optimizer: GPORCA +(11 rows) + +SELECT t.c FROM (select t1.*, t1.a+t1.b as cc from t1)t LEFT OUTER JOIN t3 ON t3.a > t3.b where t.cc = t.cc IS NULL; + c +--- + 3 +(1 row) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.10 rows=2 width=4) + -> Nested Loop Left Join (cost=0.00..1324032.10 rows=1 width=4) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Result (cost=0.00..431.00 rows=1 width=1) + Filter: ((((t3.a + t3.b)) = ((t3.a + t3.b))) IS NULL) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(10 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.cc = t.cc IS NULL; + c +--- + 1 + 2 + 3 +(3 rows) + +explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.22 rows=1 width=4) + -> Result (cost=0.00..1324032.22 rows=1 width=4) + Filter: ((((t3.a + t3.b)) = ((t3.a + t3.b))) IS NULL) + -> Nested Loop Left Join (cost=0.00..1324032.22 rows=1 width=8) + Join Filter: true + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t3 (cost=0.00..431.00 rows=1 width=8) + Filter: (b > a) + Optimizer: GPORCA +(11 rows) + +SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL; + c +--- + 1 + 2 + 3 +(3 rows) + +-- Test for eliminating self check condition in subquery +explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a + < t3.a) AS t ON t1.a = t.b; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=2 width=16) + -> Hash Left Join (cost=0.00..431.00 rows=1 width=16) + Hash Cond: (a = (NULL::integer)) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Hash (cost=0.00..0.00 rows=0 width=4) + -> Result (cost=0.00..0.00 rows=0 width=4) + One-Time Filter: false + Optimizer: GPORCA +(8 rows) + -- Test for unexpected NLJ qual -- explain select 1 as mrs_t1 where 1 <= ALL (select x from z); @@ -3214,6 +3526,7 @@ explain (costs off) select * from b, lateral (select * from a, c where b.i = a.i --------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop + Join Filter: ((a.i + b.i) = c.j) -> Broadcast Motion 3:3 (slice2; segments: 3) -> Seq Scan on b -> Materialize @@ -3225,7 +3538,7 @@ explain (costs off) select * from b, lateral (select * from a, c where b.i = a.i -> Index Only Scan using c_j_i_idx on c Index Cond: (j = (a.i + b.i)) Optimizer: Postgres query optimizer -(13 rows) +(14 rows) select * from b, lateral (select * from a, c where b.i = a.i and (a.i + b.i) = c.j) as ac; i | i | i | j @@ -3347,6 +3660,561 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0' Optimizer: Postgres query optimizer (10 rows) +-- Testing optimizer_enable_nljoin +SET optimizer_enable_hashjoin=off; +SET optimizer_enable_nljoin=off; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=618.25..205952.92 rows=6707190 width=20) + -> Hash Join (cost=618.25..116523.72 rows=2235730 width=20) + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=293.67..293.67 rows=25967 width=12) + -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(7 rows) + +SET optimizer_enable_nljoin=on; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.37 rows=1 width=20) + -> Nested Loop (cost=0.00..1324032.37 rows=1 width=20) + Join Filter: (t1.a = t2.a) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(6 rows) + +RESET optimizer_enable_hashjoin; +RESET optimizer_enable_nljoin; +-- Test hashed distribution spec derivation and -- +-- motion enforcement given INDF join condition -- +-- Outer joins' inner table yields false nulls -- +-- colocation if join condition is null-aware -- +--start_ignore +drop table o1; +ERROR: table "o1" does not exist +drop table o2; +ERROR: table "o2" does not exist +drop table o3; +ERROR: table "o3" does not exist +--end_ignore +-- Current case add in Derive Combined Hashed Spec For Outer Joins (#14045), f8264ad +-- GPDB will got wrong result in ORCA plan, util merged [FIXME] Wrong results on main branch for INDF query,ce25faf +create table o1 (a1 int, b1 int) distributed by (a1); +create table o2 (a2 int, b2 int) distributed by (a2); +create table o3 (a3 int, b3 int) distributed by (a3); +insert into o1 select i, i from generate_series(1,20) i; +insert into o2 select i, null from generate_series(11,30) i; +insert into o3 values (NULL, 20); +select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3; + a1 | b1 | a2 | b2 | a3 | b3 +----+----+----+----+----+---- + 2 | 2 | | | | 20 + 3 | 3 | | | | 20 + 4 | 4 | | | | 20 + 7 | 7 | | | | 20 + 8 | 8 | | | | 20 + 16 | 16 | 16 | | | + 18 | 18 | 18 | | | + 19 | 19 | 19 | | | + 1 | 1 | | | | 20 + 12 | 12 | 12 | | | + 15 | 15 | 15 | | | + 20 | 20 | 20 | | | + 5 | 5 | | | | 20 + 6 | 6 | | | | 20 + 9 | 9 | | | | 20 + 10 | 10 | | | | 20 + 11 | 11 | 11 | | | + 13 | 13 | 13 | | | + 14 | 14 | 14 | | | + 17 | 17 | 17 | | | +(20 rows) + +select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3 and b2 is distinct from b3; + a1 | b1 | a2 | b2 | a3 | b3 +----+----+----+----+----+---- + 2 | 2 | | | | 20 + 3 | 3 | | | | 20 + 4 | 4 | | | | 20 + 7 | 7 | | | | 20 + 8 | 8 | | | | 20 + 16 | 16 | 16 | | | + 18 | 18 | 18 | | | + 19 | 19 | 19 | | | + 1 | 1 | | | | 20 + 12 | 12 | 12 | | | + 15 | 15 | 15 | | | + 20 | 20 | 20 | | | + 5 | 5 | | | | 20 + 6 | 6 | | | | 20 + 9 | 9 | | | | 20 + 10 | 10 | | | | 20 + 11 | 11 | 11 | | | + 13 | 13 | 13 | | | + 14 | 14 | 14 | | | + 17 | 17 | 17 | | | +(20 rows) + +select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3 and b2 = b3; + a1 | b1 | a2 | b2 | a3 | b3 +----+----+----+----+----+---- + 2 | 2 | | | | + 3 | 3 | | | | + 4 | 4 | | | | + 7 | 7 | | | | + 8 | 8 | | | | + 16 | 16 | 16 | | | + 18 | 18 | 18 | | | + 19 | 19 | 19 | | | + 1 | 1 | | | | + 12 | 12 | 12 | | | + 15 | 15 | 15 | | | + 20 | 20 | 20 | | | + 5 | 5 | | | | + 6 | 6 | | | | + 9 | 9 | | | | + 10 | 10 | | | | + 11 | 11 | 11 | | | + 13 | 13 | 13 | | | + 14 | 14 | 14 | | | + 17 | 17 | 17 | | | +(20 rows) + +explain select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Left Join (cost=0.00..1293.00 rows=3 width=24) + Hash Cond: (NOT (o2.a2 IS DISTINCT FROM o3.a3)) + -> Hash Left Join (cost=0.00..862.00 rows=2 width=16) + Hash Cond: (o1.a1 = o2.a2) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o1 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o2 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o3 (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(13 rows) + +explain select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3 and b2 is distinct from b3; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Left Join (cost=0.00..1293.00 rows=3 width=24) + Hash Cond: (NOT (o2.a2 IS DISTINCT FROM o3.a3)) + Join Filter: (o2.b2 IS DISTINCT FROM o3.b3) + -> Hash Left Join (cost=0.00..862.00 rows=2 width=16) + Hash Cond: (o1.a1 = o2.a2) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o1 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o2 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o3 (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(14 rows) + +explain select * from o1 left join o2 on a1 = a2 left join o3 on a2 is not distinct from a3 and b2 = b3; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Left Join (cost=0.00..1293.00 rows=3 width=24) + Hash Cond: ((NOT (o2.a2 IS DISTINCT FROM o3.a3)) AND (o2.b2 = o3.b3)) + -> Hash Left Join (cost=0.00..862.00 rows=2 width=16) + Hash Cond: (o1.a1 = o2.a2) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o1 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o2 (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on o3 (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(13 rows) + +-- Test hashed distribution spec derived from a self join +truncate o1; +truncate o2; +insert into o1 select i, i from generate_series(1,9) i; +insert into o1 values (NULL, NULL); +insert into o2 select i, NULL from generate_series(11,100) i; +insert into o2 values (NULL, NULL); +analyze o1; +analyze o2; +-- Self join maintains the distribution keys from both children (i.e. the join +-- result produces a combine hash distribution spec) +-- +-- Expect no redistribute under the joins +explain select t2.b1 from (select distinct a1 from o1) t1 +left outer join (select a1, b1 from o1) t2 on t1.a1 = t2.a1 +left outer join o1 t3 on t2.a1 = t3.a1 +left outer join o1 t4 on t2.a1 = t4.a1; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1724.00 rows=13 width=4) + -> Hash Left Join (cost=0.00..1724.00 rows=5 width=4) + Hash Cond: (o1.a1 = t4.a1) + -> Hash Left Join (cost=0.00..1293.00 rows=4 width=8) + Hash Cond: (o1.a1 = t3.a1) + -> Hash Right Join (cost=0.00..862.00 rows=4 width=8) + Hash Cond: (o1.a1 = o1_1.a1) + -> Seq Scan on o1 (cost=0.00..431.00 rows=4 width=8) + -> Hash (cost=431.00..431.00 rows=4 width=4) + -> GroupAggregate (cost=0.00..431.00 rows=4 width=4) + Group Key: o1_1.a1 + -> Sort (cost=0.00..431.00 rows=4 width=4) + Sort Key: o1_1.a1 + -> Seq Scan on o1 o1_1 (cost=0.00..431.00 rows=4 width=4) + -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Seq Scan on o1 t3 (cost=0.00..431.00 rows=4 width=4) + -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Seq Scan on o1 t4 (cost=0.00..431.00 rows=4 width=4) + Optimizer: GPORCA +(19 rows) + +select t2.b1 from (select distinct a1 from o1) t1 +left outer join (select a1, b1 from o1) t2 on t1.a1 = t2.a1 +left outer join o1 t3 on t2.a1 = t3.a1 +left outer join o1 t4 on t2.a1 = t4.a1; + b1 +---- + 1 + 5 + 6 + 9 + 2 + 3 + 4 + 7 + 8 + +(10 rows) + +-- Self join maintains the distribution keys from both children (i.e. the join +-- result produces a combine hash distribution spec) +-- +-- Expect no redistribute under the joins +explain (costs off) select t2.b1 from o1 t3 +right outer join (select a1, b1 from o1) t2 on t2.a1 = t3.a1 +right outer join o1 t4 on t2.a1 = t4.a1 +right outer join (select distinct a1 from o1) t1 on t1.a1 = t2.a1; + QUERY PLAN +--------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (o1.a1 = o1_1.a1) + -> Hash Right Join + Hash Cond: (o1.a1 = t4.a1) + -> Hash Left Join + Hash Cond: (o1.a1 = t3.a1) + -> Seq Scan on o1 + -> Hash + -> Seq Scan on o1 t3 + -> Hash + -> Seq Scan on o1 t4 + -> Hash + -> GroupAggregate + Group Key: o1_1.a1 + -> Sort + Sort Key: o1_1.a1 + -> Seq Scan on o1 o1_1 + Optimizer: GPORCA +(19 rows) + +select t2.b1 from o1 t3 +right outer join (select a1, b1 from o1) t2 on t2.a1 = t3.a1 +right outer join o1 t4 on t2.a1 = t4.a1 +right outer join (select distinct a1 from o1) t1 on t1.a1 = t2.a1; + b1 +---- + 1 + 5 + 6 + 9 + 2 + 3 + 4 + 7 + 8 + +(10 rows) + +-- Self join, but the projected distribution key value is changed +-- +-- Expect redistribute under the joins +explain select t2.b1 from (select distinct a1+1 as a1 from o1) t1 +left outer join o1 t2 on t2.a1 = t1.a1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=11 width=4) + -> Hash Right Join (cost=0.00..862.00 rows=4 width=4) + Hash Cond: (t2.a1 = ((o1.a1 + 1))) + -> Seq Scan on o1 t2 (cost=0.00..431.00 rows=4 width=8) + -> Hash (cost=431.00..431.00 rows=4 width=4) + -> GroupAggregate (cost=0.00..431.00 rows=4 width=4) + Group Key: ((o1.a1 + 1)) + -> Sort (cost=0.00..431.00 rows=4 width=4) + Sort Key: ((o1.a1 + 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + Hash Key: ((o1.a1 + 1)) + -> Seq Scan on o1 (cost=0.00..431.00 rows=4 width=4) + Optimizer: GPORCA +(13 rows) + +select t2.b1 from (select distinct a1+1 as a1 from o1) t1 +left outer join o1 t2 on t2.a1 = t1.a1; + b1 +---- + 5 + 6 + 9 + + 2 + 3 + 4 + 7 + 8 + +(10 rows) + +-- Self join, but the joined distribution key value is changed +-- +-- Expect redistribute under the joins +explain select t2.b1 from (select distinct a1 from o1) t1 +left outer join o1 t2 on t2.a1 = t1.a1+1; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=14 width=4) + -> Hash Right Join (cost=0.00..862.00 rows=5 width=4) + Hash Cond: (t2.a1 = (o1.a1 + 1)) + -> Seq Scan on o1 t2 (cost=0.00..431.00 rows=4 width=8) + -> Hash (cost=431.00..431.00 rows=4 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4) + Hash Key: (o1.a1 + 1) + -> GroupAggregate (cost=0.00..431.00 rows=4 width=4) + Group Key: o1.a1 + -> Sort (cost=0.00..431.00 rows=4 width=4) + Sort Key: o1.a1 + -> Seq Scan on o1 (cost=0.00..431.00 rows=4 width=4) + Optimizer: GPORCA +(13 rows) + +select t2.b1 from (select distinct a1 from o1) t1 +left outer join o1 t2 on t2.a1 = t1.a1+1; + b1 +---- + 5 + 6 + 9 + + 2 + 3 + 4 + 7 + 8 + +(10 rows) + +-- Test case from community Github PR 13722 +create table t_13722(id int, tt timestamp) + distributed by (id); +-- j->jointype == join_lasj_notin +select + t1.* +from + t_13722 t1 +where + t1.id not in (select id from t_13722 where id != 4) + and + t1.tt = (select min(tt) from t_13722 where id = t1.id); + id | tt +----+---- +(0 rows) + +-- j->jointype == join_anti +select + t1.* +from + t_13722 t1 +where + not exists (select id from t_13722 where id != 4 and id = t1.id) + and t1.tt = (select min(tt) from t_13722 where id = t1.id); + id | tt +----+---- +(0 rows) + +drop table t_13722; +-- This test is introduced to verify incorrect result +-- from hash join of char columns is fixed +-- Notice when varchar/text is cast to bpchar and used for +-- comparison, the trailing spaces are ignored +-- When char is cast to varchar/text, it's considered +-- comparison, and the trailing spaces are also ignored +-- Prior to the fix, opclasses belonging to different +-- opfamilies could be grouped as equivalent, and thence +-- deriving incorrect equality hash join conditions +--start_ignore +drop table foo; +ERROR: table "foo" does not exist +drop table bar; +ERROR: table "bar" does not exist +drop table baz; +ERROR: table "baz" does not exist +--end_ignore +create table foo (varchar_3 varchar(3)) distributed by (varchar_3); +create table bar (char_3 char(3)) distributed by (char_3); +create table baz (text_any text) distributed by (text_any); +insert into foo values ('cd'); -- 0 trailing spaces +insert into bar values ('cd '); -- 1 trailing space +insert into baz values ('cd '); -- 2 trailing spaces +-- varchar cast to bpchar +-- 'cd' matches 'cd', returns 1 row +explain select varchar_3, char_3 from foo join bar on varchar_3=char_3; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=16) + -> Hash Join (cost=0.00..862.00 rows=1 width=16) + Hash Cond: ((foo.varchar_3)::bpchar = bar.char_3) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + Hash Key: foo.varchar_3 + -> Seq Scan on foo (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(9 rows) + +select varchar_3, char_3 from foo join bar on varchar_3=char_3; + varchar_3 | char_3 +-----------+-------- + cd | cd +(1 row) + +-- char cast to text +-- 'cd' doesn't match 'cd ', returns 0 rows +explain select char_3, text_any from bar join baz on char_3=text_any; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=16) + -> Hash Join (cost=0.00..862.00 rows=1 width=16) + Hash Cond: ((bar.char_3)::text = baz.text_any) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + Hash Key: (bar.char_3)::text + -> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on baz (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(9 rows) + +select char_3, text_any from bar join baz on char_3=text_any; + char_3 | text_any +--------+---------- +(0 rows) + +-- foo - bar join: varchar cast to bpchar +-- 'cd' matches 'cd' +-- foo - baz join: no cast +-- 'cd' doesn't match 'cd ' +-- returns 0 rows +-- Notice ORCA changes join order to minimize motion +explain select varchar_3, char_3, text_any from foo join bar on varchar_3=char_3 +join baz on varchar_3=text_any; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=1 width=24) + -> Hash Join (cost=0.00..1293.00 rows=1 width=24) + Hash Cond: ((foo.varchar_3)::bpchar = bar.char_3) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..862.00 rows=1 width=16) + Hash Key: foo.varchar_3 + -> Hash Join (cost=0.00..862.00 rows=1 width=16) + Hash Cond: ((foo.varchar_3)::text = baz.text_any) + -> Seq Scan on foo (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on baz (cost=0.00..431.00 rows=1 width=8) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(13 rows) + +select varchar_3, char_3, text_any from foo join bar on varchar_3=char_3 +join baz on varchar_3=text_any; + varchar_3 | char_3 | text_any +-----------+--------+---------- +(0 rows) + +-- +-- Test case for Hash Join rescan after squelched without hashtable built +-- See https://github.com/greenplum-db/gpdb/pull/15590 +-- +--- Lateral Join +set from_collapse_limit = 1; +set join_collapse_limit = 1; +select 1 from pg_namespace join lateral + (select * from aclexplode(nspacl) x join pg_authid on x.grantee = pg_authid.oid where rolname = current_user) z on true limit 1; + ?column? +---------- + 1 +(1 row) + +reset from_collapse_limit; +reset join_collapse_limit; +--- NestLoop index join +create table l_table (a int, b int) distributed replicated; +create index l_table_idx on l_table(a); +create table r_table1 (ra1 int, rb1 int) distributed replicated; +create table r_table2 (ra2 int, rb2 int) distributed replicated; +insert into l_table select i % 10 , i from generate_series(1, 10000) i; +insert into r_table1 select i, i from generate_series(1, 1000) i; +insert into r_table2 values(11, 11), (1, 1) ; +analyze l_table; +analyze r_table1; +analyze r_table2; +set optimizer to off; +set enable_nestloop to on; +set enable_bitmapscan to off; +explain select * from r_table2 where ra2 in ( select a from l_table join r_table1 on b = rb1); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=57.99..57.99 rows=10 width=8) + -> Nested Loop Semi Join (cost=23.66..57.99 rows=10 width=8) + -> Seq Scan on r_table2 (cost=0.00..1.02 rows=2 width=8) + -> Hash Join (cost=23.66..56.28 rows=100 width=4) + Hash Cond: (l_table.b = r_table1.rb1) + -> Index Scan using l_table_idx on l_table (cost=0.16..20.15 rows=1000 width=8) + Index Cond: (a = r_table2.ra2) + -> Hash (cost=11.00..11.00 rows=1000 width=4) + -> Seq Scan on r_table1 (cost=0.00..11.00 rows=1000 width=4) + Optimizer: Postgres query optimizer +(10 rows) + +select * from r_table2 where ra2 in ( select a from l_table join r_table1 on b = rb1); + ra2 | rb2 +-----+----- + 1 | 1 +(1 row) + +reset optimizer; +reset enable_nestloop; +reset enable_bitmapscan; +drop table l_table; +drop table r_table1; +drop table r_table2; +-- Should throw an error during planning: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +-- Falls back on GPORCA, but shouldn't cause GPORCA to crash +CREATE TABLE ext_stats_tbl(c0 name, c2 boolean); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE STATISTICS IF NOT EXISTS s0 (mcv) ON c2, c0 FROM ext_stats_tbl; +INSERT INTO ext_stats_tbl VALUES('tC', true); +ANALYZE ext_stats_tbl; +explain SELECT 1 FROM ext_stats_tbl t11 FULL JOIN ext_stats_tbl t12 ON t12.c2; +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions -- Clean up. None of the objects we create are very interesting to keep around. reset search_path; set client_min_messages='warning'; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_olap_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_olap_optimizer.out index 6f1ac2d5097..cc7e0467d90 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_olap_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_olap_optimizer.out @@ -65,8 +65,7 @@ drop aggregate if exists mysum2(int4); NOTICE: aggregate mysum2(int4) does not exist, skipping -- end_ignore create table toy(id,val) as select i,i from generate_series(1,5) i; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. create aggregate mysum1(int4) (sfunc = int4_sum, combinefunc=int8pl, stype=bigint); create aggregate mysum2(int4) (sfunc = int4_sum, stype=bigint); -- TEST @@ -94,9 +93,9 @@ drop table if exists toy; drop type if exists ema_type cascade; NOTICE: type "ema_type" does not exist, skipping drop function if exists ema_adv(t ema_type, v float, x float) cascade; -ERROR: type "ema_type" does not exist +NOTICE: type "ema_type" does not exist, skipping drop function if exists ema_fin(t ema_type) cascade; -ERROR: type "ema_type" does not exist +NOTICE: type "ema_type" does not exist, skipping drop aggregate if exists ema(float, float); NOTICE: aggregate ema(pg_catalog.float8,pg_catalog.float8) does not exist, skipping drop table if exists ema_test cascade; @@ -408,35 +407,37 @@ drop table mpp23240; -- Test for the bug reported at https://github.com/greenplum-db/gpdb/issues/2236 -- create table test1 (x int, y int, z double precision); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into test1 select a, b, a*10 + b from generate_series(1, 5) a, generate_series(1, 5) b; select sum(z) over (partition by x) as sumx, sum(z) over (partition by y) as sumy from test1; sumx | sumy ------+------ - 65 | 155 - 65 | 160 - 65 | 165 - 65 | 170 65 | 175 - 115 | 155 - 115 | 160 - 115 | 165 - 115 | 170 + 265 | 175 115 | 175 - 165 | 155 - 165 | 160 - 165 | 165 - 165 | 170 165 | 175 - 215 | 155 - 215 | 160 - 215 | 165 - 215 | 170 215 | 175 - 265 | 155 + 165 | 160 + 115 | 160 + 65 | 160 265 | 160 + 215 | 160 + 115 | 165 + 65 | 165 265 | 165 + 165 | 165 + 215 | 165 + 215 | 170 + 165 | 170 + 65 | 170 + 115 | 170 265 | 170 - 265 | 175 + 65 | 155 + 265 | 155 + 115 | 155 + 165 | 155 + 215 | 155 (25 rows) drop table test1; @@ -453,51 +454,221 @@ where g in ( 15 (1 row) +-- +-- Test to check the query plan for a ROLLUP query. +-- +explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Sequence + -> Shared Scan (share slice:id 1:0) + -> Seq Scan on sale + -> Append + -> GroupAggregate + Group Key: share0_ref2.cn, share0_ref2.vn, share0_ref2.pn + -> Sort + Sort Key: share0_ref2.cn, share0_ref2.vn, share0_ref2.pn + -> Shared Scan (share slice:id 1:0) + -> HashAggregate + Group Key: share0_ref3.cn, share0_ref3.vn + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: share0_ref3.cn, share0_ref3.vn + -> Result + -> Shared Scan (share slice:id 2:0) + -> HashAggregate + Group Key: share0_ref4.cn + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: share0_ref4.cn + -> Result + -> Shared Scan (share slice:id 3:0) + -> Result + -> Redistribute Motion 1:3 (slice4) + -> Finalize Aggregate + -> Gather Motion 3:1 (slice5; segments: 3) + -> Partial Aggregate + -> Shared Scan (share slice:id 5:0) + Optimizer: GPORCA +(29 rows) + +select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + cn | vn | pn | sum +----+----+-----+--------- + 2 | 50 | 400 | 0 + 3 | 30 | 600 | 60 + 4 | 40 | 800 | 1 + 4 | 40 | | 2 + 3 | 30 | | 120 + 3 | | | 120 + 4 | | | 2 + 2 | | | 2640000 + 1 | 20 | 100 | 0 + 1 | 30 | 300 | 0 + 1 | 30 | 500 | 60 + 1 | 50 | 400 | 0 + 2 | 40 | 100 | 2640000 + 3 | 40 | 200 | 0 + 4 | 40 | 700 | 1 + 2 | 50 | | 0 + 1 | 10 | | 0 + 1 | 50 | | 0 + 1 | 20 | | 0 + 1 | 30 | | 60 + 1 | | | 60 + 1 | 10 | 200 | 0 + 3 | 30 | 500 | 60 + 2 | 40 | | 2640000 + 3 | 40 | | 0 + | | | 2640182 +(26 rows) + -- -- This caused a crash in ROLLUP planning at one point. -- +EXPLAIN (costs off) +SELECT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Sequence + -> Shared Scan (share slice:id 1:0) + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: sale.vn + -> Seq Scan on sale + -> Index Only Scan using vendor_pkey on vendor + Index Cond: (vn = sale.vn) + -> Append + -> GroupAggregate + Group Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Sort + Sort Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Shared Scan (share slice:id 1:0) + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Result + -> Shared Scan (share slice:id 3:0) + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: share0_ref4.dt, share0_ref4.cn + -> Result + -> Shared Scan (share slice:id 4:0) + -> Result + -> Redistribute Motion 1:3 (slice5) + -> Aggregate + -> Gather Motion 3:1 (slice6; segments: 3) + -> Result + -> Shared Scan (share slice:id 6:0) + Optimizer: GPORCA +(39 rows) + SELECT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); vn ---- - 40 - 20 - 30 - 10 + 40 + 40 + 20 + 30 + 50 + 30 + 50 + 30 + 30 + 40 40 - 50 - 30 - 40 + 10 - 30 - 50 - 30 - 40 (34 rows) +EXPLAIN (costs off) +SELECT DISTINCT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: share0_ref2.vn + -> Sort + Sort Key: share0_ref2.vn + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: share0_ref2.vn + -> Sequence + -> Shared Scan (share slice:id 2:0) + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: sale.vn + -> Seq Scan on sale + -> Index Only Scan using vendor_pkey on vendor + Index Cond: (vn = sale.vn) + -> Append + -> GroupAggregate + Group Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Sort + Sort Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Shared Scan (share slice:id 2:0) + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Result + -> Shared Scan (share slice:id 4:0) + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: share0_ref4.dt, share0_ref4.cn + -> Result + -> Shared Scan (share slice:id 5:0) + -> Result + -> Redistribute Motion 1:3 (slice6) + -> Aggregate + -> Gather Motion 3:1 (slice7; segments: 3) + -> Result + -> Shared Scan (share slice:id 7:0) + Optimizer: GPORCA +(45 rows) + SELECT DISTINCT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn @@ -505,11 +676,11 @@ GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); vn ---- 10 + 20 30 40 50 - (6 rows) -- @@ -523,17 +694,19 @@ GROUP BY ROLLUP( sale.vn); vn | rank ----+------ | 1 - 10 | 1 20 | 1 30 | 1 40 | 1 50 | 1 + 10 | 1 (6 rows) -- -- Test window function with constant PARTITION BY -- CREATE TABLE testtab (a int4); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into testtab values (1), (2); SELECT count(*) OVER (PARTITION BY 1) AS count FROM testtab; count @@ -600,17 +773,22 @@ create table t1_github_issue_10143( code varchar(5), name varchar(60) ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'base_ym' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2_github_issue_10143( base_ym varchar(6), dong varchar(8), code varchar(6), salary numeric(18) ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'base_ym' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1_github_issue_10143 values ('a', 'acode', 'aname'); insert into t2_github_issue_10143 values ('a', 'adong', 'acode', 1000); insert into t2_github_issue_10143 values ('b', 'bdong', 'bcode', 1100); analyze t1_github_issue_10143; analyze t2_github_issue_10143; +set optimizer_trace_fallback = on; explain select (select name from t1_github_issue_10143 where code = a.code limit 1) as dongnm ,sum(sum(a.salary)) over() from t2_github_issue_10143 a @@ -640,8 +818,8 @@ from t2_github_issue_10143 a group by a.code; dongnm | sum --------+------ - aname | 2100 | 2100 + aname | 2100 (2 rows) select * from (select sum(a.salary) over(), count(*) @@ -653,7 +831,6 @@ select * from (select sum(a.salary) over(), count(*) 2100 | 1 (2 rows) --- this query currently falls back, needs to be fixed select (select rn from (select row_number() over () as rn, name from t1_github_issue_10143 where code = a.code @@ -664,13 +841,10 @@ from t2_github_issue_10143 a group by a.code; dongnm | sum --------+------ - 1 | 2100 | 2100 + 1 | 2100 (2 rows) --- start_ignore --- GPDB_12_MERGE_FIXME: unsupported exec location fallback --- end_ignore with cte as (select row_number() over (order by code) as rn1, code from t2_github_issue_10143 group by code) @@ -686,7 +860,15 @@ select * from cte; 2 | bcode (3 rows) +reset optimizer_trace_fallback; -- CLEANUP -- start_ignore drop schema bfv_olap cascade; +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table customer +drop cascades to table vendor +drop cascades to table sale +drop cascades to table testtab +drop cascades to table t1_github_issue_10143 +drop cascades to table t2_github_issue_10143 -- end_ignore diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_partition_plans_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_partition_plans_optimizer.out index 5a913ab8688..06f0bab1c56 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_partition_plans_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_partition_plans_optimizer.out @@ -1,15 +1,30 @@ -- start_matchsubs -- m/((Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (0[1-9]|[12][0-9]|3[01]) ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](.[0-9]+)? (?!0000)[0-9]{4}.*)+(['"])/ -- s/((Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (0[1-9]|[12][0-9]|3[01]) ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](.[0-9]+)? (?!0000)[0-9]{4}.*)+(['"])/xxx xx xx xx:xx:xx xxxx"/ +-- m/Memory Usage: \d+\w?B/ +-- s/Memory Usage: \d+\w?B/Memory Usage: ###B/ +-- m/Memory: \d+kB/ +-- s/Memory: \d+kB/Memory: ###kB/ +-- m/Max: \d+kB/ +-- s/Max: \d+kB/Max: ###kB/ +-- m/Buckets: \d+/ +-- s/Buckets: \d+/Buckets: ###/ +-- m/Batches: \d+/ +-- s/Batches: \d+/Batches: ###/ +-- m/segment \d+/ +-- s/segment \d+/segment ###/ +-- m/using \d+ of \d+ buckets/ +-- s/using \d+ of \d+ buckets/using ## of ### buckets/ -- end_matchsubs create schema bfv_partition_plans; set search_path=bfv_partition_plans; +SET optimizer_trace_fallback=on; -- -- Initial setup for all the partitioning test for this suite -- -- start_ignore create language plpython3u; -ERROR: language "plpython3u" already exists +ERROR: extension "plpython3u" already exists -- end_ignore create or replace function count_operator(query text, operator text) returns int as $$ @@ -43,7 +58,11 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into mpp3061 values(1); update mpp3061 set i = 2 where i = 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables select tableoid::regclass, * from mpp3061 where i = 2; +NOTICE: One or more columns in the following table(s) do not have statistics: mpp3061 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. tableoid | i -----------------+--- mpp3061_1_prt_2 | 2 @@ -72,6 +91,10 @@ distributed by (subscription_id, bill_stmt_id) -- TEST select count_operator('select cust_type, subscription_status,count(distinct subscription_id),sum(voice_call_min),sum(minute_per_call) from mpp7980 where month_id =E''2009-04-01'' group by rollup(1,2);','SIGSEGV'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: mpp7980 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. count_operator ---------------- 0 @@ -81,12 +104,14 @@ insert into mpp7980 values('2009-04-01','xyz','zyz','1',1,1,'1'); insert into mpp7980 values('2009-04-01','zxyz','zyz','2',2,1,'1'); insert into mpp7980 values('2009-03-03','xyz','zyz','4',1,3,'1'); select cust_type, subscription_status,count(distinct subscription_id),sum(voice_call_min),sum(minute_per_call) from mpp7980 where month_id ='2009-04-01' group by rollup(1,2); +NOTICE: One or more columns in the following table(s) do not have statistics: mpp7980 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. cust_type | subscription_status | count | sum | sum -----------+---------------------+-------+------+------ - | | 1 | 3.00 | 2.00 - zyz | | 1 | 3.00 | 2.00 zyz | 1 | 1 | 1.00 | 1.00 zyz | 2 | 1 | 2.00 | 1.00 + zyz | | 1 | 3.00 | 2.00 + | | 1 | 3.00 | 2.00 (4 rows) -- CLEANUP @@ -98,6 +123,7 @@ drop table mpp7980; -- SETUP -- start_ignore set optimizer_enable_bitmapscan=on; +set optimizer_enable_dynamicbitmapscan=on; set optimizer_enable_indexjoin=on; drop table if exists mpp23195_t1; NOTICE: table "mpp23195_t1" does not exist, skipping @@ -115,12 +141,36 @@ insert into mpp23195_t1 values (generate_series(1,19)); insert into mpp23195_t2 values (1); -- TEST select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: mpp23195_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. find_operator --------------- ['false'] (1 row) select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i; +NOTICE: One or more columns in the following table(s) do not have statistics: mpp23195_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + i | i +---+--- +(0 rows) + +vacuum mpp23195_t1; +select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Only Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: mpp23195_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + find_operator +--------------- + ['true'] +(1 row) + +select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i; +NOTICE: One or more columns in the following table(s) do not have statistics: mpp23195_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. i | i ---+--- (0 rows) @@ -130,6 +180,7 @@ select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i; drop table if exists mpp23195_t1; drop table if exists mpp23195_t2; set optimizer_enable_bitmapscan=off; +set optimizer_enable_dynamicbitmapscan=off; set optimizer_enable_indexjoin=off; -- end_ignore -- @@ -153,12 +204,20 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur -- TEST set optimizer_enable_hashjoin = off; select find_operator('analyze select * from mpp21834_t2,mpp21834_t1 where mpp21834_t2.i < mpp21834_t1.i;','Dynamic Index Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: mpp21834_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. find_operator --------------- ['false'] (1 row) select find_operator('analyze select * from mpp21834_t2,mpp21834_t1 where mpp21834_t2.i < mpp21834_t1.i;','Nested Loop'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: mpp21834_t1 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. find_operator --------------- ['true'] @@ -192,6 +251,8 @@ insert into mpp23288(a) select generate_series(1,20); analyze mpp23288; -- TEST select count_operator('select t2.a, t1.a from mpp23288 as t1 join mpp23288 as t2 on (t1.a < t2.a and t2.a =10) order by t2.a, t1.a;','Dynamic Seq Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -212,6 +273,8 @@ select t2.a, t1.a from mpp23288 as t1 join mpp23288 as t2 on (t1.a < t2.a and t2 (9 rows) select count_operator('select t2.a, t1.a from mpp23288 as t1 join mpp23288 as t2 on (t1.a < t2.a and (t2.a = 10 or t2.a = 5 or t2.a = 12)) order by t2.a, t1.a;','Dynamic Seq Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -247,6 +310,8 @@ select t2.a, t1.a from mpp23288 as t1 join mpp23288 as t2 on (t1.a < t2.a and (t (24 rows) select count_operator('select t2.a, t1.a from mpp23288 as t1 join mpp23288 as t2 on t1.a < t2.a and t2.a = 1 or t2.a < 10 order by t2.a, t1.a;','Dynamic Seq Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -488,6 +553,8 @@ analyze p; -- We need to disable parallel before doing this query. set enable_parallel to false; select count_operator('select * from (select * from p1 union all select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -495,54 +562,72 @@ select count_operator('select * from (select * from p1 union all select * from p reset enable_parallel; select count_operator('select * from (select * from p1 union select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 (1 row) select count_operator('select * from (select * from p1 except all select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 0 (1 row) select count_operator('select * from (select * from p1 except select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 (1 row) select count_operator('select * from (select * from p1 intersect all select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 (1 row) select count_operator('select * from (select * from p1 union select * from p2 union all select * from p3) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 3 (1 row) select count_operator('select * from (select * from p1 union select * from p2 union all select * from p) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 (1 row) select count_operator('select * from (select * from p1 union select * from p union all select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 (1 row) select count_operator('select * from (select * from p1 union select * from p2 intersect all select * from p3) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 3 (1 row) select count_operator('select * from (select * from p1 union select * from p intersect all select * from p2) as p_all, t where p_all.b=t.b;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -581,6 +666,10 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur create index dbs_index on dbs using bitmap(c3); -- TEST select find_operator('(select * from dts where c2 = 1) union (select * from dts where c2 = 2) union (select * from dts where c2 = 3) union (select * from dts where c2 = 4) union (select * from dts where c2 = 5) union (select * from dts where c2 = 6) union (select * from dts where c2 = 7) union (select * from dts where c2 = 8) union (select * from dts where c2 = 9) union (select * from dts where c2 = 10);', 'Dynamic Seq Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: dts +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. find_operator --------------- ['true'] @@ -596,12 +685,18 @@ select find_operator('(select * from dts where c2 = 1) union (select * from dts (select * from dts where c2 = 8) union (select * from dts where c2 = 9) union (select * from dts where c2 = 10); +NOTICE: One or more columns in the following table(s) do not have statistics: dts +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. c1 | c2 ----+---- (0 rows) set optimizer_enable_dynamictablescan = off; select find_operator('(select * from dis where c3 = 1) union (select * from dis where c3 = 2) union (select * from dis where c3 = 3) union (select * from dis where c3 = 4) union (select * from dis where c3 = 5) union (select * from dis where c3 = 6) union (select * from dis where c3 = 7) union (select * from dis where c3 = 8) union (select * from dis where c3 = 9) union (select * from dis where c3 = 10);', 'Dynamic Index Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA find_operator --------------- ['false'] @@ -617,11 +712,15 @@ select find_operator('(select * from dis where c3 = 1) union (select * from dis (select * from dis where c3 = 8) union (select * from dis where c3 = 9) union (select * from dis where c3 = 10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA c1 | c2 | c3 ----+----+---- (0 rows) select find_operator('select * from dbs where c2= 15 and c3 = 5;', 'Bitmap Heap Scan'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions find_operator --------------- ['false'] @@ -667,11 +766,17 @@ create index pp_rest_2_idx on pp_1_prt_3(c,a); -- TEST set optimizer_enable_dynamictablescan = off; select * from pp where b=2 and c=2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA a | b | c ---+---+--- (0 rows) select count_operator('select * from pp where b=2 and c=2;','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA count_operator ---------------- 0 @@ -713,44 +818,68 @@ PARTITION BY LIST(month_id) ); -- TEST select * from ds_4 where month_id = '200800'; +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. month_id | cust_group_acc | mobile_no ----------+----------------+----------- (0 rows) select count_operator('select * from ds_4 where month_id = E''200800'';','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. count_operator ---------------- 0 (1 row) select * from ds_4 where month_id > '200800'; +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. month_id | cust_group_acc | mobile_no ----------+----------------+----------- (0 rows) select count_operator('select * from ds_4 where month_id > E''200800'';','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. count_operator ---------------- 0 (1 row) select * from ds_4 where month_id <= '200800'; +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. month_id | cust_group_acc | mobile_no ----------+----------------+----------- (0 rows) select count_operator('select * from ds_4 where month_id <= E''200800'';','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. count_operator ---------------- 0 (1 row) select * from ds_4 a1,ds_4 a2 where a1.month_id = a2.month_id and a1.month_id > '200800'; +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. month_id | cust_group_acc | mobile_no | month_id | cust_group_acc | mobile_no ----------+----------------+-----------+----------+----------------+----------- (0 rows) select count_operator('select * from ds_4 a1,ds_4 a2 where a1.month_id = a2.month_id and a1.month_id > E''200800'';','Partition Selector'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +NOTICE: One or more columns in the following table(s) do not have statistics: ds_4 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. count_operator ---------------- 1 @@ -768,6 +897,7 @@ reset optimizer_segments; -- SETUP -- start_ignore DROP TABLE IF EXISTS bar; +NOTICE: table "bar" does not exist, skipping -- end_ignore CREATE TABLE bar (b int, c int) PARTITION BY RANGE (b) @@ -782,26 +912,26 @@ ANALYZE bar; SELECT b FROM bar GROUP BY b; b ---- - 7 - 4 - 19 - 3 - 5 - 18 - 6 + 17 11 - 9 - 8 - 12 + 13 10 - 17 - 1 + 9 + 5 + 6 + 14 + 15 0 + 1 + 12 + 8 + 19 + 7 + 18 + 4 2 16 - 15 - 14 - 13 + 3 (20 rows) EXPLAIN SELECT b FROM bar GROUP BY b; @@ -812,11 +942,12 @@ EXPLAIN SELECT b FROM bar GROUP BY b; Group Key: b -> Dynamic Seq Scan on bar (cost=0.00..431.01 rows=334 width=4) Number of partitions to scan: 2 (out of 2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (6 rows) -- CLEANUP DROP TABLE IF EXISTS foo; +NOTICE: table "foo" does not exist, skipping DROP TABLE IF EXISTS bar; -- Test EXPLAIN ANALYZE on a partitioned table. There used to be a bug, where -- you got an internal error with this, because the EXPLAIN ANALYZE sends the @@ -837,24 +968,26 @@ EVERY ('2 mons'::interval) NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'oid' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. explain analyze select a.* from mpp8031 a, mpp8031 b where a.oid = b.oid; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=16) (actual time=2.533..2.533 rows=0 loops=1) - -> Hash Join (cost=0.00..862.00 rows=1 width=16) (never executed) +NOTICE: One or more columns in the following table(s) do not have statistics: mpp8031 +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=16) (actual time=9.030..9.036 rows=0 loops=1) + -> Hash Join (cost=0.00..862.00 rows=1 width=16) (actual time=6.122..6.132 rows=0 loops=1) Hash Cond: (a.oid = b.oid) -> Dynamic Seq Scan on mpp8031 a (cost=0.00..431.00 rows=1 width=16) (never executed) Number of partitions to scan: 4 (out of 4) - -> Hash (cost=431.00..431.00 rows=1 width=4) (never executed) + -> Hash (cost=431.00..431.00 rows=1 width=4) (actual time=2.660..2.667 rows=0 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4096kB - -> Dynamic Seq Scan on mpp8031 b (cost=0.00..431.00 rows=1 width=4) (actual time=0.000..0.324 rows=0 loops=1) + -> Dynamic Seq Scan on mpp8031 b (cost=0.00..431.00 rows=1 width=4) (actual time=2.658..2.658 rows=0 loops=1) Number of partitions to scan: 4 (out of 4) Partitions scanned: Avg 4.0 x 3 workers. Max 4 parts (seg0). - Planning Time: 4.433 ms - (slice0) Executor memory: 31K bytes. - (slice1) Executor memory: 4156K bytes avg x 3 workers, 4156K bytes max (seg0). Work_mem: 4096K bytes max. + Planning Time: 47.729 ms + (slice0) Executor memory: 113K bytes. + (slice1) Executor memory: 4231K bytes avg x 3x(0) workers, 4231K bytes max (seg0). Work_mem: 4096K bytes max. Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Execution Time: 3.231 ms + Optimizer: GPORCA + Execution Time: 11.130 ms (16 rows) drop table mpp8031; @@ -880,14 +1013,23 @@ SUBPARTITION TEMPLATE END (2015111100::numeric) WITH (appendonly=false) ); INSERT INTO part_tbl VALUES (2015111000, 479534741, 99999999); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables INSERT INTO part_tbl VALUES (2015111000, 479534742, 99999999); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables CREATE INDEX part_tbl_idx ON part_tbl(profile_key); +-- start_ignore +analyze part_tbl; +-- end_ignore EXPLAIN SELECT * FROM part_tbl WHERE profile_key = 99999999; - QUERY PLAN ----------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.07 rows=2 width=25) - -> Append (cost=0.00..2.03 rows=1 width=25) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.11 rows=6 width=25) + -> Append (cost=0.00..2.03 rows=2 width=25) -> Seq Scan on part_tbl_1_prt_p20151110_2_prt_package5 part_tbl_1 (cost=0.00..1.01 rows=1 width=25) Filter: (profile_key = '99999999'::numeric) -> Seq Scan on part_tbl_1_prt_p20151110_2_prt_other_services part_tbl_2 (cost=0.00..1.01 rows=1 width=25) @@ -896,10 +1038,12 @@ EXPLAIN SELECT * FROM part_tbl WHERE profile_key = 99999999; (7 rows) SELECT * FROM part_tbl WHERE profile_key = 99999999; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables time_client_key | ngin_service_key | profile_key -----------------+------------------+------------- - 2015111000 | 479534742 | 99999999 2015111000 | 479534741 | 99999999 + 2015111000 | 479534742 | 99999999 (2 rows) DROP TABLE part_tbl; @@ -928,6 +1072,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur insert into r_part values (1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8); -- following tests rely on the data distribution, verify them select gp_segment_id, * from r_part order by a,b; +NOTICE: One or more columns in the following table(s) do not have statistics: r_part +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. gp_segment_id | a | b ---------------+---+--- 1 | 1 | 1 @@ -944,8 +1090,8 @@ analyze r_part; explain select * from r_part r1, r_part r2 where r1.a=1; -- should eliminate partitions in the r1 copy of r_part QUERY PLAN --------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.53 rows=8 width=16) - -> Nested Loop (cost=0.00..1324032.53 rows=3 width=16) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.54 rows=8 width=16) + -> Nested Loop (cost=0.00..1324032.54 rows=3 width=16) Join Filter: true -> Dynamic Seq Scan on r_part r2 (cost=0.00..431.00 rows=3 width=8) Number of partitions to scan: 9 (out of 9) @@ -954,7 +1100,7 @@ explain select * from r_part r1, r_part r2 where r1.a=1; -- should eliminate par -> Dynamic Seq Scan on r_part r1 (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 1) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (11 rows) -- the numbers in the filter should be both on segment 0 @@ -965,7 +1111,7 @@ explain select * from r_part where a in (7,8); -- should eliminate partitions -> Dynamic Seq Scan on r_part (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 2 (out of 9) Filter: (a = ANY ('{7,8}'::integer[])) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) -- Test partition elimination in prepared statements @@ -999,7 +1145,7 @@ explain select * from r_part where a = 1 order by a,b; -- should eliminate parti -> Dynamic Seq Scan on r_part (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 1) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (8 rows) --force_explain @@ -1013,7 +1159,7 @@ explain execute f1(1); -- should eliminate partitions -> Dynamic Seq Scan on r_part (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 1) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (8 rows) --force_explain @@ -1027,7 +1173,7 @@ explain execute f2(2); -- should eliminate partitions -> Dynamic Seq Scan on r_part (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (8 rows) -- Test partition elimination on CO tables @@ -1040,7 +1186,7 @@ explain select * from r_co where a=2; -- should eliminate partitions -> Dynamic Seq Scan on r_co (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) -- test partition elimination in prepared statements on CO tables @@ -1056,7 +1202,7 @@ explain execute f3(2); -- should eliminate partitions -> Dynamic Seq Scan on r_co (cost=0.00..431.00 rows=1 width=8) Number of partitions to scan: 1 (out of 9) Filter: (a = 2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (8 rows) -- start_ignore @@ -1084,57 +1230,67 @@ select '2009-01-02'::date = to_date('2009-01-02','YYYY-MM-DD'); -- ensure that b (1 row) explain select * from fact where dd < '2009-01-02'::date; -- partitions eliminated +NOTICE: One or more columns in the following table(s) do not have statistics: fact +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) -> Dynamic Seq Scan on fact (cost=0.00..431.00 rows=1 width=16) Number of partitions to scan: 1 (out of 4) Filter: (dd < '01-02-2009'::date) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) explain select * from fact where dd < to_date('2009-01-02','YYYY-MM-DD'); -- partitions eliminated +NOTICE: One or more columns in the following table(s) do not have statistics: fact +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) -> Dynamic Seq Scan on fact (cost=0.00..431.00 rows=1 width=16) Number of partitions to scan: 1 (out of 4) Filter: (dd < '01-02-2009'::date) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) explain select * from fact where dd < current_date; --partitions eliminated +NOTICE: One or more columns in the following table(s) do not have statistics: fact +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) -> Dynamic Seq Scan on fact (cost=0.00..431.00 rows=1 width=16) Number of partitions to scan: 1 (out of 4) - Filter: (dd < '09-30-2022'::date) - Optimizer: Pivotal Optimizer (GPORCA) + Filter: (dd < '04-10-2026'::date) + Optimizer: GPORCA (5 rows) -- Test partition elimination in prepared statements prepare f1(date) as select * from fact where dd < $1; -- force_explain explain execute f1('2009-01-02'::date); -- should eliminate partitions +NOTICE: One or more columns in the following table(s) do not have statistics: fact +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) -> Dynamic Seq Scan on fact (cost=0.00..431.00 rows=1 width=16) Number of partitions to scan: 1 (out of 4) Filter: (dd < '01-02-2009'::date) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) -- force_explain explain execute f1(to_date('2009-01-02', 'YYYY-MM-DD')); -- should eliminate partitions +NOTICE: One or more columns in the following table(s) do not have statistics: fact +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. QUERY PLAN ------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) -> Dynamic Seq Scan on fact (cost=0.00..431.00 rows=1 width=16) Number of partitions to scan: 1 (out of 4) Filter: (dd < '01-02-2009'::date) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (5 rows) -- start_ignore @@ -1148,6 +1304,10 @@ create table mpp6247_bar (like mpp6247_foo); NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table -- EXPECT: Single HJ after partition elimination instead of sequence of HJ under Append select count_operator('delete from mpp6247_foo using mpp6247_bar where mpp6247_foo.c1 = mpp6247_bar.c1 and mpp6247_foo.dt = ''2009-05-03''', 'Hash Join'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables count_operator ---------------- 1 @@ -1155,10 +1315,112 @@ select count_operator('delete from mpp6247_foo using mpp6247_bar where mpp6247_f drop table mpp6247_bar; drop table mpp6247_foo; +-- Validate that basic DELETE on partition table with index functions properly +CREATE TABLE delete_from_indexed_pt (a int, b int) PARTITION BY RANGE(b) (START (0) END (7) EVERY (3)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX index_delete_from_indexed_pt ON delete_from_indexed_pt USING bitmap(b); +INSERT INTO delete_from_indexed_pt SELECT i, i%6 FROM generate_series(1, 10)i; +EXPLAIN (COSTS OFF) DELETE FROM delete_from_indexed_pt WHERE b=1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables + QUERY PLAN +----------------------------------------------------------------------------------- + Delete on delete_from_indexed_pt + Delete on delete_from_indexed_pt_1_prt_1 delete_from_indexed_pt_1 + -> Bitmap Heap Scan on delete_from_indexed_pt_1_prt_1 delete_from_indexed_pt_1 + Recheck Cond: (b = 1) + -> Bitmap Index Scan on delete_from_indexed_pt_1_prt_1_b_idx + Index Cond: (b = 1) + Optimizer: Postgres query optimizer +(7 rows) + +DELETE FROM delete_from_indexed_pt WHERE b=1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables +SELECT * FROM delete_from_indexed_pt; +NOTICE: One or more columns in the following table(s) do not have statistics: delete_from_indexed_pt +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + a | b +----+--- + 2 | 2 + 8 | 2 + 3 | 3 + 4 | 4 + 6 | 0 + 5 | 5 + 9 | 3 + 10 | 4 +(8 rows) + +-- Validate that basic DELETE on partition table using DPE functions properly +CREATE TABLE delete_from_pt (a int, b int) PARTITION BY RANGE(b) (START (0) END (7) EVERY (3)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE t(a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO delete_from_pt SELECT i, i%6 FROM generate_series(1, 10)i; +INSERT INTO t VALUES (1); +ANALYZE delete_from_pt, t; +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) DELETE FROM delete_from_pt WHERE b IN (SELECT b FROM delete_from_pt, t WHERE t.a=delete_from_pt.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Delete on delete_from_pt (actual rows=0 loops=1) + Delete on delete_from_pt_1_prt_1 delete_from_pt_2 + Delete on delete_from_pt_1_prt_2 delete_from_pt_3 + Delete on delete_from_pt_1_prt_3 delete_from_pt_4 + -> Hash Semi Join (actual rows=1 loops=1) + Hash Cond: (delete_from_pt.b = t.a) + Extra Text: (seg0) Hash chain length 2.0 avg, 2 max, using 1 of 131072 buckets. + -> Append (actual rows=3 loops=1) + Partition Selectors: $1 + -> Seq Scan on delete_from_pt_1_prt_1 delete_from_pt_2 (actual rows=3 loops=1) + -> Seq Scan on delete_from_pt_1_prt_2 delete_from_pt_3 (never executed) + -> Seq Scan on delete_from_pt_1_prt_3 delete_from_pt_4 (never executed) + -> Hash (actual rows=2 loops=1) + Buckets: 131072 Batches: 1 Memory Usage: 1025kB + -> Partition Selector (selector id: $1) (actual rows=2 loops=1) + -> Broadcast Motion 3:3 (slice1; segments: 3) (actual rows=2 loops=1) + -> Hash Join (actual rows=1 loops=1) + Hash Cond: (delete_from_pt_1.b = t.a) + Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 1 of 262144 buckets. + -> Append (actual rows=3 loops=1) + Partition Selectors: $2 + -> Seq Scan on delete_from_pt_1_prt_1 delete_from_pt_5 (actual rows=3 loops=1) + -> Seq Scan on delete_from_pt_1_prt_2 delete_from_pt_6 (never executed) + -> Seq Scan on delete_from_pt_1_prt_3 delete_from_pt_7 (never executed) + -> Hash (actual rows=1 loops=1) + Buckets: 262144 Batches: 1 Memory Usage: 2049kB + -> Partition Selector (selector id: $2) (actual rows=1 loops=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=1 loops=1) + -> Seq Scan on t (actual rows=1 loops=1) + Optimizer: Postgres query optimizer +(30 rows) + +SELECT * FROM delete_from_pt order by a; + a | b +----+--- + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 0 + 8 | 2 + 9 | 3 + 10 | 4 +(8 rows) + +RESET optimizer_trace_fallback; -- CLEANUP -- start_ignore drop schema if exists bfv_partition_plans cascade; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to function count_operator(text,text) drop cascades to function find_operator(text,text) +drop cascades to table delete_from_indexed_pt +drop cascades to table delete_from_pt +drop cascades to table t -- end_ignore diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_planner_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_planner_optimizer.out index fe696b39e0e..d72103210ec 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_planner_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_planner_optimizer.out @@ -805,6 +805,32 @@ drop table t2_12146; drop table t3_12146; drop table t4_12146; reset allow_system_table_mods; +-- +-- test https://github.com/apache/cloudberry/issues/593 +-- +CREATE TABLE t0_issue_593(c0 bigserial PRIMARY KEY) USING heap WITH (autovacuum_vacuum_threshold=1468046284, autovacuum_analyze_threshold=1889118206, autovacuum_vacuum_cost_delay=9, fillfactor=25, autovacuum_freeze_max_age=1860760049, autovacuum_enabled=0, autovacuum_freeze_min_age=402702412, autovacuum_vacuum_cost_limit=2500); +CREATE TABLE IF NOT EXISTS t1_issue_593(LIKE t0_issue_593); +CREATE TABLE IF NOT EXISTS t2_issue_593(LIKE t0_issue_593 INCLUDING INDEXES); +CREATE UNLOGGED TABLE IF NOT EXISTS t3_issue_593(LIKE t2_issue_593); +CREATE TEMPORARY TABLE IF NOT EXISTS t4_issue_593(LIKE t3_issue_593); +SELECT '100.147.127.36' FROM t1_issue_593, ONLY t2_issue_593, t4_issue_593* + CROSS JOIN t0_issue_593* CROSS JOIN ONLY t3_issue_593 GROUP BY pg_jit_available() + HAVING inet_same_family('148.199.107.23', '214.26.36.61') UNION ALL + SELECT '100.147.127.36' FROM t1_issue_593*, ONLY t2_issue_593, t4_issue_593* + CROSS JOIN t0_issue_593* CROSS JOIN ONLY t3_issue_593 GROUP BY pg_jit_available() + HAVING NOT (inet_same_family('148.199.107.23', '214.26.36.61')) UNION ALL + SELECT '100.147.127.36' FROM t1_issue_593*, ONLY t2_issue_593, t4_issue_593* + CROSS JOIN t0_issue_593 CROSS JOIN ONLY t3_issue_593 GROUP BY pg_jit_available() + HAVING (inet_same_family('148.199.107.23', '214.26.36.61')) ISNULL; + ?column? +---------- +(0 rows) + +drop table t0_issue_593; +drop table t1_issue_593; +drop table t2_issue_593; +drop table t3_issue_593; +drop table t4_issue_593; -- start_ignore drop table if exists bfv_planner_x; drop table if exists testbadsql; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_statistic_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_statistic_optimizer.out index 4ebd43e4e02..a1eaae70eb6 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_statistic_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_statistic_optimizer.out @@ -465,3 +465,211 @@ where tablename = 'uniformtest'; 40-60 | <= 5 | >= 95 (1 row) +-- ORCA: Test previous scenario with duplicate memo groups running multiple +-- xforms that need stats before applying and reset after applying. It +-- used to be that this scenario could lead to SIGSEGV where stats were +-- reset and were not re-derived between applying the xforms. +SET optimizer_join_order=exhaustive; +SET optimizer_trace_fallback=on; +CREATE TABLE duplicate_memo_group_test_t1 (c11 varchar, c12 integer) DISTRIBUTED BY (c11); +CREATE TABLE duplicate_memo_group_test_t2 (c2 varchar) DISTRIBUTED BY (c2); +CREATE TABLE duplicate_memo_group_test_t3 (c3 varchar) DISTRIBUTED BY (c3); +INSERT INTO duplicate_memo_group_test_t1 SELECT 'something', generate_series(1,900); +INSERT INTO duplicate_memo_group_test_t2 SELECT generate_series(1,900); +ANALYZE duplicate_memo_group_test_t1, duplicate_memo_group_test_t2; +SELECT + (SELECT c11 FROM duplicate_memo_group_test_t1 WHERE c12 = 100) AS column1, + (SELECT sum(c12) +FROM duplicate_memo_group_test_t1 + INNER JOIN duplicate_memo_group_test_t2 ON c11 = c2 + INNER JOIN duplicate_memo_group_test_t3 ON c2 = c3 + INNER JOIN duplicate_memo_group_test_t3 a1 on a1.c3 = a2.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a3 ON a1.c3 = a3.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a4 ON a1.c3 = a4.c3 +) AS column2 +FROM duplicate_memo_group_test_t3 a2; + column1 | column2 +---------+--------- +(0 rows) + +-- Tests ORCA coverage for time-related cross-type stats calculation +-- +-- Previously, ORCA didn't support stats calculation for time-related +-- cross-type predicates. It used default scale factor for cardinality +-- estimate, that could sometimes be off by a few orders of magnitude, +-- thence affecting plan performance. This was because date type was +-- converted to int internally, whereas other time-related types were +-- converted to double. +-- +-- Using int for date type allows an equality predicate that only +-- involves the date type to be always viewed as a singleton, rather +-- than a range in double in ORCA's constraint framework. This provided +-- convenience of implementing stats derivation. However, such choice +-- prevented ORCA from deriving stats from predicates that involve both +-- date type and other time-related types. Now, in an attempt of +-- supporting cross-type stats calcualtion, we convert date type to +-- double as well. +-- +-- Test filter stats derivation in table scans +drop table if exists t1, t2; +NOTICE: table "t2" does not exist, skipping +create table t1 (a int, b date); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t2 (a int, b date); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t1 select i, j::date from generate_series(1, 10) i, generate_series('2015-01-01','2021-12-31', '1 day'::interval) j; +insert into t2 select i, j::date from generate_series(1, 10) i, generate_series('2021-01-01','2021-12-31', '1 day'::interval) j; +analyze t1, t2; +-- The following two queries should generate the same plan, now that +-- we support time-related cross-type stats calculation. ORCA should +-- derive the same stats for t1 (small subset of the total) based on +-- predicates on t1.b. Prior to this commit, the date-timestamp cross +-- type predicates used in the following queries yielded a cardinality +-- estimate in the order of 3000. +-- +-- inequality predicates: +explain select * from t1, t2 where t1.a = t2.a and t1.b < '2015-01-05'::date; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..863.91 rows=14933 width=16) + -> Hash Join (cost=0.00..863.02 rows=4978 width=16) + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 (cost=0.00..431.03 rows=1217 width=8) + -> Hash (cost=431.48..431.48 rows=14 width=8) + -> Seq Scan on t1 (cost=0.00..431.48 rows=14 width=8) + Filter: (b < '01-05-2015'::date) + Optimizer: GPORCA +(8 rows) + +explain select * from t1, t2 where t1.a = t2.a and t1.b < '2015-01-05'::timestamp; + QUERY PLAN +------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..863.91 rows=14933 width=16) + -> Hash Join (cost=0.00..863.02 rows=4978 width=16) + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 (cost=0.00..431.03 rows=1217 width=8) + -> Hash (cost=431.48..431.48 rows=14 width=8) + -> Seq Scan on t1 (cost=0.00..431.48 rows=14 width=8) + Filter: (b < 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + Optimizer: GPORCA +(8 rows) + +-- equality predicates: +explain select * from t1, t2 where t1.a = t2.a and t1.b = '2015-01-05'::date; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..863.03 rows=3650 width=16) + -> Hash Join (cost=0.00..862.81 rows=1217 width=16) + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 (cost=0.00..431.03 rows=1217 width=8) + -> Hash (cost=431.48..431.48 rows=4 width=8) + -> Seq Scan on t1 (cost=0.00..431.48 rows=4 width=8) + Filter: (b = '01-05-2015'::date) + Optimizer: GPORCA +(8 rows) + +explain select * from t1, t2 where t1.a = t2.a and t1.b = '2015-01-05'::timestamp; + QUERY PLAN +------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..863.03 rows=3650 width=16) + -> Hash Join (cost=0.00..862.81 rows=1217 width=16) + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 (cost=0.00..431.03 rows=1217 width=8) + -> Hash (cost=431.48..431.48 rows=4 width=8) + -> Seq Scan on t1 (cost=0.00..431.48 rows=4 width=8) + Filter: (b = 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + Optimizer: GPORCA +(8 rows) + +-- Test filter stats derivation in dynamic table scans +drop table if exists t1, t2; +create table t1 (a int, b date) +partition by range (b) ( + start (date '2015-01-01') end (date '2021-01-01') every (interval '1' year), + default partition d); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t2 (a int, b date); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t1 select i, j::date from generate_series(1, 10) i, generate_series('2015-01-01','2021-12-31', '1 day'::interval) j; +insert into t2 select i, j::date from generate_series(1, 10) i, generate_series('2015-01-01','2021-12-31', '1 day'::interval) j; +analyze t1, t2; +-- The following two queries should generate the same plan, now that +-- we support time-related cross-type comparison. ORCA should derive +-- the same stats for t1 (small number of partitions) and t2 (small +-- subset of the total) based on the predicates and allow DPE. Prior +-- to this commit, the date-timestamp cross-type predicates used in +-- the following queries yielded a cardinality estimate in the order +-- of 500~1000. Consequently, the partition selector wasn't propagated. +-- +-- inequality predicates (2 out of 7 partitions): +explain select * from t1, t2 where t1.a = t2.a and t1.b = t2.b and t1.b < '2015-01-05'::date; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.49 rows=80 width=16) + -> Hash Join (cost=0.00..862.49 rows=27 width=16) + Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b)) + -> Dynamic Seq Scan on t1 (cost=0.00..431.00 rows=14 width=8) + Number of partitions to scan: 2 (out of 7) + Filter: (b < '01-05-2015'::date) + -> Hash (cost=431.48..431.48 rows=14 width=8) + -> Partition Selector (selector id: $0) (cost=0.00..431.48 rows=14 width=8) + -> Seq Scan on t2 (cost=0.00..431.48 rows=14 width=8) + Filter: (b < '01-05-2015'::date) + Optimizer: GPORCA +(11 rows) + +explain select * from t1, t2 where t1.a = t2.a and t1.b = t2.b and t1.b < '2015-01-05'::timestamp; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.49 rows=80 width=16) + -> Hash Join (cost=0.00..862.49 rows=27 width=16) + Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b)) + -> Dynamic Seq Scan on t1 (cost=0.00..431.00 rows=14 width=8) + Number of partitions to scan: 2 (out of 7) + Filter: (b < 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + -> Hash (cost=431.48..431.48 rows=14 width=8) + -> Partition Selector (selector id: $0) (cost=0.00..431.48 rows=14 width=8) + -> Seq Scan on t2 (cost=0.00..431.48 rows=14 width=8) + Filter: (b < 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + Optimizer: GPORCA +(11 rows) + +-- equality predicates (1 out of 7 partitions): +explain select * from t1, t2 where t1.a = t2.a and t1.b = t2.b and t1.b = '2015-01-05'::date; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.48 rows=10 width=16) + -> Hash Join (cost=0.00..862.48 rows=4 width=16) + Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b)) + -> Dynamic Seq Scan on t1 (cost=0.00..431.00 rows=4 width=8) + Number of partitions to scan: 1 (out of 7) + Filter: (b = '01-05-2015'::date) + -> Hash (cost=431.48..431.48 rows=4 width=8) + -> Partition Selector (selector id: $0) (cost=0.00..431.48 rows=4 width=8) + -> Seq Scan on t2 (cost=0.00..431.48 rows=4 width=8) + Filter: (b = '01-05-2015'::date) + Optimizer: GPORCA +(11 rows) + +explain select * from t1, t2 where t1.a = t2.a and t1.b = t2.b and t1.b = '2015-01-05'::timestamp; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.48 rows=10 width=16) + -> Hash Join (cost=0.00..862.48 rows=4 width=16) + Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b)) + -> Dynamic Seq Scan on t1 (cost=0.00..431.00 rows=4 width=8) + Number of partitions to scan: 1 (out of 7) + Filter: (b = 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + -> Hash (cost=431.48..431.48 rows=4 width=8) + -> Partition Selector (selector id: $0) (cost=0.00..431.48 rows=4 width=8) + -> Seq Scan on t2 (cost=0.00..431.48 rows=4 width=8) + Filter: (b = 'Mon Jan 05 00:00:00 2015'::timestamp without time zone) + Optimizer: GPORCA +(11 rows) + +RESET optimizer_join_order; +RESET optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/bfv_subquery_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bfv_subquery_optimizer.out index 2b879266008..dff682bd4a5 100644 --- a/contrib/pax_storage/src/test/regress/expected/bfv_subquery_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bfv_subquery_optimizer.out @@ -508,3 +508,101 @@ select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from (1 row) drop table t1; +-- Test filter of RESULT node with a LIMIT parent +-- Historically, when ORCA generates a RESULT node with a LIMIT parent, +-- the parent node's tuple bound is pushed down to the RESULT node's +-- child node. This could cause the query to return a subset of the +-- actual result, if the RESULT node has a filter. This is because the +-- tuple bound was applied before the filter. +-- Now, we allow tuple bound push down only if the RESULT node DOES NOT +-- have a filter. +-- start_ignore +drop table if exists with_test1; +NOTICE: table "with_test1" does not exist, skipping +drop table if exists with_test2; +NOTICE: table "with_test2" does not exist, skipping +create table with_test1 (i int, value int) distributed by (i); +insert into with_test1 select i%10, i%30 from generate_series(0, 99) i; +create table with_test2 (i int, value int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into with_test2 select i%100, i%300 from generate_series(0, 999) i; +-- end_ignore +with my_group_sum(i, total) as (select i, sum(value) from with_test1 group by i) +select with_test2.* from with_test2 +where value < all (select total from my_group_sum where my_group_sum.i = with_test2.i) +order by 1,2 +limit 15; + i | value +---+------- + 0 | 0 + 0 | 0 + 0 | 0 + 0 | 0 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 2 | 2 + 2 | 2 + 2 | 2 + 2 | 2 + 2 | 102 + 2 | 102 + 2 | 102 +(15 rows) + +-- Test case for Issue 15794, 15767 and 15793 +create table t_15767 (c0 int, c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t_15767 values(1,0),(2,1); +select max(c0) from t_15767 +union all +select max(c0) from t_15767 +group by 1*t_15767.c0; + max +----- + 2 + 2 + 1 +(3 rows) + +drop table t_15767; +create table t2_15794( + id integer, + x double precision, + y double precision, + position double precision[] +); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t2_15794 values (1,1,1,array[1,1]); +insert into t2_15794 values (2,2,2,array[2,2]); +select array_agg(length) from ( +select ( +array_upper( position, 1) +- array_lower( position, 1) + 1 +) as length, +array_lower( position, 1) as lower +from t2_15794 +group by length, lower) t; + array_agg +----------- + {2} +(1 row) + +drop table t2_15794; +create table t1_15793 (c0 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t2_15793 (c0 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select * from t1_15793 cross join t2_15793 where not ((t1_15793.c0)+(t1_15793.c0)!=(t2_15793.c0)); + c0 | c0 +----+---- +(0 rows) + +drop table t1_15793; +drop table t2_15793; diff --git a/contrib/pax_storage/src/test/regress/expected/bitmap_index_optimizer.out b/contrib/pax_storage/src/test/regress/expected/bitmap_index_optimizer.out index c73f117ea38..1f55aff1462 100644 --- a/contrib/pax_storage/src/test/regress/expected/bitmap_index_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/bitmap_index_optimizer.out @@ -1,3 +1,5 @@ +create extension if not exists gp_inject_fault; +NOTICE: extension "gp_inject_fault" already exists, skipping SET enable_seqscan = OFF; SET enable_indexscan = ON; SET enable_bitmapscan = ON; @@ -64,25 +66,25 @@ select * from bm_test where i=5 and t='5'; select * from bm_test where i=5 or t='6'; i | t ---+--- - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 - 5 | 5 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 + 5 | 5 6 | 6 (20 rows) @@ -96,6 +98,31 @@ select * from bm_test where i=5 or t='6'; select * from bm_test where i=5 or t='1'; i | t +---+--- + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 5 | 5 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 +(20 rows) + +select * from bm_test where i between 1 and 10 and i::text = t; + i | t ---+--- 1 | 1 1 | 1 @@ -108,110 +135,85 @@ select * from bm_test where i=5 or t='1'; 1 | 1 1 | 1 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 + 6 | 6 + 9 | 9 5 | 5 -(20 rows) - -select * from bm_test where i between 1 and 10 and i::text = t; - i | t ----+--- - 2 | 2 - 4 | 4 6 | 6 - 8 | 8 + 9 | 9 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 + 3 | 3 4 | 4 - 6 | 6 + 7 | 7 8 | 8 2 | 2 - 4 | 4 - 6 | 6 - 8 | 8 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 - 7 | 7 - 9 | 9 - 1 | 1 - 3 | 3 - 5 | 5 + 4 | 4 7 | 7 - 9 | 9 - 1 | 1 + 8 | 8 + 2 | 2 3 | 3 - 5 | 5 + 4 | 4 7 | 7 - 9 | 9 + 8 | 8 (90 rows) drop table bm_test; @@ -249,22 +251,22 @@ create index bm_n_null_idx on bm_test using bitmap(n) WHERE n ISNULL; select a.t from bm_test a, bm_test b where a.i2 = b.i2; t -------------------------- - Tue Jan 02 01:01:01 2007 Mon Jan 01 01:01:01 2007 + Tue Jan 02 01:01:01 2007 (2 rows) select a.t from bm_test a, bm_test b where a.i2 = b.i4; t -------------------------- - Mon Jan 01 01:01:01 2007 Tue Jan 02 01:01:01 2007 + Mon Jan 01 01:01:01 2007 (2 rows) select a.t from bm_test a, bm_test b where a.i2 = b.i8; t -------------------------- - Mon Jan 01 01:01:01 2007 Tue Jan 02 01:01:01 2007 + Mon Jan 01 01:01:01 2007 (2 rows) select a.t from bm_test a, bm_test b where b.f4 = a.f8 and a.f8 = '2.0'; @@ -290,8 +292,8 @@ select a.t from bm_test a, bm_test b where a.ip < b.ip; select a.t from bm_test a, bm_test b where a.ip=b.ip OR a.b = b.b; t -------------------------- - Mon Jan 01 01:01:01 2007 Tue Jan 02 01:01:01 2007 + Mon Jan 01 01:01:01 2007 (2 rows) -- and @@ -335,8 +337,8 @@ select * from bm_test where i4=3; i2 | i4 | i8 | f4 | f8 | n | t1 | t2 | t3 | a | ip | b | t | d | g ----+----+----+----+----+----------+----+-----+-----+-------+-----------+------+--------------------------+------------+--- 1 | 3 | 1 | 1 | 1 | 1000.333 | 1 | 1 | 1 | {1,3} | 127.0.0.1 | \x01 | Mon Jan 01 01:01:01 2007 | 01-01-2007 | t - | 3 | | | | | | | | | | | | | 2 | 3 | 2 | 2 | 2 | 2000.333 | 2 | 2 | foo | {2,6} | 127.0.0.2 | \x02 | Tue Jan 02 01:01:01 2007 | 01-02-2007 | f + | 3 | | | | | | | | | | | | | (3 rows) -- should return one row @@ -479,12 +481,12 @@ set optimizer_enable_bitmapscan=off; -- Known_opt_diff: MPP-19808 -- end_ignore explain select * from bm_test where j = 1; - QUERY PLAN ------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.03 rows=1 width=8) - -> Seq Scan on bm_test (cost=0.00..1.03 rows=1 width=8) - Filter: j = 1 - Settings: enable_bitmapscan=off; enable_indexscan=on; enable_seqscan=off; optimizer=on + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=2 width=8) + -> Seq Scan on bm_test (cost=0.00..431.00 rows=1 width=8) + Filter: (j = 1) + Optimizer: GPORCA (4 rows) select * from bm_test where j = 1; @@ -514,10 +516,10 @@ insert into ijk values (1, NULL, NULL); insert into ijk values (1, NULL, NULL); -- should fail. create unique index ijk_i on ijk(i); -ERROR: could not create unique index "ijk_i" +ERROR: could not create unique index "ijk_i" (seg1 127.0.0.1:7003 pid=1478588) DETAIL: Key (i)=(1) is duplicated. create unique index ijk_ij on ijk(i,j); -ERROR: could not create unique index "ijk_ij" +ERROR: could not create unique index "ijk_ij" (seg1 127.0.0.1:7003 pid=1478588) DETAIL: Key (i, j)=(1, 3) is duplicated. -- should OK. create unique index ijk_ijk on ijk(i,j,k); @@ -531,10 +533,10 @@ insert into ijk values (1, NULL, NULL); insert into ijk values (1, NULL, NULL); -- should fail. create unique index ijk_i on ijk(i); -ERROR: could not create unique index "ijk_i" +ERROR: could not create unique index "ijk_i" (seg1 127.0.0.1:7003 pid=1478588) DETAIL: Key (i)=(1) is duplicated. create unique index ijk_ij on ijk(i,j); -ERROR: could not create unique index "ijk_ij" +ERROR: could not create unique index "ijk_ij" (seg1 127.0.0.1:7003 pid=1478588) DETAIL: Key (i, j)=(1, 3) is duplicated. -- should OK. create unique index ijk_ijk on ijk(i,j,k); @@ -571,6 +573,8 @@ drop table bmap_test; -- Test over-sized values -- create table oversize_test (c1 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE INDEX oversize_test_idx ON oversize_test USING BITMAP (c1); insert into oversize_test values ('a'); select * from oversize_test; @@ -581,7 +585,7 @@ select * from oversize_test; -- this fails, because the value is too large insert into oversize_test values (array_to_string(array(select generate_series(1, 10000)), '123456789')); -ERROR: row is too big: size 33256, maximum size 32736 (seg2 127.0.0.1:40002 pid=5270) +ERROR: row is too big: size 33256, maximum size 32736 (seg2 127.0.0.1:7004 pid=1478589) set enable_seqscan=off; select * from oversize_test where c1 < 'z'; c1 @@ -594,7 +598,7 @@ select * from oversize_test where c1 < 'z'; drop index oversize_test_idx; insert into oversize_test values (array_to_string(array(select generate_series(1, 10000)), '123456789')); CREATE INDEX oversize_test_idx ON oversize_test USING BITMAP (c1); -ERROR: row is too big: size 33256, maximum size 32736 (seg2 172.17.0.2:25434 pid=391078) +ERROR: row is too big: size 33256, maximum size 32736 (seg2 127.0.0.1:7004 pid=1478589) -- -- Test Index Only Scans. -- @@ -617,7 +621,7 @@ explain (costs off) select i, t from bm_indexonly_test where i = 1; Gather Motion 1:1 (slice1; segments: 1) -> Seq Scan on bm_indexonly_test Filter: (i = 1) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (4 rows) select i, t from bm_indexonly_test where i = 1; @@ -633,7 +637,7 @@ explain (costs off) select 'foobar' from bm_indexonly_test; Result -> Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on bm_indexonly_test - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (4 rows) select 'foobar' from bm_indexonly_test; @@ -661,12 +665,12 @@ analyze unlogged_test; explain select * from unlogged_test where c1 = 100; QUERY PLAN -------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..204.38 rows=1 width=4) - -> Bitmap Heap Scan on unlogged_test (cost=0.00..204.38 rows=1 width=4) - Recheck Cond: c1 = 100 + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..391.30 rows=1 width=4) + -> Bitmap Heap Scan on unlogged_test (cost=0.00..391.30 rows=1 width=4) + Recheck Cond: (c1 = 100) -> Bitmap Index Scan on unlogged_test_idx (cost=0.00..0.00 rows=0 width=0) - Index Cond: c1 = 100 - Optimizer: Pivotal Optimizer (GPORCA) version 2.70.0 + Index Cond: (c1 = 100) + Optimizer: GPORCA (6 rows) select * from unlogged_test where c1 = 100; @@ -709,8 +713,12 @@ INSERT INTO bm_test_insert SELECT generate_series (1, 10000); UPDATE bm_test_update SET b=b+1; -- trigger recovery on primaries SELECT gp_inject_fault_infinite('finish_prepared_after_record_commit_prepared', 'panic', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; +WARNING: consider disabling FTS probes while injecting a panic. (seg0 127.0.0.1:7002 pid=1486134) +HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing. +WARNING: consider disabling FTS probes while injecting a panic. (seg1 127.0.0.1:7003 pid=1486135) +HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing. +WARNING: consider disabling FTS probes while injecting a panic. (seg2 127.0.0.1:7004 pid=1486136) HINT: Inject an infinite 'skip' into the 'fts_probe' fault to disable FTS probing. -WARNING: consider disabling FTS probes while injecting a panic. gp_inject_fault_infinite -------------------------- Success: @@ -721,6 +729,12 @@ WARNING: consider disabling FTS probes while injecting a panic. SET client_min_messages='ERROR'; CREATE TABLE trigger_recovery_on_primaries(c int); RESET client_min_messages; +SELECT pg_sleep(2); + pg_sleep +---------- + +(1 row) + -- reconnect to the database after restart \c SELECT gp_inject_fault('checkpoint', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; @@ -731,6 +745,12 @@ SELECT gp_inject_fault('checkpoint', 'reset', dbid) FROM gp_segment_configuratio Success: (3 rows) +SELECT pg_sleep(2); + pg_sleep +---------- + +(1 row) + SELECT gp_inject_fault('finish_prepared_after_record_commit_prepared', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content > -1; gp_inject_fault ----------------- @@ -813,42 +833,42 @@ CREATE INDEX ON test_bmselec USING bitmap(type); ANALYZE test_bmselec; -- it used to choose bitmap index over seq scan, which not right. explain (analyze, verbose) select * from test_bmselec where type < 500; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.28 rows=4943 width=41) (actual time=0.435..7.910 rows=5000 loops=1) + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.37 rows=5036 width=41) (actual time=5.213..47.845 rows=5000 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmselec (cost=0.00..433.52 rows=1648 width=41) (actual time=0.030..5.213 rows=1693 loops=1) + -> Seq Scan on public.test_bmselec (cost=0.00..433.60 rows=1679 width=41) (actual time=4.161..44.551 rows=1693 loops=1) Output: id, type, msg Filter: (test_bmselec.type < 500) Rows Removed by Filter: 31769 - Planning Time: 49.134 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'on', enable_indexscan = 'on', enable_bitmapscan = 'on' + Planning Time: 22.078 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 116K bytes avg x 3x(0) workers, 116K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on - Execution Time: 8.972 ms + Optimizer: GPORCA + Execution Time: 49.359 ms (13 rows) SET enable_seqscan = OFF; SET enable_bitmapscan = OFF; -- we can see the bitmap index scan is much more slower explain (analyze, verbose) select * from test_bmselec where type < 500; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.28 rows=4943 width=41) (actual time=0.439..8.396 rows=5000 loops=1) + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..434.37 rows=5036 width=41) (actual time=4.827..78.146 rows=5000 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmselec (cost=0.00..433.52 rows=1648 width=41) (actual time=0.032..6.750 rows=1693 loops=1) + -> Seq Scan on public.test_bmselec (cost=0.00..433.60 rows=1679 width=41) (actual time=3.813..75.796 rows=1693 loops=1) Output: id, type, msg Filter: (test_bmselec.type < 500) Rows Removed by Filter: 31769 - Planning Time: 4.239 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'off', enable_indexscan = 'on', enable_bitmapscan = 'off' + Planning Time: 16.935 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 115K bytes avg x 3x(0) workers, 115K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off - Execution Time: 9.222 ms + Optimizer: GPORCA + Execution Time: 79.766 ms (13 rows) DROP TABLE test_bmselec; @@ -865,19 +885,19 @@ ANALYZE test_bmsparse; explain (analyze, verbose) select * from test_bmsparse where type < 200; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.65 rows=79956 width=41) (actual time=0.888..24.753 rows=80400 loops=1) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.70 rows=79847 width=41) (actual time=8.800..74.413 rows=80400 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmsparse (cost=0.00..435.43 rows=26652 width=41) (actual time=0.037..7.395 rows=26975 loops=1) + -> Seq Scan on public.test_bmsparse (cost=0.00..435.50 rows=26616 width=41) (actual time=1.257..56.639 rows=26975 loops=1) Output: id, type, msg Filter: (test_bmsparse.type < 200) Rows Removed by Filter: 6596 - Planning Time: 161.423 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'on', enable_indexscan = 'on', enable_bitmapscan = 'on' + Planning Time: 26.760 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 115K bytes avg x 3x(0) workers, 115K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on - Execution Time: 29.185 ms + Optimizer: GPORCA + Execution Time: 80.297 ms (13 rows) SET enable_seqscan = OFF; @@ -885,19 +905,19 @@ SET enable_bitmapscan = OFF; explain (analyze, verbose) select * from test_bmsparse where type < 200; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.65 rows=79956 width=41) (actual time=0.866..24.050 rows=80400 loops=1) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..447.70 rows=79847 width=41) (actual time=8.511..69.632 rows=80400 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmsparse (cost=0.00..435.43 rows=26652 width=41) (actual time=0.029..7.666 rows=26975 loops=1) + -> Seq Scan on public.test_bmsparse (cost=0.00..435.50 rows=26616 width=41) (actual time=1.173..55.815 rows=26975 loops=1) Output: id, type, msg Filter: (test_bmsparse.type < 200) Rows Removed by Filter: 6596 - Planning Time: 5.059 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'off', enable_indexscan = 'on', enable_bitmapscan = 'off' + Planning Time: 20.083 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 115K bytes avg x 3x(0) workers, 115K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off - Execution Time: 28.480 ms + Optimizer: GPORCA + Execution Time: 75.324 ms (13 rows) SET enable_seqscan = ON; @@ -906,39 +926,216 @@ SET enable_bitmapscan = ON; explain (analyze, verbose) select * from test_bmsparse where type > 500; QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.80 rows=19101 width=41) (actual time=0.391..12.640 rows=18998 loops=1) + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.87 rows=19087 width=41) (actual time=2.558..48.182 rows=18998 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmsparse (cost=0.00..433.88 rows=6367 width=41) (actual time=0.049..5.123 rows=6448 loops=1) + -> Seq Scan on public.test_bmsparse (cost=0.00..433.96 rows=6363 width=41) (actual time=1.589..44.886 rows=6448 loops=1) Output: id, type, msg Filter: (test_bmsparse.type > 500) Rows Removed by Filter: 26979 - Planning Time: 5.442 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'on', enable_indexscan = 'on', enable_bitmapscan = 'on' + Planning Time: 20.730 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 116K bytes avg x 3x(0) workers, 116K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=on, enable_indexscan=on, enable_seqscan=on - Execution Time: 14.206 ms + Optimizer: GPORCA + Execution Time: 50.704 ms (13 rows) SET enable_seqscan = OFF; SET enable_bitmapscan = OFF; explain (analyze, verbose) select * from test_bmsparse where type > 500; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.80 rows=19101 width=41) (actual time=0.352..9.098 rows=18998 loops=1) + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..436.87 rows=19087 width=41) (actual time=5.822..55.071 rows=18998 loops=1) Output: id, type, msg - -> Seq Scan on public.test_bmsparse (cost=0.00..433.88 rows=6367 width=41) (actual time=0.049..5.299 rows=6448 loops=1) + -> Seq Scan on public.test_bmsparse (cost=0.00..433.96 rows=6363 width=41) (actual time=1.981..47.771 rows=6448 loops=1) Output: id, type, msg Filter: (test_bmsparse.type > 500) Rows Removed by Filter: 26979 - Planning Time: 5.703 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes avg x 3 workers, 36K bytes max (seg0). + Settings: optimizer = 'on', enable_seqscan = 'off', enable_indexscan = 'on', enable_bitmapscan = 'off' + Planning Time: 28.948 ms + (slice0) Executor memory: 111K bytes. + (slice1) Executor memory: 115K bytes avg x 3x(0) workers, 115K bytes max (seg0). Memory used: 128000kB - Optimizer: Pivotal Optimizer (GPORCA) - Settings: enable_bitmapscan=off, enable_indexscan=on, enable_seqscan=off - Execution Time: 10.390 ms + Optimizer: GPORCA + Execution Time: 57.684 ms (13 rows) DROP TABLE test_bmsparse; +-- test bitmap index scan when using NULL array-condition as index key +create table foo(a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create index foo_i on foo using bitmap(a); +explain (verbose on, costs off) select * from foo where a = any(null::int[]); + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: a + -> Bitmap Heap Scan on public.foo + Output: a + Recheck Cond: (foo.a = ANY (NULL::integer[])) + -> Bitmap Index Scan on foo_i + Index Cond: (foo.a = ANY (NULL::integer[])) + Settings: optimizer = 'on', enable_seqscan = 'off', enable_indexscan = 'on', enable_bitmapscan = 'off' + Optimizer: GPORCA +(9 rows) + +select * from foo where a = any(null::int[]); + a +--- +(0 rows) + +insert into foo values(1); +select * from foo where a = 1 and a = any(null::int[]); + a +--- +(0 rows) + +select * from foo where a = 1 or a = any(null::int[]); + a +--- + 1 +(1 row) + +drop table foo; +-- test for compressed bitmap index ; see https://github.com/apache/cloudberry/pull/679 +SET enable_seqscan = OFF; +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; +create table bm_test_ao (i int, j int, k int) WITH (appendonly=true) distributed by (k) ; +create index bm_test_ao_i_idx on bm_test_ao using bitmap(i); +insert into bm_test_ao select i, 1, 1 from +generate_series(1, 65535) g, generate_series(1, 4) i; +explain select count(*) from bm_test_ao where i =2; + QUERY PLAN +------------------------------------------------------------------------------------------- + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on bm_test_ao (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (i = 2) + -> Bitmap Index Scan on bm_test_ao_i_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (i = 2) + Optimizer: GPORCA +(7 rows) + +select count(*) from bm_test_ao where i = 2; + count +------- + 65535 +(1 row) + +DROP TABLE bm_test_ao; +-- +-- test union bitmap batch words for multivalues index scan like where x in (x1, x2) or x > v +-- which creates bitmapand plan on two bitmap indexs that match multiple keys by using in in where clause +-- +create table bmunion (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into bmunion + select (r%53), (r%59) + from generate_series(1,70000) r; +create index bmu_i_bmtest2_a on bmunion using bitmap(a); +create index bmu_i_bmtest2_b on bmunion using bitmap(b); +insert into bmunion select 53, 1 from generate_series(1, 1000); +analyze bmunion; +set optimizer_enable_tablescan=off; +set optimizer_enable_dynamictablescan=off; +-- inject fault for planner so that it could produce bitmapand plan node. +select gp_inject_fault('simulate_bitmap_and', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; + gp_inject_fault +----------------- + Success: +(1 row) + +explain (costs off) select count(*) from bmunion where a = 53 and b < 3; + QUERY PLAN +-------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 1:1 (slice1; segments: 1) + -> Partial Aggregate + -> Bitmap Heap Scan on bmunion + Recheck Cond: ((a = 53) AND (b < 3)) + -> BitmapAnd + -> Bitmap Index Scan on bmu_i_bmtest2_a + Index Cond: (a = 53) + -> Bitmap Index Scan on bmu_i_bmtest2_b + Index Cond: (b < 3) + Optimizer: GPORCA +(11 rows) + +select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; + gp_inject_fault +----------------- + Success: +(1 row) + +select gp_inject_fault('simulate_bitmap_and', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; + gp_inject_fault +----------------- + Success: +(1 row) + +select count(*) from bmunion where a = 53 and b < 3; + count +------- + 1000 +(1 row) + +select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; + gp_inject_fault +----------------- + Success: +(1 row) + +reset optimizer_enable_tablescan; +reset optimizer_enable_dynamictablescan; +drop table bmunion; +-- test create bitmap index and there have HOT chains. +drop table if exists bm_test; +NOTICE: table "bm_test" does not exist, skipping +create table bm_test(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +-- insert some data into a one segment +insert into bm_test values (1, 1); +insert into bm_test values (1, 2); +insert into bm_test values (1, 3); +insert into bm_test values (12, 1); +-- update the first tuple using HOT, since this page +-- just have 4 tuples, there have full free space to +-- use HOT update. +update bm_test set b = 1 where a = 1 and b = 1; +-- After the update, the tids that the value of b is equal to 1 +-- we scanned will not be in order, due to HOT. +create index idx_bm_test on bm_test using bitmap(b); +select * from bm_test where b = 1; + a | b +----+--- + 12 | 1 + 1 | 1 +(2 rows) + +-- clean up +drop table bm_test; +-- test the scenario that we need read the same batch words many times +-- more detials can be found at https://github.com/greenplum-db/gpdb/issues/13446 +SET enable_seqscan = OFF; +SET enable_bitmapscan = OFF; +create table foo_13446(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create index idx_13446 on foo_13446 using bitmap(b); +insert into foo_13446 select 1, 1 from generate_series(0, 16384); +-- At current implementation, BMIterateResult can only store 16*1024=16384 TIDs, +-- if we have 13685 TIDs to read, it must scan same batch words twice, that's what we want +select count(*) from foo_13446 where b = 1; + count +------- + 16385 +(1 row) + +drop table foo_13446; +SET enable_seqscan = ON; +SET enable_bitmapscan = ON; diff --git a/contrib/pax_storage/src/test/regress/expected/box_optimizer.out b/contrib/pax_storage/src/test/regress/expected/box_optimizer.out index c3a8bd6a97a..49993bade09 100644 --- a/contrib/pax_storage/src/test/regress/expected/box_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/box_optimizer.out @@ -16,6 +16,7 @@ -- -- boxes are specified by two points, given by four floats x1,y1,x2,y2 CREATE TABLE BOX_TBL (f1 box); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. INSERT INTO BOX_TBL (f1) VALUES ('(2.0,2.0,0.0,0.0)'); INSERT INTO BOX_TBL (f1) VALUES ('(1.0,1.0,3.0,3.0)'); INSERT INTO BOX_TBL (f1) VALUES ('((-8, 2), (-2, -10))'); @@ -47,22 +48,22 @@ LINE 1: INSERT INTO BOX_TBL (f1) VALUES ('asdfasdf(ad'); SELECT * FROM BOX_TBL; f1 --------------------- - (2,2),(0,0) (3,3),(1,1) + (3,3),(3,3) + (2,2),(0,0) (-2,2),(-8,-10) (2.5,3.5),(2.5,2.5) - (3,3),(3,3) (5 rows) SELECT b.*, area(b.f1) as barea FROM BOX_TBL b; f1 | barea ---------------------+------- - (2,2),(0,0) | 4 (3,3),(1,1) | 4 + (3,3),(3,3) | 0 + (2,2),(0,0) | 4 (-2,2),(-8,-10) | 72 (2.5,3.5),(2.5,2.5) | 0 - (3,3),(3,3) | 0 (5 rows) -- overlap @@ -71,8 +72,8 @@ SELECT b.f1 WHERE b.f1 && box '(2.5,2.5,1.0,1.0)'; f1 --------------------- - (2,2),(0,0) (3,3),(1,1) + (2,2),(0,0) (2.5,3.5),(2.5,2.5) (3 rows) @@ -115,8 +116,8 @@ SELECT b.f1 f1 --------------------- (2,2),(0,0) - (3,3),(1,1) (2.5,3.5),(2.5,2.5) + (3,3),(1,1) (3,3),(3,3) (4 rows) @@ -136,8 +137,8 @@ SELECT b.f1 WHERE b.f1 = box '(3.0,3.0,5.0,5.0)'; f1 ------------- - (2,2),(0,0) (3,3),(1,1) + (2,2),(0,0) (2 rows) -- area > @@ -146,8 +147,8 @@ SELECT b.f1 WHERE b.f1 > box '(3.5,3.0,4.5,3.0)'; f1 ----------------- - (2,2),(0,0) (3,3),(1,1) + (2,2),(0,0) (-2,2),(-8,-10) (3 rows) @@ -158,9 +159,9 @@ SELECT b.f1 f1 --------------------- (2,2),(0,0) - (3,3),(1,1) (-2,2),(-8,-10) (2.5,3.5),(2.5,2.5) + (3,3),(1,1) (3,3),(3,3) (5 rows) @@ -181,9 +182,9 @@ SELECT b.f1 WHERE b.f1 <@ box '(0,0,3,3)'; f1 ------------- - (2,2),(0,0) (3,3),(1,1) (3,3),(3,3) + (2,2),(0,0) (3 rows) -- contains @@ -192,9 +193,9 @@ SELECT b.f1 WHERE box '(0,0,3,3)' @> b.f1; f1 ------------- - (2,2),(0,0) (3,3),(1,1) (3,3),(3,3) + (2,2),(0,0) (3 rows) -- box equality @@ -211,11 +212,11 @@ SELECT @@(b1.f1) AS p FROM BOX_TBL b1; p --------- - (1,1) (2,2) + (3,3) + (1,1) (-5,-4) (2.5,3) - (3,3) (5 rows) -- wholly-contained @@ -231,22 +232,22 @@ SELECT height(f1), width(f1) FROM BOX_TBL; height | width --------+------- 2 | 2 + 0 | 0 2 | 2 12 | 6 1 | 0 - 0 | 0 (5 rows) -- -- Test the SP-GiST index -- CREATE TEMPORARY TABLE box_temp (f1 box); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 50) AS i; --- PAX not support gist/spgist/brin indexes CREATE INDEX box_spgist ON box_temp USING spgist (f1); -ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:591) INSERT INTO box_temp VALUES (NULL), ('(0,0)(0,100)'), @@ -259,34 +260,35 @@ SET enable_seqscan = false; SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)'; f1 ---------------------------- - (0,Infinity),(0,100) - (2,2),(1,1) - (4,4),(2,2) - (8,8),(4,4) (0,100),(0,0) + (0,Infinity),(0,100) (0,Infinity),(-Infinity,0) (6,6),(3,3) + (8,8),(4,4) + (2,2),(1,1) + (4,4),(2,2) (7 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 << '(10,20),(30,40)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 << '(30,40),(10,20)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)'; f1 ---------------------------- - (6,6),(3,3) - (10,10),(5,5) + (0,100),(0,0) (0,Infinity),(0,100) + (0,Infinity),(-Infinity,0) (2,2),(1,1) (4,4),(2,2) + (10,10),(5,5) + (6,6),(3,3) (8,8),(4,4) - (0,100),(0,0) - (0,Infinity),(-Infinity,0) (8 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)'; @@ -295,36 +297,38 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &< '(10,4.333334),(5,100)'; Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 &< '(10,100),(5,4.333334)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)'; f1 ------------------------------------------- - (24,24),(12,12) - (28,28),(14,14) (34,34),(17,17) + (36,36),(18,18) (40,40),(20,20) (50,50),(25,25) - (20,20),(10,10) + (24,24),(12,12) (26,26),(13,13) + (28,28),(14,14) (30,30),(15,15) - (32,32),(16,16) - (36,36),(18,18) (42,42),(21,21) - (22,22),(11,11) - (38,38),(19,19) (44,44),(22,22) (46,46),(23,23) (48,48),(24,24) (Infinity,Infinity),(-Infinity,-Infinity) + (20,20),(10,10) + (22,22),(11,11) + (32,32),(16,16) + (38,38),(19,19) (17 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 && '(15,20),(25,30)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 && '(25,30),(15,20)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)'; @@ -333,45 +337,47 @@ SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)'; (80,80),(40,40) (82,82),(41,41) (86,86),(43,43) - (90,90),(45,45) - (92,92),(46,46) + (88,88),(44,44) (94,94),(47,47) (96,96),(48,48) - (88,88),(44,44) (100,100),(50,50) (84,84),(42,42) + (92,92),(46,46) + (90,90),(45,45) (98,98),(49,49) (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &> '(40,30),(45,50)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 &> '(45,50),(40,30)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)'; f1 ------------------- - (88,88),(44,44) - (100,100),(50,50) (84,84),(42,42) - (98,98),(49,49) + (92,92),(46,46) (82,82),(41,41) (86,86),(43,43) - (90,90),(45,45) - (92,92),(46,46) + (88,88),(44,44) (94,94),(47,47) (96,96),(48,48) + (100,100),(50,50) + (90,90),(45,45) + (98,98),(49,49) (10 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 >> '(30,40),(40,30)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 >> '(40,40),(30,30)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)'; @@ -388,6 +394,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <<| '(10,4.33334),(5,100)'; Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 <<| '(10,100),(5,4.33334)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)'; @@ -404,13 +411,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 &<| '(10,4.3333334),(5,1)'; Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 &<| '(10,4.3333334),(5,1)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)'; f1 ---------------------- - (100,100),(50,50) (0,Infinity),(0,100) + (100,100),(50,50) (2 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49.99)'; @@ -419,47 +427,50 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |&> '(49.99,49.99),(49.99,49 Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 |&> '(49.99,49.99),(49.99,49.99)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)'; f1 ---------------------- - (88,88),(44,44) - (100,100),(50,50) - (0,Infinity),(0,100) - (84,84),(42,42) - (98,98),(49,49) (82,82),(41,41) (86,86),(43,43) - (90,90),(45,45) - (92,92),(46,46) + (88,88),(44,44) (94,94),(47,47) (96,96),(48,48) + (100,100),(50,50) + (84,84),(42,42) + (92,92),(46,46) + (0,Infinity),(0,100) + (90,90),(45,45) + (98,98),(49,49) (11 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 |>> '(37,38),(39,40)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 |>> '(39,40),(37,38)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,16)'; f1 ------------------------------------------- + (18,18),(9,9) (16,16),(8,8) (20,20),(10,10) (Infinity,Infinity),(-Infinity,-Infinity) - (18,18),(9,9) (4 rows) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 @> '(10,11),(15,15)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 @> '(15,15),(10,11)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)'; @@ -469,11 +480,12 @@ SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)'; (1 row) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 <@ '(10,15),(30,35)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 <@ '(30,35),(10,15)'::box) + Optimizer: GPORCA (4 rows) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)'; @@ -483,11 +495,12 @@ SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)'; (1 row) EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)'; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on box_temp Filter: (f1 ~= '(40,40),(20,20)'::box) + Optimizer: GPORCA (4 rows) RESET enable_seqscan; @@ -498,6 +511,7 @@ ERROR: index "box_spgist" does not exist -- CREATE TABLE quad_box_tbl (id int, b box); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO quad_box_tbl SELECT (x - 1) * 100 + y, box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5)) FROM generate_series(1, 100) x, @@ -513,9 +527,8 @@ VALUES (11003, '((-infinity,-infinity),(infinity,infinity))'), (11004, '((-infinity,100),(-infinity,500))'), (11005, '((-infinity,-infinity),(700,infinity))'); --- PAX not support gist/spgist/brin indexes CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b); -ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:591) ANALYZE quad_box_tbl; -- get reference results for ORDER BY distance from seq scan SET enable_seqscan = ON; @@ -613,11 +626,6 @@ SELECT count(*) FROM quad_box_tbl WHERE b ~= box '((200,300),(205,305))'; -- test ORDER BY distance SET enable_indexscan = ON; SET enable_bitmapscan = OFF; --- start_ignore --- GPDB_13_MERGE_FIXME: --- The ORCA sorts the result of seqscan, while the postgres planner uses index scan --- to keep order. Is it better to also use index scan for ORCA? --- end_ignore EXPLAIN (COSTS OFF) SELECT rank() OVER (ORDER BY b <-> point '123,456') n, b <-> point '123,456' dist, id FROM quad_box_tbl; @@ -631,7 +639,7 @@ FROM quad_box_tbl; -> Sort Sort Key: ((b <-> '(123,456)'::point)) -> Seq Scan on quad_box_tbl - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) CREATE TEMP TABLE quad_box_tbl_ord_idx1 AS @@ -661,7 +669,7 @@ FROM quad_box_tbl WHERE b <@ box '((200,300),(500,600))'; Sort Key: ((b <-> '(123,456)'::point)) -> Seq Scan on quad_box_tbl Filter: (b <@ '(500,600),(200,300)'::box) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (10 rows) CREATE TEMP TABLE quad_box_tbl_ord_idx2 AS @@ -680,3 +688,28 @@ WHERE seq.id IS NULL OR idx.id IS NULL; RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; +-- test non-error-throwing API for some core types +SELECT pg_input_is_valid('200', 'box'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('200', 'box'); + message | detail | hint | sql_error_code +------------------------------------------+--------+------+---------------- + invalid input syntax for type box: "200" | | | 22P02 +(1 row) + +SELECT pg_input_is_valid('((200,300),(500, xyz))', 'box'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('((200,300),(500, xyz))', 'box'); + message | detail | hint | sql_error_code +-------------------------------------------------------------+--------+------+---------------- + invalid input syntax for type box: "((200,300),(500, xyz))" | | | 22P02 +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/brin_ao_optimizer.out b/contrib/pax_storage/src/test/regress/expected/brin_ao_optimizer.out index 80c8fd8b6f9..bd512d86538 100644 --- a/contrib/pax_storage/src/test/regress/expected/brin_ao_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/brin_ao_optimizer.out @@ -1,4 +1,8 @@ -CREATE TABLE brintest_ao (byteacol bytea, +-- Most of these test steps are modified such that the tables' tuples are +-- co-located on one QE. +-- Test scan correctness +CREATE TABLE brintest_ao (id int, + byteacol bytea, charcol "char", namecol name, int8col bigint, @@ -27,7 +31,10 @@ CREATE TABLE brintest_ao (byteacol bytea, lsncol pg_lsn, boxcol box ) WITH (appendonly = true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO brintest_ao SELECT + 1, repeat(stringu1, 8)::bytea, substr(stringu1, 1, 1)::"char", stringu1::name, 142857 * tenthous, @@ -57,7 +64,8 @@ INSERT INTO brintest_ao SELECT box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 100; -- throw in some NULL's and different values -INSERT INTO brintest_ao (inetcol, cidrcol, int4rangecol) SELECT +INSERT INTO brintest_ao (id, inetcol, cidrcol, int4rangecol) SELECT + 1, inet 'fe80::6e40:8ff:fea9:8c46' + tenthous, cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous, 'empty'::int4range @@ -98,6 +106,8 @@ CREATE TABLE brinopers_ao (colname name, typ text, op text[], value text[], matches int[], check (cardinality(op) = cardinality(value)), check (cardinality(op) = cardinality(matches))); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'colname' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO brinopers_ao VALUES ('byteacol', 'bytea', '{>, >=, =, <=, <}', @@ -326,7 +336,7 @@ BEGIN IF plan_line LIKE '%Bitmap Heap Scan on brintest_ao%' THEN plan_ok := true; END IF; - IF plan_line LIKE '%Postgres query optimizer%' THEN + IF plan_line LIKE '%Postgres-based planner%' THEN is_planner_plan := true; END IF; END LOOP; @@ -400,24 +410,6 @@ BEGIN END LOOP; END; $x$; -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<=,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>=,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<=,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>=,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<=,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>=,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<=,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>=,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (lsncol,IS,pg_lsn,,25) -WARNING: ORCA did not produce a bitmap indexscan plan for (lsncol,"IS NOT",pg_lsn,,100) -- Note: ORCA does not support all of the above operators: -- - standard comparison operators on inet and cidr columns -- because ORCA does not look at the second occurrence of a column in an index, @@ -429,6 +421,7 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_bitmapscan; INSERT INTO brintest_ao SELECT + 1, repeat(stringu1, 42)::bytea, substr(stringu1, 1, 1)::"char", stringu1::name, 142857 * tenthous, @@ -457,42 +450,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; -VACUUM brintest_ao; -- force a summarization cycle in brinaoidx -UPDATE brintest_ao SET int8col = int8col * int4col; -UPDATE brintest_ao SET textcol = '' WHERE textcol IS NOT NULL; --- Vaccum again so that a new segment file is created. -VACUUM brintest_ao; -INSERT INTO brintest_ao SELECT * FROM brintest_ao; --- We should have two segment files per Cloudberry segment (QE). --- start_ignore -SELECT segment_id, segno, tupcount, state FROM gp_toolkit.__gp_aoseg('brintest_ao'); - segment_id | segno | tupcount | state -------------+-------+----------+------- - 2 | 1 | 120 | 2 - 2 | 2 | 80 | 1 - 0 | 1 | 128 | 2 - 0 | 2 | 102 | 1 - 1 | 1 | 117 | 2 - 1 | 2 | 78 | 1 -(6 rows) - --- end_ignore --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brintest_ao'); -- error, not an index -ERROR: "brintest_ao" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- start_ignore -SELECT brin_summarize_new_values('brinaoidx'); -- ok, no change expected - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- end_ignore \ No newline at end of file diff --git a/contrib/pax_storage/src/test/regress/expected/brin_aocs_optimizer.out b/contrib/pax_storage/src/test/regress/expected/brin_aocs_optimizer.out index e2a74261dcb..2011480d017 100644 --- a/contrib/pax_storage/src/test/regress/expected/brin_aocs_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/brin_aocs_optimizer.out @@ -1,4 +1,8 @@ -CREATE TABLE brintest_aocs (byteacol bytea, +-- Most of these test steps are modified such that the tables' tuples are +-- co-located on one QE. +-- Test scan correctness +CREATE TABLE brintest_aocs (id int, + byteacol bytea, charcol "char", namecol name, int8col bigint, @@ -28,6 +32,7 @@ CREATE TABLE brintest_aocs (byteacol bytea, boxcol box ) WITH (appendonly = true, orientation=column); INSERT INTO brintest_aocs SELECT + 1, repeat(stringu1, 8)::bytea, substr(stringu1, 1, 1)::"char", stringu1::name, 142857 * tenthous, @@ -57,7 +62,8 @@ INSERT INTO brintest_aocs SELECT box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 100; -- throw in some NULL's and different values -INSERT INTO brintest_aocs (inetcol, cidrcol, int4rangecol) SELECT +INSERT INTO brintest_aocs (id, inetcol, cidrcol, int4rangecol) SELECT + 1, inet 'fe80::6e40:8ff:fea9:8c46' + tenthous, cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous, 'empty'::int4range @@ -326,7 +332,7 @@ BEGIN IF plan_line LIKE '%Bitmap Heap Scan on brintest_aocs%' THEN plan_ok := true; END IF; - IF plan_line LIKE '%Postgres query optimizer%' THEN + IF plan_line LIKE '%Postgres-based planner%' THEN is_planner_plan := true; END IF; END LOOP; @@ -400,24 +406,6 @@ BEGIN END LOOP; END; $x$; -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<=,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>=,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,<=,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (cidrcol,>=,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<=,cidr,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>=,cidr,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,<=,inet,255.255.255.255,100) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (inetcol,>=,inet,0.0.0.0,125) -WARNING: ORCA did not produce a bitmap indexscan plan for (lsncol,IS,pg_lsn,,25) -WARNING: ORCA did not produce a bitmap indexscan plan for (lsncol,"IS NOT",pg_lsn,,100) -- Note: ORCA does not support all of the above operators: -- - standard comparison operators on inet and cidr columns -- because ORCA does not look at the second occurrence of a column in an index, @@ -429,6 +417,7 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_bitmapscan; INSERT INTO brintest_aocs SELECT + 1, repeat(stringu1, 42)::bytea, substr(stringu1, 1, 1)::"char", stringu1::name, 142857 * tenthous, @@ -457,42 +446,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; -VACUUM brintest_aocs; -- force a summarization cycle in brinaocsidx -UPDATE brintest_aocs SET int8col = int8col * int4col; -UPDATE brintest_aocs SET textcol = '' WHERE textcol IS NOT NULL; --- Vaccum again so that a new segment file is created. -VACUUM brintest_aocs; -INSERT INTO brintest_aocs SELECT * FROM brintest_aocs; --- We should have two segment files per Cloudberry segment (QE). --- start_ignore -SELECT segment_id, segno, tupcount, state FROM gp_toolkit.__gp_aocsseg('brintest_aocs'); - segment_id | segno | tupcount | state -------------+-------+----------+------- - 1 | 1 | 78 | 1 - 1 | 2 | 78 | 1 - 2 | 1 | 80 | 1 - 2 | 2 | 80 | 1 - 0 | 1 | 102 | 1 - 0 | 2 | 102 | 1 -(6 rows) - --- end_ignore --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brintest_aocs'); -- error, not an index -ERROR: "brintest_aocs" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- start_ignore -SELECT brin_summarize_new_values('brinaocsidx'); -- ok, no change expected - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- end_ignore \ No newline at end of file diff --git a/contrib/pax_storage/src/test/regress/expected/co_nestloop_idxscan_optimizer.out b/contrib/pax_storage/src/test/regress/expected/co_nestloop_idxscan_optimizer.out index f1d1ea98c22..83471e5e253 100644 --- a/contrib/pax_storage/src/test/regress/expected/co_nestloop_idxscan_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/co_nestloop_idxscan_optimizer.out @@ -17,6 +17,7 @@ create schema co_nestloop_idxscan; create table co_nestloop_idxscan.foo (id bigint, data text) with (appendonly=true, orientation=column) distributed by (id); create table co_nestloop_idxscan.bar (id bigint) distributed by (id); +set optimizer_enable_indexonlyscan = off; -- Changing the text to be smaller doesn't repro the issue insert into co_nestloop_idxscan.foo select i, repeat('xxxxxxxxxx', 100000) from generate_series(1,50) i; insert into co_nestloop_idxscan.bar values (1); @@ -25,16 +26,18 @@ analyze co_nestloop_idxscan.bar; create index foo_id_idx on co_nestloop_idxscan.foo(id); -- test with hash join explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; - QUERY PLAN ------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.01 rows=1 width=8) - -> Nested Loop (cost=0.00..437.01 rows=1 width=8) + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8) + -> Nested Loop (cost=0.00..499.13 rows=1 width=8) Join Filter: true -> Seq Scan on bar b (cost=0.00..431.00 rows=1 width=8) - -> Index Only Scan using foo_id_idx on foo f (cost=0.00..6.01 rows=1 width=8) - Index Cond: (id = b.id) + -> Bitmap Heap Scan on foo f (cost=0.00..68.13 rows=1 width=8) + Recheck Cond: (id = b.id) + -> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (id = b.id) Optimizer: GPORCA -(7 rows) +(9 rows) select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; id @@ -47,16 +50,18 @@ set optimizer_enable_hashjoin = off; set enable_hashjoin=off; set enable_nestloop=on; explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; - QUERY PLAN ------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.01 rows=1 width=8) - -> Nested Loop (cost=0.00..437.01 rows=1 width=8) + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8) + -> Nested Loop (cost=0.00..499.13 rows=1 width=8) Join Filter: true -> Seq Scan on bar b (cost=0.00..431.00 rows=1 width=8) - -> Index Only Scan using foo_id_idx on foo f (cost=0.00..6.01 rows=1 width=8) - Index Cond: (id = b.id) + -> Bitmap Heap Scan on foo f (cost=0.00..68.13 rows=1 width=8) + Recheck Cond: (id = b.id) + -> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (id = b.id) Optimizer: GPORCA -(7 rows) +(9 rows) select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; id @@ -70,16 +75,18 @@ set enable_seqscan = off; -- Known_opt_diff: OPT-929 -- end_ignore explain select f.id from co_nestloop_idxscan.bar b, co_nestloop_idxscan.foo f where f.id = b.id; - QUERY PLAN ------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.01 rows=1 width=8) - -> Nested Loop (cost=0.00..437.01 rows=1 width=8) + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8) + -> Nested Loop (cost=0.00..499.13 rows=1 width=8) Join Filter: true -> Seq Scan on bar b (cost=0.00..431.00 rows=1 width=8) - -> Index Only Scan using foo_id_idx on foo f (cost=0.00..6.01 rows=1 width=8) - Index Cond: (id = b.id) + -> Bitmap Heap Scan on foo f (cost=0.00..68.13 rows=1 width=8) + Recheck Cond: (id = b.id) + -> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (id = b.id) Optimizer: GPORCA -(7 rows) +(9 rows) select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; id @@ -155,6 +162,7 @@ select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_ne (2 rows) reset optimizer; +reset optimizer_enable_indexonlyscan; drop schema co_nestloop_idxscan cascade; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table co_nestloop_idxscan.foo diff --git a/contrib/pax_storage/src/test/regress/expected/conversion.out b/contrib/pax_storage/src/test/regress/expected/conversion.out index 36543ed6682..6c11a3a5bd5 100644 --- a/contrib/pax_storage/src/test/regress/expected/conversion.out +++ b/contrib/pax_storage/src/test/regress/expected/conversion.out @@ -134,33 +134,9 @@ insert into utf8_verification_inputs values ('\xfa9a9a8a8a', '5-byte'); -- Test UTF-8 verification slow path select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs; - description | result | errorat | error -------------------------------------+------------+--------------+---------------------------------------------------------------- - NUL byte | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00 - bare continuation | \x | \xaf | invalid byte sequence for encoding "UTF8": 0xaf - missing second byte in 2-byte char | \x | \xc5 | invalid byte sequence for encoding "UTF8": 0xc5 - smallest 2-byte overlong | \x | \xc080 | invalid byte sequence for encoding "UTF8": 0xc0 0x80 - largest 2-byte overlong | \x | \xc1bf | invalid byte sequence for encoding "UTF8": 0xc1 0xbf - next 2-byte after overlongs | \xc280 | | - largest 2-byte | \xdfbf | | - missing third byte in 3-byte char | \x | \xe9af | invalid byte sequence for encoding "UTF8": 0xe9 0xaf - smallest 3-byte overlong | \x | \xe08080 | invalid byte sequence for encoding "UTF8": 0xe0 0x80 0x80 - largest 3-byte overlong | \x | \xe09fbf | invalid byte sequence for encoding "UTF8": 0xe0 0x9f 0xbf - next 3-byte after overlong | \xe0a080 | | - last before surrogates | \xed9fbf | | - smallest surrogate | \x | \xeda080 | invalid byte sequence for encoding "UTF8": 0xed 0xa0 0x80 - largest surrogate | \x | \xedbfbf | invalid byte sequence for encoding "UTF8": 0xed 0xbf 0xbf - next after surrogates | \xee8080 | | - largest 3-byte | \xefbfbf | | - missing fourth byte in 4-byte char | \x | \xf1afbf | invalid byte sequence for encoding "UTF8": 0xf1 0xaf 0xbf - smallest 4-byte overlong | \x | \xf0808080 | invalid byte sequence for encoding "UTF8": 0xf0 0x80 0x80 0x80 - largest 4-byte overlong | \x | \xf08fbfbf | invalid byte sequence for encoding "UTF8": 0xf0 0x8f 0xbf 0xbf - next 4-byte after overlong | \xf0908080 | | - largest 4-byte | \xf48fbfbf | | - smallest too large | \x | \xf4908080 | invalid byte sequence for encoding "UTF8": 0xf4 0x90 0x80 0x80 - 5-byte | \x | \xfa9a9a8a8a | invalid byte sequence for encoding "UTF8": 0xfa -(23 rows) - +ERROR: query plan with multiple segworker groups is not supported +HINT: likely caused by a function that reads or modifies data in a distributed table +CONTEXT: PL/pgSQL function test_conv(bytea,text,text) line 8 during statement block entry -- Test UTF-8 verification with ASCII padding appended to provide -- coverage for algorithms that work on multiple bytes at a time. -- The error message for a sequence starting with a 4-byte lead diff --git a/contrib/pax_storage/src/test/regress/expected/create_function_sql.out b/contrib/pax_storage/src/test/regress/expected/create_function_sql.out index 5cae4ecbc1e..75019627f13 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_function_sql.out +++ b/contrib/pax_storage/src/test/regress/expected/create_function_sql.out @@ -583,10 +583,9 @@ SELECT * FROM functest_sri1(); EXPLAIN (verbose, costs off) SELECT * FROM functest_sri1(); QUERY PLAN -------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: functest3.a - -> Seq Scan on temp_func_test.functest3 - Output: functest3.a + Function Scan on temp_func_test.functest_sri1 + Output: functest_sri1 + Function Call: functest_sri1() Optimizer: Postgres query optimizer (5 rows) @@ -605,12 +604,11 @@ SELECT * FROM functest_sri2(); (3 rows) EXPLAIN (verbose, costs off) SELECT * FROM functest_sri2(); - QUERY PLAN + QUERY PLAN -------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Output: functest3.a - -> Seq Scan on temp_func_test.functest3 - Output: functest3.a + Function Scan on temp_func_test.functest_sri2 + Output: functest_sri2 + Function Call: functest_sri2() Optimizer: Postgres query optimizer (5 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/create_index_optimizer.out b/contrib/pax_storage/src/test/regress/expected/create_index_optimizer.out index 8d0a41352a8..31a20e35003 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_index_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/create_index_optimizer.out @@ -2,6 +2,8 @@ -- CREATE_INDEX -- Create ancillary data structures (i.e. indices) -- +-- directory paths are passed to us in environment variables +\getenv abs_srcdir PG_ABS_SRCDIR -- -- BTREE -- @@ -31,18 +33,6 @@ ERROR: relation "six_wrong" does not exist COMMENT ON INDEX six IS 'good index'; COMMENT ON INDEX six IS NULL; -- --- BTREE ascending/descending cases --- --- we load int4/text from pure descending data (each key is a new --- low key) and name/f8 from pure ascending data (each key is a new --- high key). we had a bug where new low keys would sometimes be --- "lost". --- -CREATE INDEX bt_i4_index ON bt_i4_heap USING btree (seqno int4_ops); -CREATE INDEX bt_name_index ON bt_name_heap USING btree (seqno name_ops); -CREATE INDEX bt_txt_index ON bt_txt_heap USING btree (seqno text_ops); -CREATE INDEX bt_f8_index ON bt_f8_heap USING btree (seqno float8_ops); --- -- BTREE partial indices -- CREATE INDEX onek2_u1_prtl ON onek2 USING btree(unique1 int4_ops) @@ -54,14 +44,23 @@ CREATE INDEX onek2_stu1_prtl ON onek2 USING btree(stringu1 name_ops) -- -- GiST (rtree-equivalent opclasses only) -- --- PAX not support gist/spgist/brin indexes +CREATE TABLE slow_emp4000 ( + home_base box +); +CREATE TABLE fast_emp4000 ( + home_base box +); +\set filename :abs_srcdir '/data/rect.data' +COPY slow_emp4000 FROM :'filename'; +INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000; +ANALYZE slow_emp4000; +ANALYZE fast_emp4000; CREATE INDEX grect2ind ON fast_emp4000 USING gist (home_base); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) -CREATE INDEX gpolygonind ON polygon_tbl USING gist (f1); -ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) -CREATE INDEX gcircleind ON circle_tbl USING gist (f1); -ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) +-- we want to work with a point_tbl that includes a null +CREATE TEMP TABLE point_tbl AS SELECT * FROM public.point_tbl; INSERT INTO POINT_TBL(f1) VALUES (NULL); +ANALYZE POINT_TBL; CREATE INDEX gpointind ON point_tbl USING gist (f1); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) CREATE TEMP TABLE gpolygon_tbl AS @@ -105,23 +104,6 @@ SELECT count(*) FROM fast_emp4000 WHERE home_base IS NULL; 278 (1 row) -SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon - ORDER BY (poly_center(f1))[0]; - f1 ---------------------- - ((2,0),(2,4),(0,0)) -(1 row) - -SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) - ORDER BY area(f1); - f1 ---------------- - <(1,2),3> - <(1,3),5> - <(1,2),100> - <(100,1),115> -(4 rows) - SELECT count(*) FROM gpolygon_tbl WHERE f1 && '(1000,1000,0,0)'::polygon; count ------- @@ -193,6 +175,7 @@ SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; CREATE VIEW point_tblv AS SELECT * FROM point_tbl WHERE NOT f1 ~= '(1e-300, -1e-300)' AND (f1 <-> '(0,0)') != 'inf'; +NOTICE: view "point_tblv" will be a temporary view -- In gpdb, we intentional filter out point (1e-300, -1e-300) and `inf` every order by related queries -- in this test case file. It is an underflow point, rank it cause randomly results( (0,0), -- (1e-300, -1e-300) are equal). @@ -325,51 +308,6 @@ SELECT count(*) FROM fast_emp4000 WHERE home_base IS NULL; 278 (1 row) -EXPLAIN (COSTS OFF) -SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon - ORDER BY (poly_center(f1))[0]; - QUERY PLAN --------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: ((poly_center(f1))[0]) - -> Sort - Sort Key: ((poly_center(f1))[0]) - -> Seq Scan on polygon_tbl - Filter: (f1 @> '((1,1),(2,2),(2,1))'::polygon) - Optimizer: Postgres query optimizer -(7 rows) - -SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon - ORDER BY (poly_center(f1))[0]; - f1 ---------------------- - ((2,0),(2,4),(0,0)) -(1 row) - -EXPLAIN (COSTS OFF) -SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) - ORDER BY area(f1); - QUERY PLAN ----------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (area(f1)) - -> Sort - Sort Key: (area(f1)) - -> Seq Scan on circle_tbl - Filter: (f1 && '<(1,-2),1>'::circle) - Optimizer: Postgres query optimizer -(7 rows) - -SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) - ORDER BY area(f1); - f1 ---------------- - <(1,2),3> - <(1,3),5> - <(1,2),100> - <(100,1),115> -(4 rows) - EXPLAIN (COSTS OFF) SELECT count(*) FROM gpolygon_tbl WHERE f1 && '(1000,1000,0,0)'::polygon; QUERY PLAN @@ -781,6 +719,36 @@ RESET enable_bitmapscan; -- -- Note: GIN currently supports only bitmap scans, not plain indexscans -- +CREATE TABLE array_index_op_test ( + seqno int4, + i int4[], + t text[] +); +\set filename :abs_srcdir '/data/array.data' +COPY array_index_op_test FROM :'filename'; +ANALYZE array_index_op_test; +SELECT * FROM array_index_op_test WHERE i = '{NULL}' ORDER BY seqno; + seqno | i | t +-------+--------+-------- + 102 | {NULL} | {NULL} +(1 row) + +SELECT * FROM array_index_op_test WHERE i @> '{NULL}' ORDER BY seqno; + seqno | i | t +-------+---+--- +(0 rows) + +SELECT * FROM array_index_op_test WHERE i && '{NULL}' ORDER BY seqno; + seqno | i | t +-------+---+--- +(0 rows) + +SELECT * FROM array_index_op_test WHERE i <@ '{NULL}' ORDER BY seqno; + seqno | i | t +-------+----+---- + 101 | {} | {} +(1 row) + SET enable_seqscan = OFF; SET enable_indexscan = OFF; SET enable_bitmapscan = ON; @@ -1011,28 +979,6 @@ SELECT * FROM array_index_op_test WHERE i <@ '{}' ORDER BY seqno; 101 | {} | {} (1 row) -SELECT * FROM array_op_test WHERE i = '{NULL}' ORDER BY seqno; - seqno | i | t --------+--------+-------- - 102 | {NULL} | {NULL} -(1 row) - -SELECT * FROM array_op_test WHERE i @> '{NULL}' ORDER BY seqno; - seqno | i | t --------+---+--- -(0 rows) - -SELECT * FROM array_op_test WHERE i && '{NULL}' ORDER BY seqno; - seqno | i | t --------+---+--- -(0 rows) - -SELECT * FROM array_op_test WHERE i <@ '{NULL}' ORDER BY seqno; - seqno | i | t --------+----+---- - 101 | {} | {} -(1 row) - CREATE INDEX textarrayidx ON array_index_op_test USING gin (t); explain (costs off) SELECT * FROM array_index_op_test WHERE t @> '{AAAAAAAA72908}' ORDER BY seqno; @@ -1305,18 +1251,6 @@ SELECT * FROM array_index_op_test WHERE t = '{}' ORDER BY seqno; 101 | {} | {} (1 row) -SELECT * FROM array_op_test WHERE i = '{NULL}' ORDER BY seqno; - seqno | i | t --------+--------+-------- - 102 | {NULL} | {NULL} -(1 row) - -SELECT * FROM array_op_test WHERE i <@ '{NULL}' ORDER BY seqno; - seqno | i | t --------+----+---- - 101 | {} | {} -(1 row) - RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; @@ -1350,10 +1284,6 @@ Options: fastupdate=on, gin_pending_list_limit=128 -- -- HASH -- -CREATE INDEX hash_i4_index ON hash_i4_heap USING hash (random int4_ops); -CREATE INDEX hash_name_index ON hash_name_heap USING hash (random name_ops); -CREATE INDEX hash_txt_index ON hash_txt_heap USING hash (random text_ops); -CREATE INDEX hash_f8_index ON hash_f8_heap USING hash (random float8_ops) WITH (fillfactor=60); CREATE UNLOGGED TABLE unlogged_hash_table (id int4); CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops); DROP TABLE unlogged_hash_table; @@ -1385,6 +1315,67 @@ SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; DROP INDEX hash_tuplesort_idx; RESET maintenance_work_mem; -- +-- Test unique null behavior +-- +CREATE TABLE unique_tbl (i int, t text); +CREATE UNIQUE INDEX unique_idx1 ON unique_tbl (i) NULLS DISTINCT; +CREATE UNIQUE INDEX unique_idx2 ON unique_tbl (i) NULLS NOT DISTINCT; +INSERT INTO unique_tbl VALUES (1, 'one'); +INSERT INTO unique_tbl VALUES (2, 'two'); +INSERT INTO unique_tbl VALUES (3, 'three'); +INSERT INTO unique_tbl VALUES (4, 'four'); +INSERT INTO unique_tbl VALUES (5, 'one'); +INSERT INTO unique_tbl (t) VALUES ('six'); +INSERT INTO unique_tbl (t) VALUES ('seven'); -- error from unique_idx2 +DETAIL: Key (i)=(null) already exists. +ERROR: duplicate key value violates unique constraint "unique_idx2" +DROP INDEX unique_idx1, unique_idx2; +INSERT INTO unique_tbl (t) VALUES ('seven'); +-- build indexes on filled table +CREATE UNIQUE INDEX unique_idx3 ON unique_tbl (i) NULLS DISTINCT; -- ok +CREATE UNIQUE INDEX unique_idx4 ON unique_tbl (i) NULLS NOT DISTINCT; -- error +DETAIL: Key (i)=(null) is duplicated. +ERROR: could not create unique index "unique_idx4" +DELETE FROM unique_tbl WHERE t = 'seven'; +CREATE UNIQUE INDEX unique_idx4 ON unique_tbl (i) NULLS NOT DISTINCT; -- ok now +\d unique_tbl + Table "public.unique_tbl" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + i | integer | | | + t | text | | | +Indexes: + "unique_idx3" UNIQUE, btree (i) + "unique_idx4" UNIQUE, btree (i) NULLS NOT DISTINCT + +\d unique_idx3 + Index "public.unique_idx3" + Column | Type | Key? | Definition +--------+---------+------+------------ + i | integer | yes | i +unique, btree, for table "public.unique_tbl" + +\d unique_idx4 + Index "public.unique_idx4" + Column | Type | Key? | Definition +--------+---------+------+------------ + i | integer | yes | i +unique nulls not distinct, btree, for table "public.unique_tbl" + +SELECT pg_get_indexdef('unique_idx3'::regclass); + pg_get_indexdef +---------------------------------------------------------------------- + CREATE UNIQUE INDEX unique_idx3 ON public.unique_tbl USING btree (i) +(1 row) + +SELECT pg_get_indexdef('unique_idx4'::regclass); + pg_get_indexdef +----------------------------------------------------------------------------------------- + CREATE UNIQUE INDEX unique_idx4 ON public.unique_tbl USING btree (i) NULLS NOT DISTINCT +(1 row) + +DROP TABLE unique_tbl; +-- -- Test functional index -- CREATE TABLE func_index_heap (f1 text, f2 text); @@ -1468,13 +1459,6 @@ ALTER TABLE covering_index_heap ADD CONSTRAINT covering_pkey PRIMARY KEY USING I covering_pkey; DROP TABLE covering_index_heap; -- --- Also try building functional, expressional, and partial indexes on --- tables that already contain data. --- -create index hash_f8_index_1 on hash_f8_heap(abs(random)); -create index hash_f8_index_2 on hash_f8_heap((seqno + 1), random); -create index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000; --- -- Try some concurrent index builds -- -- Unfortunately this only tests about half the code paths because there are @@ -1704,6 +1688,12 @@ create unique index on cwi_test (a); alter table cwi_test add primary key using index cwi_test_a_idx ; ERROR: ALTER TABLE / ADD CONSTRAINT USING INDEX is not supported on partitioned tables DROP TABLE cwi_test; +-- PRIMARY KEY constraint cannot be backed by a NULLS NOT DISTINCT index +CREATE TABLE cwi_test(a int, b int); +CREATE UNIQUE INDEX cwi_a_nnd ON cwi_test (a) NULLS NOT DISTINCT; +ALTER TABLE cwi_test ADD PRIMARY KEY USING INDEX cwi_a_nnd; +ERROR: primary keys cannot use NULLS NOT DISTINCT indexes +DROP TABLE cwi_test; -- -- Check handling of indexes on system columns -- @@ -1995,10 +1985,18 @@ CREATE TABLE dupindexcols AS SELECT unique1 as id, stringu2::text as f1 FROM tenk1; CREATE INDEX dupindexcols_i ON dupindexcols (f1, id, f1 text_pattern_ops); ANALYZE dupindexcols; --- FIXME: It's a unstable case in PAX --- EXPLAIN (COSTS OFF) --- SELECT count(*) FROM dupindexcols --- WHERE f1 BETWEEN 'WA' AND 'ZZZ' and id < 1000 and f1 ~<~ 'YX'; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM dupindexcols + WHERE f1 BETWEEN 'WA' AND 'ZZZ' and id < 1000 and f1 ~<~ 'YX'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Index Only Scan using dupindexcols_i on dupindexcols + Index Cond: ((f1 >= 'WA'::text) AND (f1 <= 'ZZZ'::text) AND (id < 1000) AND (f1 ~<~ 'YX'::text)) +(6 rows) + SELECT count(*) FROM dupindexcols WHERE f1 BETWEEN 'WA' AND 'ZZZ' and id < 1000 and f1 ~<~ 'YX'; count @@ -2009,7 +2007,6 @@ SELECT count(*) FROM dupindexcols -- -- Check ordering of =ANY indexqual results (bug in 9.2.0) -- -vacuum tenk1; -- ensure we get consistent plans here explain (costs off) SELECT unique1 FROM tenk1 WHERE unique1 IN (1,42,7) @@ -2280,15 +2277,12 @@ WHERE classid = 'pg_class'::regclass AND index concur_reindex_ind3 | column c1 of table concur_reindex_tab | a index concur_reindex_ind4 | column c1 of table concur_reindex_tab | a index concur_reindex_ind4 | column c2 of table concur_reindex_tab | a - materialized view concur_reindex_matview | access method pax | n materialized view concur_reindex_matview | schema public | n - table concur_reindex_tab | access method pax | n table concur_reindex_tab | schema public | n -(10 rows) +(8 rows) REINDEX INDEX concur_reindex_ind1; REINDEX TABLE concur_reindex_tab; --- FIXME: The materialized view create by PAX can't do REINDEX REINDEX TABLE concur_reindex_matview; NOTICE: table "concur_reindex_matview" has no indexes to reindex SELECT pg_describe_object(classid, objid, objsubid) as obj, @@ -2311,11 +2305,9 @@ WHERE classid = 'pg_class'::regclass AND index concur_reindex_ind3 | column c1 of table concur_reindex_tab | a index concur_reindex_ind4 | column c1 of table concur_reindex_tab | a index concur_reindex_ind4 | column c2 of table concur_reindex_tab | a - materialized view concur_reindex_matview | access method pax | n materialized view concur_reindex_matview | schema public | n - table concur_reindex_tab | access method pax | n table concur_reindex_tab | schema public | n -(10 rows) +(8 rows) -- Check that comments are preserved CREATE TABLE testcomment (i int); @@ -2381,7 +2373,6 @@ DROP TABLE concur_replident; -- Check that opclass parameters are preserved CREATE TABLE concur_appclass_tab(i tsvector, j tsvector, k tsvector); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. --- PAX not support gist/spgist/brin indexes CREATE INDEX concur_appclass_ind on concur_appclass_tab USING gist (i tsvector_ops (siglen='1000'), j tsvector_ops (siglen='500')); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) @@ -2480,13 +2471,11 @@ WHERE classid = 'pg_class'::regclass AND table concur_reindex_part | schema public | n table concur_reindex_part_0 | schema public | n table concur_reindex_part_0 | table concur_reindex_part | a - table concur_reindex_part_0_1 | access method pax | n table concur_reindex_part_0_1 | schema public | n table concur_reindex_part_0_1 | table concur_reindex_part_0 | a - table concur_reindex_part_0_2 | access method pax | n table concur_reindex_part_0_2 | schema public | n table concur_reindex_part_0_2 | table concur_reindex_part_0 | a -(21 rows) +(19 rows) REINDEX INDEX concur_reindex_part_index_0_1; REINDEX INDEX concur_reindex_part_index_0_2; @@ -2534,13 +2523,11 @@ WHERE classid = 'pg_class'::regclass AND table concur_reindex_part | schema public | n table concur_reindex_part_0 | schema public | n table concur_reindex_part_0 | table concur_reindex_part | a - table concur_reindex_part_0_1 | access method pax | n table concur_reindex_part_0_1 | schema public | n table concur_reindex_part_0_1 | table concur_reindex_part_0 | a - table concur_reindex_part_0_2 | access method pax | n table concur_reindex_part_0_2 | schema public | n table concur_reindex_part_0_2 | table concur_reindex_part_0 | a -(21 rows) +(19 rows) SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index') ORDER BY relid, level; @@ -2729,9 +2716,16 @@ REINDEX INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; -- no catalog toast ind ERROR: cannot reindex system catalogs concurrently REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM ERROR: cannot reindex system catalogs concurrently +REINDEX (CONCURRENTLY) SYSTEM postgres; -- ditto +ERROR: cannot reindex system catalogs concurrently +REINDEX (CONCURRENTLY) SYSTEM; -- ditto +ERROR: cannot reindex system catalogs concurrently -- Warns about catalog relations REINDEX SCHEMA CONCURRENTLY pg_catalog; WARNING: cannot reindex system catalogs concurrently, skipping all +-- Not the current database +REINDEX DATABASE not_current_database; +ERROR: can only reindex the currently open database -- Check the relation status, there should not be invalid indexes \d concur_reindex_tab Table "public.concur_reindex_tab" @@ -2976,7 +2970,6 @@ CREATE TABLE reindex_before AS SELECT oid, relname, relfilenode, relkind, reltoastrelid FROM pg_class where relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'schema_to_reindex'); --- Pax won't insert anything, because no toast here INSERT INTO reindex_before SELECT oid, 'pg_toast_TABLE', relfilenode, relkind, reltoastrelid FROM pg_class WHERE oid IN diff --git a/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out b/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out index de91254a5ba..fa00bf24030 100755 --- a/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out @@ -8,15 +8,38 @@ set Debug_print_plan=on; -- Virtual class definitions -- (this also tests the query rewrite system) -- +-- directory paths and dlsuffix are passed to us in environment variables +\getenv abs_srcdir PG_ABS_SRCDIR +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION interpt_pp(path, path) + RETURNS point + AS :'regresslib' + LANGUAGE C STRICT; +CREATE TABLE real_city ( + pop int4, + cname text, + outline path +); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'pop' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +\set filename :abs_srcdir '/data/real_city.data' +COPY real_city FROM :'filename'; +ANALYZE real_city; +SELECT * + INTO TABLE ramp + FROM ONLY road + WHERE name ~ '.*Ramp'; CREATE VIEW street AS SELECT r.name, r.thepath, c.cname AS cname FROM ONLY road r, real_city c - WHERE c.outline ## r.thepath; + WHERE c.outline ?# r.thepath; CREATE VIEW iexit AS SELECT ih.name, ih.thepath, interpt_pp(ih.thepath, r.thepath) AS exit FROM ihighway ih, ramp r - WHERE ih.thepath ## r.thepath; + WHERE ih.thepath ?# r.thepath; CREATE VIEW toyemp AS SELECT name, age, location, 12*salary AS annualsal FROM emp; @@ -38,27 +61,29 @@ CREATE VIEW key_dependent_view_no_cols AS -- -- CREATE OR REPLACE VIEW -- -CREATE TABLE viewtest_tbl (a int, b int); +CREATE TABLE viewtest_tbl (a int, b int, c numeric(10,1), d text COLLATE "C"); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. COPY viewtest_tbl FROM stdin; CREATE OR REPLACE VIEW viewtest AS SELECT * FROM viewtest_tbl; CREATE OR REPLACE VIEW viewtest AS SELECT * FROM viewtest_tbl WHERE a > 10; SELECT * FROM viewtest; - a | b -----+---- - 15 | 20 - 20 | 25 + a | b | c | d +----+----+-----+------- + 15 | 20 | 3.3 | xyzz + 20 | 25 | 4.4 | xyzzy (2 rows) CREATE OR REPLACE VIEW viewtest AS - SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC; + SELECT a, b, c, d FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC; SELECT * FROM viewtest; - a | b -----+---- - 20 | 25 - 15 | 20 - 10 | 15 + a | b | c | d +----+----+-----+------- + 20 | 25 | 4.4 | xyzzy + 15 | 20 | 3.3 | xyzz + 10 | 15 | 2.2 | xyz (3 rows) -- should fail @@ -69,21 +94,36 @@ ERROR: cannot drop columns from view CREATE OR REPLACE VIEW viewtest AS SELECT 1, * FROM viewtest_tbl; ERROR: cannot change name of view column "a" to "?column?" +HINT: Use ALTER VIEW ... RENAME COLUMN ... to change name of view column instead. -- should fail CREATE OR REPLACE VIEW viewtest AS - SELECT a, b::numeric FROM viewtest_tbl; + SELECT a, b::numeric, c, d FROM viewtest_tbl; ERROR: cannot change data type of view column "b" from integer to numeric +-- should fail +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b, c::numeric(10,2), d FROM viewtest_tbl; +ERROR: cannot change data type of view column "c" from numeric(10,1) to numeric(10,2) +-- should fail +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b, c, d COLLATE "POSIX" FROM viewtest_tbl; +ERROR: cannot change collation of view column "d" from "C" to "POSIX" -- should work CREATE OR REPLACE VIEW viewtest AS - SELECT a, b, 0 AS c FROM viewtest_tbl; + SELECT a, b, c, d, 0 AS e FROM viewtest_tbl; DROP VIEW viewtest; DROP TABLE viewtest_tbl; -- tests for temporary views CREATE SCHEMA temp_view_test CREATE TABLE base_table (a int, id int) CREATE TABLE base_table2 (a int, id int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. SET search_path TO temp_view_test, public; CREATE TEMPORARY TABLE temp_table (a int, id int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- should be created in temp_view_test schema CREATE VIEW v1 AS SELECT * FROM base_table; -- should be created in temp object schema @@ -187,8 +227,14 @@ SELECT relname FROM pg_class CREATE SCHEMA testviewschm2; SET search_path TO testviewschm2, public; CREATE TABLE t1 (num int, name text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'num' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE t2 (num2 int, value text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'num2' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TEMP TABLE tt (num2 int, value text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'num2' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE VIEW nontemp1 AS SELECT * FROM t1 CROSS JOIN t2; CREATE VIEW temporal1 AS SELECT * FROM t1 CROSS JOIN tt; NOTICE: view "temporal1" will be a temporary view @@ -226,10 +272,20 @@ SELECT relname FROM pg_class (4 rows) CREATE TABLE tbl1 ( a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE tbl2 (c int, d int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE tbl3 (e int, f int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE tbl4 (g int, h int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'g' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TEMP TABLE tmptbl (i int, j int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. --Should be in testviewschm2 CREATE VIEW pubview AS SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) @@ -271,17 +327,31 @@ ERROR: invalid value for boolean option "security_barrier": 100 CREATE VIEW mysecview6 WITH (invalid_option) -- Error AS SELECT * FROM tbl1 WHERE a < 100; ERROR: unrecognized parameter "invalid_option" +CREATE VIEW mysecview7 WITH (security_invoker=true) + AS SELECT * FROM tbl1 WHERE a = 100; +CREATE VIEW mysecview8 WITH (security_invoker=false, security_barrier=true) + AS SELECT * FROM tbl1 WHERE a > 100; +CREATE VIEW mysecview9 WITH (security_invoker) + AS SELECT * FROM tbl1 WHERE a < 100; +CREATE VIEW mysecview10 WITH (security_invoker=100) -- Error + AS SELECT * FROM tbl1 WHERE a <> 100; +ERROR: invalid value for boolean option "security_invoker": 100 SELECT relname, relkind, reloptions FROM pg_class WHERE oid in ('mysecview1'::regclass, 'mysecview2'::regclass, - 'mysecview3'::regclass, 'mysecview4'::regclass) + 'mysecview3'::regclass, 'mysecview4'::regclass, + 'mysecview7'::regclass, 'mysecview8'::regclass, + 'mysecview9'::regclass) ORDER BY relname; - relname | relkind | reloptions -------------+---------+-------------------------- + relname | relkind | reloptions +------------+---------+------------------------------------------------ mysecview1 | v | mysecview2 | v | {security_barrier=true} mysecview3 | v | {security_barrier=false} mysecview4 | v | {security_barrier=true} -(4 rows) + mysecview7 | v | {security_invoker=true} + mysecview8 | v | {security_invoker=false,security_barrier=true} + mysecview9 | v | {security_invoker=true} +(7 rows) CREATE OR REPLACE VIEW mysecview1 AS SELECT * FROM tbl1 WHERE a = 256; @@ -291,17 +361,28 @@ CREATE OR REPLACE VIEW mysecview3 WITH (security_barrier=true) AS SELECT * FROM tbl1 WHERE a < 256; CREATE OR REPLACE VIEW mysecview4 WITH (security_barrier=false) AS SELECT * FROM tbl1 WHERE a <> 256; +CREATE OR REPLACE VIEW mysecview7 + AS SELECT * FROM tbl1 WHERE a > 256; +CREATE OR REPLACE VIEW mysecview8 WITH (security_invoker=true) + AS SELECT * FROM tbl1 WHERE a < 256; +CREATE OR REPLACE VIEW mysecview9 WITH (security_invoker=false, security_barrier=true) + AS SELECT * FROM tbl1 WHERE a <> 256; SELECT relname, relkind, reloptions FROM pg_class WHERE oid in ('mysecview1'::regclass, 'mysecview2'::regclass, - 'mysecview3'::regclass, 'mysecview4'::regclass) + 'mysecview3'::regclass, 'mysecview4'::regclass, + 'mysecview7'::regclass, 'mysecview8'::regclass, + 'mysecview9'::regclass) ORDER BY relname; - relname | relkind | reloptions -------------+---------+-------------------------- + relname | relkind | reloptions +------------+---------+------------------------------------------------ mysecview1 | v | mysecview2 | v | mysecview3 | v | {security_barrier=true} mysecview4 | v | {security_barrier=false} -(4 rows) + mysecview7 | v | + mysecview8 | v | {security_invoker=true} + mysecview9 | v | {security_invoker=false,security_barrier=true} +(7 rows) -- Check that unknown literals are converted to "text" in CREATE VIEW, -- so that we don't end up with unknown-type columns. @@ -345,10 +426,10 @@ CREATE VIEW tt1 AS c | numeric | | | | main | d | character varying(4) | | | | extended | View definition: - SELECT vv.a, - vv.b, - vv.c, - vv.d + SELECT a, + b, + c, + d FROM ( VALUES ('abc'::character varying(3),'0123456789'::character varying,42,'abcd'::character varying(4)), ('0123456789'::character varying,'abc'::character varying(3),42.12,'abc'::character varying(4))) vv(a, b, c, d); SELECT * FROM tt1; @@ -368,8 +449,14 @@ SELECT a::varchar(3) FROM tt1; DROP VIEW tt1; -- Test view decompilation in the face of relation renaming conflicts CREATE TABLE tt1 (f1 int, f2 int, f3 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE tx1 (x1 int, x2 int, x3 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE temp_view_test.tt1 (y1 int, f2 int, f3 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'y1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE VIEW aliased_view_1 AS select * from tt1 where exists (select 1 from tx1 where tt1.f1 = tx1.x1); @@ -390,9 +477,9 @@ CREATE VIEW aliased_view_4 AS f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.f1, - tt1.f2, - tt1.f3 + SELECT f1, + f2, + f3 FROM tt1 WHERE (EXISTS ( SELECT 1 FROM tx1 @@ -406,9 +493,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM tt1 a1 WHERE (EXISTS ( SELECT 1 FROM tx1 @@ -422,9 +509,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.f1, - tt1.f2, - tt1.f3 + SELECT f1, + f2, + f3 FROM tt1 WHERE (EXISTS ( SELECT 1 FROM tx1 a2 @@ -438,9 +525,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.y1, - tt1.f2, - tt1.f3 + SELECT y1, + f2, + f3 FROM temp_view_test.tt1 WHERE (EXISTS ( SELECT 1 FROM tt1 tt1_1 @@ -455,9 +542,9 @@ ALTER TABLE tx1 RENAME TO a1; f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.f1, - tt1.f2, - tt1.f3 + SELECT f1, + f2, + f3 FROM tt1 WHERE (EXISTS ( SELECT 1 FROM a1 @@ -471,9 +558,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM tt1 a1 WHERE (EXISTS ( SELECT 1 FROM a1 a1_1 @@ -487,9 +574,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.f1, - tt1.f2, - tt1.f3 + SELECT f1, + f2, + f3 FROM tt1 WHERE (EXISTS ( SELECT 1 FROM a1 a2 @@ -503,9 +590,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.y1, - tt1.f2, - tt1.f3 + SELECT y1, + f2, + f3 FROM temp_view_test.tt1 WHERE (EXISTS ( SELECT 1 FROM tt1 tt1_1 @@ -520,9 +607,9 @@ ALTER TABLE tt1 RENAME TO a2; f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a2.f1, - a2.f2, - a2.f3 + SELECT f1, + f2, + f3 FROM a2 WHERE (EXISTS ( SELECT 1 FROM a1 @@ -536,9 +623,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM a2 a1 WHERE (EXISTS ( SELECT 1 FROM a1 a1_1 @@ -552,9 +639,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a2.f1, - a2.f2, - a2.f3 + SELECT f1, + f2, + f3 FROM a2 WHERE (EXISTS ( SELECT 1 FROM a1 a2_1 @@ -568,9 +655,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.y1, - tt1.f2, - tt1.f3 + SELECT y1, + f2, + f3 FROM temp_view_test.tt1 WHERE (EXISTS ( SELECT 1 FROM a2 @@ -585,9 +672,9 @@ ALTER TABLE a1 RENAME TO tt1; f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a2.f1, - a2.f2, - a2.f3 + SELECT f1, + f2, + f3 FROM a2 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -601,9 +688,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM a2 a1 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -617,9 +704,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a2.f1, - a2.f2, - a2.f3 + SELECT f1, + f2, + f3 FROM a2 WHERE (EXISTS ( SELECT 1 FROM tt1 a2_1 @@ -633,9 +720,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.y1, - tt1.f2, - tt1.f3 + SELECT y1, + f2, + f3 FROM temp_view_test.tt1 WHERE (EXISTS ( SELECT 1 FROM a2 @@ -651,9 +738,9 @@ ALTER TABLE tx1 SET SCHEMA temp_view_test; f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tx1.f1, - tx1.f2, - tx1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -667,9 +754,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 a1 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -683,9 +770,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tx1.f1, - tx1.f2, - tx1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 WHERE (EXISTS ( SELECT 1 FROM tt1 a2 @@ -699,9 +786,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tt1.y1, - tt1.f2, - tt1.f3 + SELECT y1, + f2, + f3 FROM temp_view_test.tt1 WHERE (EXISTS ( SELECT 1 FROM temp_view_test.tx1 @@ -718,9 +805,9 @@ ALTER TABLE tmp1 RENAME TO tx1; f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tx1.f1, - tx1.f2, - tx1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -734,9 +821,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT a1.f1, - a1.f2, - a1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 a1 WHERE (EXISTS ( SELECT 1 FROM tt1 @@ -750,9 +837,9 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tx1.f1, - tx1.f2, - tx1.f3 + SELECT f1, + f2, + f3 FROM temp_view_test.tx1 WHERE (EXISTS ( SELECT 1 FROM tt1 a2 @@ -766,14 +853,62 @@ View definition: f2 | integer | | | | plain | f3 | text | | | | extended | View definition: - SELECT tx1.y1, - tx1.f2, - tx1.f3 + SELECT y1, + f2, + f3 FROM tx1 WHERE (EXISTS ( SELECT 1 FROM temp_view_test.tx1 tx1_1 WHERE tx1.y1 = tx1_1.f1)); +-- Test correct deparsing of ORDER BY when there is an output name conflict +create view aliased_order_by as +select x1 as x2, x2 as x1, x3 from tt1 + order by x2; -- this is interpreted per SQL92, so really ordering by x1 +\d+ aliased_order_by + View "testviewschm2.aliased_order_by" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + x2 | integer | | | | plain | + x1 | integer | | | | plain | + x3 | text | | | | extended | +View definition: + SELECT x1 AS x2, + x2 AS x1, + x3 + FROM tt1 + ORDER BY tt1.x1; + +alter view aliased_order_by rename column x1 to x0; +\d+ aliased_order_by + View "testviewschm2.aliased_order_by" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + x2 | integer | | | | plain | + x0 | integer | | | | plain | + x3 | text | | | | extended | +View definition: + SELECT x1 AS x2, + x2 AS x0, + x3 + FROM tt1 + ORDER BY x1; + +alter view aliased_order_by rename column x3 to x1; +\d+ aliased_order_by + View "testviewschm2.aliased_order_by" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + x2 | integer | | | | plain | + x0 | integer | | | | plain | + x1 | text | | | | extended | +View definition: + SELECT x1 AS x2, + x2 AS x0, + x3 AS x1 + FROM tt1 + ORDER BY tt1.x1; + -- Test aliasing of joins create view view_of_joins as select * from @@ -810,6 +945,8 @@ View definition: CROSS JOIN tbl4) same; create table tbl1a (a int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view view_of_joins_2a as select * from tbl1 join tbl1a using (a); create view view_of_joins_2b as select * from tbl1 join tbl1a using (a) as x; create view view_of_joins_2c as select * from (tbl1 join tbl1a using (a)) as y; @@ -856,8 +993,14 @@ select pg_get_viewdef('view_of_joins_2d', true); -- Test view decompilation in the face of column addition/deletion/renaming create table tt2 (a int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt3 (ax int8, b int2, c numeric); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'ax' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt4 (ay int, b int, q int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'ay' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view v1 as select * from tt2 natural join tt3; create view v1a as select * from (tt2 natural join tt3) j; create view v2 as select * from tt2 join tt3 using (b,c) join tt4 using (b); @@ -1190,7 +1333,11 @@ select pg_get_viewdef('v3', true); (1 row) create table tt5 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt6 (c int, d int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view vv1 as select * from (tt5 cross join tt6) j(aa,bb,cc,dd); select pg_get_viewdef('vv1', true); pg_get_viewdef @@ -1255,17 +1402,21 @@ select pg_get_viewdef('v1', true); select pg_get_viewdef('v4', true); pg_get_viewdef ---------------- - SELECT v1.b, + - v1.c, + - v1.x AS a,+ - v1.ax + + ax + + c, + + x AS a, + FROM v1; + SELECT b, + (1 row) -- Unnamed FULL JOIN USING is lots of fun too create table tt7 (x int, xx int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. alter table tt7 drop column xx; create table tt8 (x int, z int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view vv2 as select * from (values(1,2,3,4,5)) v(a,b,c,d,e) union all @@ -1426,8 +1577,12 @@ select pg_get_viewdef('vv4', true); -- Implicit coercions in a JOIN USING create issues similar to FULL JOIN create table tt7a (x date, xx int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. alter table tt7a drop column xx; create table tt8a (x timestamptz, z int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view vv2a as select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) union all @@ -1456,7 +1611,11 @@ select pg_get_viewdef('vv2a', true); -- Also check dropping a column that existed when the view was made -- create table tt9 (x int, xx int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt10 (x int, z int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view vv5 as select x,y,z from tt9 join tt10 using(x); select pg_get_viewdef('vv5', true); pg_get_viewdef @@ -1484,8 +1643,14 @@ select pg_get_viewdef('vv5', true); -- JOIN USING, and thereby make the USING column name ambiguous -- create table tt11 (x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt12 (x int, z int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table tt13 (z int, q int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'z' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create view vv6 as select x,y,z,q from (tt11 join tt12 using(x)) join tt13 using(z); select pg_get_viewdef('vv6', true); @@ -1517,6 +1682,8 @@ select pg_get_viewdef('vv6', true); -- Check cases involving dropped/altered columns in a function's rowtype result -- create table tt14t (f1 text, f2 text, f3 text, f4 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into tt14t values('foo', 'bar', 'baz', '42'); alter table tt14t drop column f2; create function tt14f() returns setof tt14t as @@ -1535,10 +1702,10 @@ create view tt14v as select t.* from tt14f() t; select pg_get_viewdef('tt14v', true); pg_get_viewdef -------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + + f3, + + f4 + FROM tt14f() t(f1, f3, f4); + SELECT f1, + (1 row) select * from tt14v; @@ -1547,53 +1714,105 @@ select * from tt14v; foo | baz | 42 (1 row) -begin; --- this perhaps should be rejected, but it isn't: -alter table tt14t drop column f3; --- f3 is still in the view ... +alter table tt14t drop column f3; -- fail, view has explicit reference to f3 +ERROR: cannot drop column f3 of table tt14t because other objects depend on it +DETAIL: view tt14v depends on column f3 of table tt14t +HINT: Use DROP ... CASCADE to drop the dependent objects too. +-- MERGE16_FIXME: delete command can only delete tuples from master, But we +-- need to delete them from both master and segments + +-- We used to have a bug that would allow the above to succeed, posing +-- hazards for later execution of the view. Check that the internal +-- defenses for those hazards haven't bit-rotted, in case some other +-- bug with similar symptoms emerges. +-- begin; +-- +-- -- destroy the dependency entry that prevents the DROP: +-- delete from pg_depend where +-- objid = (select oid from pg_rewrite +-- where ev_class = 'tt14v'::regclass and rulename = '_RETURN') +-- and refobjsubid = 3 +-- returning pg_describe_object(classid, objid, objsubid) as obj, +-- pg_describe_object(refclassid, refobjid, refobjsubid) as ref, +-- deptype; +-- +-- -- this will now succeed: +-- alter table tt14t drop column f3; +-- +-- -- column f3 is still in the view, sort of ... +-- select pg_get_viewdef('tt14v', true); +-- -- ... and you can even EXPLAIN it ... +-- explain (verbose, costs off) select * from tt14v; +-- -- but it will fail at execution +-- select f1, f4 from tt14v; +-- select * from tt14v; +-- +-- rollback; +-- likewise, altering a referenced column's type is prohibited ... +alter table tt14t alter column f4 type integer using f4::integer; -- fail +ERROR: cannot alter type of a column used by a view or rule +DETAIL: rule _RETURN on view tt14v depends on column "f4" +-- ... but some bug might let it happen, so check defenses +-- begin; +-- +-- -- destroy the dependency entry that prevents the ALTER: +-- delete from pg_depend where +-- objid = (select oid from pg_rewrite +-- where ev_class = 'tt14v'::regclass and rulename = '_RETURN') +-- and refobjsubid = 4 +-- returning pg_describe_object(classid, objid, objsubid) as obj, +-- pg_describe_object(refclassid, refobjid, refobjsubid) as ref, +-- deptype; +-- +-- -- this will now succeed: +-- alter table tt14t alter column f4 type integer using f4::integer; +-- +-- -- f4 is still in the view ... +-- select pg_get_viewdef('tt14v', true); +-- -- but will fail at execution +-- select f1, f3 from tt14v; +-- select * from tt14v; +-- +-- rollback; +drop view tt14v; +create view tt14v as select t.f1, t.f4 from tt14f() t; select pg_get_viewdef('tt14v', true); pg_get_viewdef -------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + + SELECT f1, + + f4 + FROM tt14f() t(f1, f3, f4); (1 row) --- but will fail at execution -select f1, f4 from tt14v; +select * from tt14v; f1 | f4 -----+---- foo | 42 (1 row) -select * from tt14v; -ERROR: attribute 3 of type record has been dropped -rollback; -begin; --- this perhaps should be rejected, but it isn't: -alter table tt14t alter column f4 type integer using f4::integer; --- f4 is still in the view ... +alter table tt14t drop column f3; -- ok select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +---------------------------- + SELECT f1, + + f4 + + FROM tt14f() t(f1, f4); (1 row) --- but will fail at execution -select f1, f3 from tt14v; - f1 | f3 ------+----- - foo | baz -(1 row) +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f4 + Function Call: tt14f() +(4 rows) select * from tt14v; -ERROR: attribute 4 of type record has wrong type -DETAIL: Table has type integer, but query expects text. -rollback; + f1 | f4 +-----+---- + foo | 42 +(1 row) + -- check display of whole-row variables in some corner cases create type nestedcomposite as (x int8_tbl); create view tt15v as select row(i)::nestedcomposite from int8_tbl i; @@ -1602,16 +1821,16 @@ select * from tt15v; ------------------------------------------ ("(123,456)") ("(123,4567890123456789)") + ("(4567890123456789,-4567890123456789)") ("(4567890123456789,123)") ("(4567890123456789,4567890123456789)") - ("(4567890123456789,-4567890123456789)") (5 rows) select pg_get_viewdef('tt15v', true); pg_get_viewdef ------------------------------------------------------ - SELECT ROW(i.*::int8_tbl)::nestedcomposite AS "row"+ FROM int8_tbl i; + SELECT ROW(i.*::int8_tbl)::nestedcomposite AS "row"+ (1 row) select row(i.*::int8_tbl)::nestedcomposite from int8_tbl i; @@ -1619,9 +1838,9 @@ select row(i.*::int8_tbl)::nestedcomposite from int8_tbl i; ------------------------------------------ ("(123,456)") ("(123,4567890123456789)") + ("(4567890123456789,-4567890123456789)") ("(4567890123456789,123)") ("(4567890123456789,4567890123456789)") - ("(4567890123456789,-4567890123456789)") (5 rows) create view tt16v as select * from int8_tbl i, lateral(values(i)) ss; @@ -1638,11 +1857,11 @@ select * from tt16v; select pg_get_viewdef('tt16v', true); pg_get_viewdef ------------------------------------------- - SELECT i.q1, + + LATERAL ( VALUES (i.*::int8_tbl)) ss; i.q2, + ss.column1 + FROM int8_tbl i, + - LATERAL ( VALUES (i.*::int8_tbl)) ss; + SELECT i.q1, + (1 row) select * from int8_tbl i, lateral(values(i.*::int8_tbl)) ss; @@ -1669,10 +1888,10 @@ select * from tt17v; select pg_get_viewdef('tt17v', true); pg_get_viewdef --------------------------------------------- - SELECT i.q1, + - i.q2 + + q2 + FROM int8_tbl i + WHERE (i.* IN ( VALUES (i.*::int8_tbl))); + SELECT q1, + (1 row) select * from int8_tbl i where i.* in (values(i.*::int8_tbl)); @@ -1711,13 +1930,13 @@ NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx select pg_get_viewdef('tt18v', true); pg_get_viewdef ----------------------------------------------------------------------------------- - SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 + - FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + - UNION ALL + - SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 + + FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx; + SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, + + SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, + + UNION ALL + (1 row) explain (costs off) select * from tt18v; @@ -1787,6 +2006,7 @@ select pg_get_viewdef('tt20v', true); -- reverse-listing of various special function syntaxes required by SQL create view tt201v as select + ('2022-12-01'::date + '1 day'::interval) at time zone 'UTC' as atz, extract(day from now()) as extr, (now(), '1 day'::interval) overlaps (current_timestamp(2), '1 day'::interval) as o, @@ -1805,14 +2025,47 @@ select trim(trailing ' foo ') as rt, trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) as btb, trim(leading E'\\000'::bytea from E'\\000Tom\\000'::bytea) as ltb, - trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb; + trim(trailing E'\\000'::bytea from E'\\000Tom\\000'::bytea) as rtb, + CURRENT_DATE as cd, + (select * from CURRENT_DATE) as cd2, + CURRENT_TIME as ct, + (select * from CURRENT_TIME) as ct2, + CURRENT_TIME (1) as ct3, + (select * from CURRENT_TIME (1)) as ct4, + CURRENT_TIMESTAMP as ct5, + (select * from CURRENT_TIMESTAMP) as ct6, + CURRENT_TIMESTAMP (1) as ct7, + (select * from CURRENT_TIMESTAMP (1)) as ct8, + LOCALTIME as lt1, + (select * from LOCALTIME) as lt2, + LOCALTIME (1) as lt3, + (select * from LOCALTIME (1)) as lt4, + LOCALTIMESTAMP as lt5, + (select * from LOCALTIMESTAMP) as lt6, + LOCALTIMESTAMP (1) as lt7, + (select * from LOCALTIMESTAMP (1)) as lt8, + CURRENT_CATALOG as ca, + (select * from CURRENT_CATALOG) as ca2, + CURRENT_ROLE as cr, + (select * from CURRENT_ROLE) as cr2, + CURRENT_SCHEMA as cs, + (select * from CURRENT_SCHEMA) as cs2, + CURRENT_USER as cu, + (select * from CURRENT_USER) as cu2, + USER as us, + (select * from USER) as us2, + SESSION_USER seu, + (select * from SESSION_USER) as seu2, + SYSTEM_USER as su, + (select * from SYSTEM_USER) as su2; select pg_get_viewdef('tt201v', true); pg_get_viewdef ----------------------------------------------------------------------------------------------- - SELECT EXTRACT(day FROM now()) AS extr, + + SELECT (('12-01-2022'::date + '@ 1 day'::interval) AT TIME ZONE 'UTC'::text) AS atz, + + EXTRACT(day FROM now()) AS extr, + ((now(), '@ 1 day'::interval) OVERLAPS (CURRENT_TIMESTAMP(2), '@ 1 day'::interval)) AS o,+ - (('foo'::text) IS NORMALIZED) AS isn, + - (('foo'::text) IS NFKC NORMALIZED) AS isnn, + + ('foo'::text IS NORMALIZED) AS isn, + + ('foo'::text IS NFKC NORMALIZED) AS isnn, + NORMALIZE('foo'::text) AS n, + NORMALIZE('foo'::text, NFKD) AS nfkd, + OVERLAY('foo'::text PLACING 'bar'::text FROM 2) AS ovl, + @@ -1826,7 +2079,55 @@ select pg_get_viewdef('tt201v', true); TRIM(TRAILING FROM ' foo '::text) AS rt, + TRIM(BOTH '\x00'::bytea FROM '\x00546f6d00'::bytea) AS btb, + TRIM(LEADING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS ltb, + - TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb; + TRIM(TRAILING '\x00'::bytea FROM '\x00546f6d00'::bytea) AS rtb, + + CURRENT_DATE AS cd, + + ( SELECT "current_date"."current_date" + + FROM CURRENT_DATE "current_date"("current_date")) AS cd2, + + CURRENT_TIME AS ct, + + ( SELECT "current_time"."current_time" + + FROM CURRENT_TIME "current_time"("current_time")) AS ct2, + + CURRENT_TIME(1) AS ct3, + + ( SELECT "current_time"."current_time" + + FROM CURRENT_TIME(1) "current_time"("current_time")) AS ct4, + + CURRENT_TIMESTAMP AS ct5, + + ( SELECT "current_timestamp"."current_timestamp" + + FROM CURRENT_TIMESTAMP "current_timestamp"("current_timestamp")) AS ct6, + + CURRENT_TIMESTAMP(1) AS ct7, + + ( SELECT "current_timestamp"."current_timestamp" + + FROM CURRENT_TIMESTAMP(1) "current_timestamp"("current_timestamp")) AS ct8, + + LOCALTIME AS lt1, + + ( SELECT "localtime"."localtime" + + FROM LOCALTIME "localtime"("localtime")) AS lt2, + + LOCALTIME(1) AS lt3, + + ( SELECT "localtime"."localtime" + + FROM LOCALTIME(1) "localtime"("localtime")) AS lt4, + + LOCALTIMESTAMP AS lt5, + + ( SELECT "localtimestamp"."localtimestamp" + + FROM LOCALTIMESTAMP "localtimestamp"("localtimestamp")) AS lt6, + + LOCALTIMESTAMP(1) AS lt7, + + ( SELECT "localtimestamp"."localtimestamp" + + FROM LOCALTIMESTAMP(1) "localtimestamp"("localtimestamp")) AS lt8, + + CURRENT_CATALOG AS ca, + + ( SELECT "current_catalog"."current_catalog" + + FROM CURRENT_CATALOG "current_catalog"("current_catalog")) AS ca2, + + CURRENT_ROLE AS cr, + + ( SELECT "current_role"."current_role" + + FROM CURRENT_ROLE "current_role"("current_role")) AS cr2, + + CURRENT_SCHEMA AS cs, + + ( SELECT "current_schema"."current_schema" + + FROM CURRENT_SCHEMA "current_schema"("current_schema")) AS cs2, + + CURRENT_USER AS cu, + + ( SELECT "current_user"."current_user" + + FROM CURRENT_USER "current_user"("current_user")) AS cu2, + + USER AS us, + + ( SELECT "user"."user" + + FROM USER "user"("user")) AS us2, + + SESSION_USER AS seu, + + ( SELECT "session_user"."session_user" + + FROM SESSION_USER "session_user"("session_user")) AS seu2, + + SYSTEM_USER AS su, + + ( SELECT "system_user"."system_user" + + FROM SYSTEM_USER "system_user"("system_user")) AS su2; (1 row) -- corner cases with empty join conditions @@ -1866,25 +2167,25 @@ select 42, 43; select pg_get_viewdef('tt23v', true); pg_get_viewdef ------------------------------- - SELECT int8_tbl.q1 AS col_a,+ + 43 AS col_b; int8_tbl.q2 AS col_b + FROM int8_tbl + - UNION + SELECT 42 AS col_a, + - 43 AS col_b; + SELECT int8_tbl.q1 AS col_a,+ + UNION + (1 row) select pg_get_ruledef(oid, true) from pg_rewrite where ev_class = 'tt23v'::regclass and ev_type = '1'; pg_get_ruledef ----------------------------------------------------------------- - CREATE RULE "_RETURN" AS + + 43 AS col_b; ON SELECT TO tt23v DO INSTEAD SELECT int8_tbl.q1 AS col_a,+ int8_tbl.q2 AS col_b + FROM int8_tbl + - UNION + SELECT 42 AS col_a, + - 43 AS col_b; + CREATE RULE "_RETURN" AS + + UNION + (1 row) -- test extraction of FieldSelect field names (get_name_for_var_field) @@ -1917,26 +2218,26 @@ select pg_get_viewdef('tt25v', true); WITH cte AS MATERIALIZED ( + SELECT pg_get_keywords() AS k+ ) + - SELECT (cte.k).word AS word + + SELECT (k).word AS word + FROM cte; (1 row) -- also check cases seen only in EXPLAIN explain (verbose, costs off) select * from tt24v; - QUERY PLAN --------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------ Hash Join - Output: (share0_ref1.r).column2, ((ROW("*VALUES*".column1, "*VALUES*".column2))).column2 - Hash Cond: (((ROW("*VALUES*".column1, "*VALUES*".column2))).column1 = (share0_ref1.r).column1) - -> Limit - Output: (ROW("*VALUES*".column1, "*VALUES*".column2)) + Output: (share0_ref1.r).column2, ((ROW("*VALUES*_1".column1, "*VALUES*_1".column2))).column2 + Hash Cond: ((share0_ref1.r).column1 = ((ROW("*VALUES*_1".column1, "*VALUES*_1".column2))).column1) + -> Shared Scan (share slice:id 0:0) + Output: share0_ref1.r -> Values Scan on "*VALUES*" Output: ROW("*VALUES*".column1, "*VALUES*".column2) -> Hash - Output: share0_ref1.r - -> Shared Scan (share slice:id 0:0) - Output: share0_ref1.r + Output: (ROW("*VALUES*_1".column1, "*VALUES*_1".column2)) + -> Limit + Output: (ROW("*VALUES*_1".column1, "*VALUES*_1".column2)) -> Values Scan on "*VALUES*_1" Output: ROW("*VALUES*_1".column1, "*VALUES*_1".column2) Optimizer: Postgres query optimizer @@ -1970,22 +2271,39 @@ select x + y + z as c1, (x,y) <= ANY (values(1,2),(3,4)) as c11 from (values(1,2,3)) v(x,y,z); select pg_get_viewdef('tt26v', true); - pg_get_viewdef --------------------------------------------------------- - SELECT v.x + v.y + v.z AS c1, + - v.x * v.y + v.z AS c2, + - v.x + v.y * v.z AS c3, + - (v.x + v.y) * v.z AS c4, + - v.x * (v.y + v.z) AS c5, + - v.x + (v.y + v.z) AS c6, + - v.x + (v.y # v.z) AS c7, + - v.x > v.y AND (v.y > v.z OR v.x > v.z) AS c8, + - v.x > v.y OR (v.y > v.z AND NOT v.x > v.z) AS c9, + - ((v.x, v.y) <> ALL ( VALUES (1,2), (3,4))) AS c10,+ - ((v.x, v.y) <= ANY ( VALUES (1,2), (3,4))) AS c11 + + pg_get_viewdef +---------------------------------------------------- + SELECT x + y + z AS c1, + + x * y + z AS c2, + + x + y * z AS c3, + + (x + y) * z AS c4, + + x * (y + z) AS c5, + + x + (y + z) AS c6, + + x + (y # z) AS c7, + + x > y AND (y > z OR x > z) AS c8, + + x > y OR (y > z AND NOT x > z) AS c9, + + ((x, y) <> ALL ( VALUES (1,2), (3,4))) AS c10,+ + ((x, y) <= ANY ( VALUES (1,2), (3,4))) AS c11 + FROM ( VALUES (1,2,3)) v(x, y, z); (1 row) +-- test restriction on non-system view expansion. +create table tt27v_tbl (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create view tt27v as select a from tt27v_tbl; +set restrict_nonsystem_relation_kind to 'view'; +select a from tt27v where a > 0; -- Error +ERROR: access to non-system view "tt27v" is restricted +insert into tt27v values (1); -- Error +ERROR: access to non-system view "tt27v" is restricted +select viewname from pg_views where viewname = 'tt27v'; -- Ok to access a system view. + viewname +---------- + tt27v +(1 row) + +reset restrict_nonsystem_relation_kind; -- clean up all the random objects we made above DROP SCHEMA temp_view_test CASCADE; NOTICE: drop cascades to 27 other objects @@ -2017,7 +2335,7 @@ drop cascades to view aliased_view_2 drop cascades to view aliased_view_3 drop cascades to view aliased_view_4 DROP SCHEMA testviewschm2 CASCADE; -NOTICE: drop cascades to 74 other objects +NOTICE: drop cascades to 80 other objects DETAIL: drop cascades to table t1 drop cascades to view temporal1 drop cascades to view temporal2 @@ -2038,9 +2356,13 @@ drop cascades to view mysecview1 drop cascades to view mysecview2 drop cascades to view mysecview3 drop cascades to view mysecview4 +drop cascades to view mysecview7 +drop cascades to view mysecview8 +drop cascades to view mysecview9 drop cascades to view unspecified_types drop cascades to table tt1 drop cascades to table tx1 +drop cascades to view aliased_order_by drop cascades to view view_of_joins drop cascades to table tbl1a drop cascades to view view_of_joins_2a @@ -2092,3 +2414,5 @@ drop cascades to view tt23v drop cascades to view tt24v drop cascades to view tt25v drop cascades to view tt26v +drop cascades to table tt27v_tbl +drop cascades to view tt27v diff --git a/contrib/pax_storage/src/test/regress/expected/direct_dispatch_optimizer.out b/contrib/pax_storage/src/test/regress/expected/direct_dispatch_optimizer.out index 588cb4ee1de..8efc7ccfc0f 100644 --- a/contrib/pax_storage/src/test/regress/expected/direct_dispatch_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/direct_dispatch_optimizer.out @@ -400,9 +400,9 @@ EXPLAIN SELECT a.* FROM MPP_22019_a a INNER JOIN MPP_22019_b b ON a.i = b.i WHER -> Result (cost=0.00..431.00 rows=1 width=4) Filter: (a2.j = b.j) -> Materialize (cost=0.00..431.00 rows=1 width=4) - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) -> Seq Scan on mpp_22019_a a2 (cost=0.00..431.00 rows=1 width=4) - Optimizer: GPORCA + Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 (17 rows) SELECT a.* FROM MPP_22019_a a INNER JOIN MPP_22019_b b ON a.i = b.i WHERE a.j NOT IN (SELECT j FROM MPP_22019_a a2 where a2.j = b.j) and a.i = 1; @@ -622,6 +622,168 @@ INFO: (slice 1) Dispatch command to SINGLE content drop table test_prepare; INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +-- Tests to check direct dispatch if the table is randomly distributed and the +-- filter has condition on gp_segment_id +-- NOTE: Only EXPLAIN query included, output of SELECT query is not shown. +-- Since the table is distributed randomly, the output of SELECT query +-- will differ everytime new table is created, and hence the during comparision +-- the tests will fail. +drop table if exists bar_randDistr; +NOTICE: table "bar_randdistr" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table bar_randDistr(col1 int, col2 int) distributed randomly; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into bar_randDistr select i,i*2 from generate_series(1, 10)i; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +-- Case 1 : simple conditions on gp_segment_id +explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_id=0; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on bar_randdistr + Filter: (gp_segment_id = 0) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_id=1 or gp_segment_id=2; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on bar_randdistr + Filter: ((gp_segment_id = 1) OR (gp_segment_id = 2)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, count(*) from bar_randDistr group by gp_segment_id; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: gp_segment_id + -> Sort + Sort Key: gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: gp_segment_id + -> Seq Scan on bar_randdistr + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +-- Case2: Conjunction scenario with filter condition on gp_segment_id and column +explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_id=0 and col1 between 1 and 10; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on bar_randdistr + Filter: ((gp_segment_id = 0) AND (col1 >= 1) AND (col1 <= 10)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +-- Case3: Disjunction scenario with filter condition on gp_segment_id and column +explain (costs off) select gp_segment_id, * from bar_randDistr where gp_segment_id=1 or (col1=6 and gp_segment_id=2); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: ((gp_segment_id = 1) OR ((col1 = 6) AND (gp_segment_id = 2))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +-- Case4: Scenario with constant/variable column and constant/variable gp_segment_id +explain (costs off) select gp_segment_id, * from bar_randDistr where col1 =3 and gp_segment_id in (0,1); + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on bar_randdistr + Filter: ((col1 = 3) AND (gp_segment_id = ANY ('{0,1}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, * from bar_randDistr where col1 =3 and gp_segment_id <>1; + QUERY PLAN +------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: ((col1 = 3) AND (gp_segment_id <> 1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, * from bar_randDistr where col1 between 1 and 5 and gp_segment_id =0; + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on bar_randdistr + Filter: ((col1 >= 1) AND (col1 <= 5) AND (gp_segment_id = 0)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, * from bar_randDistr where col1 in (1,5) and gp_segment_id <> 0; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: ((col1 = ANY ('{1,5}'::integer[])) AND (gp_segment_id <> 0)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select gp_segment_id, * from bar_randDistr where col1 in (1,5) and gp_segment_id in (0,1); + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on bar_randdistr + Filter: ((col1 = ANY ('{1,5}'::integer[])) AND (gp_segment_id = ANY ('{0,1}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +-- Case5: Scenarios with special conditions +create function afunc() returns integer as $$ begin return 42; end; $$ language plpgsql; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +create function immutable_func() returns integer as $$ begin return 42; end; $$ language plpgsql immutable; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +explain (costs off) select * from bar_randDistr where col1 = 1; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: (col1 = 1) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select * from bar_randDistr where gp_segment_id % 2 = 0; + QUERY PLAN +------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: ((gp_segment_id % 2) = 0) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select * from bar_randDistr where gp_segment_id=immutable_func(); + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on bar_randdistr + Filter: (gp_segment_id = 42) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +explain (costs off) select * from bar_randDistr where gp_segment_id=afunc(); + QUERY PLAN +------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on bar_randdistr + Filter: (gp_segment_id = afunc()) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +drop table if exists bar_randDistr; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 -- test direct dispatch via gp_segment_id qual create table t_test_dd_via_segid(id int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. @@ -720,8 +882,8 @@ INFO: (slice 1) Dispatch command to PARTIAL contents: 1 2 (3 rows) explain (costs off) select t1.gp_segment_id, t2.gp_segment_id, * from t_test_dd_via_segid t1, t_test_dd_via_segid t2 where t1.gp_segment_id=t2.id; - QUERY PLAN ------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: (t1.gp_segment_id = t2.id) @@ -730,7 +892,7 @@ explain (costs off) select t1.gp_segment_id, t2.gp_segment_id, * from t_test_dd_ -> Seq Scan on t_test_dd_via_segid t1 -> Hash -> Seq Scan on t_test_dd_via_segid t2 - Optimizer: GPORCA + Optimizer: Pivotal Optimizer (GPORCA) (9 rows) select t1.gp_segment_id, t2.gp_segment_id, * from t_test_dd_via_segid t1, t_test_dd_via_segid t2 where t1.gp_segment_id=t2.id; @@ -743,30 +905,484 @@ INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 2 | 0 | 6 | 2 (3 rows) -explain (costs off) select gp_segment_id, count(*) from t_test_dd_via_segid group by gp_segment_id; +-- flaky tests +-- explain (costs off) select gp_segment_id, count(*) from t_test_dd_via_segid group by gp_segment_id; +-- select gp_segment_id, count(*) from t_test_dd_via_segid group by gp_segment_id; +-- test direct dispatch via gp_segment_id qual with conjunction +create table t_test_dd_via_segid_conj(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t_test_dd_via_segid_conj select i,i from generate_series(1, 10)i; +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where gp_segment_id=0 and a between 1 and 10; QUERY PLAN ------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((gp_segment_id = 0) AND (a >= 1) AND (a <= 10)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where gp_segment_id=0 and a between 1 and 10; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | a | b +---------------+---+--- + 0 | 2 | 2 + 0 | 3 | 3 + 0 | 4 | 4 + 0 | 7 | 7 + 0 | 8 | 8 +(5 rows) + +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where b between 1 and 5 and gp_segment_id=2 and a between 1 and 10; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((b >= 1) AND (b <= 5) AND (gp_segment_id = 2) AND (a >= 1) AND (a <= 10)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where b between 1 and 5 and gp_segment_id=2 and a between 1 and 10; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | a | b +---------------+---+--- + 2 | 5 | 5 +(1 row) + +--test direct dispatch via gp_segment_id with disjunction +explain (costs off) select * from t_test_dd_via_segid_conj where gp_segment_id=1 or (a=3 and gp_segment_id=2); + QUERY PLAN +---------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> GroupAggregate - Group Key: gp_segment_id - -> Sort - Sort Key: gp_segment_id - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: gp_segment_id - -> Seq Scan on t_test_dd_via_segid - Optimizer: GPORCA -(9 rows) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((gp_segment_id = 1) OR ((a = 3) AND (gp_segment_id = 2))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) -select gp_segment_id, count(*) from t_test_dd_via_segid group by gp_segment_id; -INFO: (slice 2) Dispatch command to ALL contents: 0 1 2 +select * from t_test_dd_via_segid_conj where gp_segment_id=1 or (a=3 and gp_segment_id=2); INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 - gp_segment_id | count ----------------+------- - 0 | 3 - 1 | 1 - 2 | 2 -(3 rows) + a | b +---+--- + 1 | 1 +(1 row) +--test direct dispatch with constant distribution column and constant/variable gp_segment_id condition +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 10 and gp_segment_id in (0,1); + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((a = 3) AND (b >= 1) AND (b <= 10) AND (gp_segment_id = ANY ('{0,1}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 10 and gp_segment_id in (0,1); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | a | b +---------------+---+--- + 0 | 3 | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 10 and gp_segment_id <>1; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((a = 3) AND (b >= 1) AND (b <= 10) AND (gp_segment_id <> 1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 10 and gp_segment_id <>1; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | a | b +---------------+---+--- + 0 | 3 | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 100 and gp_segment_id =0; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((a = 3) AND (b >= 1) AND (b <= 100) AND (gp_segment_id = 0)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where a =3 and b between 1 and 100 and gp_segment_id =0; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | a | b +---------------+---+--- + 0 | 3 | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id <> 0; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((a = ANY ('{1,3}'::integer[])) AND (gp_segment_id <> 0)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id <> 0; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | a | b +---------------+---+--- + 1 | 1 | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id in (0,1); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t_test_dd_via_segid_conj + Filter: ((a = ANY ('{1,3}'::integer[])) AND (gp_segment_id = ANY ('{0,1}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id in (0,1); +INFO: (slice 1) Dispatch command to PARTIAL contents: 0 1 + gp_segment_id | a | b +---------------+---+--- + 1 | 1 | 1 + 0 | 3 | 3 +(2 rows) + +--test direct dispatch if distribution column is of varchar type +drop table if exists t1_varchar; +NOTICE: table "t1_varchar" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t1_varchar(col1_varchar varchar, col2_int int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1_varchar' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t1_varchar values ('a',1); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('b',2); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('c',3); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('d',4); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('e',5); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('97',6); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; + QUERY PLAN +---------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = 'c'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text <> 'c'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 1 | e | 5 + 0 | d | 4 + 2 | a | 1 + 2 | 97 | 6 +(5 rows) + +--test direct dispatch if distribution column is of varchar type and disjunction scenario +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = ANY ('{a,b}'::text[])) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +--test direct dispatch if distribution column is of varchar type, having disjunction condition +-- or an additional conjunction constraint using another table column or both +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'c'::text) AND (col2_int = 3)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) AND (col2_int = ANY ('{1,3}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) AND (col2_int <> ALL ('{2,3}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = ANY ('{a,b}'::text[])) AND (col2_int = 2)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text)) AND (col2_int = 1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +--Test direct dispatch with explicit typecasting +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; + QUERY PLAN +----------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = '97'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | 97 | 6 +(1 row) + +-- varchar hash and bpchar hash belong to different opfamilies +-- hash distribution of col1_varchar and col1_varchar::bpchar +-- could assign values to different segments. Therefore, the +-- gather motion applies to all 3 segments, and no direct +-- dispatch occurs. +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = 'c'::character(1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = '2'::character(1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- +(0 rows) + +--No direct dispatch case, scenario: cast exists but not binary coercible +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t3 (c1 timestamp without time zone); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t3 values ('2015-07-03 00:00:00'::timestamp without time zone); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select c1 from t3 where c1 = '2015-07-03'::date; + QUERY PLAN +------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t3 + Filter: (c1 = '07-03-2015'::date) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select c1 from t3 where c1 = '2015-07-03'::date; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + c1 +-------------------------- + Fri Jul 03 00:00:00 2015 +(1 row) + +drop table t3; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table t1_varchar; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +--check direct dispatch working based on the distribution policy of relation +drop extension if exists citext cascade; +NOTICE: extension "citext" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +CREATE EXTENSION citext; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +create table srt_dd (name CITEXT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +INSERT INTO srt_dd (name) +VALUES ('abb'), + ('ABA'), + ('ABC'), + ('abd'); +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +-- text hash/btree and citext hash/btree belong to different opfamilies +-- hash distribution of name and name::text could assign values to +-- different segments. Therefore, the gather motion applies to all 3 +-- segments, and no direct dispatch occurs. +explain (costs off) select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on srt_dd + Filter: ((name)::text = 'ABA'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + aba +----- + aba +(1 row) + +explain (costs off) delete from srt_dd where name='ABA'::text; + QUERY PLAN +---------------------------------------------- + Delete on srt_dd + -> Seq Scan on srt_dd + Filter: ((name)::text = 'ABA'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +delete from srt_dd where name='ABA'::text; +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop extension if exists citext cascade; +NOTICE: drop cascades to table srt_dd +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 -- test direct dispatch via SQLValueFunction and FuncExpr for single row insertion. create table t_sql_value_function1 (a int, b date); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. @@ -986,6 +1602,79 @@ INFO: Distributed transaction command 'Distributed Abort (No Prepared)' to ALL -- cleanup set test_print_direct_dispatch_info=off; set allow_system_table_mods=off; +-- https://github.com/greenplum-db/gpdb/issues/14887 +-- If opno of clause does not belong to opfamily of distributed key, +-- do not use direct dispatch to resolve wrong result +create table t_14887(a varchar); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t_14887 values('a '); +explain select * from t_14887 where a = 'a'::bpchar; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on t_14887 (cost=0.00..431.00 rows=1 width=8) + Filter: ((a)::bpchar = 'a'::bpchar) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select * from t_14887 where a = 'a'::bpchar; + a +------ + a +(1 row) + +-- texteq does not belong to the hash opfamily of the table's citext distkey. +-- But from the implementation can deduce: texteq ==> citext_eq, and we can +-- do the direct dispatch. +-- But we do not have the kind of implication rule in Postgres: texteq ==> citext_eq. +-- Also partition table with citext as hash key and condition with text type +-- does not do partition prune. +CREATE EXTENSION if not exists citext; +drop table t_14887; +create table t_14887(a citext); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t_14887 values('A'),('a'); +explain select * from t_14887 where a = 'a'::text; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on t_14887 (cost=0.00..431.00 rows=1 width=8) + Filter: ((a)::text = 'a'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select * from t_14887 where a = 'a'::text; + a +--- + a +(1 row) + +drop table t_14887; +create table t_14887 (a citext) partition by hash (a); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t0_14887 partition of t_14887 for values with (modulus 3,remainder 0); +NOTICE: table has parent, setting distribution columns to match parent table +create table t1_14887 partition of t_14887 for values with (modulus 3,remainder 1); +NOTICE: table has parent, setting distribution columns to match parent table +create table t2_14887 partition of t_14887 for values with (modulus 3,remainder 2); +NOTICE: table has parent, setting distribution columns to match parent table +explain select * from t_14887 where a = 'a'::text; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..764.38 rows=158 width=32) + -> Append (cost=0.00..762.26 rows=53 width=32) + -> Seq Scan on t0_14887 t_14887_1 (cost=0.00..254.00 rows=18 width=32) + Filter: ((a)::text = 'a'::text) + -> Seq Scan on t1_14887 t_14887_2 (cost=0.00..254.00 rows=18 width=32) + Filter: ((a)::text = 'a'::text) + -> Seq Scan on t2_14887 t_14887_3 (cost=0.00..254.00 rows=18 width=32) + Filter: ((a)::text = 'a'::text) + Optimizer: Postgres query optimizer +(9 rows) + begin; drop table if exists direct_test; drop table if exists direct_test_two_column; @@ -999,4 +1688,6 @@ drop table if exists MPP_22019_b; drop table if exists t_sql_value_function1; drop table if exists t_sql_value_function2; drop table if exists t_hash_partition; +drop table if exists t_14887; +drop extension if exists citext cascade; commit; diff --git a/contrib/pax_storage/src/test/regress/expected/domain_optimizer.out b/contrib/pax_storage/src/test/regress/expected/domain_optimizer.out index aa691802470..e5799cdfa58 100755 --- a/contrib/pax_storage/src/test/regress/expected/domain_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/domain_optimizer.out @@ -87,6 +87,56 @@ drop domain domainvarchar restrict; drop domain domainnumeric restrict; drop domain domainint4 restrict; drop domain domaintext; +-- Test non-error-throwing input +create domain positiveint int4 check(value > 0); +create domain weirdfloat float8 check((1 / value) < 10); +select pg_input_is_valid('1', 'positiveint'); + pg_input_is_valid +------------------- + t +(1 row) + +select pg_input_is_valid('junk', 'positiveint'); + pg_input_is_valid +------------------- + f +(1 row) + +select pg_input_is_valid('-1', 'positiveint'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('junk', 'positiveint'); + message | detail | hint | sql_error_code +-----------------------------------------------+--------+------+---------------- + invalid input syntax for type integer: "junk" | | | 22P02 +(1 row) + +select * from pg_input_error_info('-1', 'positiveint'); + message | detail | hint | sql_error_code +----------------------------------------------------------------------------+--------+------+---------------- + value for domain positiveint violates check constraint "positiveint_check" | | | 23514 +(1 row) + +select * from pg_input_error_info('junk', 'weirdfloat'); + message | detail | hint | sql_error_code +--------------------------------------------------------+--------+------+---------------- + invalid input syntax for type double precision: "junk" | | | 22P02 +(1 row) + +select * from pg_input_error_info('0.01', 'weirdfloat'); + message | detail | hint | sql_error_code +--------------------------------------------------------------------------+--------+------+---------------- + value for domain weirdfloat violates check constraint "weirdfloat_check" | | | 23514 +(1 row) + +-- We currently can't trap errors raised in the CHECK expression itself +select * from pg_input_error_info('0', 'weirdfloat'); +ERROR: division by zero +drop domain positiveint; +drop domain weirdfloat; -- Test domains over array types create domain domainint4arr int4[1]; create domain domainchar4arr varchar(4)[2][3]; @@ -426,7 +476,7 @@ explain (verbose, costs off) Update on public.dcomptable -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) Output: (((d1[1].r := (d1[1].r - '1'::double precision))[1].i := (d1[1].i + '1'::double precision))::dcomptypea), ctid, gp_segment_id, dcomptable.*, (DMLAction) - -> Split + -> Split Update Output: (((d1[1].r := (d1[1].r - '1'::double precision))[1].i := (d1[1].i + '1'::double precision))::dcomptypea), ctid, gp_segment_id, dcomptable.*, DMLAction -> Seq Scan on public.dcomptable Output: (d1[1].r := (d1[1].r - '1'::double precision))[1].i := (d1[1].i + '1'::double precision), ctid, gp_segment_id, dcomptable.* @@ -1202,3 +1252,64 @@ from 1 (1 row) +-- +-- ORCA shouldn't fail for data corruption while translating query to DXL +-- for a constant domain value of the following text related types: +-- char, bpchar, name. +-- github issue: https://github.com/greenplum-db/gpdb/issues/14155 +-- +create table test_table_14155(txtime timestamptz default now(), user_role text); +create domain domainname as name; +create function test_func_name( + i_msg text, + i_caller domainname = current_user +) returns void language plpgsql as $$ +begin + insert into test_table_14155 ( + txtime, user_role + ) + select now(), i_caller; +end +$$; +select * from test_func_name('test'); + test_func_name +---------------- + +(1 row) + +create domain domainchar as char; +create function test_func_char( + i_msg text, + i_caller domainchar = 'a' +) returns void language plpgsql as $$ +begin + insert into test_table_14155 ( + txtime, user_role + ) + select now(), i_caller; +end +$$; +select * from test_func_char('test'); + test_func_char +---------------- + +(1 row) + +create domain domainbpchar as bpchar; +create function test_func_bpchar( + i_msg text, + i_caller domainbpchar = 'test' +) returns void language plpgsql as $$ +begin + insert into test_table_14155 ( + txtime, user_role + ) + select now(), i_caller; +end +$$; +select * from test_func_bpchar('test'); + test_func_bpchar +------------------ + +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/dpe_optimizer.out b/contrib/pax_storage/src/test/regress/expected/dpe_optimizer.out index aada58a4588..debb61445a6 100644 --- a/contrib/pax_storage/src/test/regress/expected/dpe_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/dpe_optimizer.out @@ -1,5 +1,9 @@ -- Tests for Dynamic Partition Elimination, or partition pruning in -- PostgreSQL terms, based on join quals. +-- This test requires autovacuum to be disabled to guarantee a consistent state +-- after vacuum. An inopportune autovacuum could cause an explicit vacuum to +-- skip. That leads to stale relallvisible stats which may prevent picking index +-- only scan plan shapes. -- start_matchsubs -- m/Memory Usage: \d+\w?B/ -- s/Memory Usage: \d+\w?B/Memory Usage: ###B/ @@ -60,6 +64,7 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'dist' HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create index pt1_idx on pt using btree (pt1); create index ptid_idx on pt using btree (ptid); +create index ptid_pt1_idx on pt using btree (ptid, pt1); insert into pt select i, 'hello' || i, 'world', 'drop this', i % 6 from generate_series(0,53) i; insert into t select i, i % 6, 'hello' || i, 'bar' from generate_series(0,1) i; create table t1(dist int, tid int, t1 text, t2 text); @@ -69,7 +74,7 @@ insert into t1 select i, i % 6, 'hello' || i, 'bar' from generate_series(1,2) i; insert into pt1 select * from pt; insert into pt1 select dist, pt1, pt2, pt3, ptid-100 from pt; alter table pt1 set with(REORGANIZE=false) DISTRIBUTED RANDOMLY; -analyze pt; +vacuum analyze pt; analyze pt1; analyze t; analyze t1; @@ -201,23 +206,22 @@ select * from t, pt where tid = ptid and t1 = 'hello' || tid; (18 rows) explain (costs off, timing off, summary off, analyze) select * from t, pt where t1 = pt1 and ptid = tid; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (actual rows=2 loops=1) - -> Nested Loop (actual rows=2 loops=1) - Join Filter: true - -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=2 loops=1) - -> Seq Scan on t (actual rows=2 loops=1) - -> Dynamic Bitmap Heap Scan on pt (actual rows=1 loops=2) + -> Hash Join (actual rows=2 loops=1) + Hash Cond: ((pt.pt1 = t.t1) AND (pt.ptid = t.tid)) + Extra Text: ### Hash chain length ###, using ## of ### buckets. + -> Dynamic Seq Scan on pt (actual rows=8 loops=1) Number of partitions to scan: 6 (out of 6) - Recheck Cond: (pt1 = t.t1) - Filter: ((pt1 = t.t1) AND (ptid = t.tid)) - Heap Blocks: exact=4294967295 - Partitions scanned: Avg 6.0 x 3 workers of 2 scans. Max 6 parts (seg0). - -> Dynamic Bitmap Index Scan on pt1_idx (actual rows=0 loops=12) - Index Cond: (pt1 = t.t1) + Partitions scanned: Avg 2.0 x 3 workers. Max 2 parts (seg0). + -> Hash (actual rows=2 loops=1) + Buckets: ### Batches: ### Memory Usage: ###B + -> Partition Selector (selector id: $0) (actual rows=2 loops=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=2 loops=1) + -> Seq Scan on t (actual rows=2 loops=1) Optimizer: GPORCA -(14 rows) +(13 rows) select * from t, pt where t1 = pt1 and ptid = tid; dist | tid | t1 | t2 | dist | pt1 | pt2 | pt3 | ptid @@ -271,9 +275,44 @@ select * from pt where ptid in (select tid from t where t1 = 'hello' || tid); 48 | hello48 | world | drop this | 0 (18 rows) +explain (costs off, timing off, summary off, analyze) select ptid from pt where ptid in (select tid from t where t1 = 'hello' || tid) and pt1 = 'hello1'; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1) + -> Hash Semi Join (actual rows=1 loops=1) + Hash Cond: (pt.ptid = t.tid) + Extra Text: ### Hash chain length ###, using ## of ### buckets. + -> Dynamic Index Only Scan on ptid_pt1_idx on pt (actual rows=1 loops=1) + Index Cond: (pt1 = 'hello1'::text) + Heap Fetches: 0 + Number of partitions to scan: 6 (out of 6) + Partitions scanned: Avg 2.0 x 3 workers. Max 2 parts (seg0). + -> Hash (actual rows=2 loops=1) + Buckets: ### Batches: ### Memory Usage: ###B + -> Partition Selector (selector id: $0) (actual rows=2 loops=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=2 loops=1) + -> Seq Scan on t (actual rows=2 loops=1) + Filter: (t1 = ('hello'::text || (tid)::text)) + Optimizer: GPORCA +(16 rows) + +select ptid from pt where ptid in (select tid from t where t1 = 'hello' || tid) and pt1 = 'hello1'; + ptid +------ + 1 +(1 row) + -- start_ignore -- Known_opt_diff: MPP-21320 -- end_ignore +-- Disable 'CXformSelect2DynamicIndexGet' to avoid picking Dynamic Index Scan and use this test +-- to showcase dpe alternative +select disable_xform('CXformSelect2DynamicIndexGet'); + disable_xform +------------------------------------------ + CXformSelect2DynamicIndexGet is disabled +(1 row) + explain (costs off, timing off, summary off, analyze) select * from pt where exists (select 1 from t where tid = ptid and t1 = 'hello' || tid); QUERY PLAN --------------------------------------------------------------------------------------------- @@ -317,6 +356,13 @@ select * from pt where exists (select 1 from t where tid = ptid and t1 = 'hello' 0 | hello0 | world | drop this | 0 (18 rows) +-- enable xform +select enable_xform('CXformSelect2DynamicIndexGet'); + enable_xform +----------------------------------------- + CXformSelect2DynamicIndexGet is enabled +(1 row) + -- -- group-by on top -- @@ -580,23 +626,26 @@ set enable_hashjoin=off; -- Known_opt_diff: MPP-21322 -- end_ignore explain (costs off, timing off, summary off, analyze) select * from t, pt where tid = ptid and pt1 = 'hello0'; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (actual rows=1 loops=1) - -> Nested Loop (actual rows=1 loops=1) - Join Filter: true - -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=2 loops=1) - -> Seq Scan on t (actual rows=2 loops=1) - -> Dynamic Bitmap Heap Scan on pt (actual rows=0 loops=2) - Number of partitions to scan: 6 (out of 6) - Recheck Cond: (pt1 = 'hello0'::text) - Filter: ((pt1 = 'hello0'::text) AND (t.tid = ptid)) - Heap Blocks: exact=4294967295 - Partitions scanned: Avg 6.0 x 3 workers of 2 scans. Max 6 parts (seg0). - -> Dynamic Bitmap Index Scan on pt1_idx (actual rows=0 loops=12) - Index Cond: (pt1 = 'hello0'::text) + Hash Join (actual rows=1 loops=1) + Hash Cond: (t.tid = pt.ptid) + Extra Text: Hash chain length ###, using ## of ### buckets. + -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=2 loops=1) + -> Seq Scan on t (actual rows=2 loops=1) + -> Hash (actual rows=1 loops=1) + Buckets: ### Batches: ### Memory Usage: ###B + -> Gather Motion 3:1 (slice2; segments: 3) (actual rows=1 loops=1) + -> Dynamic Bitmap Heap Scan on pt (actual rows=1 loops=1) + Number of partitions to scan: 6 (out of 6) + Recheck Cond: (pt1 = 'hello0'::text) + Filter: (pt1 = 'hello0'::text) + Heap Blocks: exact=4294967295 + Partitions scanned: Avg 6.0 x 3 workers. Max 6 parts (seg0). + -> Dynamic Bitmap Index Scan on pt1_idx (actual rows=0 loops=6) + Index Cond: (pt1 = 'hello0'::text) Optimizer: GPORCA -(14 rows) +(17 rows) select * from t, pt where tid = ptid and pt1 = 'hello0'; dist | tid | t1 | t2 | dist | pt1 | pt2 | pt3 | ptid @@ -1008,16 +1057,13 @@ explain (costs off, timing off, summary off, analyze) select count(*) from pt, p Buckets: 524288 Batches: 1 Memory Usage: 4097kB -> Partition Selector (selector id: $0) (actual rows=1 loops=1) -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=1 loops=1) - -> Dynamic Bitmap Heap Scan on pt (actual rows=1 loops=1) + -> Dynamic Index Only Scan on ptid_pt1_idx on pt (actual rows=1 loops=1) + Index Cond: (pt1 = 'hello0'::text) + Heap Fetches: 0 Number of partitions to scan: 6 (out of 6) - Recheck Cond: (pt1 = 'hello0'::text) - Filter: (pt1 = 'hello0'::text) - Heap Blocks: exact=4294967295 Partitions scanned: Avg 6.0 x 3 workers. Max 6 parts (seg0). - -> Dynamic Bitmap Index Scan on pt1_idx (actual rows=0 loops=6) - Index Cond: (pt1 = 'hello0'::text) Optimizer: GPORCA -(22 rows) +(19 rows) select count(*) from pt, pt1 where pt.ptid = pt1.ptid and pt.pt1 = 'hello0'; count diff --git a/contrib/pax_storage/src/test/regress/expected/equivclass_optimizer.out b/contrib/pax_storage/src/test/regress/expected/equivclass_optimizer.out index 172e1ca6ca1..591de616f38 100644 --- a/contrib/pax_storage/src/test/regress/expected/equivclass_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/equivclass_optimizer.out @@ -98,15 +98,11 @@ create table ec2 (xf int8 primary key, x1 int8alias1, x2 int8alias2); set enable_nestloop = on; set enable_hashjoin = off; set enable_mergejoin = off; -set optimizer_enable_hashjoin = off; -set optimizer_enable_mergejoin = off; -- -- Note that for cases where there's a missing operator, we don't care so -- much whether the plan is ideal as that we don't fail or generate an -- outright incorrect plan. -- --- FIXME: looks like ORCA bug, PAX and AO no index scan path --- when filter is `ff = f1 and f1 = '42'::int8`. explain (costs off) select * from ec0 where ff = f1 and f1 = '42'::int8; QUERY PLAN @@ -295,36 +291,35 @@ explain (costs off) QUERY PLAN -------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Nested Loop - Join Filter: ((((((ec1.ff + 2)) + 1)) = ec1_6.f1) AND (((((ec1.ff + 2)) + 1)) = ((((ec1_3.ff + 2)) + 1)))) + -> Hash Join + Hash Cond: (((((ec1.ff + 2)) + 1)) = ((((ec1_4.ff + 2)) + 1))) + Join Filter: (((((ec1_4.ff + 2)) + 1)) = ec1_3.f1) -> Nested Loop Join Filter: true - -> Seq Scan on ec1 ec1_6 + -> Seq Scan on ec1 ec1_3 Filter: (ff = '42'::bigint) -> Materialize - -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Broadcast Motion 3:3 (slice2; segments: 3) -> Append -> Result -> Append - -> Seq Scan on ec1 ec1_3 - -> Seq Scan on ec1 ec1_4 - -> Seq Scan on ec1 ec1_5 - -> Materialize - -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on ec1 + -> Seq Scan on ec1 ec1_1 + -> Seq Scan on ec1 ec1_2 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) -> Append -> Result -> Append - -> Seq Scan on ec1 - -> Seq Scan on ec1 ec1_1 - -> Seq Scan on ec1 ec1_2 + -> Seq Scan on ec1 ec1_4 + -> Seq Scan on ec1 ec1_5 + -> Seq Scan on ec1 ec1_6 Optimizer: Pivotal Optimizer (GPORCA) -(24 rows) +(25 rows) -- let's try that as a mergejoin set enable_mergejoin = on; set enable_nestloop = off; -set optimizer_enable_mergejoin = on; -set optimizer_enable_hashjoin = on; explain (costs off) select * from ec1, (select ff + 1 as x from @@ -372,7 +367,6 @@ explain (costs off) -- check partially indexed scan set enable_nestloop = on; set enable_mergejoin = off; -set optimizer_enable_mergejoin = off; drop index ec1_expr3; explain (costs off) select * from ec1, @@ -404,7 +398,6 @@ explain (costs off) -- let's try that as a mergejoin set enable_mergejoin = on; set enable_nestloop = off; -set optimizer_enable_mergejoin = on; explain (costs off) select * from ec1, (select ff + 1 as x from @@ -435,7 +428,6 @@ explain (costs off) -- check effects of row-level security set enable_nestloop = on; set enable_mergejoin = off; -set optimizer_enable_mergejoin = off; alter table ec1 enable row level security; create policy p1 on ec1 using (f1 < '5'::int8alias1); create user regress_user_ectest; @@ -517,5 +509,3 @@ explain (costs off) -- this should not require a sort Optimizer: Postgres query optimizer (4 rows) -reset optimizer_enable_hashjoin; -reset optimizer_enable_mergejoin; diff --git a/contrib/pax_storage/src/test/regress/expected/explain_optimizer.out b/contrib/pax_storage/src/test/regress/expected/explain_optimizer.out index d5c53acfa0e..57bab0caddd 100644 --- a/contrib/pax_storage/src/test/regress/expected/explain_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/explain_optimizer.out @@ -51,6 +51,12 @@ begin reset enable_parallel; end; $$; +-- Disable JIT, or we'll get different output on machines where that's been +-- forced on +set jit = off; +-- Similarly, disable track_io_timing, to avoid output differences when +-- enabled. +set track_io_timing = off; -- Simple cases select explain_filter('explain select * from int8_tbl i8'); explain_filter @@ -102,111 +108,6 @@ select explain_filter('explain (analyze, buffers, format text) select * from int Execution Time: N.N ms (8 rows) -select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8'); - explain_filter -------------------------------------------- - [ + - { + - "Plan": { + - "Node Type": "Gather Motion", + - "Senders": N, + - "Receivers": N, + - "Slice": N, + - "Segments": N, + - "Gang Type": "primary reader", + - "Parallel Aware": false, + - "Async Capable": false, + - "Startup Cost": N.N, + - "Total Cost": N.N, + - "Plan Rows": N, + - "Plan Width": N, + - "Actual Startup Time": N.N, + - "Actual Total Time": N.N, + - "Actual Rows": N, + - "Actual Loops": N, + - "Shared Hit Blocks": N, + - "Shared Read Blocks": N, + - "Shared Dirtied Blocks": N, + - "Shared Written Blocks": N, + - "Local Hit Blocks": N, + - "Local Read Blocks": N, + - "Local Dirtied Blocks": N, + - "Local Written Blocks": N, + - "Temp Read Blocks": N, + - "Temp Written Blocks": N, + - "Plans": [ + - { + - "Node Type": "Seq Scan", + - "Parent Relationship": "Outer",+ - "Slice": N, + - "Segments": N, + - "Gang Type": "primary reader", + - "Parallel Aware": false, + - "Async Capable": false, + - "Relation Name": "int8_tbl", + - "Alias": "i8", + - "Startup Cost": N.N, + - "Total Cost": N.N, + - "Plan Rows": N, + - "Plan Width": N, + - "Actual Startup Time": N.N, + - "Actual Total Time": N.N, + - "Actual Rows": N, + - "Actual Loops": N, + - "Shared Hit Blocks": N, + - "Shared Read Blocks": N, + - "Shared Dirtied Blocks": N, + - "Shared Written Blocks": N, + - "Local Hit Blocks": N, + - "Local Read Blocks": N, + - "Local Dirtied Blocks": N, + - "Local Written Blocks": N, + - "Temp Read Blocks": N, + - "Temp Written Blocks": N + - } + - ] + - }, + - "Settings": { + - "Optimizer": "GPORCA" + - }, + - "Planning": { + - "Shared Hit Blocks": N, + - "Shared Read Blocks": N, + - "Shared Dirtied Blocks": N, + - "Shared Written Blocks": N, + - "Local Hit Blocks": N, + - "Local Read Blocks": N, + - "Local Dirtied Blocks": N, + - "Local Written Blocks": N, + - "Temp Read Blocks": N, + - "Temp Written Blocks": N + - }, + - "Planning Time": N.N, + - "Triggers": [ + - ], + - "Slice statistics": [ + - { + - "Slice": N, + - "Executor Memory": N + - }, + - { + - "Slice": N, + - "Executor Memory": { + - "Average": N, + - "Workers": N, + - "Subworkers": N, + - "Maximum Memory Used": N + - } + - } + - ], + - "Statement statistics": { + - "Memory used": N + - }, + - "Execution Time": N.N + - } + - ] -(1 row) - select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8'); explain_filter ------------------------------------------------------------ @@ -482,6 +383,115 @@ select explain_filter('explain (buffers, format json) select * from int8_tbl i8' ] (1 row) +-- Check output including I/O timings. These fields are conditional +-- but always set in JSON format, so check them only in this case. +set track_io_timing = on; +select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8'); + explain_filter +------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "Gather Motion", + + "Senders": N, + + "Receivers": N, + + "Slice": N, + + "Segments": N, + + "Gang Type": "primary reader", + + "Parallel Aware": false, + + "Async Capable": false, + + "Startup Cost": N.N, + + "Total Cost": N.N, + + "Plan Rows": N, + + "Plan Width": N, + + "Actual Startup Time": N.N, + + "Actual Total Time": N.N, + + "Actual Rows": N, + + "Actual Loops": N, + + "Shared Hit Blocks": N, + + "Shared Read Blocks": N, + + "Shared Dirtied Blocks": N, + + "Shared Written Blocks": N, + + "Local Hit Blocks": N, + + "Local Read Blocks": N, + + "Local Dirtied Blocks": N, + + "Local Written Blocks": N, + + "Temp Read Blocks": N, + + "Temp Written Blocks": N, + + "Plans": [ + + { + + "Node Type": "Seq Scan", + + "Parent Relationship": "Outer",+ + "Slice": N, + + "Segments": N, + + "Gang Type": "primary reader", + + "Parallel Aware": false, + + "Async Capable": false, + + "Relation Name": "int8_tbl", + + "Alias": "int8_tbl", + + "Startup Cost": N.N, + + "Total Cost": N.N, + + "Plan Rows": N, + + "Plan Width": N, + + "Actual Startup Time": N.N, + + "Actual Total Time": N.N, + + "Actual Rows": N, + + "Actual Loops": N, + + "Shared Hit Blocks": N, + + "Shared Read Blocks": N, + + "Shared Dirtied Blocks": N, + + "Shared Written Blocks": N, + + "Local Hit Blocks": N, + + "Local Read Blocks": N, + + "Local Dirtied Blocks": N, + + "Local Written Blocks": N, + + "Temp Read Blocks": N, + + "Temp Written Blocks": N + + } + + ] + + }, + + "Settings": { + + "Optimizer": "GPORCA" + + }, + + "Planning": { + + "Shared Hit Blocks": N, + + "Shared Read Blocks": N, + + "Shared Dirtied Blocks": N, + + "Shared Written Blocks": N, + + "Local Hit Blocks": N, + + "Local Read Blocks": N, + + "Local Dirtied Blocks": N, + + "Local Written Blocks": N, + + "Temp Read Blocks": N, + + "Temp Written Blocks": N + + }, + + "Planning Time": N.N, + + "Triggers": [ + + ], + + "Slice statistics": [ + + { + + "Slice": N, + + "Executor Memory": N + + }, + + { + + "Slice": N, + + "Executor Memory": { + + "Average": N, + + "Workers": N, + + "Subworkers": N, + + "Maximum Memory Used": N + + } + + } + + ], + + "Statement statistics": { + + "Memory used": N + + }, + + "Execution Time": N.N + + } + + ] +(1 row) + +set track_io_timing = off; -- SETTINGS option -- We have to ignore other settings that might be imposed by the environment, -- so printing the whole Settings field unfortunately won't do. @@ -502,6 +512,49 @@ select explain_filter_to_json('explain (settings, format json) select * from int (1 row) rollback; +-- GENERIC_PLAN option +select explain_filter('explain (generic_plan) select unique1 from tenk1 where thousand = $1'); + explain_filter +-------------------------------------------------------------------------- + Gather Motion N:N (slice1; segments: N) (cost=N.N..N.N rows=N width=N) + -> Seq Scan on tenk1 (cost=N.N..N.N rows=N width=N) + Filter: (thousand = $1) + Optimizer: GPORCA +(4 rows) + +-- should fail +select explain_filter('explain (analyze, generic_plan) select unique1 from tenk1 where thousand = $1'); +ERROR: EXPLAIN options ANALYZE and GENERIC_PLAN cannot be used together +-- Test EXPLAIN (GENERIC_PLAN) with partition pruning +-- partitions should be pruned at plan time, based on constants, +-- but there should be no pruning based on parameter placeholders +create table gen_part ( + key1 integer not null, + key2 integer not null +) partition by list (key1); +create table gen_part_1 + partition of gen_part for values in (1) + partition by range (key2); +create table gen_part_1_1 + partition of gen_part_1 for values from (1) to (2); +create table gen_part_1_2 + partition of gen_part_1 for values from (2) to (3); +create table gen_part_2 + partition of gen_part for values in (2); +-- should scan gen_part_1_1 and gen_part_1_2, but not gen_part_2 +select explain_filter('explain (generic_plan) select key1, key2 from gen_part where key1 = 1 and key2 = $1'); + explain_filter +--------------------------------------------------------------------------------- + Gather Motion N:N (slice1; segments: N) (cost=N.N..N.N rows=N width=N) + -> Append (cost=N.N..N.N rows=N width=N) + -> Seq Scan on gen_part_1_1 gen_part_1 (cost=N.N..N.N rows=N width=N) + Filter: ((key1 = N) AND (key2 = $1)) + -> Seq Scan on gen_part_1_2 gen_part_2 (cost=N.N..N.N rows=N width=N) + Filter: ((key1 = N) AND (key2 = $1)) + Optimizer: GPORCA +(7 rows) + +drop table gen_part; -- -- Test production of per-worker data -- @@ -606,13 +659,6 @@ select jsonb_pretty( "Sort Key": [ + "tenk1.tenthous" + ], + - "work_mem": { + - "Used": 0, + - "Segments": 0, + - "Max Memory": 0, + - "Workfile Spilling": 0, + - "Max Memory Segment": 0 + - }, + "Gang Type": "primary reader", + "Node Type": "Sort", + "Plan Rows": 0, + @@ -703,6 +749,7 @@ select jsonb_pretty( "Shared Written Blocks": 0 + }, + "Settings": { + + "jit": "off", + "Optimizer": "GPORCA", + "optimizer": "on", + "enable_parallel": "off", + @@ -726,8 +773,7 @@ select jsonb_pretty( "Workers": 0, + "Subworkers": 0, + "Maximum Memory Used": 0 + - }, + - "Work Maximum Memory": 0 + + } + } + ], + "Statement statistics": { + @@ -738,6 +784,22 @@ select jsonb_pretty( (1 row) rollback; +-- Test display of temporary objects +create temp table t1(f1 float8); +create function pg_temp.mysin(float8) returns float8 language plpgsql +as 'begin return sin($1); end'; +select explain_filter('explain (verbose) select * from t1 where pg_temp.mysin(f1) < 0.5'); + explain_filter +-------------------------------------------------------------------------- + Gather Motion N:N (slice1; segments: N) (cost=N.N..N.N rows=N width=N) + Output: f1 + -> Seq Scan on pg_temp.t1 (cost=N.N..N.N rows=N width=N) + Output: f1 + Filter: (pg_temp.mysin(t1.f1) < 'N.N'::double precision) + Optimizer: GPORCA +(6 rows) + +-- Test compute_query_id set compute_query_id = on; select explain_filter('explain (verbose) select * from int8_tbl i8'); explain_filter diff --git a/contrib/pax_storage/src/test/regress/expected/expressions_optimizer.out b/contrib/pax_storage/src/test/regress/expected/expressions_optimizer.out index b9c08587154..29ed3da7b8d 100644 --- a/contrib/pax_storage/src/test/regress/expected/expressions_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/expressions_optimizer.out @@ -2,7 +2,7 @@ -- expression evaluation tests that don't fit into a more specific file -- -- --- Tests for SQLVAlueFunction +-- Tests for SQLValueFunction -- -- current_date (always matches because of transactional behaviour) SELECT date(now())::text = current_date::text; @@ -36,7 +36,7 @@ SELECT now()::time(3)::text = localtime(3)::text; t (1 row) --- current_timestamp / localtimestamp (always matches because of transactional behaviour) +-- current_time[stamp]/ localtime[stamp] (always matches because of transactional behaviour) SELECT current_timestamp = NOW(); ?column? ---------- @@ -57,7 +57,36 @@ SELECT now()::timestamp::text = localtimestamp::text; t (1 row) --- current_role/user/user is tested in rolnames.sql +-- precision overflow +SELECT current_time = current_time(7); +WARNING: TIME(7) WITH TIME ZONE precision reduced to maximum allowed, 6 + ?column? +---------- + t +(1 row) + +SELECT current_timestamp = current_timestamp(7); +WARNING: TIMESTAMP(7) WITH TIME ZONE precision reduced to maximum allowed, 6 + ?column? +---------- + t +(1 row) + +SELECT localtime = localtime(7); +WARNING: TIME(7) precision reduced to maximum allowed, 6 + ?column? +---------- + t +(1 row) + +SELECT localtimestamp = localtimestamp(7); +WARNING: TIMESTAMP(7) precision reduced to maximum allowed, 6 + ?column? +---------- + t +(1 row) + +-- current_role/user/user is tested in rolenames.sql -- current database / catalog SELECT current_catalog = current_database(); ?column? @@ -88,90 +117,114 @@ SELECT current_schema; RESET search_path; -- --- Tests for BETWEEN +-- Test parsing of a no-op cast to a type with unspecified typmod -- --- start_ignore --- GPDB_13_MERGE_FIXME: --- ORCA does support 2 phase aggregate, but not clear why it doesn't generate the plan --- that makes use of 2 phase aggregate. However, the plan is correct. --- NOTE: we should consider remove this optimizer answer file after we fix this issue. --- end_ignore -explain (costs off) -select count(*) from date_tbl - where f1 between '1997-01-01' and '1998-01-01'; - QUERY PLAN ------------------------------------------------------------------------------------ - Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Seq Scan on date_tbl - Filter: ((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) - Optimizer: Pivotal Optimizer (GPORCA) -(5 rows) - -select count(*) from date_tbl - where f1 between '1997-01-01' and '1998-01-01'; - count -------- - 3 -(1 row) +begin; +create table numeric_tbl (f1 numeric(18,3), f2 numeric); +create view numeric_view as + select + f1, f1::numeric(16,4) as f1164, f1::numeric as f1n, + f2, f2::numeric(16,4) as f2164, f2::numeric as f2n + from numeric_tbl; +\d+ numeric_view + View "public.numeric_view" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------------+-----------+----------+---------+---------+------------- + f1 | numeric(18,3) | | | | main | + f1164 | numeric(16,4) | | | | main | + f1n | numeric | | | | main | + f2 | numeric | | | | main | + f2164 | numeric(16,4) | | | | main | + f2n | numeric | | | | main | +View definition: + SELECT f1, + f1::numeric(16,4) AS f1164, + f1::numeric AS f1n, + f2, + f2::numeric(16,4) AS f2164, + f2 AS f2n + FROM numeric_tbl; -explain (costs off) -select count(*) from date_tbl - where f1 not between '1997-01-01' and '1998-01-01'; - QUERY PLAN --------------------------------------------------------------------------------- - Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Seq Scan on date_tbl - Filter: ((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) - Optimizer: Pivotal Optimizer (GPORCA) +explain (verbose, costs off) select * from numeric_view; + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: f1, ((f1)::numeric(16,4)), ((f1)::numeric), f2, ((f2)::numeric(16,4)), f2 + -> Seq Scan on public.numeric_tbl + Output: f1, (f1)::numeric(16,4), f1, f2, (f2)::numeric(16,4), f2 (5 rows) -select count(*) from date_tbl - where f1 not between '1997-01-01' and '1998-01-01'; - count -------- - 13 -(1 row) - -explain (costs off) -select count(*) from date_tbl - where f1 between symmetric '1997-01-01' and '1998-01-01'; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------- - Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Seq Scan on date_tbl - Filter: (((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) OR ((f1 >= '01-01-1998'::date) AND (f1 <= '01-01-1997'::date))) - Optimizer: Pivotal Optimizer (GPORCA) -(5 rows) +-- bpchar, lacking planner support for its length coercion function, +-- could behave differently +create table bpchar_tbl (f1 character(16) unique, f2 bpchar); +create view bpchar_view as + select + f1, f1::character(14) as f114, f1::bpchar as f1n, + f2, f2::character(14) as f214, f2::bpchar as f2n + from bpchar_tbl; +\d+ bpchar_view + View "public.bpchar_view" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------------+-----------+----------+---------+----------+------------- + f1 | character(16) | | | | extended | + f114 | character(14) | | | | extended | + f1n | bpchar | | | | extended | + f2 | bpchar | | | | extended | + f214 | character(14) | | | | extended | + f2n | bpchar | | | | extended | +View definition: + SELECT f1, + f1::character(14) AS f114, + f1::bpchar AS f1n, + f2, + f2::character(14) AS f214, + f2 AS f2n + FROM bpchar_tbl; -select count(*) from date_tbl - where f1 between symmetric '1997-01-01' and '1998-01-01'; - count -------- - 3 -(1 row) +explain (verbose, costs off) select * from bpchar_view + where f1::bpchar = 'foo'; + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + Output: f1, ((f1)::character(14)), ((f1)::bpchar), f2, ((f2)::character(14)), f2 + -> Seq Scan on public.bpchar_tbl + Output: f1, (f1)::character(14), f1, f2, (f2)::character(14), f2 + Filter: ((bpchar_tbl.f1)::bpchar = 'foo'::bpchar) + Settings: optimizer = 'on' + Optimizer: GPORCA +(7 rows) -explain (costs off) -select count(*) from date_tbl - where f1 not between symmetric '1997-01-01' and '1998-01-01'; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------ - Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Seq Scan on date_tbl - Filter: (((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) AND ((f1 < '01-01-1998'::date) OR (f1 > '01-01-1997'::date))) - Optimizer: Pivotal Optimizer (GPORCA) -(5 rows) +rollback; +-- +-- Ordinarily, IN/NOT IN can be converted to a ScalarArrayOpExpr +-- with a suitably-chosen array type. +-- +explain (verbose, costs off) +select random() IN (1, 4, 8.0); + QUERY PLAN +------------------------------------------------------------ + Result + Output: (random() = ANY ('{1,4,8}'::double precision[])) +(3 rows) -select count(*) from date_tbl - where f1 not between symmetric '1997-01-01' and '1998-01-01'; - count -------- - 13 -(1 row) +explain (verbose, costs off) +select random()::int IN (1, 4, 8.0); + QUERY PLAN +--------------------------------------------------------------------------- + Result + Output: (((random())::integer)::numeric = ANY ('{1,4,8.0}'::numeric[])) +(3 rows) +-- However, if there's not a common supertype for the IN elements, +-- we should instead try to produce "x = v1 OR x = v2 OR ...". +-- In most cases that'll fail for lack of all the requisite = operators, +-- but it can succeed sometimes. So this should complain about lack of +-- an = operator, not about cast failure. +select '(0,0)'::point in ('(0,0,0,0)'::box, point(0,0)); +ERROR: operator does not exist: point = box +LINE 1: select '(0,0)'::point in ('(0,0,0,0)'::box, point(0,0)); + ^ +HINT: No operator matches the given name and argument types. You might need to add explicit type casts. -- -- Test parsing of a no-op cast to a type with unspecified typmod -- @@ -193,12 +246,12 @@ create view numeric_view as f2164 | numeric(16,4) | | | | main | f2n | numeric | | | | main | View definition: - SELECT numeric_tbl.f1, - numeric_tbl.f1::numeric(16,4) AS f1164, - numeric_tbl.f1::numeric AS f1n, - numeric_tbl.f2, - numeric_tbl.f2::numeric(16,4) AS f2164, - numeric_tbl.f2 AS f2n + SELECT f1, + f1::numeric(16,4) AS f1164, + f1::numeric AS f1n, + f2, + f2::numeric(16,4) AS f2164, + f2 AS f2n FROM numeric_tbl; explain (verbose, costs off) select * from numeric_view; @@ -208,7 +261,6 @@ explain (verbose, costs off) select * from numeric_view; Output: f1, ((f1)::numeric(16,4)), ((f1)::numeric), f2, ((f2)::numeric(16,4)), f2 -> Seq Scan on public.numeric_tbl Output: f1, (f1)::numeric(16,4), f1, f2, (f2)::numeric(16,4), f2 - Optimizer: Pivotal Optimizer (GPORCA) (5 rows) -- bpchar, lacking planner support for its length coercion function, @@ -230,12 +282,12 @@ create view bpchar_view as f214 | character(14) | | | | extended | f2n | bpchar | | | | extended | View definition: - SELECT bpchar_tbl.f1, - bpchar_tbl.f1::character(14) AS f114, - bpchar_tbl.f1::bpchar AS f1n, - bpchar_tbl.f2, - bpchar_tbl.f2::character(14) AS f214, - bpchar_tbl.f2 AS f2n + SELECT f1, + f1::character(14) AS f114, + f1::bpchar AS f1n, + f2, + f2::character(14) AS f214, + f2 AS f2n FROM bpchar_tbl; explain (verbose, costs off) select * from bpchar_view @@ -262,7 +314,7 @@ select random() IN (1, 4, 8.0); ------------------------------------------------------------ Result Output: (random() = ANY ('{1,4,8}'::double precision[])) -(2 rows) +(3 rows) explain (verbose, costs off) select random()::int IN (1, 4, 8.0); @@ -270,7 +322,7 @@ select random()::int IN (1, 4, 8.0); --------------------------------------------------------------------------- Result Output: (((random())::integer)::numeric = ANY ('{1,4,8.0}'::numeric[])) -(2 rows) +(3 rows) -- However, if there's not a common supertype for the IN elements, -- we should instead try to produce "x = v1 OR x = v2 OR ...". @@ -340,6 +392,55 @@ select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', ' t (1 row) +-- NOT IN +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + f +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 0); + ?column? +---------- + t +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 2, null); + ?column? +---------- + +(1 row) + +select return_int_input(1) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); + ?column? +---------- + f +(1 row) + +select return_int_input(1) not in (null, null, null, null, null, null, null, null, null, null, null); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); + ?column? +---------- + +(1 row) + +select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); + ?column? +---------- + f +(1 row) + rollback; -- Test with non-strict equality function. -- We need to create our own type for this. @@ -366,6 +467,11 @@ begin end if; end; $$ language plpgsql immutable; +create function myintne(myint, myint) returns bool as $$ +begin + return not myinteq($1, $2); +end; +$$ language plpgsql immutable; create operator = ( leftarg = myint, rightarg = myint, @@ -376,6 +482,16 @@ create operator = ( join = eqjoinsel, merges ); +create operator <> ( + leftarg = myint, + rightarg = myint, + commutator = <>, + negator = =, + procedure = myintne, + restrict = eqsel, + join = eqjoinsel, + merges +); create operator class myint_ops default for type myint using hash as operator 1 = (myint, myint), @@ -386,17 +502,37 @@ insert into inttest values(1::myint),(null); select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); a --- - 1 + (2 rows) +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); + a +--- +(0 rows) + +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); + a +--- +(0 rows) + -- ensure the result matched with the non-hashed version. We simply remove -- some array elements so that we don't reach the hashing threshold. select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); a --- - 1 + 1 (2 rows) +select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null); + a +--- +(0 rows) + +select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); + a +--- +(0 rows) + rollback; diff --git a/contrib/pax_storage/src/test/regress/expected/external_table_optimizer.out b/contrib/pax_storage/src/test/regress/expected/external_table_optimizer.out index 50fea45576c..da3247684d2 100644 --- a/contrib/pax_storage/src/test/regress/expected/external_table_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/external_table_optimizer.out @@ -23,7 +23,14 @@ -- m/DETAIL: Found \d+ URLs and \d+ primary segments./ -- s/Found.+// -- +-- # normalize absolute source paths across environments (also handles file:// URIs) +-- m|/.+/src/test/regress/| +-- s|/.+/src/test/regress/|/ABSPATH/src/test/regress/| +-- -- end_matchsubs +\getenv abs_srcdir PG_ABS_SRCDIR +\getenv hostname PG_HOSTNAME +\set nation_tbl 'file://' :hostname :abs_srcdir '/data/nation.tbl' CREATE TABLE REG_REGION (R_REGIONKEY INT, R_NAME CHAR(25), R_COMMENT VARCHAR(152)) DISTRIBUTED BY (R_REGIONKEY); -- -------------------------------------- -- 'file' protocol - (only CREATE, don't SELECT - won't work on distributed installation) @@ -32,10 +39,10 @@ CREATE EXTERNAL TABLE EXT_NATION ( N_NATIONKEY INTEGER , N_NAME CHAR(25) , N_REGIONKEY INTEGER , N_COMMENT VARCHAR(152)) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/nation.tbl' ) +location (:'nation_tbl' ) FORMAT 'text' (delimiter '|'); CREATE EXTERNAL TABLE EXT_REGION (LIKE REG_REGION) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/region.tbl' ) +location (:'nation_tbl' ) FORMAT 'text' (delimiter '|'); -- Only tables with custom protocol should create dependency, due to a bug there -- used to be entries created for non custom protocol tables with refobjid=0. @@ -143,6 +150,21 @@ SELECT * FROM table_env WHERE val LIKE 'GP_QUERY%\%' ESCAPE '&' ORDER BY val ASC GP_QUERY_STRING=SELECT * FROM table_env WHERE val LIKE 'GP_QUERY%\%' ESCAPE '&' ORDER BY val ASC; (1 row) +-- ensure squelching on master +\set lineitem 'cat ' :abs_srcdir '/data/lineitem.csv' +CREATE EXTERNAL WEB TABLE table_master (val TEXT) + EXECUTE E:'lineitem' ON MASTER + FORMAT 'TEXT' (ESCAPE 'OFF'); +BEGIN; +DECLARE _psql_cursor NO SCROLL CURSOR FOR SELECT 1 FROM table_master; +FETCH FORWARD 1 FROM _psql_cursor; + ?column? +---------- + 1 +(1 row) + +CLOSE _psql_cursor; +COMMIT; -- echo will behave differently on different platforms, force to use bash with -E option CREATE EXTERNAL WEB TABLE table_qry (val TEXT) EXECUTE E'/usr/bin/env bash -c ''echo -E "$GP_QUERY_STRING"''' ON SEGMENT 0 @@ -178,11 +200,12 @@ drop external web table ext_stderr2; -- -- bad csv (quote must be a single char) -- +\set whois_file 'gpfdist://' :hostname ':7070/exttab1/whois.csv' create external table bad_whois ( source_lineno int, domain_name varchar(350) ) -location ('gpfdist://gpadmin:7070/exttab1/whois.csv' ) +location (:'whois_file' ) format 'csv' ( header quote as 'ggg'); ERROR: COPY quote must be a single one-byte character select count(*) from bad_whois; @@ -194,26 +217,28 @@ ERROR: foreign table "bad_whois" does not exist -- -- try a bad location -- +\set badt1_file 'file://' :hostname :abs_srcdir '/data/no/such/place/badt1.tbl' create external table badt1 (x text) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt1.tbl' ) +location (:'badt1_file' ) format 'text' (delimiter '|'); select * from badt1; -NOTICE: gfile stat /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt1.tbl failure: No such file or directory (seg0 slice1 gpadmin:50000 pid=64819) -NOTICE: fstream unable to open file /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt1.tbl (seg0 slice1 gpadmin:50000 pid=64819) -ERROR: could not open file "/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt1.tbl": 404 file not found (seg0 slice1 gpadmin:50000 pid=64819) +ERROR: could not open file "/ABSPATH/src/test/regress/data/no/such/place/badt1.tbl": 404 file not found +NOTICE: fstream unable to open file /ABSPATH/src/test/regress/data/no/such/place/badt1.tbl +NOTICE: gfile stat /ABSPATH/src/test/regress/data/no/such/place/badt1.tbl failure: No such file or directory drop external table badt1; -- -- try a bad protocol -- +\set baadt2_file 'bad_protocol://' :hostname :abs_srcdir '/data/no/such/place/badt2.tbl' create external table badt2 (x text) -location ('bad_protocol://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt2.tbl' ) +location (:'baadt2_file' ) format 'text' (delimiter '|'); ERROR: protocol "bad_protocol" does not exist -- -- ALTER (partial support) -- create external table ext (a int, x text) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/badt1.tbl' ) +location (:'badt1_file' ) format 'text'; alter table ext drop column a; -- should pass alter external table ext add column a int; -- pass @@ -235,7 +260,8 @@ ERROR: cannot update foreign table "ext" insert into ext(x) values(123); ERROR: foreign table "ext" does not allow inserts create index ext_index on ext(x); -- should fail -ERROR: cannot create index on foreign table "ext" +DETAIL: This operation is not supported for foreign tables. +ERROR: cannot create index on relation "ext" drop table ext; -- should fail (wrong object) ERROR: "ext" is not a table HINT: Use DROP FOREIGN TABLE to remove a foreign table. @@ -343,6 +369,48 @@ DROP OWNED BY test_role_issue_12748; -- Clean up. DROP ROLE test_role_issue_12748; DROP PROTOCOL dummy_protocol_issue_12748; +-- Test pg_exttable's encoding: QE's encoding should be consistent with QD +-- GitHub Issue #9727: https://github.com/greenplum-db/gpdb/issues/9727 +SET client_encoding = 'ISO-8859-1'; +CREATE EXTERNAL TABLE issue_9727 (d varchar(20)) location ('gpfdist://9727/d.dat') format 'csv' (DELIMITER '|'); +SELECT encoding from pg_exttable where urilocation='{gpfdist://9727:8080/d.dat}'; + encoding +---------- + 6 +(1 row) + +SELECT encoding from gp_dist_random('pg_exttable') where urilocation='{gpfdist://9727:8080/d.dat}'; + encoding +---------- + 6 + 6 + 6 +(3 rows) + +DROP FOREIGN TABLE issue_9727; +RESET client_encoding; +-- Test external table location escape +-- GitHub Issue #17179: https://github.com/greenplum-db/gpdb/issues/17179 +CREATE READABLE EXTERNAL TABLE public.test_ext +( + id integer +) +LOCATION( + 'file://gpdev/tmp/test1|.|tx||t|||', + 'file://gpdev/tmp/test2|.|tx||t||||' +) +FORMAT 'TEXT' ( + delimiter 'off' null E'\\N' escape E'\\' +) +ENCODING 'UTF8' +LOG ERRORS PERSISTENTLY SEGMENT REJECT LIMIT 10 PERCENT; +SELECT urilocation FROM pg_exttable WHERE reloid = 'public.test_ext'::regclass; + urilocation +------------------------------------------------------------------------ + {file://gpdev/tmp/test1|.|tx||t|||,file://gpdev/tmp/test2|.|tx||t||||} +(1 row) + +DROP EXTERNAL TABLE public.test_ext; -- -- WET tests -- @@ -352,7 +420,8 @@ DROP PROTOCOL dummy_protocol_issue_12748; -- positive create writable external web table wet_pos4(a text, b text) execute 'some command' format 'text'; -- negative -create writable external table wet_neg1(a text, b text) location('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/badt1.tbl') format 'text'; +\set badt1_file2 'file://' :hostname :abs_srcdir '/badt1.tbl' +create writable external table wet_neg1(a text, b text) location(:'badt1_file2') format 'text'; ERROR: unsupported URI protocol 'file' for writable external table HINT: Writable external tables may use 'gpfdist' or 'gpfdists' URIs only. create writable external table wet_neg1(a text, b text) location('gpfdist://foo:7070/wet.out', 'gpfdist://foo:7070/wet.out') format 'text'; @@ -370,8 +439,9 @@ HINT: Create the table as READABLE instead. -- scans, because the planner generated plans that used the CTID attribute -- to implement certain semi-joins. Nowadays, we use generated row IDs in -- such plans, and don't need CTID for that purpose anymore. +\set mpp17980_file2 'file://' :hostname :abs_srcdir '/data/mpp17980.data' CREATE EXTERNAL TABLE ext_mpp17980 ( id int , id1 int , id2 int) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/mpp17980.data') +LOCATION (:'mpp17980_file2') FORMAT 'CSV' ( DELIMITER ',' NULL ' '); CREATE TABLE mpp17980 (id int, date date, amt decimal(10,2)) DISTRIBUTED randomly PARTITION BY RANGE (date) @@ -415,9 +485,10 @@ SELECT ctid, * FROM ext_mpp17980; DROP EXTERNAL TABLE ext_mpp17980; DROP TABLE mpp17980; -COPY (VALUES('1,2'),('1,2,3'),('1,'),('1')) TO '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/tableless.csv'; +\set tableless_file :abs_srcdir '/data/tableless.csv' +COPY (VALUES('1,2'),('1,2,3'),('1,'),('1')) TO :'tableless_file'; CREATE TABLE tableless_heap(a int, b int); -COPY tableless_heap FROM '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/tableless.csv' CSV LOG ERRORS SEGMENT REJECT LIMIT 10; +COPY tableless_heap FROM :'tableless_file' CSV LOG ERRORS SEGMENT REJECT LIMIT 10; NOTICE: found 2 data formatting errors (2 or more input rows), rejected related input data SELECT relname, linenum, errmsg FROM gp_read_error_log('tableless_heap'); relname | linenum | errmsg @@ -444,8 +515,9 @@ SELECT relname, linenum, errmsg FROM gp_read_error_log('tableless_heap'); ---------+---------+-------- (0 rows) +\set tableless_file2 'file://' :hostname :abs_srcdir '/data/tableless.csv' create external table tableless_ext(a int, b int) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/tableless.csv') +location (:'tableless_file2') format 'csv' log errors segment reject limit 10; select * from tableless_ext; @@ -558,11 +630,12 @@ create writable external table wet_too_many_uris(a text, b text) location( 'gpfdist://foo.invalid:7070/wet.out19', 'gpfdist://foo.invalid:7070/wet.out20' ) format 'text'; insert into wet_too_many_uris values ('foo', 'bar'); -ERROR: external table has more URLs than available primary segments that can write into them (seg2 127.0.0.1:40002 pid=24162) +ERROR: external table has more URLs than available primary segments that can write into them -- Test for error log functionality -- Scan with no errors +\set exttab_file 'file://' :hostname :abs_srcdir '/data/exttab.data' CREATE EXTERNAL TABLE exttab_basic_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- Empty error log SELECT * FROM gp_read_error_log('exttab_basic_1'); @@ -583,8 +656,9 @@ SELECT * FROM gp_read_error_log('exttab_basic_1'); (0 rows) -- test ON COORDINATOR without LOG ERRORS, return empty results for all rows error out +\set cat_exttab 'cat ' :abs_srcdir '/data/exttab.data' CREATE EXTERNAL WEB TABLE exttab_basic_error_1( i int ) -EXECUTE E'cat /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data' ON COORDINATOR +EXECUTE E:'cat_exttab' ON COORDINATOR FORMAT 'TEXT' (DELIMITER '|') SEGMENT REJECT LIMIT 20; SELECT * FROM exttab_basic_error_1; @@ -596,7 +670,7 @@ NOTICE: found 10 data formatting errors (10 or more input rows), rejected relat DROP EXTERNAL TABLE IF EXISTS exttab_basic_error_1; -- test ON MASTER still works (this syntax will be removed in GPDB8 and forward) CREATE EXTERNAL WEB TABLE exttab_basic_error_1( i int ) -EXECUTE E'cat /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data' ON MASTER +EXECUTE E:'cat_exttab' ON MASTER FORMAT 'TEXT' (DELIMITER '|') SEGMENT REJECT LIMIT 20; SELECT * FROM exttab_basic_error_1; @@ -606,8 +680,9 @@ NOTICE: found 10 data formatting errors (10 or more input rows), rejected relat (0 rows) -- Some errors without exceeding reject limit +\set exttab_few_errors_file 'file://' :hostname :abs_srcdir '/data/exttab_few_errors.data' CREATE EXTERNAL TABLE exttab_basic_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- should not error out as segment reject limit will not be reached SELECT * FROM exttab_basic_2 order by i; @@ -632,8 +707,9 @@ select count(*) from gp_read_error_log('exttab_basic_2'); (1 row) -- Errors with exceeding reject limit +\set exttab_more_errors_file 'file://' :hostname :abs_srcdir '/data/exttab_more_errors.data' CREATE EXTERNAL TABLE exttab_basic_3( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- should error out as segment reject limit will be reached SELECT * FROM exttab_basic_3; @@ -649,7 +725,7 @@ select count(*) > 0 from gp_read_error_log('exttab_basic_3'); -- Insert into another table CREATE EXTERNAL TABLE exttab_basic_4( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 100; CREATE TABLE exttab_insert_1 (LIKE exttab_basic_4); NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table @@ -665,7 +741,7 @@ select count(*) > 0 from gp_read_error_log('exttab_basic_4'); -- Use the same error log above CREATE EXTERNAL TABLE exttab_basic_5( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 5; -- Insert should fail INSERT INTO exttab_insert_1 select * from exttab_basic_5; @@ -700,7 +776,7 @@ SELECT count(*) from gp_read_error_log('exttab_basic_5'); -- CTAS CREATE EXTERNAL TABLE exttab_basic_6( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 100; CREATE TABLE exttab_ctas_1 as SELECT * FROM exttab_basic_6; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i' as the Apache Cloudberry data distribution key for this table. @@ -734,7 +810,7 @@ select count(*) from gp_read_error_log('exttab_basic_6'); (1 row) CREATE EXTERNAL TABLE exttab_basic_7( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 5; -- CTAS should fail CREATE TABLE exttab_ctas_2 AS select * from exttab_basic_7; @@ -759,7 +835,7 @@ SELECT count(*) from gp_read_error_log('exttab_basic_7'); DROP EXTERNAL TABLE IF EXISTS exttab_error_log; NOTICE: foreign table "exttab_error_log" does not exist, skipping CREATE EXTERNAL TABLE exttab_error_log( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; SELECT COUNT(*) FROM exttab_error_log; count @@ -782,7 +858,7 @@ SELECT COUNT(*) FROM gp_read_error_log('exttab_error_log'); -- Insert into another table with unique constraints CREATE EXTERNAL TABLE exttab_constraints_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- Should not error out SELECT COUNT(*) FROM exttab_constraints_1; @@ -818,11 +894,11 @@ SELECT COUNT(*) FROM gp_read_error_log('exttab_constraints_1'); -- CTE with segment reject limit reached -- does not reach reject limit CREATE EXTERNAL TABLE exttab_cte_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_cte_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; with cte1 as ( @@ -878,11 +954,11 @@ NOTICE: found 4 data formatting errors (4 or more input rows), rejected related -- Check permissions with gp_truncate_error_log and gp_read_error_log -- does not reach reject limit CREATE EXTERNAL TABLE exttab_permissions_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit CREATE EXTERNAL TABLE exttab_permissions_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- generate some error logs SELECT COUNT(*) FROM exttab_permissions_1; @@ -971,7 +1047,7 @@ CREATE DATABASE exttab_db WITH OWNER=exttab_user1; -- generate some error logs in this db NOTICE: table "exttab_permissions_1" does not exist, skipping CREATE EXTERNAL TABLE exttab_permissions_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; SELECT COUNT(*) FROM exttab_permissions_1 e1, exttab_permissions_1 e2; NOTICE: found 4 data formatting errors (4 or more input rows), rejected related input data @@ -1025,7 +1101,7 @@ CREATE ROLE errlog_exttab_user4 WITH NOSUPERUSER LOGIN; NOTICE: resource queue required -- using default resource queue "pg_default" -- generate some error logs in this db CREATE EXTERNAL TABLE exttab_permissions_3( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; SELECT COUNT(*) FROM exttab_permissions_3 e1, exttab_permissions_3 e2; NOTICE: found 4 data formatting errors (4 or more input rows), rejected related input data @@ -1092,11 +1168,11 @@ SELECT * FROM gp_read_error_log('exttab_permissions_3'); -- Subqueries reaching segment reject limit -- does not reach reject limit CREATE EXTERNAL TABLE exttab_subq_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_subq_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; SELECT sum(distinct e1.i), sum(distinct e2.i), e1.j FROM (SELECT i, j FROM exttab_subq_1 WHERE i < 5 ) e1, @@ -1296,11 +1372,12 @@ SELECT * FROM gp_read_error_log('exttab_subq_2') -- TRUNCATE / delete / write to error logs within subtransactions -- does not reach reject limit CREATE EXTERNAL TABLE exttab_subtxs_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table +\set exttab_more_errors_file2 'file://' :hostname ':' :abs_srcdir '/data/exttab_more_errors.data' CREATE EXTERNAL TABLE exttab_subtxs_2( i int, j text ) -LOCATION ('file://gpadmin:/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file2') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- Populate error logs before transaction SELECT e1.i, e2.j FROM @@ -1431,11 +1508,11 @@ SELECT * FROM gp_read_error_log('exttab_subtxs_2') -- TRUNCATE error logs within tx , abort transaction -- does not reach reject limit CREATE EXTERNAL TABLE exttab_txs_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_txs_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- Populate error log before transaction SELECT e1.i, e2.j FROM @@ -1587,11 +1664,11 @@ BEGIN; -- create an external table that will reach segment reject limit -- reaches reject limit CREATE EXTERNAL TABLE exttab_txs_3( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- new error log, within segment reject limit CREATE EXTERNAL TABLE exttab_txs_4( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; SELECT e1.i, e2.j FROM (SELECT i, j FROM exttab_txs_4 WHERE i < 5 ) e1, @@ -1646,11 +1723,11 @@ LINE 1: SELECT count(*) FROM exttab_txs_4; -- UDFS with segment reject limit reached -- does not reach reject limit CREATE EXTERNAL TABLE exttab_udfs_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_udfs_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; CREATE OR REPLACE FUNCTION exttab_udfs_func1 () RETURNS boolean @@ -1905,11 +1982,11 @@ NOTICE: foreign table "exttab_union_1" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS exttab_union_2; NOTICE: foreign table "exttab_union_2" does not exist, skipping CREATE EXTERNAL TABLE exttab_union_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit CREATE EXTERNAL TABLE exttab_union_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- Should error out as exttab_union_2 would reach it's reject limit SELECT * FROM @@ -2015,11 +2092,11 @@ DROP EXTERNAL TABLE IF EXISTS exttab_views_2 CASCADE; NOTICE: foreign table "exttab_views_2" does not exist, skipping -- does not reach reject limit CREATE EXTERNAL TABLE exttab_views_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_views_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; DROP VIEW IF EXISTS exttab_views_3; NOTICE: view "exttab_views_3" does not exist, skipping @@ -2182,11 +2259,11 @@ DROP EXTERNAL TABLE IF EXISTS exttab_windows_2; NOTICE: foreign table "exttab_windows_2" does not exist, skipping -- does not reach reject limit CREATE EXTERNAL TABLE exttab_windows_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit CREATE EXTERNAL TABLE exttab_windows_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- without reaching segment reject limit with cte1 as( @@ -2292,11 +2369,11 @@ DROP EXTERNAL TABLE IF EXISTS exttab_limit_2 cascade; NOTICE: foreign table "exttab_limit_2" does not exist, skipping -- does not reach reject limit CREATE EXTERNAL TABLE exttab_limit_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; -- reaches reject limit, use the same err table CREATE EXTERNAL TABLE exttab_limit_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_more_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_more_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 2; -- Note that even though we use exttab_limit_2 here , the LIMIT 3 will not throw a segment reject limit error with cte1 as @@ -2475,6 +2552,7 @@ SELECT * FROM gp_read_error_log('exttab_limit_2') -- the data. If there is a valid row within the first 'n' rows specified by -- this guc, the database continues to load the data. -- default should be 1000 +\set exttab_first_errors_file 'file://' :hostname :abs_srcdir '/data/exttab_first_errors.data' SHOW gp_initial_bad_row_limit; gp_initial_bad_row_limit -------------------------- @@ -2484,7 +2562,7 @@ SHOW gp_initial_bad_row_limit; DROP EXTERNAL TABLE IF EXISTS exttab_first_reject_limit_1 cascade; NOTICE: foreign table "exttab_first_reject_limit_1" does not exist, skipping CREATE EXTERNAL TABLE exttab_first_reject_limit_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_first_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_first_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 20000; -- should fail with an appropriate error message SELECT COUNT(*) FROM exttab_first_reject_limit_1; @@ -2522,7 +2600,7 @@ SELECT COUNT(*) FROM gp_read_error_log('exttab_first_reject_limit_1'); DROP EXTERNAL TABLE IF EXISTS exttab_first_reject_limit_2; NOTICE: foreign table "exttab_first_reject_limit_2" does not exist, skipping CREATE EXTERNAL TABLE exttab_first_reject_limit_2( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_first_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_first_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 500; -- should report an error saying first rows were rejected SET gp_initial_bad_row_limit = 2; @@ -2576,7 +2654,7 @@ DROP EXTERNAL TABLE IF EXISTS exttab_heap_join_1; NOTICE: foreign table "exttab_heap_join_1" does not exist, skipping -- does not reach reject limit CREATE EXTERNAL TABLE exttab_heap_join_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') LOG ERRORS SEGMENT REJECT LIMIT 10; DROP TABLE IF EXISTS test_ext_heap_join; NOTICE: table "test_ext_heap_join" does not exist, skipping @@ -2599,14 +2677,14 @@ SELECT COUNT(*) FROM gp_read_error_log('exttab_heap_join_1'); 2 (1 row) -\! rm /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/tableless.csv +\! rm $PG_ABS_SRCDIR/data/tableless.csv -- start_ignore DROP EXTERNAL TABLE IF EXISTS exttab_with_on_coordinator; -NOTICE: table "exttab_with_on_coordinator" does not exist, skipping +NOTICE: foreign table "exttab_with_on_coordinator" does not exist, skipping -- end_ignore -- Create external table with on clause CREATE EXTERNAL TABLE exttab_with_on_coordinator( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') ON COORDINATOR FORMAT 'TEXT' (DELIMITER '|'); +LOCATION (:'exttab_few_errors_file') ON COORDINATOR FORMAT 'TEXT' (DELIMITER '|'); SELECT * FROM exttab_with_on_coordinator; ERROR: 'ON COORDINATOR' is not supported by this protocol yet DROP EXTERNAL TABLE IF EXISTS exttab_with_on_coordinator; @@ -2620,13 +2698,13 @@ NOTICE: foreign table "exttab_with_options" does not exist, skipping -- end_ignore -- Create external table with 'OPTIONS' CREATE EXTERNAL TABLE exttab_with_option_empty( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') OPTIONS (); CREATE EXTERNAL TABLE exttab_with_option_1( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') OPTIONS (hello 'world'); CREATE EXTERNAL TABLE exttab_with_options( i int, j text ) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data') FORMAT 'TEXT' (DELIMITER '|') +LOCATION (:'exttab_few_errors_file') FORMAT 'TEXT' (DELIMITER '|') OPTIONS (hello 'world', bonjour 'again', nihao 'again and again' ); \d exttab_with_options Foreign table "public.exttab_with_options" @@ -2634,7 +2712,7 @@ OPTIONS (hello 'world', bonjour 'again', nihao 'again and again' ); --------+---------+-----------+----------+---------+------------- i | integer | | | | j | text | | | | -FDW options: (format 'text', delimiter '|', "null" E'\\N', escape E'\\', hello 'world', bonjour 'again', nihao 'again and again', format_type 't', location_uris 'file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data', execute_on 'ALL_SEGMENTS', log_errors 'f', encoding '6', is_writable 'false') +FDW options: (format 'text', delimiter '|', "null" E'\\N', escape E'\\', hello 'world', bonjour 'again', nihao 'again and again', format_type 't', location_uris 'file:/ABSPATH/src/test/regress/data/exttab_few_errors.data', execute_on 'ALL_SEGMENTS', log_errors 'f', encoding '6', is_writable 'false') \d exttab_with_option_empty Foreign table "public.exttab_with_option_empty" @@ -2642,7 +2720,7 @@ FDW options: (format 'text', delimiter '|', "null" E'\\N', escape E'\\', hello ' --------+---------+-----------+----------+---------+------------- i | integer | | | | j | text | | | | -FDW options: (format 'text', delimiter '|', "null" E'\\N', escape E'\\', format_type 't', location_uris 'file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab_few_errors.data', execute_on 'ALL_SEGMENTS', log_errors 'f', encoding '6', is_writable 'false') +FDW options: (format 'text', delimiter '|', "null" E'\\N', escape E'\\', format_type 't', location_uris 'file:/ABSPATH/src/test/regress/data/exttab_few_errors.data', execute_on 'ALL_SEGMENTS', log_errors 'f', encoding '6', is_writable 'false') DROP EXTERNAL TABLE IF EXISTS exttab_with_option_empty; DROP EXTERNAL TABLE IF EXISTS exttab_with_option_1; @@ -2660,16 +2738,22 @@ DROP EXTERNAL TABLE IF EXISTS tbl_wet_csv5; NOTICE: foreign table "tbl_wet_csv5" does not exist, skipping -- end_ignore -- Create writable external table with AS for DELIMITER , NULL, ESCAPE -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv1 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv1.tbl' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); +\set wet_csv1_file 'cat > ' :abs_srcdir '/data/wet_csv1.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv1 (a int, b text) EXECUTE :'wet_csv1_file' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); -- Create writable external table without AS for DELIMITER , NULL, ESCAPE -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv2 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv2.tbl' FORMAT 'CSV' (DELIMITER AS ',' NULL 'null' ESCAPE ' '); +\set wet_csv2_file 'cat > ' :abs_srcdir '/data/wet_csv2.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv2 (a int, b text) EXECUTE :'wet_csv2_file' FORMAT 'CSV' (DELIMITER AS ',' NULL 'null' ESCAPE ' '); -- Create writable external table with double quotes -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv3 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv3.tbl' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"'); +\set wet_csv3_file 'cat > ' :abs_srcdir '/data/wet_csv3.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv3 (a int, b text) EXECUTE :'wet_csv3_file' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"'); -- Create writable external table with single quotes -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv4 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv4.tbl' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS ''''); +\set wet_csv4_file 'cat > ' :abs_srcdir '/data/wet_csv4.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv4 (a int, b text) EXECUTE :'wet_csv4_file' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS ''''); -- Create writable external table with force quote -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv5 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv5.tbl' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"' FORCE QUOTE b); -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv6 (a int, b text, c text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_csv6.tbl' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"' FORCE QUOTE *); +\set wet_csv5_file 'cat > ' :abs_srcdir '/data/wet_csv5.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv5 (a int, b text) EXECUTE :'wet_csv5_file' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"' FORCE QUOTE b); +\set wet_csv6_file 'cat > ' :abs_srcdir '/data/wet_csv6.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_csv6 (a int, b text, c text) EXECUTE :'wet_csv6_file' FORMAT 'CSV' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ' QUOTE AS '"' FORCE QUOTE *); INSERT INTO tbl_wet_csv1 VALUES (generate_series(1,256), 'test_1'); INSERT INTO tbl_wet_csv2 VALUES (generate_series(1,256), 'test_2'); INSERT INTO tbl_wet_csv3 VALUES (generate_series(1,256), 'test_3'); @@ -2684,18 +2768,21 @@ DROP EXTERNAL TABLE IF EXISTS tbl_wet_csv5; DROP EXTERNAL TABLE IF EXISTS tbl_wet_csv6; -- start_ignore DROP EXTERNAL TABLE IF EXISTS tbl_wet_text1; -NOTICE: table "tbl_wet_text1" does not exist, skipping +NOTICE: foreign table "tbl_wet_text1" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS tbl_wet_text2; -NOTICE: table "tbl_wet_text2" does not exist, skipping +NOTICE: foreign table "tbl_wet_text2" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS tbl_wet_text3; -NOTICE: table "tbl_wet_text3" does not exist, skipping +NOTICE: foreign table "tbl_wet_text3" does not exist, skipping -- end_ignore -- Create writable external table with AS for DELIMITER , NULL, ESCAPE -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text1 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_text1.tbl' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); +\set wet_text1_file 'cat > ' :abs_srcdir '/data/wet_text1.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text1 (a int, b text) EXECUTE :'wet_text1_file' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); -- Create writable external table without AS for DELIMITER , NULL, ESCAPE -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text2 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_text2.tbl' FORMAT 'TEXT' (DELIMITER AS ',' NULL 'null' ESCAPE ' '); +\set wet_text2_file 'cat > ' :abs_srcdir '/data/wet_text2.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text2 (a int, b text) EXECUTE :'wet_text2_file' FORMAT 'TEXT' (DELIMITER AS ',' NULL 'null' ESCAPE ' '); -- Create writable external table with ESCAPE OFF -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text3 (a int, b text) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_text3.tbl' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE 'OFF'); +\set wet_text3_file 'cat > ' :abs_srcdir '/data/wet_text3.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_text3 (a int, b text) EXECUTE :'wet_text3_file' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE 'OFF'); INSERT INTO tbl_wet_text1 VALUES (generate_series(1,256), 'test_1'); INSERT INTO tbl_wet_text2 VALUES (generate_series(1,256), 'test_2'); INSERT INTO tbl_wet_text3 VALUES (generate_series(1,256), 'test_3'); @@ -2708,17 +2795,20 @@ NOTICE: table "test_dp1" does not exist, skipping DROP TABLE IF EXISTS test_dp2; NOTICE: table "test_dp2" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS tbl_wet_syntax1; -NOTICE: table "tbl_wet_syntax1" does not exist, skipping +NOTICE: foreign table "tbl_wet_syntax1" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS tbl_wet_syntax2; -NOTICE: table "tbl_wet_syntax2" does not exist, skipping +NOTICE: foreign table "tbl_wet_syntax2" does not exist, skipping DROP EXTERNAL TABLE IF EXISTS tbl_wet_syntax3; -NOTICE: table "tbl_wet_syntax3" does not exist, skipping +NOTICE: foreign table "tbl_wet_syntax3" does not exist, skipping -- end_ignore CREATE TABLE test_dp1 (a int, b text) DISTRIBUTED RANDOMLY; CREATE TABLE test_dp2 (a int, b text) DISTRIBUTED BY (b); -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax1 (like test_dp1) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_syntax1.tbl' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED BY (a); -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax2 (like test_dp2) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_syntax2.tbl' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED BY (a); -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax3 (like test_dp2) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_syntax3.tbl' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED RANDOMLY; +\set wet_syntax1_file 'cat > ' :abs_srcdir '/data/wet_syntax1.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax1 (like test_dp1) EXECUTE :'wet_syntax1_file' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED BY (a); +\set wet_syntax2_file 'cat > ' :abs_srcdir '/data/wet_syntax2.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax2 (like test_dp2) EXECUTE :'wet_syntax2_file' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED BY (a); +\set wet_syntax3_file 'cat > ' :abs_srcdir '/data/wet_syntax3.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_syntax3 (like test_dp2) EXECUTE :'wet_syntax3_file' FORMAT 'TEXT' (DELIMITER '|' ) DISTRIBUTED RANDOMLY; INSERT INTO tbl_wet_syntax1 VALUES (generate_series(1,256), 'test_1'); INSERT INTO tbl_wet_syntax2 VALUES (generate_series(1,256), 'test_2'); INSERT INTO tbl_wet_syntax3 VALUES (generate_series(1,256), 'test_3'); @@ -2737,7 +2827,8 @@ CREATE TABLE table_execute (id integer, name varchar(40)) DISTRIBUTED RANDOMLY; INSERT INTO table_execute VALUES (100, 'name_1'); INSERT INTO table_execute VALUES (200, 'name_2'); INSERT INTO table_execute VALUES (300, 'name_3'); -CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_execute (like table_execute) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/wet_execute.tbl' FORMAT 'TEXT' (DELIMITER '|' ); +\set wet_execute_file 'cat > ' :abs_srcdir '/data/wet_execute.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE tbl_wet_execute (like table_execute) EXECUTE :'wet_execute_file' FORMAT 'TEXT' (DELIMITER '|' ); NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table INSERT INTO tbl_wet_execute SELECT * from table_execute ; DROP TABLE IF EXISTS table_execute; @@ -2770,14 +2861,15 @@ DROP PROTOCOL if exists demoprot; NOTICE: protocol "demoprot" does not exist, skipping DROP PROTOCOL if exists demoprot2; -- create external protocol with a serial type column +\set serial_file 'file://' :hostname :abs_srcdir '/data/no/such/place/serial.tbl' CREATE EXTERNAL TABLE SERIAL (a serial, x text) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/no/such/place/serial.tbl') +LOCATION (:'serial_file') FORMAT 'csv'; -- drop temp external table DROP EXTERNAL TABLE IF EXISTS serial; -- External table query within plpgSQL function get error CREATE EXTERNAL TABLE exttab_error_context_callback(c1 int, c2 int) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/exttab.data') FORMAT 'TEXT'; +LOCATION (:'exttab_file') FORMAT 'TEXT'; CREATE or REPLACE FUNCTION exttab_error_context_callback_func() RETURNS SETOF INTEGER AS @@ -2804,8 +2896,9 @@ DROP EXTERNAL TABLE exttab_error_context_callback; -- -------------------------------------- -- Encoding -- -------------------------------------- +\set latin1_encoding_file 'file://' :hostname :abs_srcdir '/data/latin1_encoding.csv' CREATE EXTERNAL TABLE encoding_issue (num int, word text) -LOCATION ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/latin1_encoding.csv') +LOCATION (:'latin1_encoding_file') FORMAT 'CSV' ENCODING 'LATIN1'; SELECT * FROM encoding_issue WHERE num = 4; num | word @@ -2814,8 +2907,9 @@ SELECT * FROM encoding_issue WHERE num = 4; (1 row) COPY (SELECT * FROM encoding_issue) TO '/tmp/latin1_encoding.csv' WITH (FORMAT 'csv', ENCODING 'LATIN1'); +\set latin1_encoding_file2 'file://' :hostname '/tmp/latin1_encoding.csv' CREATE EXTERNAL TABLE encoding_issue2 (num int, word text) -LOCATION ('file://gpadmin/tmp/latin1_encoding.csv') +LOCATION (:'latin1_encoding_file2') FORMAT 'CSV' ENCODING 'LATIN1'; SELECT * FROM encoding_issue2 WHERE num = 5; num | word @@ -4698,6 +4792,7 @@ line_delim=E'\n' ; drop external table large_custom_format_definitions; -- Incomplete external data file +\set incomplete_formatter_data 'file://' :hostname :abs_srcdir '/data/incomplete_formatter_data.tbl' CREATE OR REPLACE FUNCTION gpformatter() RETURNS record AS '$libdir/gpformatter.so', 'formatter_import' LANGUAGE C STABLE; @@ -4707,7 +4802,7 @@ CREATE READABLE EXTERNAL TABLE tbl_ext_gpformatter ( d1 text ) LOCATION ( - 'file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/incomplete_formatter_data.tbl' + :'incomplete_formatter_data' ) FORMAT 'CUSTOM' (formatter='gpformatter'); SELECT * FROM tbl_ext_gpformatter; @@ -4884,8 +4979,13 @@ SELECT * FROM test_part_integrity; (4 rows) DROP TABLE test_part_integrity; +-- Testing creating external table with replicated distribution +-- Should report error +CREATE WRITABLE EXTERNAL WEB TABLE ext_dist_repl(a int, b int) EXECUTE 'some command' FORMAT 'TEXT' DISTRIBUTED REPLICATED; +ERROR: external tables can't have DISTRIBUTED REPLICATED clause -- Testing altering the distribution policy of external tables. -CREATE WRITABLE EXTERNAL WEB TABLE ext_w_dist(a int, b int) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/ext_w_dist.tbl' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ') DISTRIBUTED BY (a); +\set ext_w_dist_file 'cat > ' :abs_srcdir '/data/ext_w_dist.tbl' +CREATE WRITABLE EXTERNAL WEB TABLE ext_w_dist(a int, b int) EXECUTE :'ext_w_dist_file' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ') DISTRIBUTED BY (a); ALTER TABLE ext_w_dist SET WITH (reorganize=true); -- should error out if forcing reorganize ERROR: cannot reorganize external table "ext_w_dist" SELECT policytype, distkey FROM gp_distribution_policy WHERE localoid = 'ext_w_dist'::regclass; @@ -4908,6 +5008,8 @@ SELECT policytype, distkey FROM gp_distribution_policy WHERE localoid = 'ext_w_d p | (1 row) +ALTER TABLE ext_w_dist SET DISTRIBUTED REPLICATED; -- ERROR +ERROR: SET DISTRIBUTED REPLICATED is not supported for external table CREATE EXTERNAL WEB TABLE ext_r_dist(a int) EXECUTE 'printf ${GP_SEGMENT_ID}' FORMAT 'TEXT' DISTRIBUTED BY (a); ERROR: readable external tables can't specify a DISTRIBUTED BY clause CREATE EXTERNAL WEB TABLE ext_r_dist(a int) EXECUTE 'printf ${GP_SEGMENT_ID}' FORMAT 'TEXT'; @@ -4916,8 +5018,9 @@ ERROR: cannot set distribution policy of readable external table "ext_r_dist" -- Testing external table as the partition child. CREATE TABLE part_root(a int) PARTITION BY RANGE(a); CREATE TABLE part_child (LIKE part_root); -CREATE EXTERNAL WEB TABLE part_ext_r(a int) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/part_ext.tbl' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); -CREATE WRITABLE EXTERNAL WEB TABLE part_ext_w(a int, b int) EXECUTE 'cat > /home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/part_ext.tbl' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ') DISTRIBUTED BY (a); +\set part_ext_file 'cat > ' :abs_srcdir '/data/part_ext.tbl' +CREATE EXTERNAL WEB TABLE part_ext_r(a int) EXECUTE :'part_ext_file' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' '); +CREATE WRITABLE EXTERNAL WEB TABLE part_ext_w(a int, b int) EXECUTE :'part_ext_file' FORMAT 'TEXT' (DELIMITER AS '|' NULL AS 'null' ESCAPE AS ' ') DISTRIBUTED BY (a); ALTER TABLE part_root ATTACH PARTITION part_child FOR VALUES FROM (0) TO (10); ALTER TABLE part_root ATTACH PARTITION part_ext_r FOR VALUES FROM (10) TO (20); NOTICE: partition constraints are not validated when attaching a readable external table @@ -4939,31 +5042,34 @@ SELECT policytype, distkey FROM gp_distribution_policy WHERE localoid = 'part_ex DROP TABLE part_root; -- check logerrors value of pg_exttable +\set ext_fasle_file 'file://' :hostname :abs_srcdir '/data/ext_fasle.tbl' +\set ext_true_file 'file://' :hostname :abs_srcdir '/data/ext_true.tbl' +\set ext_persistently_file 'file://' :hostname :abs_srcdir '/data/ext_persistently.tbl' CREATE EXTERNAL TABLE ext_false (c INT) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/ext_fasle.tbl' ) +location (:'ext_fasle_file' ) FORMAT 'text' (delimiter '|'); CREATE EXTERNAL TABLE ext_true (c INT) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/ext_true.tbl' ) +location (:'ext_true_file' ) FORMAT 'text' (delimiter '|') LOG ERRORS SEGMENT REJECT LIMIT 100; CREATE EXTERNAL TABLE ext_persistently (c INT) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/ext_persistently.tbl' ) +location (:'ext_persistently_file' ) FORMAT 'text' (delimiter '|') LOG ERRORS PERSISTENTLY SEGMENT REJECT LIMIT 100; -SELECT logerrors from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_false'; - logerrors ------------ - f +SELECT logerrors, options from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_false'; + logerrors | options +-----------+--------- + f | (1 row) -SELECT logerrors from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_true'; - logerrors ------------ - t +SELECT logerrors, options from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_true'; + logerrors | options +-----------+--------- + t | (1 row) -SELECT logerrors from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_persistently'; - logerrors ------------ - t +SELECT logerrors, options from pg_exttable a, pg_class b where a.reloid = b.oid and b.relname = 'ext_persistently'; + logerrors | options +-----------+----------------------------- + t | {error_log_persistent=true} (1 row) -- drop tables diff --git a/contrib/pax_storage/src/test/regress/expected/external_table_persistent_error_log.out b/contrib/pax_storage/src/test/regress/expected/external_table_persistent_error_log.out index b43f9df94a7..836a352d5cd 100644 --- a/contrib/pax_storage/src/test/regress/expected/external_table_persistent_error_log.out +++ b/contrib/pax_storage/src/test/regress/expected/external_table_persistent_error_log.out @@ -49,6 +49,8 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT (gp_read_persistent_error_log('ext_error_persistent')).errmsg; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location errmsg --------------------------------------- extra data after last expected column @@ -56,29 +58,39 @@ SELECT (gp_read_persistent_error_log('ext_error_persistent')).errmsg; (2 rows) select errmsg from gp_read_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location errmsg -------- (0 rows) select * from gp_truncate_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_error_log ----------------------- t (1 row) select * from gp_truncate_error_log('*'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_error_log ----------------------- t (1 row) select * from gp_truncate_error_log('*.*'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_error_log ----------------------- t (1 row) SELECT (gp_read_persistent_error_log('ext_error_persistent')).errmsg; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location errmsg --------------------------------------- extra data after last expected column @@ -86,12 +98,16 @@ SELECT (gp_read_persistent_error_log('ext_error_persistent')).errmsg; (2 rows) SELECT gp_truncate_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_persistent_error_log ---------------------------------- t (1 row) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) @@ -105,6 +121,8 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -112,12 +130,16 @@ SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_per (2 rows) SELECT gp_truncate_persistent_error_log('*'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_persistent_error_log ---------------------------------- t (1 row) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) @@ -131,6 +153,8 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -138,12 +162,16 @@ SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_per (2 rows) SELECT gp_truncate_persistent_error_log('*.*'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_persistent_error_log ---------------------------------- t (1 row) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) @@ -157,6 +185,8 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -166,6 +196,8 @@ SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_per DROP EXTERNAL TABLE ext_error_persistent; -- error log still exists SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -186,6 +218,8 @@ NOTICE: found 2 data formatting errors (2 or more input rows), rejected related (2 rows) SELECT relname, linenum, errmsg FROM gp_read_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -194,6 +228,8 @@ SELECT relname, linenum, errmsg FROM gp_read_error_log('ext_error_persistent'); -- persistent error log has no change SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -202,11 +238,15 @@ SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_per DROP EXTERNAL TABLE ext_error_persistent; SELECT relname, linenum, errmsg FROM gp_read_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ----------------------+---------+--------------------------------------- ext_error_persistent | 2 | extra data after last expected column @@ -214,12 +254,16 @@ SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_per (2 rows) SELECT gp_truncate_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_persistent_error_log ---------------------------------- t (1 row) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_error_persistent'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) @@ -241,18 +285,24 @@ NOTICE: found 1 data formatting errors (1 or more input rows), rejected related (3 rows) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_bytea'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg -----------+---------+------------------------------------------------ ext_bytea | 4 | invalid hexadecimal digit: "T", column content (1 row) SELECT gp_truncate_persistent_error_log('ext_bytea'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions gp_truncate_persistent_error_log ---------------------------------- t (1 row) SELECT relname, linenum, errmsg FROM gp_read_persistent_error_log('ext_bytea'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: unsupported exec location relname | linenum | errmsg ---------+---------+-------- (0 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/external_table_union_all_optimizer.out b/contrib/pax_storage/src/test/regress/expected/external_table_union_all_optimizer.out index 4a268a80a7e..cd8c4737860 100644 --- a/contrib/pax_storage/src/test/regress/expected/external_table_union_all_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/external_table_union_all_optimizer.out @@ -1,9 +1,15 @@ +SET optimizer_trace_fallback=on; -- Test external table as left child of union all with replicated table +\getenv abs_srcdir PG_ABS_SRCDIR +\getenv hostname PG_HOSTNAME +\set location1_file 'file://' :hostname :abs_srcdir '/data/location1.csv' +\set location2_file 'file://' :hostname :abs_srcdir '/data/location2.csv' +\set location3_file 'file://' :hostname :abs_srcdir '/data/location3.csv' CREATE EXTERNAL TABLE multilocation_external_table(a INTEGER) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/location1.csv', 'file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/location2.csv', 'file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/location3.csv') +location (:'location1_file', :'location2_file', :'location3_file') ON ALL FORMAT 'text'; CREATE EXTERNAL TABLE one_external_table(a INTEGER) -location ('file://gpadmin/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/data/location2.csv') +location (:'location2_file') ON SEGMENT 2 FORMAT 'text'; CREATE TABLE simple_replicated_table(a integer) DISTRIBUTED REPLICATED; INSERT INTO simple_replicated_table VALUES (1); diff --git a/contrib/pax_storage/src/test/regress/expected/function_extensions_optimizer.out b/contrib/pax_storage/src/test/regress/expected/function_extensions_optimizer.out index fd9c026044d..ce5ce3cb927 100644 --- a/contrib/pax_storage/src/test/regress/expected/function_extensions_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/function_extensions_optimizer.out @@ -391,50 +391,34 @@ explain select * from srf_testtab, test_srf() where test_srf = srf_testtab.t; -- Test ALTER FUNCTION, and that \df displays the EXECUTE ON correctly \df+ test_srf - List of functions - Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Source code | Description ---------+----------+------------------+---------------------+------+-------------+------------+------------+----------+-------------+----------+-------------------+----------+----------------------+------------- - public | test_srf | SETOF text | | func | no sql | any | immutable | unsafe | srftestuser | invoker | | plpgsql | +| - | | | | | | | | | | | | | begin +| - | | | | | | | | | | | | | return next 'foo';+| - | | | | | | | | | | | | | end; +| - | | | | | | | | | | | | | | + List of functions + Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Internal name | Description +--------+----------+------------------+---------------------+------+-------------+------------+------------+----------+-------------+----------+-------------------+----------+---------------+------------- + public | test_srf | SETOF text | | func | no sql | any | immutable | unsafe | srftestuser | invoker | | plpgsql | | (1 row) alter function test_srf() EXECUTE ON COORDINATOR; \df+ test_srf - List of functions - Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Source code | Description ---------+----------+------------------+---------------------+------+-------------+-------------+------------+----------+-------------+----------+-------------------+----------+----------------------+------------- - public | test_srf | SETOF text | | func | no sql | coordinator | immutable | unsafe | srftestuser | invoker | | plpgsql | +| - | | | | | | | | | | | | | begin +| - | | | | | | | | | | | | | return next 'foo';+| - | | | | | | | | | | | | | end; +| - | | | | | | | | | | | | | | + List of functions + Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Internal name | Description +--------+----------+------------------+---------------------+------+-------------+-------------+------------+----------+-------------+----------+-------------------+----------+---------------+------------- + public | test_srf | SETOF text | | func | no sql | coordinator | immutable | unsafe | srftestuser | invoker | | plpgsql | | (1 row) alter function test_srf() EXECUTE ON ALL SEGMENTS; \df+ test_srf - List of functions - Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Source code | Description ---------+----------+------------------+---------------------+------+-------------+--------------+------------+----------+-------------+----------+-------------------+----------+----------------------+------------- - public | test_srf | SETOF text | | func | no sql | all segments | immutable | unsafe | srftestuser | invoker | | plpgsql | +| - | | | | | | | | | | | | | begin +| - | | | | | | | | | | | | | return next 'foo';+| - | | | | | | | | | | | | | end; +| - | | | | | | | | | | | | | | + List of functions + Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Internal name | Description +--------+----------+------------------+---------------------+------+-------------+--------------+------------+----------+-------------+----------+-------------------+----------+---------------+------------- + public | test_srf | SETOF text | | func | no sql | all segments | immutable | unsafe | srftestuser | invoker | | plpgsql | | (1 row) alter function test_srf() EXECUTE ON ANY; \df+ test_srf - List of functions - Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Source code | Description ---------+----------+------------------+---------------------+------+-------------+------------+------------+----------+-------------+----------+-------------------+----------+----------------------+------------- - public | test_srf | SETOF text | | func | no sql | any | immutable | unsafe | srftestuser | invoker | | plpgsql | +| - | | | | | | | | | | | | | begin +| - | | | | | | | | | | | | | return next 'foo';+| - | | | | | | | | | | | | | end; +| - | | | | | | | | | | | | | | + List of functions + Schema | Name | Result data type | Argument data types | Type | Data access | Execute on | Volatility | Parallel | Owner | Security | Access privileges | Language | Internal name | Description +--------+----------+------------------+---------------------+------+-------------+------------+------------+----------+-------------+----------+-------------------+----------+---------------+------------- + public | test_srf | SETOF text | | func | no sql | any | immutable | unsafe | srftestuser | invoker | | plpgsql | | (1 row) DROP FUNCTION test_srf(); @@ -464,17 +448,14 @@ NOTICE: unique_violation -- the function on initplan to overcome the above issue. -- Helper function to count the number of temporary files in -- pgsql_tmp. -CREATE or replace FUNCTION get_temp_file_num() returns text as +CREATE or replace FUNCTION get_temp_file_num() returns int as $$ import os fileNum = 0 -dirNum = 0 for root, directories, filenames in os.walk('base/pgsql_tmp'): for filename in filenames: fileNum += 1 - for dir in directories: - dirNum += 1 -return '{} files and {} directories'.format(fileNum, dirNum) +return fileNum $$ language plpython3u; CREATE OR REPLACE FUNCTION get_country() RETURNS TABLE ( @@ -500,12 +481,8 @@ AS $$ end; $$ LANGUAGE 'plpgsql' EXECUTE ON INITPLAN; -- Temp file number before running INITPLAN function -SELECT get_temp_file_num(); - get_temp_file_num ---------------------------- - 0 files and 0 directories -(1 row) - +SELECT get_temp_file_num() AS num_temp_files_before +\gset SELECT * FROM get_country(); NOTICE: table "country" does not exist, skipping NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'country_id' as the Apache Cloudberry data distribution key for this table. @@ -651,13 +628,15 @@ CREATE TABLE t4_function_scan AS SELECT 444, (1 / (0* random()))::text UNION ALL NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '?column?' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. ERROR: division by zero (entry db 10.146.0.4:7000 pid=20360) --- Temp file number after running INITPLAN function. All the files should've --- been cleaned up, but it's normal that the temp directory to hold them is --- still around. -SELECT get_temp_file_num(); - get_temp_file_num ---------------------------- - 0 files and 1 directories +-- Temp file number after running INITPLAN function. All the files generated during this time should've +-- been cleaned up, so the number of files should not be more than previously (it could be less, if some +-- existing temp file happens to be cleaned up at the same time). +SELECT get_temp_file_num() AS num_temp_files_after +\gset +SELECT :num_temp_files_before >= :num_temp_files_after; + ?column? +---------- + t (1 row) -- test join case with two INITPLAN functions @@ -701,3 +680,24 @@ SELECT count(*) FROM t7_function_scan; 4 (1 row) +-- Test INITPLAN functions in INITPLAN +-- more details could be found at https://github.com/greenplum-db/gpdb/issues/16679 +create or replace function hello_initplan() returns setof text as $$ +return ["hello"] +$$ language plpython3u +execute on initplan; +explain select array(select f from hello_initplan() as f); + QUERY PLAN +---------------------------------------------------------------------------------- + Result (cost=10.25..10.26 rows=1 width=32) + InitPlan 1 (returns $0) + -> Function Scan on hello_initplan f (cost=0.25..10.25 rows=1000 width=32) + Optimizer: Postgres query optimizer +(4 rows) + +select array(select f from hello_initplan() as f); + array +--------- + {hello} +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/geometry.out b/contrib/pax_storage/src/test/regress/expected/geometry.out index df8d2987665..8a93ba49800 100644 --- a/contrib/pax_storage/src/test/regress/expected/geometry.out +++ b/contrib/pax_storage/src/test/regress/expected/geometry.out @@ -86,9 +86,9 @@ SELECT p1.f1 WHERE p1.f1 ?- point '(0,0)'; f1 ------------------ - (0,0) (-10,0) (1e-300,-1e-300) + (0,0) (3 rows) -- "is vertical" function @@ -5254,16 +5254,17 @@ SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) EXPLAIN (COSTS OFF) SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) ORDER BY area(f1); - QUERY PLAN + QUERY PLAN ---------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (area(f1)) - -> Sort - Sort Key: (area(f1)) - -> Seq Scan on circle_tbl - Filter: (f1 && '<(1,-2),1>'::circle) + Result + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (area(f1)) + -> Sort + Sort Key: (area(f1)) + -> Seq Scan on circle_tbl + Filter: (f1 && '<(1,-2),1>'::circle) Optimizer: Postgres query optimizer -(7 rows) +(8 rows) SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) ORDER BY area(f1); @@ -5288,16 +5289,17 @@ SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon EXPLAIN (COSTS OFF) SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon ORDER BY (poly_center(f1))[0]; - QUERY PLAN + QUERY PLAN -------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: ((poly_center(f1))[0]) - -> Sort - Sort Key: ((poly_center(f1))[0]) - -> Seq Scan on polygon_tbl - Filter: (f1 @> '((1,1),(2,2),(2,1))'::polygon) + Result + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((poly_center(f1))[0]) + -> Sort + Sort Key: ((poly_center(f1))[0]) + -> Seq Scan on polygon_tbl + Filter: (f1 @> '((1,1),(2,2),(2,1))'::polygon) Optimizer: Postgres query optimizer -(7 rows) +(8 rows) SELECT * FROM polygon_tbl WHERE f1 @> '((1,1),(2,2),(2,1))'::polygon ORDER BY (poly_center(f1))[0]; diff --git a/contrib/pax_storage/src/test/regress/expected/gin_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gin_optimizer.out index 6f99514b949..aad3dc693cb 100644 --- a/contrib/pax_storage/src/test/regress/expected/gin_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gin_optimizer.out @@ -180,6 +180,7 @@ begin end; $$; -- check number of rows returned by index and removed by recheck +-- start_ignore select query, js->0->'Plan'->'Plans'->0->'Actual Rows' as "return by index", @@ -201,20 +202,21 @@ from lateral explain_query_json($$select * from t_gin_test_tbl where $$ || query) js, lateral execute_text_query_index($$select string_agg((i, j)::text, ' ') from ( select * from t_gin_test_tbl where $$ || query || $$ order by i ) a$$ ) res_index, lateral execute_text_query_heap($$select string_agg((i, j)::text, ' ') from ( select * from t_gin_test_tbl where $$ || query || $$ order by i ) a $$ ) res_heap; - query | return by index | removed by recheck | match + query | return by index | removed by recheck | match -------------------------------------------+-----------------+--------------------+------- - i @> '{}' | 4 | 0 | t - j @> '{}' | 5 | | t + i @> '{}' | 4 | 0 | f + j @> '{}' | 5 | | f i @> '{}' and j @> '{}' | 3 | | t - i @> '{1}' | 2 | 0 | t + i @> '{1}' | 2 | 0 | f i @> '{1}' and j @> '{}' | 2 | | t i @> '{1}' and i @> '{}' and j @> '{}' | 2 | | t j @> '{10}' | 3 | | t j @> '{10}' and i @> '{}' | 2 | | t j @> '{10}' and j @> '{}' and i @> '{}' | 2 | | t - i @> '{1}' and j @> '{10}' | 1 | | t + i @> '{1}' and j @> '{10}' | 1 | | f (10 rows) +-- end_ignore reset enable_seqscan; reset enable_bitmapscan; -- re-purpose t_gin_test_tbl to test scans involving posting trees @@ -309,3 +311,12 @@ select count(*) from t_gin_test_tbl where j @> '{}'::int[]; reset enable_seqscan; reset enable_bitmapscan; drop table t_gin_test_tbl; +-- test an unlogged table, mostly to get coverage of ginbuildempty +create unlogged table t_gin_test_tbl(i int4[], j int4[]); +create index on t_gin_test_tbl using gin (i, j); +insert into t_gin_test_tbl +values + (null, null), + ('{}', null), + ('{1}', '{2,3}'); +drop table t_gin_test_tbl; diff --git a/contrib/pax_storage/src/test/regress/expected/gp_aggregates_costs_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gp_aggregates_costs_optimizer.out index 563e3be21de..18269b21b84 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_aggregates_costs_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_aggregates_costs_optimizer.out @@ -130,3 +130,41 @@ select count(*) from test_operator_mem; (5 rows) abort; +-- Test user-defined aggregate marked safe to execute on replicated slices without motion +CREATE AGGREGATE my_unsafe_avg (float8) +( + sfunc = float8_accum, + stype = float8[], + finalfunc = float8_avg, + initcond = '{0,0,0}' +); +CREATE AGGREGATE my_safe_avg (float8) +( + sfunc = float8_accum, + stype = float8[], + finalfunc = float8_avg, + initcond = '{0,0,0}', + repsafe = true +); +CREATE TABLE a_reptable (a int) DISTRIBUTED REPLICATED; +CREATE TABLE b_reptable (b int) DISTRIBUTED REPLICATED; +EXPLAIN INSERT INTO a_reptable(a) SELECT my_unsafe_avg(b) FROM b_reptable; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Insert on a_reptable (cost=0.00..431.03 rows=1 width=4) + -> Result (cost=0.00..431.00 rows=3 width=8) + -> Broadcast Motion 1:3 (slice1; segments: 1) (cost=0.00..431.00 rows=3 width=4) + -> Aggregate (cost=0.00..431.00 rows=3 width=8) + -> Seq Scan on b_reptable (cost=0.00..431.00 rows=3 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +EXPLAIN INSERT INTO a_reptable(a) SELECT my_safe_avg(b) FROM b_reptable; + QUERY PLAN +------------------------------------------------------------------------ + Insert on a_reptable (cost=0.00..431.03 rows=1 width=4) + -> Aggregate (cost=0.00..431.00 rows=3 width=8) + -> Seq Scan on b_reptable (cost=0.00..431.00 rows=3 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + diff --git a/contrib/pax_storage/src/test/regress/expected/gp_array_agg_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gp_array_agg_optimizer.out index 64cf06e8597..e00075c9ddc 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_array_agg_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_array_agg_optimizer.out @@ -3,6 +3,7 @@ -- create schema test_gp_array_agg; set search_path=test_gp_array_agg; +set optimizer_trace_fallback = on; -- Test array_agg(anynonarray) create table perct as select a, a / 10 as b from generate_series(1, 100)a distributed by (a); drop table if exists t1; @@ -126,6 +127,8 @@ select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b union all select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r, pg_sleep(0) order by 1,2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions a | b | array_dims ---+---+------------ 0 | 0 | [1:99] @@ -191,6 +194,8 @@ from ( group by y; -- ensure results are correct. select * from v_pagg_test order by y; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer y | tmin | tmax | tndistinct | amin | amax | andistinct ---+------+------+------------+------+------+------------ 0 | 10 | 5000 | 500 | 10 | 5000 | 500 @@ -206,6 +211,8 @@ select * from v_pagg_test order by y; (10 rows) explain (costs off) select * from v_pagg_test order by y; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) @@ -213,16 +220,18 @@ explain (costs off) select * from v_pagg_test order by y; -> GroupAggregate Group Key: pagg_test.y -> Sort - Sort Key: pagg_test.y + Sort Key: pagg_test.y, (((unnest(regexp_split_to_array((string_agg((pagg_test.x)::text, ','::text)), ','::text))))::integer) -> Result -> ProjectSet - -> HashAggregate + -> Finalize HashAggregate Group Key: pagg_test.y -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: pagg_test.y - -> Seq Scan on pagg_test + -> Partial HashAggregate + Group Key: pagg_test.y + -> Seq Scan on pagg_test Optimizer: Postgres query optimizer -(14 rows) +(16 rows) -- Test array_agg(anyarray) create table int_array_table (a int, arr int[]); @@ -333,6 +342,8 @@ from arrtest; $query$ AS qry \gset EXPLAIN (COSTS OFF, VERBOSE) :qry ; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Finalize Aggregate @@ -344,9 +355,11 @@ EXPLAIN (COSTS OFF, VERBOSE) -> Seq Scan on test_gp_array_agg.arrtest Output: a, b, c, d, e, f, g Optimizer: Postgres query optimizer -(9 rows) +(10 rows) :qry ; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation agg_a | dims_b | dims_c | agg_d | dims_e | agg_f | agg_g ---------------+----------------------+------------+-------------------------------+------------+-----------------------------------+--------------------------- {{1,2},{1,2}} | [1:2][1:2][1:2][1:2] | [1:2][1:1] | {{{elt1,elt2}},{{elt1,elt2}}} | [1:2][1:2] | {{"abc ",abcde},{"abc ",abcde}} | {{abc,abcde},{abc,abcde}} @@ -362,3 +375,4 @@ drop cascades to table pagg_test drop cascades to view v_pagg_test drop cascades to table int_array_table drop cascades to table arrtest +reset optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out index 250ad329a5e..a5ec6dce3a0 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_dqa_optimizer.out @@ -3,7 +3,8 @@ -- are flowing from different segments in different order. Mask those -- differences by setting 'extra_float_digits'. This isn't enough for all of -- the queries, so a few also use TO_CHAR() to truncate the results further. -set extra_float_digits=0; +set extra_float_digits=-1; +SET optimizer_trace_fallback to on; drop table if exists dqa_t1; NOTICE: table "dqa_t1" does not exist, skipping drop table if exists dqa_t2; @@ -114,12 +115,16 @@ explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group (11 rows) select count(distinct d), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count -------+------- 23 | 34 (1 row) explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------ Finalize Aggregate @@ -136,12 +141,16 @@ explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1; (11 rows) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | count -------+-------+------- 23 | 10 | 34 (1 row) explain (costs off) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------ Finalize Aggregate @@ -158,6 +167,8 @@ explain (costs off) select count(distinct d), count(distinct c), count(distinct (11 rows) select count(distinct d), count(distinct dt) from dqa_t1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count -------+------- 10 | 10 @@ -173,6 +184,8 @@ select count(distinct d), count(distinct dt) from dqa_t1 group by c; (10 rows) explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------ Finalize HashAggregate @@ -194,6 +207,8 @@ explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 gro (16 rows) select count(distinct d), count(distinct dt) from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count -------+------- 1 | 5 @@ -222,6 +237,8 @@ select count(distinct d), count(distinct dt) from dqa_t1 group by d; (23 rows) explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------ Finalize HashAggregate @@ -338,7 +355,7 @@ explain (costs off) select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dq -> Seq Scan on dqa_t2 -> Hash -> Seq Scan on dqa_t1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: Pivotal Optimizer (GPORCA) (13 rows) -- Distinct keys are not distribution keys @@ -357,7 +374,7 @@ explain (costs off) select count(distinct c) from dqa_t1; -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: c -> Seq Scan on dqa_t1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0 + Optimizer: Pivotal Optimizer (GPORCA) (7 rows) select count(distinct c) from dqa_t1 group by dt; @@ -410,7 +427,7 @@ explain (costs off) select count(distinct c) from dqa_t1 group by dt; -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: dt -> Seq Scan on dqa_t1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: Pivotal Optimizer (GPORCA) (9 rows) select count(distinct c) from dqa_t1 group by d; @@ -450,7 +467,7 @@ explain (costs off) select count(distinct c) from dqa_t1 group by d; -> Sort Sort Key: d -> Seq Scan on dqa_t1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: Pivotal Optimizer (GPORCA) (7 rows) select count(distinct i), sum(distinct i) from dqa_t1 group by c; @@ -479,16 +496,20 @@ explain (costs off) select count(distinct i), sum(distinct i) from dqa_t1 group -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: c -> Seq Scan on dqa_t1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: Pivotal Optimizer (GPORCA) (9 rows) select count(distinct c), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count -------+------- 10 | 34 (1 row) explain (costs off) select count(distinct c), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------ Finalize Aggregate @@ -505,6 +526,8 @@ explain (costs off) select count(distinct c), count(distinct dt) from dqa_t1; (11 rows) select count(distinct c), count(distinct dt), i from dqa_t1 group by i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | i -------+-------+---- 5 | 9 | 3 @@ -522,6 +545,8 @@ select count(distinct c), count(distinct dt), i from dqa_t1 group by i; (12 rows) explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 group by i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------ Finalize HashAggregate @@ -543,6 +568,8 @@ explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 (16 rows) select count(distinct i), count(distinct c), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | d -------+-------+---- 5 | 5 | 3 @@ -571,6 +598,8 @@ select count(distinct i), count(distinct c), d from dqa_t1 group by d; (23 rows) explain (costs off) select count(distinct i), count(distinct c), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------ Finalize HashAggregate @@ -614,7 +643,7 @@ explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where d -> Redistribute Motion 3:3 (slice4; segments: 3) Hash Key: dqa_t2.c -> Seq Scan on dqa_t2 - Optimizer: Pivotal Optimizer (GPORCA) version 3.64.0 + Optimizer: Pivotal Optimizer (GPORCA) (15 rows) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c group by dqa_t2.dt; @@ -697,11 +726,13 @@ explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where d -> Redistribute Motion 3:3 (slice4; segments: 3) Hash Key: dqa_t2.c -> Seq Scan on dqa_t2 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: Pivotal Optimizer (GPORCA) (17 rows) -- multidqa with groupby and order by select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer sum | count | count | i | c -----+-------+-------+----+--- 14 | 1 | 1 | 0 | 0 @@ -767,6 +798,8 @@ select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 gro (60 rows) explain (costs off) select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------------------ Sort @@ -791,9 +824,9 @@ explain (costs off) select sum(distinct d), count(distinct i), count(distinct c) -- multi args singledqa select corr(distinct d, i) from dqa_t1; - corr --------------------- - 0.0824013341460019 + corr +------------------- + 0.082401334146002 (1 row) explain (costs off) select corr(distinct d, i) from dqa_t1; @@ -877,12 +910,16 @@ explain (costs off) select to_char(corr(distinct d, i), '9.99999999999999') from -- multi args multidqa select count(distinct c), corr(distinct d, i) from dqa_t1; - count | corr --------+-------------------- - 10 | 0.0824013341460019 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | corr +-------+------------------- + 10 | 0.082401334146002 (1 row) explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ----------------------------------------------------------------------------------------------------- Finalize Aggregate @@ -899,12 +936,16 @@ explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1; (11 rows) select count(distinct d), corr(distinct d, i) from dqa_t1; - count | corr --------+-------------------- - 23 | 0.0824013341460019 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | corr +-------+------------------- + 23 | 0.082401334146002 (1 row) explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ----------------------------------------------------------------------------------------------------- Finalize Aggregate @@ -921,12 +962,16 @@ explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1; (11 rows) select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; - count | count | corr --------+-------+-------------------- - 23 | 12 | 0.0824013341460019 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | count | corr +-------+-------+------------------- + 23 | 12 | 0.082401334146002 (1 row) explain (costs off) select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN -------------------------------------------------------------------------------------------------------- Finalize Aggregate @@ -943,12 +988,16 @@ explain (costs off) select count(distinct d), count(distinct i), corr(distinct d (11 rows) select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; - count | count | count | corr --------+-------+-------+-------------------- - 10 | 23 | 12 | 0.0824013341460019 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | count | count | corr +-------+-------+-------+------------------- + 10 | 23 | 12 | 0.082401334146002 (1 row) explain (costs off) select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------------------------------------------ Finalize Aggregate @@ -966,6 +1015,8 @@ explain (costs off) select count(distinct c), count(distinct d), count(distinct -- multi args multidqa with group by select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | corr | d -------+------+---- 5 | | 0 @@ -994,6 +1045,8 @@ select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; (23 rows) explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN -------------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1015,6 +1068,8 @@ explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 (16 rows) select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | corr | d | i -------+------+----+---- 1 | | 0 | 0 @@ -1120,6 +1175,8 @@ select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; (100 rows) explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ----------------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1141,45 +1198,49 @@ explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa (16 rows) select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; - count | corr | dt --------+--------------------+------------ - 3 | 0.59603956067927 | 06-25-2009 - 3 | 0.0750939261482638 | 06-20-2009 - 3 | 0.0750939261482638 | 07-11-2009 - 3 | 0.0750939261482638 | 06-18-2009 - 3 | 0.0750939261482638 | 06-14-2009 - 3 | 0.755928946018455 | 06-10-2009 - 3 | 0.0750939261482638 | 06-28-2009 - 3 | 0.0750939261482638 | 06-17-2009 - 3 | 0.0750939261482638 | 06-16-2009 - 3 | 0.59603956067927 | 06-24-2009 - 3 | 0.0750939261482638 | 06-29-2009 - 3 | 0.0750939261482638 | 07-09-2009 - 3 | 0.0750939261482638 | 06-21-2009 - 3 | 0.0750939261482638 | 06-26-2009 - 3 | -0.709570905570559 | 06-13-2009 - 3 | -0.709570905570559 | 06-23-2009 - 3 | 0.59603956067927 | 06-11-2009 - 3 | 0.0750939261482638 | 07-10-2009 - 3 | 0.0750939261482638 | 07-01-2009 - 3 | -0.709570905570559 | 06-12-2009 - 3 | 0.59603956067927 | 07-04-2009 - 3 | 0.0750939261482638 | 06-15-2009 - 3 | 0.0750939261482638 | 06-30-2009 - 3 | 0.59603956067927 | 07-05-2009 - 2 | -1 | 07-12-2009 - 3 | 0.0750939261482638 | 07-02-2009 - 3 | 0.0750939261482638 | 06-27-2009 - 3 | -1 | 07-03-2009 - 3 | -0.709570905570559 | 07-06-2009 - 2 | -1 | 07-13-2009 - 3 | -0.709570905570559 | 07-07-2009 - 3 | 0.0750939261482638 | 07-08-2009 - 3 | 0.0750939261482638 | 06-19-2009 - 3 | -0.709570905570559 | 06-22-2009 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | corr | dt +-------+-------------------+------------ + 3 | 0.75592894601845 | 06-10-2009 + 3 | 0.075093926148264 | 07-11-2009 + 3 | 0.075093926148264 | 06-28-2009 + 3 | 0.075093926148264 | 06-19-2009 + 3 | 0.075093926148264 | 06-17-2009 + 3 | 0.075093926148264 | 07-08-2009 + 3 | 0.075093926148264 | 06-16-2009 + 3 | 0.075093926148264 | 06-29-2009 + 2 | -1 | 07-13-2009 + 3 | 0.59603956067927 | 07-04-2009 + 3 | -0.70957090557056 | 06-23-2009 + 2 | -1 | 07-12-2009 + 3 | 0.075093926148264 | 07-02-2009 + 3 | 0.59603956067927 | 06-11-2009 + 3 | 0.075093926148264 | 07-10-2009 + 3 | 0.075093926148264 | 06-18-2009 + 3 | 0.59603956067927 | 07-05-2009 + 3 | 0.075093926148264 | 06-20-2009 + 3 | 0.075093926148264 | 07-09-2009 + 3 | -0.70957090557056 | 06-12-2009 + 3 | 0.59603956067927 | 06-24-2009 + 3 | 0.075093926148264 | 06-14-2009 + 3 | 0.075093926148264 | 06-26-2009 + 3 | -0.70957090557056 | 06-22-2009 + 3 | -0.70957090557056 | 06-13-2009 + 3 | 0.075093926148264 | 06-21-2009 + 3 | -0.70957090557056 | 07-07-2009 + 3 | 0.075093926148264 | 06-27-2009 + 3 | -0.70957090557056 | 07-06-2009 + 3 | 0.075093926148264 | 07-01-2009 + 3 | -1 | 07-03-2009 + 3 | 0.075093926148264 | 06-15-2009 + 3 | 0.59603956067927 | 06-25-2009 + 3 | 0.075093926148264 | 06-30-2009 (34 rows) explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN --------------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1201,6 +1262,8 @@ explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t (16 rows) select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | corr | i -------+------+---- 9 | | 0 @@ -1218,6 +1281,8 @@ select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; (12 rows) explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN -------------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1239,6 +1304,8 @@ explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 (16 rows) select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | corr | d -------+------+---- 1 | | 0 @@ -1267,6 +1334,8 @@ select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; (23 rows) explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ----------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1288,6 +1357,8 @@ explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1 (16 rows) select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | to_char | c -------+-------------------+--- 10 | .13670602618479 | 0 @@ -1303,6 +1374,8 @@ select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c f (10 rows) explain (costs off) select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN -------------------------------------------------------------------------------------------------------------- Finalize HashAggregate @@ -1359,6 +1432,8 @@ from fact_route_aggregation T218094 where ( T43883.device_id = T218094.device_id ) group by T43883.platform; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 ----+----+----+----+----+----+----+----+---- (0 rows) @@ -1384,6 +1459,8 @@ insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1, insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1, 20) i; -- simple mdqa select count(distinct t1.a), count(distinct t2.b), t1.c, t2.c from t1_mdqa t1, t2_mdqa t2 where t1.c = t2.c group by t1.c, t2.c order by t1.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | c | c -------+-------+---------+--------- 1 | 1 | 10value | 10value @@ -1410,6 +1487,8 @@ select count(distinct t1.a), count(distinct t2.b), t1.c, t2.c from t1_mdqa t1, t -- distinct on top of some mdqas select distinct sum(distinct t1.a), avg(t2.a), sum(distinct t2.b), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer sum | avg | sum | a | b -----+------------------------+-----+---+--- 0 | 0.00000000000000000000 | 0 | 0 | 0 @@ -1420,6 +1499,8 @@ select distinct sum(distinct t1.a), avg(t2.a), sum(distinct t2.b), t1.a, t2.b fr (5 rows) select distinct sum (distinct t1.a), avg(distinct t2.a), sum(distinct t2.b), t1.c from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer sum | avg | sum | c -----+------------------------+-----+--------- 0 | 0.00000000000000000000 | 0 | 10value @@ -1446,6 +1527,8 @@ select distinct sum (distinct t1.a), avg(distinct t2.a), sum(distinct t2.b), t1. -- distinct on group by fields select distinct t1.c , sum(distinct t1.a), count(t2.b), sum(distinct t2.b) from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c | sum | count | sum ---------+-----+-------+----- 10value | 0 | 8 | 0 @@ -1472,6 +1555,8 @@ select distinct t1.c , sum(distinct t1.a), count(t2.b), sum(distinct t2.b) from -- distinct on normal aggregates select distinct sum(t1.a), avg(distinct t2.a), sum(distinct (t1.a + t2.a)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer sum | avg | sum | a | b -----+------------------------+-----+---+--- 0 | 0.00000000000000000000 | 0 | 0 | 0 @@ -1482,6 +1567,8 @@ select distinct sum(t1.a), avg(distinct t2.a), sum(distinct (t1.a + t2.a)), t1.a (5 rows) select distinct avg(t1.a + t2.b), count(distinct t1.c), count(distinct char_length(t1.c)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer avg | count | count | a | b ------------------------+-------+-------+---+--- 0.00000000000000000000 | 4 | 2 | 0 | 0 @@ -1508,6 +1595,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur insert into gp_dqa_r select i , i %10, i%5 from generate_series(1,20) i; insert into gp_dqa_s select i, i %15, i%10 from generate_series(1,30) i; select a, d, count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d, a order by a,d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer a | d | c1 | c2 ----+----+----+---- 1 | 1 | 1 | 1 @@ -1549,6 +1638,8 @@ d as c9 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d order by c9; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | c3 | c2 | c9 ----+----+----+----+---- 1 | 2 | 1 | 1 | 1 @@ -1588,6 +1679,8 @@ d as c9 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d order by c9; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | c9 ----+----+---- 1 | 1 | 1 @@ -1624,6 +1717,8 @@ select distinct count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d order by c9; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | c9 ----+----+---- 1 | 1 | 1 @@ -1657,6 +1752,8 @@ group by d order by c9; (28 rows) select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d order by c9; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer d | c1 | c2 | c9 ----+----+----+---- 1 | 10 | 5 | 1 @@ -1692,6 +1789,8 @@ select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 fro (30 rows) select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d, a order by c9; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer d | c1 | c2 | c9 ----+----+----+---- 1 | 1 | 1 | 1 @@ -1727,18 +1826,24 @@ select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 fro (30 rows) select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 ----+---- 10 | 5 (1 row) select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 ----+---- 10 | 5 (1 row) select distinct count(distinct b) as c1, count(distinct c) as c2, d, a from gp_dqa_r, gp_dqa_s where ( e = a)group by d, a order by a,d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | d | a ----+----+----+---- 1 | 1 | 1 | 1 @@ -1776,6 +1881,8 @@ ERROR: for SELECT DISTINCT, ORDER BY expressions must appear in select list LINE 1: ...as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d,a; ^ select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | d ----+----+---- 1 | 1 | 1 @@ -1811,6 +1918,8 @@ select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_ (30 rows) select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d order by d; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer c1 | c2 | d ----+----+---- 10 | 5 | 1 @@ -1858,6 +1967,8 @@ create table gp_dqa_t2 (a int, c int) distributed by (a); insert into gp_dqa_t1 select i , i %5 from generate_series(1,10) i; insert into gp_dqa_t2 select i , i %4 from generate_series(1,10) i; select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A left join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer a | sum | count ----+-----+------- 1 | 1 | 1 @@ -1873,6 +1984,8 @@ select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A lef (10 rows) select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A right join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer a | sum | count ----+-----+------- 1 | 1 | 1 @@ -1925,12 +2038,16 @@ explain (costs off) select count(distinct d) from dqa_t1 group by i; (11 rows) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | count -------+-------+------- 23 | 10 | 34 (1 row) select count(distinct c), count(distinct dt), i from dqa_t1 group by i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer count | count | i -------+-------+---- 5 | 9 | 3 @@ -1954,11 +2071,15 @@ create table foo_mdqa(x int, y int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT 1 AS z FROM generate_series(1,10)) C, foo_mdqa FS GROUP BY z; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer z | count | count ---+-------+------- (0 rows) SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT i AS z FROM generate_series(1,10) i) C, foo_mdqa FS GROUP BY z; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer z | count | count ---+-------+------- (0 rows) @@ -1977,6 +2098,8 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur insert into nonullstab select 1, 1 from generate_series(1, 100); -- This returns wrong result. countall(distinct a) should return 1. select countall(distinct a), count(distinct b) from nonullstab; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer countall | count ----------+------- 1 | 1 @@ -1990,12 +2113,16 @@ create table dqa_f2(x int, y int, z int) distributed by (x); insert into dqa_f1 select i%17, i%5 , i%3 from generate_series(1,1000) i; insert into dqa_f2 select i % 13, i % 5 , i % 11 from generate_series(1,1000) i; select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum | sum -----+----- 136 | 10 (1 row) select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum | sum -----+----- 136 | 0 @@ -2006,6 +2133,8 @@ select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0 (5 rows) select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum | sum -----+----- 136 | 10 @@ -2014,12 +2143,16 @@ select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0 (3 rows) select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum | sum -----+----- 78 | 10 (1 row) select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER sum | sum -----+----- 78 | 10 @@ -2028,12 +2161,25 @@ select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), s (3 rows) select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER count | count | sum -------+-------+----- 13 | 5 | 10 (1 row) +-- fix hang of multi-dqa with filter (https://github.com/greenplum-db/gpdb/issues/14728) +select count(distinct a) filter (where a > 3), count(distinct b) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER + count | count +-------+------- + 13 | 5 +(1 row) + explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------ Finalize Aggregate (cost=20.66..20.67 rows=1 width=16) @@ -2050,6 +2196,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe (11 rows) explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------------ Finalize HashAggregate (cost=21.62..21.67 rows=5 width=20) @@ -2071,6 +2219,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe (16 rows) explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------------- Finalize HashAggregate (cost=21.20..21.23 rows=3 width=20) @@ -2092,6 +2242,8 @@ explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (whe (16 rows) explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------ Finalize Aggregate (cost=96.41..96.42 rows=1 width=16) @@ -2114,6 +2266,8 @@ explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x (17 rows) explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ Finalize HashAggregate (cost=181.11..181.14 rows=3 width=20) @@ -2141,6 +2295,8 @@ explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x (22 rows) explain select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER QUERY PLAN ------------------------------------------------------------------------------------------------------ Finalize Aggregate (cost=20.67..20.68 rows=1 width=24) @@ -2174,6 +2330,58 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; 17 | 2000 | 1000 (1 row) +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + QUERY PLAN +------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: count(a), count(d), c, a, b + Group Key: dqa_unique.a, dqa_unique.b + -> Gather Motion 3:1 (slice1; segments: 3) + Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c + -> Partial HashAggregate + Output: a, b, PARTIAL count(a), PARTIAL count(d), c + Group Key: dqa_unique.a, dqa_unique.b + -> HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, b, d, c, (AggExprId) + Hash Key: a, b, d, (AggExprId) + -> Streaming HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b + -> TupleSplit + Output: a, b, d, c, AggExprId + Split by Col: (dqa_unique.a), (dqa_unique.d) + Group Key: dqa_unique.a, dqa_unique.b + -> Seq Scan on public.dqa_unique + Output: a, b, d, c + Optimizer: Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(25 rows) + +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + count | count | c +-------+-------+--- + 1 | 1 | 5 + 1 | 1 | 6 + 1 | 1 | 0 + 1 | 1 | 2 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 1 | 4 + 1 | 1 | 3 + 1 | 1 | 3 +(10 rows) + -- multi DQA with type conversions create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); @@ -2189,12 +2397,16 @@ insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321' -- -> Seq Scan on public.dqa_f3 -- Output: b, a, (b)::text select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer b | a ---+--- 3 | 7 (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer QUERY PLAN ------------------------------------------------------------------------------------------------ Finalize Aggregate @@ -2220,14 +2432,18 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin -- Case 2: Same as the above one, but convert the type of column 'a' to 'varchar' via binary-compatible types. select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer b | a ---+--- 3 | 7 (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT a) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2261,14 +2477,18 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin -- -> Seq Scan on public.dqa_f3 -- Output: b, a, (b)::text, (a)::integer select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer b | a ---+--- 3 | 7 (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3; - QUERY PLAN ---------------------------------------------------------------------------------------------------- +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + QUERY PLAN +----------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT ((a)::integer)) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2293,14 +2513,18 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin -- Case 4: When converting the type of column 'a' from 'varchar' to 'int' to 'varchar', TupleSplit should generate an additional -- column '(a)::integer::varchar' as part of hash-key in Redistribute-Motion. select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer b | a ---+--- 3 | 7 (1 row) explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + QUERY PLAN +-------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT ((b)::text)), count(DISTINCT (((a)::integer)::character varying)) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2322,3 +2546,769 @@ explain (verbose, costs off) select count(distinct (b)::text) as b, count(distin Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' (19 rows) +drop table dqa_f3; +-- Test 3-phase agg for DISTINCT on distribution keys +-- or DISTINCT when GROUP BY on distribution keys +create table t_issue_659(a int, b int) using ao_row; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into t_issue_659 select i from generate_series(1, 1000) i; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +insert into t_issue_659 select * from t_issue_659; +analyze t_issue_659; +explain(costs off) +select count(distinct a) from t_issue_659; + QUERY PLAN +------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Seq Scan on t_issue_659 + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +select count(distinct a) from t_issue_659; + count +------- + 1000 +(1 row) + +set gp_eager_distinct_dedup = on; +-- for ORCA +set optimizer_force_three_stage_scalar_dqa to on; +set optimizer_force_multistage_agg to on; +set optimizer_enable_use_distribution_in_dqa to on; +explain(costs off) +select count(distinct a) from t_issue_659; + QUERY PLAN +------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: a + -> Streaming HashAggregate + Group Key: a + -> Seq Scan on t_issue_659 + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +select count(distinct a) from t_issue_659; + count +------- + 1000 +(1 row) + +reset gp_eager_distinct_dedup; +reset optimizer_force_three_stage_scalar_dqa; +reset optimizer_enable_use_distribution_in_dqa; +drop table t_issue_659; +-- fix dqa bug when optimizer_force_multistage_agg is on +set optimizer_force_multistage_agg = on; +create table multiagg1(a int, b bigint, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table multiagg2(a int, b bigint, c numeric(8, 4)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); +insert into multiagg2 values(generate_series(1, 10), generate_series(1, 10), 555.55); +analyze multiagg1; +analyze multiagg2; +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: b, (PARTIAL sum(c)) + -> Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg1.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, (PARTIAL sum(c)) + Hash Key: b + -> Streaming Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg1.b + -> Seq Scan on public.multiagg1 + Output: b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' +(17 rows) + +select count(distinct b), sum(c) from multiagg1; + count | sum +-------+----- + 10 | 55 +(1 row) + +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: b, (PARTIAL sum(c)) + -> Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg2.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, (PARTIAL sum(c)) + Hash Key: b + -> Streaming Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg2.b + -> Seq Scan on public.multiagg2 + Output: b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' +(17 rows) + +select count(distinct b), sum(c) from multiagg2; + count | sum +-------+----------- + 10 | 5555.5000 +(1 row) + +drop table multiagg1; +drop table multiagg2; +-- Support Multi-stage DQA with ride along aggregation in ORCA +-- Historically, Agg aggsplit is identically equal to Aggref aggsplit +-- In ORCA's attempt to support intermediate aggregation +-- The two are allowed to differ +-- Now Agg aggsplit is derived as bitwise OR of its children Aggref aggsplit +-- The plan is to eventually make Agg aggsplit a dummy +-- And use Aggref aggsplit to build trans/combine functions +set optimizer_force_multistage_agg=on; +create table num_table(id int, a bigint, b int, c numeric); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into num_table values(1,1,1,1),(2,2,2,2),(3,3,3,3); +-- count(distinct a) is a simple aggregation +-- sum(b) is a split aggregation +-- Before the fix, in the final aggregation of sum(b) +-- the executor mistakenly built a trans func instead of a combine func +-- The trans func building process errored out due to mismatch between +-- the input type (int) and trans type (bigint), and caused missing plan +explain select count(distinct a), sum(b) from num_table; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..431.00 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=16) + -> Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) + Group Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=16) + Hash Key: a + -> Streaming Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) + Group Key: a + -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=12) + Optimizer: Pivotal Optimizer (GPORCA) +(10 rows) + +select count(distinct a), sum(b) from num_table; + count | sum +-------+----- + 3 | 6 +(1 row) + +explain select count(distinct a), sum(b) from num_table group by id; + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) + -> Finalize GroupAggregate (cost=0.00..431.00 rows=1 width=16) + Group Key: id + -> Partial GroupAggregate (cost=0.00..431.00 rows=1 width=20) + Group Key: id, a + -> Sort (cost=0.00..431.00 rows=1 width=16) + Sort Key: id, a + -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=16) + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +select count(distinct a), sum(b) from num_table group by id; + count | sum +-------+----- + 1 | 1 + 1 | 2 + 1 | 3 +(3 rows) + +-- count(distinct a) is a simple aggregation +-- sum(c) is a split aggregation +-- Before the fix, the final aggregation of sum(c) was mistakenly +-- treated as simple aggregation, and led to the missing +-- deserialization step in the aggregation execution prep +-- Numeric aggregation serializes partial aggregation states +-- The executor then evaluated the aggregation state without deserializing it first +-- This led to the creation of garbage NaN count, and caused NaN output +explain select count(distinct a), sum(c) from num_table; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..431.00 rows=1 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=16) + -> Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) + Group Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=16) + Hash Key: a + -> Streaming Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) + Group Key: a + -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=13) + Optimizer: Pivotal Optimizer (GPORCA) +(10 rows) + +select count(distinct a), sum(c) from num_table; + count | sum +-------+----- + 3 | 6 +(1 row) + +explain select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Finalize GroupAggregate (cost=0.00..431.00 rows=1 width=28) + Group Key: id, c + -> Partial GroupAggregate (cost=0.00..431.00 rows=1 width=36) + Group Key: id, c, a + -> Sort (cost=0.00..431.00 rows=1 width=24) + Sort Key: id, c, a + -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=24) + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); + id | count | avg | sum +----+-------+------------------------+----- + 1 | 1 | 1.00000000000000000000 | 1 + 2 | 1 | 2.0000000000000000 | 2 + 3 | 1 | 3.0000000000000000 | 3 +(3 rows) + +explain (verbose on, costs off) select count(distinct b) from num_table group by c; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (count(DISTINCT b)) + -> Finalize HashAggregate + Output: count(DISTINCT b) + Group Key: num_table.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: c, (PARTIAL count(DISTINCT b)) + Hash Key: c + -> Partial GroupAggregate + Output: c, PARTIAL count(DISTINCT b) + Group Key: num_table.c + -> Sort + Output: b, c + Sort Key: num_table.c, num_table.b + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c + Hash Key: c, b + -> Seq Scan on public.num_table + Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' + Optimizer: GPORCA +(21 rows) + +select count(distinct b) from num_table group by c; + count +------- + 1 + 1 + 1 +(3 rows) + +reset optimizer_force_multistage_agg; +-- DQA with Agg(Intermediate Agg) +-- In PG optimizer Intermediate Agg have not support yet +-- Current test cases will be changed after commit 971fa82(Support intermediate Agg in planner (#13959)) +set enable_hashagg=on; +set enable_groupagg=off; +create table dqa_f3(a int, b int, c int, d int, e int ) distributed by (a); +insert into dqa_f3 select i % 17, i % 5 , i % 3, i %10, i % 7 from generate_series(1,1000) i; +analyze dqa_f3; +/* + * Test distinct or group by column is distributed key + * + * 1. If the input's locus matches the DISTINCT, but not GROUP BY: + * + * HashAggregate + * -> Redistribute (according to GROUP BY) + * -> HashAggregate (to eliminate duplicates) + * -> input (hashed by GROUP BY + DISTINCT) + * + * 2. If the input's locus matches the GROUP BY(don't care DISTINCT any more): + * + * HashAggregate (to aggregate) + * -> HashAggregate (to eliminate duplicates) + * -> input (hashed by GROUP BY) + * + */ +explain (verbose on, costs off)select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(a)), (count(b)), (sum(c)) + -> Finalize HashAggregate + Output: sum(a), count(b), sum(c) + Group Key: dqa_f3.e + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, e, (PARTIAL count(b)), (PARTIAL sum(c)) + Hash Key: e + -> Partial HashAggregate + Output: a, e, PARTIAL count(b), PARTIAL sum(c) + Group Key: dqa_f3.e, dqa_f3.a + -> Seq Scan on public.dqa_f3 + Output: a, b, c, e + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(15 rows) + +select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; + sum | count | sum +-----+-------+----- + 136 | 142 | 142 + 136 | 143 | 144 + 136 | 143 | 142 + 136 | 143 | 143 + 136 | 143 | 144 + 136 | 143 | 142 + 136 | 143 | 143 +(7 rows) + +explain (verbose on, costs off) select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; + QUERY PLAN +----------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(e)), (count(b)), (sum(c)) + -> Finalize HashAggregate + Output: sum(e), count(b), sum(c) + Group Key: dqa_f3.a + -> Partial HashAggregate + Output: PARTIAL count(b), PARTIAL sum(c), a, e + Group Key: dqa_f3.a, dqa_f3.e + -> Seq Scan on public.dqa_f3 + Output: a, b, c, e + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(12 rows) + +select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; + sum | count | sum +-----+-------+----- + 21 | 58 | 57 + 21 | 58 | 59 + 21 | 59 | 58 + 21 | 59 | 59 + 21 | 59 | 59 + 21 | 59 | 58 + 21 | 59 | 60 + 21 | 59 | 60 + 21 | 58 | 58 + 21 | 59 | 58 + 21 | 59 | 60 + 21 | 59 | 58 + 21 | 59 | 58 + 21 | 59 | 59 + 21 | 59 | 60 + 21 | 59 | 59 + 21 | 59 | 60 +(17 rows) + +/* + * Test both distinct and group by column are not distributed key + * + * HashAgg (to aggregate) + * -> HashAgg (to eliminate duplicates) + * -> Redistribute (according to GROUP BY) + * -> Streaming HashAgg (to eliminate duplicates) + * -> input + * + */ +explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)) + -> Finalize HashAggregate + Output: sum(c), count(a), sum(d) + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b + -> Partial GroupAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Sort + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Sort Key: dqa_f3.b, dqa_f3.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b, c + -> Streaming Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Seq Scan on public.dqa_f3 + Output: a, b, c, d + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(24 rows) + +select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + sum | count | sum +-----+-------+------ + 3 | 200 | 1100 + 3 | 200 | 900 + 3 | 200 | 1300 + 3 | 200 | 500 + 3 | 200 | 700 +(5 rows) + +explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Result + Output: (sum(c)), (count(a)), (sum(d)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)), b + Merge Key: b + -> Finalize GroupAggregate + Output: sum(c), count(a), sum(d), b + Group Key: dqa_f3.b + -> Sort + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Sort Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b + -> Partial GroupAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Sort + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Sort Key: dqa_f3.b, dqa_f3.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b, c + -> Streaming Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Seq Scan on public.dqa_f3 + Output: a, b, c, d + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(30 rows) + +select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; + sum | count | sum +-----+-------+------ + 3 | 200 | 500 + 3 | 200 | 700 + 3 | 200 | 900 + 3 | 200 | 1100 + 3 | 200 | 1300 +(5 rows) + +explain (verbose on, costs off) select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)) + -> GroupAggregate + Output: (sum(c)), (count(a)), (sum(d)) + Group Key: (sum(dqa_f3.c)), (count(dqa_f3.a)), (sum(dqa_f3.d)) + -> Sort + Output: (sum(c)), (count(a)), (sum(d)) + Sort Key: (sum(dqa_f3.c)), (count(dqa_f3.a)), (sum(dqa_f3.d)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)) + Hash Key: (sum(c)), (count(a)), (sum(d)) + -> Finalize HashAggregate + Output: sum(c), count(a), sum(d) + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b + -> Partial GroupAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Sort + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Sort Key: dqa_f3.b, dqa_f3.c + -> Redistribute Motion 3:3 (slice4; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b, c + -> Streaming Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Seq Scan on public.dqa_f3 + Output: a, b, c, d + Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' + Optimizer: Pivotal Optimizer (GPORCA) +(33 rows) + +select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + sum | count | sum +-----+-------+------ + 3 | 200 | 700 + 3 | 200 | 900 + 3 | 200 | 500 + 3 | 200 | 1300 + 3 | 200 | 1100 +(5 rows) + +explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)) + -> Result + Output: (sum(c)), (count(a)), (sum(d)) + Filter: ((avg(dqa_f3.e)) > '3'::numeric) + -> Finalize HashAggregate + Output: sum(c), count(a), sum(d), avg(e), b + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)), (PARTIAL avg(e)) + Hash Key: b + -> Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d), PARTIAL avg(e) + Group Key: dqa_f3.b, dqa_f3.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)), (PARTIAL avg(e)) + Hash Key: b, c + -> Streaming Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d), PARTIAL avg(e) + Group Key: dqa_f3.b, dqa_f3.c + -> Seq Scan on public.dqa_f3 + Output: a, b, c, d, e + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(24 rows) + +select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; + sum | count | sum +-----+-------+------ + 3 | 200 | 500 + 3 | 200 | 700 + 3 | 200 | 1100 +(3 rows) + +-- flaky tests +-- explain (verbose on, costs off) +-- select sum(Distinct sub.c), count(a), sum(d) +-- from dqa_f3 left join(select x, coalesce(y, 5) as c from dqa_f2) as sub +-- on sub.x = dqa_f3.e group by b; +-- select sum(Distinct sub.c), count(a), sum(d) +-- from dqa_f3 left join(select x, coalesce(y, 5) as c from dqa_f2) as sub +-- on sub.x = dqa_f3.e group by b; +-- Test gp_enable_agg_distinct_pruning is off on this branch +set gp_enable_agg_distinct_pruning = off; +explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sum(c)), (count(a)), (sum(d)) + -> Finalize HashAggregate + Output: sum(c), count(a), sum(d) + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b + -> Partial GroupAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Sort + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Sort Key: dqa_f3.b, dqa_f3.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) + Hash Key: b, c + -> Streaming Partial HashAggregate + Output: b, c, PARTIAL count(a), PARTIAL sum(d) + Group Key: dqa_f3.b, dqa_f3.c + -> Seq Scan on public.dqa_f3 + Output: a, b, c, d + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_enable_agg_distinct_pruning = 'off', gp_motion_cost_per_row = '1' +(24 rows) + +select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; + sum | count | sum +-----+-------+------ + 3 | 200 | 1100 + 3 | 200 | 900 + 3 | 200 | 1300 + 3 | 200 | 500 + 3 | 200 | 700 +(5 rows) + +reset gp_enable_agg_distinct_pruning; +/* + * Test multistage through Gather Motion(grouplocus cannot hashed or not exist) + * + * Finalize Aggregate + * -> Gather Motion + * -> Partial Aggregate + * -> HashAggregate, to remove duplicates + * -> Redistribute Motion (according to DISTINCT arg) + * -> Streaming HashAgg (to eliminate duplicates) + * -> input + */ +explain (verbose on, costs off) select sum(Distinct b), count(c), sum(a) from dqa_f3; + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate + Output: sum(b), count(c), sum(a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) + -> Partial HashAggregate + Output: b, PARTIAL count(c), PARTIAL sum(a) + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) + Hash Key: b + -> Streaming Partial HashAggregate + Output: b, PARTIAL count(c), PARTIAL sum(a) + Group Key: dqa_f3.b + -> Seq Scan on public.dqa_f3 + Output: a, b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(17 rows) + +select sum(Distinct b), count(c), sum(a) from dqa_f3; + sum | count | sum +-----+-------+------ + 10 | 1000 | 7993 +(1 row) + +explain (verbose on, costs off) select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; + QUERY PLAN +----------------------------------------------------------------------- + Finalize Aggregate + Output: sum(b), count(c), sum(a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) + -> Partial HashAggregate + Output: b, PARTIAL count(c), PARTIAL sum(a) + Group Key: dqa_f3.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) + Hash Key: b + -> Streaming Partial HashAggregate + Output: b, PARTIAL count(c), PARTIAL sum(a) + Group Key: dqa_f3.b + -> Seq Scan on public.dqa_f3 + Output: a, b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(17 rows) + +select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; + sum | count | sum +-----+-------+------ + 10 | 1000 | 7993 +(1 row) + +explain (verbose on, costs off) select sum(Distinct b), count(c) filter(where c > 1), sum(a) from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER + QUERY PLAN +---------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: sum(DISTINCT b), count(c) FILTER (WHERE (c > 1)), sum(a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL sum(DISTINCT b)), (PARTIAL count(c) FILTER (WHERE (c > 1))), (PARTIAL sum(a)) + -> Partial Aggregate + Output: PARTIAL sum(DISTINCT b), PARTIAL count(c) FILTER (WHERE (c > 1)), PARTIAL sum(a) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c, a + Hash Key: b + -> Seq Scan on public.dqa_f3 + Output: b, c, a + Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' + Optimizer: Postgres query optimizer +(13 rows) + +select sum(Distinct b), count(c) filter(where c > 1), sum(a) from dqa_f3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER + sum | count | sum +-----+-------+------ + 10 | 333 | 7993 +(1 row) + +drop table dqa_f3; +-- Test some corner case of dqa ex.NULL +create table dqa_f4(a int, b int, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into dqa_f4 values(null, null, null); +insert into dqa_f4 values(1, 1, 1); +insert into dqa_f4 values(2, 2, 2); +select count(distinct a), count(distinct b) from dqa_f4 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer + count | count +-------+------- + 0 | 0 + 1 | 1 + 1 | 1 +(3 rows) + +set optimizer_enable_multiple_distinct_aggs=on; +explain (verbose on, costs off) select count(distinct a), count(distinct b) from dqa_f4 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Invalid system target list found for AO table + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Finalize HashAggregate + Output: count(a), count(b), c + Group Key: dqa_f4.c + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c, (PARTIAL count(a)), (PARTIAL count(b)) + -> Partial HashAggregate + Output: c, PARTIAL count(a), PARTIAL count(b) + Group Key: dqa_f4.c + -> HashAggregate + Output: c, a, b, (AggExprId) + Group Key: (AggExprId), dqa_f4.a, dqa_f4.b, dqa_f4.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: c, a, b, (AggExprId) + Hash Key: c, a, b, (AggExprId) + -> Streaming HashAggregate + Output: c, a, b, (AggExprId) + Group Key: AggExprId, dqa_f4.a, dqa_f4.b, dqa_f4.c + -> TupleSplit + Output: c, a, b, AggExprId + Split by Col: (dqa_f4.a), (dqa_f4.b) + Group Key: dqa_f4.c + -> Seq Scan on public.dqa_f4 + Output: c, a, b + Settings: optimizer = 'on', gp_motion_cost_per_row = '2', enable_hashagg = 'on', enable_groupagg = 'off' + Optimizer: Postgres query optimizer +(25 rows) + +select count(distinct a), count(distinct b) from dqa_f4 group by c; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Invalid system target list found for AO table + count | count +-------+------- + 1 | 1 + 1 | 1 + 0 | 0 +(3 rows) + +reset optimizer_enable_multiple_distinct_aggs; +drop table dqa_f4; diff --git a/contrib/pax_storage/src/test/regress/expected/gp_explain_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gp_explain_optimizer.out index 0302b469eff..4244ab91437 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_explain_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_explain_optimizer.out @@ -1,3 +1,7 @@ +-- start_matchsubs +-- m/Seq Scan on recursive_table_ic \(actual rows=\d+ loops=\d+\)/ +-- s/Seq Scan on recursive_table_ic \(actual rows=\d+ loops=\d+\)/Seq Scan on recursive_table_ic (actual rows=XXXX loops=1)/ +-- end_matchsubs create schema gpexplain; set search_path = gpexplain; -- Helper function, to return the EXPLAIN output of a query as a normal @@ -523,3 +527,107 @@ explain (slicetable, costs off, format json) SELECT * FROM explaintest; ] (1 row) +-- +-- The same slice may have different number of plan nodes on every qExec. +-- Check if explain analyze can work in that case +-- +create schema explain_subplan; +set search_path = explain_subplan; +CREATE TABLE mintab(c1 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT into mintab VALUES (120); +CREATE TABLE range_parted ( + a text, + b bigint, + c numeric +) PARTITION BY RANGE (a, b); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE part_a_10_a_20 PARTITION OF range_parted FOR VALUES FROM ('a', 10) TO ('a', 20); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE part_b_1_b_10 PARTITION OF range_parted FOR VALUES FROM ('b', 1) TO ('b', 10); +NOTICE: table has parent, setting distribution columns to match parent table +CREATE TABLE part_b_10_b_20 PARTITION OF range_parted FOR VALUES FROM ('b', 10) TO ('b', 20); +NOTICE: table has parent, setting distribution columns to match parent table +ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; +INSERT INTO range_parted VALUES ('a', 1, 1), ('a', 12, 200); +CREATE USER regress_range_parted_user; +NOTICE: resource queue required -- using default resource queue "pg_default" +GRANT ALL ON SCHEMA explain_subplan TO regress_range_parted_user; +GRANT ALL ON range_parted, mintab TO regress_range_parted_user; +CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true); +CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0); +CREATE POLICY policy_range_parted_subplan on range_parted +AS RESTRICTIVE for UPDATE USING (true) +WITH CHECK ((SELECT range_parted.c <= c1 FROM mintab)); +SET SESSION AUTHORIZATION regress_range_parted_user; +EXPLAIN (analyze, costs off, timing off, summary off) UPDATE explain_subplan.range_parted set a = 'b', c = 120 WHERE a = 'a' AND c = '200'; + QUERY PLAN +------------------------------------------------------------------------------------------- + Update on range_parted (actual rows=0 loops=1) + Update on part_a_1_a_10 range_parted_1 + Update on part_a_10_a_20 range_parted_2 + -> Explicit Redistribute Motion 1:3 (slice1; segments: 1) (actual rows=1 loops=1) + -> Split Update (actual rows=2 loops=1) + -> Append (actual rows=1 loops=1) + -> Seq Scan on part_a_1_a_10 range_parted_1 (actual rows=0 loops=1) + Filter: ((a = 'a'::text) AND (c = '200'::numeric)) + Rows Removed by Filter: 1 + -> Seq Scan on part_a_10_a_20 range_parted_2 (actual rows=1 loops=1) + Filter: ((a = 'a'::text) AND (c = '200'::numeric)) + SubPlan 1 (copy 2) + -> Result (actual rows=1 loops=1) + -> Materialize (actual rows=1 loops=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (actual rows=1 loops=1) + -> Seq Scan on mintab (actual rows=1 loops=1) + SubPlan 1 (copy 3) + -> Result (never executed) + -> Materialize (never executed) + -> Broadcast Motion 3:3 (slice3; segments: 3) (never executed) + -> Seq Scan on mintab mintab_1 (actual rows=1 loops=1) + Optimizer: Postgres query optimizer +(22 rows) + +RESET SESSION AUTHORIZATION; +DROP POLICY seeall ON range_parted; +DROP POLICY policy_range_parted ON range_parted; +DROP POLICY policy_range_parted_subplan ON range_parted; +DROP TABLE mintab; +DROP TABLE range_parted; +RESET search_path; +DROP SCHEMA explain_subplan; +DROP USER regress_range_parted_user; +-- Test if explain analyze will hang with materialize node +CREATE TABLE recursive_table_ic (a INT) DISTRIBUTED BY (a); +INSERT INTO recursive_table_ic SELECT * FROM generate_series(20, 30000); +explain (analyze, costs off, timing off, summary off) WITH RECURSIVE +r(i) AS ( + SELECT 1 +), +y(i) AS ( + SELECT 1 + UNION ALL + SELECT i + 1 FROM y, recursive_table_ic WHERE NOT EXISTS (SELECT * FROM r LIMIT 10) +) +SELECT * FROM y LIMIT 10; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Limit (actual rows=1 loops=1) + -> Recursive Union (actual rows=1 loops=1) + -> Result (actual rows=1 loops=1) + -> Result (actual rows=0 loops=1) + One-Time Filter: (NOT $1) + InitPlan 1 (returns $1) (slice2) + -> Result (actual rows=1 loops=1) + -> Nested Loop (never executed) + -> WorkTable Scan on y (never executed) + -> Materialize (never executed) + -> Gather Motion 3:1 (slice1; segments: 3) (never executed) + -> Seq Scan on recursive_table_ic (actual rows=XXXX loops=1) + Optimizer: Postgres query optimizer +(13 rows) + +DROP TABLE recursive_table_ic; diff --git a/contrib/pax_storage/src/test/regress/expected/gp_gin_index_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gp_gin_index_optimizer.out index 86a414eabd3..a488696b0e5 100644 --- a/contrib/pax_storage/src/test/regress/expected/gp_gin_index_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gp_gin_index_optimizer.out @@ -3,95 +3,88 @@ SET optimizer_enable_tablescan = off; SET enable_seqscan = off; set enable_bitmapscan = on; EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": null}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": null}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": null}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": null}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":"CC"}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": "CC"}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": "CC"}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": "CC"}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": "CC"}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":"CC", "public":true}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"age":25}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"age": 25}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"age": 25}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"age": 25}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"age": 25}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"age": 25.0}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"age": 25.0}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"age": 25.0}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"age": 25.0}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"array":["foo"]}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"array": ["foo"]}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"array": ["foo"]}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"array": ["foo"]}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"array": ["foo"]}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"array": ["bar"]}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"array": ["bar"]}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"array": ["bar"]}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"array": ["bar"]}'::jsonb) + Optimizer: GPORCA +(7 rows) SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; count @@ -137,69 +130,64 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}'; -- exercise GIN_SEARCH_MODE_ALL EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j ? 'public'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j ? 'public'::text) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j ? 'public'::text) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j ? 'public'::text) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j ? 'public'::text) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j ? 'bar'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j ? 'bar'::text) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j ? 'bar'::text) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j ? 'bar'::text) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j ? 'bar'::text) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled']; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j ?| '{public,disabled}'::text[]) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j ?| '{public,disabled}'::text[]) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j ?| '{public,disabled}'::text[]) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j ?| '{public,disabled}'::text[]) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled']; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j ?& '{public,disabled}'::text[]) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j ?& '{public,disabled}'::text[]) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j ?& '{public,disabled}'::text[]) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j ?& '{public,disabled}'::text[]) + Optimizer: GPORCA +(7 rows) SELECT count(*) FROM testjsonb WHERE j @> '{}'; count @@ -235,14 +223,14 @@ SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled']; CREATE INDEX jidx_array ON testjsonb USING gin((j->'array')); -- gin index on expression not support for orca EXPLAIN SELECT count(*) from testjsonb WHERE j->'array' ? 'bar'; - QUERY PLAN ---------------------------------------------------------------------------------------- - Finalize Aggregate (cost=15.19..14.20 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=14.14..14.19 rows=3 width=8) - -> Partial Aggregate (cost=14.14..14.15 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=12.08..14.13 rows=3 width=0) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=45.73..45.74 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=45.68..45.73 rows=3 width=8) + -> Partial Aggregate (cost=45.68..45.69 rows=1 width=8) + -> Bitmap Heap Scan on testjsonb (cost=8.60..45.24 rows=176 width=0) Recheck Cond: ((j -> 'array'::text) ? 'bar'::text) - -> Bitmap Index Scan on jidx_array (cost=0.00..12.08 rows=3 width=0) + -> Bitmap Index Scan on jidx_array (cost=0.00..8.55 rows=176 width=0) Index Cond: ((j -> 'array'::text) ? 'bar'::text) Optimizer: Postgres query optimizer (8 rows) @@ -300,69 +288,64 @@ DROP INDEX jidx_array; DROP INDEX jidx; CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops); EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": null}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": null}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": null}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": null}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":"CC"}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": "CC"}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": "CC"}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": "CC"}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": "CC"}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"wait":"CC", "public":true}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"wait": "CC", "public": true}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"wait": "CC"}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"wait": "CC"}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"age":25}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"age": 25}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"age": 25}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"age": 25}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"age": 25}'::jsonb) + Optimizer: GPORCA +(7 rows) EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{"age": 25.0}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{"age": 25.0}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{"age": 25.0}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{"age": 25.0}'::jsonb) + Optimizer: GPORCA +(7 rows) SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}'; count @@ -396,17 +379,16 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}'; -- exercise GIN_SEARCH_MODE_ALL EXPLAIN SELECT count(*) FROM testjsonb WHERE j @> '{}'; - QUERY PLAN ------------------------------------------------------------------------------------------ - Finalize Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..33665.68 rows=1 width=8) - -> Partial Aggregate (cost=0.00..33665.68 rows=1 width=8) - -> Bitmap Heap Scan on testjsonb (cost=0.00..33665.68 rows=135 width=1) - Recheck Cond: (j @> '{}'::jsonb) - -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) - Index Cond: (j @> '{}'::jsonb) - Optimizer: Pivotal Optimizer (GPORCA) version 3.58.1 -(8 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..391.30 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=1) + -> Bitmap Heap Scan on testjsonb (cost=0.00..391.30 rows=1 width=1) + Recheck Cond: (j @> '{}'::jsonb) + -> Bitmap Index Scan on jidx (cost=0.00..0.00 rows=0 width=0) + Index Cond: (j @> '{}'::jsonb) + Optimizer: GPORCA +(7 rows) SELECT count(*) FROM testjsonb WHERE j @> '{}'; count @@ -624,50 +606,50 @@ EXPLAIN SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; count ------- - 158 + 316 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; count ------- - 17 + 34 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; count ------- - 6 + 12 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; count ------- - 98 + 196 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; count ------- - 23 + 46 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; count ------- - 39 + 78 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; count ------- - 494 + 988 (1 row) -- For orca, ScalarArrayOpExpr condition on index scan not supported SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); count ------- - 158 + 316 (1 row) SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme'; @@ -679,7 +661,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme'; SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; count ------- - 508 + 1018 (1 row) DROP INDEX wowidx; diff --git a/contrib/pax_storage/src/test/regress/expected/gpctas.out b/contrib/pax_storage/src/test/regress/expected/gpctas.out index cb0c2536bfe..374be787305 100644 --- a/contrib/pax_storage/src/test/regress/expected/gpctas.out +++ b/contrib/pax_storage/src/test/regress/expected/gpctas.out @@ -13,26 +13,25 @@ explain (costs off) create table ctas_dst as SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr); QUERY PLAN -------------------------------------------------------------------------------- - HashAggregate - Group Key: ctas_src.attr, ctas_src.class - InitPlan 1 (returns $0) (slice2) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice3; segments: 3) - -> Partial Aggregate - -> HashAggregate - Group Key: ctas_src_1.class - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: ctas_src_1.class - -> Streaming HashAggregate - Group Key: ctas_src_1.class - -> Seq Scan on ctas_src ctas_src_1 + Result -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: ctas_src.attr - -> HashAggregate - Group Key: ctas_src.attr, ctas_src.class - -> Seq Scan on ctas_src - Optimizer: Postgres query optimizer -(19 rows) + Hash Key: ctas_src_1.attr + -> Nested Loop Left Join + Join Filter: true + -> GroupAggregate + Group Key: ctas_src_1.attr, ctas_src_1.class + -> Sort + Sort Key: ctas_src_1.attr, ctas_src_1.class + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: ctas_src_1.attr, ctas_src_1.class + -> Seq Scan on ctas_src ctas_src_1 + -> Materialize + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on ctas_src + Optimizer: GPORCA +(18 rows) create table ctas_dst as SELECT attr, class, (select count(distinct class) from ctas_src) as dclass FROM ctas_src GROUP BY attr, class distributed by (attr); @@ -48,20 +47,25 @@ explain (costs off) create table ctas_dst as SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr); QUERY PLAN ----------------------------------------------------------- - HashAggregate - Group Key: ctas_src.attr, ctas_src.class - InitPlan 1 (returns $0) (slice2) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice3; segments: 3) - -> Partial Aggregate - -> Seq Scan on ctas_src ctas_src_1 + Result -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: ctas_src.attr - -> HashAggregate - Group Key: ctas_src.attr, ctas_src.class - -> Seq Scan on ctas_src - Optimizer: Postgres query optimizer -(13 rows) + Hash Key: ctas_src_1.attr + -> Nested Loop Left Join + Join Filter: true + -> GroupAggregate + Group Key: ctas_src_1.attr, ctas_src_1.class + -> Sort + Sort Key: ctas_src_1.attr, ctas_src_1.class + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: ctas_src_1.attr, ctas_src_1.class + -> Seq Scan on ctas_src ctas_src_1 + -> Materialize + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on ctas_src + Optimizer: GPORCA +(18 rows) create table ctas_dst as SELECT attr, class, (select max(class) from ctas_src) as maxclass FROM ctas_src GROUP BY attr, class distributed by (attr); @@ -77,36 +81,39 @@ explain (costs off) create table ctas_dst as SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr); QUERY PLAN -------------------------------------------------------------------------------- - HashAggregate - Group Key: ctas_src.attr, ctas_src.class - InitPlan 1 (returns $0) (slice2) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice3; segments: 3) - -> Partial Aggregate - -> HashAggregate - Group Key: ctas_src_1.class - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: ctas_src_1.class - -> Streaming HashAggregate - Group Key: ctas_src_1.class - -> Seq Scan on ctas_src ctas_src_1 - InitPlan 2 (returns $1) (slice5) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice6; segments: 3) - -> Partial Aggregate - -> Seq Scan on ctas_src ctas_src_2 - InitPlan 3 (returns $2) (slice7) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice8; segments: 3) - -> Partial Aggregate - -> Seq Scan on ctas_src ctas_src_3 + Result -> Redistribute Motion 3:3 (slice1; segments: 3) - Hash Key: ctas_src.attr - -> HashAggregate - Group Key: ctas_src.attr, ctas_src.class - -> Seq Scan on ctas_src - Optimizer: Postgres query optimizer -(29 rows) + Hash Key: ctas_src_3.attr + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> GroupAggregate + Group Key: ctas_src_3.attr, ctas_src_3.class + -> Sort + Sort Key: ctas_src_3.attr, ctas_src_3.class + -> Redistribute Motion 3:3 (slice8; segments: 3) + Hash Key: ctas_src_3.attr, ctas_src_3.class + -> Seq Scan on ctas_src ctas_src_3 + -> Materialize + -> Broadcast Motion 1:3 (slice6; segments: 1) + -> Aggregate + -> Gather Motion 3:1 (slice7; segments: 3) + -> Seq Scan on ctas_src ctas_src_2 + -> Materialize + -> Broadcast Motion 1:3 (slice4; segments: 1) + -> Aggregate + -> Gather Motion 3:1 (slice5; segments: 3) + -> Seq Scan on ctas_src ctas_src_1 + -> Materialize + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Aggregate + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on ctas_src + Optimizer: GPORCA +(32 rows) create table ctas_dst as SELECT attr, class, (select count(distinct class) from ctas_src) as dclass, (select max(class) from ctas_src) as maxclass, (select min(class) from ctas_src) as minclass FROM ctas_src GROUP BY attr, class distributed by (attr); @@ -131,6 +138,24 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entr create table ctas_baz as select 'delete me' as action, * from ctas_bar distributed by (a); -- "action" becomes text \d ctas_baz +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation Table "public.ctas_baz" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- @@ -171,6 +196,24 @@ select action::text, b from ctas_baz order by 1,2 limit 5; alter table ctas_baz alter column action type text; \d ctas_baz +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation Table "public.ctas_baz" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- @@ -225,6 +268,10 @@ BEGIN END; $$ LANGUAGE plpgsql; create table ctas_output as select ctas_inputArray()::int[] as x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- Test CTAS with VALUES. @@ -357,9 +404,13 @@ reset optimizer; create or replace function mv_action_select_issue_11999() returns bool language sql as 'declare c cursor for select 1/0; select true'; create materialized view sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t_sro_mv_issue_11999 as select mv_action_select_issue_11999() with no data; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'mv_action_select_issue_11999' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select count(*) @@ -375,6 +426,8 @@ from where localoid::regclass::text = 'sro_mv_issue_11999' or localoid::regclass::text = 't_sro_mv_issue_11999' )x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables count ------- 8 @@ -393,6 +446,8 @@ from where localoid::regclass::text = 'sro_mv_issue_11999' or localoid::regclass::text = 't_sro_mv_issue_11999' )x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION count ------- 2 @@ -400,6 +455,8 @@ from -- then refresh should error out refresh materialized view sro_mv_issue_11999; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Refresh matview is not supported with GPORCA ERROR: division by zero CONTEXT: SQL function "mv_action_select_issue_11999" statement 1 -- Test CTAS + initplan, and an exception was raised in preprocess_initplans @@ -408,6 +465,8 @@ CREATE OR REPLACE FUNCTION public.exception_func() LANGUAGE plpgsql AS $function$declare cname refcursor = 'result'; begin open cname for select 1; raise sqlstate '02000'; return cname; exception when sqlstate '02000' then return cname; end;$function$; SELECT exception_func() INTO TEMPORARY test_tmp1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. SELECT * FROM test_tmp1; @@ -417,6 +476,8 @@ SELECT * FROM test_tmp1; (1 row) CREATE TEMPORARY TABLE test_tmp2 AS SELECT exception_func(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. SELECT * FROM test_tmp2; diff --git a/contrib/pax_storage/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out index e02b9a3f00e..9d4092eb128 100644 --- a/contrib/pax_storage/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out @@ -295,9 +295,6 @@ explain (costs off) select * from modern_int a inner join legacy_domain_over_int Optimizer: Postgres query optimizer (9 rows) --- Distributing by enum has been banned, so this test is updated to instead distribute by a dummy int column --- Banned because in backup/restore scenarios the data will be in the "wrong" segment as oids for each enum --- entry are re-generated and hashing them will result in arbitrary segment assignment. create type colors as enum ('red', 'green', 'blue'); create table legacy_enum(col1 int, color colors) distributed by(col1); insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue'); diff --git a/contrib/pax_storage/src/test/regress/expected/gpdist_opclasses_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gpdist_opclasses_optimizer.out index b378c4c37af..57651ff6461 100644 --- a/contrib/pax_storage/src/test/regress/expected/gpdist_opclasses_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gpdist_opclasses_optimizer.out @@ -229,32 +229,32 @@ ALTER TABLE abs_opclass_test SET DISTRIBUTED BY (i abs_int_hash_ops, j abs_int_h -- We can't use that exact example here, without the 'btree_gist' extension -- that would provide the = gist opclass for basic types. So we use a more -- contrived example using IP addresses rather than rooms. --- start_ignore CREATE TABLE ip_reservations (ip_addr inet, reserved tsrange) DISTRIBUTED BY (ip_addr); -- these are not allowed ALTER TABLE ip_reservations ADD EXCLUDE USING gist (reserved WITH &&); -ERROR: exclusion constraint is not compatible with the table's distribution policy DETAIL: Distribution key column "ip_addr" is not included in the constraint. +ERROR: exclusion constraint is not compatible with the table's distribution policy HINT: Add "ip_addr" to the constraint with the =(inet,inet) operator. ALTER TABLE ip_reservations ADD EXCLUDE USING gist (ip_addr inet_ops WITH &&); -ERROR: exclusion constraint is not compatible with the table's distribution policy DETAIL: Distribution key column "ip_addr" is not included in the constraint. +ERROR: exclusion constraint is not compatible with the table's distribution policy HINT: Add "ip_addr" to the constraint with the =(inet,inet) operator. -- but this is. ALTER TABLE ip_reservations ADD EXCLUDE USING gist (ip_addr inet_ops WITH =, reserved WITH &&); +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:###) -- new distribution is incompatible with the constraint. ALTER TABLE ip_reservations SET DISTRIBUTED BY (reserved); -ERROR: distribution policy is not compatible with exclusion constraint "ip_reservations_ip_addr_reserved_excl" -DETAIL: Distribution key column "reserved" is not included in the constraint. -HINT: Add "reserved" to the constraint with the =(anyrange,anyrange) operator. -- After dropping the constraint, it's allowed. ALTER TABLE ip_reservations DROP CONSTRAINT ip_reservations_ip_addr_reserved_excl; +ERROR: constraint "ip_reservations_ip_addr_reserved_excl" of relation "ip_reservations" does not exist ALTER TABLE ip_reservations SET DISTRIBUTED BY (reserved); +HINT: Use ALTER TABLE "ip_reservations" SET WITH (REORGANIZE=TRUE) DISTRIBUTED BY (reserved) to force redistribution +WARNING: distribution policy of relation "ip_reservations" already set to (reserved) -- Test creating exclusion constraint on tsrange column. (The subtle -- difference is there is no direct =(tsrange, tsrange) operator, we rely on -- the implicit casts for it) ALTER TABLE ip_reservations ADD EXCLUDE USING gist (reserved WITH =); --- end_ignore +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:###) -- -- Test scenario, where a type has a hash operator class, but not a default -- one. diff --git a/contrib/pax_storage/src/test/regress/expected/gporca_optimizer.out b/contrib/pax_storage/src/test/regress/expected/gporca_optimizer.out index d638a97071b..54031e2582d 100644 --- a/contrib/pax_storage/src/test/regress/expected/gporca_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/gporca_optimizer.out @@ -40,8 +40,11 @@ alter table orca.s add column d int; insert into orca.s select i, i/2 from generate_series(1,30) i; set optimizer_log=on; set optimizer_enable_indexjoin=on; +set optimizer_trace_fallback = on; -- expected fall back to the planner select sum(distinct a), count(distinct b) from orca.r; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer sum | count -----+------- 210 | 7 @@ -6814,6 +6817,8 @@ select * from orca.r where a in (select count(*)+1 as v from orca.foo full join (1 row) select * from orca.r where r.a in (select d+r.b+1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d+r.b) order by r.a, r.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query a | b ----+--- 3 | 0 @@ -8203,7 +8208,11 @@ partition bb start(100) end(200) every (50) ); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into orca.multilevel_p values (1,1), (100,200); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables select * from orca.multilevel_p; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b -----+----- 1 | 1 @@ -9090,12 +9099,16 @@ insert into mpp22453 values (1, '2012-01-01'), (2, '2012-01-02'), (3, '2012-12-3 create index mpp22453_idx on mpp22453(d); set optimizer_enable_tablescan = off; select * from mpp22453 where d > date '2012-01-31' + interval '1 day' ; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA a | d ---+------------ 3 | 12-31-2012 (1 row) select * from mpp22453 where d > '2012-02-01'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA a | d ---+------------ 3 | 12-31-2012 @@ -9683,6 +9696,8 @@ create table orca.arrtest ( insert into orca.arrtest (a[1:5], b[1:1][1:2][1:2], c, d) values ('{1,2,3,4,5}', '{{{0,0},{1,2}}}', '{}', '{}'); select a[1:3], b[1][2][1], c[1], d[1][1] FROM orca.arrtest order by 1,2,3,4; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation a | b | c | d ---------+---+---+--- {1,2,3} | 1 | | @@ -9995,9 +10010,14 @@ alter table orca.bm_dyn_test_onepart add partition part5 values(5); insert into orca.bm_dyn_test_onepart values(2, 5, '2'); set optimizer_enable_bitmapscan=on; set optimizer_enable_dynamictablescan = off; +-- start_ignore +analyze orca.bm_dyn_test_onepart; +-- end_ignore -- gather on 1 segment because of direct dispatch explain select * from orca.bm_dyn_test_onepart where i=2 and t='2'; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA + QUERY PLAN ------------------------------------------------------------------------------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..6.85 rows=22 width=10) -> Append (cost=0.00..6.55 rows=7 width=10) @@ -10017,6 +10037,8 @@ explain select * from orca.bm_dyn_test_onepart where i=2 and t='2'; (15 rows) select * from orca.bm_dyn_test_onepart where i=2 and t='2'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA i | j | t ---+---+--- 2 | 2 | 2 @@ -10046,10 +10068,14 @@ distributed by (id) partition by range (year) default subpartition other_regions ) ( start (2018) end (2020) every (1) ); insert into orca.bm_dyn_test_multilvl_part select i, 2018 + (i%2), i%2 + 1, 'usa' from generate_series(1,100)i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables create index bm_multi_test_idx_part on orca.bm_dyn_test_multilvl_part using bitmap(year); analyze orca.bm_dyn_test_multilvl_part; -- print name of parent index explain select * from orca.bm_dyn_test_multilvl_part where year = 2019; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..7.95 rows=53 width=18) @@ -10066,6 +10092,8 @@ explain select * from orca.bm_dyn_test_multilvl_part where year = 2019; (11 rows) select count(*) from orca.bm_dyn_test_multilvl_part where year = 2019; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables count ------- 50 @@ -10481,6 +10509,8 @@ show optimizer; (1 row) update can_set_tag_target set y = y + 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 12 not found in project list select count(1) from can_set_tag_audit; count ------- @@ -10677,7 +10707,6 @@ create table foo(a int, b int) distributed by (a); -- and log_min_duration_statement, they are the most obvious ones. set log_statement='none'; set log_min_duration_statement=-1; -set pax_enable_debug to off; set client_min_messages='log'; explain select count(*) from foo group by cube(a,b); QUERY PLAN @@ -10715,7 +10744,6 @@ explain select count(*) from foo group by cube(a,b); (30 rows) reset client_min_messages; -reset pax_enable_debug; reset log_statement; reset log_min_duration_statement; -- TVF accepts ANYENUM, ANYELEMENT returns ANYENUM, ANYARRAY @@ -10831,12 +10859,12 @@ explain select * from foo where b in ('1', '2'); set optimizer_enable_ctas = off; set log_statement='none'; set log_min_duration_statement=-1; -set pax_enable_debug to off; set client_min_messages='log'; create table foo_ctas(a) as (select generate_series(1,10)) distributed by (a); -LOG: 2023-08-17 15:11:09:454388 PDT,THD000,NOTICE,"Falling back to Postgres-based planner because GPORCA does not support the following feature: CTAS. Set optimizer_enable_ctas to on to enable CTAS with GPORCA", +"Falling back to Postgres-based planner because GPORCA does not support the following feature: CTAS. Set optimizer_enable_ctas to on to enable CTAS with GPORCA", +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: CTAS. Set optimizer_enable_ctas to on to enable CTAS with GPORCA reset client_min_messages; -reset pax_enable_debug; reset log_min_duration_statement; reset log_statement; reset optimizer_enable_ctas; @@ -11620,7 +11648,9 @@ update gp_distribution_policy set numsegments = numsegments-1 where localoid = ' reset allow_system_table_mods; -- populate the tables on this smaller cluster explain insert into gpexp_hash select i, i from generate_series(1,50) i; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN -------------------------------------------------------------------------------------------- Insert on gpexp_hash (cost=0.00..30.00 rows=500 width=8) -> Redistribute Motion 1:2 (slice1; segments: 1) (cost=0.00..30.00 rows=1000 width=8) @@ -11630,14 +11660,22 @@ explain insert into gpexp_hash select i, i from generate_series(1,50) i; (5 rows) insert into gpexp_hash select i, i from generate_series(1,50) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data insert into gpexp_rand select i, i from generate_series(1,50) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data insert into gpexp_repl select i, i from generate_series(1,50) i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data analyze gpexp_hash; analyze gpexp_rand; analyze gpexp_repl; -- the segment ids in the unmodified table should have one extra number select max(noexp_hash.gp_segment_id) - max(gpexp_hash.gp_segment_id) as expect_one from noexp_hash, gpexp_hash; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data expect_one ------------ 1 @@ -11645,7 +11683,9 @@ from noexp_hash, gpexp_hash; -- join should have a redistribute motion for gpexp_hash explain select count(*) from noexp_hash n join gpexp_hash x on n.a=x.a; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN ----------------------------------------------------------------------------------------------------------- Finalize Aggregate (cost=3.60..3.61 rows=1 width=8) -> Gather Motion 2:1 (slice1; segments: 2) (cost=3.55..3.59 rows=2 width=8) @@ -11661,20 +11701,30 @@ explain select count(*) from noexp_hash n join gpexp_hash x on n.a=x.a; (11 rows) select count(*) from noexp_hash n join gpexp_hash x on n.a=x.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data count ------- 50 (1 row) delete from gpexp_hash where b between 21 and 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data select count(*) from gpexp_hash; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data count ------- 20 (1 row) update gpexp_hash set b=-1 where b between 11 and 100; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data select b, count(*) from gpexp_hash group by b order by b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data b | count ----+------- -1 | 10 @@ -11691,7 +11741,9 @@ select b, count(*) from gpexp_hash group by b order by b; (11 rows) explain update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN ---------------------------------------------------------------------------------------------------------- Update on gpexp_rand (cost=0.00..216.00 rows=25 width=18) -> Seq Scan on gpexp_rand (cost=0.00..215.00 rows=25 width=18) @@ -11705,7 +11757,11 @@ explain update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = g (9 rows) update gpexp_rand set b=(select b from gpexp_hash where gpexp_rand.a = gpexp_hash.a); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data select b, count(*) from gpexp_rand group by b order by b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data b | count ----+------- -1 | 10 @@ -11723,8 +11779,12 @@ select b, count(*) from gpexp_rand group by b order by b; (12 rows) delete from gpexp_repl where b >= 20; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data explain insert into gpexp_repl values (20, 20); - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN -------------------------------------------------------- Insert on gpexp_repl (cost=0.00..0.01 rows=1 width=8) -> Result (cost=0.00..0.01 rows=1 width=8) @@ -11732,8 +11792,12 @@ explain insert into gpexp_repl values (20, 20); (3 rows) insert into gpexp_repl values (20, 20); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data explain select count(*) from gpexp_hash h join gpexp_repl r on h.a=r.a; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN ------------------------------------------------------------------------------------------- Finalize Aggregate (cost=3.61..3.62 rows=1 width=8) -> Gather Motion 2:1 (slice1; segments: 2) (cost=3.56..3.60 rows=2 width=8) @@ -11747,13 +11811,17 @@ explain select count(*) from gpexp_hash h join gpexp_repl r on h.a=r.a; (9 rows) select count(*) as expect_20 from gpexp_hash h join gpexp_repl r on h.a=r.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data expect_20 ----------- 20 (1 row) explain select count(*) as expect_20 from noexp_hash h join gpexp_repl r on h.a=r.a; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data + QUERY PLAN ----------------------------------------------------------------------------------------------------------- Finalize Aggregate (cost=3.87..3.88 rows=1 width=8) -> Gather Motion 3:1 (slice1; segments: 3) (cost=3.81..3.86 rows=3 width=8) @@ -11769,6 +11837,8 @@ explain select count(*) as expect_20 from noexp_hash h join gpexp_repl r on h.a= (11 rows) select count(*) as expect_20 from noexp_hash h join gpexp_repl r on h.a=r.a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data expect_20 ----------- 20 @@ -11949,9 +12019,10 @@ select disable_xform('CXformFullOuterJoin2HashJoin'); CXformFullOuterJoin2HashJoin is disabled (1 row) --- fallback reason: Invalid system target list found for AO table EXPLAIN SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Invalid system target list found for AO table + QUERY PLAN ---------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=1.06..2.31 rows=6 width=8) -> Hash Full Join (cost=1.06..2.23 rows=2 width=8) @@ -11967,6 +12038,8 @@ EXPLAIN SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b; (11 rows) SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Invalid system target list found for AO table a | b ----+---- | 2 @@ -12446,6 +12519,8 @@ select * from tcorr1 out where out.b in (select coalesce(tcorr2.a, 99) from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA a | b ---+---- 1 | 99 @@ -12568,6 +12643,8 @@ select * from tcorr1 out where out.b in (select coalesce(tcorr2.a, 99) from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA a | b ---+---- 1 | 99 @@ -12676,7 +12753,7 @@ select * from foo join tbitmap on foo.a=tbitmap.a; Recheck Cond: (a = foo.a) -> Bitmap Index Scan on tbitmapxa Index Cond: (a = foo.a) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) select * from foo join tbitmap on foo.a=tbitmap.a; @@ -12723,7 +12800,6 @@ select * from foo join tbtree on foo.a=tbtree.a where tbtree.a < 5000; (5 rows) -- 4 bitmap with select pred --- start_ignore explain (costs off) select * from foo join tbitmap on foo.a=tbitmap.a where tbitmap.a < 5000; QUERY PLAN @@ -12737,18 +12813,17 @@ select * from foo join tbitmap on foo.a=tbitmap.a where tbitmap.a < 5000; Recheck Cond: ((a = foo.a) AND (a < 5000)) -> Bitmap Index Scan on tbitmapxa Index Cond: ((a = foo.a) AND (a < 5000)) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (10 rows) --- end_ignore select * from foo join tbitmap on foo.a=tbitmap.a where tbitmap.a < 5000; a | b | c | a | b | c ------+------+------+------+------+------ - 3000 | 3000 | 3000 | 3000 | 3000 | 3000 - 4000 | 4000 | 4000 | 4000 | 4000 | 4000 1000 | 1000 | 1000 | 1000 | 1000 | 1000 - 2000 | 2000 | 2000 | 2000 | 2000 | 2000 2000 | 2000 | 2000 | 2000 | -1 | -1 + 2000 | 2000 | 2000 | 2000 | 2000 | 2000 + 3000 | 3000 | 3000 | 3000 | 3000 | 3000 + 4000 | 4000 | 4000 | 4000 | 4000 | 4000 (5 rows) -- 5 btree with project @@ -12910,7 +12985,6 @@ select * from foo join (select a, count(*) + 5 as cnt from tbtree where tbtree.a (3 rows) -- 10 bitmap with proj select grby select --- start_ignore explain (costs off) select * from foo join (select a, count(*) + 5 as cnt from tbitmap where tbitmap.a < 5000 group by a having count(*) < 2) proj_sel_grby_sel on foo.a=proj_sel_grby_sel.a; QUERY PLAN @@ -12928,10 +13002,9 @@ select * from foo join (select a, count(*) + 5 as cnt from tbitmap where tbitmap Recheck Cond: ((a = foo.a) AND (a < 5000)) -> Bitmap Index Scan on tbitmapxa Index Cond: ((a = foo.a) AND (a < 5000)) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (14 rows) --- end_ignore select * from foo join (select a, count(*) + 5 as cnt from tbitmap where tbitmap.a < 5000 group by a having count(*) < 2) proj_sel_grby_sel on foo.a=proj_sel_grby_sel.a; a | b | c | a | cnt ------+------+------+------+----- @@ -14336,6 +14409,8 @@ INNER JOIN ( WHERE t.tradingday BETWEEN'20190715'AND '20190715' GROUP BY t.tradingday)t2 ON t1.tradingday = t2.tradingday; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA tradingday | ins_spaninsarbitrageratio | tradingday | prod_spaninsarbitrageratio ------------+---------------------------+------------+---------------------------- 20190715 | 1 | 20190715 | 0.9233716475 @@ -14399,32 +14474,33 @@ SELECT ( -- heavy datasets. Sort node should be on it's place for both, Postgres -- optimizer and ORCA. create table window_agg_test(i int, j int) distributed randomly; --- fallback reason: Attribute number 21 not found in project list -explain (costs off) +explain update window_agg_test t set i = tt.i from (select (min(i) over (order by j)) as i, j from window_agg_test) tt where t.j = tt.j; - QUERY PLAN ------------------------------------------------------------------------------- - Update on window_agg_test t - -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) - -> Hash Join +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Update on window_agg_test t (cost=3699.81..185385.16 rows=0 width=0) + -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=3699.81..185385.16 rows=2471070 width=78) + -> Hash Join (cost=3699.81..135963.76 rows=2471070 width=78) Hash Cond: (tt.j = t.j) - -> Redistribute Motion 1:3 (slice2; segments: 1) + -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=2446.06..6966.31 rows=28700 width=40) Hash Key: tt.j - -> Subquery Scan on tt - -> WindowAgg + -> Subquery Scan on tt (cost=2446.06..5818.31 rows=86100 width=40) + -> WindowAgg (cost=2446.06..4957.31 rows=86100 width=8) Order By: window_agg_test.j - -> Gather Motion 3:1 (slice3; segments: 3) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=2446.06..3665.81 rows=86100 width=8) Merge Key: window_agg_test.j - -> Sort + -> Sort (cost=2446.06..2517.81 rows=28700 width=8) Sort Key: window_agg_test.j - -> Seq Scan on window_agg_test - -> Hash - -> Redistribute Motion 3:3 (slice4; segments: 3) + -> Seq Scan on window_agg_test (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=895.00..895.00 rows=28700 width=46) + -> Redistribute Motion 3:3 (slice4; segments: 3) (cost=0.00..895.00 rows=28700 width=46) Hash Key: t.j - -> Seq Scan on window_agg_test t + -> Seq Scan on window_agg_test t (cost=0.00..321.00 rows=28700 width=46) Optimizer: Postgres query optimizer (19 rows) @@ -14740,6 +14816,8 @@ with cte as ( select * from empty_cte_tl_test where id in(select id from cte); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Empty target list id ---- (0 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out index 9923ea610d2..c88937e58cc 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out @@ -88,6 +88,15 @@ values (1,1,b'0000','1'), (2,2,b'0001','1'), (3,4,b'0010','2'), (4,8,b'0011','2'), (5,16,b'0000','2'), (6,32,b'0001','2'), (7,64,b'0010','1'), (8,128,b'0011','1'); +create temp table gstest5(id integer, v integer, + unsortable_col1 xid, unsortable_col2 xid); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into gstest5 +values (1,1,'3','1'), (2,2,'3','1'), + (3,4,'4','2'), (4,8,'4','2'), + (5,16,'4','2'), (6,32,'4','2'), + (7,64,'3','1'), (8,128,'3','1'); create temp table gstest_empty (a integer, b integer, v integer); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -513,7 +522,7 @@ select * from ( group by grouping sets(1, 2) ) ss where x = 1 and q1 = 123; - QUERY PLAN + QUERY PLAN ------------------------------------------------------ Result Output: NULL::integer, NULL::bigint, NULL::numeric @@ -540,17 +549,18 @@ cross join lateral (select (select i1.q1) as x) ss group by ss.x; QUERY PLAN ------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) + Finalize GroupAggregate Output: (GROUPING((SubPlan 1))), ((SubPlan 2)) - -> GroupAggregate - Output: GROUPING((SubPlan 1)), ((SubPlan 2)) - Group Key: ((SubPlan 2)) - -> Sort - Output: ((SubPlan 2)), i1.q1 - Sort Key: ((SubPlan 2)) - -> Redistribute Motion 3:3 (slice2; segments: 3) + Group Key: ((SubPlan 2)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: ((SubPlan 2)), (GROUPING((SubPlan 1))) + Merge Key: ((SubPlan 2)) + -> Partial GroupAggregate + Output: ((SubPlan 2)), GROUPING((SubPlan 1)) + Group Key: ((SubPlan 2)) + -> Sort Output: ((SubPlan 2)), i1.q1 - Hash Key: ((SubPlan 2)) + Sort Key: ((SubPlan 2)) -> Seq Scan on public.int8_tbl i1 Output: (SubPlan 2), i1.q1 SubPlan 2 @@ -558,7 +568,7 @@ group by ss.x; Output: i1.q1 Settings: enable_hashagg = 'off', optimizer = 'on' Optimizer: Postgres query optimizer -(18 rows) +(19 rows) select grouping(ss.x) from int8_tbl i1 @@ -577,28 +587,29 @@ cross join lateral (select (select i1.q1) as x) ss group by ss.x; QUERY PLAN ------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) - Output: ((SubPlan 2)), ((SubPlan 3)) - -> GroupAggregate - Output: (SubPlan 2), ((SubPlan 3)) - Group Key: ((SubPlan 3)) - -> Sort - Output: ((SubPlan 3)), i1.q1 - Sort Key: ((SubPlan 3)) - -> Redistribute Motion 3:3 (slice2; segments: 3) + Finalize GroupAggregate + Output: (SubPlan 2), ((SubPlan 3)) + Group Key: ((SubPlan 3)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: ((SubPlan 3)), (GROUPING((SubPlan 1))) + Merge Key: ((SubPlan 3)) + -> Partial GroupAggregate + Output: ((SubPlan 3)), GROUPING((SubPlan 1)) + Group Key: ((SubPlan 3)) + -> Sort Output: ((SubPlan 3)), i1.q1 - Hash Key: ((SubPlan 3)) + Sort Key: ((SubPlan 3)) -> Seq Scan on public.int8_tbl i1 Output: (SubPlan 3), i1.q1 SubPlan 3 -> Result Output: i1.q1 - SubPlan 2 - -> Result - Output: GROUPING((SubPlan 1)) + SubPlan 2 + -> Result + Output: (GROUPING((SubPlan 1))) Settings: enable_hashagg = 'off', optimizer = 'on' Optimizer: Postgres query optimizer -(21 rows) +(22 rows) select (select grouping(ss.x)) from int8_tbl i1 @@ -653,16 +664,16 @@ CREATE VIEW gstest_view AS select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d)); NOTICE: view "gstest_view" will be a temporary view select pg_get_viewdef('gstest_view'::regclass, true); - pg_get_viewdef -------------------------------------------------------------------------------- - SELECT gstest2.a, + - gstest2.b, + - GROUPING(gstest2.a, gstest2.b) AS "grouping", + - sum(gstest2.c) AS sum, + - count(*) AS count, + - max(gstest2.c) AS max + - FROM gstest2 + - GROUP BY ROLLUP((gstest2.a, gstest2.b, gstest2.c), (gstest2.c, gstest2.d)); + pg_get_viewdef +--------------------------------------- + SELECT a, + + b, + + GROUPING(a, b) AS "grouping", + + sum(c) AS sum, + + count(*) AS count, + + max(c) AS max + + FROM gstest2 + + GROUP BY ROLLUP((a, b, c), (c, d)); (1 row) -- Nested queries with 3 or more levels of nesting @@ -1047,8 +1058,6 @@ group by rollup(ten); (11 rows) -- More rescan tests --- start_ignore --- GPDB_95_MERGE_FIXME: the lateral query with grouping sets do not make right plans select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; a | a | four | ten | count ---+---+------+-----+------- @@ -1124,7 +1133,6 @@ select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, co 2 | 2 | | | 1000 (70 rows) --- end_ignore select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); array ------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -1344,6 +1352,80 @@ explain (costs off) Optimizer: Postgres query optimizer (13 rows) +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + | 1 | 2 | 4 | 195 + | 2 | 2 | 4 | 60 + 3 | | 1 | 4 | 195 + 4 | | 1 | 4 | 60 +(4 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial HashAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(13 rows) + +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + | | 3 | 8 | 255 + | 1 | 2 | 4 | 195 + | 2 | 2 | 4 | 60 + 3 | | 1 | 4 | 195 + 4 | | 1 | 4 | 60 +(5 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + Group Key: () + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(14 rows) + -- empty input: first is 0 rows, second 1, third 3 etc. select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); a | b | sum | count @@ -1719,8 +1801,6 @@ SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,( COMMIT; -- More rescan tests --- start_ignore --- GPDB_95_MERGE_FIXME: the lateral query with grouping sets do not make right plans select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; a | a | four | ten | count ---+---+------+-----+------- @@ -1796,7 +1876,6 @@ select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, co 2 | 2 | | | 1000 (70 rows) --- end_ignore select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); array ------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -1848,6 +1927,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou -- test the knapsack set enable_indexscan = false; +set hash_mem_multiplier = 1.0; set work_mem = '64kB'; explain (costs off) select unique1, @@ -2339,6 +2419,7 @@ group by cube (g1000,g100,g10) distributed by (g1000); set jit_above_cost to default; set enable_sort = true; set work_mem to default; +set hash_mem_multiplier to default; -- Compare results of ORCA plan that relies on "IS NOT DISTINCT FROM" HASH Join (select * from gs_hash_1 except select * from gs_group_1) union all @@ -2489,7 +2570,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; QUERY PLAN --------------------------- GroupAggregate - Group Key: $2 InitPlan 1 (returns $1) -> Result InitPlan 3 (returns $2) @@ -2497,7 +2577,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; -> Result SubPlan 2 -> Result -(9 rows) +(8 rows) select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; grouping diff --git a/contrib/pax_storage/src/test/regress/expected/hash_index.out b/contrib/pax_storage/src/test/regress/expected/hash_index.out index c6d731598d6..5d26bf6dfa8 100644 --- a/contrib/pax_storage/src/test/regress/expected/hash_index.out +++ b/contrib/pax_storage/src/test/regress/expected/hash_index.out @@ -156,6 +156,8 @@ SELECT * FROM hash_f8_heap UPDATE hash_i4_heap SET random = 1 WHERE hash_i4_heap.seqno = 1492; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 11 not found in project list SELECT h.seqno AS i1492, h.random AS i1 FROM hash_i4_heap h WHERE h.random = 1; @@ -178,6 +180,8 @@ SELECT h.seqno AS i20000 UPDATE hash_name_heap SET random = '0123456789abcdef'::name WHERE hash_name_heap.seqno = 6543; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 11 not found in project list SELECT h.seqno AS i6543, h.random AS c0_to_f FROM hash_name_heap h WHERE h.random = '0123456789abcdef'::name; @@ -202,6 +206,8 @@ SELECT h.seqno AS emptyset UPDATE hash_txt_heap SET random = '0123456789abcdefghijklmnop'::text WHERE hash_txt_heap.seqno = 4002; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 11 not found in project list SELECT h.seqno AS i4002, h.random AS c0_to_p FROM hash_txt_heap h WHERE h.random = '0123456789abcdefghijklmnop'::text; @@ -224,6 +230,8 @@ SELECT h.seqno AS t20000 UPDATE hash_f8_heap SET random = '-1234.1234'::float8 WHERE hash_f8_heap.seqno = 8906; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 11 not found in project list SELECT h.seqno AS i8096, h.random AS f1234_1234 FROM hash_f8_heap h WHERE h.random = '-1234.1234'::float8; diff --git a/contrib/pax_storage/src/test/regress/expected/horology.out b/contrib/pax_storage/src/test/regress/expected/horology.out index f1d66186ee9..7279f67f787 100644 --- a/contrib/pax_storage/src/test/regress/expected/horology.out +++ b/contrib/pax_storage/src/test/regress/expected/horology.out @@ -2436,13 +2436,12 @@ select count(*) from date_tbl where f1 between '1997-01-01' and '1998-01-01'; QUERY PLAN ----------------------------------------------------------------------------------------- - Finalize Aggregate + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Seq Scan on date_tbl - Filter: ((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) + -> Seq Scan on date_tbl + Filter: ((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) Optimizer: Postgres query optimizer -(6 rows) +(5 rows) select count(*) from date_tbl where f1 between '1997-01-01' and '1998-01-01'; @@ -2454,15 +2453,14 @@ select count(*) from date_tbl explain (costs off) select count(*) from date_tbl where f1 not between '1997-01-01' and '1998-01-01'; - QUERY PLAN + QUERY PLAN -------------------------------------------------------------------------------------- - Finalize Aggregate + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Seq Scan on date_tbl - Filter: ((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) + -> Seq Scan on date_tbl + Filter: ((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) Optimizer: Postgres query optimizer -(6 rows) +(5 rows) select count(*) from date_tbl where f1 not between '1997-01-01' and '1998-01-01'; @@ -2474,15 +2472,14 @@ select count(*) from date_tbl explain (costs off) select count(*) from date_tbl where f1 between symmetric '1997-01-01' and '1998-01-01'; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Seq Scan on date_tbl - Filter: (((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) OR ((f1 >= '01-01-1998'::date) AND (f1 <= '01-01-1997'::date))) + -> Seq Scan on date_tbl + Filter: (((f1 >= '01-01-1997'::date) AND (f1 <= '01-01-1998'::date)) OR ((f1 >= '01-01-1998'::date) AND (f1 <= '01-01-1997'::date))) Optimizer: Postgres query optimizer -(6 rows) +(5 rows) select count(*) from date_tbl where f1 between symmetric '1997-01-01' and '1998-01-01'; @@ -2494,15 +2491,14 @@ select count(*) from date_tbl explain (costs off) select count(*) from date_tbl where f1 not between symmetric '1997-01-01' and '1998-01-01'; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Seq Scan on date_tbl - Filter: (((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) AND ((f1 < '01-01-1998'::date) OR (f1 > '01-01-1997'::date))) + -> Seq Scan on date_tbl + Filter: (((f1 < '01-01-1997'::date) OR (f1 > '01-01-1998'::date)) AND ((f1 < '01-01-1998'::date) OR (f1 > '01-01-1997'::date))) Optimizer: Postgres query optimizer -(6 rows) +(5 rows) select count(*) from date_tbl where f1 not between symmetric '1997-01-01' and '1998-01-01'; diff --git a/contrib/pax_storage/src/test/regress/expected/incremental_sort_optimizer.out b/contrib/pax_storage/src/test/regress/expected/incremental_sort_optimizer.out index 9f7a7ce4a57..215486fe8f5 100644 --- a/contrib/pax_storage/src/test/regress/expected/incremental_sort_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/incremental_sort_optimizer.out @@ -1484,11 +1484,6 @@ set parallel_tuple_cost=0; set parallel_setup_cost=0; set min_parallel_table_scan_size = 0; set min_parallel_index_scan_size = 0; --- The execution plan of pax will be inaccurate if it involves bitmap heap scan. --- This depends on the fact that the pg cost estimate will be calculated based on page --- but pax cannot give an accurate number of pages. -set enable_bitmapscan=off; -set optimizer_enable_bitmapscan=off; -- Parallel sort below join. explain (costs off) select distinct sub.unique1, stringu1 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; @@ -1663,19 +1658,3 @@ order by 1, 2; Optimizer: Postgres query optimizer (8 rows) --- Disallow pushing down sort when pathkey is an SRF. -explain (costs off) select unique1 from tenk1 order by unnest('{1,2}'::int[]); - QUERY PLAN ------------------------------------------------------ - Result - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (unnest('{1,2}'::anyarray)) - -> Sort - Sort Key: (unnest('{1,2}'::anyarray)) - -> ProjectSet - -> Seq Scan on tenk1 - Optimizer: Pivotal Optimizer (GPORCA) -(8 rows) - -reset enable_bitmapscan; -reset optimizer_enable_bitmapscan; diff --git a/contrib/pax_storage/src/test/regress/expected/inet_optimizer.out b/contrib/pax_storage/src/test/regress/expected/inet_optimizer.out index c0daafb9137..11ef8ddebb1 100644 --- a/contrib/pax_storage/src/test/regress/expected/inet_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/inet_optimizer.out @@ -346,7 +346,6 @@ SELECT * FROM inet_tbl WHERE '192.168.1.0/24'::cidr >> i; SET enable_seqscan TO on; DROP INDEX inet_idx1; -- check that gist index works correctly --- PAX not support gist/spgist/brin indexes CREATE INDEX inet_idx2 ON inet_tbl using gist (i inet_ops); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) SET enable_seqscan TO off; @@ -500,7 +499,6 @@ SET enable_seqscan TO on; DROP INDEX inet_idx2; ERROR: index "inet_idx2" does not exist -- check that spgist index works correctly --- PAX not support gist/spgist/brin indexes CREATE INDEX inet_idx3 ON inet_tbl using spgist (i); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) SET enable_seqscan TO off; @@ -1073,3 +1071,40 @@ SELECT a FROM (VALUES ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff (91 rows) +-- test non-error-throwing API for some core types +SELECT pg_input_is_valid('1234', 'cidr'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('1234', 'cidr'); + message | detail | hint | sql_error_code +--------------------------------------------+--------+------+---------------- + invalid input syntax for type cidr: "1234" | | | 22P02 +(1 row) + +SELECT pg_input_is_valid('192.168.198.200/24', 'cidr'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('192.168.198.200/24', 'cidr'); + message | detail | hint | sql_error_code +------------------------------------------+--------------------------------------+------+---------------- + invalid cidr value: "192.168.198.200/24" | Value has bits set to right of mask. | | 22P02 +(1 row) + +SELECT pg_input_is_valid('1234', 'inet'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('1234', 'inet'); + message | detail | hint | sql_error_code +--------------------------------------------+--------+------+---------------- + invalid input syntax for type inet: "1234" | | | 22P02 +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/inherit_optimizer.out b/contrib/pax_storage/src/test/regress/expected/inherit_optimizer.out index c23a0877d7c..1b7799140e5 100644 --- a/contrib/pax_storage/src/test/regress/expected/inherit_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/inherit_optimizer.out @@ -539,6 +539,34 @@ CREATE TEMP TABLE z (b TEXT, PRIMARY KEY(aa, b)) inherits (a); INSERT INTO z VALUES (NULL, 'text'); -- should fail ERROR: null value in column "aa" of relation "z" violates not-null constraint DETAIL: Failing row contains (null, text). +-- Check inherited UPDATE with first child excluded +create table some_tab (f1 int, f2 int, f3 int, check (f1 < 10) no inherit); +create table some_tab_child () inherits(some_tab); +insert into some_tab_child select i, i+1, 0 from generate_series(1,1000) i; +create index on some_tab_child(f1, f2); +-- while at it, also check that statement-level triggers fire +create function some_tab_stmt_trig_func() returns trigger as +$$begin raise notice 'updating some_tab'; return NULL; end;$$ +language plpgsql; +create trigger some_tab_stmt_trig + before update on some_tab execute function some_tab_stmt_trig_func(); +ERROR: Triggers for statements are not yet supported +explain (costs off) +update some_tab set f3 = 11 where f1 = 12 and f2 = 13; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Update on some_tab + Update on some_tab_child some_tab_1 + -> Result + -> Index Scan using some_tab_child_f1_f2_idx on some_tab_child some_tab_1 + Index Cond: ((f1 = 12) AND (f2 = 13)) + Optimizer: Postgres query optimizer +(6 rows) + +update some_tab set f3 = 11 where f1 = 12 and f2 = 13; +drop table some_tab cascade; +NOTICE: drop cascades to table some_tab_child +drop function some_tab_stmt_trig_func(); -- Check inherited UPDATE with all children excluded create table some_tab (a int, b int) distributed randomly; create table some_tab_child () inherits (some_tab); @@ -1080,6 +1108,34 @@ Inherits: inht1, Distributed by: (aa) DROP TABLE inhts; +-- Test for adding a column to a parent table with complex inheritance +CREATE TABLE inhta (); +CREATE TABLE inhtb () INHERITS (inhta); +CREATE TABLE inhtc () INHERITS (inhtb); +CREATE TABLE inhtd () INHERITS (inhta, inhtb, inhtc); +ALTER TABLE inhta ADD COLUMN i int, ADD COLUMN j bigint DEFAULT 1; +NOTICE: merging definition of column "i" for child "inhtd" +NOTICE: merging definition of column "j" for child "inhtd" +\d+ inhta + Table "public.inhta" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + i | integer | | | | plain | | + j | bigint | | | 1 | plain | | +Child tables: inhtb, + inhtd + +\d+ inhtd + Table "public.inhtd" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + i | integer | | | | plain | | + j | bigint | | | 1 | plain | | +Inherits: inhta, + inhtb, + inhtc + +DROP TABLE inhta, inhtb, inhtc, inhtd; -- Test for renaming in diamond inheritance CREATE TABLE inht2 (x int) INHERITS (inht1); CREATE TABLE inht3 (y int) INHERITS (inht1); @@ -1188,19 +1244,23 @@ Distributed by: (val1, val2) DROP TABLE test_constraints_inh; DROP TABLE test_constraints; --- PAX not support gist/spgist/brin indexes --- CREATE TABLE test_ex_constraints ( --- c circle, --- dkey inet, --- EXCLUDE USING gist (dkey inet_ops WITH =, c WITH &&) --- ); --- CREATE TABLE test_ex_constraints_inh () INHERITS (test_ex_constraints); --- \d+ test_ex_constraints --- ALTER TABLE test_ex_constraints DROP CONSTRAINT test_ex_constraints_dkey_c_excl; --- \d+ test_ex_constraints --- \d+ test_ex_constraints_inh --- DROP TABLE test_ex_constraints_inh; --- DROP TABLE test_ex_constraints; +CREATE TABLE test_ex_constraints ( + c circle, + dkey inet, + EXCLUDE USING gist (dkey inet_ops WITH =, c WITH &&) +); +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:###) +CREATE TABLE test_ex_constraints_inh () INHERITS (test_ex_constraints); +ERROR: relation "test_ex_constraints" does not exist +\d+ test_ex_constraints +ALTER TABLE test_ex_constraints DROP CONSTRAINT test_ex_constraints_dkey_c_excl; +ERROR: relation "test_ex_constraints" does not exist +\d+ test_ex_constraints +\d+ test_ex_constraints_inh +DROP TABLE test_ex_constraints_inh; +ERROR: table "test_ex_constraints_inh" does not exist +DROP TABLE test_ex_constraints; +ERROR: table "test_ex_constraints" does not exist -- Test non-inheritable foreign key constraints CREATE TABLE test_primary_constraints(id int PRIMARY KEY); CREATE TABLE test_foreign_constraints(id1 int REFERENCES test_primary_constraints(id)); @@ -1579,6 +1639,39 @@ select min(1-id) from matest0; reset enable_seqscan; reset enable_parallel_append; reset enable_bitmapscan; +explain (verbose, costs off) -- bug #18652 +select 1 - id as c from +(select id from matest3 t1 union all select id * 2 from matest3 t2) ss +order by c; + QUERY PLAN +------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: ((1 - t1.id)) + Merge Key: ((1 - t1.id)) + -> Sort + Output: ((1 - t1.id)) + Sort Key: ((1 - t1.id)) + -> Result + Output: (1 - t1.id) + -> Append + -> Seq Scan on public.matest3 t1 + Output: t1.id + -> Seq Scan on public.matest3 t2 + Output: (t2.id * 2) + Optimizer: GPORCA +(14 rows) + +select 1 - id as c from +(select id from matest3 t1 union all select id * 2 from matest3 t2) ss +order by c; + c +----- + -4 + -5 + -9 + -11 +(4 rows) + drop table matest0 cascade; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table matest1 @@ -1788,6 +1881,117 @@ reset enable_indexscan; reset enable_bitmapscan; rollback; -- +-- Check handling of MULTIEXPR SubPlans in inherited updates +-- +create table inhpar(f1 int, f2 name); +create table inhcld(f2 name, f1 int); +alter table inhcld inherit inhpar; +insert into inhpar select x, x::text from generate_series(1,5) x; +insert into inhcld select x::text, x from generate_series(6,10) x; +explain (verbose, costs off) +update inhpar i set (f1, f2) = (select i.f1, i.f2 || '-' from int4_tbl limit 1); +ERROR: can't split update for inherit table: +update inhpar i set (f1, f2) = (select i.f1, i.f2 || '-' from int4_tbl limit 1); +ERROR: can't split update for inherit table: +select * from inhpar order by f1; + f1 | f2 +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +drop table inhpar cascade; +NOTICE: drop cascades to table inhcld +-- +-- And the same for partitioned cases +-- +create table inhpar(f1 int primary key, f2 name) partition by range (f1); +create table inhcld1(f2 name, f1 int primary key); +create table inhcld2(f1 int primary key, f2 name); +alter table inhpar attach partition inhcld1 for values from (1) to (5); +alter table inhpar attach partition inhcld2 for values from (5) to (100); +insert into inhpar select x, x::text from generate_series(1,10) x; +explain (verbose, costs off) +update inhpar i set (f1, f2) = (select i.f1, i.f2 || '-' from int4_tbl limit 1); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Update on public.inhpar i + Update on public.inhcld1 i_1 + Update on public.inhcld2 i_2 + -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) + Output: ($2), (($3)::name), ((SubPlan 1 (returns $2,$3) (copy 2))), i.tableoid, i.ctid, i.gp_segment_id, i.*, (DMLAction) + -> Split Update + Output: ($2), (($3)::name), ((SubPlan 1 (returns $2,$3) (copy 2))), i.tableoid, i.ctid, i.gp_segment_id, i.*, DMLAction + -> Append + -> Seq Scan on public.inhcld1 i_1 + Output: $2, $3, (SubPlan 1 (returns $2,$3) (copy 2)), i_1.tableoid, i_1.ctid, i_1.gp_segment_id, i_1.* + SubPlan 1 (returns $2,$3) (copy 2) + -> Limit + Output: (i_1.f1), (((i_1.f2)::text || '-'::text)) + -> Result + Output: i_1.f1, ((i_1.f2)::text || '-'::text) + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on public.int4_tbl + -> Seq Scan on public.inhcld2 i_2 + Output: $2, $3, (SubPlan 1 (returns $2,$3) (copy 3)), i_2.tableoid, i_2.ctid, i_2.gp_segment_id, i_2.* + SubPlan 1 (returns $2,$3) (copy 3) + -> Limit + Output: (i_2.f1), (((i_2.f2)::text || '-'::text)) + -> Result + Output: i_2.f1, ((i_2.f2)::text || '-'::text) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on public.int4_tbl int4_tbl_1 + Settings: enable_mergejoin = 'on', enable_bitmapscan = 'off', enable_indexscan = 'on', enable_seqscan = 'off' + Optimizer: Postgres query optimizer +(30 rows) + +update inhpar i set (f1, f2) = (select i.f1, i.f2 || '-' from int4_tbl limit 1); +select * from inhpar; + f1 | f2 +----+----- + 1 | 1- + 2 | 2- + 3 | 3- + 4 | 4- + 5 | 5- + 6 | 6- + 7 | 7- + 8 | 8- + 9 | 9- + 10 | 10- +(10 rows) + +-- Also check ON CONFLICT +insert into inhpar as i values (3), (7) on conflict (f1) + do update set (f1, f2) = (select i.f1, i.f2 || '+'); +ERROR: modification of distribution columns in OnConflictUpdate is not supported +select * from inhpar order by f1; -- tuple order might be unstable here + f1 | f2 +----+----- + 1 | 1- + 2 | 2- + 3 | 3- + 4 | 4- + 5 | 5- + 6 | 6- + 7 | 7- + 8 | 8- + 9 | 9- + 10 | 10- +(10 rows) + +drop table inhpar cascade; +-- -- Check handling of a constant-null CHECK constraint -- create table cnullparent (f1 int); @@ -2306,6 +2510,8 @@ explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; Optimizer: Postgres query optimizer (12 rows) +set enable_bitmapscan to off; +set enable_sort to off; create table mclparted (a int) partition by list(a); create table mclparted1 partition of mclparted for values in(1); create table mclparted2 partition of mclparted for values in(2); @@ -2339,7 +2545,110 @@ explain (costs off) select * from mclparted order by a; Optimizer: GPORCA (7 rows) +explain (costs off) select * from mclparted where a in(3,4,5) order by a; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Index Only Scan on mclparted_a_idx on mclparted + Index Cond: (a = ANY ('{3,4,5}'::integer[])) + Number of partitions to scan: 2 (out of 4) + Optimizer: GPORCA +(8 rows) + +-- Introduce a NULL and DEFAULT partition so we can test more complex cases +create table mclparted_null partition of mclparted for values in(null); +create table mclparted_def partition of mclparted default; +-- Append can be used providing we don't scan the interleaved partition +explain (costs off) select * from mclparted where a in(1,2,4) order by a; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Index Only Scan on mclparted_a_idx on mclparted + Index Cond: (a = ANY ('{1,2,4}'::integer[])) + Number of partitions to scan: 3 (out of 6) + Optimizer: GPORCA +(8 rows) + +explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Seq Scan on mclparted + Number of partitions to scan: 4 (out of 6) + Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL)) + Optimizer: GPORCA +(8 rows) + +-- Test a more complex case where the NULL partition allows some other value +drop table mclparted_null; +create table mclparted_0_null partition of mclparted for values in(0,null); +-- Ensure MergeAppend is used since 0 and NULLs are in the same partition. +explain (costs off) select * from mclparted where a in(1,2,4) or a is null order by a; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Seq Scan on mclparted + Number of partitions to scan: 4 (out of 6) + Filter: ((a = ANY ('{1,2,4}'::integer[])) OR (a IS NULL)) + Optimizer: GPORCA +(8 rows) + +explain (costs off) select * from mclparted where a in(0,1,2,4) order by a; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Index Only Scan on mclparted_a_idx on mclparted + Index Cond: (a = ANY ('{0,1,2,4}'::integer[])) + Number of partitions to scan: 4 (out of 6) + Optimizer: GPORCA +(8 rows) + +-- Ensure Append is used when the null partition is pruned +explain (costs off) select * from mclparted where a in(1,2,4) order by a; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Index Only Scan on mclparted_a_idx on mclparted + Index Cond: (a = ANY ('{1,2,4}'::integer[])) + Number of partitions to scan: 3 (out of 6) + Optimizer: GPORCA +(8 rows) + +-- Ensure MergeAppend is used when the default partition is not pruned +explain (costs off) select * from mclparted where a in(1,2,4,100) order by a; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a + -> Sort + Sort Key: a + -> Dynamic Index Only Scan on mclparted_a_idx on mclparted + Index Cond: (a = ANY ('{1,2,4,100}'::integer[])) + Number of partitions to scan: 4 (out of 6) + Optimizer: GPORCA +(8 rows) + drop table mclparted; +reset enable_sort; +reset enable_bitmapscan; -- Ensure subplans which don't have a path with the correct pathkeys get -- sorted correctly. drop index mcrparted_a_abs_c_idx; @@ -2511,7 +2820,7 @@ alter table permtest_child attach partition permtest_grandchild for values in (' alter table permtest_parent attach partition permtest_child for values in (1); create index on permtest_parent (left(c, 3)); insert into permtest_parent - select 1, 'a', left(md5(i::text), 5) from generate_series(0, 100) i; + select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; analyze permtest_parent; create role regress_no_child_access; revoke all on permtest_grandchild from regress_no_child_access; diff --git a/contrib/pax_storage/src/test/regress/expected/interval_optimizer.out b/contrib/pax_storage/src/test/regress/expected/interval_optimizer.out index 0edf2284c9f..682813f3b14 100755 --- a/contrib/pax_storage/src/test/regress/expected/interval_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/interval_optimizer.out @@ -72,6 +72,37 @@ INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 30 eons ago'); ERROR: invalid input syntax for type interval: "@ 30 eons ago" LINE 1: INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 30 eons ago'); ^ +-- Test non-error-throwing API +SELECT pg_input_is_valid('1.5 weeks', 'interval'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('garbage', 'interval'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_is_valid('@ 30 eons ago', 'interval'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('garbage', 'interval'); + message | detail | hint | sql_error_code +---------------------------------------------------+--------+------+---------------- + invalid input syntax for type interval: "garbage" | | | 22007 +(1 row) + +SELECT * FROM pg_input_error_info('@ 30 eons ago', 'interval'); + message | detail | hint | sql_error_code +---------------------------------------------------------+--------+------+---------------- + invalid input syntax for type interval: "@ 30 eons ago" | | | 22007 +(1 row) + -- test interval operators SELECT * FROM INTERVAL_TBL; f1 @@ -362,6 +393,19 @@ SELECT * FROM INTERVAL_TBL; @ 6 years (10 rows) +-- multiplication and division overflow test cases +SELECT '3000000 months'::interval * 1000; +ERROR: interval out of range +SELECT '3000000 months'::interval / 0.001; +ERROR: interval out of range +SELECT '3000000 days'::interval * 1000; +ERROR: interval out of range +SELECT '3000000 days'::interval / 0.001; +ERROR: interval out of range +SELECT '1 month 2146410 days'::interval * 1000.5002; +ERROR: interval out of range +SELECT '4611686018427387904 usec'::interval / 0.1; +ERROR: interval out of range -- test avg(interval), which is somewhat fragile since people have been -- known to change the allowed input syntax for type interval without -- updating pg_aggregate.agginitval @@ -401,6 +445,10 @@ SELECT justify_days(interval '6 months 36 days 5 hours 4 minutes 3 seconds') as @ 7 mons 6 days 5 hours 4 mins 3 secs (1 row) +SELECT justify_hours(interval '2147483647 days 24 hrs'); +ERROR: interval out of range +SELECT justify_days(interval '2147483647 months 30 days'); +ERROR: interval out of range -- test justify_interval() SELECT justify_interval(interval '1 month -1 hour') as "1 month -1 hour"; 1 month -1 hour @@ -408,6 +456,38 @@ SELECT justify_interval(interval '1 month -1 hour') as "1 month -1 hour"; @ 29 days 23 hours (1 row) +SELECT justify_interval(interval '2147483647 days 24 hrs'); + justify_interval +------------------------------- + @ 5965232 years 4 mons 8 days +(1 row) + +SELECT justify_interval(interval '-2147483648 days -24 hrs'); + justify_interval +----------------------------------- + @ 5965232 years 4 mons 9 days ago +(1 row) + +SELECT justify_interval(interval '2147483647 months 30 days'); +ERROR: interval out of range +SELECT justify_interval(interval '-2147483648 months -30 days'); +ERROR: interval out of range +SELECT justify_interval(interval '2147483647 months 30 days -24 hrs'); + justify_interval +---------------------------------- + @ 178956970 years 7 mons 29 days +(1 row) + +SELECT justify_interval(interval '-2147483648 months -30 days 24 hrs'); + justify_interval +-------------------------------------- + @ 178956970 years 8 mons 29 days ago +(1 row) + +SELECT justify_interval(interval '2147483647 months -30 days 1440 hrs'); +ERROR: interval out of range +SELECT justify_interval(interval '-2147483648 months 30 days -1440 hrs'); +ERROR: interval out of range -- test fractional second input, and detection of duplicate units SET DATESTYLE = 'ISO'; SET IntervalStyle TO postgres; @@ -795,6 +875,16 @@ SELECT interval '+1 -1:00:00', 1 day -01:00:00 | -1 days +01:00:00 | 1 year 2 mons -3 days +04:05:06.789 | -1 years -2 mons +3 days -04:05:06.789 (1 row) +-- cases that trigger sign-matching rules in the sql style +SELECT interval '-23 hours 45 min 12.34 sec', + interval '-1 day 23 hours 45 min 12.34 sec', + interval '-1 year 2 months 1 day 23 hours 45 min 12.34 sec', + interval '-1 year 2 months 1 day 23 hours 45 min +12.34 sec'; + interval | interval | interval | interval +--------------+----------------------+-----------------------------+----------------------------- + -22:14:47.66 | -1 days +23:45:12.34 | -10 mons +1 day 23:45:12.34 | -10 mons +1 day 23:45:12.34 +(1 row) + -- test output of couple non-standard interval values in the sql style SET IntervalStyle TO sql_standard; SELECT interval '1 day -1 hours', @@ -806,6 +896,21 @@ SELECT interval '1 day -1 hours', +0-0 +1 -1:00:00 | +0-0 -1 +1:00:00 | +1-2 -3 +4:05:06.789 | -1-2 +3 -4:05:06.789 (1 row) +-- cases that trigger sign-matching rules in the sql style +SELECT interval '-23 hours 45 min 12.34 sec', + interval '-1 day 23 hours 45 min 12.34 sec', + interval '-1 year 2 months 1 day 23 hours 45 min 12.34 sec', + interval '-1 year 2 months 1 day 23 hours 45 min +12.34 sec'; + interval | interval | interval | interval +--------------+----------------+----------------------+----------------------- + -23:45:12.34 | -1 23:45:12.34 | -1-2 -1 -23:45:12.34 | -0-10 +1 +23:45:12.34 +(1 row) + +-- edge case for sign-matching rules +SELECT interval ''; -- error +ERROR: invalid input syntax for type interval: "" +LINE 1: SELECT interval ''; + ^ -- test outputting iso8601 intervals SET IntervalStyle to iso_8601; select interval '0' AS "zero", @@ -857,6 +962,47 @@ select interval 'P0002' AS "year only", 2 years | 2 years 10 mons | 2 years 10 mons 15 days | 2 years 00:00:01 | 2 years 10 mons 00:00:01 | 2 years 10 mons 15 days 00:00:01 | 10:00:00 | 10:30:00 (1 row) +-- Check handling of fractional fields in ISO8601 format. +select interval 'P1Y0M3DT4H5M6S'; + interval +------------------------ + 1 year 3 days 04:05:06 +(1 row) + +select interval 'P1.0Y0M3DT4H5M6S'; + interval +------------------------ + 1 year 3 days 04:05:06 +(1 row) + +select interval 'P1.1Y0M3DT4H5M6S'; + interval +------------------------------ + 1 year 1 mon 3 days 04:05:06 +(1 row) + +select interval 'P1.Y0M3DT4H5M6S'; + interval +------------------------ + 1 year 3 days 04:05:06 +(1 row) + +select interval 'P.1Y0M3DT4H5M6S'; + interval +----------------------- + 1 mon 3 days 04:05:06 +(1 row) + +select interval 'P10.5e4Y'; -- not per spec, but we've historically taken it + interval +-------------- + 105000 years +(1 row) + +select interval 'P.Y0M3DT4H5M6S'; -- error +ERROR: invalid input syntax for type interval: "P.Y0M3DT4H5M6S" +LINE 1: select interval 'P.Y0M3DT4H5M6S'; + ^ -- test a couple rounding cases that changed since 8.3 w/ HAVE_INT64_TIMESTAMP. SET IntervalStyle to postgres_verbose; select interval '-10 mons -3 days +03:55:06.70'; @@ -877,6 +1023,617 @@ select interval '0:0:0.7', interval '@ 0.70 secs', interval '0.7 seconds'; @ 0.7 secs | @ 0.7 secs | @ 0.7 secs (1 row) +-- test time fields using entire 64 bit microseconds range +select interval '2562047788.01521550194 hours'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval '-2562047788.01521550222 hours'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval '153722867280.912930117 minutes'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval '-153722867280.912930133 minutes'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval '9223372036854.775807 seconds'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval '-9223372036854.775808 seconds'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval '9223372036854775.807 milliseconds'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval '-9223372036854775.808 milliseconds'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval '9223372036854775807 microseconds'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval '-9223372036854775808 microseconds'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval 'PT2562047788H54.775807S'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval 'PT-2562047788H-54.775808S'; + interval +--------------------------------------- + @ 2562047788 hours 54.775808 secs ago +(1 row) + +select interval 'PT2562047788:00:54.775807'; + interval +----------------------------------- + @ 2562047788 hours 54.775807 secs +(1 row) + +select interval 'PT2562047788.0152155019444'; + interval +----------------------------------- + @ 2562047788 hours 54.775429 secs +(1 row) + +select interval 'PT-2562047788.0152155022222'; + interval +--------------------------------------- + @ 2562047788 hours 54.775429 secs ago +(1 row) + +-- overflow each date/time field +select interval '2147483648 years'; +ERROR: interval field value out of range: "2147483648 years" +LINE 1: select interval '2147483648 years'; + ^ +select interval '-2147483649 years'; +ERROR: interval field value out of range: "-2147483649 years" +LINE 1: select interval '-2147483649 years'; + ^ +select interval '2147483648 months'; +ERROR: interval field value out of range: "2147483648 months" +LINE 1: select interval '2147483648 months'; + ^ +select interval '-2147483649 months'; +ERROR: interval field value out of range: "-2147483649 months" +LINE 1: select interval '-2147483649 months'; + ^ +select interval '2147483648 days'; +ERROR: interval field value out of range: "2147483648 days" +LINE 1: select interval '2147483648 days'; + ^ +select interval '-2147483649 days'; +ERROR: interval field value out of range: "-2147483649 days" +LINE 1: select interval '-2147483649 days'; + ^ +select interval '2562047789 hours'; +ERROR: interval field value out of range: "2562047789 hours" +LINE 1: select interval '2562047789 hours'; + ^ +select interval '-2562047789 hours'; +ERROR: interval field value out of range: "-2562047789 hours" +LINE 1: select interval '-2562047789 hours'; + ^ +select interval '153722867281 minutes'; +ERROR: interval field value out of range: "153722867281 minutes" +LINE 1: select interval '153722867281 minutes'; + ^ +select interval '-153722867281 minutes'; +ERROR: interval field value out of range: "-153722867281 minutes" +LINE 1: select interval '-153722867281 minutes'; + ^ +select interval '9223372036855 seconds'; +ERROR: interval field value out of range: "9223372036855 seconds" +LINE 1: select interval '9223372036855 seconds'; + ^ +select interval '-9223372036855 seconds'; +ERROR: interval field value out of range: "-9223372036855 seconds" +LINE 1: select interval '-9223372036855 seconds'; + ^ +select interval '9223372036854777 millisecond'; +ERROR: interval field value out of range: "9223372036854777 millisecond" +LINE 1: select interval '9223372036854777 millisecond'; + ^ +select interval '-9223372036854777 millisecond'; +ERROR: interval field value out of range: "-9223372036854777 millisecond" +LINE 1: select interval '-9223372036854777 millisecond'; + ^ +select interval '9223372036854775808 microsecond'; +ERROR: interval field value out of range: "9223372036854775808 microsecond" +LINE 1: select interval '9223372036854775808 microsecond'; + ^ +select interval '-9223372036854775809 microsecond'; +ERROR: interval field value out of range: "-9223372036854775809 microsecond" +LINE 1: select interval '-9223372036854775809 microsecond'; + ^ +select interval 'P2147483648'; +ERROR: interval field value out of range: "P2147483648" +LINE 1: select interval 'P2147483648'; + ^ +select interval 'P-2147483649'; +ERROR: interval field value out of range: "P-2147483649" +LINE 1: select interval 'P-2147483649'; + ^ +select interval 'P1-2147483647-2147483647'; +ERROR: interval out of range +LINE 1: select interval 'P1-2147483647-2147483647'; + ^ +select interval 'PT2562047789'; +ERROR: interval field value out of range: "PT2562047789" +LINE 1: select interval 'PT2562047789'; + ^ +select interval 'PT-2562047789'; +ERROR: interval field value out of range: "PT-2562047789" +LINE 1: select interval 'PT-2562047789'; + ^ +-- overflow with date/time unit aliases +select interval '2147483647 weeks'; +ERROR: interval field value out of range: "2147483647 weeks" +LINE 1: select interval '2147483647 weeks'; + ^ +select interval '-2147483648 weeks'; +ERROR: interval field value out of range: "-2147483648 weeks" +LINE 1: select interval '-2147483648 weeks'; + ^ +select interval '2147483647 decades'; +ERROR: interval field value out of range: "2147483647 decades" +LINE 1: select interval '2147483647 decades'; + ^ +select interval '-2147483648 decades'; +ERROR: interval field value out of range: "-2147483648 decades" +LINE 1: select interval '-2147483648 decades'; + ^ +select interval '2147483647 centuries'; +ERROR: interval field value out of range: "2147483647 centuries" +LINE 1: select interval '2147483647 centuries'; + ^ +select interval '-2147483648 centuries'; +ERROR: interval field value out of range: "-2147483648 centuries" +LINE 1: select interval '-2147483648 centuries'; + ^ +select interval '2147483647 millennium'; +ERROR: interval field value out of range: "2147483647 millennium" +LINE 1: select interval '2147483647 millennium'; + ^ +select interval '-2147483648 millennium'; +ERROR: interval field value out of range: "-2147483648 millennium" +LINE 1: select interval '-2147483648 millennium'; + ^ +select interval '1 week 2147483647 days'; +ERROR: interval field value out of range: "1 week 2147483647 days" +LINE 1: select interval '1 week 2147483647 days'; + ^ +select interval '-1 week -2147483648 days'; +ERROR: interval field value out of range: "-1 week -2147483648 days" +LINE 1: select interval '-1 week -2147483648 days'; + ^ +select interval '2147483647 days 1 week'; +ERROR: interval field value out of range: "2147483647 days 1 week" +LINE 1: select interval '2147483647 days 1 week'; + ^ +select interval '-2147483648 days -1 week'; +ERROR: interval field value out of range: "-2147483648 days -1 week" +LINE 1: select interval '-2147483648 days -1 week'; + ^ +select interval 'P1W2147483647D'; +ERROR: interval field value out of range: "P1W2147483647D" +LINE 1: select interval 'P1W2147483647D'; + ^ +select interval 'P-1W-2147483648D'; +ERROR: interval field value out of range: "P-1W-2147483648D" +LINE 1: select interval 'P-1W-2147483648D'; + ^ +select interval 'P2147483647D1W'; +ERROR: interval field value out of range: "P2147483647D1W" +LINE 1: select interval 'P2147483647D1W'; + ^ +select interval 'P-2147483648D-1W'; +ERROR: interval field value out of range: "P-2147483648D-1W" +LINE 1: select interval 'P-2147483648D-1W'; + ^ +select interval '1 decade 2147483647 years'; +ERROR: interval field value out of range: "1 decade 2147483647 years" +LINE 1: select interval '1 decade 2147483647 years'; + ^ +select interval '1 century 2147483647 years'; +ERROR: interval field value out of range: "1 century 2147483647 years" +LINE 1: select interval '1 century 2147483647 years'; + ^ +select interval '1 millennium 2147483647 years'; +ERROR: interval field value out of range: "1 millennium 2147483647 years" +LINE 1: select interval '1 millennium 2147483647 years'; + ^ +select interval '-1 decade -2147483648 years'; +ERROR: interval field value out of range: "-1 decade -2147483648 years" +LINE 1: select interval '-1 decade -2147483648 years'; + ^ +select interval '-1 century -2147483648 years'; +ERROR: interval field value out of range: "-1 century -2147483648 years" +LINE 1: select interval '-1 century -2147483648 years'; + ^ +select interval '-1 millennium -2147483648 years'; +ERROR: interval field value out of range: "-1 millennium -2147483648 years" +LINE 1: select interval '-1 millennium -2147483648 years'; + ^ +select interval '2147483647 years 1 decade'; +ERROR: interval field value out of range: "2147483647 years 1 decade" +LINE 1: select interval '2147483647 years 1 decade'; + ^ +select interval '2147483647 years 1 century'; +ERROR: interval field value out of range: "2147483647 years 1 century" +LINE 1: select interval '2147483647 years 1 century'; + ^ +select interval '2147483647 years 1 millennium'; +ERROR: interval field value out of range: "2147483647 years 1 millennium" +LINE 1: select interval '2147483647 years 1 millennium'; + ^ +select interval '-2147483648 years -1 decade'; +ERROR: interval field value out of range: "-2147483648 years -1 decade" +LINE 1: select interval '-2147483648 years -1 decade'; + ^ +select interval '-2147483648 years -1 century'; +ERROR: interval field value out of range: "-2147483648 years -1 century" +LINE 1: select interval '-2147483648 years -1 century'; + ^ +select interval '-2147483648 years -1 millennium'; +ERROR: interval field value out of range: "-2147483648 years -1 millennium" +LINE 1: select interval '-2147483648 years -1 millennium'; + ^ +-- overflowing with fractional fields - postgres format +select interval '0.1 millennium 2147483647 months'; +ERROR: interval field value out of range: "0.1 millennium 2147483647 months" +LINE 1: select interval '0.1 millennium 2147483647 months'; + ^ +select interval '0.1 centuries 2147483647 months'; +ERROR: interval field value out of range: "0.1 centuries 2147483647 months" +LINE 1: select interval '0.1 centuries 2147483647 months'; + ^ +select interval '0.1 decades 2147483647 months'; +ERROR: interval field value out of range: "0.1 decades 2147483647 months" +LINE 1: select interval '0.1 decades 2147483647 months'; + ^ +select interval '0.1 yrs 2147483647 months'; +ERROR: interval field value out of range: "0.1 yrs 2147483647 months" +LINE 1: select interval '0.1 yrs 2147483647 months'; + ^ +select interval '-0.1 millennium -2147483648 months'; +ERROR: interval field value out of range: "-0.1 millennium -2147483648 months" +LINE 1: select interval '-0.1 millennium -2147483648 months'; + ^ +select interval '-0.1 centuries -2147483648 months'; +ERROR: interval field value out of range: "-0.1 centuries -2147483648 months" +LINE 1: select interval '-0.1 centuries -2147483648 months'; + ^ +select interval '-0.1 decades -2147483648 months'; +ERROR: interval field value out of range: "-0.1 decades -2147483648 months" +LINE 1: select interval '-0.1 decades -2147483648 months'; + ^ +select interval '-0.1 yrs -2147483648 months'; +ERROR: interval field value out of range: "-0.1 yrs -2147483648 months" +LINE 1: select interval '-0.1 yrs -2147483648 months'; + ^ +select interval '2147483647 months 0.1 millennium'; +ERROR: interval field value out of range: "2147483647 months 0.1 millennium" +LINE 1: select interval '2147483647 months 0.1 millennium'; + ^ +select interval '2147483647 months 0.1 centuries'; +ERROR: interval field value out of range: "2147483647 months 0.1 centuries" +LINE 1: select interval '2147483647 months 0.1 centuries'; + ^ +select interval '2147483647 months 0.1 decades'; +ERROR: interval field value out of range: "2147483647 months 0.1 decades" +LINE 1: select interval '2147483647 months 0.1 decades'; + ^ +select interval '2147483647 months 0.1 yrs'; +ERROR: interval field value out of range: "2147483647 months 0.1 yrs" +LINE 1: select interval '2147483647 months 0.1 yrs'; + ^ +select interval '-2147483648 months -0.1 millennium'; +ERROR: interval field value out of range: "-2147483648 months -0.1 millennium" +LINE 1: select interval '-2147483648 months -0.1 millennium'; + ^ +select interval '-2147483648 months -0.1 centuries'; +ERROR: interval field value out of range: "-2147483648 months -0.1 centuries" +LINE 1: select interval '-2147483648 months -0.1 centuries'; + ^ +select interval '-2147483648 months -0.1 decades'; +ERROR: interval field value out of range: "-2147483648 months -0.1 decades" +LINE 1: select interval '-2147483648 months -0.1 decades'; + ^ +select interval '-2147483648 months -0.1 yrs'; +ERROR: interval field value out of range: "-2147483648 months -0.1 yrs" +LINE 1: select interval '-2147483648 months -0.1 yrs'; + ^ +select interval '0.1 months 2147483647 days'; +ERROR: interval field value out of range: "0.1 months 2147483647 days" +LINE 1: select interval '0.1 months 2147483647 days'; + ^ +select interval '-0.1 months -2147483648 days'; +ERROR: interval field value out of range: "-0.1 months -2147483648 days" +LINE 1: select interval '-0.1 months -2147483648 days'; + ^ +select interval '2147483647 days 0.1 months'; +ERROR: interval field value out of range: "2147483647 days 0.1 months" +LINE 1: select interval '2147483647 days 0.1 months'; + ^ +select interval '-2147483648 days -0.1 months'; +ERROR: interval field value out of range: "-2147483648 days -0.1 months" +LINE 1: select interval '-2147483648 days -0.1 months'; + ^ +select interval '0.5 weeks 2147483647 days'; +ERROR: interval field value out of range: "0.5 weeks 2147483647 days" +LINE 1: select interval '0.5 weeks 2147483647 days'; + ^ +select interval '-0.5 weeks -2147483648 days'; +ERROR: interval field value out of range: "-0.5 weeks -2147483648 days" +LINE 1: select interval '-0.5 weeks -2147483648 days'; + ^ +select interval '2147483647 days 0.5 weeks'; +ERROR: interval field value out of range: "2147483647 days 0.5 weeks" +LINE 1: select interval '2147483647 days 0.5 weeks'; + ^ +select interval '-2147483648 days -0.5 weeks'; +ERROR: interval field value out of range: "-2147483648 days -0.5 weeks" +LINE 1: select interval '-2147483648 days -0.5 weeks'; + ^ +select interval '0.01 months 9223372036854775807 microseconds'; +ERROR: interval field value out of range: "0.01 months 9223372036854775807 microseconds" +LINE 1: select interval '0.01 months 9223372036854775807 microsecond... + ^ +select interval '-0.01 months -9223372036854775808 microseconds'; +ERROR: interval field value out of range: "-0.01 months -9223372036854775808 microseconds" +LINE 1: select interval '-0.01 months -9223372036854775808 microseco... + ^ +select interval '9223372036854775807 microseconds 0.01 months'; +ERROR: interval field value out of range: "9223372036854775807 microseconds 0.01 months" +LINE 1: select interval '9223372036854775807 microseconds 0.01 month... + ^ +select interval '-9223372036854775808 microseconds -0.01 months'; +ERROR: interval field value out of range: "-9223372036854775808 microseconds -0.01 months" +LINE 1: select interval '-9223372036854775808 microseconds -0.01 mon... + ^ +select interval '0.1 weeks 9223372036854775807 microseconds'; +ERROR: interval field value out of range: "0.1 weeks 9223372036854775807 microseconds" +LINE 1: select interval '0.1 weeks 9223372036854775807 microseconds'... + ^ +select interval '-0.1 weeks -9223372036854775808 microseconds'; +ERROR: interval field value out of range: "-0.1 weeks -9223372036854775808 microseconds" +LINE 1: select interval '-0.1 weeks -9223372036854775808 microsecond... + ^ +select interval '9223372036854775807 microseconds 0.1 weeks'; +ERROR: interval field value out of range: "9223372036854775807 microseconds 0.1 weeks" +LINE 1: select interval '9223372036854775807 microseconds 0.1 weeks'... + ^ +select interval '-9223372036854775808 microseconds -0.1 weeks'; +ERROR: interval field value out of range: "-9223372036854775808 microseconds -0.1 weeks" +LINE 1: select interval '-9223372036854775808 microseconds -0.1 week... + ^ +select interval '0.1 days 9223372036854775807 microseconds'; +ERROR: interval field value out of range: "0.1 days 9223372036854775807 microseconds" +LINE 1: select interval '0.1 days 9223372036854775807 microseconds'; + ^ +select interval '-0.1 days -9223372036854775808 microseconds'; +ERROR: interval field value out of range: "-0.1 days -9223372036854775808 microseconds" +LINE 1: select interval '-0.1 days -9223372036854775808 microseconds... + ^ +select interval '9223372036854775807 microseconds 0.1 days'; +ERROR: interval field value out of range: "9223372036854775807 microseconds 0.1 days" +LINE 1: select interval '9223372036854775807 microseconds 0.1 days'; + ^ +select interval '-9223372036854775808 microseconds -0.1 days'; +ERROR: interval field value out of range: "-9223372036854775808 microseconds -0.1 days" +LINE 1: select interval '-9223372036854775808 microseconds -0.1 days... + ^ +-- overflowing with fractional fields - ISO8601 format +select interval 'P0.1Y2147483647M'; +ERROR: interval field value out of range: "P0.1Y2147483647M" +LINE 1: select interval 'P0.1Y2147483647M'; + ^ +select interval 'P-0.1Y-2147483648M'; +ERROR: interval field value out of range: "P-0.1Y-2147483648M" +LINE 1: select interval 'P-0.1Y-2147483648M'; + ^ +select interval 'P2147483647M0.1Y'; +ERROR: interval field value out of range: "P2147483647M0.1Y" +LINE 1: select interval 'P2147483647M0.1Y'; + ^ +select interval 'P-2147483648M-0.1Y'; +ERROR: interval field value out of range: "P-2147483648M-0.1Y" +LINE 1: select interval 'P-2147483648M-0.1Y'; + ^ +select interval 'P0.1M2147483647D'; +ERROR: interval field value out of range: "P0.1M2147483647D" +LINE 1: select interval 'P0.1M2147483647D'; + ^ +select interval 'P-0.1M-2147483648D'; +ERROR: interval field value out of range: "P-0.1M-2147483648D" +LINE 1: select interval 'P-0.1M-2147483648D'; + ^ +select interval 'P2147483647D0.1M'; +ERROR: interval field value out of range: "P2147483647D0.1M" +LINE 1: select interval 'P2147483647D0.1M'; + ^ +select interval 'P-2147483648D-0.1M'; +ERROR: interval field value out of range: "P-2147483648D-0.1M" +LINE 1: select interval 'P-2147483648D-0.1M'; + ^ +select interval 'P0.5W2147483647D'; +ERROR: interval field value out of range: "P0.5W2147483647D" +LINE 1: select interval 'P0.5W2147483647D'; + ^ +select interval 'P-0.5W-2147483648D'; +ERROR: interval field value out of range: "P-0.5W-2147483648D" +LINE 1: select interval 'P-0.5W-2147483648D'; + ^ +select interval 'P2147483647D0.5W'; +ERROR: interval field value out of range: "P2147483647D0.5W" +LINE 1: select interval 'P2147483647D0.5W'; + ^ +select interval 'P-2147483648D-0.5W'; +ERROR: interval field value out of range: "P-2147483648D-0.5W" +LINE 1: select interval 'P-2147483648D-0.5W'; + ^ +select interval 'P0.01MT2562047788H54.775807S'; +ERROR: interval field value out of range: "P0.01MT2562047788H54.775807S" +LINE 1: select interval 'P0.01MT2562047788H54.775807S'; + ^ +select interval 'P-0.01MT-2562047788H-54.775808S'; +ERROR: interval field value out of range: "P-0.01MT-2562047788H-54.775808S" +LINE 1: select interval 'P-0.01MT-2562047788H-54.775808S'; + ^ +select interval 'P0.1DT2562047788H54.775807S'; +ERROR: interval field value out of range: "P0.1DT2562047788H54.775807S" +LINE 1: select interval 'P0.1DT2562047788H54.775807S'; + ^ +select interval 'P-0.1DT-2562047788H-54.775808S'; +ERROR: interval field value out of range: "P-0.1DT-2562047788H-54.775808S" +LINE 1: select interval 'P-0.1DT-2562047788H-54.775808S'; + ^ +select interval 'PT2562047788.1H54.775807S'; +ERROR: interval field value out of range: "PT2562047788.1H54.775807S" +LINE 1: select interval 'PT2562047788.1H54.775807S'; + ^ +select interval 'PT-2562047788.1H-54.775808S'; +ERROR: interval field value out of range: "PT-2562047788.1H-54.775808S" +LINE 1: select interval 'PT-2562047788.1H-54.775808S'; + ^ +select interval 'PT2562047788H0.1M54.775807S'; +ERROR: interval field value out of range: "PT2562047788H0.1M54.775807S" +LINE 1: select interval 'PT2562047788H0.1M54.775807S'; + ^ +select interval 'PT-2562047788H-0.1M-54.775808S'; +ERROR: interval field value out of range: "PT-2562047788H-0.1M-54.775808S" +LINE 1: select interval 'PT-2562047788H-0.1M-54.775808S'; + ^ +-- overflowing with fractional fields - ISO8601 alternative format +select interval 'P0.1-2147483647-00'; +ERROR: interval field value out of range: "P0.1-2147483647-00" +LINE 1: select interval 'P0.1-2147483647-00'; + ^ +select interval 'P00-0.1-2147483647'; +ERROR: interval field value out of range: "P00-0.1-2147483647" +LINE 1: select interval 'P00-0.1-2147483647'; + ^ +select interval 'P00-0.01-00T2562047788:00:54.775807'; +ERROR: interval field value out of range: "P00-0.01-00T2562047788:00:54.775807" +LINE 1: select interval 'P00-0.01-00T2562047788:00:54.775807'; + ^ +select interval 'P00-00-0.1T2562047788:00:54.775807'; +ERROR: interval field value out of range: "P00-00-0.1T2562047788:00:54.775807" +LINE 1: select interval 'P00-00-0.1T2562047788:00:54.775807'; + ^ +select interval 'PT2562047788.1:00:54.775807'; +ERROR: interval field value out of range: "PT2562047788.1:00:54.775807" +LINE 1: select interval 'PT2562047788.1:00:54.775807'; + ^ +select interval 'PT2562047788:01.:54.775807'; +ERROR: interval field value out of range: "PT2562047788:01.:54.775807" +LINE 1: select interval 'PT2562047788:01.:54.775807'; + ^ +-- overflowing with fractional fields - SQL standard format +select interval '0.1 2562047788:0:54.775807'; +ERROR: interval field value out of range: "0.1 2562047788:0:54.775807" +LINE 1: select interval '0.1 2562047788:0:54.775807'; + ^ +select interval '0.1 2562047788:0:54.775808 ago'; +ERROR: interval field value out of range: "0.1 2562047788:0:54.775808 ago" +LINE 1: select interval '0.1 2562047788:0:54.775808 ago'; + ^ +select interval '2562047788.1:0:54.775807'; +ERROR: interval field value out of range: "2562047788.1:0:54.775807" +LINE 1: select interval '2562047788.1:0:54.775807'; + ^ +select interval '2562047788.1:0:54.775808 ago'; +ERROR: interval field value out of range: "2562047788.1:0:54.775808 ago" +LINE 1: select interval '2562047788.1:0:54.775808 ago'; + ^ +select interval '2562047788:0.1:54.775807'; +ERROR: invalid input syntax for type interval: "2562047788:0.1:54.775807" +LINE 1: select interval '2562047788:0.1:54.775807'; + ^ +select interval '2562047788:0.1:54.775808 ago'; +ERROR: invalid input syntax for type interval: "2562047788:0.1:54.775808 ago" +LINE 1: select interval '2562047788:0.1:54.775808 ago'; + ^ +-- overflowing using AGO with INT_MIN +select interval '-2147483648 months ago'; +ERROR: interval field value out of range: "-2147483648 months ago" +LINE 1: select interval '-2147483648 months ago'; + ^ +select interval '-2147483648 days ago'; +ERROR: interval field value out of range: "-2147483648 days ago" +LINE 1: select interval '-2147483648 days ago'; + ^ +select interval '-9223372036854775808 microseconds ago'; +ERROR: interval field value out of range: "-9223372036854775808 microseconds ago" +LINE 1: select interval '-9223372036854775808 microseconds ago'; + ^ +select interval '-2147483648 months -2147483648 days -9223372036854775808 microseconds ago'; +ERROR: interval field value out of range: "-2147483648 months -2147483648 days -9223372036854775808 microseconds ago" +LINE 1: select interval '-2147483648 months -2147483648 days -922337... + ^ +-- test that INT_MIN number is formatted properly +SET IntervalStyle to postgres; +select interval '-2147483648 months -2147483648 days -9223372036854775808 us'; + interval +-------------------------------------------------------------------- + -178956970 years -8 mons -2147483648 days -2562047788:00:54.775808 +(1 row) + +SET IntervalStyle to sql_standard; +select interval '-2147483648 months -2147483648 days -9223372036854775808 us'; + interval +--------------------------------------------------- + -178956970-8 -2147483648 -2562047788:00:54.775808 +(1 row) + +SET IntervalStyle to iso_8601; +select interval '-2147483648 months -2147483648 days -9223372036854775808 us'; + interval +----------------------------------------------------- + P-178956970Y-8M-2147483648DT-2562047788H-54.775808S +(1 row) + +SET IntervalStyle to postgres_verbose; +select interval '-2147483648 months -2147483648 days -9223372036854775808 us'; + interval +------------------------------------------------------------------------------ + @ 178956970 years 8 mons 2147483648 days 2562047788 hours 54.775808 secs ago +(1 row) + -- check that '30 days' equals '1 month' according to the hash function select '30 days'::interval = '1 month'::interval as t; t @@ -968,9 +1725,9 @@ SELECT f1, (10 rows) SELECT EXTRACT(FORTNIGHT FROM INTERVAL '2 days'); -- error -ERROR: interval units "fortnight" not recognized +ERROR: unit "fortnight" not recognized for type interval SELECT EXTRACT(TIMEZONE FROM INTERVAL '2 days'); -- error -ERROR: interval units "timezone" not supported +ERROR: unit "timezone" not supported for type interval SELECT EXTRACT(DECADE FROM INTERVAL '100 y'); extract --------- diff --git a/contrib/pax_storage/src/test/regress/expected/join_gp_optimizer.out b/contrib/pax_storage/src/test/regress/expected/join_gp_optimizer.out index a98e2b26b02..dbd71b6762a 100644 --- a/contrib/pax_storage/src/test/regress/expected/join_gp_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/join_gp_optimizer.out @@ -47,6 +47,31 @@ select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1 2 | 2 | 2 (5 rows) +-- +-- test anti_join/left_anti_semi_join selectivities +-- +create table aj_t1(a int, b int, c int) distributed by (a); +create table aj_t2(a int, b int, c int) distributed by (a); +insert into aj_t1 values(1,1,1); +insert into aj_t2 values(1,1,1),(2,2,2); +explain(costs off) select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: ((t1.b = t2.b) AND (t1.c = t2.c)) + -> Seq Scan on aj_t1 t1 + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on aj_t2 t2 + Optimizer: GPORCA +(8 rows) + +select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + a +--- +(0 rows) + -- -- test hash join -- @@ -814,21 +839,19 @@ set enable_nestloop = 1; set enable_material = 0; set enable_seqscan = 0; set enable_bitmapscan = 0; -analyze tenk1; -analyze tenk2; explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; QUERY PLAN ------------------------------------------------------------------------------------------------------------------- - Limit (cost=0.00..1885517.36 rows=1 width=1) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1885517.36 rows=1 width=1) - -> Limit (cost=0.00..1885517.36 rows=1 width=1) - -> Nested Loop Left Join (cost=0.00..1885484.02 rows=33336667 width=4) + Limit (cost=0.00..1885524.86 rows=1 width=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1885524.86 rows=1 width=1) + -> Limit (cost=0.00..1885524.86 rows=1 width=1) + -> Nested Loop Left Join (cost=0.00..1885491.53 rows=33336667 width=4) Join Filter: true -> Seq Scan on tenk1 (cost=0.00..431.51 rows=3334 width=4) -> Materialize (cost=0.00..431.70 rows=10000 width=1) -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.69 rows=10000 width=1) - -> Seq Scan on tenk2 (cost=0.00..431.50 rows=3334 width=1) - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + -> Seq Scan on tenk2 (cost=0.00..431.51 rows=3334 width=1) + Optimizer: GPORCA (10 rows) select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; @@ -1642,3 +1665,1908 @@ on (coalesce(t.id1) = tq_all.id1 and t.id2 = tq_all.id2) ; (14 rows) drop table t_issue_10315; +-- +-- Left Join Pruning -- +-- Cases when join will be pruned-- +-- Single Unique key in inner relation -- +create table fooJoinPruning (a int,b int,c int,constraint idx1 unique(a)); +create table barJoinPruning (p int,q int,r int,constraint idx2 unique(p)); +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation or is a constant -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 where fooJoinPruning.b>300; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (b > 300) + Optimizer: GPORCA +(4 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (b > 300) + Optimizer: GPORCA +(4 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains subquery-- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300 and fooJoinPruning.c in (select barJoinPruning.q from barJoinPruning ); + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=12) + -> Hash Semi Join (cost=0.00..862.00 rows=1 width=12) + Hash Cond: (foojoinpruning.c = barjoinpruning.q) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (b > 300) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(9 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300 and fooJoinPruning.c > ANY (select barJoinPruning.q from barJoinPruning ); + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.22 rows=1 width=12) + -> Seq Scan on foojoinpruning (cost=0.00..1324032.22 rows=1 width=12) + Filter: ((b > 300) AND (SubPlan 1)) + SubPlan 1 + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(8 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing outer relation column-- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select fooJoinPruning.a from barJoinPruning); + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.19 rows=1 width=12) + -> Nested Loop Semi Join (cost=0.00..1324032.19 rows=1 width=12) + Join Filter: true + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (b = a) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=1) + -> Limit (cost=0.00..431.00 rows=1 width=1) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=1) + Optimizer: GPORCA +(11 rows) + +drop table fooJoinPruning; +drop table barJoinPruning; +-- MultipleUnique key sets in inner relation -- +create table fooJoinPruning (a int, b int, c int,d int,e int,f int,g int,constraint idx1 unique(a,b),constraint idx2 unique(a,c,d)); +create table barJoinPruning (p int, q int, r int,s int,t int,u int,v int,constraint idx3 unique(p,q),constraint idx4 unique(p,r,s)); +create table t1JoinPruning(m int primary key,n int); +create table t2JoinPruning(x int primary key,y int); +-- Unique key set of inner relation ie 'p,q' is present in the join condition and is equal to a column from outer relation or is a constant -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 and barJoinPruning.q=200 where fooJoinPruning.e >300 and fooJoinPruning.f<>10; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: ((e > 300) AND (f <> 10)) + Optimizer: GPORCA +(4 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.e >300 and fooJoinPruning.f<>10; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: ((e > 300) AND (f <> 10)) + Optimizer: GPORCA +(4 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and barJoinPruning.q=100 where fooJoinPruning.e >300 and fooJoinPruning.f<>10; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: ((e > 300) AND (f <> 10)) + Optimizer: GPORCA +(4 rows) + +-- Unique key set of inner relation ie 'p,r,s' is present in the join condition and is equal to a column from outer relation or is a constant -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and barJoinPruning.r=100 and fooJoinPruning.b=barJoinPruning.s where fooJoinPruning.f<>10; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (f <> 10) + Optimizer: GPORCA +(4 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.b=barJoinPruning.r and fooJoinPruning.c=barJoinPruning.s and barJoinPruning.s=barJoinPruning.t where fooJoinPruning.b>300; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (b > 300) + Optimizer: GPORCA +(4 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains subquery -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.c in (select barJoinPruning.t from barJoinPruning ); + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=28) + -> Hash Semi Join (cost=0.00..862.00 rows=1 width=28) + Hash Cond: (foojoinpruning.c = barjoinpruning.t) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(8 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.c > ANY (select barJoinPruning.t from barJoinPruning ); + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.25 rows=1 width=28) + -> Seq Scan on foojoinpruning (cost=0.00..1324032.25 rows=1 width=28) + Filter: (SubPlan 1) + SubPlan 1 + -> Materialize (cost=0.00..431.00 rows=1 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(8 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing outer relation column -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.e in (select fooJoinPruning.f from barJoinPruning); + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.17 rows=1 width=28) + -> Nested Loop Semi Join (cost=0.00..1324032.17 rows=1 width=28) + Join Filter: true + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (e = f) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=1 width=1) + -> Limit (cost=0.00..431.00 rows=1 width=1) + -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=1) + Optimizer: GPORCA +(11 rows) + +-- Prunable Left join present in subquery -- +explain select t1JoinPruning.n from t1JoinPruning where t1JoinPruning.m in (select fooJoinPruning.a from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 and barJoinPruning.q=200); + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.03 rows=1 width=4) + -> Nested Loop (cost=0.00..437.03 rows=1 width=4) + Join Filter: true + -> Seq Scan on t1joinpruning (cost=0.00..431.00 rows=1 width=8) + -> GroupAggregate (cost=0.00..6.03 rows=1 width=1) + Group Key: foojoinpruning.a + -> Index Only Scan using idx1 on foojoinpruning (cost=0.00..6.03 rows=1 width=4) + Index Cond: (a = t1joinpruning.m) + Optimizer: GPORCA +(9 rows) + +drop table fooJoinPruning; +drop table barJoinPruning; +drop table t1JoinPruning; +drop table t2JoinPruning; +create table t1 (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t2 (a int primary key, b int); +create table t3 (a int primary key, b int); +-- inner table is join +EXPLAIN select t1.a from t1 left join (t2 join t3 on true) on t2.a=t1.a and t3.a=t1.a; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(3 rows) + +-- inner table has new left join +EXPLAIN select t1.* from t1 left join (t2 left join t3 on t3.a=t2.b) on t2.a=t1.a; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4) + Optimizer: GPORCA +(3 rows) + +-- inner table is a derived table +EXPLAIN (COSTS OFF) +select t1.* from t1 left join + ( + select t2.b as v2b, count(*) as v2c + from t2 left join t3 on t3.a=t2.b + group by t2.b + ) v2 + on v2.v2b=t1.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + Optimizer: GPORCA +(3 rows) + +drop table t1; +drop table t2; +drop table t3; +-- +-- Cases where join will not be pruned +-- +-- Single Unique key in inner relation -- +create table fooJoinPruning (a int,b int,c int,constraint idx1 unique(a)); +create table barJoinPruning (p int,q int,r int,constraint idx2 unique(p)); +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation but filter is on a inner relation -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=fooJoinPruning.b where barJoinPruning.q<>10; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=12) + -> Hash Join (cost=0.00..862.00 rows=1 width=12) + Hash Cond: (foojoinpruning.b = barjoinpruning.p) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) + Hash Key: foojoinpruning.b + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=4) + Filter: (q <> 10) + Optimizer: GPORCA +(10 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation but output columns are from inner relation -- +explain select barJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=fooJoinPruning.b where fooJoinPruning.b>1000; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=12) + -> Hash Left Join (cost=0.00..862.00 rows=1 width=12) + Hash Cond: (foojoinpruning.b = barjoinpruning.p) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + Hash Key: foojoinpruning.b + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=4) + Filter: (b > 1000) + -> Hash (cost=431.00..431.00 rows=1 width=12) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (p > 1000) + Optimizer: GPORCA +(11 rows) + +-- Subquery present in join condition +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p in (select fooJoinPruning.b from fooJoinPruning ) where fooJoinPruning.c>100; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1330181.79 rows=2 width=12) + -> Nested Loop Left Join (cost=0.00..1330181.79 rows=1 width=12) + Join Filter: true + -> Seq Scan on foojoinpruning foojoinpruning_1 (cost=0.00..431.00 rows=1 width=12) + Filter: (c > 100) + -> Materialize (cost=0.00..437.01 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..437.01 rows=1 width=1) + -> Nested Loop (cost=0.00..437.01 rows=1 width=1) + Join Filter: true + -> GroupAggregate (cost=0.00..431.00 rows=1 width=4) + Group Key: foojoinpruning.b + -> Sort (cost=0.00..431.00 rows=1 width=4) + Sort Key: foojoinpruning.b + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4) + Hash Key: foojoinpruning.b + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=4) + -> Index Only Scan using idx2 on barjoinpruning (cost=0.00..6.01 rows=1 width=1) + Index Cond: (p = foojoinpruning.b) + Optimizer: GPORCA +(19 rows) + +-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing inner relation column-- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select barJoinPruning.q from fooJoinPruning); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324463.10 rows=1 width=12) + -> Hash Join (cost=0.00..1324463.10 rows=1 width=12) + Hash Cond: ((foojoinpruning_1.b = barjoinpruning.q) AND (foojoinpruning_1.c = barjoinpruning.p)) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1324032.10 rows=1 width=12) + Hash Key: foojoinpruning_1.c + -> Nested Loop Semi Join (cost=0.00..1324032.10 rows=1 width=12) + Join Filter: true + -> Seq Scan on foojoinpruning foojoinpruning_1 (cost=0.00..431.00 rows=1 width=12) + -> Materialize (cost=0.00..431.00 rows=1 width=1) + -> Broadcast Motion 1:3 (slice3) (cost=0.00..431.00 rows=1 width=1) + -> Limit (cost=0.00..431.00 rows=1 width=1) + -> Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=1) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(16 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select barJoinPruning.q from fooJoinPruning where fooJoinPruning.a=barJoinPruning.r); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..868.01 rows=1 width=12) + -> Hash Join (cost=0.00..868.01 rows=1 width=12) + Hash Cond: ((foojoinpruning.b = barjoinpruning.q) AND (foojoinpruning.c = barjoinpruning.p)) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=12) + -> Hash (cost=437.01..437.01 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..437.01 rows=1 width=8) + -> Nested Loop (cost=0.00..437.01 rows=1 width=8) + Join Filter: true + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=12) + Hash Key: barjoinpruning.r + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=12) + Filter: (NOT (r IS NULL)) + -> Index Only Scan using idx1 on foojoinpruning foojoinpruning_1 (cost=0.00..6.01 rows=1 width=1) + Index Cond: (a = barjoinpruning.r) + Optimizer: GPORCA +(15 rows) + +drop table fooJoinPruning; +drop table barJoinPruning; +-- Multiple Unique key sets in inner relation -- +create table fooJoinPruning (a int, b int, c int,d int,e int,f int,g int,constraint idx1 unique(a,b),constraint idx2 unique(a,c,d)); +create table barJoinPruning (p int, q int, r int,s int,t int,u int,v int,constraint idx3 unique(p,q),constraint idx4 unique(p,r,s)); +-- No equality operator present in join condition -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p>100 and barJoinPruning.q>200 where fooJoinPruning.b>300; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..888843.40 rows=2 width=28) + -> Nested Loop Left Join (cost=0.00..888843.40 rows=1 width=28) + Join Filter: true + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (b > 300) + -> Materialize (cost=0.00..6.01 rows=1 width=1) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..6.01 rows=1 width=1) + -> Index Only Scan using idx3 on barjoinpruning (cost=0.00..6.01 rows=1 width=1) + Index Cond: ((p > 100) AND (q > 200)) + Optimizer: GPORCA +(10 rows) + +-- OR operator is present in join condition -- +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.c=barJoinPruning.r or fooJoinPruning.d=barJoinPruning.s; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324033.62 rows=1 width=28) + -> Nested Loop Left Join (cost=0.00..1324033.62 rows=1 width=28) + Join Filter: (((foojoinpruning.a = barjoinpruning.p) AND (foojoinpruning.c = barjoinpruning.r)) OR (foojoinpruning.d = barjoinpruning.s)) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + -> Materialize (cost=0.00..431.00 rows=1 width=12) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=12) + Optimizer: GPORCA +(8 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p or fooJoinPruning.b=barJoinPruning.q where fooJoinPruning.b>300; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324033.17 rows=1 width=28) + -> Nested Loop Left Join (cost=0.00..1324033.17 rows=1 width=28) + Join Filter: ((foojoinpruning.a = barjoinpruning.p) OR (foojoinpruning.b = barjoinpruning.q)) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (b > 300) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(9 rows) + +-- Not all unique keys of inner relation are equal to a constant or column from outer relation +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and barJoinPruning.r=barJoinPruning.s; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..437.02 rows=2 width=28) + -> Nested Loop Left Join (cost=0.00..437.02 rows=1 width=28) + Join Filter: true + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + -> Index Only Scan using idx4 on barjoinpruning (cost=0.00..6.02 rows=1 width=1) + Index Cond: (p = foojoinpruning.a) + Filter: (r = s) + Optimizer: GPORCA +(8 rows) + +explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.b=barJoinPruning.r and barJoinPruning.s=barJoinPruning.t where fooJoinPruning.b>300; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=28) + -> Hash Left Join (cost=0.00..862.00 rows=1 width=28) + Hash Cond: ((foojoinpruning.a = barjoinpruning.p) AND (foojoinpruning.b = barjoinpruning.r)) + -> Seq Scan on foojoinpruning (cost=0.00..431.00 rows=1 width=28) + Filter: (b > 300) + -> Hash (cost=431.00..431.00 rows=1 width=8) + -> Seq Scan on barjoinpruning (cost=0.00..431.00 rows=1 width=8) + Filter: ((s = t) AND (r > 300)) + Optimizer: GPORCA +(9 rows) + +drop table fooJoinPruning; +drop table barJoinPruning; +-- +-- Cases where join under union +-- +create table foo(a int primary key, b int,c int); +create table bar(a int primary key, b int,c int); +insert into foo values (1,1,10),(2,1,10),(3,2,20),(4,2,30),(5,2,30),(6,NULL,NULL),(7,NULL,3); +insert into bar values (1,1,10),(2,2,20),(3,NULL,NULL),(4,3,NULL),(5,1,10); +analyze foo,bar; +explain (costs off) select foo.a, bar.b from foo left join bar on foo.a = bar.a +union select foo.a, bar.b from foo join bar on foo.a = bar.a; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: foo.a, bar.b + -> Sort + Sort Key: foo.a, bar.b + -> Append + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.a, bar.b + -> Hash Left Join + Hash Cond: (foo.a = bar.a) + -> Seq Scan on foo + -> Hash + -> Seq Scan on bar + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: foo_1.a, bar_1.b + -> Nested Loop + Join Filter: true + -> Seq Scan on bar bar_1 + -> Index Only Scan using foo_pkey on foo foo_1 + Index Cond: (a = bar_1.a) + Optimizer: GPORCA +(21 rows) + +------------------------------------- +-- CASES WHERE JOIN WILL BE PRUNED -- +------------------------------------- +-------------------------------------------------------------------------------- +-- join under UNION +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Append + -> Seq Scan on foo + -> Seq Scan on bar + Optimizer: GPORCA +(11 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a union +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 1 | 10 + 2 | 20 + 3 | + 2 | 30 + | 3 + | +(6 rows) + +-------------------------------------------------------------------------------- +-- join under UNION ALL +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on foo + -> Seq Scan on bar + Optimizer: GPORCA +(5 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a union all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 1 | 10 + 1 | 10 + 1 | 10 + 2 | 20 + 2 | 30 + | 3 + 2 | 20 + | + 3 | + 2 | 30 + | + 1 | 10 +(12 rows) + +-------------------------------------------------------------------------------- +-- join under INTERSECT +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM bar.c))) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> GroupAggregate + Group Key: bar.b, bar.c + -> Sort + Sort Key: bar.b, bar.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, bar.c + -> Seq Scan on bar + Optimizer: GPORCA +(19 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + | + 1 | 10 + 2 | 20 +(3 rows) + +-------------------------------------------------------------------------------- +-- join under INTERSECT ALL +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM bar.c)) AND (NOT ((row_number() OVER (?)) IS DISTINCT FROM (row_number() OVER (?))))) + -> WindowAgg + Partition By: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> WindowAgg + Partition By: bar.b, bar.c + -> Sort + Sort Key: bar.b, bar.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, bar.c + -> Seq Scan on bar + Optimizer: GPORCA +(19 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + | + 1 | 10 + 1 | 10 + 2 | 20 +(4 rows) + +-------------------------------------------------------------------------------- +-- join under EXCEPT +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Hash Anti Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM bar.c))) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, bar.c + -> Seq Scan on bar + Optimizer: GPORCA +(15 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a except +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 2 | 30 + | 3 +(2 rows) + +-------------------------------------------------------------------------------- +-- join under EXCEPT ALL +-- For the below query the output columns of both the CLogicalLeftOuterJoin +-- are from the outer relation, so we can prune both the joins +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM bar.c)) AND (NOT ((row_number() OVER (?)) IS DISTINCT FROM (row_number() OVER (?))))) + -> WindowAgg + Partition By: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> WindowAgg + Partition By: bar.b, bar.c + -> Sort + Sort Key: bar.b, bar.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, bar.c + -> Seq Scan on bar + Optimizer: GPORCA +(19 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a except all +select bar.b,bar.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 2 | 30 + 2 | 30 + | 3 +(3 rows) + +------------------------------------------ +-- CASES WHERE JOIN WILL NOT BE PRUNED -- +------------------------------------------ +-------------------------------------------------------------------------------- +-- join under UNION +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Append + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, foo_1.c + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(17 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a union +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 2 | 30 + 3 | 30 + | 3 + | + 2 | 10 + 1 | 10 + 1 | 30 + 2 | 20 + | 20 +(9 rows) + +-------------------------------------------------------------------------------- +-- join under UNION ALL +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on foo + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(9 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a union all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 1 | 10 + 1 | 10 + 1 | 10 + 2 | 20 + 2 | 30 + | 3 + 2 | 10 + | 20 + 3 | 30 + 2 | 30 + | + 1 | 30 +(12 rows) + +-------------------------------------------------------------------------------- +-- join under INTERSECT +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM foo_1.c))) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> GroupAggregate + Group Key: bar.b, foo_1.c + -> Sort + Sort Key: bar.b, foo_1.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, foo_1.c + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(23 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 1 | 10 +(1 row) + +-------------------------------------------------------------------------------- +-- join under INTERSECT ALL +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM foo_1.c)) AND (NOT ((row_number() OVER (?)) IS DISTINCT FROM (row_number() OVER (?))))) + -> WindowAgg + Partition By: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> WindowAgg + Partition By: bar.b, foo_1.c + -> Sort + Sort Key: bar.b, foo_1.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, foo_1.c + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(23 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 1 | 10 +(1 row) + +-------------------------------------------------------------------------------- +-- join under EXCEPT +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Hash Anti Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM foo_1.c))) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, foo_1.c + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(19 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a except +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 2 | 30 + | 3 + | + 2 | 20 +(4 rows) + +-------------------------------------------------------------------------------- +-- join under EXCEPT ALL +-- For the below query since for the outer CLogicalLeftOuterJoin, all the output +-- columns are from the outer relation, the outer join can be pruned but for the +-- inner CLogicalLeftOuterJoin the output column contains columns from +-- inner relation.So the inner join can't be pruned. +-------------------------------------------------------------------------------- +explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: ((NOT (foo.b IS DISTINCT FROM bar.b)) AND (NOT (foo.c IS DISTINCT FROM foo_1.c)) AND (NOT ((row_number() OVER (?)) IS DISTINCT FROM (row_number() OVER (?))))) + -> WindowAgg + Partition By: foo.b, foo.c + -> Sort + Sort Key: foo.b, foo.c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: foo.b, foo.c + -> Seq Scan on foo + -> Hash + -> WindowAgg + Partition By: bar.b, foo_1.c + -> Sort + Sort Key: bar.b, foo_1.c + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar.b, foo_1.c + -> Hash Left Join + Hash Cond: (bar.a = foo_1.a) + -> Seq Scan on bar + -> Hash + -> Seq Scan on foo foo_1 + Optimizer: GPORCA +(23 rows) + +select foo.b,foo.c from foo left join bar on foo.a=bar.a except all +select bar.b,foo.c from bar left join foo on foo.a=bar.a; + b | c +---+---- + 2 | 30 + 2 | 30 + | 3 + | + 1 | 10 + 2 | 20 +(6 rows) + +drop table foo; +drop table bar; +----------------------------------------------------------------- +-- Test cases on Dynamic Partition Elimination(DPE) for Right Joins +----------------------------------------------------------------- +-- Note1 : DPE for Right join will happen if, all the following satisfy +-- Condition 1: Outer table is partition table +-- Condition 2: The partitioned column is same as distribution column +-- Condition 3: Join condition is on partitioned key of outer table +-- Note2 : To view the effect of DPE, the queries should be run with +-- "Explain Analyze ...". With it, the exact number of partitions scanned +-- will be shows in the plan. +-- Eg: explain analyze select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a; +drop table if exists foo; +NOTICE: table "foo" does not exist, skipping +drop table if exists bar_PT1; +NOTICE: table "bar_pt1" does not exist, skipping +drop table if exists bar_PT2; +NOTICE: table "bar_pt2" does not exist, skipping +drop table if exists bar_PT3; +NOTICE: table "bar_pt3" does not exist, skipping +drop table if exists bar_List_PT1; +NOTICE: table "bar_list_pt1" does not exist, skipping +drop table if exists bar_List_PT2; +NOTICE: table "bar_list_pt2" does not exist, skipping +-- Table creation : Normal table +create table foo (a int , b int) distributed by (a); +insert into foo select i,i from generate_series(1,5)i; +analyze foo; +-- Table creation : First range Partitioned table with same 'Distribution Column' and 'Partitioning key' +create table bar_PT1 (a1_PC int, b1 int) partition by range(a1_PC) (start (1) inclusive end (12) every (2)) distributed by (a1_PC); +insert into bar_PT1 select i,i from generate_series(1,11)i; +analyze bar_PT1; +-- Table creation : Second range Partitioned table with different 'Distribution Column' and 'Partitioning key' +create table bar_PT2 (a2 int, b2_PC int) partition by range(b2_PC) (start (1) inclusive end (12) every (2)) distributed by (a2); +insert into bar_PT2 select i,i from generate_series(1,11)i; +analyze bar_PT2; +-- Table creation : Third range Partitioned table with same 'Distribution Column' and 'Partitioning key' +create table bar_PT3 (a3_PC int, b3 int) partition by range(a3_PC) (start (1) inclusive end (6) every (2))distributed by (a3_PC); +insert into bar_PT3 select i,i from generate_series(1,5)i; +analyze bar_PT3; +-- Table creation : First list Partitioned table with same 'Distribution Column' and 'Partitioning key' +create table bar_List_PT1 (a1_PC int, b1 int) partition by list(a1_PC) +(partition p1 values(1,2), partition p2 values(3,4), partition p3 values(5,6), partition p4 values(7,8), partition p5 values(9,10), + partition p6 values(11,12), partition p7 values(13,14), partition p8 values(15,16), partition p9 values(17,18), partition p10 values(19,20), + partition p11 values(21,22), partition p12 values(23,24), default partition pdefault) distributed by (a1_PC); +insert into bar_List_PT1 select i,i from generate_series(1,24)i; +analyze bar_List_PT1; +-- Table creation : Second list Partitioned table with same 'Distribution Column' and 'Partitioning key' +create table bar_List_PT2 (a2_PC int, b2 int) partition by list(a2_PC) + (partition p1 values(1,2), partition p2 values(3,4), partition p3 values(5,6), partition p4 values(7,8), partition p5 values(9,10), + partition p6 values(11,12), default partition pdefault) distributed by (a2_PC); +insert into bar_List_PT2 select i,i from generate_series(1,12)i; +analyze bar_List_PT2; +-- Case-1 : Distribution colm = Partition Key. +-- FOR RANGE PARTITIONED TABLE +-- Outer table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - YES +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt1.a1_pc = foo.a) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Optimizer: GPORCA +(9 rows) + +select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a; + a1_pc | b1 | a | b +-------+----+---+--- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 +(5 rows) + +-- Outer table: Partitioned table, Join Condition on Partition key: No, Result: DPE - No +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.b1 =foo.a; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt1.b1 = foo.a) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: bar_pt1.b1 + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Seq Scan on foo + Optimizer: GPORCA +(10 rows) + +select * from bar_PT1 right join foo on bar_PT1.b1 =foo.a; + a1_pc | b1 | a | b +-------+----+---+--- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 +(5 rows) + +-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - Yes +explain (costs off) select * from bar_PT1 right join bar_PT3 on bar_PT1.a1_PC =bar_PT3.a3_PC; + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt1.a1_pc = bar_pt3.a3_pc) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Partition Selector (selector id: $0) + -> Dynamic Seq Scan on bar_pt3 + Number of partitions to scan: 3 (out of 3) + Optimizer: GPORCA +(10 rows) + +select * from bar_PT1 right join bar_PT3 on bar_PT1.a1_PC =bar_PT3.a3_PC; + a1_pc | b1 | a3_pc | b3 +-------+----+-------+---- + 5 | 5 | 5 | 5 + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 +(5 rows) + +-- Outer table: Not a Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No +explain (costs off) select * from foo right join bar_PT1 on foo.a=bar_PT1.a1_PC; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (bar_pt1.a1_pc = foo.a) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Seq Scan on foo + Optimizer: GPORCA +(8 rows) + +select * from foo right join bar_PT1 on foo.a=bar_PT1.a1_PC; + a | b | a1_pc | b1 +---+---+-------+---- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + | | 7 | 7 + | | 8 | 8 + 5 | 5 | 5 | 5 + | | 6 | 6 + | | 9 | 9 + | | 10 | 10 + | | 11 | 11 +(11 rows) + +-- Right join with predicate on the column of non partitioned table in 'where clause'. +-- Result: DPE - Yes, +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a where foo.a>2; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt1.a1_pc = foo.a) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + Filter: (a1_pc > 2) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Filter: (a > 2) + Optimizer: GPORCA +(11 rows) + +select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a where foo.a>2; + a1_pc | b1 | a | b +-------+----+---+--- + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 +(3 rows) + +--Conjunction in join condition, Result: DPE - Yes +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and bar_PT1.b1 =foo.b; + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: ((bar_pt1.a1_pc = foo.a) AND (bar_pt1.b1 = foo.b)) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Optimizer: GPORCA +(9 rows) + +select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and bar_PT1.b1 =foo.b; + a1_pc | b1 | a | b +-------+----+---+--- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 +(5 rows) + +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and foo.b>2; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt1.a1_pc = foo.a) + Join Filter: (foo.b > 2) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Optimizer: GPORCA +(10 rows) + +select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and foo.b>2; + a1_pc | b1 | a | b +-------+----+---+--- + | | 1 | 1 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + | | 2 | 2 + 5 | 5 | 5 | 5 +(5 rows) + +-- Multiple Right Joins, DPE- Yes +explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a right join bar_PT2 on bar_PT1.a1_PC =bar_PT2.b2_PC; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (bar_pt2.b2_pc = bar_pt1.a1_pc) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: bar_pt2.b2_pc + -> Dynamic Seq Scan on bar_pt2 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar_pt1.a1_pc + -> Hash Right Join + Hash Cond: (bar_pt1.a1_pc = foo.a) + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Optimizer: GPORCA +(18 rows) + +select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a right join bar_PT2 on bar_PT1.a1_PC =bar_PT2.b2_PC; + a1_pc | b1 | a | b | a2 | b2_pc +-------+----+---+---+----+------- + 2 | 2 | 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 | 4 | 4 + | | | | 7 | 7 + | | | | 8 | 8 + 1 | 1 | 1 | 1 | 1 | 1 + 5 | 5 | 5 | 5 | 5 | 5 + | | | | 6 | 6 + | | | | 9 | 9 + | | | | 10 | 10 + | | | | 11 | 11 +(11 rows) + +-- FOR LIST PARTITIONED TABLE +-- Outer table: List Partitioned table, Join Condition on Partition key: Yes, Result: DPE - YES +explain (costs off) select * from bar_List_PT1 right join foo on bar_List_PT1.a1_PC =foo.a; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_list_pt1.a1_pc = foo.a) + -> Dynamic Seq Scan on bar_list_pt1 + Number of partitions to scan: 13 (out of 13) + -> Hash + -> Partition Selector (selector id: $0) + -> Seq Scan on foo + Optimizer: GPORCA +(9 rows) + +select * from bar_List_PT1 right join foo on bar_List_PT1.a1_PC =foo.a; + a1_pc | b1 | a | b +-------+----+---+--- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 +(5 rows) + +-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - Yes +explain (costs off) select * from bar_List_PT1 right join bar_List_PT2 on bar_List_PT1.a1_PC =bar_List_PT2.a2_PC; + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_list_pt1.a1_pc = bar_list_pt2.a2_pc) + -> Dynamic Seq Scan on bar_list_pt1 + Number of partitions to scan: 13 (out of 13) + -> Hash + -> Partition Selector (selector id: $0) + -> Dynamic Seq Scan on bar_list_pt2 + Number of partitions to scan: 7 (out of 7) + Optimizer: GPORCA +(10 rows) + +select * from bar_List_PT1 right join bar_List_PT2 on bar_List_PT1.a1_PC =bar_List_PT2.a2_PC; + a1_pc | b1 | a2_pc | b2 +-------+----+-------+---- + 1 | 1 | 1 | 1 + 12 | 12 | 12 | 12 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 7 | 7 | 7 | 7 + 8 | 8 | 8 | 8 + 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 + 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 + 11 | 11 | 11 | 11 +(12 rows) + +-- Case-2 : Distribution colm <> Partition Key. +-- Outer table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No +explain (costs off) select * from bar_PT2 right join foo on bar_PT2.b2_PC =foo.a; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (bar_pt2.b2_pc = foo.a) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: bar_pt2.b2_pc + -> Dynamic Seq Scan on bar_pt2 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Seq Scan on foo + Optimizer: GPORCA +(10 rows) + +select * from bar_PT2 right join foo on bar_PT2.b2_PC =foo.a; + a2 | b2_pc | a | b +----+-------+---+--- + 5 | 5 | 5 | 5 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 1 | 1 | 1 | 1 +(5 rows) + +-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No +explain (costs off) select * from bar_PT2 right join bar_PT1 on bar_PT2.b2_PC =bar_PT1.b1; + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (bar_pt1.b1 = bar_pt2.b2_pc) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: bar_pt1.b1 + -> Dynamic Seq Scan on bar_pt1 + Number of partitions to scan: 6 (out of 6) + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: bar_pt2.b2_pc + -> Dynamic Seq Scan on bar_pt2 + Number of partitions to scan: 6 (out of 6) + Optimizer: GPORCA +(13 rows) + +select * from bar_PT2 right join bar_PT1 on bar_PT2.b2_PC =bar_PT1.b1; + a2 | b2_pc | a1_pc | b1 +----+-------+-------+---- + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 7 | 7 | 7 | 7 + 8 | 8 | 8 | 8 + 1 | 1 | 1 | 1 + 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 + 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 + 11 | 11 | 11 | 11 +(11 rows) + +drop table if exists foo; +drop table if exists bar_PT1; +drop table if exists bar_PT2; +drop table if exists bar_PT3; +drop table if exists bar_List_PT1; +drop table if exists bar_List_PT2; +-- Test that left-anti-semi-join not-in works with netowrk types +CREATE TABLE inverse (cidr inet); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'cidr' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO inverse values ('192.168.100.199'); +explain SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse)); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Result (cost=0.00..1324032.93 rows=1 width=4) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.93 rows=1 width=1) + -> Result (cost=0.00..1324032.93 rows=1 width=1) + Filter: (NOT CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((inverse_1.cidr <<= inverse.cidr) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END) + -> GroupAggregate (cost=0.00..1324032.93 rows=1 width=16) + Group Key: inverse_1.cidr, inverse_1.ctid, inverse_1.gp_segment_id + -> Sort (cost=0.00..1324032.93 rows=1 width=23) + Sort Key: inverse_1.cidr, inverse_1.ctid, inverse_1.gp_segment_id + -> Nested Loop Left Join (cost=0.00..1324032.93 rows=1 width=27) + Join Filter: ((inverse_1.cidr <<= inverse.cidr) IS NOT FALSE) + -> Seq Scan on inverse inverse_1 (cost=0.00..431.00 rows=1 width=18) + -> Materialize (cost=0.00..431.00 rows=1 width=9) + -> Result (cost=0.00..431.00 rows=1 width=9) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on inverse (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(16 rows) + +SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse)); + ?column? +---------- +(0 rows) + +create table foo_varchar (a varchar(5)) distributed by (a); +create table bar_char (p char(5)) distributed by (p); +create table random_dis_varchar (x varchar(5)) distributed randomly; +create table random_dis_char (y char(5)) distributed randomly; +insert into foo_varchar values ('1 '),('2 '),('3 '); +insert into bar_char values ('1 '),('2 '),('3 '); +insert into random_dis_varchar values ('1 '),('2 '),('3 '); +insert into random_dis_char values ('1 '),('2 '),('3 '); +set optimizer_enable_hashjoin to off; +set enable_hashjoin to off; +set enable_nestloop to on; +-- check motion is added when performing a NL Left Outer Join between relations +-- when the join condition columns belong to different opfamily and both are +-- distribution keys +explain select * from foo_varchar left join bar_char on foo_varchar.a=bar_char.p; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.83 rows=2 width=16) + -> Nested Loop Left Join (cost=0.00..1324032.83 rows=1 width=16) + Join Filter: ((foo_varchar.a)::bpchar = bar_char.p) + -> Seq Scan on foo_varchar (cost=0.00..431.00 rows=1 width=8) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on bar_char (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(8 rows) + +select * from foo_varchar left join bar_char on foo_varchar.a=bar_char.p; + a | p +------+------- + 2 | 2 + 1 | 1 + 3 | 3 +(3 rows) + +-- There is a plan change (from redistribution to broadcast) because a NULL +-- matching distribution is returned when there is opfamily mismatch between join +-- columns. +explain select * from foo_varchar left join random_dis_char on foo_varchar.a=random_dis_char.y; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.51 rows=2 width=16) + -> Nested Loop Left Join (cost=0.00..1324032.51 rows=1 width=16) + Join Filter: ((foo_varchar.a)::bpchar = random_dis_char.y) + -> Seq Scan on foo_varchar (cost=0.00..431.00 rows=1 width=8) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on random_dis_char (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(8 rows) + +select * from foo_varchar left join random_dis_char on foo_varchar.a=random_dis_char.y; + a | y +------+------- + 2 | 2 + 1 | 1 + 3 | 3 +(3 rows) + +explain select * from bar_char left join random_dis_varchar on bar_char.p=random_dis_varchar.x; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.51 rows=2 width=16) + -> Nested Loop Left Join (cost=0.00..1324032.51 rows=1 width=16) + Join Filter: (bar_char.p = (random_dis_varchar.x)::bpchar) + -> Seq Scan on bar_char (cost=0.00..431.00 rows=1 width=8) + -> Materialize (cost=0.00..431.00 rows=1 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on random_dis_varchar (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(8 rows) + +select * from bar_char left join random_dis_varchar on bar_char.p=random_dis_varchar.x; + p | x +-------+------ + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +-- check motion is added when performing a NL Inner Join between relations when +-- the join condition columns belong to different opfamily and both are +-- distribution keys +explain select * from foo_varchar inner join bar_char on foo_varchar.a=bar_char.p; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.79 rows=1 width=16) + -> Nested Loop (cost=0.00..1324032.79 rows=1 width=16) + Join Filter: ((foo_varchar.a)::bpchar = bar_char.p) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on foo_varchar (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on bar_char (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(7 rows) + +select * from foo_varchar inner join bar_char on foo_varchar.a=bar_char.p; + a | p +------+------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +-- There is a plan change (from redistribution to broadcast) because a NULL +-- matching distribution is returned when there is opfamily mismatch between join +-- columns. +explain select * from foo_varchar inner join random_dis_char on foo_varchar.a=random_dis_char.y; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.48 rows=1 width=16) + -> Nested Loop (cost=0.00..1324032.48 rows=1 width=16) + Join Filter: ((foo_varchar.a)::bpchar = random_dis_char.y) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on random_dis_char (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on foo_varchar (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(7 rows) + +select * from foo_varchar inner join random_dis_char on foo_varchar.a=random_dis_char.y; + a | y +------+------- + 2 | 2 + 3 | 3 + 1 | 1 +(3 rows) + +explain select * from bar_char inner join random_dis_varchar on bar_char.p=random_dis_varchar.x; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.48 rows=1 width=16) + -> Nested Loop (cost=0.00..1324032.48 rows=1 width=16) + Join Filter: (bar_char.p = (random_dis_varchar.x)::bpchar) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on random_dis_varchar (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on bar_char (cost=0.00..431.00 rows=1 width=8) + Optimizer: GPORCA +(7 rows) + +select * from bar_char inner join random_dis_varchar on bar_char.p=random_dis_varchar.x; + p | x +-------+------ + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +drop table foo_varchar; +drop table bar_char; +drop table random_dis_varchar; +drop table random_dis_char; +set optimizer_enable_hashjoin to on; +reset enable_hashjoin; +reset enable_nestloop; +----------------------------------------------------------------- +-- Test cases to check if ORCA generates correct result +-- for "Left Semi Join with replicated outer table" +----------------------------------------------------------------- +drop table if exists repli_t1; +NOTICE: table "repli_t1" does not exist, skipping +drop table if exists dist_t1; +NOTICE: table "dist_t1" does not exist, skipping +create table repli_t1 (a int) distributed replicated; +insert into repli_t1 values(1); +analyze repli_t1; +create table dist_t1 (a int , b int) distributed by (a); +insert into dist_t1 select i, 1 from generate_series(1, 5) i; +analyze dist_t1; +-- No explicitly defined primary key for replicated table +--------------------------------------------------------- +-- Outer - replicated, Inner - distributed table +explain (costs off) select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b); + QUERY PLAN +-------------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 + SubPlan 1 + -> Result + Filter: (repli_t1.a >= dist_t1.b) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 + Optimizer: GPORCA +(11 rows) + +select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b); + a +--- + 1 +(1 row) + +explain (costs off) select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3 +where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b); + QUERY PLAN +-------------------------------------------------------------------- + Hash Join + Hash Cond: (t1.a = t2.a) + -> Result + Filter: (SubPlan 1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 t1 + SubPlan 1 + -> Result + Filter: (t1.a >= t4.b) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 t4 + -> Hash + -> Gather Motion 1:1 (slice3; segments: 1) + -> Seq Scan on repli_t1 t2 + Optimizer: GPORCA +(16 rows) + +select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3 +where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b); + aval1 | aval2 +-------+------- + 1 | 1 +(1 row) + +create index idx_repl_t1_a ON repli_t1 using btree(a); +analyze repli_t1; +explain (costs off) select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b); + QUERY PLAN +-------------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 + SubPlan 1 + -> Result + Filter: (repli_t1.a >= dist_t1.b) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 + Optimizer: GPORCA +(11 rows) + +select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b); + a +--- + 1 +(1 row) + +drop index idx_repl_t1_a; +-- Outer - distributed, Inner - replicated table +explain (costs off) select * from dist_t1 where exists ( select 1 from repli_t1 where repli_t1.a >= dist_t1.b); + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on dist_t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Seq Scan on repli_t1 + Filter: (a >= dist_t1.b) + Optimizer: GPORCA +(7 rows) + +select * from dist_t1 where exists ( select 1 from repli_t1 where repli_t1.a >= dist_t1.b); + a | b +---+--- + 1 | 1 + 5 | 1 + 2 | 1 + 3 | 1 + 4 | 1 +(5 rows) + +-- Both replicated table +explain (costs off) select * from repli_t1 as t1 where exists ( select 1 from repli_t1 as t2 where t1.a >= t2.a); + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Seq Scan on repli_t1 t2 + Filter: (t1.a >= a) + Optimizer: GPORCA +(7 rows) + +select * from repli_t1 as t1 where exists ( select 1 from repli_t1 as t2 where t1.a >= t2.a); + a +--- + 1 +(1 row) + +-- Outer - Universal, Inner - distributed table +explain (costs off) select * from generate_series(1, 5) univ_t where exists ( select 1 from dist_t1 where univ_t >= dist_t1.b); + QUERY PLAN +------------------------------------------------------------------ + Result + Filter: (SubPlan 1) + -> Function Scan on generate_series + SubPlan 1 + -> Result + Filter: (generate_series.generate_series >= dist_t1.b) + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on dist_t1 + Optimizer: GPORCA +(10 rows) + +select * from generate_series(1, 5) univ_t where exists ( select 1 from dist_t1 where univ_t >= dist_t1.b); + univ_t +-------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Outer - distributed, Inner - universal table +explain (costs off) select * from dist_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= dist_t1.b); + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Semi Join + Join Filter: (generate_series.generate_series >= dist_t1.b) + -> Seq Scan on dist_t1 + -> Materialize + -> Function Scan on generate_series + Optimizer: GPORCA +(7 rows) + +select * from dist_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= dist_t1.b); + a | b +---+--- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +-- Outer - replicated, Inner - universal table +explain (costs off)select * from repli_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= repli_t1.a); + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Result + Filter: (generate_series.generate_series >= repli_t1.a) + -> Function Scan on generate_series + Optimizer: GPORCA +(8 rows) + +select * from repli_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= repli_t1.a); + a +--- + 1 +(1 row) + +-- Outer - universal, Inner - replicated table +explain (costs off) select * from generate_series(1, 5) univ_t where exists ( select 1 from repli_t1 where univ_t >= repli_t1.a); + QUERY PLAN +------------------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Function Scan on generate_series + SubPlan 1 + -> Result + Filter: (generate_series.generate_series >= repli_t1.a) + -> Materialize + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on repli_t1 + Optimizer: GPORCA +(10 rows) + +select * from generate_series(1, 5) univ_t where exists ( select 1 from repli_t1 where univ_t >= repli_t1.a); + univ_t +-------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Explicitly defined primary key for replicated table +--------------------------------------------------------- +drop table if exists repli_t1_pk; +NOTICE: table "repli_t1_pk" does not exist, skipping +drop table if exists repli_t2_pk; +NOTICE: table "repli_t2_pk" does not exist, skipping +drop table if exists repli_t3_pk; +NOTICE: table "repli_t3_pk" does not exist, skipping +drop table if exists repli_t4_pk; +NOTICE: table "repli_t4_pk" does not exist, skipping +-- Outer - replicated, Inner - distributed table +create table repli_t1_pk (a int, PRIMARY KEY(a)) distributed replicated; +insert into repli_t1_pk values(1); +analyze repli_t1_pk; +create table repli_t2_pk (a int, CONSTRAINT key1_t2 PRIMARY KEY(a) ) distributed replicated; +insert into repli_t2_pk values(1); +analyze repli_t2_pk; +create table repli_t3_pk (a int,b int, c int, d int, CONSTRAINT key1_t3 UNIQUE (c,d)) distributed replicated; +insert into repli_t3_pk values(1,2,3,4); +analyze repli_t3_pk; +create table repli_t4_pk (a int,b int, c int, d int, CONSTRAINT key1_t4 PRIMARY KEY(a) , CONSTRAINT key2_t4 UNIQUE (c,d)) distributed replicated; +insert into repli_t4_pk values(1,2,3,4); +analyze repli_t4_pk; +explain (costs off) select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> GroupAggregate + Group Key: repli_t1_pk.a + -> Sort + Sort Key: repli_t1_pk.a + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 + -> Index Only Scan using repli_t1_pk_pkey on repli_t1_pk + Index Cond: (a >= dist_t1.b) + Optimizer: GPORCA +(12 rows) + +select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b); + a +--- + 1 +(1 row) + +create index idx_repli_t1_pk_a ON repli_t1_pk using btree(a); +analyze repli_t1_pk; +explain (costs off) select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> GroupAggregate + Group Key: repli_t1_pk.a + -> Sort + Sort Key: repli_t1_pk.a + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 + -> Index Only Scan using repli_t1_pk_pkey on repli_t1_pk + Index Cond: (a >= dist_t1.b) + Optimizer: GPORCA +(12 rows) + +select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b); + a +--- + 1 +(1 row) + +drop index idx_repli_t1_pk_a; +explain (costs off) select * from repli_t2_pk where exists ( select 1 from dist_t1 where repli_t2_pk.a >= dist_t1.b); + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> GroupAggregate + Group Key: repli_t2_pk.a + -> Sort + Sort Key: repli_t2_pk.a + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on dist_t1 + -> Index Only Scan using key1_t2 on repli_t2_pk + Index Cond: (a >= dist_t1.b) + Optimizer: GPORCA +(12 rows) + +select * from repli_t2_pk where exists ( select 1 from dist_t1 where repli_t2_pk.a >= dist_t1.b); + a +--- + 1 +(1 row) + +explain (costs off) select * from repli_t3_pk where exists ( select 1 from dist_t1 where repli_t3_pk.a >= dist_t1.b); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: repli_t3_pk.a, repli_t3_pk.b, repli_t3_pk.c, repli_t3_pk.d + -> Sort + Sort Key: repli_t3_pk.c, repli_t3_pk.d + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: repli_t3_pk.c, repli_t3_pk.d + -> Nested Loop + Join Filter: (repli_t3_pk.a >= dist_t1.b) + -> Seq Scan on repli_t3_pk + -> Materialize + -> Seq Scan on dist_t1 + Optimizer: GPORCA +(13 rows) + +select * from repli_t3_pk where exists ( select 1 from dist_t1 where repli_t3_pk.a >= dist_t1.b); + a | b | c | d +---+---+---+--- + 1 | 2 | 3 | 4 +(1 row) + +explain (costs off) select * from repli_t4_pk where exists ( select 1 from dist_t1 where repli_t4_pk.a >= dist_t1.b); + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: repli_t4_pk.a, repli_t4_pk.b, repli_t4_pk.c, repli_t4_pk.d + -> Sort + Sort Key: repli_t4_pk.a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: repli_t4_pk.a + -> Nested Loop + Join Filter: (repli_t4_pk.a >= dist_t1.b) + -> Seq Scan on repli_t4_pk + -> Materialize + -> Seq Scan on dist_t1 + Optimizer: GPORCA +(13 rows) + +select * from repli_t4_pk where exists ( select 1 from dist_t1 where repli_t4_pk.a >= dist_t1.b); + a | b | c | d +---+---+---+--- + 1 | 2 | 3 | 4 +(1 row) + +drop table if exists repli_t1; +drop table if exists dist_t1; +drop table if exists repli_t1_pk; +drop table if exists repli_t2_pk; +drop table if exists repli_t3_pk; +drop table if exists repli_t4_pk; diff --git a/contrib/pax_storage/src/test/regress/expected/join_optimizer.out b/contrib/pax_storage/src/test/regress/expected/join_optimizer.out index faae07a1430..c6bcc5b1410 100644 --- a/contrib/pax_storage/src/test/regress/expected/join_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/join_optimizer.out @@ -1643,7 +1643,7 @@ SELECT * FROM (J1_TBL JOIN J2_TBL USING (i)) AS x WHERE J1_TBL.t = 'one'; -- er ERROR: invalid reference to FROM-clause entry for table "j1_tbl" LINE 1: ... * FROM (J1_TBL JOIN J2_TBL USING (i)) AS x WHERE J1_TBL.t =... ^ -HINT: There is an entry for table "j1_tbl", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "j1_tbl", but it cannot be referenced from this part of the query. SELECT * FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE x.i = 1; -- ok i | j | t | k ---+---+-----+---- @@ -1909,12 +1909,38 @@ SELECT * -- -- semijoin selectivity for <> -- --- FIXME: It's a unstable case in PAX, the result depends on `analyze tenk1` --- explain (costs off) --- select * from int4_tbl i4, tenk1 a --- where exists(select * from tenk1 b --- where a.twothousand = b.twothousand and a.fivethous <> b.fivethous) --- and i4.f1 = a.tenthous; +explain (costs off) +select * from int4_tbl i4, tenk1 a +where exists(select * from tenk1 b + where a.twothousand = b.twothousand and a.fivethous <> b.fivethous) + and i4.f1 = a.tenthous; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: i4.f1, i4.ctid, i4.gp_segment_id, a.unique1, a.unique2, a.two, a.four, a.ten, a.twenty, a.hundred, a.thousand, a.twothousand, a.fivethous, a.tenthous, a.odd, a.even, a.stringu1, a.stringu2, a.string4, a.ctid, a.gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: i4.ctid, i4.gp_segment_id, a.ctid, a.gp_segment_id + -> GroupAggregate + Group Key: i4.f1, i4.ctid, i4.gp_segment_id, a.unique1, a.unique2, a.two, a.four, a.ten, a.twenty, a.hundred, a.thousand, a.twothousand, a.fivethous, a.tenthous, a.odd, a.even, a.stringu1, a.stringu2, a.string4, a.ctid, a.gp_segment_id + -> Sort + Sort Key: i4.ctid, i4.gp_segment_id, a.ctid, a.gp_segment_id + -> Hash Join + Hash Cond: (b.twothousand = a.twothousand) + Join Filter: (a.fivethous <> b.fivethous) + -> Seq Scan on tenk1 b + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: (a.tenthous = i4.f1) + -> Seq Scan on tenk1 a + Filter: (NOT (twothousand IS NULL)) + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl i4 + Optimizer: GPORCA +(23 rows) + -- -- More complicated constructs -- @@ -1937,7 +1963,7 @@ INSERT INTO t2 VALUES ( 'ee', 42 ); INSERT INTO t3 VALUES ( 'bb', 13 ); INSERT INTO t3 VALUES ( 'cc', 23 ); INSERT INTO t3 VALUES ( 'dd', 33 ); -SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) order by name; +SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name); name | n | n | n ------+----+----+---- bb | 11 | 12 | 13 @@ -2265,17 +2291,17 @@ rollback; -- explain (costs off) select aa, bb, unique1, unique1 - from tenk1 right join b on aa = unique1 + from tenk1 right join b_star on aa = unique1 where bb < bb and bb is null; QUERY PLAN ------------------------------------- Result One-Time Filter: false - Optimizer: Postgres query optimizer + Optimizer: GPORCA (3 rows) select aa, bb, unique1, unique1 - from tenk1 right join b on aa = unique1 + from tenk1 right join b_star on aa = unique1 where bb < bb and bb is null; aa | bb | unique1 | unique1 ----+----+---------+--------- @@ -2288,30 +2314,26 @@ explain (costs off) select * from int8_tbl i1 left join (int8_tbl i2 join (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2 order by 1, 2; - QUERY PLAN ------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: i1.q1, i1.q2 - -> Sort - Sort Key: i1.q1, i1.q2 - -> Hash Left Join - Hash Cond: (i1.q2 = i2.q2) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: i1.q2 - -> Seq Scan on int8_tbl i1 - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: i2.q2 - -> Hash Join - Hash Cond: (i2.q1 = ((123))::bigint) - -> Seq Scan on int8_tbl i2 - Filter: (q1 = 123) - -> Hash + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: i1.q1, i1.q2 + -> Hash Left Join + Hash Cond: (i1.q2 = i2.q2) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl i1 + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (i2.q1 = ((123))::bigint) + -> Seq Scan on int8_tbl i2 + Filter: (q1 = 123) + -> Hash + -> Result + Filter: ((123) = 123) -> Result - Filter: ((123) = 123) - -> Result - Optimizer: Pivotal Optimizer (GPORCA) -(21 rows) + Optimizer: GPORCA +(17 rows) select * from int8_tbl i1 left join (int8_tbl i2 join (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2 @@ -2326,8 +2348,8 @@ order by 1, 2; (5 rows) -- --- regression test: check a case where join_clause_is_movable_into() gives --- an imprecise result, causing an assertion failure +-- regression test: check a case where join_clause_is_movable_into() +-- used to give an imprecise result, causing an assertion failure -- select count(*) from @@ -2355,12 +2377,12 @@ select a.f1, b.f1, t.thousand, t.tenthous from (select sum(f1)+1 as f1 from int4_tbl i4a) a, (select sum(f1) as f1 from int4_tbl i4b) b where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous; - QUERY PLAN ---------------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - -> Hash Join - Hash Cond: ((sum(i4b.f1)) = (((sum(i4a.f1)) + 1))) - Join Filter: ((((((sum(i4a.f1)) + 1)) + (sum(i4b.f1))) + 999) = (t.tenthous)::bigint) + QUERY PLAN +--------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((sum(i4b.f1)) = ((sum(i4a.f1) + 1))) + Join Filter: (((((sum(i4a.f1) + 1)) + (sum(i4b.f1))) + 999) = (t.tenthous)::bigint) + -> Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true -> Broadcast Motion 1:3 (slice2) @@ -2370,15 +2392,13 @@ where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous; -> Seq Scan on int4_tbl i4b -> Index Only Scan using tenk1_thous_tenthous on tenk1 t Index Cond: (thousand = (sum(i4b.f1))) - -> Hash - -> Result - -> Broadcast Motion 1:3 (slice4) - -> Finalize Aggregate - -> Gather Motion 3:1 (slice5; segments: 3) - -> Partial Aggregate - -> Seq Scan on int4_tbl i4a - Optimizer: Pivotal Optimizer (GPORCA) -(21 rows) + -> Hash + -> Finalize Aggregate + -> Gather Motion 3:1 (slice4; segments: 3) + -> Partial Aggregate + -> Seq Scan on int4_tbl i4a + Optimizer: GPORCA +(19 rows) select a.f1, b.f1, t.thousand, t.tenthous from tenk1 t, @@ -2392,149 +2412,586 @@ where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous; reset enable_hashjoin; reset enable_nestloop; -- --- check a case where we formerly got confused by conflicting sort orders --- in redundant merge join path keys +-- checks for correct handling of quals in multiway outer joins -- -set enable_mergejoin = true; -set enable_hashjoin = false; explain (costs off) -select * from - j1_tbl full join - (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl - on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k; - QUERY PLAN ----------------------------------------------------------------------- +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null; + QUERY PLAN +-------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Hash Full Join - Hash Cond: ((j1_tbl.i = j2_tbl.i) AND (j1_tbl.i = j2_tbl.k)) - -> Seq Scan on j1_tbl - -> Hash - -> Seq Scan on j2_tbl + -> Nested Loop + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Result + Filter: (t4.f1 IS NULL) + -> Nested Loop Left Join + Join Filter: (t3.f1 > 1) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl t3 + Filter: (f1 > 0) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t4 Optimizer: GPORCA -(7 rows) +(21 rows) -select * from - j1_tbl full join - (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl - on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k; --order none - i | j | t | i | k ----+---+-------+---+---- - | | | 0 | - 0 | | zero | | - | | | 1 | -1 - 1 | 4 | one | | - | | | 5 | -5 - | | | 5 | -5 - 5 | 0 | five | | - 6 | 6 | six | | - 2 | 3 | two | 2 | 2 - | | | 2 | 4 - | | | 3 | -3 - | | | | 0 - | | | | - 3 | 2 | three | | - 4 | 1 | four | | - 7 | 7 | seven | | - 8 | 8 | eight | | - | | null | | - | 0 | zero | | -(19 rows) +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null; + f1 +---- +(0 rows) -reset enable_mergejoin; -reset enable_hashjoin; --- --- a different check for handling of redundant sort keys in merge joins --- -set enable_mergejoin = true; -set enable_hashjoin = false; explain (costs off) -select count(*) from - (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x - left join - (select * from tenk1 y order by y.unique2) y - on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------- - Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate +select * +from int4_tbl t1 left join int4_tbl t2 on true + left join int4_tbl t3 on t2.f1 > 0 + left join int4_tbl t4 on t3.f1 > 0; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: (t3.f1 > 0) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 0) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t4 + Optimizer: GPORCA +(18 rows) + +explain (costs off) +select * from onek t1 + left join onek t2 on t1.unique1 = t2.unique1 + left join onek t3 on t2.unique1 != t3.unique1 + left join onek t4 on t3.unique1 = t4.unique1; + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (t3.unique1 = t4.unique1) + -> Nested Loop Left Join + Join Filter: (t2.unique1 <> t3.unique1) -> Hash Left Join - Hash Cond: ((x.thousand = y.unique2) AND (x.twothousand = y.hundred) AND (x.fivethous = y.unique2)) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: x.thousand, x.twothousand, x.thousand - -> Seq Scan on tenk1 x + Hash Cond: (t1.unique1 = t2.unique1) + -> Seq Scan on onek t1 -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: y.unique2, y.hundred, y.unique2 - -> Seq Scan on tenk1 y - Optimizer: Pivotal Optimizer (GPORCA) -(13 rows) - -select count(*) from - (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x - left join - (select * from tenk1 y order by y.unique2) y - on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2; - count -------- - 10000 -(1 row) + -> Seq Scan on onek t2 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on onek t3 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on onek t4 + Optimizer: GPORCA +(17 rows) -reset enable_mergejoin; -reset enable_hashjoin; --- --- Clean up --- -DROP TABLE t1; -DROP TABLE t2; -DROP TABLE t3; -DROP TABLE J1_TBL; -DROP TABLE J2_TBL; --- Both DELETE and UPDATE allow the specification of additional tables --- to "join" against to determine which rows should be modified. -CREATE TEMP TABLE t1 (a int, b int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE TEMP TABLE t2 (a int, b int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE TEMP TABLE t3 (x int, y int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE TEMP TABLE t4 (x int, y int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -INSERT INTO t1 VALUES (5, 10); -INSERT INTO t1 VALUES (15, 20); -INSERT INTO t1 VALUES (100, 100); -INSERT INTO t1 VALUES (200, 1000); -INSERT INTO t2 VALUES (200, 2000); -INSERT INTO t3 VALUES (5, 20); -INSERT INTO t3 VALUES (6, 7); -INSERT INTO t3 VALUES (7, 8); -INSERT INTO t3 VALUES (500, 100); -INSERT INTO t4 SELECT * FROM t3; -DELETE FROM t3 USING t1 table1 WHERE t3.x = table1.a; -SELECT * FROM t3; - x | y ------+----- - 7 | 8 - 500 | 100 - 6 | 7 -(3 rows) +explain (costs off) +select * from int4_tbl t1 + left join (select now() from int4_tbl t2 + left join int4_tbl t3 on t2.f1 = t3.f1 + left join int4_tbl t4 on t3.f1 = t4.f1) s on true + inner join int4_tbl t5 on true; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t1 + -> Seq Scan on int4_tbl t5 + -> Materialize + -> Result + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Left Join + Hash Cond: (t3.f1 = t4.f1) + -> Hash Left Join + Hash Cond: (t2.f1 = t3.f1) + -> Seq Scan on int4_tbl t2 + -> Hash + -> Seq Scan on int4_tbl t3 + -> Hash + -> Seq Scan on int4_tbl t4 + Optimizer: GPORCA +(21 rows) -DELETE FROM t4 USING t1 JOIN t2 USING (a) WHERE t4.x > t1.a; -SELECT * FROM t4; - x | y ----+---- - 7 | 8 - 5 | 20 - 6 | 7 -(3 rows) +explain (costs off) +select * from int4_tbl t1 + left join int4_tbl t2 on true + left join int4_tbl t3 on true + left join int4_tbl t4 on t2.f1 = t3.f1; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: (t2.f1 = t3.f1) + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t4 + Optimizer: GPORCA +(18 rows) -DELETE FROM t3 USING t3 t3_other WHERE t3.x = t3_other.x AND t3.y = t3_other.y; -SELECT * FROM t3; - x | y +explain (costs off) +select * from int4_tbl t1 + left join int4_tbl t2 on true + left join int4_tbl t3 on t2.f1 = t3.f1 + left join int4_tbl t4 on t3.f1 != t4.f1; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: (t3.f1 <> t4.f1) + -> Hash Left Join + Hash Cond: (t2.f1 = t3.f1) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t2 + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t4 + Optimizer: GPORCA +(18 rows) + +explain (costs off) +select * from int4_tbl t1 + left join (int4_tbl t2 left join int4_tbl t3 on t2.f1 > 0) on t2.f1 > 1 + left join int4_tbl t4 on t2.f1 > 2 and t3.f1 > 3 +where t1.f1 = coalesce(t2.f1, 1); + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: ((t2.f1 > 2) AND (t3.f1 > 3)) + -> Result + Filter: (t1.f1 = COALESCE(t2.f1, 1)) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 0) + -> Seq Scan on int4_tbl t2 + Filter: (f1 > 1) + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t4 + Optimizer: GPORCA +(21 rows) + +explain (costs off) +select * from int4_tbl t1 + left join ((select t2.f1 from int4_tbl t2 + left join int4_tbl t3 on t2.f1 > 0 + where t3.f1 is null) s + left join tenk1 t4 on s.f1 > 1) + on s.f1 = t1.f1; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (t2.f1 = t1.f1) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 1) + -> Result + Filter: (t3.f1 IS NULL) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 0) + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on tenk1 t4 + -> Hash + -> Seq Scan on int4_tbl t1 + Optimizer: GPORCA +(19 rows) + +explain (costs off) +select * from int4_tbl t1 + left join ((select t2.f1 from int4_tbl t2 + left join int4_tbl t3 on t2.f1 > 0 + where t2.f1 <> coalesce(t3.f1, -1)) s + left join tenk1 t4 on s.f1 > 1) + on s.f1 = t1.f1; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (t2.f1 = t1.f1) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 1) + -> Result + Filter: (t2.f1 <> COALESCE(t3.f1, '-1'::integer)) + -> Nested Loop Left Join + Join Filter: (t2.f1 > 0) + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on tenk1 t4 + -> Hash + -> Seq Scan on int4_tbl t1 + Optimizer: GPORCA +(19 rows) + +explain (costs off) +select * from onek t1 + left join onek t2 on t1.unique1 = t2.unique1 + left join onek t3 on t2.unique1 = t3.unique1 + left join onek t4 on t3.unique1 = t4.unique1 and t2.unique2 = t4.unique2; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: ((t3.unique1 = t4.unique1) AND (t2.unique2 = t4.unique2)) + -> Hash Left Join + Hash Cond: (t2.unique1 = t3.unique1) + -> Hash Left Join + Hash Cond: (t1.unique1 = t2.unique1) + -> Seq Scan on onek t1 + -> Hash + -> Seq Scan on onek t2 + -> Hash + -> Seq Scan on onek t3 + -> Hash + -> Seq Scan on onek t4 + Optimizer: GPORCA +(15 rows) + +explain (costs off) +select * from int8_tbl t1 left join + (int8_tbl t2 left join int8_tbl t3 full join int8_tbl t4 on false on false) + left join int8_tbl t5 on t2.q1 = t5.q1 +on t2.q2 = 123; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int8_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Left Join + Hash Cond: (t2.q1 = t5.q1) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int8_tbl t2 + Filter: (q2 = 123) + -> Result + One-Time Filter: false + -> Hash + -> Seq Scan on int8_tbl t5 + Optimizer: GPORCA +(17 rows) + +explain (costs off) +select * from int8_tbl t1 + left join int8_tbl t2 on true + left join lateral + (select * from int8_tbl t3 where t3.q1 = t2.q1 offset 0) s + on t2.q1 = 1; + QUERY PLAN +----------------------------------------------------------------------------- + Nested Loop Left Join + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl t1 + -> Materialize + -> Nested Loop Left Join + Join Filter: (t2.q1 = 1) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on int8_tbl t2 + -> Materialize + -> Result + Filter: (t3.q1 = t2.q1) + -> Materialize + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on int8_tbl t3 + Optimizer: GPORCA +(15 rows) + +explain (costs off) +select * from int8_tbl t1 + left join int8_tbl t2 on true + left join lateral + (select * from generate_series(t2.q1, 100)) s + on t2.q1 = 1; + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + -> Seq Scan on int8_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop Left Join + Join Filter: (t2.q1 = 1) + -> Seq Scan on int8_tbl t2 + -> Function Scan on generate_series + Optimizer: GPORCA +(10 rows) + +explain (costs off) +select * from int8_tbl t1 + left join int8_tbl t2 on true + left join lateral + (select t2.q1 from int8_tbl t3) s + on t2.q1 = 1; +ERROR: could not devise a query plan for the given query +explain (costs off) +select * from onek t1 + left join onek t2 on true + left join lateral + (select * from onek t3 where t3.two = t2.two offset 0) s + on t2.unique1 = 1; + QUERY PLAN +----------------------------------------------------------------------------------- + Nested Loop Left Join + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on onek t1 + -> Materialize + -> Nested Loop Left Join + Join Filter: (t2.unique1 = 1) + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on onek t2 + -> Materialize + -> Memoize + Cache Key: t2.two + Cache Mode: binary + -> Result + Filter: (t3.two = t2.two) + -> Materialize + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on onek t3 + Optimizer: GPORCA +(18 rows) + +-- +-- check a case where we formerly got confused by conflicting sort orders +-- in redundant merge join path keys +-- +set enable_mergejoin = true; +set enable_hashjoin = false; +explain (costs off) +select * from + j1_tbl full join + (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl + on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k; + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Full Join + Hash Cond: ((j1_tbl.i = j2_tbl.i) AND (j1_tbl.i = j2_tbl.k)) + -> Seq Scan on j1_tbl + -> Hash + -> Seq Scan on j2_tbl + Optimizer: GPORCA +(7 rows) + +select * from + j1_tbl full join + (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl + on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k; --order none + i | j | t | i | k +---+---+-------+---+---- + | | | 0 | + 0 | | zero | | + | | | 1 | -1 + 1 | 4 | one | | + | | | 5 | -5 + | | | 5 | -5 + 5 | 0 | five | | + 6 | 6 | six | | + 2 | 3 | two | 2 | 2 + | | | 2 | 4 + | | | 3 | -3 + | | | | 0 + | | | | + 3 | 2 | three | | + 4 | 1 | four | | + 7 | 7 | seven | | + 8 | 8 | eight | | + | | null | | + | 0 | zero | | +(19 rows) + +reset enable_mergejoin; +reset enable_hashjoin; +-- +-- a different check for handling of redundant sort keys in merge joins +-- +set enable_mergejoin = true; +set enable_hashjoin = false; +explain (costs off) +select count(*) from + (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x + left join + (select * from tenk1 y order by y.unique2) y + on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Left Join + Hash Cond: ((x.thousand = y.unique2) AND (x.twothousand = y.hundred) AND (x.fivethous = y.unique2)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: x.thousand, x.twothousand, x.thousand + -> Seq Scan on tenk1 x + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: y.unique2, y.hundred, y.unique2 + -> Seq Scan on tenk1 y + Optimizer: Pivotal Optimizer (GPORCA) +(13 rows) + +select count(*) from + (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x + left join + (select * from tenk1 y order by y.unique2) y + on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2; + count +------- + 10000 +(1 row) + +reset enable_mergejoin; +reset enable_hashjoin; +set enable_hashjoin = 0; +set enable_nestloop = 0; +set enable_hashagg = 0; +-- +-- Check that we use the pathkeys from a prefix of the group by / order by +-- clause for the join pathkeys when that prefix covers all join quals. We +-- expect this to lead to an incremental sort for the group by / order by. +-- +explain (costs off) +select x.thousand, x.twothousand, count(*) +from tenk1 x inner join tenk1 y on x.thousand = y.thousand +group by x.thousand, x.twothousand +order by x.thousand desc, x.twothousand; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: x.thousand, x.twothousand + -> Sort + Sort Key: x.thousand DESC, x.twothousand + -> HashAggregate + Group Key: x.thousand, x.twothousand + -> Hash Join + Hash Cond: (x.thousand = y.thousand) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: x.thousand + -> Seq Scan on tenk1 x + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: y.thousand + -> Seq Scan on tenk1 y + Optimizer: GPORCA +(16 rows) + +reset enable_hashagg; +reset enable_nestloop; +reset enable_hashjoin; +-- +-- Clean up +-- +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +DROP TABLE J1_TBL; +DROP TABLE J2_TBL; +-- Both DELETE and UPDATE allow the specification of additional tables +-- to "join" against to determine which rows should be modified. +CREATE TEMP TABLE t1 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TEMP TABLE t2 (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TEMP TABLE t3 (x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TEMP TABLE t4 (x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO t1 VALUES (5, 10); +INSERT INTO t1 VALUES (15, 20); +INSERT INTO t1 VALUES (100, 100); +INSERT INTO t1 VALUES (200, 1000); +INSERT INTO t2 VALUES (200, 2000); +INSERT INTO t3 VALUES (5, 20); +INSERT INTO t3 VALUES (6, 7); +INSERT INTO t3 VALUES (7, 8); +INSERT INTO t3 VALUES (500, 100); +INSERT INTO t4 SELECT * FROM t3; +DELETE FROM t3 USING t1 table1 WHERE t3.x = table1.a; +SELECT * FROM t3; + x | y +-----+----- + 7 | 8 + 500 | 100 + 6 | 7 +(3 rows) + +DELETE FROM t4 USING t1 JOIN t2 USING (a) WHERE t4.x > t1.a; +SELECT * FROM t4; + x | y +---+---- + 7 | 8 + 5 | 20 + 6 | 7 +(3 rows) + +DELETE FROM t3 USING t3 t3_other WHERE t3.x = t3_other.x AND t3.y = t3_other.y; +SELECT * FROM t3; + x | y ---+--- (0 rows) @@ -2558,6 +3015,39 @@ ERROR: column t1.x does not exist LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x); ^ HINT: Perhaps you meant to reference the column "t3.x". +-- Test matching of locking clause with wrong alias +select t1.*, t2.*, unnamed_join.* from + t1 join t2 on (t1.a = t2.a), t3 as unnamed_join + for update of unnamed_join; + a | b | a | b | x | y +---+---+---+---+---+--- +(0 rows) + +select foo.*, unnamed_join.* from + t1 join t2 using (a) as foo, t3 as unnamed_join + for update of unnamed_join; + a | x | y +---+---+--- +(0 rows) + +select foo.*, unnamed_join.* from + t1 join t2 using (a) as foo, t3 as unnamed_join + for update of foo; +ERROR: FOR UPDATE cannot be applied to a join +LINE 3: for update of foo; + ^ +select bar.*, unnamed_join.* from + (t1 join t2 using (a) as foo) as bar, t3 as unnamed_join + for update of foo; +ERROR: relation "foo" in FOR UPDATE clause not found in FROM clause +LINE 3: for update of foo; + ^ +select bar.*, unnamed_join.* from + (t1 join t2 using (a) as foo) as bar, t3 as unnamed_join + for update of bar; +ERROR: FOR UPDATE cannot be applied to a join +LINE 3: for update of bar; + ^ -- -- regression test for 8.1 merge right join bug -- @@ -2590,6 +3080,68 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol; 1 | 11 | 22 | 11 (3 rows) +reset enable_hashjoin; +reset enable_nestloop; +-- +-- regression test for bug #18522 (merge-right-anti-join in inner_unique cases) +-- +create temp table tbl_ra(a int unique, b int); +insert into tbl_ra select i, i%100 from generate_series(1,1000)i; +create index on tbl_ra (b); +analyze tbl_ra; +set enable_hashjoin to off; +set enable_nestloop to off; +-- ensure we get a merge right anti join +explain (costs off) +select * from tbl_ra t1 +where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: (COALESCE((count(*)), '0'::bigint) = '0'::bigint) + -> Hash Left Join + Hash Cond: (t1.a = t2.b) + -> Bitmap Heap Scan on tbl_ra t1 + Recheck Cond: (b < 2) + -> Bitmap Index Scan on tbl_ra_b_idx + Optimizer: GPORCA + Index Cond: (b < 2) + -> Hash + -> HashAggregate + Group Key: t2.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.b + -> Seq Scan on tbl_ra t2 + Optimizer: GPORCA +(16 rows) + +-- and check we get the expected results +select * from tbl_ra t1 +where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2; + a | b +------+--- + 100 | 0 + 101 | 1 + 200 | 0 + 201 | 1 + 300 | 0 + 301 | 1 + 400 | 0 + 401 | 1 + 500 | 0 + 501 | 1 + 600 | 0 + 601 | 1 + 700 | 0 + 701 | 1 + 800 | 0 + 801 | 1 + 900 | 0 + 901 | 1 + 1000 | 0 +(19 rows) + reset enable_hashjoin; reset enable_nestloop; -- @@ -2636,28 +3188,150 @@ create temp table tt3(f1 int, f2 text); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x; -create index tt3i on tt3(f1); analyze tt3; create temp table tt4(f1 int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into tt4 values (0),(1),(9999); analyze tt4; +set enable_nestloop to off; +EXPLAIN (COSTS OFF) SELECT a.f1 FROM tt4 a LEFT JOIN ( SELECT b.f1 FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1) - WHERE c.f1 IS NULL + WHERE COALESCE(c.f1, 0) = 0 ) AS d ON (a.f1 = d.f1) -WHERE d.f1 IS NULL; +WHERE COALESCE(d.f1, 0) = 0 +ORDER BY 1; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: a.f1 + -> Sort + Sort Key: a.f1 + -> Result + Filter: (COALESCE(b.f1, 0) = 0) + -> Hash Right Join + Hash Cond: (b.f1 = a.f1) + -> Result + Filter: (COALESCE(c.f1, 0) = 0) + -> Hash Left Join + Hash Cond: (b.f1 = c.f1) + -> Seq Scan on tt3 b + -> Hash + -> Seq Scan on tt3 c + -> Hash + -> Seq Scan on tt4 a + Optimizer: GPORCA +(18 rows) + +SELECT a.f1 +FROM tt4 a +LEFT JOIN ( + SELECT b.f1 + FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1) + WHERE COALESCE(c.f1, 0) = 0 +) AS d ON (a.f1 = d.f1) +WHERE COALESCE(d.f1, 0) = 0 +ORDER BY 1; f1 ------ - 9999 0 1 + 9999 (3 rows) +reset enable_nestloop; +-- +-- basic semijoin and antijoin recognition tests +-- +explain (costs off) +select a.* from tenk1 a +where unique1 in (select unique2 from tenk1 b); + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on tenk1 b + Optimizer: GPORCA +(9 rows) + +-- sadly, this is not an antijoin +explain (costs off) +select a.* from tenk1 a +where unique1 not in (select unique2 from tenk1 b); + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Anti Semi (Not-In) Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on tenk1 b + Optimizer: GPORCA +(8 rows) + +explain (costs off) +select a.* from tenk1 a +where exists (select 1 from tenk1 b where a.unique1 = b.unique2); + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + Filter: (NOT (unique1 IS NULL)) + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on tenk1 b + Optimizer: GPORCA +(11 rows) + +explain (costs off) +select a.* from tenk1 a +where not exists (select 1 from tenk1 b where a.unique1 = b.unique2); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on tenk1 b + Optimizer: GPORCA +(10 rows) + +explain (costs off) +select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 +where b.unique2 is null; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: (b.unique2 IS NULL) + -> Hash Left Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on tenk1 a + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on tenk1 b + Optimizer: GPORCA +(11 rows) + -- -- regression test for proper handling of outer joins within antijoins -- @@ -2816,15 +3490,13 @@ begin; set enable_mergejoin = 1; set enable_hashjoin = 0; set enable_nestloop = 0; -set optimizer_enable_hashjoin = 0; -set optimizer_enable_mergejoin = 1; create temp table a (i integer); +create temp table b (x integer, y integer); +select * from a left join b on i = x and i = y and x = i; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -create temp table b (x integer, y integer); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -select * from a left join b on i = x and i = y and x = i; i | x | y ---+---+--- (0 rows) @@ -2836,12 +3508,14 @@ rollback; begin; create type mycomptype as (id int, v bigint); create temp table tidv (idv mycomptype); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. create index on tidv (idv); +analyze tidv; explain (costs off) select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv; - QUERY PLAN ------------------------------------------------------ +NOTICE: One or more columns in the following table(s) do not have statistics: tidv +HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. + QUERY PLAN +---------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true @@ -2854,12 +3528,10 @@ select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv; set enable_mergejoin = 0; set enable_hashjoin = 0; set enable_nestloop = 1; -set optimizer_enable_hashjoin = 0; -set optimizer_enable_mergejoin = 0; explain (costs off) select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv; - QUERY PLAN ------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true @@ -2869,8 +3541,6 @@ select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv; Optimizer: GPORCA (7 rows) -reset optimizer_enable_hashjoin; -reset optimizer_enable_mergejoin; rollback; -- -- test NULL behavior of whole-row Vars, per bug #5025 @@ -3026,18 +3696,15 @@ SELECT qq, unique1 Join Filter: true -> Result -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Merge Full Join - Merge Cond: ((COALESCE(b.q2, '-1'::bigint)) = (COALESCE(a.q1, '0'::bigint))) - -> Sort - Sort Key: (COALESCE(b.q2, '-1'::bigint)) - -> Redistribute Motion 3:3 (slice3; segments: 3) + -> Hash Full Join + Hash Cond: ((COALESCE(a.q1, '0'::bigint)) = (COALESCE(b.q2, '-1'::bigint))) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: (COALESCE(a.q1, '0'::bigint)) + -> Seq Scan on int8_tbl a + -> Hash + -> Redistribute Motion 3:3 (slice4; segments: 3) Hash Key: (COALESCE(b.q2, '-1'::bigint)) -> Seq Scan on int8_tbl b - -> Sort - Sort Key: (COALESCE(a.q1, '0'::bigint)) - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: (COALESCE(a.q1, '0'::bigint)) - -> Seq Scan on int8_tbl a -> Bitmap Heap Scan on tenk1 c Recheck Cond: (unique2 = (COALESCE((COALESCE(a.q1, '0'::bigint)), (COALESCE(b.q2, '-1'::bigint))))) -> Bitmap Index Scan on tenk1_unique2 @@ -3235,6 +3902,7 @@ ERROR: could not devise a query plan for the given query (pathnode.c:277) -> Materialize -> Seq Scan on int4_tbl i43 Filter: (f1 > 1) + Optimizer: GPORCA (12 rows) --end_ignore select * from @@ -3536,23 +4204,26 @@ where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; Join Filter: (t1.stringu1 > t2.stringu2) -> Gather Motion 3:1 (slice1; segments: 3) -> Hash Join - Hash Cond: ((0) = i1.f1) - -> Hash Join - Hash Cond: (t2.unique1 = (3)) - -> Seq Scan on tenk1 t2 - -> Hash - -> Hash Left Join - Hash Cond: ((1) = (1)) - -> Result - One-Time Filter: (11 < 42) - -> Result - -> Hash - -> Result + Hash Cond: (t2.unique1 = (3)) + -> Seq Scan on tenk1 t2 -> Hash -> Broadcast Motion 3:3 (slice2; segments: 3) - -> Seq Scan on int4_tbl i1 + -> Hash Join + Hash Cond: ((0) = i1.f1) + -> Result + One-Time Filter: (gp_execution_segment() = ###) + -> Hash Left Join + Hash Cond: ((1) = (1)) + -> Result + One-Time Filter: (11 < 42) + -> Result + -> Hash + -> Result + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl i1 -> Hash - -> Gather Motion 3:1 (slice3; segments: 3) + -> Gather Motion 3:1 (slice4; segments: 3) -> Bitmap Heap Scan on tenk1 t1 Recheck Cond: ((unique2 < 42) AND (unique2 = 11)) -> Bitmap Index Scan on tenk1_unique2 @@ -3626,6 +4297,7 @@ where b; -> Result -> Result One-Time Filter: (true) + Optimizer: GPORCA (6 rows) select * from @@ -3643,6 +4315,84 @@ where b; 0 | t | t (2 rows) +-- Test PHV in a semijoin qual, which confused useless-RTE removal (bug #17700) +explain (verbose, costs off) +with ctetable as not materialized ( select 1 as f1 ) +select * from ctetable c1 +where f1 in ( select c3.f1 from ctetable c2 full join ctetable c3 on true ); + QUERY PLAN +------------------------------------ + Hash Semi Join + Output: (1) + Hash Cond: ((1) = (1)) + -> Result + Output: 1 + -> Hash + Output: (1) + -> Merge Full Join + Output: (1) + -> Result + Output: 1 + -> Materialize + Output: (1) + -> Result + Output: 1 + Optimizer: GPORCA +(17 rows) + +with ctetable as not materialized ( select 1 as f1 ) +select * from ctetable c1 +where f1 in ( select c3.f1 from ctetable c2 full join ctetable c3 on true ); + f1 +---- + 1 +(1 row) + +-- Test PHV that winds up in a Result node, despite having nonempty nullingrels +explain (verbose, costs off) +select table_catalog, table_name +from int4_tbl t1 + inner join (int8_tbl t2 + left join information_schema.column_udt_usage on null) + on null; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Result + Output: 'regression'::information_schema.sql_identifier, (c.relname)::information_schema.sql_identifier + One-Time Filter: false + Optimizer: GPORCA +(5 rows) + +-- Test handling of qual pushdown to appendrel members with non-Var outputs +explain (verbose, costs off) +select * from int4_tbl left join ( + select text 'foo' union all select text 'bar' +) ss(x) on true +where ss.x is null; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: f1, ('foo'::text) + -> Result + Output: f1, ('foo'::text) + Filter: (('foo'::text) IS NULL) + -> Nested Loop Left Join + Output: f1, ('foo'::text) + Join Filter: true + -> Seq Scan on public.int4_tbl + Output: f1 + -> Append + -> Materialize + Output: ('foo'::text) + -> Result + Output: 'foo'::text + -> Materialize + Output: ('bar'::text) + -> Result + Output: 'bar'::text + Optimizer: GPORCA +(21 rows) + -- -- test inlining of immutable functions -- @@ -3726,6 +4476,7 @@ select unique1 from tenk1, lateral f_immutable_int4(1) x where x in (select 17); -------------------------- Result One-Time Filter: false + Optimizer: GPORCA (3 rows) explain (costs off) @@ -3873,6 +4624,37 @@ select * from mki4(42); drop function mki8(bigint, bigint); drop function mki4(int); +-- test const-folding of a whole-row Var into a per-field Var +-- (need to inline a function to reach this case, else parser does it) +create function f_field_select(t onek) returns int4 as +$$ select t.unique2; $$ language sql immutable; +explain (verbose, costs off) +select (t2.*).unique1, f_field_select(t2) from tenk1 t1 + left join onek t2 on t1.unique1 = t2.unique1 + left join int8_tbl t3 on true; + QUERY PLAN +-------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t2.unique1, t2.unique2 + -> Hash Left Join + Output: t2.unique1, t2.unique2 + Hash Cond: (t1.unique1 = t2.unique1) + -> Nested Loop Left Join + Output: t1.unique1 + Join Filter: true + -> Seq Scan on public.tenk1 t1 + Output: t1.unique1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on public.int8_tbl t3 + -> Hash + Output: t2.unique1, t2.unique2 + -> Seq Scan on public.onek t2 + Output: t2.unique1, t2.unique2 + Optimizer: GPORCA +(19 rows) + +drop function f_field_select(t onek); -- -- test extraction of restriction OR clauses from join OR clause -- (we used to only do this for indexable clauses) @@ -4046,14 +4828,64 @@ select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1; + QUERY PLAN +------------------------------------------------------------------------------------------- + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (a.thousand = c.thousand) + Join Filter: (a.unique2 = b.unique1) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a.thousand + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: b.unique2 + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int4_tbl + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = int4_tbl.f1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = int4_tbl.f1) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: (unique1 = b.unique2) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = b.unique2) + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: c.thousand + -> Seq Scan on tenk1 c + Optimizer: GPORCA +(28 rows) + +select count(*) from + tenk1 a join tenk1 b on a.unique1 = b.unique2 + left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand + join int4_tbl on b.thousand = f1; + count +------- + 10 +(1 row) + +explain (costs off) +select b.unique1 from + tenk1 a join tenk1 b on a.unique1 = b.unique2 + left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand + join int4_tbl i1 on b.thousand = f1 + right join int4_tbl i2 on i2.f1 = b.tenthous + order by 1; QUERY PLAN ------------------------------------------------------------------------------------------------- - Finalize Aggregate - -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate + Sort + Sort Key: b.unique1 + -> Hash Right Join + Hash Cond: (b.tenthous = i2.f1) + -> Gather Motion 3:1 (slice1; segments: 3) -> Hash Left Join Hash Cond: (a.thousand = c.thousand) - Join Filter: (a.unique2 = b.unique1) + Join Filter: (b.unique1 = 42) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: a.thousand -> Nested Loop @@ -4063,11 +4895,11 @@ select count(*) from -> Nested Loop Join Filter: true -> Broadcast Motion 3:3 (slice4; segments: 3) - -> Seq Scan on int4_tbl + -> Seq Scan on int4_tbl i1 -> Bitmap Heap Scan on tenk1 b - Recheck Cond: (thousand = int4_tbl.f1) + Recheck Cond: (thousand = i1.f1) -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = int4_tbl.f1) + Index Cond: (thousand = i1.f1) -> Bitmap Heap Scan on tenk1 a Recheck Cond: (unique1 = b.unique2) -> Bitmap Index Scan on tenk1_unique1 @@ -4076,64 +4908,11 @@ select count(*) from -> Redistribute Motion 3:3 (slice5; segments: 3) Hash Key: c.thousand -> Seq Scan on tenk1 c - Optimizer: GPORCA -(29 rows) - -select count(*) from - tenk1 a join tenk1 b on a.unique1 = b.unique2 - left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand - join int4_tbl on b.thousand = f1; - count -------- - 10 -(1 row) - -explain (costs off) -select b.unique1 from - tenk1 a join tenk1 b on a.unique1 = b.unique2 - left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand - join int4_tbl i1 on b.thousand = f1 - right join int4_tbl i2 on i2.f1 = b.tenthous - order by 1; - QUERY PLAN -------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: b.unique1 - -> Sort - Sort Key: b.unique1 - -> Hash Right Join - Hash Cond: (b.tenthous = i2.f1) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: b.tenthous - -> Hash Left Join - Hash Cond: (a.thousand = c.thousand) - Join Filter: (b.unique1 = 42) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: a.thousand - -> Nested Loop - Join Filter: true - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: b.unique2 - -> Nested Loop - Join Filter: true - -> Broadcast Motion 3:3 (slice5; segments: 3) - -> Seq Scan on int4_tbl i1 - -> Bitmap Heap Scan on tenk1 b - Recheck Cond: (thousand = i1.f1) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = i1.f1) - -> Bitmap Heap Scan on tenk1 a - Recheck Cond: (unique1 = b.unique2) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = b.unique2) - -> Hash - -> Redistribute Motion 3:3 (slice6; segments: 3) - Hash Key: c.thousand - -> Seq Scan on tenk1 c - -> Hash + -> Hash + -> Gather Motion 3:1 (slice6; segments: 3) -> Seq Scan on int4_tbl i2 Optimizer: GPORCA -(36 rows) +(34 rows) select b.unique1 from tenk1 a join tenk1 b on a.unique1 = b.unique2 @@ -4158,24 +4937,24 @@ select * from ) ss where fault = 122 order by fault; - QUERY PLAN ----------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1)) - -> Sort - Sort Key: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1)) - -> Result - Filter: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1) = 122) - -> Hash Right Join - Hash Cond: ((tenk1.unique2)::bigint = int8_tbl.q2) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: (tenk1.unique2)::bigint - -> Seq Scan on tenk1 - -> Hash - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: int8_tbl.q2 - -> Seq Scan on int8_tbl - Optimizer: Pivotal Optimizer (GPORCA) + QUERY PLAN +-------------------------------------------------------------------------------------------- + Sort + Sort Key: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1)) + -> Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: ((COALESCE(tenk1.unique1, '-1'::integer) + int8_tbl.q1) = 122) + -> Hash Right Join + Hash Cond: ((tenk1.unique2)::bigint = int8_tbl.q2) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (tenk1.unique2)::bigint + -> Seq Scan on tenk1 + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: int8_tbl.q2 + -> Seq Scan on int8_tbl + Optimizer: GPORCA (16 rows) select * from @@ -4314,6 +5093,44 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) ---------+---------+---------+---------- (0 rows) +-- related case +explain (costs off) +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Hash Left Join + Hash Cond: (t1.q2 = t2.q1) + Filter: (t2.q1 = t2.q2) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t1.q2 + -> Seq Scan on int8_tbl t1 + -> Hash + -> Seq Scan on int8_tbl t2 + -> Materialize + -> Seq Scan on int8_tbl t3 + Optimizer: Postgres query optimizer +(14 rows) + +select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1, + lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss; + q1 | q2 | q1 | q2 | q1 | q2 +------------------+------------------+------------------+------------------+------------------+------------------- + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 456 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 +(10 rows) + -- -- check handling of join aliases when flattening multiple levels of subquery -- @@ -4376,6 +5193,71 @@ using (join_key); 1 | | (2 rows) +-- +-- check handling of a variable-free join alias +-- +explain (verbose, costs off) +select * from +int4_tbl i0 left join +( (select *, 123 as x from int4_tbl i1) ss1 + left join + (select *, q2 as x from int8_tbl i2) ss2 + using (x) +) ss0 +on (i0.f1 = ss0.f1) +order by i0.f1, x; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Output: i0.f1, (((123))::bigint), i1.f1, i2.q1, i2.q2 + Sort Key: i0.f1, (((123))::bigint) + -> Hash Right Join + Output: i0.f1, (123), i1.f1, i2.q1, i2.q2 + Hash Cond: (i1.f1 = i0.f1) + -> Hash Right Join + Output: i1.f1, (123), i2.q1, i2.q2 + Hash Cond: (i2.q2 = ((123))::bigint) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: i2.q1, i2.q2 + -> Seq Scan on public.int8_tbl i2 + Output: i2.q1, i2.q2 + Filter: (i2.q2 = 123) + -> Hash + Output: (123), i1.f1 + -> Result + Output: 123, i1.f1 + -> Gather Motion 3:1 (slice2; segments: 3) + Output: i1.f1 + -> Seq Scan on public.int4_tbl i1 + Output: i1.f1 + -> Hash + Output: i0.f1 + -> Gather Motion 3:1 (slice3; segments: 3) + Output: i0.f1 + -> Seq Scan on public.int4_tbl i0 + Output: i0.f1 + Settings: optimizer = 'on' + Optimizer: GPORCA +(30 rows) + +select * from +int4_tbl i0 left join +( (select *, 123 as x from int4_tbl i1) ss1 + left join + (select *, q2 as x from int8_tbl i2) ss2 + using (x) +) ss0 +on (i0.f1 = ss0.f1) +order by i0.f1, x; + f1 | x | f1 | q1 | q2 +-------------+-----+-------------+------------------+----- + 0 | 123 | 0 | 4567890123456789 | 123 + 123456 | 123 | 123456 | 4567890123456789 | 123 + -123456 | 123 | -123456 | 4567890123456789 | 123 + 2147483647 | 123 | 2147483647 | 4567890123456789 | 123 + -2147483647 | 123 | -2147483647 | 4567890123456789 | 123 +(5 rows) + -- -- test successful handling of nested outer joins with degenerate join quals -- @@ -4390,54 +5272,51 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); - QUERY PLAN ----------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +------------------------------------------------------------------------ + Hash Left Join Output: t1.f1 - -> Hash Left Join - Output: t1.f1 - Hash Cond: (i8.q2 = (i4.f1)::bigint) - -> Redistribute Motion 1:3 (slice2) - Output: t1.f1, i8.q2 - -> Hash Right Join - Output: t1.f1, i8.q2 - Hash Cond: (('***'::text) = t1.f1) - -> Hash Right Join - Output: ('***'::text), i8.q2 - Hash Cond: (((NULL::integer))::bigint = i8b1.q2) - -> Gather Motion 3:1 (slice3; segments: 3) - Output: i8.q2, (NULL::integer) - -> Hash Left Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8.q1 = i8b2.q1) - -> Seq Scan on public.int8_tbl i8 - Output: i8.q1, i8.q2 - -> Hash - Output: (NULL::integer), i8b2.q1 - -> Seq Scan on public.int8_tbl i8b2 - Output: NULL::integer, i8b2.q1 + Hash Cond: (i8.q2 = (i4.f1)::bigint) + -> Hash Right Join + Output: t1.f1, i8.q2 + Hash Cond: (('***'::text) = t1.f1) + -> Hash Right Join + Output: ('***'::text), i8.q2 + Hash Cond: (((NULL::integer))::bigint = i8b1.q2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: i8.q2, (NULL::integer) + -> Hash Left Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8.q1 = i8b2.q1) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 -> Hash - Output: ('***'::text), i8b1.q2 - -> Result - Output: '***'::text, i8b1.q2 - -> Gather Motion 3:1 (slice4; segments: 3) - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 - Output: i8b1.q2 - -> Hash - Output: t1.f1 - -> Gather Motion 3:1 (slice5; segments: 3) - Output: t1.f1 - -> Seq Scan on public.text_tbl t1 - Output: t1.f1 + Output: (NULL::integer), i8b2.q1 + -> Seq Scan on public.int8_tbl i8b2 + Output: NULL::integer, i8b2.q1 + -> Hash + Output: ('***'::text), i8b1.q2 + -> Result + Output: '***'::text, i8b1.q2 + -> Gather Motion 3:1 (slice2; segments: 3) + Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash + Output: t1.f1 + -> Gather Motion 3:1 (slice3; segments: 3) + Output: t1.f1 + -> Seq Scan on public.text_tbl t1 + Output: t1.f1 + -> Hash + Output: i4.f1 + -> Gather Motion 3:1 (slice4; segments: 3) Output: i4.f1 - -> Broadcast Motion 3:3 (slice6; segments: 3) + -> Seq Scan on public.int4_tbl i4 Output: i4.f1 - -> Seq Scan on public.int4_tbl i4 - Output: i4.f1 - Optimizer: Pivotal Optimizer (GPORCA) -(48 rows) + Settings: optimizer = 'on' + Optimizer: GPORCA +(42 rows) select t1.* from text_tbl t1 @@ -4466,60 +5345,57 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); - QUERY PLAN ---------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +--------------------------------------------------------------------------------------------- + Hash Left Join Output: t1.f1 - -> Hash Left Join - Output: t1.f1 - Hash Cond: (i8.q2 = (i4.f1)::bigint) - -> Redistribute Motion 1:3 (slice2) - Output: t1.f1, i8.q2 - -> Hash Right Join - Output: t1.f1, i8.q2 - Hash Cond: (('***'::text) = t1.f1) - -> Hash Right Join - Output: ('***'::text), i8.q2 - Hash Cond: (((NULL::integer))::bigint = i8b1.q2) - -> Gather Motion 3:1 (slice3; segments: 3) - Output: i8.q2, (NULL::integer) - -> Hash Left Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8.q1 = i8b2.q1) - -> Seq Scan on public.int8_tbl i8 - Output: i8.q1, i8.q2 - -> Hash - Output: (NULL::integer), i8b2.q1 - -> Nested Loop - Output: NULL::integer, i8b2.q1 - Join Filter: true - -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Materialize - -> Broadcast Motion 3:3 (slice4; segments: 3) - -> Seq Scan on public.int4_tbl i4b2 + Hash Cond: (i8.q2 = (i4.f1)::bigint) + -> Hash Right Join + Output: t1.f1, i8.q2 + Hash Cond: (('***'::text) = t1.f1) + -> Hash Right Join + Output: ('***'::text), i8.q2 + Hash Cond: (((NULL::integer))::bigint = i8b1.q2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: i8.q2, (NULL::integer) + -> Hash Left Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8.q1 = i8b2.q1) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 -> Hash - Output: ('***'::text), i8b1.q2 - -> Result - Output: '***'::text, i8b1.q2 - -> Gather Motion 3:1 (slice5; segments: 3) - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 - Output: i8b1.q2 - -> Hash - Output: t1.f1 - -> Gather Motion 3:1 (slice6; segments: 3) - Output: t1.f1 - -> Seq Scan on public.text_tbl t1 - Output: t1.f1 + Output: (NULL::integer), i8b2.q1 + -> Nested Loop + Output: NULL::integer, i8b2.q1 + Join Filter: true + -> Seq Scan on public.int8_tbl i8b2 + Output: i8b2.q1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on public.int4_tbl i4b2 + -> Hash + Output: ('***'::text), i8b1.q2 + -> Result + Output: '***'::text, i8b1.q2 + -> Gather Motion 3:1 (slice3; segments: 3) + Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash + Output: t1.f1 + -> Gather Motion 3:1 (slice4; segments: 3) + Output: t1.f1 + -> Seq Scan on public.text_tbl t1 + Output: t1.f1 + -> Hash + Output: i4.f1 + -> Gather Motion 3:1 (slice5; segments: 3) Output: i4.f1 - -> Broadcast Motion 3:3 (slice7; segments: 3) + -> Seq Scan on public.int4_tbl i4 Output: i4.f1 - -> Seq Scan on public.int4_tbl i4 - Output: i4.f1 - Optimizer: Pivotal Optimizer (GPORCA) -(54 rows) + Settings: optimizer = 'on' + Optimizer: GPORCA +(48 rows) select t1.* from text_tbl t1 @@ -4549,64 +5425,61 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); - QUERY PLAN ---------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Left Join Output: t1.f1 - -> Hash Left Join - Output: t1.f1 - Hash Cond: (i8.q2 = (i4.f1)::bigint) - -> Redistribute Motion 1:3 (slice2) - Output: t1.f1, i8.q2 - -> Hash Right Join - Output: t1.f1, i8.q2 - Hash Cond: (('***'::text) = t1.f1) - -> Hash Right Join - Output: ('***'::text), i8.q2 - Hash Cond: (((NULL::integer))::bigint = i8b1.q2) - -> Gather Motion 3:1 (slice3; segments: 3) - Output: i8.q2, (NULL::integer) - -> Hash Left Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8.q1 = i8b2.q1) - -> Seq Scan on public.int8_tbl i8 - Output: i8.q1, i8.q2 + Hash Cond: (i8.q2 = (i4.f1)::bigint) + -> Hash Right Join + Output: t1.f1, i8.q2 + Hash Cond: (('***'::text) = t1.f1) + -> Hash Right Join + Output: ('***'::text), i8.q2 + Hash Cond: (((NULL::integer))::bigint = i8b1.q2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: i8.q2, (NULL::integer) + -> Hash Left Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8.q1 = i8b2.q1) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + -> Hash + Output: (NULL::integer), i8b2.q1 + -> Hash Join + Output: NULL::integer, i8b2.q1 + Hash Cond: (i8b2.q1 = (i4b2.f1)::bigint) + -> Seq Scan on public.int8_tbl i8b2 + Output: i8b2.q1 -> Hash - Output: (NULL::integer), i8b2.q1 - -> Hash Join - Output: NULL::integer, i8b2.q1 - Hash Cond: (i8b2.q1 = (i4b2.f1)::bigint) - -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Hash + Output: i4b2.f1 + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: i4b2.f1 + Hash Key: (i4b2.f1)::bigint + -> Seq Scan on public.int4_tbl i4b2 Output: i4b2.f1 - -> Broadcast Motion 3:3 (slice4; segments: 3) - Output: i4b2.f1 - -> Seq Scan on public.int4_tbl i4b2 - Output: i4b2.f1 - -> Hash - Output: ('***'::text), i8b1.q2 - -> Result - Output: '***'::text, i8b1.q2 - -> Gather Motion 3:1 (slice5; segments: 3) - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 - Output: i8b1.q2 - -> Hash - Output: t1.f1 - -> Gather Motion 3:1 (slice6; segments: 3) - Output: t1.f1 - -> Seq Scan on public.text_tbl t1 - Output: t1.f1 + -> Hash + Output: ('***'::text), i8b1.q2 + -> Result + Output: '***'::text, i8b1.q2 + -> Gather Motion 3:1 (slice3; segments: 3) + Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash + Output: t1.f1 + -> Gather Motion 3:1 (slice4; segments: 3) + Output: t1.f1 + -> Seq Scan on public.text_tbl t1 + Output: t1.f1 + -> Hash + Output: i4.f1 + -> Gather Motion 3:1 (slice5; segments: 3) Output: i4.f1 - -> Broadcast Motion 3:3 (slice7; segments: 3) + -> Seq Scan on public.int4_tbl i4 Output: i4.f1 - -> Seq Scan on public.int4_tbl i4 - Output: i4.f1 Settings: optimizer = 'on' Optimizer: GPORCA -(55 rows) +(52 rows) select t1.* from text_tbl t1 @@ -4621,8 +5494,8 @@ select t1.* from on (i8.q2 = i4.f1); f1 ------------------- - hi de ho neighbor doh! + hi de ho neighbor (2 rows) explain (verbose, costs off) @@ -4634,8 +5507,8 @@ select * from on t1.f1 = 'doh!' left join int4_tbl i4 on i8.q1 = i4.f1; - QUERY PLAN ----------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1 -> Hash Left Join @@ -4669,7 +5542,8 @@ select * from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 - Optimizer: Pivotal Optimizer (GPORCA) + Settings: optimizer = 'on' + Optimizer: GPORCA (35 rows) select * from @@ -4686,6 +5560,62 @@ select * from doh! | 123 | 456 | hi de ho neighbor | (2 rows) +-- check handling of a variable-free qual for a non-commutable outer join +explain (costs off) +select nspname +from (select 1 as x) ss1 +left join +( select n.nspname, c.relname + from pg_class c left join pg_namespace n on n.oid = c.relnamespace + where c.relkind = 'r' +) ss2 on false; + QUERY PLAN +------------------------------- + Nested Loop Left Join + Join Filter: false + -> Result + -> Result + One-Time Filter: false + Optimizer: GPORCA +(6 rows) + +-- check handling of apparently-commutable outer joins with non-commutable +-- joins between them +explain (costs off) +select 1 from + int4_tbl i4 + left join int8_tbl i8 on i4.f1 is not null + left join (select 1 as a) ss1 on null + join int4_tbl i42 on ss1.a is null or i8.q1 <> i8.q2 + right join (select 2 as b) ss2 + on ss2.b < i4.f1; + QUERY PLAN +-------------------------------------------------------------------------------------- + Nested Loop Left Join + Join Filter: ((2) < i4.f1) + -> Result + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + Join Filter: true + -> Result + Filter: (((NULL::integer) IS NULL) OR (i8.q1 <> i8.q2)) + -> Nested Loop Left Join + Join Filter: (NOT (i4.f1 IS NULL)) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl i4 + -> Result + One-Time Filter: false + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int8_tbl i8 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl i42 + Optimizer: GPORCA +(23 rows) + -- -- test for appropriate join order in the presence of lateral references -- @@ -4744,6 +5674,57 @@ select 1 from where tt1.f1 = ss1.c0; */ --end_ignore +explain (verbose, costs off) +select 1 from + int4_tbl as i4 + inner join + ((select 42 as n from int4_tbl x1 left join int8_tbl x2 on f1 = q1) as ss1 + right join (select 1 as z) as ss2 on true) + on false, + lateral (select i4.f1, ss1.n from int8_tbl as i8 limit 1) as ss3; + QUERY PLAN +------------------------- + Result + Output: 1 + One-Time Filter: false + Optimizer: GPORCA +(5 rows) + +select 1 from + int4_tbl as i4 + inner join + ((select 42 as n from int4_tbl x1 left join int8_tbl x2 on f1 = q1) as ss1 + right join (select 1 as z) as ss2 on true) + on false, + lateral (select i4.f1, ss1.n from int8_tbl as i8 limit 1) as ss3; + ?column? +---------- +(0 rows) + +-- +-- check a case where we formerly generated invalid parameterized paths +-- +begin; +create temp table t (a int unique); +explain (costs off) +select 1 from t t1 + join lateral (select t1.a from (select 1) foo offset 0) as s1 on true + join + (select 1 from t t2 + inner join (t t3 + left join (t t4 left join t t5 on t4.a = 1) + on t3.a = t4.a) + on false + where t3.a = coalesce(t5.a,1)) as s2 + on true; + QUERY PLAN +------------------------- + Result + One-Time Filter: false + Optimizer: GPORCA +(3 rows) + +rollback; -- -- check a case in which a PlaceHolderVar forces join order -- @@ -4948,6 +5929,154 @@ select a.q2, b.q1 reset enable_hashjoin; reset enable_nestloop; reset enable_mergejoin; +-- +-- test join strength reduction with a SubPlan providing the proof +-- +explain (costs off) +select a.unique1, b.unique2 + from onek a left join onek b on a.unique1 = b.unique2 + where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: ((b.unique2)::bigint = c.q1) + Join Filter: (c.q1 < b.unique1) + -> Hash Left Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on onek a + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on onek b + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int8_tbl c + Optimizer: GPORCA +(15 rows) + +select a.unique1, b.unique2 + from onek a left join onek b on a.unique1 = b.unique2 + where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1); + unique1 | unique2 +---------+--------- + 123 | 123 +(1 row) + +-- +-- test full-join strength reduction +-- +explain (costs off) +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where a.unique1 = 42; + QUERY PLAN +--------------------------------------------------------------- + Hash Left Join + Hash Cond: (a.unique1 = b.unique2) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Index Only Scan using onek_unique1 on onek a + Index Cond: (unique1 = 42) + -> Hash + -> Gather Motion 3:1 (slice2; segments: 3) + -> Index Only Scan using onek_unique2 on onek b + Index Cond: (unique2 = 42) + Optimizer: GPORCA +(10 rows) + +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where a.unique1 = 42; + unique1 | unique2 +---------+--------- + 42 | 42 +(1 row) + +explain (costs off) +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where b.unique2 = 43; + QUERY PLAN +----------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: (b.unique2 = 43) + -> Hash Full Join + Hash Cond: (a.unique1 = b.unique2) + -> Seq Scan on onek a + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Seq Scan on onek b + Optimizer: GPORCA +(11 rows) + +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where b.unique2 = 43; + unique1 | unique2 +---------+--------- + 43 | 43 +(1 row) + +explain (costs off) +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where a.unique1 = 42 and b.unique2 = 42; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b.unique2 + -> Index Only Scan using onek_unique2 on onek b + Index Cond: (unique2 = 42) + -> Index Only Scan using onek_unique1 on onek a + Index Cond: ((unique1 = b.unique2) AND (unique1 = 42)) + Optimizer: GPORCA +(10 rows) + +select a.unique1, b.unique2 + from onek a full join onek b on a.unique1 = b.unique2 + where a.unique1 = 42 and b.unique2 = 42; + unique1 | unique2 +---------+--------- + 42 | 42 +(1 row) + +-- +-- test result-RTE removal underneath a full join +-- +explain (costs off) +select * from + (select * from int8_tbl i81 join (values(123,2)) v(v1,v2) on q2=v1) ss1 +full join + (select * from (values(456,2)) w(v1,v2) join int8_tbl i82 on q2=v1) ss2 +on true; + QUERY PLAN +----------------------------------------------------- + Merge Full Join + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl i81 + Filter: (q2 = 123) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on int8_tbl i82 + Filter: (q2 = 456) + Optimizer: GPORCA +(9 rows) + +select * from + (select * from int8_tbl i81 join (values(123,2)) v(v1,v2) on q2=v1) ss1 +full join + (select * from (values(456,2)) w(v1,v2) join int8_tbl i82 on q2=v1) ss2 +on true; + q1 | q2 | v1 | v2 | v1 | v2 | q1 | q2 +------------------+-----+-----+----+-----+----+-----+----- + 4567890123456789 | 123 | 123 | 2 | 456 | 2 | 123 | 456 +(1 row) + -- -- test join removal -- @@ -5009,6 +6138,263 @@ select id from a where id in ( Optimizer: GPORCA (7 rows) +-- check optimization with oddly-nested outer joins +explain (costs off) +select a1.id from + (a a1 left join a a2 on true) + left join + (a a3 left join a a4 on a3.id = a4.id) + on a2.id = a3.id; + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on a a1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on a a2 + Optimizer: GPORCA +(8 rows) + +explain (costs off) +select a1.id from + (a a1 left join a a2 on a1.id = a2.id) + left join + (a a3 left join a a4 on a3.id = a4.id) + on a2.id = a3.id; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on a a1 + -> Index Only Scan using a_pkey on a a2 + Index Cond: (id = a1.id) + Optimizer: GPORCA +(7 rows) + +explain (costs off) +select 1 from a t1 + left join a t2 on true + inner join a t3 on true + left join a t4 on t2.id = t4.id and t2.id = t3.id; + QUERY PLAN +--------------------------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.id + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop + Join Filter: true + -> Seq Scan on a t3 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on a t1 + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on a t2 + -> Index Only Scan using a_pkey on a t4 + Index Cond: (id = t2.id) + Filter: (t2.id = t3.id) + Optimizer: GPORCA +(21 rows) + +-- another example (bug #17781) +explain (costs off) +select ss1.f1 +from int4_tbl as t1 + left join (int4_tbl as t2 + right join int4_tbl as t3 on null + left join (int4_tbl as t4 + right join int8_tbl as t5 on null) + on t2.f1 = t4.f1 + left join ((select null as f1 from int4_tbl as t6) as ss1 + inner join int8_tbl as t7 on null) + on t5.q1 = t7.q2) + on false; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Result + One-Time Filter: false + Optimizer: GPORCA +(7 rows) + +-- variant with Var rather than PHV coming from t6 +explain (costs off) +select ss1.f1 +from int4_tbl as t1 + left join (int4_tbl as t2 + right join int4_tbl as t3 on null + left join (int4_tbl as t4 + right join int8_tbl as t5 on null) + on t2.f1 = t4.f1 + left join ((select f1 from int4_tbl as t6) as ss1 + inner join int8_tbl as t7 on null) + on t5.q1 = t7.q2) + on false; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int4_tbl t1 + -> Result + One-Time Filter: false + Optimizer: GPORCA +(7 rows) + +-- per further discussion of bug #17781 +explain (costs off) +select ss1.x +from (select f1/2 as x from int4_tbl i4 left join a on a.id = i4.f1) ss1 + right join int8_tbl i8 on true +where current_user is not null; -- this is to add a Result node + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on int8_tbl i8 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl i4 + Optimizer: GPORCA +(8 rows) + +-- and further discussion of bug #17781 +explain (costs off) +select * +from int8_tbl t1 + left join (int8_tbl t2 left join onek t3 on t2.q1 > t3.unique1) + on t1.q2 = t2.q2 + left join onek t4 + on t2.q2 < t3.unique2; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: (t2.q2 < t3.unique2) + -> Hash Right Join + Hash Cond: (t2.q2 = t1.q2) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2.q2 + -> Nested Loop Left Join + Join Filter: (t2.q1 > t3.unique1) + -> Seq Scan on int8_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on onek t3 + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: t1.q2 + -> Seq Scan on int8_tbl t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on onek t4 + Optimizer: GPORCA +(21 rows) + +-- More tests of correct placement of pseudoconstant quals +-- simple constant-false condition +explain (costs off) +select * from int8_tbl t1 left join + (int8_tbl t2 inner join int8_tbl t3 on false + left join int8_tbl t4 on t2.q2 = t4.q2) +on t1.q1 = t2.q1; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (q1 = (NULL::bigint)) + -> Seq Scan on int8_tbl t1 + -> Hash + -> Result + One-Time Filter: false + Optimizer: GPORCA +(8 rows) + +-- deduce constant-false from an EquivalenceClass +explain (costs off) +select * from int8_tbl t1 left join + (int8_tbl t2 inner join int8_tbl t3 on (t2.q1-t3.q2) = 0 and (t2.q1-t3.q2) = 1 + left join int8_tbl t4 on t2.q2 = t4.q2) +on t1.q1 = t2.q1; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Right Join + Hash Cond: (t2.q1 = t1.q1) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.q1 + -> Hash Left Join + Hash Cond: (t2.q2 = t4.q2) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2.q2 + -> Nested Loop + Join Filter: (((t2.q1 - t3.q2) = 0) AND ((t2.q1 - t3.q2) = 1)) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int8_tbl t3 + -> Seq Scan on int8_tbl t2 + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: t4.q2 + -> Seq Scan on int8_tbl t4 + -> Hash + -> Seq Scan on int8_tbl t1 + Optimizer: GPORCA +(21 rows) + +-- pseudoconstant based on an outer-level Param +explain (costs off) +select exists( + select * from int8_tbl t1 left join + (int8_tbl t2 inner join int8_tbl t3 on x0.f1 = 1 + left join int8_tbl t4 on t2.q2 = t4.q2) + on t1.q1 = t2.q1 +) from int4_tbl x0; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int4_tbl x0 + SubPlan 1 + -> Aggregate + -> Nested Loop Left Join + Join Filter: (t1.q1 = t2.q1) + -> Materialize + -> Gather Motion 3:1 (slice6; segments: 3) + -> Seq Scan on int8_tbl t1 + -> Materialize + -> Result + One-Time Filter: (x0.f1 = 1) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Hash Left Join + Hash Cond: (t2.q2 = t4.q2) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2.q2 + -> Nested Loop + Join Filter: true + -> Seq Scan on int8_tbl t2 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on int8_tbl t3 + -> Hash + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: t4.q2 + -> Seq Scan on int8_tbl t4 + Optimizer: GPORCA +(30 rows) + -- check that join removal works for a left join when joining a subquery -- that is guaranteed to be unique by its GROUP BY clause explain (costs off) @@ -5065,6 +6451,29 @@ select d.* from d left join (select distinct * from b) s Optimizer: GPORCA (7 rows) +-- join removal is not possible here +explain (costs off) +select 1 from a t1 + left join (a t2 left join a t3 on t2.id = 1) on t2.id = 1; + QUERY PLAN +--------------------------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on a t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop Left Join + Join Filter: (t2.id = 1) + -> Index Only Scan using a_pkey on a t2 + Index Cond: (id = 1) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on a t3 + Optimizer: GPORCA +(15 rows) + -- check join removal works when uniqueness of the join condition is enforced -- by a UNION explain (costs off) @@ -5099,9 +6508,42 @@ select 1 from (select a.id FROM a left join b on a.b_id = b.id) q, -> Seq Scan on a -> Function Scan on generate_series gs Filter: (a.id = i) - Optimizer: Postgres query optimizer + Optimizer: GPORCA (6 rows) +-- check join removal within RHS of an outer join +explain (costs off) +select c.id, ss.a from c + left join (select d.a from onerow, d left join b on d.a = b.id) ss + on c.id = ss.a; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Left Join + Hash Cond: (c.id = d.a) + -> Seq Scan on c + -> Hash + -> Nested Loop + Join Filter: true + -> Seq Scan on d + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on onerow + Optimizer: GPORCA +(12 rows) + +CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id); +CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10); +-- test join removals on a partitioned table +explain (costs off) +select a.* from a left join parted_b pb on a.b_id = pb.id; + QUERY PLAN +----------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on a + Optimizer: GPORCA +(3 rows) + rollback; create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); @@ -5226,6 +6668,56 @@ SELECT * FROM 1 | 4567890123456789 | -4567890123456789 | 4567890123456789 (5 rows) +-- join removal bug #17769: can't remove if there's a pushed-down reference +EXPLAIN (COSTS OFF) +SELECT q2 FROM + (SELECT * + FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss + WHERE COALESCE(dat1, 0) = q1; + QUERY PLAN +--------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: (COALESCE(innertab.dat1, '0'::bigint) = int8_tbl.q1) + -> Hash Left Join + Hash Cond: (int8_tbl.q2 = innertab.id) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: int8_tbl.q2 + -> Seq Scan on int8_tbl + -> Hash + -> Seq Scan on innertab + Optimizer: GPORCA +(11 rows) + +-- join removal bug #17773: otherwise-removable PHV appears in a qual condition +EXPLAIN (VERBOSE, COSTS OFF) +SELECT q2 FROM + (SELECT q2, 'constant'::text AS x + FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss + RIGHT JOIN int4_tbl ON NULL + WHERE x >= x; + QUERY PLAN +------------------------- + Result + Output: NULL::bigint + One-Time Filter: false + Optimizer: GPORCA +(5 rows) + +-- join removal bug #17786: check that OR conditions are cleaned up +EXPLAIN (COSTS OFF) +SELECT f1, x +FROM int4_tbl + JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1 + RIGHT JOIN tenk1 ON NULL) + ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x; + QUERY PLAN +------------------------- + Result + One-Time Filter: false + Optimizer: GPORCA +(3 rows) + rollback; -- another join removal bug: we must clean up correctly when removing a PHV begin; @@ -5297,6 +6789,115 @@ where ss.stringu2 !~* ss.case1; doh! (1 row) +rollback; +-- another join removal bug: we must clean up EquivalenceClasses too +begin; +create temp table t (a int unique); +insert into t values (1); +explain (costs off) +select 1 +from t t1 + left join (select 2 as c + from t t2 left join t t3 on t2.a = t3.a) s + on true +where t1.a = s.c; + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((2) = t1.a) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on t t2 + Filter: (2 = 2) + -> Index Only Scan using t_a_key on t t3 + Index Cond: (a = t2.a) + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Index Only Scan using t_a_key on t t1 + Index Cond: (a = 2) + Optimizer: GPORCA +(15 rows) + +select 1 +from t t1 + left join (select 2 as c + from t t2 left join t t3 on t2.a = t3.a) s + on true +where t1.a = s.c; + ?column? +---------- +(0 rows) + +rollback; +-- test cases where we can remove a join, but not a PHV computed at it +begin; +create temp table t (a int unique, b int); +insert into t values (1,1), (2,2); +explain (costs off) +select 1 +from t t1 + left join (select t2.a, 1 as c + from t t2 left join t t3 on t2.a = t3.a) s + on true + left join t t4 on true +where s.a < s.c; + QUERY PLAN +-------------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Result + Filter: (t2.a < (1)) + -> Seq Scan on t t2 + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on t t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on t t4 + Optimizer: GPORCA +(15 rows) + +explain (costs off) +select t1.a, s.* +from t t1 + left join lateral (select t2.a, coalesce(t1.a, 1) as c + from t t2 left join t t3 on t2.a = t3.a) s + on true + left join t t4 on true +where s.a < s.c; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on t t1 + -> Seq Scan on t t2 + Filter: (a < COALESCE(t1.a, 1)) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on t t4 + Optimizer: GPORCA +(11 rows) + +select t1.a, s.* +from t t1 + left join lateral (select t2.a, coalesce(t1.a, 1) as c + from t t2 left join t t3 on t2.a = t3.a) s + on true + left join t t4 on true +where s.a < s.c; + a | a | c +---+---+--- + 2 | 1 | 2 + 2 | 1 | 2 +(2 rows) + rollback; -- test case to expose miscomputation of required relid set for a PHV explain (verbose, costs off) @@ -5306,37 +6907,36 @@ select i8.*, ss.v, t.unique2 left join lateral (select i4.f1 + 1 as v) as ss on true left join tenk1 t on t.unique2 = ss.v where q2 = 456; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Output: i8.q1, i8.q2, ((i4.f1 + 1)), t.unique2 - -> Hash Right Join + -> Hash Left Join Output: i8.q1, i8.q2, ((i4.f1 + 1)), t.unique2 - Hash Cond: (t.unique2 = ((i4.f1 + 1))) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Output: t.unique2 - Hash Key: t.unique2 - -> Seq Scan on public.tenk1 t - Output: t.unique2 + Hash Cond: (((i4.f1 + 1)) = t.unique2) + -> Nested Loop Left Join + Output: i8.q1, i8.q2, (i4.f1 + 1) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: i8.q1, i8.q2 + Hash Key: 1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 456) + -> Materialize + Output: i4.f1 + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1 + Filter: (i4.f1 = 1) -> Hash - Output: i8.q1, i8.q2, ((i4.f1 + 1)) + Output: t.unique2 -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: i8.q1, i8.q2, ((i4.f1 + 1)) - Hash Key: ((i4.f1 + 1)) - -> Nested Loop Left Join - Output: i8.q1, i8.q2, (i4.f1 + 1) - -> Seq Scan on public.int8_tbl i8 - Output: i8.q1, i8.q2 - Filter: (i8.q2 = 456) - -> Materialize - Output: i4.f1 - -> Broadcast Motion 1:3 (slice4; segments: 1) - Output: i4.f1 - -> Seq Scan on public.int4_tbl i4 - Output: i4.f1 - Filter: (i4.f1 = 1) + Output: t.unique2 + Hash Key: 1 + -> Seq Scan on public.tenk1 t + Output: t.unique2 + Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(28 rows) +(27 rows) select i8.*, ss.v, t.unique2 from int8_tbl i8 @@ -5354,6 +6954,7 @@ where q2 = 456; create temp table parttbl (a integer primary key) partition by range (a); create temp table parttbl1 partition of parttbl for values from (1) to (100); insert into parttbl values (11), (12); +set optimizer_enable_dynamicindexonlyscan=off; explain (costs off) select * from (select *, 12 as phv from parttbl) as ss @@ -5366,9 +6967,9 @@ where ss.a = ss.phv and f1 = 0; Join Filter: true -> Result Filter: (((12) = 12) AND (parttbl.a = (12))) - -> Dynamic Index Only Scan on parttbl_pkey on parttbl - Index Cond: (a = 12) + -> Dynamic Seq Scan on parttbl Number of partitions to scan: 1 (out of 1) + Filter: (a = 12) -> Materialize -> Broadcast Motion 3:3 (slice2; segments: 3) -> Seq Scan on int4_tbl @@ -5376,6 +6977,7 @@ where ss.a = ss.phv and f1 = 0; Optimizer: GPORCA (13 rows) +reset optimizer_enable_dynamicindexonlyscan; select * from (select *, 12 as phv from parttbl) as ss right join int4_tbl on true @@ -5396,7 +6998,7 @@ select * from ERROR: invalid reference to FROM-clause entry for table "y" LINE 2: ...bl x join (int4_tbl x cross join int4_tbl y) j on q1 = y.f1; ^ -HINT: There is an entry for table "y", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "y", but it cannot be referenced from this part of the query. select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok q1 | q2 | f1 | ff @@ -5424,6 +7026,13 @@ ERROR: column "uunique1" does not exist LINE 1: select uunique1 from ^ HINT: Perhaps you meant to reference the column "t1.unique1" or the column "t2.unique1". +select ctid from + tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, need qualification +ERROR: column "ctid" does not exist +LINE 1: select ctid from + ^ +DETAIL: There are columns named "ctid", but they are in tables that cannot be referenced from this part of the query. +HINT: Try using a table-qualified name. -- -- Take care to reference the correct RTE -- @@ -5435,6 +7044,19 @@ select atts.relid::regclass, s.* from pg_stats s join ERROR: column atts.relid does not exist LINE 1: select atts.relid::regclass, s.* from pg_stats s join ^ +-- Test bug in rangetable flattening +explain (verbose, costs off) +select 1 from + (select * from int8_tbl where q1 <> (select 42) offset 0) ss +where false; + QUERY PLAN +------------------------- + Result + Output: NULL::integer + One-Time Filter: false + Optimizer: GPORCA +(5 rows) + -- -- Test LATERAL -- @@ -5452,10 +7074,10 @@ explain (costs off) ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join - Hash Cond: (a.unique1 = b.f1) - -> Seq Scan on tenk1 a + Hash Cond: (b.f1 = a.unique1) + -> Seq Scan on int4_tbl b -> Hash - -> Seq Scan on int4_tbl b + -> Seq Scan on tenk1 a Optimizer: Postgres query optimizer (7 rows) @@ -5473,10 +7095,10 @@ explain (costs off) ------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Hash Join - Hash Cond: (tenk1.unique1 = x.f1) - -> Seq Scan on tenk1 + Hash Cond: (x.f1 = tenk1.unique1) + -> Seq Scan on int4_tbl x -> Hash - -> Seq Scan on int4_tbl x + -> Seq Scan on tenk1 Optimizer: Postgres query optimizer (7 rows) @@ -5487,10 +7109,10 @@ explain (costs off) ------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Hash Join - Hash Cond: (tenk1.unique1 = x.f1) - -> Seq Scan on tenk1 + Hash Cond: (x.f1 = tenk1.unique1) + -> Seq Scan on int4_tbl x -> Hash - -> Seq Scan on int4_tbl x + -> Seq Scan on tenk1 Optimizer: Postgres query optimizer (7 rows) @@ -5511,11 +7133,11 @@ explain (costs off) QUERY PLAN ------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Hash Right Join - Hash Cond: (tenk1.unique1 = x.f1) - -> Seq Scan on tenk1 + -> Hash Left Join + Hash Cond: (x.f1 = tenk1.unique1) + -> Seq Scan on int4_tbl x -> Hash - -> Seq Scan on int4_tbl x + -> Seq Scan on tenk1 Optimizer: Postgres query optimizer (7 rows) @@ -5577,6 +7199,7 @@ explain (costs off) Cache Key: a.two Cache Mode: binary -> Function Scan on generate_series g + Optimizer: GPORCA (10 rows) -- don't need the explicit LATERAL keyword for functions @@ -6014,15 +7637,15 @@ select * from int4_tbl i left join ------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Output: i.f1, j.f1 - -> Hash Left Join + -> Hash Right Join Output: i.f1, j.f1 - Hash Cond: (i.f1 = j.f1) - -> Seq Scan on public.int4_tbl i - Output: i.f1 - -> Hash + Hash Cond: (j.f1 = i.f1) + -> Seq Scan on public.int2_tbl j Output: j.f1 - -> Seq Scan on public.int2_tbl j - Output: j.f1 + -> Hash + Output: i.f1 + -> Seq Scan on public.int4_tbl i + Output: i.f1 Settings: optimizer = 'on' Optimizer: Postgres query optimizer (13 rows) @@ -6071,8 +7694,8 @@ select * from int4_tbl a, lateral ( select * from int4_tbl b left join int8_tbl c on (b.f1 = q1 and a.f1 = q2) ) ss; - QUERY PLAN ------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------ Nested Loop Output: a.f1, b.f1, c.q1, c.q2 -> Gather Motion 3:1 (slice1; segments: 3) @@ -6081,28 +7704,29 @@ select * from int4_tbl a, Output: a.f1 -> Materialize Output: b.f1, c.q1, c.q2 - -> Hash Right Join + -> Hash Left Join Output: b.f1, c.q1, c.q2 - Hash Cond: (c.q1 = b.f1) - -> Result + Hash Cond: (b.f1 = c.q1) + -> Materialize + Output: b.f1 + -> Gather Motion 3:1 (slice2; segments: 3) + Output: b.f1 + -> Seq Scan on public.int4_tbl b + Output: b.f1 + -> Hash Output: c.q1, c.q2 - Filter: (a.f1 = c.q2) - -> Materialize + -> Result Output: c.q1, c.q2 - -> Gather Motion 3:1 (slice2; segments: 3) + Filter: (a.f1 = c.q2) + -> Materialize Output: c.q1, c.q2 - -> Seq Scan on public.int8_tbl c + -> Gather Motion 3:1 (slice3; segments: 3) Output: c.q1, c.q2 - -> Hash - Output: b.f1 - -> Materialize - Output: b.f1 - -> Gather Motion 3:1 (slice3; segments: 3) - Output: b.f1 - -> Seq Scan on public.int4_tbl b - Output: b.f1 + -> Seq Scan on public.int8_tbl c + Output: c.q1, c.q2 + Settings: optimizer = 'on' Optimizer: Postgres query optimizer -(29 rows) +(30 rows) select * from int4_tbl a, lateral ( @@ -6170,40 +7794,74 @@ select * from Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))) -> Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))) - -> Hash Right Join - Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) - Hash Cond: (d.q1 = c.q2) - -> Nested Loop - Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) - -> Broadcast Motion 3:3 (slice2; segments: 3) - Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) - -> Hash Left Join - Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) - Hash Cond: (a.q2 = b.q1) - -> Redistribute Motion 3:3 (slice3; segments: 3) - Output: a.q1, a.q2 - Hash Key: a.q2 - -> Seq Scan on public.int8_tbl a - Output: a.q1, a.q2 - -> Hash - Output: b.q1, (COALESCE(b.q2, '42'::bigint)) - -> Seq Scan on public.int8_tbl b - Output: b.q1, COALESCE(b.q2, '42'::bigint) - -> Seq Scan on public.int8_tbl d - Output: d.q1, COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2) - -> Hash + -> Merge Left Join + Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Merge Cond: (c.q2 = d.q1) + -> Sort Output: c.q1, c.q2 - -> Redistribute Motion 3:3 (slice4; segments: 3) + Sort Key: c.q2 + -> Redistribute Motion 3:3 (slice2; segments: 3) Output: c.q1, c.q2 Hash Key: c.q2 -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 + -> Materialize + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + -> Sort + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Sort Key: d.q1 + -> Nested Loop + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + -> Broadcast Motion 3:3 (slice3; segments: 3) + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) + -> Hash Left Join + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) + Hash Cond: (a.q2 = b.q1) + -> Redistribute Motion 3:3 (slice4; segments: 3) + Output: a.q1, a.q2 + Hash Key: a.q2 + -> Seq Scan on public.int8_tbl a + Output: a.q1, a.q2 + -> Hash + Output: b.q1, (COALESCE(b.q2, '42'::bigint)) + -> Seq Scan on public.int8_tbl b + Output: b.q1, COALESCE(b.q2, '42'::bigint) + -> Seq Scan on public.int8_tbl d + Output: d.q1, COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2) -> Materialize Output: ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))) -> Result Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) - Optimizer: Postgres query optimizer -(37 rows) + Optimizer: GPORCA +(38 rows) + +-- another case requiring nested PlaceHolderVars +explain (verbose, costs off) +select * from + (select 0 as val0) as ss0 + left join (select 1 as val) as ss1 on true + left join lateral (select ss1.val as val_filtered where false) as ss2 on true; + QUERY PLAN +------------------------------- + Nested Loop Left Join + Output: 0, (1), ((1)) + Join Filter: false + -> Result + Output: 1 + -> Result + Output: (1) + One-Time Filter: false + Optimizer: GPORCA +(10 rows) + +select * from + (select 0 as val0) as ss0 + left join (select 1 as val) as ss1 on true + left join lateral (select ss1.val as val_filtered where false) as ss2 on true; + val0 | val | val_filtered +------+-----+-------------- + 0 | 1 | +(1 row) -- case that breaks the old ph_may_need optimization explain (verbose, costs off) @@ -6230,45 +7888,51 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from Output: i.f1 -> Materialize Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) - -> Hash Right Join + -> Merge Left Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) - Hash Cond: (d.q1 = c.q2) - -> Nested Loop - Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) - -> Broadcast Motion 3:3 (slice3; segments: 3) - Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) - -> Hash Right Join - Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) - Hash Cond: (b.q1 = a.q2) - -> Nested Loop - Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) - Join Filter: (b.q1 < b2.f1) - -> Seq Scan on public.int8_tbl b - Output: b.q1, b.q2 - -> Materialize - Output: b2.f1 - -> Broadcast Motion 3:3 (slice4; segments: 3) - Output: b2.f1 - -> Seq Scan on public.int4_tbl b2 - Output: b2.f1 - -> Hash - Output: a.q1, a.q2 - -> Redistribute Motion 3:3 (slice5; segments: 3) - Output: a.q1, a.q2 - Hash Key: a.q2 - -> Seq Scan on public.int8_tbl a - Output: a.q1, a.q2 - -> Seq Scan on public.int8_tbl d - Output: d.q1, COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2) - -> Hash + Merge Cond: (c.q2 = d.q1) + -> Sort Output: c.q1, c.q2 - -> Redistribute Motion 3:3 (slice6; segments: 3) + Sort Key: c.q2 + -> Redistribute Motion 3:3 (slice3; segments: 3) Output: c.q1, c.q2 Hash Key: c.q2 -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 + -> Materialize + Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + -> Sort + Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Sort Key: d.q1 + -> Nested Loop + Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + -> Broadcast Motion 3:3 (slice4; segments: 3) + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + -> Hash Right Join + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + Hash Cond: (b.q1 = a.q2) + -> Nested Loop + Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) + Join Filter: (b.q1 < b2.f1) + -> Broadcast Motion 3:3 (slice5; segments: 3) + Output: b2.f1 + -> Seq Scan on public.int4_tbl b2 + Output: b2.f1 + -> Materialize + Output: b.q1, b.q2 + -> Seq Scan on public.int8_tbl b + Output: b.q1, b.q2 + -> Hash + Output: a.q1, a.q2 + -> Redistribute Motion 3:3 (slice6; segments: 3) + Output: a.q1, a.q2 + Hash Key: a.q2 + -> Seq Scan on public.int8_tbl a + Output: a.q1, a.q2 + -> Seq Scan on public.int8_tbl d + Output: d.q1, COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2) Optimizer: Postgres query optimizer -(49 rows) +(56 rows) -- check processing of postponed quals (bug #9041) explain (verbose, costs off) @@ -6294,8 +7958,37 @@ select * from Output: (3) -> Result Output: 3 + Optimizer: GPORCA +(17 rows) + +-- a new postponed-quals issue (bug #17768) +explain (costs off) +select * from int4_tbl t1, + lateral (select * from int4_tbl t2 inner join int4_tbl t3 on t1.f1 = 1 + inner join (int4_tbl t4 left join int4_tbl t5 on true) on true) ss; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on int4_tbl t3 + -> Materialize + -> Nested Loop + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on int4_tbl t2 + -> Materialize + -> Nested Loop + -> Broadcast Motion 1:3 (slice4; segments: 1) + -> Seq Scan on int4_tbl t1 + Filter: (f1 = 1) + -> Materialize + -> Nested Loop Left Join + -> Seq Scan on int4_tbl t4 + -> Materialize + -> Broadcast Motion 3:3 (slice5; segments: 3) + -> Seq Scan on int4_tbl t5 Optimizer: Postgres query optimizer -(16 rows) +(20 rows) -- check dummy rels with lateral references (bug #15694) explain (verbose, costs off) @@ -6307,6 +8000,7 @@ select * from int8_tbl i8 left join lateral Output: i8.q1, i8.q2, f1, (i8.q2) -> Nested Loop Left Join Output: i8.q1, i8.q2, f1, (i8.q2) + Join Filter: false -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 -> Result @@ -6421,22 +8115,26 @@ select f1,g from int4_tbl a, (select f1 as g) ss; ERROR: column "f1" does not exist LINE 1: select f1,g from int4_tbl a, (select f1 as g) ss; ^ -HINT: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +HINT: To reference that column, you must mark this subquery with LATERAL. select f1,g from int4_tbl a, (select a.f1 as g) ss; ERROR: invalid reference to FROM-clause entry for table "a" LINE 1: select f1,g from int4_tbl a, (select a.f1 as g) ss; ^ -HINT: There is an entry for table "a", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "a", but it cannot be referenced from this part of the query. +HINT: To reference that table, you must mark this subquery with LATERAL. select f1,g from int4_tbl a cross join (select f1 as g) ss; ERROR: column "f1" does not exist LINE 1: select f1,g from int4_tbl a cross join (select f1 as g) ss; ^ -HINT: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +HINT: To reference that column, you must mark this subquery with LATERAL. select f1,g from int4_tbl a cross join (select a.f1 as g) ss; ERROR: invalid reference to FROM-clause entry for table "a" LINE 1: select f1,g from int4_tbl a cross join (select a.f1 as g) ss... ^ -HINT: There is an entry for table "a", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "a", but it cannot be referenced from this part of the query. +HINT: To reference that table, you must mark this subquery with LATERAL. -- SQL:2008 says the left table is in scope but illegal to access here select f1,g from int4_tbl a right join lateral generate_series(0, a.f1) g on true; ERROR: invalid reference to FROM-clause entry for table "a" @@ -6467,12 +8165,12 @@ update xx1 set x2 = f1 from (select * from int4_tbl where f1 = x1) ss; ERROR: column "x1" does not exist LINE 1: ... set x2 = f1 from (select * from int4_tbl where f1 = x1) ss; ^ -HINT: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query. update xx1 set x2 = f1 from (select * from int4_tbl where f1 = xx1.x1) ss; ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...t x2 = f1 from (select * from int4_tbl where f1 = xx1.x1) ss... ^ -HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "xx1", but it cannot be referenced from this part of the query. -- can't do it even with LATERAL: update xx1 set x2 = f1 from lateral (select * from int4_tbl where f1 = x1) ss; ERROR: invalid reference to FROM-clause entry for table "xx1" @@ -6487,12 +8185,12 @@ delete from xx1 using (select * from int4_tbl where f1 = x1) ss; ERROR: column "x1" does not exist LINE 1: ...te from xx1 using (select * from int4_tbl where f1 = x1) ss; ^ -HINT: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "x1" in table "xx1", but it cannot be referenced from this part of the query. delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss... ^ -HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +DETAIL: There is an entry for table "xx1", but it cannot be referenced from this part of the query. delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; @@ -7015,6 +8713,33 @@ left join j2 on j1.id1 = j2.id1 where j1.id2 = 1; Optimizer: GPORCA (16 rows) +create unique index j1_id2_idx on j1(id2) where id2 is not null; +DETAIL: Distribution key column "id1" is not included in the constraint. +ERROR: UNIQUE index must contain all columns in the table's distribution key +-- ensure we don't use a partial unique index as unique proofs +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id2 = j2.id2; + QUERY PLAN +--------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: j1.id1, j1.id2, j2.id1, j2.id2 + -> Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: true + -> Broadcast Motion 3:3 (slice2; segments: 3) + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Index Only Scan using j1_pkey on public.j1 + Output: j1.id1, j1.id2 + Index Cond: (j1.id2 = j2.id2) + Settings: optimizer = 'on', enable_nestloop = 'on' + Optimizer: GPORCA +(14 rows) + +drop index j1_id2_idx; +ERROR: index "j1_id2_idx" does not exist -- validate logic in merge joins which skips mark and restore. -- it should only do this if all quals which were used to detect the unique -- are present as join quals, and not plain quals. @@ -7195,6 +8920,36 @@ where exists (select 1 from j3 (24 rows) drop table j3; +-- Test that we do not account for nullingrels when looking up statistics +CREATE TABLE group_tbl (a INT, b INT); +INSERT INTO group_tbl SELECT 1, 1; +CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; +ANALYZE group_tbl; +EXPLAIN (COSTS OFF) +SELECT 1 FROM group_tbl t1 + LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE +GROUP BY s.c1, s.c2; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Result + -> Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: t2.a, (COALESCE(t2.a)) + -> Sort + Sort Key: t2.a, (COALESCE(t2.a)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t2.a, (COALESCE(t2.a)) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on group_tbl t1 + -> Materialize + -> Result + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on group_tbl t2 + Optimizer: GPORCA +(16 rows) + +DROP TABLE group_tbl; reset enable_hashjoin; reset enable_nestloop; reset enable_seqscan; diff --git a/contrib/pax_storage/src/test/regress/expected/jsonb_optimizer.out b/contrib/pax_storage/src/test/regress/expected/jsonb_optimizer.out index 6250207107a..18095582265 100644 --- a/contrib/pax_storage/src/test/regress/expected/jsonb_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/jsonb_optimizer.out @@ -1,3 +1,10 @@ +-- directory paths are passed to us in environment variables +\getenv abs_srcdir PG_ABS_SRCDIR +CREATE TABLE testjsonb ( + j jsonb +); +\set filename :abs_srcdir '/data/jsonb.data' +COPY testjsonb FROM :'filename'; -- Strings. SELECT '""'::jsonb; -- OK. jsonb @@ -303,6 +310,31 @@ LINE 1: SELECT '{ DETAIL: Expected JSON value, but found "}". CONTEXT: JSON data, line 4: ...yveryveryveryveryveryveryveryverylongfieldname":} -- ERROR missing value for last field +-- test non-error-throwing input +select pg_input_is_valid('{"a":true}', 'jsonb'); + pg_input_is_valid +------------------- + t +(1 row) + +select pg_input_is_valid('{"a":true', 'jsonb'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('{"a":true', 'jsonb'); + message | detail | hint | sql_error_code +------------------------------------+--------------------------------------+------+---------------- + invalid input syntax for type json | The input string ended unexpectedly. | | 22P02 +(1 row) + +select * from pg_input_error_info('{"a":1e1000000}', 'jsonb'); + message | detail | hint | sql_error_code +--------------------------------+--------+------+---------------- + value overflows numeric format | | | 22003 +(1 row) + -- make sure jsonb is passed through json generators without being escaped SELECT array_to_json(ARRAY [jsonb '{"a":1}', jsonb '{"b":[2,3]}']); array_to_json @@ -1560,6 +1592,13 @@ SELECT jsonb_object_agg(name, type) FROM foo; INSERT INTO foo VALUES (999999, NULL, 'bar'); SELECT jsonb_object_agg(name, type) FROM foo; ERROR: field name must not be null +-- edge case for parser +SELECT jsonb_object_agg(DISTINCT 'a', 'abc'); + jsonb_object_agg +------------------ + {"a": "abc"} +(1 row) + -- jsonb_object -- empty object, one dimension SELECT jsonb_object('{}'); @@ -3015,17 +3054,16 @@ SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled']; EXPLAIN (COSTS OFF) SELECT count(*) FROM testjsonb WHERE j @@ '$.wait == null'; - QUERY PLAN ------------------------------------------------------------------------------ - Finalize Aggregate + QUERY PLAN +----------------------------------------------------------------------- + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on testjsonb - Recheck Cond: (j @@ '($."wait" == null)'::jsonpath) - -> Bitmap Index Scan on jidx - Index Cond: (j @@ '($."wait" == null)'::jsonpath) + -> Bitmap Heap Scan on testjsonb + Recheck Cond: (j @@ '($."wait" == null)'::jsonpath) + -> Bitmap Index Scan on jidx + Index Cond: (j @@ '($."wait" == null)'::jsonpath) Optimizer: Pivotal Optimizer (GPORCA) -(8 rows) +(7 rows) SELECT count(*) FROM testjsonb WHERE j @@ '$.wait == null'; count @@ -3131,17 +3169,16 @@ SELECT count(*) FROM testjsonb WHERE j @@ 'exists($.public) && exists($.disabled EXPLAIN (COSTS OFF) SELECT count(*) FROM testjsonb WHERE j @? '$.wait ? (@ == null)'; - QUERY PLAN -------------------------------------------------------------------------------- - Finalize Aggregate + QUERY PLAN +------------------------------------------------------------------------ + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on testjsonb - Recheck Cond: (j @? '$."wait"?(@ == null)'::jsonpath) - -> Bitmap Index Scan on jidx - Index Cond: (j @? '$."wait"?(@ == null)'::jsonpath) + -> Bitmap Heap Scan on testjsonb + Recheck Cond: (j @? '$."wait"?(@ == null)'::jsonpath) + -> Bitmap Index Scan on jidx + Index Cond: (j @? '$."wait"?(@ == null)'::jsonpath) Optimizer: Pivotal Optimizer (GPORCA) -(8 rows) +(7 rows) SELECT count(*) FROM testjsonb WHERE j @? '$.wait ? (@ == null)'; count @@ -3440,17 +3477,16 @@ SELECT count(*) FROM testjsonb WHERE j @@ 'exists($)'; EXPLAIN (COSTS OFF) SELECT count(*) FROM testjsonb WHERE j @? '$.wait ? (@ == null)'; - QUERY PLAN -------------------------------------------------------------------------------- - Finalize Aggregate + QUERY PLAN +------------------------------------------------------------------------ + Aggregate -> Gather Motion 3:1 (slice1; segments: 3) - -> Partial Aggregate - -> Bitmap Heap Scan on testjsonb - Recheck Cond: (j @? '$."wait"?(@ == null)'::jsonpath) - -> Bitmap Index Scan on jidx - Index Cond: (j @? '$."wait"?(@ == null)'::jsonpath) + -> Bitmap Heap Scan on testjsonb + Recheck Cond: (j @? '$."wait"?(@ == null)'::jsonpath) + -> Bitmap Index Scan on jidx + Index Cond: (j @? '$."wait"?(@ == null)'::jsonpath) Optimizer: Pivotal Optimizer (GPORCA) -(8 rows) +(7 rows) SELECT count(*) FROM testjsonb WHERE j @? '$.wait ? (@ == null)'; count @@ -5209,6 +5245,30 @@ DETAIL: The path assumes key is a composite object, but it is a scalar value. update test_jsonb_subscript set test_json[0][0] = '1'; ERROR: cannot replace existing key DETAIL: The path assumes key is a composite object, but it is a scalar value. +-- try some things with short-header and toasted subscript values +drop table test_jsonb_subscript; +create temp table test_jsonb_subscript ( + id text, + test_json jsonb +); +insert into test_jsonb_subscript values('foo', '{"foo": "bar"}'); +insert into test_jsonb_subscript + select s, ('{"' || s || '": "bar"}')::jsonb from repeat('xyzzy', 500) s; +select length(id), test_json[id] from test_jsonb_subscript; + length | test_json +--------+----------- + 3 | "bar" + 2500 | "bar" +(2 rows) + +update test_jsonb_subscript set test_json[id] = '"baz"'; +select length(id), test_json[id] from test_jsonb_subscript; + length | test_json +--------+----------- + 3 | "baz" + 2500 | "baz" +(2 rows) + -- jsonb to tsvector select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb); to_tsvector diff --git a/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out b/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out index b8562703a6c..3dbcf28dccc 100644 --- a/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/limit_optimizer.out @@ -131,7 +131,7 @@ select * from int8_tbl offset (case when random() < 0.5 then null::bigint end); (5 rows) -- Test assorted cases involving backwards fetch from a LIMIT plan node --- Disable backward scan test which is not supported in this version of Cloudberry Database +-- Disable backward scan test which is not supported in this version of Apache Cloudberry --start_ignore /* * begin; @@ -478,10 +478,10 @@ CREATE VIEW limit_thousand_v_1 AS SELECT thousand FROM onek WHERE thousand < 995 ----------+---------+-----------+----------+---------+---------+------------- thousand | integer | | | | plain | View definition: - SELECT onek.thousand + SELECT thousand FROM onek - WHERE onek.thousand < 995 - ORDER BY onek.thousand + WHERE thousand < 995 + ORDER BY thousand OFFSET 10 FETCH FIRST 5 ROWS WITH TIES; @@ -493,10 +493,10 @@ CREATE VIEW limit_thousand_v_2 AS SELECT thousand FROM onek WHERE thousand < 995 ----------+---------+-----------+----------+---------+---------+------------- thousand | integer | | | | plain | View definition: - SELECT onek.thousand + SELECT thousand FROM onek - WHERE onek.thousand < 995 - ORDER BY onek.thousand + WHERE thousand < 995 + ORDER BY thousand OFFSET 10 LIMIT 5; @@ -511,10 +511,10 @@ CREATE VIEW limit_thousand_v_3 AS SELECT thousand FROM onek WHERE thousand < 995 ----------+---------+-----------+----------+---------+---------+------------- thousand | integer | | | | plain | View definition: - SELECT onek.thousand + SELECT thousand FROM onek - WHERE onek.thousand < 995 - ORDER BY onek.thousand + WHERE thousand < 995 + ORDER BY thousand FETCH FIRST (NULL::integer + 1) ROWS WITH TIES; CREATE VIEW limit_thousand_v_4 AS SELECT thousand FROM onek WHERE thousand < 995 @@ -525,10 +525,10 @@ CREATE VIEW limit_thousand_v_4 AS SELECT thousand FROM onek WHERE thousand < 995 ----------+---------+-----------+----------+---------+---------+------------- thousand | integer | | | | plain | View definition: - SELECT onek.thousand + SELECT thousand FROM onek - WHERE onek.thousand < 995 - ORDER BY onek.thousand + WHERE thousand < 995 + ORDER BY thousand LIMIT ALL; -- leave these views diff --git a/contrib/pax_storage/src/test/regress/expected/matview_optimizer.out b/contrib/pax_storage/src/test/regress/expected/matview_optimizer.out index 503ef25bffa..47a62c9473d 100644 --- a/contrib/pax_storage/src/test/regress/expected/matview_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/matview_optimizer.out @@ -19,7 +19,7 @@ SELECT * FROM mvtest_tv ORDER BY type; -- create a materialized view with no data, and confirm correct behavior EXPLAIN (costs off) - CREATE MATERIALIZED VIEW mvtest_tm AS SELECT type, sum(amt) AS totamt FROM mvtest_t GROUP BY type WITH NO DATA distributed by(type); + CREATE MATERIALIZED VIEW IF NOT EXISTS mvtest_tm AS SELECT type, sum(amt) AS totamt FROM mvtest_t GROUP BY type WITH NO DATA distributed by(type); QUERY PLAN ------------------------------------------------------------ GroupAggregate @@ -32,7 +32,7 @@ EXPLAIN (costs off) Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 (8 rows) -CREATE MATERIALIZED VIEW mvtest_tm AS SELECT type, sum(amt) AS totamt FROM mvtest_t GROUP BY type WITH NO DATA distributed by(type); +CREATE MATERIALIZED VIEW IF NOT EXISTS mvtest_tm AS SELECT type, sum(amt) AS totamt FROM mvtest_t GROUP BY type WITH NO DATA distributed by(type); SELECT relispopulated FROM pg_class WHERE oid = 'mvtest_tm'::regclass; relispopulated ---------------- @@ -125,10 +125,10 @@ CREATE INDEX mvtest_aa ON mvtest_bb (grandtot); type | text | | | | extended | | totamt | numeric | | | | main | | View definition: - SELECT mvtest_tv.type, - mvtest_tv.totamt + SELECT type, + totamt FROM mvtest_tv - ORDER BY mvtest_tv.type; + ORDER BY type; Distributed randomly \d+ mvtest_tvm @@ -138,10 +138,10 @@ Distributed randomly type | text | | | | extended | | totamt | numeric | | | | main | | View definition: - SELECT mvtest_tv.type, - mvtest_tv.totamt + SELECT type, + totamt FROM mvtest_tv - ORDER BY mvtest_tv.type; + ORDER BY type; Distributed randomly \d+ mvtest_tvvm @@ -150,7 +150,7 @@ Distributed randomly ----------+---------+-----------+----------+---------+---------+--------------+------------- grandtot | numeric | | | | main | | View definition: - SELECT mvtest_tvv.grandtot + SELECT grandtot FROM mvtest_tvv; Distributed randomly @@ -162,7 +162,7 @@ Distributed randomly Indexes: "mvtest_aa" btree (grandtot) View definition: - SELECT mvtest_tvvmv.grandtot + SELECT grandtot FROM mvtest_tvvmv; Distributed randomly @@ -178,7 +178,7 @@ ALTER MATERIALIZED VIEW mvtest_tvm SET SCHEMA mvtest_mvschema; Indexes: "mvtest_tvmm_pred" UNIQUE, btree (grandtot) WHERE grandtot < 0::numeric View definition: - SELECT sum(mvtest_tvm.totamt) AS grandtot + SELECT sum(totamt) AS grandtot FROM mvtest_mvschema.mvtest_tvm; Distributed by: (grandtot) @@ -190,10 +190,10 @@ SET search_path = mvtest_mvschema, public; type | text | | | | extended | | totamt | numeric | | | | main | | View definition: - SELECT mvtest_tv.type, - mvtest_tv.totamt + SELECT type, + totamt FROM mvtest_tv - ORDER BY mvtest_tv.type; + ORDER BY type; Distributed randomly -- modify the underlying table data @@ -531,6 +531,20 @@ DETAIL: drop cascades to materialized view mvtest_mv_v drop cascades to materialized view mvtest_mv_v_2 drop cascades to materialized view mvtest_mv_v_3 drop cascades to materialized view mvtest_mv_v_4 +-- Check that CREATE IF NOT EXISTS accept DISTRIBUTED BY +CREATE MATERIALIZED VIEW IF NOT EXISTS mv_ine_distr (a, b) AS + SELECT generate_series(1, 10) a, generate_series(1, 10) b DISTRIBUTED BY (b); +\d+ mv_ine_distr + Materialized view "public.mv_ine_distr" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | +View definition: + SELECT generate_series(1, 10) AS a, + generate_series(1, 10) AS b; + +DROP MATERIALIZED VIEW mv_ine_distr; -- Check that unknown literals are converted to "text" in CREATE MATVIEW, -- so that we don't end up with unknown-type columns. CREATE MATERIALIZED VIEW mv_unspecified_types AS @@ -598,10 +612,10 @@ SET ROLE regress_user_mvtest; -- duplicate all the aliases used in those queries CREATE TABLE mvtest_foo_data AS SELECT i, i+1 AS tid, - md5(random()::text) AS mv, - md5(random()::text) AS newdata, - md5(random()::text) AS newdata2, - md5(random()::text) AS diff + fipshash(random()::text) AS mv, + fipshash(random()::text) AS newdata, + fipshash(random()::text) AS newdata2, + fipshash(random()::text) AS diff FROM generate_series(1, 10) i; CREATE MATERIALIZED VIEW mvtest_mv_foo AS SELECT * FROM mvtest_foo_data distributed by(i); CREATE MATERIALIZED VIEW mvtest_mv_foo AS SELECT * FROM mvtest_foo_data distributed by(i); @@ -614,6 +628,26 @@ REFRESH MATERIALIZED VIEW mvtest_mv_foo; REFRESH MATERIALIZED VIEW CONCURRENTLY mvtest_mv_foo; DROP OWNED BY regress_user_mvtest CASCADE; DROP ROLE regress_user_mvtest; +-- Concurrent refresh requires a unique index on the materialized +-- view. Test what happens if it's dropped during the refresh. +CREATE OR REPLACE FUNCTION mvtest_drop_the_index() + RETURNS bool AS $$ +BEGIN + EXECUTE 'DROP INDEX IF EXISTS mvtest_drop_idx'; + RETURN true; +END; +$$ LANGUAGE plpgsql; +CREATE MATERIALIZED VIEW drop_idx_matview AS + SELECT 1 as i WHERE mvtest_drop_the_index(); +ERROR: function cannot execute on a QE slice because it issues a non-SELECT statement +CONTEXT: SQL statement "DROP INDEX IF EXISTS mvtest_drop_idx" +PL/pgSQL function mvtest_drop_the_index() line 3 at EXECUTE +CREATE UNIQUE INDEX mvtest_drop_idx ON drop_idx_matview (i); +ERROR: relation "drop_idx_matview" does not exist +REFRESH MATERIALIZED VIEW CONCURRENTLY drop_idx_matview; +ERROR: relation "drop_idx_matview" does not exist +DROP MATERIALIZED VIEW drop_idx_matview; -- clean up +ERROR: materialized view "drop_idx_matview" does not exist -- make sure that create WITH NO DATA works via SPI BEGIN; CREATE FUNCTION mvtest_func() @@ -749,3 +783,99 @@ NOTICE: relation "matview_ine_tab" already exists, skipping (0 rows) DROP MATERIALIZED VIEW matview_ine_tab; +-- test REFRESH fast path +create materialized view mv_fast as select * from mvtest_t; +set gp_enable_refresh_fast_path = off; +select relfilenode into temp mv_fast_relfilenode_0 from pg_class where oid = 'mv_fast'::regclass::oid; +refresh materialized view mv_fast; +select relfilenode into temp mv_fast_relfilenode_1 from pg_class where oid = 'mv_fast'::regclass::oid; +-- shoule be 0 +select count(*) from mv_fast_relfilenode_0 natural join mv_fast_relfilenode_1; + count +------- + 0 +(1 row) + +-- relfilenode should not be changed then. +set gp_enable_refresh_fast_path = on; +refresh materialized view mv_fast; +select relfilenode into temp mv_fast_relfilenode_2 from pg_class where oid = 'mv_fast'::regclass::oid; +-- shoule be 1 +select count(*) from mv_fast_relfilenode_1 natural join mv_fast_relfilenode_2; + count +------- + 1 +(1 row) + +reset gp_enable_refresh_fast_path; +drop materialized view mv_fast; +-- test REFRESH MATERIALIZED VIEW with 'WITH NO DATA' option can be executed immediately. +DROP TABLE IF EXISTS mvtest_twn; +CREATE TABLE mvtest_twn(a int); +CREATE MATERIALIZED VIEW mat_view_twn as SELECT a.a as p, b.a as q, c.a as x, d.a as y FROM mvtest_twn a, mvtest_twn b, mvtest_twn c, mvtest_twn d; +INSERT INTO mvtest_twn SELECT i FROM generate_series(1,10000)i; +-- t1 contains 10000 tuples, after cross join it four times, the output is much too huge +-- refresh with 'no data' should not actually execute the sql +set statement_timeout = 5000; +REFRESH MATERIALIZED VIEW mat_view_twn WITH NO DATA; +reset statement_timeout; +SELECT relispopulated FROM pg_class WHERE oid = 'mat_view_twn'::regclass; + relispopulated +---------------- + f +(1 row) + +SELECT relispopulated FROM gp_dist_random('pg_class') WHERE oid = 'mat_view_twn'::regclass; + relispopulated +---------------- + f + f + f +(3 rows) + +SELECT * FROM mat_view_twn; +ERROR: materialized view "mat_view_twn" has not been populated +HINT: Use the REFRESH MATERIALIZED VIEW command. +DROP MATERIALIZED VIEW mat_view_twn; +DROP TABLE mvtest_twn; +-- +-- https://github.com/apache/cloudberry/issues/865 +-- +set default_table_access_method TO AO_ROW; +CREATE TABLE t_issue_865_ao +( + id bigint NOT NULL, + user_id bigint +); +insert into t_issue_865_ao values (1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3), (7, 4), (8, 4), (9, 5), (10, 5); +CREATE MATERIALIZED VIEW matview_issue_865_ao AS SELECT * FROM t_issue_865_ao WHERE id < 6; +CREATE INDEX idx_matview_issue_865_ao ON matview_issue_865_ao USING btree (user_id); +BEGIN; +UPDATE t_issue_865_ao SET id = id WHERE id = 1; +UPDATE t_issue_865_ao SET id = id WHERE id = 2; +UPDATE t_issue_865_ao SET id = id WHERE id = 3; +COMMIT; +VACUUM t_issue_865_ao; +REFRESH MATERIALIZED VIEW matview_issue_865_ao; +-- AOCS +set default_table_access_method TO AO_COLUMN; +CREATE TABLE t_issue_865_aocs +( + id bigint NOT NULL, + user_id bigint +); +insert into t_issue_865_aocs values (1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3), (7, 4), (8, 4), (9, 5), (10, 5); +CREATE MATERIALIZED VIEW matview_issue_865_aocs AS SELECT * FROM t_issue_865_aocs WHERE id < 6; +CREATE INDEX idx_matview_issue_865_aocs ON matview_issue_865_aocs USING btree (user_id); +BEGIN; +UPDATE t_issue_865_aocs SET id = id WHERE id = 1; +UPDATE t_issue_865_aocs SET id = id WHERE id = 2; +UPDATE t_issue_865_aocs SET id = id WHERE id = 3; +COMMIT; +VACUUM t_issue_865_aocs; +REFRESH MATERIALIZED VIEW matview_issue_865_aocs; +RESET default_table_access_method; +DROP TABLE t_issue_865_ao CASCADE; +NOTICE: drop cascades to materialized view matview_issue_865_ao +DROP TABLE t_issue_865_aocs CASCADE; +NOTICE: drop cascades to materialized view matview_issue_865_aocs diff --git a/contrib/pax_storage/src/test/regress/expected/memoize_optimizer.out b/contrib/pax_storage/src/test/regress/expected/memoize_optimizer.out index 2e380ad2a98..a082828630c 100644 --- a/contrib/pax_storage/src/test/regress/expected/memoize_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/memoize_optimizer.out @@ -6,12 +6,6 @@ -- machines. Let's just replace the number with an 'N'. In order to allow us -- to perform validation when the measure was zero, we replace a zero value -- with "Zero". All other numbers are replaced with 'N'. --- start_ignore --- m/Extra Text: \(seg\d+\)/ --- s/Extra Text: \(seg\d+\)/Extra Text: ###/ --- m/Buckets: \d+/ --- s/Buckets: \d+/Buckets: ###/ --- end_ignore create function explain_memoize(query text, hide_hitmiss bool) returns setof text language plpgsql as $$ @@ -39,30 +33,31 @@ begin end; $$; -- Ensure we get a memoize node on the inner side of the nested loop +SET optimizer_enable_hashjoin TO off; +SET optimizer_enable_bitmapscan TO off; SET enable_hashjoin TO off; SET enable_bitmapscan TO off; SELECT explain_memoize(' SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty WHERE t2.unique1 < 1000;', false); - explain_memoize ---------------------------------------------------------------------------------------------------------- + explain_memoize +-------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=N) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) -> Partial Aggregate (actual rows=1 loops=N) - -> Hash Join (actual rows=400 loops=N) - Hash Cond: (t1.unique1 = t2.twenty) - Extra Text: (seg0) Hash chain length 50.0 avg, 50 max, using 8 of 524288 buckets. - -> Seq Scan on tenk1 t1 (actual rows=3386 loops=N) - -> Hash (actual rows=400 loops=N) - Buckets: 524288 Batches: 1 Memory Usage: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) - Hash Key: t2.twenty - -> Seq Scan on tenk1 t2 (actual rows=340 loops=N) - Filter: (unique1 < 1000) - Rows Removed by Filter: 2906 + -> Nested Loop (actual rows=400 loops=N) + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) + Hash Key: t2.twenty + -> Seq Scan on tenk1 t2 (actual rows=340 loops=N) + Filter: (unique1 < 1000) + Rows Removed by Filter: 2906 + -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N) + Index Cond: (unique1 = t2.twenty) + Heap Fetches: N Optimizer: GPORCA -(15 rows) +(14 rows) -- And check we get the expected results. SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 @@ -76,40 +71,76 @@ WHERE t2.unique1 < 1000; -- Try with LATERAL joins SELECT explain_memoize(' SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, -LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2 +LATERAL (SELECT t2.unique1 FROM tenk1 t2 + WHERE t1.twenty = t2.unique1 OFFSET 0) t2 WHERE t1.unique1 < 1000;', false); - explain_memoize --------------------------------------------------------------------------------------------------------- - Finalize Aggregate (actual rows=1 loops=N) - -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) - -> Partial Aggregate (actual rows=1 loops=N) - -> Merge Join (actual rows=400 loops=N) - Merge Cond: (t2.unique1 = t1.twenty) - -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=9 loops=N) - Heap Fetches: N - -> Sort (actual rows=400 loops=N) - Sort Key: t1.twenty - Sort Method: quicksort Memory: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) - Hash Key: t1.twenty - -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) - Filter: (unique1 < 1000) - Rows Removed by Filter: 2906 + explain_memoize +---------------------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=N) + -> Nested Loop (actual rows=1000 loops=N) + -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=1000 loops=N) + -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) + Filter: (unique1 < 1000) + Rows Removed by Filter: 2906 + -> Materialize (actual rows=1 loops=N) + -> Memoize (actual rows=1 loops=N) + Cache Key: t1.twenty + Cache Mode: binary + Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Result (actual rows=1 loops=N) + Filter: (t1.twenty = t2.unique1) + -> Materialize (actual rows=10000 loops=N) + -> Gather Motion 3:1 (slice2; segments: 3) (actual rows=10000 loops=N) + -> Seq Scan on tenk1 t2 (actual rows=3386 loops=N) Optimizer: Postgres query optimizer -(16 rows) +(17 rows) -- And check we get the expected results. SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, -LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2 +LATERAL (SELECT t2.unique1 FROM tenk1 t2 + WHERE t1.twenty = t2.unique1 OFFSET 0) t2 WHERE t1.unique1 < 1000; count | avg -------+-------------------- 1000 | 9.5000000000000000 (1 row) --- Reduce work_mem so that we see some cache evictions -SET work_mem TO '64kB'; SET enable_mergejoin TO off; +-- Test for varlena datatype with expr evaluation +CREATE TABLE expr_key (x numeric, t text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO expr_key (x, t) +SELECT d1::numeric, d1::text FROM ( + SELECT round((d / pi())::numeric, 7) AS d1 FROM generate_series(1, 20) AS d +) t; +-- duplicate rows so we get some cache hits +INSERT INTO expr_key SELECT * FROM expr_key; +CREATE INDEX expr_key_idx_x_t ON expr_key (x, t); +VACUUM ANALYZE expr_key; +-- Ensure we get we get a cache miss and hit for each of the 20 distinct values +SELECT explain_memoize(' +SELECT * FROM expr_key t1 INNER JOIN expr_key t2 +ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false); + explain_memoize +------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=80 loops=N) + -> Nested Loop (actual rows=28 loops=N) + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=14 loops=N) + Hash Key: (t1.t)::numeric + -> Seq Scan on expr_key t1 (actual rows=14 loops=N) + -> Index Only Scan using expr_key_idx_x_t on expr_key t2 (actual rows=2 loops=N) + Index Cond: (x = (t1.t)::numeric) + Filter: (t1.x = (t)::numeric) + Heap Fetches: N + Optimizer: GPORCA +(11 rows) + +DROP TABLE expr_key; +-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions +SET work_mem TO '64kB'; +SET hash_mem_multiplier TO 1.0; -- Ensure we get some evictions. We're unable to validate the hits and misses -- here as the number of entries that fit in the cache at once will vary -- between different machines. @@ -117,24 +148,23 @@ SELECT explain_memoize(' SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand WHERE t2.unique1 < 1200;', true); - explain_memoize ---------------------------------------------------------------------------------------------------------- + explain_memoize +-------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=N) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) -> Partial Aggregate (actual rows=1 loops=N) - -> Hash Join (actual rows=407 loops=N) - Hash Cond: (t1.unique1 = t2.thousand) - Extra Text: (seg0) Hash chain length 1.2 avg, 2 max, using 337 of 524288 buckets. - -> Seq Scan on tenk1 t1 (actual rows=3386 loops=N) - -> Hash (actual rows=407 loops=N) - Buckets: 524288 Batches: 1 Memory Usage: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=407 loops=N) - Hash Key: t2.thousand - -> Seq Scan on tenk1 t2 (actual rows=407 loops=N) - Filter: (unique1 < 1200) - Rows Removed by Filter: 2961 + -> Nested Loop (actual rows=407 loops=N) + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=407 loops=N) + Hash Key: t2.thousand + -> Seq Scan on tenk1 t2 (actual rows=407 loops=N) + Filter: (unique1 < 1200) + Rows Removed by Filter: 2961 + -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N) + Index Cond: (unique1 = t2.thousand) + Heap Fetches: N Optimizer: GPORCA -(15 rows) +(14 rows) CREATE TABLE flt (f float); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f' as the Apache Cloudberry data distribution key for this table. @@ -182,7 +212,7 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'n' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE INDEX strtest_n_idx ON strtest (n); CREATE INDEX strtest_t_idx ON strtest (t); -INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100)); +INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(fipshash('three'),100)); -- duplicate rows so we get some cache hits INSERT INTO strtest SELECT * FROM strtest; ANALYZE strtest; @@ -216,6 +246,53 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false); (8 rows) DROP TABLE strtest; +-- Ensure memoize works with partitionwise join +SET enable_partitionwise_join TO on; +CREATE TABLE prt (a int) PARTITION BY RANGE(a); +CREATE TABLE prt_p1 PARTITION OF prt FOR VALUES FROM (0) TO (10); +CREATE TABLE prt_p2 PARTITION OF prt FOR VALUES FROM (10) TO (20); +INSERT INTO prt VALUES (0), (0), (0), (0); +INSERT INTO prt VALUES (10), (10), (10), (10); +CREATE INDEX iprt_p1_a ON prt_p1 (a); +CREATE INDEX iprt_p2_a ON prt_p2 (a); +ANALYZE prt; +SELECT explain_memoize(' +SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false); + explain_memoize +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=32 loops=N) + -> Nested Loop (actual rows=16 loops=N) + Join Filter: (t1.a = t2.a) + -> Dynamic Seq Scan on prt t1 (actual rows=4 loops=N) + Number of partitions to scan: 2 (out of 2) + Partitions scanned: Avg 2.0 x 3 workers. Max 2 parts (seg0). + -> Dynamic Seq Scan on prt t2 (actual rows=3 loops=N) + Number of partitions to scan: 2 (out of 2) + Partitions scanned: Avg 1.4 x 3 workers of 5 scans. Max 2 parts (seg2). +(10 rows) + +-- Ensure memoize works with parameterized union-all Append path +SET enable_partitionwise_join TO off; +SELECT explain_memoize(' +SELECT * FROM prt_p1 t1 INNER JOIN +(SELECT * FROM prt_p1 UNION ALL SELECT * FROM prt_p2) t2 +ON t1.a = t2.a;', false); + explain_memoize +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (actual rows=16 loops=N) + -> Nested Loop (actual rows=16 loops=N) + Join Filter: true + -> Append (actual rows=4 loops=N) + -> Seq Scan on prt_p1 (actual rows=4 loops=N) + -> Seq Scan on prt_p2 (actual rows=4 loops=N) + -> Index Only Scan using iprt_p1_a on prt_p1 t1 (actual rows=4 loops=N) + Index Cond: (a = prt_p1.a) + Heap Fetches: N + Optimizer: GPORCA +(10 rows) + +DROP TABLE prt; +RESET enable_partitionwise_join; -- Exercise Memoize code that flushes the cache when a parameter changes which -- is not part of the cache key. -- Ensure we get a Memoize plan @@ -226,31 +303,32 @@ WHERE unique1 < 3 SELECT 1 FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0); - QUERY PLAN ------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) - -> Hash Semi Join - Hash Cond: (t0.ten = t1.twenty) - Join Filter: (t0.two <> t2.four) - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: t0.ten - -> Bitmap Heap Scan on tenk1 t0 - Recheck Cond: (unique1 < 3) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 < 3) - -> Hash - -> Result - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t1.twenty - -> Hash Join - Hash Cond: (t1.unique1 = t2.hundred) - -> Seq Scan on tenk1 t1 - -> Hash - -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: t2.hundred - -> Seq Scan on tenk1 t2 + -> GroupAggregate + Group Key: t0.unique1, t0.ctid, t0.gp_segment_id + -> Sort + Sort Key: t0.ctid, t0.gp_segment_id + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t0.ctid, t0.gp_segment_id + -> Streaming HashAggregate + Group Key: t0.unique1, t0.ctid, t0.gp_segment_id + -> Nested Loop + Join Filter: ((t0.ten = t1.twenty) AND (t0.two <> t2.four)) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on tenk1 t0 + Filter: (unique1 < 3) + -> Materialize + -> Nested Loop + Join Filter: (t1.unique1 = t2.hundred) + -> Seq Scan on tenk1 t1 + -> Materialize + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2.hundred + -> Seq Scan on tenk1 t2 Optimizer: GPORCA -(22 rows) +(23 rows) -- Ensure the above query returns the correct result SELECT unique1 FROM tenk1 t0 @@ -269,6 +347,9 @@ RESET enable_mergejoin; RESET work_mem; RESET enable_bitmapscan; RESET enable_hashjoin; +RESET optimizer_enable_hashjoin; +RESET optimizer_enable_bitmapscan; +RESET hash_mem_multiplier; -- Test parallel plans with Memoize SET min_parallel_table_scan_size TO 0; SET parallel_setup_cost TO 0; diff --git a/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out b/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out index 8cdcba95fb1..b46c7055fa3 100644 --- a/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/misc_functions_optimizer.out @@ -1,3 +1,7 @@ +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix -- -- num_nulls() -- @@ -135,19 +139,195 @@ ERROR: function num_nulls() does not exist LINE 1: SELECT num_nulls(); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. +-- +-- canonicalize_path() +-- +CREATE FUNCTION test_canonicalize_path(text) + RETURNS text + AS :'regresslib' + LANGUAGE C STRICT IMMUTABLE; +SELECT test_canonicalize_path('/'); + test_canonicalize_path +------------------------ + / +(1 row) + +SELECT test_canonicalize_path('/./abc/def/'); + test_canonicalize_path +------------------------ + /abc/def +(1 row) + +SELECT test_canonicalize_path('/./../abc/def'); + test_canonicalize_path +------------------------ + /abc/def +(1 row) + +SELECT test_canonicalize_path('/./../../abc/def/'); + test_canonicalize_path +------------------------ + /abc/def +(1 row) + +SELECT test_canonicalize_path('/abc/.././def/ghi'); + test_canonicalize_path +------------------------ + /def/ghi +(1 row) + +SELECT test_canonicalize_path('/abc/./../def/ghi//'); + test_canonicalize_path +------------------------ + /def/ghi +(1 row) + +SELECT test_canonicalize_path('/abc/def/../..'); + test_canonicalize_path +------------------------ + / +(1 row) + +SELECT test_canonicalize_path('/abc/def/../../..'); + test_canonicalize_path +------------------------ + / +(1 row) + +SELECT test_canonicalize_path('/abc/def/../../../../ghi/jkl'); + test_canonicalize_path +------------------------ + /ghi/jkl +(1 row) + +SELECT test_canonicalize_path('.'); + test_canonicalize_path +------------------------ + . +(1 row) + +SELECT test_canonicalize_path('./'); + test_canonicalize_path +------------------------ + . +(1 row) + +SELECT test_canonicalize_path('./abc/..'); + test_canonicalize_path +------------------------ + . +(1 row) + +SELECT test_canonicalize_path('abc/../'); + test_canonicalize_path +------------------------ + . +(1 row) + +SELECT test_canonicalize_path('abc/../def'); + test_canonicalize_path +------------------------ + def +(1 row) + +SELECT test_canonicalize_path('..'); + test_canonicalize_path +------------------------ + .. +(1 row) + +SELECT test_canonicalize_path('../abc/def'); + test_canonicalize_path +------------------------ + ../abc/def +(1 row) + +SELECT test_canonicalize_path('../abc/..'); + test_canonicalize_path +------------------------ + .. +(1 row) + +SELECT test_canonicalize_path('../abc/../def'); + test_canonicalize_path +------------------------ + ../def +(1 row) + +SELECT test_canonicalize_path('../abc/../../def/ghi'); + test_canonicalize_path +------------------------ + ../../def/ghi +(1 row) + +SELECT test_canonicalize_path('./abc/./def/.'); + test_canonicalize_path +------------------------ + abc/def +(1 row) + +SELECT test_canonicalize_path('./abc/././def/.'); + test_canonicalize_path +------------------------ + abc/def +(1 row) + +SELECT test_canonicalize_path('./abc/./def/.././ghi/../../../jkl/mno'); + test_canonicalize_path +------------------------ + ../jkl/mno +(1 row) + -- -- pg_log_backend_memory_contexts() -- -- Memory contexts are logged and they are not returned to the function. -- Furthermore, their contents can vary depending on the timing. However, --- we can at least verify that the code doesn't fail. +-- we can at least verify that the code doesn't fail, and that the +-- permissions are set properly. -- -SELECT * FROM pg_log_backend_memory_contexts(pg_backend_pid()); +SELECT pg_log_backend_memory_contexts(pg_backend_pid()); pg_log_backend_memory_contexts -------------------------------- t (1 row) +SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity + WHERE backend_type = 'checkpointer'; + pg_log_backend_memory_contexts +-------------------------------- + t +(1 row) + +CREATE ROLE regress_log_memory; +NOTICE: resource queue required -- using default resource queue "pg_default" +SELECT has_function_privilege('regress_log_memory', + 'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no + has_function_privilege +------------------------ + f +(1 row) + +GRANT EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) + TO regress_log_memory; +SELECT has_function_privilege('regress_log_memory', + 'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SET ROLE regress_log_memory; +SELECT pg_log_backend_memory_contexts(pg_backend_pid()); + pg_log_backend_memory_contexts +-------------------------------- + t +(1 row) + +RESET ROLE; +REVOKE EXECUTE ON FUNCTION pg_log_backend_memory_contexts(integer) + FROM regress_log_memory; +DROP ROLE regress_log_memory; -- -- Test some built-in SRFs -- @@ -195,18 +375,105 @@ select count(*) >= 0 as ok from pg_ls_archive_statusdir(); t (1 row) +-- pg_read_file() +select length(pg_read_file('postmaster.pid')) > 20; + ?column? +---------- + t +(1 row) + +select length(pg_read_file('postmaster.pid', 1, 20)); + length +-------- + 20 +(1 row) + +-- Test missing_ok +select pg_read_file('does not exist'); -- error +ERROR: could not open file "does not exist" for reading: No such file or directory +select pg_read_file('does not exist', true) IS NULL; -- ok + ?column? +---------- + t +(1 row) + +-- Test invalid argument +select pg_read_file('does not exist', 0, -1); -- error +ERROR: requested length cannot be negative +select pg_read_file('does not exist', 0, -1, true); -- error +ERROR: requested length cannot be negative +-- pg_read_binary_file() +select length(pg_read_binary_file('postmaster.pid')) > 20; + ?column? +---------- + t +(1 row) + +select length(pg_read_binary_file('postmaster.pid', 1, 20)); + length +-------- + 20 +(1 row) + +-- Test missing_ok +select pg_read_binary_file('does not exist'); -- error +ERROR: could not open file "does not exist" for reading: No such file or directory +select pg_read_binary_file('does not exist', true) IS NULL; -- ok + ?column? +---------- + t +(1 row) + +-- Test invalid argument +select pg_read_binary_file('does not exist', 0, -1); -- error +ERROR: requested length cannot be negative +select pg_read_binary_file('does not exist', 0, -1, true); -- error +ERROR: requested length cannot be negative +-- pg_stat_file() +select size > 20, isdir from pg_stat_file('postmaster.pid'); + ?column? | isdir +----------+------- + t | f +(1 row) + +-- pg_ls_dir() select * from (select pg_ls_dir('.') a) a where a = 'base' limit 1; a ------ base (1 row) +-- Test missing_ok (second argument) +select pg_ls_dir('does not exist', false, false); -- error +ERROR: could not open directory "does not exist": No such file or directory +select pg_ls_dir('does not exist', true, false); -- ok + pg_ls_dir +----------- +(0 rows) + +-- Test include_dot_dirs (third argument) +select count(*) = 1 as dot_found + from pg_ls_dir('.', false, true) as ls where ls = '.'; + dot_found +----------- + t +(1 row) + +select count(*) = 1 as dot_found + from pg_ls_dir('.', false, false) as ls where ls = '.'; + dot_found +----------- + f +(1 row) + +-- pg_timezone_names() select * from (select (pg_timezone_names()).name) ptn where name='UTC' limit 1; name ------ UTC (1 row) +-- pg_tablespace_databases() select count(*) > 0 from (select pg_tablespace_databases(oid) as pts from pg_tablespace where spcname = 'pg_default') pts @@ -216,6 +483,56 @@ select count(*) > 0 from t (1 row) +-- +-- Test replication slot directory functions +-- +CREATE ROLE regress_slot_dir_funcs; +-- Not available by default. +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalsnapdir()', 'EXECUTE'); + has_function_privilege +------------------------ + f +(1 row) + +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalmapdir()', 'EXECUTE'); + has_function_privilege +------------------------ + f +(1 row) + +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_replslotdir(text)', 'EXECUTE'); + has_function_privilege +------------------------ + f +(1 row) + +GRANT pg_monitor TO regress_slot_dir_funcs; +-- Role is now part of pg_monitor, so these are available. +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalsnapdir()', 'EXECUTE'); + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalmapdir()', 'EXECUTE'); + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_replslotdir(text)', 'EXECUTE'); + has_function_privilege +------------------------ + t +(1 row) + +DROP ROLE regress_slot_dir_funcs; -- -- Test adding a support function to a subject function -- @@ -239,6 +556,10 @@ WHERE my_int_eq(a.unique2, 42); (8 rows) -- With support function that knows it's int4eq, we get a different plan +CREATE FUNCTION test_support_func(internal) + RETURNS internal + AS :'regresslib', 'test_support_func' + LANGUAGE C STRICT; ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1 @@ -274,8 +595,8 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; (7 rows) EXPLAIN (COSTS OFF) -SELECT * FROM tenk1 a JOIN my_gen_series(1,5) g ON a.unique1 = g; - QUERY PLAN +SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; + QUERY PLAN ------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Hash Join @@ -286,3 +607,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,5) g ON a.unique1 = g; Optimizer: Pivotal Optimizer (GPORCA) (7 rows) +-- Test functions for control data +SELECT count(*) > 0 AS ok FROM pg_control_checkpoint(); + ok +---- + t +(1 row) + +SELECT count(*) > 0 AS ok FROM pg_control_init(); + ok +---- + t +(1 row) + +SELECT count(*) > 0 AS ok FROM pg_control_recovery(); + ok +---- + t +(1 row) + +SELECT count(*) > 0 AS ok FROM pg_control_system(); + ok +---- + t +(1 row) + +-- pg_split_walfile_name +SELECT * FROM pg_split_walfile_name(NULL); + segment_number | timeline_id +----------------+------------- + | +(1 row) + +SELECT * FROM pg_split_walfile_name('invalid'); +ERROR: invalid WAL file name "invalid" +SELECT segment_number > 0 AS ok_segment_number, timeline_id + FROM pg_split_walfile_name('000000010000000100000000'); + ok_segment_number | timeline_id +-------------------+------------- + t | 1 +(1 row) + +SELECT segment_number > 0 AS ok_segment_number, timeline_id + FROM pg_split_walfile_name('ffffffFF00000001000000af'); + ok_segment_number | timeline_id +-------------------+------------- + t | 4294967295 +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/notin_optimizer.out b/contrib/pax_storage/src/test/regress/expected/notin_optimizer.out index 1edb1ba4602..e5445f368e4 100644 --- a/contrib/pax_storage/src/test/regress/expected/notin_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/notin_optimizer.out @@ -1459,15 +1459,15 @@ select * from t1_12930 where (a, b) not in (select a, b from t2_12930); (0 rows) explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null; - QUERY PLAN ------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=751.50..206932.71 rows=16 width=8) - -> Hash Left Anti Semi (Not-In) Join (cost=751.50..206932.49 rows=5 width=8) - Hash Cond: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b)) - -> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28671 width=8) - Filter: (b IS NOT NULL) - -> Hash (cost=321.00..321.00 rows=28700 width=8) - -> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8) + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=751.07..208007.32 rows=64510 width=8) + -> Hash Right Anti Join (cost=751.07..207147.18 rows=21503 width=8) + Hash Cond: ((t2_12930.a = t1_12930.a) AND (t2_12930.b = t1_12930.b)) + -> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=321.00..321.00 rows=28671 width=8) + -> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28671 width=8) + Filter: (b IS NOT NULL) Optimizer: Postgres query optimizer (8 rows) diff --git a/contrib/pax_storage/src/test/regress/expected/olap_window_seq_optimizer.out b/contrib/pax_storage/src/test/regress/expected/olap_window_seq_optimizer.out index 7423f90098e..0ef0af4da07 100644 --- a/contrib/pax_storage/src/test/regress/expected/olap_window_seq_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/olap_window_seq_optimizer.out @@ -8,6 +8,11 @@ set enable_incremental_sort=on; -- Changes here should also be made to olap_window_seq.sql -- reduce noise, specifying a precision set extra_float_digits=-2; +set optimizer_trace_fallback=on; +-- start_ignore +create schema olap_window_seq; +set search_path to olap_window_seq, public; +-- end_ignore -- 1 -- Null window specification -- OVER () -- select row_number() over (), cn,pn,vn from sale; -- mvd 1->1 @@ -1119,6 +1124,8 @@ select from sale group by rollup(cn,vn,pn) order by 2 desc, 5; -- order 2,5 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments gr | cn | vn | pn | sum | rank ----+----+----+-----+---------+------ 7 | | | | 2640182 | 1 @@ -1155,6 +1162,8 @@ select from sale group by rollup(cn,vn,pn) order by 2, 5; -- order 2,5 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments gr | cn | vn | pn | sum | rank ----+----+----+-----+---------+------ 0 | 1 | 10 | 200 | 0 | 1 @@ -1380,6 +1389,8 @@ from from sale group by rollup(cn,vn,pn)) olap_tmp_for_window_seq(g,cn,vn,pn,s) order by 1,5; -- order 1,5 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments g | cn | vn | pn | s | rank ---+----+----+-----+---------+------ 0 | 1 | 10 | 200 | 0 | 1 @@ -1418,6 +1429,8 @@ select from sale group by rollup(cn,vn,pn) order by 1,5; -- order 1,5 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments grouping | cn | vn | pn | sum | rank ----------+----+----+-----+---------+------ 0 | 1 | 10 | 200 | 0 | 1 @@ -1894,6 +1907,8 @@ SUM(sale.cn) OVER(partition by sale.vn order by sale.cn desc range between curre SELECT cn, vn, pn, GROUPING(cn,vn,pn), SUM(vn) OVER (PARTITION BY GROUPING(cn,vn,pn) ORDER BY cn) as sum FROM sale GROUP BY ROLLUP(cn,vn,pn) order by 4; -- order 4 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments cn | vn | pn | grouping | sum ----+----+-----+----------+----- 1 | 20 | 100 | 0 | 140 @@ -1928,6 +1943,8 @@ select cn,vn,pn,grouping, sum(vn) over (partition by grouping order by cn) as sum from (select cn,vn,pn,grouping(cn,vn,pn) from sale group by rollup(cn,vn,pn)) t order by 4; -- order 4 +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments cn | vn | pn | grouping | sum ----+----+-----+----------+----- 1 | 20 | 100 | 0 | 140 @@ -8466,6 +8483,435 @@ select dt, pn, sum(distinct pn) over (partition by dt), sum(pn) over (partition 06-01-1401 | 800 | 3000 | 3900 (12 rows) +-- Various test cases from MDP tests. Previously, these were transformed into +-- different plan shapes, but we now do these operations in the executor so the +-- plan shape will be similar in the below queries. We want to ensure the +-- executor works correctly and produces correct results in these cases. +drop table if exists t; +NOTICE: table "t" does not exist, skipping +create table t (a int, b int, c int) distributed by (c); +insert into t select i%3+2, i%4, i from generate_series(1,5)i; +analyze t; +explain (costs off) select count(distinct a) over(), sum(distinct a) over() from t; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select count(distinct a) over(), sum(distinct a) over() from t; + count | sum +-------+----- + 3 | 9 + 3 | 9 + 3 | 9 + 3 | 9 + 3 | 9 +(5 rows) + +explain (costs off) select count(distinct a) over() from t; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select count(distinct a) over() from t; + count +------- + 3 + 3 + 3 + 3 + 3 +(5 rows) + +explain (costs off) select count(distinct a) over(), sum(distinct a) over(), avg(distinct a) over() from t; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select count(distinct a) over(), sum(distinct a) over(), avg(distinct a) over() from t; + count | sum | avg +-------+-----+-------------------- + 3 | 9 | 3.0000000000000000 + 3 | 9 | 3.0000000000000000 + 3 | 9 | 3.0000000000000000 + 3 | 9 | 3.0000000000000000 + 3 | 9 | 3.0000000000000000 +(5 rows) + +explain (costs off) select count(distinct a) over(), sum(distinct a) over(), avg(distinct b) over() from t; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select count(distinct a) over(), sum(distinct a) over(), avg(distinct b) over() from t; + count | sum | avg +-------+-----+-------------------- + 3 | 9 | 1.5000000000000000 + 3 | 9 | 1.5000000000000000 + 3 | 9 | 1.5000000000000000 + 3 | 9 | 1.5000000000000000 + 3 | 9 | 1.5000000000000000 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct a) over(partition by a) from t; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select count(distinct a) over(partition by a), sum(distinct a) over(partition by a) from t; + count | sum +-------+----- + 1 | 2 + 1 | 3 + 1 | 3 + 1 | 4 + 1 | 4 +(5 rows) + +explain (costs off) select dqa1+1, dqa2+2 from (select count(distinct a) over(partition by a), sum(distinct a) over(partition by a) from t) as foo(dqa1,dqa2); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select dqa1+1, dqa2+2 from (select count(distinct a) over(partition by a), sum(distinct a) over(partition by a) from t) as foo(dqa1,dqa2); + ?column? | ?column? +----------+---------- + 2 | 4 + 2 | 5 + 2 | 5 + 2 | 6 + 2 | 6 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct a) over(partition by b) from t; + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: b + -> Sort + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(15 rows) + +select count(distinct a) over(partition by a), sum(distinct a) over(partition by b) from t; + count | sum +-------+----- + 1 | 4 + 1 | 2 + 1 | 3 + 1 | 7 + 1 | 7 +(5 rows) + +explain (costs off) select dqa1+1, dqa2+2 from (select count(distinct a) over(partition by a), sum(distinct a) over(partition by b) from t) as foo(dqa1,dqa2); + QUERY PLAN +------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: b + -> Sort + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(15 rows) + +select dqa1+1, dqa2+2 from (select count(distinct a) over(partition by a), sum(distinct a) over(partition by b) from t) as foo(dqa1,dqa2); + ?column? | ?column? +----------+---------- + 2 | 5 + 2 | 9 + 2 | 9 + 2 | 6 + 2 | 4 +(5 rows) + +explain (costs off) select count(distinct a) over(), sum(distinct a) over(), row_number() over() from t; + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select count(distinct a) over(), sum(distinct a) over(), row_number() over() from t; + count | sum | row_number +-------+-----+------------ + 3 | 9 | 1 + 3 | 9 | 2 + 3 | 9 | 3 + 3 | 9 | 4 + 3 | 9 | 5 +(5 rows) + +explain (costs off) select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(), sum(distinct a) over(), row_number() over() from t) as foo(dqa1,dqa2,dqa3); + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Optimizer: GPORCA +(4 rows) + +select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(), sum(distinct a) over(), row_number() over() from t) as foo(dqa1,dqa2,dqa3); + ?column? | ?column? | ?column? +----------+----------+---------- + 4 | 11 | 4 + 4 | 11 | 5 + 4 | 11 | 6 + 4 | 11 | 7 + 4 | 11 | 8 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a) from t; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a) from t; + count | sum | row_number +-------+-----+------------ + 1 | 2 | 1 + 1 | 3 | 1 + 1 | 3 | 2 + 1 | 4 | 1 + 1 | 4 | 2 +(5 rows) + +explain (costs off) select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a) from t) as foo(dqa1,dqa2,dqa3); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a) from t) as foo(dqa1,dqa2,dqa3); + ?column? | ?column? | ?column? +----------+----------+---------- + 2 | 4 | 4 + 2 | 5 | 4 + 2 | 5 | 5 + 2 | 6 | 4 + 2 | 6 | 5 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct a) over(partition by b), row_number() over(order by c) from t; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + WindowAgg + Order By: c + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: c + -> Result + -> Sort + Sort Key: c + -> WindowAgg + Partition By: b + -> Sort + Sort Key: b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: b + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(21 rows) + +select count(distinct a) over(partition by a), sum(distinct a) over(partition by b), row_number() over(order by c) from t; + count | sum | row_number +-------+-----+------------ + 1 | 7 | 1 + 1 | 4 | 2 + 1 | 2 | 3 + 1 | 3 | 4 + 1 | 7 | 5 +(5 rows) + +explain (costs off) select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a order by c) from t) as foo(dqa1,dqa2,dqa3); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + Order By: c + -> Sort + Sort Key: a, c + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(14 rows) + +select dqa1+1, dqa2+2 , dqa3+3 from ( select count(distinct a) over(partition by a), sum(distinct a) over(partition by a), row_number() over(partition by a order by c) from t) as foo(dqa1,dqa2,dqa3); + ?column? | ?column? | ?column? +----------+----------+---------- + 2 | 4 | 4 + 2 | 5 | 4 + 2 | 5 | 5 + 2 | 6 | 4 + 2 | 6 | 5 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct b) over(partition by a), row_number() over(partition by a) from t; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select count(distinct a) over(partition by a), sum(distinct b) over(partition by a), row_number() over(partition by a) from t; + count | sum | row_number +-------+-----+------------ + 1 | 3 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 3 | 1 + 1 | 3 | 2 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a), sum(distinct b) over(partition by b), row_number() over(partition by a order by c) from t; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a + Order By: c + -> Sort + Sort Key: a, c + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a + -> WindowAgg + Partition By: b + -> Sort + Sort Key: b + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: b + -> WindowAgg + Partition By: a + -> Sort + Sort Key: a + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: a + -> Seq Scan on t + Optimizer: GPORCA +(22 rows) + +select count(distinct a) over(partition by a), sum(distinct b) over(partition by b), row_number() over(partition by a order by c) from t; + count | sum | row_number +-------+-----+------------ + 1 | 3 | 1 + 1 | 1 | 1 + 1 | 0 | 2 + 1 | 2 | 1 + 1 | 1 | 2 +(5 rows) + +explain (costs off) select count(distinct a) over(partition by a,b), sum(distinct b) over(partition by a,b), row_number() over(partition by a,b) from t; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: a, b + -> Sort + Sort Key: a, b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a, b + -> Seq Scan on t + Optimizer: GPORCA +(9 rows) + +select count(distinct a) over(partition by a,b), sum(distinct b) over(partition by a,b), row_number() over(partition by a,b) from t; + count | sum | row_number +-------+-----+------------ + 1 | 3 | 1 + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 0 | 1 + 1 | 1 | 1 +(5 rows) + -- Also test with a pass-by-ref type, to make sure we don't get confused with memory contexts. select pcolor, pname, count(distinct pname) over (partition by pcolor) from product; pcolor | pname | count @@ -8504,12 +8950,26 @@ ERROR: DISTINCT is supported only for single-argument window aggregates -- Test deparsing (for \d+ and pg_dump) create view distinct_windowagg_view as select sum(distinct g/2) OVER (partition by g/4) from generate_series (1, 5) g; \d+ distinct_windowagg_view +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables View "olap_window_seq.distinct_windowagg_view" Column | Type | Collation | Nullable | Default | Storage | Description --------+--------+-----------+----------+---------+---------+------------- sum | bigint | | | | plain | View definition: - SELECT sum(DISTINCT g.g / 2) OVER (PARTITION BY (g.g / 4)) AS sum + SELECT sum(DISTINCT g / 2) OVER (PARTITION BY (g / 4)) AS sum FROM generate_series(1, 5) g(g); -- These are tests for pushing down filter predicates in window functions. @@ -8526,6 +8986,8 @@ EVERY (INTERVAL '1 month'), DEFAULT PARTITION outlying_dates ); -- When there is no PARTITION BY in the window function, we do not want to push down any of the filter predicates. EXPLAIN WITH cte as (SELECT *, row_number() over () FROM window_part_sales) SELECT * FROM cte WHERE date > '2011-03-01' AND region = 'usa'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------- Subquery Scan on cte (cost=0.00..17687.00 rows=324 width=48) @@ -8550,6 +9012,8 @@ EXPLAIN WITH cte as (SELECT *, row_number() over () FROM window_part_sales) SELE -- If there is a PARTITION BY in the window function, we can push down ONLY the predicates that match the PARTITION BY column. EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY region) FROM window_part_sales) SELECT * FROM cte WHERE date > '2011-03-01' AND region = 'usa'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1376.48 rows=93 width=48) @@ -8577,6 +9041,8 @@ EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY region) FROM wind -- When both columns in the filter predicates are in the window function, it is possible to push both down. EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY date,region) FROM window_part_sales) SELECT * FROM cte WHERE date > '2011-03-01' AND region = 'usa'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=1071.83..1073.98 rows=108 width=48) @@ -8600,6 +9066,8 @@ EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY date,region) FROM -- When the column in the filter predicates is also present in the window function, it is possible to push it down. EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY date,region) FROM window_part_sales) SELECT * FROM cte WHERE region = 'usa'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=1376.89..1382.50 rows=280 width=48) @@ -8627,6 +9095,8 @@ EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY date,region) FROM -- When there is a disjunct in the filter predicates, it is not possible to push down either into the window function. EXPLAIN WITH cte as (SELECT *, row_number() over (PARTITION BY date,region) FROM window_part_sales) SELECT * FROM cte WHERE date > '2011-03-01' OR region = 'usa'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=8047.41..11166.97 rows=155978 width=48) diff --git a/contrib/pax_storage/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out b/contrib/pax_storage/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out index 2e942275e98..1e234974d41 100644 --- a/contrib/pax_storage/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out @@ -1,10 +1,12 @@ -- -- One purpose of these tests is to make sure that ORCA can gracefully fall +-- back for these queries. To detect that, turn optimizer_trace_fallback on, -- and watch for "falling back to planner" messages. -- -- start_matchignore -- m/^DETAIL: GPDB Expression type: .* not supported in DXL/ -- end_matchignore +set optimizer_trace_fallback='on'; create temp table gstest1 (a int, b int, c int, d int, v int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -12,78 +14,48 @@ insert into gstest1 values (1, 5, 10, 0, 100); insert into gstest1 values (1, 42, 20, 7, 200); insert into gstest1 values (2, 5, 30, 21, 300); insert into gstest1 values (2, 42, 40, 53, 400); --- Orca falls back due to Cube -select a, b, c, sum(v) from gstest1 group by cube(a, b, c); - a | b | c | sum ----+----+----+------ - 1 | 5 | 10 | 100 - 1 | 5 | | 100 - 1 | 42 | 20 | 200 - 1 | 42 | | 200 - 1 | | | 300 - 2 | 5 | 30 | 300 - 2 | 5 | | 300 - 2 | 42 | 40 | 400 - 2 | 42 | | 400 - 2 | | | 700 - | | | 1000 - | | 30 | 300 - | | 10 | 100 - | | 40 | 400 - | | 20 | 200 - 2 | | 30 | 300 - 1 | | 20 | 200 - 2 | | 40 | 400 - 1 | | 10 | 100 - | 5 | | 400 - | 42 | | 600 - | 5 | 30 | 300 - | 42 | 20 | 200 - | 42 | 40 | 400 - | 5 | 10 | 100 -(25 rows) +-- Orca falls back due to multiple grouping sets specifications referencing +-- duplicate alias columns where column is possibly nulled by ROLLUP or CUBE. +-- This is also a known issue in Postgres. Following threads [1][2] have more +-- details. +-- +-- [1] https://www.postgresql.org/message-id/flat/CAHnPFjSdFx_TtNpQturPMkRSJMYaD5rGP2=8iFH9V24-OjHGiQ@mail.gmail.com +-- [2] https://www.postgresql.org/message-id/flat/830269.1656693747@sss.pgh.pa.us +select a as alias1, a as alias2 from gstest1 group by alias1, rollup(alias2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple grouping sets specifications with duplicate aliased columns + alias1 | alias2 +--------+-------- + 2 | 2 + 2 | + 1 | 1 + 1 | +(4 rows) --- Orca falls back due to multiple grouping sets specifications -select sum(v), b, a, c from gstest1 group by c, grouping sets ((a, b), ()); - sum | b | a | c ------+----+---+---- - 400 | 42 | 2 | 40 - 300 | 5 | 2 | 30 - 200 | 42 | 1 | 20 - 300 | | | 30 - 400 | | | 40 - 200 | | | 20 - 100 | 5 | 1 | 10 - 100 | | | 10 -(8 rows) +select a as alias1, a as alias2 from gstest1 group by alias1, cube(alias2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple grouping sets specifications with duplicate aliased columns + alias1 | alias2 +--------+-------- + 2 | 2 + 2 | + 1 | 1 + 1 | +(4 rows) -select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b), rollup(c, d); - sum | b | a | c | d ------+----+---+----+---- - 100 | | 1 | 10 | 0 - 100 | | 1 | 10 | - 200 | | 1 | 20 | 7 - 200 | | 1 | 20 | - 300 | | 1 | | - 300 | | 2 | 30 | 21 - 300 | | 2 | 30 | - 400 | | 2 | 40 | 53 - 400 | | 2 | 40 | - 700 | | 2 | | - 100 | 5 | | 10 | 0 - 100 | 5 | | 10 | - 300 | 5 | | 30 | 21 - 300 | 5 | | 30 | - 400 | 5 | | | - 200 | 42 | | 20 | 7 - 200 | 42 | | 20 | - 400 | 42 | | 40 | 53 - 400 | 42 | | 40 | - 600 | 42 | | | -(20 rows) +-- Following does not need to fallback because no ROLLUP/CUBE means neither +-- column needs to be nulled. +select a as alias1, a as alias2 from gstest1 group by alias1, alias2; + alias1 | alias2 +--------+-------- + 1 | 1 + 2 | 2 +(2 rows) -- Orca falls back due to nested grouping sets select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b, rollup(c, d)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: nested grouping set sum | b | a | c | d ------+----+---+----+---- 1000 | | | | @@ -107,6 +79,8 @@ create temp table gstest2 (a int primary key, b int, c int, d int, v int); insert into gstest2 values (1, 1, 1, 1, 1); insert into gstest2 values (2, 2, 2, 2, 1); select d from gstest2 group by grouping sets ((a,b), (a)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query d --- 2 @@ -116,28 +90,37 @@ select d from gstest2 group by grouping sets ((a,b), (a)); (4 rows) -- Orca falls back due to HAVING clause with outer references +-- start_ignore +-- The output with relid will not be the same. select v.c, (select count(*) from gstest1 group by () having v.c) from (values (false),(true)) v(c); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ({RTE :alias <> :eref {ALIAS :aliasname gstest1 :colnames ("a" "b" "c" "d" "v")} :rtekind 0 :relisivm false :relid 255627 :relkind r :rellockmode 1 :tablesample <> :perminfoindex 1 :lateral false :inh true :inFromCl true :securityQuals <> :forceDistRandom false}) c | count ---+------- f | t | 4 (2 rows) +-- end_ignore -- Orca falls back due to grouping function with multiple arguments select a, b, grouping(a,b), sum(v), count(*), max(v) from gstest1 group by rollup (a,b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with multiple arguments a | b | grouping | sum | count | max ---+----+----------+------+-------+----- - 1 | 5 | 0 | 100 | 1 | 100 - 1 | 42 | 0 | 200 | 1 | 200 - 1 | | 1 | 300 | 2 | 200 + | | 3 | 1000 | 4 | 400 2 | 5 | 0 | 300 | 1 | 300 + 1 | | 1 | 300 | 2 | 200 2 | 42 | 0 | 400 | 1 | 400 2 | | 1 | 700 | 2 | 400 - | | 3 | 1000 | 4 | 400 + 1 | 5 | 0 | 100 | 1 | 100 + 1 | 42 | 0 | 200 | 1 | 200 (7 rows) -- Orca falls back due to grouping function with outer references select (select grouping(a) from (values(1)) v2(c)) from (values(1, 2)) v1(a, b) group by (a, b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Grouping function with outer references grouping ---------- 0 diff --git a/contrib/pax_storage/src/test/regress/expected/orca_static_pruning_optimizer.out b/contrib/pax_storage/src/test/regress/expected/orca_static_pruning_optimizer.out index 0c15013c939..15ffbe07d84 100644 --- a/contrib/pax_storage/src/test/regress/expected/orca_static_pruning_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/orca_static_pruning_optimizer.out @@ -13,6 +13,7 @@ SELECT * FROM rp WHERE b > 4200 $query$ AS qry \gset +SET optimizer_trace_fallback TO on; EXPLAIN (COSTS OFF, VERBOSE) :qry ; QUERY PLAN @@ -83,6 +84,7 @@ EXPLAIN (COSTS OFF, VERBOSE) 4201 | 4201 | 0 (1 row) +RESET optimizer_trace_fallback; CREATE TABLE lp (a int, b int) DISTRIBUTED BY (a) PARTITION BY LIST (b); CREATE TABLE lp0 PARTITION OF lp FOR VALUES IN (0, 1); NOTICE: table has parent, setting distribution columns to match parent table @@ -91,6 +93,7 @@ NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE lp2 PARTITION OF lp FOR VALUES IN (42, 43); NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO lp VALUES (0, 0), (10, 10), (42, 42); +SET optimizer_trace_fallback TO on; SELECT $query$ SELECT * FROM lp @@ -140,6 +143,7 @@ EXPLAIN (COSTS OFF, VERBOSE) 42 | 42 (1 row) +RESET optimizer_trace_fallback; CREATE TABLE hp (a int, b int) PARTITION BY HASH (b); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -148,6 +152,7 @@ NOTICE: table has parent, setting distribution columns to match parent table CREATE TABLE hp1 PARTITION OF hp FOR VALUES WITH (MODULUS 2, REMAINDER 1); NOTICE: table has parent, setting distribution columns to match parent table INSERT INTO hp VALUES (0, 1), (0, 3), (0, 4), (0, 42); +SET optimizer_trace_fallback TO on; SELECT $query$ SELECT * FROM hp @@ -155,6 +160,8 @@ WHERE b = 42 $query$ AS qry \gset EXPLAIN (COSTS OFF, VERBOSE) :qry ; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: hash partitioning QUERY PLAN ---------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) @@ -163,14 +170,17 @@ EXPLAIN (COSTS OFF, VERBOSE) Output: hp.a, hp.b Filter: (hp.b = 42) Optimizer: Postgres query optimizer -(6 rows) +(7 rows) :qry ; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: hash partitioning a | b ---+---- 0 | 42 (1 row) +RESET optimizer_trace_fallback; CREATE TABLE rp_multi_inds (a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE (b); CREATE TABLE rp_multi_inds_part1 PARTITION OF rp_multi_inds FOR VALUES FROM (MINVALUE) TO (10); NOTICE: table has parent, setting distribution columns to match parent table @@ -186,6 +196,7 @@ CREATE INDEX rp_btree_idx ON rp_multi_inds USING btree(c); CREATE INDEX rp_bitmap_idx ON rp_multi_inds USING bitmap(b); -- Expect a plan that only uses the two indexes inherited from root SET optimizer_enable_dynamictablescan TO off; +SET optimizer_trace_fallback TO on; EXPLAIN (COSTS OFF, VERBOSE) SELECT * FROM rp_multi_inds WHERE b = 11 AND (c = 11 OR c = 4201); QUERY PLAN --------------------------------------------------------------------------------------------------- @@ -208,6 +219,7 @@ SELECT * FROM rp_multi_inds WHERE b = 11 AND (c = 11 OR c = 4201); 11 | 11 | 11 (1 row) +RESET optimizer_trace_fallback; RESET optimizer_enable_dynamictablescan; CREATE TABLE foo (a int, b int) DISTRIBUTED BY (a) PARTITION BY RANGE (b); CREATE TABLE foo_part1 PARTITION OF foo FOR VALUES FROM (MINVALUE) TO (10); @@ -220,6 +232,7 @@ CREATE INDEX foo_idx on foo(a); CREATE TABLE bar (a int) DISTRIBUTED BY (a); INSERT INTO foo VALUES (0, 0), (11, 11), (4201, 4201); INSERT INTO bar VALUES (0), (11), (42); +SET optimizer_trace_fallback TO on; -- Test ORCA index nested loop join has correct outer ref -- Set below GUCs for planner just to keep parity SET enable_hashjoin TO off; diff --git a/contrib/pax_storage/src/test/regress/expected/partition_locking.out b/contrib/pax_storage/src/test/regress/expected/partition_locking.out index 7bd6b19001e..f96e8fd7c14 100644 --- a/contrib/pax_storage/src/test/regress/expected/partition_locking.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_locking.out @@ -177,8 +177,9 @@ select * from locktest_master where coalesce not like 'gp_%' and coalesce not li select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node -------------------+-----------------+----------+----------- + partlockt | AccessShareLock | relation | 1 segment partlockt_1_prt_1 | AccessShareLock | relation | 1 segment -(1 row) +(2 rows) commit; -- drop @@ -357,11 +358,12 @@ insert into partlockt values(1), (2), (3); select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node -------------------+------------------+----------+-------- + partlockt | AccessShareLock | relation | master partlockt | RowExclusiveLock | relation | master partlockt_1_prt_1 | RowExclusiveLock | relation | master partlockt_1_prt_2 | RowExclusiveLock | relation | master partlockt_1_prt_3 | RowExclusiveLock | relation | master -(4 rows) +(5 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; coalesce | mode | locktype | node @@ -498,19 +500,17 @@ select * from partlockt where i = 1; (0 rows) select * from locktest_master where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node --------------------------+-----------------+----------+-------- - partlockt | AccessShareLock | relation | master - partlockt_1_prt_1 | AccessShareLock | relation | master - partlockt_1_prt_1_i_idx | AccessShareLock | relation | master - partlockt_idx | AccessShareLock | relation | master -(4 rows) + coalesce | mode | locktype | node +-------------------+-----------------+----------+-------- + partlockt | AccessShareLock | relation | master + partlockt_1_prt_1 | AccessShareLock | relation | master +(2 rows) select * from locktest_segments where coalesce not like 'gp_%' and coalesce not like 'pg_%'; - coalesce | mode | locktype | node --------------------------+-----------------+----------+----------- - partlockt_1_prt_1 | AccessShareLock | relation | 1 segment - partlockt_1_prt_1_i_idx | AccessShareLock | relation | 1 segment + coalesce | mode | locktype | node +-------------------+-----------------+----------+----------- + partlockt | AccessShareLock | relation | 1 segment + partlockt_1_prt_1 | AccessShareLock | relation | 1 segment (2 rows) commit; diff --git a/contrib/pax_storage/src/test/regress/expected/partition_optimizer.out b/contrib/pax_storage/src/test/regress/expected/partition_optimizer.out index ab31dd1a761..37606a1303b 100755 --- a/contrib/pax_storage/src/test/regress/expected/partition_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_optimizer.out @@ -367,9 +367,98 @@ select has_table_privilege('part_role', 'bar_p'::regclass, 'select'); t (1 row) +-- the ONLY keyword will affect just the partition root for both grant/revoke +create role part_role2; +grant select on only foo_p to part_role2; +select has_table_privilege('part_role2', 'foo_p'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +select has_table_privilege('part_role2', 'foo_p_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + f +(1 row) + +grant select on foo_p to part_role2; +revoke select on only foo_p from part_role2; +select has_table_privilege('part_role2', 'foo_p'::regclass, 'select'); + has_table_privilege +--------------------- + f +(1 row) + +select has_table_privilege('part_role2', 'foo_p_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +revoke select on foo_p from part_role2; +select has_table_privilege('part_role2', 'foo_p_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + f +(1 row) + +create table foo_p2 (a int, b int) partition by range(a) (start(1) end(10) every(1)); +grant select on foo_p, only foo_p2 to part_role2; -- multiple tables in same statement +select has_table_privilege('part_role2', 'foo_p'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +select has_table_privilege('part_role2', 'foo_p_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +select has_table_privilege('part_role2', 'foo_p2'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +select has_table_privilege('part_role2', 'foo_p2_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + f +(1 row) + +-- more cases +revoke all on foo_p from part_role2; +revoke all on foo_p2 from part_role2; +grant select on only public.foo_p to part_role2; -- with schema +select has_table_privilege('part_role2', 'foo_p'::regclass, 'select'); + has_table_privilege +--------------------- + t +(1 row) + +select has_table_privilege('part_role2', 'foo_p_1_prt_6'::regclass, 'select'); + has_table_privilege +--------------------- + f +(1 row) + +grant update(b) on only foo_p2 to part_role2; -- column level priviledge +select relname, has_column_privilege('part_role2', oid, 'b', 'update') from pg_class +where relname = 'foo_p2' or relname = 'foo_p2_1_prt_6'; + relname | has_column_privilege +----------------+---------------------- + foo_p2 | t + foo_p2_1_prt_6 | f +(2 rows) + drop table foo_p; +drop table foo_p2; drop table bar_p; drop role part_role; +drop role part_role2; -- validation create table foo_p (i int) partition by range(i) (start(1) end(10) every(1)); @@ -4487,30 +4576,6 @@ group by relname; drop table s cascade; -- MPP-13750 ) --- MPP-13806 start -drop table if exists mpp13806; -NOTICE: table "mpp13806" does not exist, skipping - CREATE TABLE mpp13806 (id int, date date, amt decimal(10,2)) - DISTRIBUTED BY (id) - PARTITION BY RANGE (date) - ( START (date '2008-01-01') INCLUSIVE - END (date '2008-01-05') EXCLUSIVE - EVERY (INTERVAL '1 day') ); - --- Adding unbound partition right before the start used to fail -alter table mpp13806 add partition test end (date '2008-01-01') exclusive; - -drop table if exists mpp13806; - CREATE TABLE mpp13806 (id int, date date, amt decimal(10,2)) - DISTRIBUTED BY (id) - PARTITION BY RANGE (date) - ( START (date '2008-01-01') EXCLUSIVE - END (date '2008-01-05') EXCLUSIVE - EVERY (INTERVAL '1 day') ); --- For good measure, test the opposite case -alter table mpp13806 add partition test end (date '2008-01-01') inclusive; -drop table mpp13806; --- MPP-13806 end -- MPP-14471 start -- No unenforceable PK/UK constraints! (UNIQUE INDEXes still allowed; tested above) drop table if exists tc cascade; @@ -5760,7 +5825,7 @@ select * from sales order by pkid; drop table sales cascade; NOTICE: drop cascades to default value for column pkid of table newpart --- Exchage partiton table with a table having dropped column +-- Exchage partition table with a table having dropped column create table exchange_part(a int, b int) partition by range(b) (start (0) end (10) every (5)); create table exchange1(a int, c int, b int); alter table exchange1 drop column c; @@ -5813,38 +5878,38 @@ select relname, pg_get_expr(relpartbound, oid) from pg_class where relname like pt_tab_encode_1_prt_s_xyz | FOR VALUES IN ('xyz') (3 rows) -select gp_segment_id, attrelid::regclass, attnum, attoptions from pg_attribute_encoding where attrelid = 'pt_tab_encode_1_prt_s_abc'::regclass; - gp_segment_id | attrelid | attnum | attoptions ----------------+---------------------------+--------+----------------------------------------------------- - -1 | pt_tab_encode_1_prt_s_abc | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - -1 | pt_tab_encode_1_prt_s_abc | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} +select gp_segment_id, attrelid::regclass, attnum, filenum, attoptions from pg_attribute_encoding where attrelid = 'pt_tab_encode_1_prt_s_abc'::regclass; + gp_segment_id | attrelid | attnum | filenum | attoptions +---------------+---------------------------+--------+---------+----------------------------------------------------- + -1 | pt_tab_encode_1_prt_s_abc | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + -1 | pt_tab_encode_1_prt_s_abc | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} (2 rows) -select gp_segment_id, attrelid::regclass, attnum, attoptions from gp_dist_random('pg_attribute_encoding') where attrelid = 'pt_tab_encode_1_prt_s_abc'::regclass order by 1,3 limit 5; - gp_segment_id | attrelid | attnum | attoptions ----------------+---------------------------+--------+----------------------------------------------------- - 0 | pt_tab_encode_1_prt_s_abc | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 0 | pt_tab_encode_1_prt_s_abc | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 1 | pt_tab_encode_1_prt_s_abc | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 1 | pt_tab_encode_1_prt_s_abc | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 2 | pt_tab_encode_1_prt_s_abc | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} +select gp_segment_id, attrelid::regclass, attnum, filenum, attoptions from gp_dist_random('pg_attribute_encoding') where attrelid = 'pt_tab_encode_1_prt_s_abc'::regclass order by 1,3 limit 5; + gp_segment_id | attrelid | attnum | filenum | attoptions +---------------+---------------------------+--------+---------+----------------------------------------------------- + 0 | pt_tab_encode_1_prt_s_abc | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 0 | pt_tab_encode_1_prt_s_abc | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 1 | pt_tab_encode_1_prt_s_abc | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 1 | pt_tab_encode_1_prt_s_abc | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 2 | pt_tab_encode_1_prt_s_abc | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} (5 rows) -select gp_segment_id, attrelid::regclass, attnum, attoptions from pg_attribute_encoding where attrelid = 'pt_tab_encode_1_prt_s_xyz'::regclass; - gp_segment_id | attrelid | attnum | attoptions ----------------+---------------------------+--------+----------------------------------------------------- - -1 | pt_tab_encode_1_prt_s_xyz | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - -1 | pt_tab_encode_1_prt_s_xyz | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} +select gp_segment_id, attrelid::regclass, attnum, filenum, attoptions from pg_attribute_encoding where attrelid = 'pt_tab_encode_1_prt_s_xyz'::regclass; + gp_segment_id | attrelid | attnum | filenum | attoptions +---------------+---------------------------+--------+---------+----------------------------------------------------- + -1 | pt_tab_encode_1_prt_s_xyz | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + -1 | pt_tab_encode_1_prt_s_xyz | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} (2 rows) -select gp_segment_id, attrelid::regclass, attnum, attoptions from gp_dist_random('pg_attribute_encoding') where attrelid = 'pt_tab_encode_1_prt_s_xyz'::regclass order by 1,3 limit 5; - gp_segment_id | attrelid | attnum | attoptions ----------------+---------------------------+--------+----------------------------------------------------- - 0 | pt_tab_encode_1_prt_s_xyz | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 0 | pt_tab_encode_1_prt_s_xyz | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 1 | pt_tab_encode_1_prt_s_xyz | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 1 | pt_tab_encode_1_prt_s_xyz | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} - 2 | pt_tab_encode_1_prt_s_xyz | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} +select gp_segment_id, attrelid::regclass, attnum, filenum, attoptions from gp_dist_random('pg_attribute_encoding') where attrelid = 'pt_tab_encode_1_prt_s_xyz'::regclass order by 1,3 limit 5; + gp_segment_id | attrelid | attnum | filenum | attoptions +---------------+---------------------------+--------+---------+----------------------------------------------------- + 0 | pt_tab_encode_1_prt_s_xyz | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 0 | pt_tab_encode_1_prt_s_xyz | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 1 | pt_tab_encode_1_prt_s_xyz | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 1 | pt_tab_encode_1_prt_s_xyz | 2 | 2 | {compresstype=zlib,compresslevel=1,blocksize=32768} + 2 | pt_tab_encode_1_prt_s_xyz | 1 | 1 | {compresstype=zlib,compresslevel=1,blocksize=32768} (5 rows) select c.oid::regclass, relkind, amname, reloptions from pg_class c left join pg_am am on am.oid = relam where c.oid = 'pt_tab_encode_1_prt_s_abc'::regclass; diff --git a/contrib/pax_storage/src/test/regress/expected/partition_prune_optimizer.out b/contrib/pax_storage/src/test/regress/expected/partition_prune_optimizer.out index f030bf81505..c8c35129b79 100644 --- a/contrib/pax_storage/src/test/regress/expected/partition_prune_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_prune_optimizer.out @@ -1175,6 +1175,7 @@ create table boolpart (a bool) partition by list (a); create table boolpart_default partition of boolpart default; create table boolpart_t partition of boolpart for values in ('true'); create table boolpart_f partition of boolpart for values in ('false'); +insert into boolpart values (true), (false), (null); explain (costs off) select * from boolpart where a in (true, false); QUERY PLAN ---------------------------------------------------- @@ -1255,11 +1256,306 @@ explain (costs off) select * from boolpart where a is not unknown; Optimizer: GPORCA (5 rows) +select * from boolpart where a in (true, false); + a +--- + f + t +(2 rows) + +select * from boolpart where a = false; + a +--- + f +(1 row) + +select * from boolpart where not a = false; + a +--- + t +(1 row) + +select * from boolpart where a is true or a is not true; + a +--- + + f + t +(3 rows) + +select * from boolpart where a is not true; + a +--- + + f +(2 rows) + +select * from boolpart where a is not true and a is not false; + a +--- + +(1 row) + +select * from boolpart where a is unknown; + a +--- + +(1 row) + +select * from boolpart where a is not unknown; + a +--- + f + t +(2 rows) + +-- try some other permutations with a NULL partition instead of a DEFAULT +delete from boolpart where a is null; +create table boolpart_null partition of boolpart for values in (null); +insert into boolpart values(null); +explain (costs off) select * from boolpart where a is not true; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on boolpart + Number of partitions to scan: 2 (out of 4) + Filter: (a IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +explain (costs off) select * from boolpart where a is not true and a is not false; + QUERY PLAN +-------------------------------------------------------- + Gather Motion XXX + -> Dynamic Seq Scan on boolpart + Number of partitions to scan: 1 (out of 4) + Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) + Optimizer: GPORCA +(5 rows) + +explain (costs off) select * from boolpart where a is not false; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on boolpart + Number of partitions to scan: 2 (out of 4) + Filter: (a IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +select * from boolpart where a is not true; + a +--- + + f +(2 rows) + +select * from boolpart where a is not true and a is not false; + a +--- + +(1 row) + +select * from boolpart where a is not false; + a +--- + + t +(2 rows) + +-- inverse boolean partitioning - a seemingly unlikely design, but we've got +-- code for it, so we'd better test it. +create table iboolpart (a bool) partition by list ((not a)); +create table iboolpart_default partition of iboolpart default; +create table iboolpart_f partition of iboolpart for values in ('true'); +create table iboolpart_t partition of iboolpart for values in ('false'); +insert into iboolpart values (true), (false), (null); +explain (costs off) select * from iboolpart where a in (true, false); + QUERY PLAN +---------------------------------------------------------------- + Gather Motion XXX + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: (a = ANY ('{t,f}'::boolean[])) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: (a = ANY ('{t,f}'::boolean[])) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: (a = ANY ('{t,f}'::boolean[])) + Optimizer: GPORCA +(9 rows) + +explain (costs off) select * from iboolpart where a = false; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on iboolpart_f iboolpart + Filter: (NOT a) + Optimizer: GPORCA +(4 rows) + +explain (costs off) select * from iboolpart where not a = false; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on iboolpart_t iboolpart + Filter: a + Optimizer: GPORCA +(4 rows) + +explain (costs off) select * from iboolpart where a is true or a is not true; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: ((a IS TRUE) OR (a IS NOT TRUE)) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: ((a IS TRUE) OR (a IS NOT TRUE)) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: ((a IS TRUE) OR (a IS NOT TRUE)) + Optimizer: GPORCA +(9 rows) + +explain (costs off) select * from iboolpart where a is not true; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: (a IS NOT TRUE) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: (a IS NOT TRUE) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: (a IS NOT TRUE) + Optimizer: GPORCA +(9 rows) + +explain (costs off) select * from iboolpart where a is not true and a is not false; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) + Optimizer: GPORCA +(9 rows) + +explain (costs off) select * from iboolpart where a is unknown; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: (a IS UNKNOWN) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: (a IS UNKNOWN) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: (a IS UNKNOWN) + Optimizer: GPORCA +(9 rows) + +explain (costs off) select * from iboolpart where a is not unknown; + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on iboolpart_t iboolpart_1 + Filter: (a IS NOT UNKNOWN) + -> Seq Scan on iboolpart_f iboolpart_2 + Filter: (a IS NOT UNKNOWN) + -> Seq Scan on iboolpart_default iboolpart_3 + Filter: (a IS NOT UNKNOWN) + Optimizer: GPORCA +(9 rows) + +select * from iboolpart where a in (true, false); + a +--- + f + t +(2 rows) + +select * from iboolpart where a = false; + a +--- + f +(1 row) + +select * from iboolpart where not a = false; + a +--- + t +(1 row) + +select * from iboolpart where a is true or a is not true; + a +--- + + f + t +(3 rows) + +select * from iboolpart where a is not true; + a +--- + + f +(2 rows) + +select * from iboolpart where a is not true and a is not false; + a +--- + +(1 row) + +select * from iboolpart where a is unknown; + a +--- + +(1 row) + +select * from iboolpart where a is not unknown; + a +--- + f + t +(2 rows) + +-- Try some other permutations with a NULL partition instead of a DEFAULT +delete from iboolpart where a is null; +create table iboolpart_null partition of iboolpart for values in (null); +insert into iboolpart values(null); +-- Pruning shouldn't take place for these. Just check the result is correct +select * from iboolpart where a is not true; + a +--- + + f +(2 rows) + +select * from iboolpart where a is not true and a is not false; + a +--- + +(1 row) + +select * from iboolpart where a is not false; + a +--- + + t +(2 rows) + create table boolrangep (a bool, b bool, c int) partition by range (a,b,c); create table boolrangep_tf partition of boolrangep for values from ('true', 'false', 0) to ('true', 'false', 100); create table boolrangep_ft partition of boolrangep for values from ('false', 'true', 0) to ('false', 'true', 100); create table boolrangep_ff1 partition of boolrangep for values from ('false', 'false', 0) to ('false', 'false', 50); create table boolrangep_ff2 partition of boolrangep for values from ('false', 'false', 50) to ('false', 'false', 100); +create table boolrangep_null partition of boolrangep default; -- try a more complex case that's been known to trip up pruning in the past explain (costs off) select * from boolrangep where not a and not b and c = 25; QUERY PLAN @@ -1270,6 +1566,32 @@ explain (costs off) select * from boolrangep where not a and not b and c = 25; Optimizer: Postgres query optimizer (4 rows) +-- ensure we prune boolrangep_tf +explain (costs off) select * from boolrangep where a is not true and not b and c = 25; +QUERY PLAN +___________ + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on boolrangep_ff1 boolrangep_1 + Filter: ((a IS NOT TRUE) AND (NOT b) AND (c = 25)) + -> Seq Scan on boolrangep_ff2 boolrangep_2 + Filter: ((a IS NOT TRUE) AND (NOT b) AND (c = 25)) + -> Seq Scan on boolrangep_ft boolrangep_3 + Filter: ((a IS NOT TRUE) AND (NOT b) AND (c = 25)) + -> Seq Scan on boolrangep_null boolrangep_4 + Filter: ((a IS NOT TRUE) AND (NOT b) AND (c = 25)) + +-- ensure we prune everything apart from boolrangep_tf and boolrangep_null +explain (costs off) select * from boolrangep where a is not false and not b and c = 25; +QUERY PLAN +___________ + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Seq Scan on boolrangep_tf boolrangep_1 + Filter: ((a IS NOT FALSE) AND (NOT b) AND (c = 25)) + -> Seq Scan on boolrangep_null boolrangep_2 + Filter: ((a IS NOT FALSE) AND (NOT b) AND (c = 25)) + -- test scalar-to-array operators create table coercepart (a varchar) partition by list (a); create table coercepart_ab partition of coercepart for values in ('ab'); @@ -1680,7 +2002,7 @@ explain (costs off) select * from like_op_noprune where a like '%BC'; create table lparted_by_int2 (a smallint) partition by list (a); create table lparted_by_int2_1 partition of lparted_by_int2 for values in (1); create table lparted_by_int2_16384 partition of lparted_by_int2 for values in (16384); -explain (costs off) select * from lparted_by_int2 where a = 100000000000000; +explain (costs off) select * from lparted_by_int2 where a = 100_000_000_000_000; QUERY PLAN -------------------------- Result @@ -1692,7 +2014,7 @@ create table rparted_by_int2 (a smallint) partition by range (a); create table rparted_by_int2_1 partition of rparted_by_int2 for values from (1) to (10); create table rparted_by_int2_16384 partition of rparted_by_int2 for values from (10) to (16384); -- all partitions pruned -explain (costs off) select * from rparted_by_int2 where a > 100000000000000; +explain (costs off) select * from rparted_by_int2 where a > 100_000_000_000_000; QUERY PLAN -------------------------- Result @@ -1702,7 +2024,7 @@ explain (costs off) select * from rparted_by_int2 where a > 100000000000000; create table rparted_by_int2_maxvalue partition of rparted_by_int2 for values from (16384) to (maxvalue); -- all partitions but rparted_by_int2_maxvalue pruned -explain (costs off) select * from rparted_by_int2 where a > 100000000000000; +explain (costs off) select * from rparted_by_int2 where a > 100_000_000_000_000; QUERY PLAN ---------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) @@ -1712,7 +2034,39 @@ explain (costs off) select * from rparted_by_int2 where a > 100000000000000; Optimizer: GPORCA (5 rows) -drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pruning_multi, like_op_noprune, lparted_by_int2, rparted_by_int2; +drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, iboolpart, boolrangep, rp, coll_pruning_multi, like_op_noprune, lparted_by_int2, rparted_by_int2; +-- check that AlternativeSubPlan within a pruning expression gets cleaned up +create table asptab (id int primary key) partition by range (id); +create table asptab0 partition of asptab for values from (0) to (1); +create table asptab1 partition of asptab for values from (1) to (2); +explain (costs off) +select * from + (select exists (select 1 from int4_tbl tinner where f1 = touter.f1) as b + from int4_tbl touter) ss, + asptab +where asptab.id > ss.b::int; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> GroupAggregate + Group Key: touter.f1, touter.ctid, touter.gp_segment_id, (true) + -> Sort + Sort Key: touter.f1, touter.ctid, touter.gp_segment_id, (true) + -> Hash Left Join + Hash Cond: (touter.f1 = tinner.f1) + -> Seq Scan on int4_tbl touter + -> Hash + -> Seq Scan on int4_tbl tinner + -> Dynamic Index Only Scan on asptab_pkey on asptab + Index Cond: (id > ((CASE WHEN (NOT ((true) IS NULL)) THEN true ELSE false END))::integer) + Number of partitions to scan: 2 (out of 2) + Optimizer: GPORCA +(17 rows) + +drop table asptab; -- -- Test Partition pruning for HASH partitioning -- @@ -1958,7 +2312,6 @@ explain (costs off) select * from hp where a = 1 and b = 'abcde' and Optimizer: Postgres query optimizer (3 rows) -drop table hp; -- -- Test runtime partition pruning -- @@ -2092,6 +2445,28 @@ explain (analyze, costs off, summary off, timing off) execute ab_q3 (2, 2); Optimizer: Postgres query optimizer (12 rows) +-- +-- Test runtime pruning with hash partitioned tables +-- +-- recreate partitions dropped above +create table hp1 partition of hp for values with (modulus 4, remainder 1); +create table hp2 partition of hp for values with (modulus 4, remainder 2); +create table hp3 partition of hp for values with (modulus 4, remainder 3); +-- Ensure we correctly prune unneeded partitions when there is an IS NULL qual +prepare hp_q1 (text) as +select * from hp where a is null and b = $1; +explain (costs off) execute hp_q1('xxx'); +QUERY PLAN +___________ + Gather Motion XXX + -> Append + Subplans Removed: 3 + -> Seq Scan on hp2 hp_1 + Filter: ((a IS NULL) AND (b = $1)) +GP_IGNORE:(6 rows) + +deallocate hp_q1; +drop table hp; -- Test a backwards Append scan create table list_part (a int) partition by list (a); create table list_part1 partition of list_part for values in (1); @@ -4010,7 +4385,7 @@ explain (costs off) update listp1 set a = 1 where a = 2; -> Result -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: a - -> Split + -> Split Update -> Seq Scan on listp1 Filter: (a = 2) Optimizer: GPORCA @@ -4036,7 +4411,7 @@ explain (costs off) update listp1 set a = 1 where a = 2; -> Result -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: a - -> Split + -> Split Update -> Seq Scan on listp1 Filter: (a = 2) Optimizer: GPORCA @@ -4199,22 +4574,233 @@ explain (costs off) select * from rp_prefix_test3 where a >= 1 and b >= 1 and b Optimizer: Postgres query optimizer (4 rows) -create table hp_prefix_test (a int, b int, c int, d int) partition by hash (a part_test_int4_ops, b part_test_int4_ops, c part_test_int4_ops, d part_test_int4_ops); -create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 2, remainder 0); -create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 2, remainder 1); --- Test that get_steps_using_prefix() handles non-NULL step_nullkeys -explain (costs off) select * from hp_prefix_test where a = 1 and b is null and c = 1 and d = 1; - QUERY PLAN -------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) - -> Seq Scan on hp_prefix_test_p1 hp_prefix_test - Filter: ((b IS NULL) AND (a = 1) AND (c = 1) AND (d = 1)) - Optimizer: Postgres query optimizer -(4 rows) - drop table rp_prefix_test1; drop table rp_prefix_test2; drop table rp_prefix_test3; +-- +-- Test that get_steps_using_prefix() handles IS NULL clauses correctly +-- +create table hp_prefix_test (a int, b int, c int, d int) + partition by hash (a part_test_int4_ops, b part_test_int4_ops, c part_test_int4_ops, d part_test_int4_ops); +-- create 8 partitions +select 'create table hp_prefix_test_p' || x::text || ' partition of hp_prefix_test for values with (modulus 8, remainder ' || x::text || ');' +from generate_Series(0,7) x; + ?column? +------------------------------------------------------------------------------------------------------ + create table hp_prefix_test_p0 partition of hp_prefix_test for values with (modulus 8, remainder 0); + create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 8, remainder 1); + create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 8, remainder 2); + create table hp_prefix_test_p3 partition of hp_prefix_test for values with (modulus 8, remainder 3); + create table hp_prefix_test_p4 partition of hp_prefix_test for values with (modulus 8, remainder 4); + create table hp_prefix_test_p5 partition of hp_prefix_test for values with (modulus 8, remainder 5); + create table hp_prefix_test_p6 partition of hp_prefix_test for values with (modulus 8, remainder 6); + create table hp_prefix_test_p7 partition of hp_prefix_test for values with (modulus 8, remainder 7); +(8 rows) + +\gexec +create table hp_prefix_test_p0 partition of hp_prefix_test for values with (modulus 8, remainder 0); +create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 8, remainder 1); +create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 8, remainder 2); +create table hp_prefix_test_p3 partition of hp_prefix_test for values with (modulus 8, remainder 3); +create table hp_prefix_test_p4 partition of hp_prefix_test for values with (modulus 8, remainder 4); +create table hp_prefix_test_p5 partition of hp_prefix_test for values with (modulus 8, remainder 5); +create table hp_prefix_test_p6 partition of hp_prefix_test for values with (modulus 8, remainder 6); +create table hp_prefix_test_p7 partition of hp_prefix_test for values with (modulus 8, remainder 7); +-- insert 16 rows, one row for each test to perform. +insert into hp_prefix_test +select + case a when 0 then null else 1 end, + case b when 0 then null else 2 end, + case c when 0 then null else 3 end, + case d when 0 then null else 4 end +from + generate_series(0,1) a, + generate_series(0,1) b, + generate_Series(0,1) c, + generate_Series(0,1) d; +-- Ensure partition pruning works correctly for each combination of IS NULL +-- and equality quals. This may seem a little excessive, but there have been +-- a number of bugs in this area over the years. We make use of row only +-- output to reduce the size of the expected results. +\t on +select + 'explain (costs off) select tableoid::regclass,* from hp_prefix_test where ' || + string_agg(c.colname || case when g.s & (1 << c.colpos) = 0 then ' is null' else ' = ' || (colpos+1)::text end, ' and ' order by c.colpos) +from (values('a',0),('b',1),('c',2),('d',3)) c(colname, colpos), generate_Series(0,15) g(s) +group by g.s +order by g.s; + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d is null + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d = 4 + explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d = 4 + +\gexec +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p0 hp_prefix_test + Filter: ((a IS NULL) AND (b IS NULL) AND (c IS NULL) AND (d IS NULL)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p1 hp_prefix_test + Filter: ((b IS NULL) AND (c IS NULL) AND (d IS NULL) AND (a = 1)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p2 hp_prefix_test + Filter: ((a IS NULL) AND (c IS NULL) AND (d IS NULL) AND (b = 2)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p4 hp_prefix_test + Filter: ((c IS NULL) AND (d IS NULL) AND (a = 1) AND (b = 2)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p3 hp_prefix_test + Filter: ((a IS NULL) AND (b IS NULL) AND (d IS NULL) AND (c = 3)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p7 hp_prefix_test + Filter: ((b IS NULL) AND (d IS NULL) AND (a = 1) AND (c = 3)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p4 hp_prefix_test + Filter: ((a IS NULL) AND (d IS NULL) AND (b = 2) AND (c = 3)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d is null + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p5 hp_prefix_test + Filter: ((d IS NULL) AND (a = 1) AND (b = 2) AND (c = 3)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p4 hp_prefix_test + Filter: ((a IS NULL) AND (b IS NULL) AND (c IS NULL) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p6 hp_prefix_test + Filter: ((b IS NULL) AND (c IS NULL) AND (a = 1) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p5 hp_prefix_test + Filter: ((a IS NULL) AND (c IS NULL) AND (b = 2) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p6 hp_prefix_test + Filter: ((c IS NULL) AND (a = 1) AND (b = 2) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p4 hp_prefix_test + Filter: ((a IS NULL) AND (b IS NULL) AND (c = 3) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p5 hp_prefix_test + Filter: ((b IS NULL) AND (a = 1) AND (c = 3) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p6 hp_prefix_test + Filter: ((a IS NULL) AND (b = 2) AND (c = 3) AND (d = 4)) + +explain (costs off) select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d = 4 + Gather Motion XXX + -> Seq Scan on hp_prefix_test_p4 hp_prefix_test + Filter: ((a = 1) AND (b = 2) AND (c = 3) AND (d = 4)) + +-- And ensure we get exactly 1 row from each. Again, all 16 possible combinations. +select + 'select tableoid::regclass,* from hp_prefix_test where ' || + string_agg(c.colname || case when g.s & (1 << c.colpos) = 0 then ' is null' else ' = ' || (colpos+1)::text end, ' and ' order by c.colpos) +from (values('a',0),('b',1),('c',2),('d',3)) c(colname, colpos), generate_Series(0,15) g(s) +group by g.s +order by g.s; + select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d is null + select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d is null + select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d is null + select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d is null + select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d is null + select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d is null + select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d is null + select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d is null + select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d = 4 + select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d = 4 + select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d = 4 + select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d = 4 + select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d = 4 + select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d = 4 + select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d = 4 + select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d = 4 + +\gexec +select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d is null + hp_prefix_test_p0 | | | | + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d is null + hp_prefix_test_p1 | 1 | | | + +select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d is null + hp_prefix_test_p2 | | 2 | | + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d is null + hp_prefix_test_p4 | 1 | 2 | | + +select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d is null + hp_prefix_test_p3 | | | 3 | + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d is null + hp_prefix_test_p7 | 1 | | 3 | + +select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d is null + hp_prefix_test_p4 | | 2 | 3 | + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d is null + hp_prefix_test_p5 | 1 | 2 | 3 | + +select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c is null and d = 4 + hp_prefix_test_p4 | | | | 4 + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c is null and d = 4 + hp_prefix_test_p6 | 1 | | | 4 + +select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c is null and d = 4 + hp_prefix_test_p5 | | 2 | | 4 + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c is null and d = 4 + hp_prefix_test_p6 | 1 | 2 | | 4 + +select tableoid::regclass,* from hp_prefix_test where a is null and b is null and c = 3 and d = 4 + hp_prefix_test_p4 | | | 3 | 4 + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b is null and c = 3 and d = 4 + hp_prefix_test_p5 | 1 | | 3 | 4 + +select tableoid::regclass,* from hp_prefix_test where a is null and b = 2 and c = 3 and d = 4 + hp_prefix_test_p6 | | 2 | 3 | 4 + +select tableoid::regclass,* from hp_prefix_test where a = 1 and b = 2 and c = 3 and d = 4 + hp_prefix_test_p4 | 1 | 2 | 3 | 4 + +\t off drop table hp_prefix_test; -- -- Check that gen_partprune_steps() detects self-contradiction from clauses diff --git a/contrib/pax_storage/src/test/regress/expected/partition_pruning_optimizer.out b/contrib/pax_storage/src/test/regress/expected/partition_pruning_optimizer.out index 43ff4ab76ab..5f05005f1c1 100644 --- a/contrib/pax_storage/src/test/regress/expected/partition_pruning_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/partition_pruning_optimizer.out @@ -11,9 +11,31 @@ set enable_incremental_sort=on; -- the planner and the rest of the system, so the expected output can need -- updating, as the system improves. -- --- GPDB_12_MERGE_FIXME: Many of these queries are no longer able to constraint --- exclusion, like we used to on GPDB 6. Not sure what we should do about it. --- See https://github.com/greenplum-db/gpdb/issues/10287. +-- Create test table with two partitions, for values equal to '1' and values equal to '2'. +create table parttab (n numeric, t text) + partition by list (n)(partition one values ('1'), partition two values('2')); +-- Insert three rows. They're all equal to '1', but different number of zeros after decimal point. +insert into parttab values + ('1', 'one'), + ('1.0', 'one point zero'), + ('1.00', 'one point zero zero'); +-- select rows whose text representation is three characters long. This should return the '1.0' row. +select * from parttab where length(n::text) = 3; + n | t +-----+---------------- + 1.0 | one point zero +(1 row) + +explain select * from parttab where length(n::text) = 3; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on parttab + Number of partitions to scan: 2 (out of 2) + Filter: (length((n)::text) = 3) + Optimizer: GPORCA +(5 rows) + -- Use index scans when possible. That exercises more code, and allows us to -- spot the cases where the planner cannot use even when it exists. set enable_seqscan=off; @@ -2498,4 +2520,900 @@ select get_selected_parts('explain analyze select * from bar where j is distinct [0, 0] (1 row) +-- Table partitioned by boolean column +CREATE TABLE pt_bool_tab +( + col1 int, + col2 bool +) +distributed by (col1) +partition by list(col2) +( + partition part1 values(true), + partition part2 values(false) +); +INSERT INTO pt_bool_tab SELECT i, true FROM generate_series(1,3)i; +INSERT INTO pt_bool_tab SELECT i, false FROM generate_series(1,2)i; +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: (col2 IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS true; + col1 | col2 +------+------ + 1 | t + 2 | t + 3 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: (col2 IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS false; + col1 | col2 +------+------ + 2 | f + 1 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS NULL; + QUERY PLAN +------------------------------------------ + Result + One-Time Filter: false + Optimizer: GPORCA +(3 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS NULL; + col1 | col2 +------+------ +(0 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS unknown; + QUERY PLAN +------------------------------------------ + Result + One-Time Filter: false + Optimizer: GPORCA +(3 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS unknown; + col1 | col2 +------+------ +(0 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS NOT true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: (col2 IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS NOT true; + col1 | col2 +------+------ + 1 | f + 2 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS NOT false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: (col2 IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS NOT false; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS NOT unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: (col2 IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS NOT unknown; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE col2 IS NOT NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: (NOT (col2 IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE col2 IS NOT NULL; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: ((NOT col2) IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS true; + col1 | col2 +------+------ + 1 | f + 2 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: ((NOT col2) IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS false; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: ((NOT col2) IS NULL) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS NULL; + col1 | col2 +------+------ +(0 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: ((NOT col2) IS UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS unknown; + col1 | col2 +------+------ +(0 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: ((NOT col2) IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT true; + col1 | col2 +------+------ + 1 | t + 2 | t + 3 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 1 (out of 2) + Filter: ((NOT col2) IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT false; + col1 | col2 +------+------ + 1 | f + 2 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: ((NOT col2) IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT unknown; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab + Number of partitions to scan: 2 (out of 2) + Filter: (NOT ((NOT col2) IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab WHERE (not col2) IS NOT NULL; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +CREATE TABLE pt_bool_tab_df +( + col1 int, + col2 bool +) +distributed by (col1) +partition by list(col2) +( + partition part1 values(true), + partition part2 values(false), + default partition def +); +INSERT INTO pt_bool_tab_df SELECT i, true FROM generate_series(1,3)i; +INSERT INTO pt_bool_tab_df SELECT i, false FROM generate_series(1,2)i; +INSERT INTO pt_bool_tab_df SELECT i, NULL FROM generate_series(1,1)i; +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS true; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS false; + col1 | col2 +------+------ + 2 | f + 1 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS NULL) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS NULL; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS unknown; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT true; + col1 | col2 +------+------ + 2 | f + 1 | f + 1 | +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT false; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t + 1 | +(4 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT unknown; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: (NOT (col2 IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE col2 IS NOT NULL; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: ((NOT col2) IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS true; + col1 | col2 +------+------ + 1 | f + 2 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 1 (out of 3) + Filter: ((NOT col2) IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS false; + col1 | col2 +------+------ + 1 | t + 2 | t + 3 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS NULL) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NULL; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS unknown; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT true; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: ((NOT col2) IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT true; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t + 1 | +(4 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT false; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 2 (out of 3) + Filter: ((NOT col2) IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT false; + col1 | col2 +------+------ + 2 | f + 1 | f + 1 | +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT unknown; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT unknown; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT NULL; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_df + Number of partitions to scan: 3 (out of 3) + Filter: (NOT ((NOT col2) IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_df WHERE (not col2) IS NOT NULL; + col1 | col2 +------+------ + 1 | f + 1 | t + 2 | f + 2 | t + 3 | t +(5 rows) + +CREATE TABLE pt_bool_tab_null +( + col1 int, + col2 bool +) +distributed by (col1) +partition by list(col2) +( + partition part1 values(true), + partition part2 values(false), + partition part3 values(null) +); +INSERT INTO pt_bool_tab_null SELECT i, true FROM generate_series(1,3)i; +INSERT INTO pt_bool_tab_null SELECT i, false FROM generate_series(1,2)i; +INSERT INTO pt_bool_tab_null SELECT i, NULL FROM generate_series(1,1)i; +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS true; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS true; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS false; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS false; + col1 | col2 +------+------ + 2 | f + 1 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS NULL; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS NULL) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS NULL; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS unknown; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: (col2 IS UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS unknown; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT true; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT true; + col1 | col2 +------+------ + 2 | f + 1 | f + 1 | +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT false; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT false; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t + 1 | +(4 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT unknown; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: (col2 IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT unknown; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT NULL; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: (NOT (col2 IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE col2 IS NOT NULL; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS true; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: ((NOT col2) IS TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS true; + col1 | col2 +------+------ + 2 | f + 1 | f +(2 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS false; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 1 (out of 3) + Filter: ((NOT col2) IS FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS false; + col1 | col2 +------+------ + 1 | t + 2 | t + 3 | t +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NULL; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS NULL) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NULL; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS unknown; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS unknown; + col1 | col2 +------+------ + 1 | +(1 row) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT true; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: ((NOT col2) IS NOT TRUE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT true; + col1 | col2 +------+------ + 2 | t + 3 | t + 1 | t + 1 | +(4 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT false; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 2 (out of 3) + Filter: ((NOT col2) IS NOT FALSE) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT false; + col1 | col2 +------+------ + 2 | f + 1 | f + 1 | +(3 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT unknown; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 3 (out of 3) + Filter: ((NOT col2) IS NOT UNKNOWN) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT unknown; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + +EXPLAIN SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT NULL; + QUERY PLAN +-------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Seq Scan on pt_bool_tab_null + Number of partitions to scan: 3 (out of 3) + Filter: (NOT ((NOT col2) IS NULL)) + Optimizer: GPORCA +(5 rows) + +SELECT * FROM pt_bool_tab_null WHERE (not col2) IS NOT NULL; + col1 | col2 +------+------ + 2 | f + 2 | t + 3 | t + 1 | f + 1 | t +(5 rows) + RESET ALL; diff --git a/contrib/pax_storage/src/test/regress/expected/pg_lsn_optimizer.out b/contrib/pax_storage/src/test/regress/expected/pg_lsn_optimizer.out index d1293059ae2..e1d8a3d3833 100644 --- a/contrib/pax_storage/src/test/regress/expected/pg_lsn_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/pg_lsn_optimizer.out @@ -26,6 +26,19 @@ INSERT INTO PG_LSN_TBL VALUES ('/ABCD'); ERROR: invalid input syntax for type pg_lsn: "/ABCD" LINE 1: INSERT INTO PG_LSN_TBL VALUES ('/ABCD'); ^ +-- Also try it with non-error-throwing API +SELECT pg_input_is_valid('16AE7F7', 'pg_lsn'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('16AE7F7', 'pg_lsn'); + message | detail | hint | sql_error_code +-------------------------------------------------+--------+------+---------------- + invalid input syntax for type pg_lsn: "16AE7F7" | | | 22P02 +(1 row) + -- Min/Max aggregation SELECT MIN(f1), MAX(f1) FROM PG_LSN_TBL; min | max diff --git a/contrib/pax_storage/src/test/regress/expected/plancache_optimizer.out b/contrib/pax_storage/src/test/regress/expected/plancache_optimizer.out index 163bd2fbf62..28fb9f94ef9 100644 --- a/contrib/pax_storage/src/test/regress/expected/plancache_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/plancache_optimizer.out @@ -1,6 +1,7 @@ -- -- Tests to exercise the plan caching/invalidation mechanism -- +SET optimizer_trace_fallback to on; CREATE TEMP TABLE pcachetest AS SELECT * FROM int8_tbl; -- create and use a cached plan PREPARE prepstmt AS SELECT * FROM pcachetest; @@ -119,18 +120,24 @@ begin end $$ language plpgsql; select cache_test(1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test ------------ 37 (1 row) select cache_test(2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test ------------ 38 (1 row) select cache_test(3); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test ------------ 39 @@ -144,6 +151,8 @@ begin return f1 from v1; end$$ language plpgsql; select cache_test_2(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test_2 -------------- 4 @@ -152,6 +161,8 @@ select cache_test_2(); create or replace temp view v1 as select 2+2+4 as f1; select cache_test_2(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test_2 -------------- 8 @@ -160,6 +171,8 @@ select cache_test_2(); create or replace temp view v1 as select 2+2+4+(select max(unique1) from tenk1) as f1; select cache_test_2(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions cache_test_2 -------------- 10007 @@ -241,6 +254,8 @@ begin end loop; end$$ language plpgsql; select cachebug(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: table "temptable" does not exist, skipping NOTICE: 1 NOTICE: 2 @@ -251,6 +266,8 @@ NOTICE: 3 (1 row) select cachebug(); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions NOTICE: drop cascades to view vv NOTICE: 1 NOTICE: 2 diff --git a/contrib/pax_storage/src/test/regress/expected/polygon_optimizer.out b/contrib/pax_storage/src/test/regress/expected/polygon_optimizer.out index baeb001a631..97cc97ecb85 100644 --- a/contrib/pax_storage/src/test/regress/expected/polygon_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/polygon_optimizer.out @@ -9,7 +9,6 @@ INSERT INTO POLYGON_TBL(f1) VALUES ('(3.0,1.0),(3.0,3.0),(1.0,0.0)'); INSERT INTO POLYGON_TBL(f1) VALUES ('(1,2),(3,4),(5,6),(7,8)'); INSERT INTO POLYGON_TBL(f1) VALUES ('(7,8),(5,6),(3,4),(1,2)'); -- Reverse INSERT INTO POLYGON_TBL(f1) VALUES ('(1,2),(7,8),(5,6),(3,-4)'); -ANALYZE POLYGON_TBL; -- degenerate polygons INSERT INTO POLYGON_TBL(f1) VALUES ('(0.0,0.0)'); INSERT INTO POLYGON_TBL(f1) VALUES ('(0.0,1.0),(0.0,1.0)'); @@ -34,6 +33,7 @@ INSERT INTO POLYGON_TBL(f1) VALUES ('asdf'); ERROR: invalid input syntax for type polygon: "asdf" LINE 1: INSERT INTO POLYGON_TBL(f1) VALUES ('asdf'); ^ +ANALYZE POLYGON_TBL; SELECT * FROM POLYGON_TBL; f1 ---------------------------- @@ -63,7 +63,6 @@ INSERT INTO quad_poly_tbl (11002, NULL), (11003, NULL); ANALYZE quad_poly_tbl; --- PAX not support gist/spgist/brin indexes CREATE INDEX quad_poly_tbl_idx ON quad_poly_tbl USING spgist(p); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) -- get reference results for ORDER BY distance from seq scan @@ -328,3 +327,28 @@ WHERE seq.id IS NULL OR idx.id IS NULL; RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; +-- test non-error-throwing API for some core types +SELECT pg_input_is_valid('(2.0,0.8,0.1)', 'polygon'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('(2.0,0.8,0.1)', 'polygon'); + message | detail | hint | sql_error_code +--------------------------------------------------------+--------+------+---------------- + invalid input syntax for type polygon: "(2.0,0.8,0.1)" | | | 22P02 +(1 row) + +SELECT pg_input_is_valid('(2.0,xyz)', 'polygon'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT * FROM pg_input_error_info('(2.0,xyz)', 'polygon'); + message | detail | hint | sql_error_code +----------------------------------------------------+--------+------+---------------- + invalid input syntax for type polygon: "(2.0,xyz)" | | | 22P02 +(1 row) + diff --git a/contrib/pax_storage/src/test/regress/expected/prepare.out b/contrib/pax_storage/src/test/regress/expected/prepare.out index d52b3bc9fef..eb0c1bd447d 100644 --- a/contrib/pax_storage/src/test/regress/expected/prepare.out +++ b/contrib/pax_storage/src/test/regress/expected/prepare.out @@ -60,6 +60,8 @@ PREPARE q2(text) AS SELECT datname, datistemplate, datallowconn FROM pg_database WHERE datname = $1; EXECUTE q2('postgres'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables datname | datistemplate | datallowconn ----------+---------------+-------------- postgres | f | t diff --git a/contrib/pax_storage/src/test/regress/expected/privileges.out b/contrib/pax_storage/src/test/regress/expected/privileges.out index df8d3294e1e..0af5816ef4f 100644 --- a/contrib/pax_storage/src/test/regress/expected/privileges.out +++ b/contrib/pax_storage/src/test/regress/expected/privileges.out @@ -1960,9 +1960,11 @@ ERROR: function "unwanted_grant_nofail" already exists with same argument types CREATE MATERIALIZED VIEW sro_index_mv AS SELECT 1 AS c; ERROR: relation "sro_index_mv" already exists CREATE UNIQUE INDEX ON sro_index_mv (c) WHERE unwanted_grant_nofail(1) > 0; +ERROR: UNIQUE and DISTRIBUTED RANDOMLY are incompatible \c - REFRESH MATERIALIZED VIEW CONCURRENTLY sro_index_mv; -NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table +ERROR: cannot refresh materialized view "public.sro_index_mv" concurrently +HINT: Create a unique index with no WHERE clause on one or more columns of the materialized view. REFRESH MATERIALIZED VIEW sro_index_mv; DROP OWNED BY regress_sro_user; DROP ROLE regress_sro_user; diff --git a/contrib/pax_storage/src/test/regress/expected/qp_correlated_query_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_correlated_query_optimizer.out index dc45af9b15f..e1e10d4eb9b 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_correlated_query_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_correlated_query_optimizer.out @@ -3,6 +3,7 @@ -- ---------------------------------------------------------------------- create schema qp_correlated_query; set search_path to qp_correlated_query; +set optimizer_trace_fallback TO on; -- ---------------------------------------------------------------------- -- Test: csq_heap_in.sql (Correlated Subquery: CSQ using IN clause (Heap)) -- ---------------------------------------------------------------------- @@ -955,6 +956,57 @@ select A.i, B.i, C.j from A, B, C where A.j = all (select C.j from C where C.j = 1 | -1 | 62 (10 rows) +-- -- -- -- +-- Test ALL clause with subqueries +-- -- -- -- +create table qp_csq_all_t1(a int) distributed by (a); +create table qp_csq_all_t2(b int) distributed by (b); +insert into qp_csq_all_t1 values (null); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- + +(1 row) + +truncate qp_csq_all_t1, qp_csq_all_t2; +insert into qp_csq_all_t2 values (null); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- +(0 rows) + +truncate qp_csq_all_t1, qp_csq_all_t2; +insert into qp_csq_all_t1 values (1); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- + 1 +(1 row) + +truncate qp_csq_all_t1, qp_csq_all_t2; +insert into qp_csq_all_t2 values (1); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- +(0 rows) + +truncate qp_csq_all_t1, qp_csq_all_t2; +insert into qp_csq_all_t1 values (1); +insert into qp_csq_all_t2 values (1); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- + 1 +(1 row) + +truncate qp_csq_all_t1, qp_csq_all_t2; +insert into qp_csq_all_t1 values (1); +insert into qp_csq_all_t2 values (2); +select * from qp_csq_all_t1 where (select a from qp_csq_all_t1 limit 1) = all(select b from qp_csq_all_t2); + a +--- +(0 rows) + -- ---------------------------------------------------------------------- -- Test: Correlated Subquery: CSQ using EXISTS clause (Heap) -- ---------------------------------------------------------------------- @@ -1555,6 +1607,8 @@ select * from qp_csq_t4 order by a; (4 rows) update qp_csq_t4 set a = (select y from qp_csq_t2 where x=a) where b < 8; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 21 not found in project list select * from qp_csq_t4 order by a; a | b ----+--- @@ -1565,6 +1619,8 @@ select * from qp_csq_t4 order by a; (4 rows) update qp_csq_t4 set a = 9999 where qp_csq_t4.a = (select max(x) from qp_csq_t2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 23 not found in project list select * from qp_csq_t4 order by a; a | b ------+--- @@ -1575,6 +1631,8 @@ select * from qp_csq_t4 order by a; (4 rows) update qp_csq_t4 set a = (select max(y) from qp_csq_t2 where x=a) where qp_csq_t4.a = (select min(x) from qp_csq_t2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list select * from qp_csq_t4 order by a; a | b ------+--- @@ -1585,6 +1643,8 @@ select * from qp_csq_t4 order by a; (4 rows) update qp_csq_t4 set a = 8888 where (select (y*2)>b from qp_csq_t2 where a=x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 22 not found in project list select * from qp_csq_t4 order by a; a | b ------+--- @@ -1595,6 +1655,8 @@ select * from qp_csq_t4 order by a; (4 rows) update qp_csq_t4 set a = 3333 where qp_csq_t4.a in (select x from qp_csq_t2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 25 not found in project list select * from qp_csq_t4 order by a; a | b ------+--- @@ -1605,6 +1667,8 @@ select * from qp_csq_t4 order by a; (4 rows) update D set i = 11111 from C where C.i = D.i and exists (select C.j from C,B where C.j = B.j and D.j < 10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 45 not found in project list select * from D; i | j -------+---- @@ -1616,13 +1680,15 @@ select * from D; (5 rows) update D set i = 22222 from C where C.i = D.i and not exists (select C.j from C,B where C.j = B.j and D.j < 10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 45 not found in project list select * from D; i | j -------+---- + 19 | 5 11111 | 1 11111 | 1 11111 | -1 - 19 | 5 22222 | 62 (5 rows) @@ -1771,10 +1837,22 @@ select (select avg(x) from qp_csq_t1, qp_csq_t2 where qp_csq_t1.a = any (select 4.0000000000000000 (4 rows) +-- Planner should fail due to skip-level correlation not supported. Query should not cause segfault like in issue #12054. +select A.j, (select array_agg(a_B) from (select B.j, (select array_agg(a_C) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Whole-row variable +ERROR: correlated subquery with skip-level correlations is not supported +-- Planner should fail due to skip-level correlation not supported. Query should not return wrong results like in issue #12054. +select A.j, (select array_agg(a_B) from (select B.j, (select sum(a_C.j) from (select C.j from C where C.i = A.i) a_C) from B where B.i = A.i order by A.j) a_B) from A; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Whole-row variable +ERROR: correlated subquery with skip-level correlations is not supported -- ---------------------------------------------------------------------- -- Test: Correlated Subquery: CSQ with multiple columns (Heap) -- ---------------------------------------------------------------------- select A.i, B.i from A, B where (A.i,A.j) = (select min(B.i),min(B.j) from B where B.i = A.i) order by A.i, B.i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | i ---+---- 1 | -1 @@ -1792,6 +1870,8 @@ select A.i, B.i from A, B where (A.i,A.j) = (select min(B.i),min(B.j) from B whe (12 rows) select A.i, B.i from A, B where (A.i,A.j) = all(select B.i,B.j from B where B.i = A.i) order by A.i, B.i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | i ----+---- 19 | -1 @@ -1838,6 +1918,8 @@ select A.i, B.i from A, B where not exists (select B.i,B.j from B where B.i = A. (18 rows) select A.i, B.i from A, B where (A.i,A.j) in (select B.i,B.j from B where B.i = A.i) order by A.i, B.i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | i ---+---- 1 | -1 @@ -1855,6 +1937,8 @@ select A.i, B.i from A, B where (A.i,A.j) in (select B.i,B.j from B where B.i = (12 rows) select A.i, B.i,C.i from A, B, C where (A.i,B.i) = any (select A.i, B.i from A,B where A.i = C.i and B.i = C.i) order by A.i, B.i, C.i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | i | i ---+---+--- 1 | 1 | 1 @@ -2109,6 +2193,8 @@ select A.i, B.i,C.i from A, B, C where not exists (select A.i, B.i from A,B wher (240 rows) select A.i, B.i,C.i from A, B, C where (A.i,B.i) in (select A.i, B.i from A,B where A.i = C.i and B.i = C.i) order by A.i, B.i, C.i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | i | i ---+---+--- 1 | 1 | 1 @@ -2118,6 +2204,8 @@ select A.i, B.i,C.i from A, B, C where (A.i,B.i) in (select A.i, B.i from A,B wh (4 rows) select * from A,B,C where (A.i,B.i) = any (select A.i, B.i from A,B where A.i < C.i and B.i = C.i and C.i not in (select A.i from A where A.j = 1 and A.j = B.j)) order by 1,2,3,4,5,6; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery i | j | i | j | i | j ----+---+----+---+----+---- 1 | 1 | 2 | 7 | 2 | 7 @@ -2131,6 +2219,8 @@ select * from A,B,C where (A.i,B.i) = any (select A.i, B.i from A,B where A.i < (8 rows) select A.i as A_i, B.i as B_i,C.i as C_i from A, B, C where (A.i,B.i) = (select min(A.i), min(B.i) from A,B where A.i = C.i and B.i = C.i) order by A_i, B_i, C_i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a_i | b_i | c_i -----+-----+----- 1 | 1 | 1 @@ -2259,6 +2349,8 @@ SELECT id, first_name FROM employee WHERE id IN SELECT id, first_name, salary from employee where (id, salary) IN (SELECT id, MIN(salary) FROM employee GROUP BY id) order by id; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery id | first_name | salary ----+------------+--------- 01 | Jason | 1234.56 @@ -2313,6 +2405,8 @@ analyze product; commit; UPDATE product SET product_price = product_price * .9 where product_name NOT IN (SELECT DISTINCT product_name FROM product_order); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 35 not found in project list SELECT * FROM product order by product_name; product_name | product_price | quantity_on_hand | last_stock_date --------------+---------------+------------------+----------------- @@ -3901,8 +3995,121 @@ select * from t1 where 0 < (select count(*) from generate_series(1, a), t1); reset optimizer_enforce_subplans; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS supplier; +-------------------------------------------------------------------------------- +-- Planner should fail due to skip-level correlation not supported. Query should +-- not cause segfault on ORCA. Currently ORCA is falling back to planner which +-- is undesired. Github Issue #15693 +-------------------------------------------------------------------------------- +CREATE TABLE skip_correlated_t1 ( + a INT, + b INT +) DISTRIBUTED BY (a); +CREATE TABLE skip_correlated_t2 ( + a INT, + b INT +) DISTRIBUTED BY (a); +CREATE TABLE skip_correlated_t3 ( + a INT, + b INT +) DISTRIBUTED BY (a); +CREATE TABLE skip_correlated_t4 ( + a INT, + b INT +) DISTRIBUTED BY (a); +EXPLAIN (COSTS OFF) +SELECT * +FROM skip_correlated_t1 +WHERE EXISTS ( + SELECT skip_correlated_t2.a + FROM skip_correlated_t2 + LEFT JOIN skip_correlated_t3 ON EXISTS ( + SELECT skip_correlated_t1.b + FROM skip_correlated_t2 + ) +); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA +ERROR: correlated subquery with skip-level correlations is not supported +EXPLAIN (COSTS OFF) +SELECT * +FROM skip_correlated_t1 +WHERE EXISTS ( + SELECT skip_correlated_t2.a + FROM skip_correlated_t2 + LEFT JOIN skip_correlated_t3 ON ( + EXISTS ( + SELECT 1 + FROM skip_correlated_t2 + WHERE a > skip_correlated_t1.b + ) + ) +); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA +ERROR: correlated subquery with skip-level correlations is not supported +EXPLAIN (COSTS OFF) +SELECT * +FROM skip_correlated_t1 +WHERE EXISTS ( + SELECT skip_correlated_t2.a + FROM skip_correlated_t2 + INNER JOIN skip_correlated_t3 ON skip_correlated_t2.a = skip_correlated_t3.a + LEFT JOIN skip_correlated_t4 ON ( + EXISTS ( + SELECT skip_correlated_t1.b + FROM skip_correlated_t2 + ) + ) +); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA +ERROR: correlated subquery with skip-level correlations is not supported +-------------------------------------------------------------------------------- +-- Query should not cause segfault on ORCA. Will fallback to planner as no plan +-- is computed by ORCA. Github Issue #15693 +-------------------------------------------------------------------------------- +EXPLAIN (COSTS OFF) +SELECT * +FROM skip_correlated_t1 +WHERE EXISTS ( + SELECT skip_correlated_t2.a + FROM skip_correlated_t2 + LEFT JOIN skip_correlated_t3 ON skip_correlated_t3.a = ALL ( + SELECT skip_correlated_t1.a + FROM skip_correlated_t2 + ) +); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on skip_correlated_t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Nested Loop Left Join + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on skip_correlated_t2 + -> Materialize + -> Nested Loop Left Anti Semi (Not-In) Join + Join Filter: (skip_correlated_t3.a <> skip_correlated_t1.a) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on skip_correlated_t3 + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on skip_correlated_t2 skip_correlated_t2_1 + Optimizer: Postgres query optimizer +(18 rows) + +DROP TABLE skip_correlated_t1; +DROP TABLE skip_correlated_t2; +DROP TABLE skip_correlated_t3; +DROP TABLE skip_correlated_t4; -- ---------------------------------------------------------------------- -- Test: teardown.sql -- ---------------------------------------------------------------------- set client_min_messages='warning'; drop schema qp_correlated_query cascade; +reset optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/qp_dml_joins_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_dml_joins_optimizer.out index 293fc24450c..daa153f1656 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_dml_joins_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_dml_joins_optimizer.out @@ -4038,6 +4038,7 @@ UPDATE dml_heap_p SET a = dml_heap_p.b % 2 FROM dml_heap_r WHERE dml_heap_p.b::i --Update on table with composite distribution key UPDATE dml_heap_p SET b = (dml_heap_p.b * 1.1)::int FROM dml_heap_r WHERE dml_heap_p.b = dml_heap_r.a and dml_heap_p.b = dml_heap_r.b; -- Insert with join and except +SET optimizer_trace_fallback=on; CREATE TABLE dml_heap_int (a int) DISTRIBUTED BY (a); INSERT INTO dml_heap_int SELECT generate_series(1, 3); INSERT INTO dml_heap_int diff --git a/contrib/pax_storage/src/test/regress/expected/qp_dropped_cols_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_dropped_cols_optimizer.out index a7204b7a168..812bcbb3060 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_dropped_cols_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_dropped_cols_optimizer.out @@ -14932,6 +14932,146 @@ SELECT * FROM leaf_with_dropped_cols_index_part2 WHERE a>42 and z<42; ---+---+--- (0 rows) +-- Exchange partitions with dropped columns and check subplan with attribute remapping into dynamicSeqscan +-- start_ignore +DROP TABLE if exists ds_main; +NOTICE: table "ds_main" does not exist, skipping +DROP TABLE if exists ds_part1; +NOTICE: table "ds_part1" does not exist, skipping +DROP TABLE if exists non_part1; +NOTICE: table "non_part1" does not exist, skipping +DROP TABLE if exists non_part2; +NOTICE: table "non_part2" does not exist, skipping +-- end_ignore +CREATE TABLE ds_main ( a INT, b INT, c INT) PARTITION BY RANGE(c)( START(1) END (10) EVERY (2), DEFAULT PARTITION deflt); +CREATE TABLE ds_part1 (a int, a1 int, b int, c int); +CREATE TABLE non_part1 (c INT); +CREATE TABLE non_part2 (e INT, f INT); +SET optimizer_enforce_subplans TO ON; +-- drop columns to get attribute remapping +ALTER TABLE ds_part1 drop column a1; +ALTER TABLE ds_main exchange partition for (1) with table ds_part1; +INSERT INTO non_part1 SELECT i FROM generate_series(1, 1)i; +INSERT INTO non_part2 SELECT i, i + 1 FROM generate_series(1, 10)i; +INSERT INTO ds_main SELECT i, i + 1, i + 2 FROM generate_series (1, 1000)i; +analyze ds_main; +analyze non_part1; +analyze non_part2; +EXPLAIN (costs off) SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ds_main.a + -> Sort + Sort Key: ds_main.a + -> Hash Join + Hash Cond: (ds_main.c = non_part2.e) + -> Dynamic Seq Scan on ds_main + Number of partitions to scan: 6 (out of 6) + Filter: ((a = a) AND (SubPlan 1)) + SubPlan 1 + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> Limit + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on non_part1 + -> Hash + -> Partition Selector (selector id: $0) + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on non_part2 +(20 rows) + +SELECT * FROM ds_main, non_part2 WHERE ds_main.c = non_part2.e AND a IN ( SELECT a FROM non_part1) order by a; + a | b | c | e | f +---+---+----+----+---- + 1 | 2 | 3 | 3 | 4 + 2 | 3 | 4 | 4 | 5 + 3 | 4 | 5 | 5 | 6 + 4 | 5 | 6 | 6 | 7 + 5 | 6 | 7 | 7 | 8 + 6 | 7 | 8 | 8 | 9 + 7 | 8 | 9 | 9 | 10 + 8 | 9 | 10 | 10 | 11 +(8 rows) + +-- Test for dropping distribution column via DROP TYPE..CASCADE +-- ensure the distribution policy for the table is updated +-- and the DML queries on the table work as expected for both +-- partitioned and non-partitioned tables +CREATE DOMAIN int_new AS int; +CREATE TABLE dist_key_dropped (a int_new, b int) DISTRIBUTED BY(a); +CREATE TABLE dist_key_dropped_pt (a int_new, b int) DISTRIBUTED BY(a) +PARTITION BY RANGE(b) + (PARTITION p1 START(0) END(5), + PARTITION p2 START(6) END(10)); +INSERT INTO dist_key_dropped VALUES(1, 1); +INSERT INTO dist_key_dropped VALUES(2, 2); +INSERT INTO dist_key_dropped_pt VALUES(1, 1); +INSERT INTO dist_key_dropped_pt VALUES(2, 6); +DROP TYPE int_new CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to column a of table dist_key_dropped +drop cascades to column a of table dist_key_dropped_pt +drop cascades to column a of table dist_key_dropped_pt_1_prt_p1 +drop cascades to column a of table dist_key_dropped_pt_1_prt_p2 +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +NOTICE: dropping a column that is part of the distribution policy forces a NULL distribution policy +\d dist_key_dropped + Table "qp_dropped_cols.dist_key_dropped" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | +Distributed randomly + +\d dist_key_dropped_pt +Partitioned table "qp_dropped_cols.dist_key_dropped_pt" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | +Partition key: RANGE (b) +Number of partitions: 2 (Use \d+ to list them.) +Distributed randomly + +\d dist_key_dropped_pt_1_prt_p1 +Table "qp_dropped_cols.dist_key_dropped_pt_1_prt_p1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | +Partition of: dist_key_dropped_pt FOR VALUES FROM (0) TO (5) +Distributed randomly + +\d dist_key_dropped_pt_1_prt_p2 +Table "qp_dropped_cols.dist_key_dropped_pt_1_prt_p2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | +Partition of: dist_key_dropped_pt FOR VALUES FROM (6) TO (10) +Distributed randomly + +INSERT INTO dist_key_dropped VALUES(10); +INSERT INTO dist_key_dropped_pt VALUES(7); +UPDATE dist_key_dropped SET b=11 where b=1; +UPDATE dist_key_dropped_pt SET b=2 where b=1; +SELECT * FROM dist_key_dropped; + b +---- + 2 + 10 + 11 +(3 rows) + +SELECT * FROM dist_key_dropped_pt; + b +--- + 6 + 2 + 7 +(3 rows) + +DELETE FROM dist_key_dropped WHERE b=2; +DELETE FROM dist_key_dropped_pt WHERE b=6; -- As of this writing, pg_dump creates an invalid dump for some of the tables -- here. See https://github.com/greenplum-db/gpdb/issues/3598. So we must drop -- the tables, or the pg_upgrade test fails. diff --git a/contrib/pax_storage/src/test/regress/expected/qp_misc.out b/contrib/pax_storage/src/test/regress/expected/qp_misc.out index ddc527c7c94..7369bf9cff0 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_misc.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_misc.out @@ -11867,6 +11867,8 @@ select tjoin1.c1,tjoin2.c2 from tjoin1 left outer join tjoin2 on tjoin1.c1=tjoin group by f1,f2 ) Q ) P; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA test_name_part | pass_ind -----------------------------------+---------- JoinCoreOnConditionSetFunction_p1 | 1 @@ -12862,6 +12864,8 @@ select sum( distinct c1 ), count( distinct c2 ) from tset1 group by f1,f2 ) Q ) P; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer test_name_part | pass_ind ------------------------+---------- MultipleSumDistinct_p1 | 1 @@ -14792,6 +14796,8 @@ select rnum, c1, c2 from tjoin1 where (c1,'BB') in (select c1, c2 from tjoin2 wh group by f1,f2,f3 ) Q ) P; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery test_name_part | pass_ind ----------------+---------- RowSubquery_p1 | 1 @@ -14810,6 +14816,8 @@ select * from tset1 where (c1,c2) in (select c1,c2 from tset2) group by f1,f2,f3 ) Q ) P; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery test_name_part | pass_ind ------------------------+---------- RowValueConstructor_p1 | 1 diff --git a/contrib/pax_storage/src/test/regress/expected/qp_olap_group2_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_olap_group2_optimizer.out index 0c8c1ab8911..fbca24feb15 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_olap_group2_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_olap_group2_optimizer.out @@ -1,8 +1,10 @@ -- -- One purpose of these tests is to make sure that ORCA can handle all these -- queries, and not fall back to the Postgres planner. To detect that, +-- turn optimizer_trace_fallback on, and watch for "falling back to planner" -- messages. -- +set optimizer_trace_fallback='on'; -- Query 1 SELECT GROUPING(product.pname) as g1 FROM product, sale WHERE product.pn=sale.pn GROUP BY GROUPING SETS (sale.pn, product.pname) ORDER BY g1; g1 diff --git a/contrib/pax_storage/src/test/regress/expected/qp_orca_fallback_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_orca_fallback_optimizer.out index 4d3febe5cee..e0a411212f6 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_orca_fallback_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_orca_fallback_optimizer.out @@ -1,10 +1,13 @@ -- Test the optimizer_enable_dml_constraints GUC, which forces GPORCA to fall back when there -- are NULL or CHECK constraints on a table. +set optimizer_trace_fallback = on; DROP TABLE IF EXISTS constr_tab; NOTICE: table "constr_tab" does not exist, skipping CREATE TABLE constr_tab ( a int check (a>0) , b int, c int, d int, CHECK (a+b>5)) DISTRIBUTED BY (a); set optimizer_enable_dml_constraints = off; explain insert into constr_tab values (1,2,3); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: INSERT with constraints QUERY PLAN -------------------------------------------------------- Insert on constr_tab (cost=0.00..0.01 rows=1 width=0) @@ -27,17 +30,21 @@ explain insert into constr_tab values (1,2,3); INSERT INTO constr_tab VALUES(1,5,3,4); set optimizer_enable_dml_constraints=off; explain update constr_tab set a = 10; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: UPDATE with constraints QUERY PLAN -------------------------------------------------------------------------------------------------- Update on constr_tab (cost=0.00..1.03 rows=1 width=22) -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=22) - -> Split (cost=0.00..1.03 rows=1 width=22) + -> Split Update (cost=0.00..1.03 rows=1 width=22) -> Seq Scan on constr_tab (cost=0.00..1.01 rows=1 width=22) Optimizer: Postgres query optimizer (5 rows) explain update constr_tab set b = 10; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: UPDATE with constraints + QUERY PLAN ----------------------------------------------------------------- Update on constr_tab (cost=0.00..1.01 rows=1 width=22) -> Seq Scan on constr_tab (cost=0.00..1.01 rows=1 width=22) @@ -46,7 +53,9 @@ explain update constr_tab set b = 10; set optimizer_enable_dml_constraints=on; explain update constr_tab set b = 10; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 13 not found in project list + QUERY PLAN ----------------------------------------------------------------------- Update on constr_tab (cost=0.00..271.00 rows=0 width=0) -> Seq Scan on constr_tab (cost=0.00..271.00 rows=23700 width=54) @@ -59,11 +68,13 @@ CREATE TABLE constr_tab ( a int NOT NULL, b int, c int, d int, CHECK (a+b>5)) DI INSERT INTO constr_tab VALUES(1,5,3,4); set optimizer_enable_dml_constraints=off; explain update constr_tab set a = 10; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: UPDATE with constraints QUERY PLAN -------------------------------------------------------------------------------------------------- Update on constr_tab (cost=0.00..1.03 rows=1 width=22) -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=22) - -> Split (cost=0.00..1.03 rows=1 width=22) + -> Split Update (cost=0.00..1.03 rows=1 width=22) -> Seq Scan on constr_tab (cost=0.00..1.01 rows=1 width=22) Optimizer: Postgres query optimizer (5 rows) @@ -71,14 +82,20 @@ explain update constr_tab set a = 10; DROP TABLE IF EXISTS constr_tab; CREATE TABLE constr_tab ( a int NOT NULL, b int NOT NULL, c int NOT NULL, d int NOT NULL) DISTRIBUTED BY (a,b); INSERT INTO constr_tab VALUES(1,5,3,4); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: INSERT with constraints INSERT INTO constr_tab VALUES(1,5,3,4); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: INSERT with constraints set optimizer_enable_dml_constraints=off; explain update constr_tab set b = 10; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: UPDATE with constraints QUERY PLAN -------------------------------------------------------------------------------------------------- Update on constr_tab (cost=0.00..1.03 rows=1 width=22) -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=22) - -> Split (cost=0.00..1.03 rows=1 width=22) + -> Split Update (cost=0.00..1.03 rows=1 width=22) -> Seq Scan on constr_tab (cost=0.00..1.01 rows=1 width=22) Optimizer: Postgres query optimizer (5 rows) @@ -97,7 +114,7 @@ explain update constr_tab set a = 10; -> Result (cost=0.00..431.00 rows=2 width=34) -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=2 width=30) Hash Key: constr_tab_1.a - -> Split (cost=0.00..431.00 rows=1 width=30) + -> Split Update (cost=0.00..431.00 rows=1 width=30) -> Seq Scan on constr_tab (cost=0.00..431.00 rows=1 width=26) Optimizer: Pivotal Optimizer (GPORCA) (9 rows) @@ -112,18 +129,28 @@ PARTITION BY range(b) ) (START(0) END(4) EVERY(2)); INSERT INTO homer VALUES (1,0,40),(2,1,43),(3,2,41),(4,3,44); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables SELECT * FROM ONLY homer; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause a | b | c ---+---+--- (0 rows) SELECT * FROM ONLY homer_1_prt_1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause a | b | c ---+---+--- (0 rows) UPDATE ONLY homer SET c = c + 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause SELECT * FROM homer; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b | c ---+---+---- 1 | 0 | 40 @@ -133,7 +160,11 @@ SELECT * FROM homer; (4 rows) DELETE FROM ONLY homer WHERE a = 3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause SELECT * FROM homer; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multi-level partitioned tables a | b | c ---+---+---- 1 | 0 | 40 @@ -156,6 +187,8 @@ EXPLAIN SELECT * FROM ext_table_no_fallback; (3 rows) EXPLAIN SELECT * FROM ONLY ext_table_no_fallback; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause QUERY PLAN ---------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..31000.00 rows=1000000 width=8) @@ -164,7 +197,9 @@ EXPLAIN SELECT * FROM ONLY ext_table_no_fallback; (3 rows) EXPLAIN INSERT INTO heap_t1 SELECT * FROM ONLY ext_table_no_fallback; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ONLY in the FROM clause + QUERY PLAN ------------------------------------------------------------------------------------------------- Insert on heap_t1 (cost=0.00..31000.00 rows=333334 width=8) -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..31000.00 rows=333334 width=8) @@ -175,7 +210,9 @@ EXPLAIN INSERT INTO heap_t1 SELECT * FROM ONLY ext_table_no_fallback; set optimizer_enable_dml=off; EXPLAIN INSERT INTO homer VALUES (1,0,40),(2,1,43),(3,2,41),(4,3,44); - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML not enabled + QUERY PLAN ----------------------------------------------------------------------------------------- Insert on homer (cost=0.00..0.05 rows=2 width=12) -> Redistribute Motion 1:3 (slice1; segments: 1) (cost=0.00..0.05 rows=4 width=12) @@ -185,16 +222,20 @@ EXPLAIN INSERT INTO homer VALUES (1,0,40),(2,1,43),(3,2,41),(4,3,44); (5 rows) EXPLAIN UPDATE ONLY homer SET c = c + 1; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML not enabled + QUERY PLAN --------------------------------------------------- Update on homer (cost=0.00..0.00 rows=0 width=0) -> Result (cost=0.00..0.00 rows=0 width=22) One-Time Filter: false Optimizer: Postgres query optimizer -(3 rows) +(4 rows) EXPLAIN DELETE FROM ONLY homer WHERE a = 3; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML not enabled + QUERY PLAN --------------------------------------------------- Delete on homer (cost=0.00..0.00 rows=0 width=0) -> Result (cost=0.00..0.00 rows=0 width=0) @@ -237,7 +278,9 @@ explain select count(*) from foo group by a; set optimizer_enable_hashagg = off; set optimizer_enable_groupagg = off; explain select count(*) from foo group by a; - QUERY PLAN +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because no plan has been computed for required properties in GPORCA + QUERY PLAN ----------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=163.00..164.00 rows=100 width=12) -> HashAggregate (cost=163.00..164.00 rows=34 width=12) @@ -246,6 +289,16 @@ explain select count(*) from foo group by a; Optimizer: Postgres query optimizer (5 rows) +-- Orca should fallback for RTE_TABLEFUNC RTE type +explain SELECT * FROM xmltable('/root' passing '' COLUMNS element text); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: RangeTableEntry of type Table Function + QUERY PLAN +------------------------------------------------------------------------ + Table Function Scan on "xmltable" (cost=0.00..1.00 rows=100 width=32) + Optimizer: Postgres query optimizer +(2 rows) + create table ext_part(a int) partition by list(a); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -256,15 +309,55 @@ create external web table p2_ext (like p1) EXECUTE 'cat something.txt' FORMAT 'T alter table ext_part attach partition p1 for values in (1); alter table ext_part attach partition p2_ext for values in (2); NOTICE: partition constraints are not validated when attaching a readable external table -explain select * from ext_part; - QUERY PLAN ---------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..897.13 rows=999998 width=4) - -> Append (cost=0.00..882.23 rows=333333 width=4) - -> Dynamic Seq Scan on ext_part (cost=0.00..437.97 rows=333333 width=4) - Number of partitions to scan: 1 (out of 2) - -> Dynamic Foreign Scan on ext_part (cost=0.00..437.97 rows=333333 width=4) - Number of partitions to scan: 1 (out of 2) - Optimizer: GPORCA -(7 rows) +explain insert into ext_part values (1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Insert with External/foreign partition storage types + QUERY PLAN +------------------------------------------------------ + Insert on ext_part (cost=0.00..0.03 rows=1 width=4) + -> Result (cost=0.00..0.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(3 rows) +explain delete from ext_part where a=1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(delete) on partitioned tables + QUERY PLAN +----------------------------------------------------------------------- + Delete on ext_part (cost=0.00..435.25 rows=0 width=0) + Delete on p1 ext_part_1 + -> Seq Scan on p1 ext_part_1 (cost=0.00..435.25 rows=32 width=14) + Filter: (a = 1) + Optimizer: Postgres query optimizer +(5 rows) + +explain update ext_part set a=1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: DML(update) on partitioned tables +ERROR: cannot update foreign table "p2_ext" +set optimizer_enable_orderedagg=off; +select array_agg(a order by b) + from (values (1,4),(2,3),(3,1),(4,2)) v(a,b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Ordered aggregates disabled. Enable by setting optimizer_enable_orderedagg=on + array_agg +----------- + {3,4,2,1} +(1 row) + +-- Orca should fallback if a function in 'from' clause uses 'WITH ORDINALITY' +SELECT * FROM jsonb_array_elements('["b", "a"]'::jsonb) WITH ORDINALITY; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: WITH ORDINALITY + value | ordinality +-------+------------ + "b" | 1 + "a" | 2 +(2 rows) + +-- start_ignore +-- FIXME: gpcheckcat fails due to mismatching distribution policy if this table isn't dropped +-- Keep this table around once this is fixed +reset optimizer_enable_orderedagg; +drop table ext_part; +-- end_ignore diff --git a/contrib/pax_storage/src/test/regress/expected/qp_subquery_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_subquery_optimizer.out index 9fcf6f3cf43..4a5a3be132b 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_subquery_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_subquery_optimizer.out @@ -1,5 +1,6 @@ create schema qp_subquery; set search_path to qp_subquery; +set optimizer_trace_fallback to on; begin; CREATE TABLE SUBSELECT_TBL1 (f1 integer, f2 integer, f3 float); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. @@ -70,6 +71,8 @@ SELECT '' AS three, f1, f2 FROM SUBSELECT_TBL1 WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1 WHERE f3 IS NOT NULL) ORDER BY 2,3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery three | f1 | f2 -------+----+---- | 1 | 2 @@ -102,12 +105,12 @@ SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1 WHERE f1 = upper.f1); six | Correlated Field | Second Field -----+------------------+-------------- - | 1 | 2 | 2 | 3 - | 1 | 1 - | 2 | 2 | 3 | 4 + | 2 | 2 | 3 | 3 + | 1 | 2 + | 1 | 1 (6 rows) SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" @@ -117,10 +120,10 @@ SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" six | Correlated Field | Second Field -----+------------------+-------------- | 2 | 4 - | 1 | 1 - | 2 | 2 | 3 | 5 + | 2 | 2 | 3 | 3 + | 1 | 1 (5 rows) SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" @@ -129,23 +132,25 @@ SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field" WHERE f2 = CAST(f3 AS integer)); six | Correlated Field | Second Field -----+------------------+-------------- + | 6 | 8 | 1 | 3 | 2 | 4 | 3 | 5 - | 6 | 8 (4 rows) SELECT '' AS five, f1 AS "Correlated Field" FROM SUBSELECT_TBL1 WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1 WHERE f3 IS NOT NULL); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery five | Correlated Field ------+------------------ - | 3 - | 3 - | 2 | 1 | 2 + | 3 + | 2 + | 3 (5 rows) begin; @@ -182,31 +187,31 @@ select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx)A; col | i | j | t -----+---+---+------- | 1 | 4 | one - | 2 | 3 | two | 0 | | zero - | 3 | 2 | three - | 4 | 1 | four | 5 | 0 | five | 6 | 6 | six + | 2 | 3 | two + | 3 | 2 | three + | 4 | 1 | four | 7 | 7 | seven + | 8 | 8 | eight | | | null | | 0 | zero - | 8 | 8 | eight (11 rows) select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx) AS A; col | i | j | t -----+---+---+------- - | 8 | 8 | eight + | 5 | 0 | five + | 6 | 6 | six | 1 | 4 | one - | 2 | 3 | two | 0 | | zero + | 2 | 3 | two | 3 | 2 | three | 4 | 1 | four - | 5 | 0 | five - | 6 | 6 | six | 7 | 7 | seven + | 8 | 8 | eight | | | null | | 0 | zero (11 rows) @@ -215,17 +220,17 @@ select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx) AS A; select * from(SELECT '' AS "col", * FROM join_tab1 AS tx) as A(a,b,c); a | b | c | t ---+---+---+------- - | 3 | 2 | three - | 4 | 1 | four | 5 | 0 | five | 6 | 6 | six + | 1 | 4 | one + | 0 | | zero + | 2 | 3 | two + | 3 | 2 | three + | 4 | 1 | four | 7 | 7 | seven + | 8 | 8 | eight | | | null | | 0 | zero - | 8 | 8 | eight - | 1 | 4 | one - | 2 | 3 | two - | 0 | | zero (11 rows) @@ -233,24 +238,24 @@ select * from(SELECT '' AS "col", t1.a, t2.e FROM join_tab1 t1 (a, b, c), join_t WHERE t1.a = t2.d)as A; col | a | e -----+---+---- - | 3 | -3 + | 1 | -1 + | 0 | | 5 | -5 | 5 | -5 - | 1 | -1 | 2 | 2 + | 3 | -3 | 2 | 4 - | 0 | (7 rows) select * from join_tab1 where exists(select * from join_tab2 where join_tab1.i=join_tab2.i); i | j | t ---+---+------- - 3 | 2 | three - 5 | 0 | five 1 | 4 | one - 2 | 3 | two 0 | | zero + 5 | 0 | five + 2 | 3 | two + 3 | 2 | three (5 rows) @@ -404,9 +409,9 @@ select name from emp_list where sal=(select min(sal) from emp_list); select name from emp_list where sal>(select avg(sal) from emp_list); name ---------------------- + empfive empthree empfour - empfive (3 rows) @@ -444,8 +449,8 @@ select to_char(Avg(sum_col1),'9999999.9999999') from (select sum(s1) as sum_col1 select g2,count(*) from (select I, count(*) as g2 from join_tab1 group by I) as vtable group by g2; g2 | count ----+------- - 1 | 9 2 | 1 + 1 | 9 (2 rows) begin; @@ -464,24 +469,24 @@ select i,j,t from (select * from (select i,j,t from join_tab1)as dtab1 UNION select * from(select i,j,t from join_tab4) as dtab2 )as mtab; i | j | t ---+---+-------- - 1 | 7 | sunday - 3 | 5 | tueday - 4 | 1 | four - 6 | 2 | friday - | | null + 0 | | zero 1 | 4 | one - 2 | 6 | monday + 1 | 7 | sunday 5 | 0 | five 5 | 3 | thuday + 6 | 2 | friday 6 | 6 | six - 7 | 7 | seven - 8 | 8 | eight - | 0 | zero - 0 | | zero 2 | 3 | two + 2 | 6 | monday 3 | 2 | three + 3 | 5 | tueday + 4 | 1 | four 4 | 4 | wedday 7 | 1 | satday + 7 | 7 | seven + 8 | 8 | eight + | 0 | zero + | | null (18 rows) @@ -576,6 +581,8 @@ FROM join_tab1 out1, join_tab2 out2; --- + + 0 0 @@ -594,7 +601,7 @@ FROM join_tab1 out1, join_tab2 out2; - + 0 0 @@ -607,16 +614,13 @@ FROM join_tab1 out1, join_tab2 out2; - 0 - 0 - 0 @@ -624,8 +628,6 @@ FROM join_tab1 out1, join_tab2 out2; - 0 - @@ -641,10 +643,13 @@ FROM join_tab1 out1, join_tab2 out2; + 0 + 0 + 0 @@ -864,6 +869,8 @@ create table Tbl8352_t2(a int, b int) distributed by (a); insert into Tbl8352_t1 values(1,null),(null,1),(1,1),(null,null); insert into Tbl8352_t2 values(1,1); select * from Tbl8352_t1 where (Tbl8352_t1.a,Tbl8352_t1.b) not in (select Tbl8352_t2.a,Tbl8352_t2.b from Tbl8352_t2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) @@ -873,6 +880,8 @@ create table Tbl8352_t2a(a int, b int) distributed by (a); insert into Tbl8352_t1a values(1,2),(3,null),(null,4),(null,null); insert into Tbl8352_t2a values(1,2); select * from Tbl8352_t1a where (Tbl8352_t1a.a,Tbl8352_t1a.b) not in (select Tbl8352_t2a.a,Tbl8352_t2a.b from Tbl8352_t2a) order by 1,2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | @@ -880,12 +889,16 @@ select * from Tbl8352_t1a where (Tbl8352_t1a.a,Tbl8352_t1a.b) not in (select Tbl (2 rows) select (1,null::int) not in (select 1,1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery ?column? ---------- (1 row) select (3,null::int) not in (select 1,1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery ?column? ---------- t @@ -928,18 +941,24 @@ commit; -- not in subquery involving vars from different rels with inner join -- select t1.a, t2.b from t1, t2 where t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 (1 row) select t1.a, t2.b from t1 inner join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 (1 row) select t1.a, t2.b from t1 inner join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -947,6 +966,8 @@ select t1.a, t2.b from t1 inner join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not i -- unsupported case explain select t1.a, t2.b from t1, t2 where t1.a=t2.a or ((t1.a,t2.b) not in (select i1.a,i1.b from i1)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery QUERY PLAN ------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.10 rows=3 width=8) @@ -965,37 +986,45 @@ explain select t1.a, t2.b from t1, t2 where t1.a=t2.a or ((t1.a,t2.b) not in (se -- not in subquery involving vars from different rels with left join. -- select t1.a, t2.b from t1 left join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | + 3 | 4 (2 rows) select t1.a, t2.b from t1 left join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- + 5 | 1 | 3 | 4 - 5 | (3 rows) -- -- not in subquery involving vars from different rels with outer join -- select t1.a, t2.b from t1 full outer join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | + 3 | 4 | 8 (3 rows) -- not in subquery with a row var in FULL JOIN condition select t1.a, t2.b from t1 full outer join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | + 3 | 4 | 8 1 | | 2 @@ -1007,8 +1036,8 @@ select t1.a, t2.b from t1 full outer join t2 on (t1.a=t2.a and ((t1.a,t2.b) not select t1.a,t2.b from t1 left join (t2 inner join t3 on (t3.a not in (select t4.a from t4))) on (t1.a=t2.a); a | b ---+--- - 1 | 2 3 | 4 + 1 | 2 5 | (3 rows) @@ -1046,8 +1075,7 @@ select Tbl01.* from Tbl01 where foo(Tbl01.a) not in (select a from Tbl03); (1 row) create table Tbl02 as select Tbl01.*,foo(Tbl01.a) as foo from Tbl01; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. select Tbl02.* from Tbl02 where foo not in (select a from Tbl03); a | b | c | foo ---+----+----+----- @@ -1056,18 +1084,32 @@ select Tbl02.* from Tbl02 where foo not in (select a from Tbl03); begin; create table Tbl04(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl04 values(1,2),(3,4),(5,6); create table Tbl05(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl05 values(1,2); create table Tbl06(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl06 values(1,2),(3,4); create table i3(a int not null, b int not null); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into i3 values(1,2); create table Tbl07(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl07 values(1,2),(3,4),(null,null); create table Tbl08(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl08 values(1,2),(3,4),(null,null); create table Tbl09(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl09 values(1,2),(5,null),(null,8); analyze Tbl04; analyze Tbl05; @@ -1082,6 +1124,8 @@ commit; -- -- non-nullability due to inner join select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,Tbl06 where Tbl05.a=Tbl06.a and Tbl05.b < 10); -- expected: (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1089,6 +1133,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 inner join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) inner join i3 on (i3.a=Tbl08.a and i3.b=Tbl08.b)); -- expected:(3,4), (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1097,6 +1143,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b -- non-nullability due to where clause condition select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where Tbl05.a < 2 and Tbl05.b < 10); -- expected: (3,4), (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1104,6 +1152,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a = 1 and Tbl07.b = 2); -- expected: (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1112,12 +1162,16 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b -- not null condition in the where clause select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 (1 row) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 @@ -1125,6 +1179,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b -- or clauses that should lead to non-nullability select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a < 2 or Tbl05.a > 100) AND (Tbl05.b < 4 or Tbl05.b > 100)); -- expected: (3,4), (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1133,13 +1189,17 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b -- base-table constraints select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3); -- expected: (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where Tbl05.a = i3.a and Tbl05.b = i3.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1147,6 +1207,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where Tbl05.a < i3.a and Tbl05.b > i3.b); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1156,10 +1218,12 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b -- non-null constant values select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2); -- (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in ((1,2)); @@ -1171,6 +1235,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in ((1,2)); -- multiple NOT-IN expressions select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl06.a,Tbl06.b from Tbl06) and (Tbl04.a,Tbl04.b) not in (select i3.a, i3.b from i3); -- expected: (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 @@ -1178,6 +1244,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl06.a,Tbl06.b explain (costs off) select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery QUERY PLAN -------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) @@ -1196,6 +1264,8 @@ select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06. (13 rows) select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3)); -- expected: (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 @@ -1212,15 +1282,15 @@ select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl09.a from Tbl09 where select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select i3.a from i3); a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl05.a from Tbl05 left join i3 on (Tbl05.a=i3.a)); a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) -- @@ -1228,37 +1298,49 @@ select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl05.a from Tbl05 left j -- -- No where clause select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05); -- expected: (3,4), (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) -- INDF in the where clause select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a and Tbl07.b is not distinct from Tbl08.b); -- no rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) -- null conditions in the where clause select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) -- OR clauses that should not lead to non-nullability select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a or Tbl07.a=1); -- no rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) -- values list: we don't support it yet. not worth the effort. select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (values(1,2),(3,4)); -- (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 @@ -1266,6 +1348,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (values(1,2),(3,4)); -- -- functions/ops in the target list of the subquery select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a+2,i3.b+2 from i3); -- expected: (5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 1 | 2 @@ -1276,12 +1360,14 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a+2,i3.b+2 f select Tbl09.a, Tbl09.b from Tbl09; a | b ---+--- - 1 | 2 5 | | 8 + 1 | 2 (3 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a,Tbl09.b from Tbl09); -- expected: (3,4) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1290,18 +1376,22 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a,Tbl09.b select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b; a | b ---+--- + 1 | 2 5 | | 8 - 1 | 2 (3 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b); -- expected: (3,4) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 (1 row) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select sum(i3.b),i3.a from i3 group by i3.a); -- (1,2),(3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 1 | 2 @@ -1311,18 +1401,22 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select sum(i3.b),i3.a -- infering not-nullability for only one of the columns select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,Tbl05.b from i3,Tbl05 where i3.a=Tbl05.a); -- (3,4),(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) NOT IN (select i3.a,i3.b from Tbl07 left join i3 on (i3.a=Tbl07.a and i3.b=Tbl07.b) where i3.a > 2); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 - 5 | 6 1 | 2 + 5 | 6 (3 rows) -- @@ -1330,30 +1424,40 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) NOT IN (select i3.a,i3.b from -- Started supporting since RIO -- select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the inner side, should not return any rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union all select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the innder side, should not return any rows +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- (0 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2 union select 3,4); --(5,6) +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 5 | 6 (1 row) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3) or (Tbl04.a,Tbl04.b) not in (select Tbl07.a, Tbl07.b from Tbl07); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- - 3 | 4 5 | 6 + 3 | 4 (2 rows) -- Cases where the planner "should have" determined not-nullabitlity select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 left join Tbl07 on (i3.a=Tbl07.a and i3.b=Tbl07.b)); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1361,6 +1465,8 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from (2 rows) select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a IN (select i3.a from i3)) AND (Tbl05.b IN (select i3.b from i3))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---+--- 3 | 4 @@ -1370,17 +1476,25 @@ select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b -- additional queries drop table if exists Tbl04; create table Tbl04(x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl04 values(1,2); insert into Tbl04 values(3,4); create table Tbl10(x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into Tbl10 values(1,null); select * from Tbl04 where (x,y) not in (select x,y from Tbl10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery x | y ---+--- 3 | 4 (1 row) select * from Tbl04 where (x,y) not in (select 1,y from Tbl10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery x | y ---+--- 3 | 4 @@ -1394,6 +1508,8 @@ select * from tbl10 where y not in (select 1 where false); alter table Tbl10 alter column x set not null; select * from Tbl04 where (x,y) not in (select x,y from Tbl10); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery x | y ---+--- 3 | 4 @@ -1418,17 +1534,21 @@ insert into TblText3 values('florian','waas'); insert into TblText3 values('oak','barrett'); commit; SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE ((NOT (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b --------+---------- - tushar | pednekar rob | klopp + tushar | pednekar (2 rows) SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE (( (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3))); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery a | b ---------+--------- - florian | waas oak | barrett + florian | waas (2 rows) -- @@ -1436,11 +1556,18 @@ SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblTex -- begin; create table TabDel1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into TabDel1 values(1,2),(3,4),(5,6); create table TabDel2 as select * from TabDel1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. create table TabDel3(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into TabDel3 values(1,2); create table TabDel4(a int not null, b int not null); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into TabDel4 values(1,2); commit; explain delete from TabDel1 where TabDel1.a not in (select a from TabDel3); -- do not support this because we produce NLASJ @@ -1500,18 +1627,20 @@ update TblUp1 set a=100 where a not in (select a from TblUp3); select * from TblUp1; a | b -----+--- - 100 | 4 - 100 | 6 1 | 2 + 100 | 6 + 100 | 4 (3 rows) update TblUp2 set a=100 where a not in (select a from TblUp4); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 26 not found in project list select * from TblUp2; a | b -----+--- 100 | 4 - 1 | 2 100 | 6 + 1 | 2 (3 rows) -- @@ -1542,8 +1671,8 @@ select * from subselect_tab1 where (select b from subselect_tab2) is null; select * from subselect_tab1 where b::bool = ( c = any(select c from subselect_tab2)); a | b | c -----+-------+--- - 200 | true | 2 100 | false | 1 + 200 | true | 2 (2 rows) -- ALL subquery deeply nested in a scalar expression @@ -1597,19 +1726,23 @@ NOTICE: table has parent, setting distribution columns to match parent table create table append_rel2(att4 int) INHERITS(append_rel); NOTICE: table has parent, setting distribution columns to match parent table insert into append_rel values(1,10),(2,20),(3,30); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables analyze append_rel; explain with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ - Gather Motion 3:1 (slice1; segments: 3) (cost=2296.15..4165.55 rows=51935 width=4) - -> Hash Join (cost=2296.15..3473.08 rows=17312 width=4) + Gather Motion 3:1 (slice1; segments: 3) (cost=2123.03..3992.43 rows=51935 width=4) + -> Hash Join (cost=2123.03..3299.97 rows=17312 width=4) Hash Cond: (append_rel.att1 = test.att1) -> Append (cost=0.00..848.02 rows=51934 width=8) -> Seq Scan on append_rel append_rel_1 (cost=0.00..1.01 rows=1 width=8) -> Seq Scan on append_rel1 append_rel_2 (cost=0.00..293.67 rows=25967 width=8) -> Seq Scan on append_rel2 append_rel_3 (cost=0.00..293.67 rows=25967 width=8) - -> Hash (cost=2283.65..2283.65 rows=1000 width=4) - -> HashAggregate (cost=2280.31..2283.65 rows=1000 width=4) + -> Hash (cost=2110.53..2110.53 rows=1000 width=4) + -> HashAggregate (cost=2107.20..2110.53 rows=1000 width=4) Group Key: test.att1 -> Subquery Scan on test (cost=1021.14..2063.92 rows=17312 width=4) -> Hash Semi Join (cost=1021.14..2063.92 rows=17312 width=8) @@ -1633,6 +1766,8 @@ explain with test as (select * from (select * from append_rel) p where att1 in ( (29 rows) with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables att2 ------ 20 @@ -1675,5 +1810,339 @@ select sum(case when b in (select b from temp_b where EXISTS (select sum(d) from 4 | 6 (1 row) +-- Check that predicate with set-returning function is not pushed down +create table table_with_array_column (an_array_column double precision[]); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'an_array_column' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into table_with_array_column values (array[1.1, 2.2]); +explain (costs off) +select * +from ( + select unnest(t1.an_array_column) unnested_array_column + from table_with_array_column t1, table_with_array_column t2) zz +where unnested_array_column is not null; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: (NOT ((unnest(t1.an_array_column)) IS NULL)) + -> ProjectSet + -> Nested Loop + Join Filter: true + -> Seq Scan on table_with_array_column t1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on table_with_array_column t2 + Optimizer: GPORCA +(11 rows) + +select * +from ( + select unnest(t1.an_array_column) unnested_array_column + from table_with_array_column t1, table_with_array_column t2) zz +where unnested_array_column is not null; + unnested_array_column +----------------------- + 1.1 + 2.2 +(2 rows) + +-- check that predicate is not pushed through a projected non-correlated subquery +create table subquery_nonpush_through_1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table subquery_nonpush_through_2(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +explain (costs off) +select * +from( + select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b + from subquery_nonpush_through_1,subquery_nonpush_through_2) t +where xx='dd'; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Result + Filter: ((((hashed SubPlan 1))::text) = 'dd'::text) + -> Nested Loop + Join Filter: true + -> Seq Scan on subquery_nonpush_through_1 + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on subquery_nonpush_through_2 + SubPlan 1 + -> Result + -> Result + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on subquery_nonpush_through_2 subquery_nonpush_through_2_1 + Optimizer: GPORCA +(16 rows) + +select * +from( + select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b + from subquery_nonpush_through_1,subquery_nonpush_through_2) t +where xx='dd'; + xx | b +----+--- +(0 rows) + +-- Ensure we produce a hashed subplan when there are no outer references +CREATE TABLE a1 AS ( + SELECT * FROM generate_series(1, 5) AS a1) + WITH data distributed replicated; +CREATE TABLE a2 AS ( + SELECT * FROM generate_series(1, 10) AS a1) + WITH data distributed BY (a1); +CREATE TABLE a3 AS ( + SELECT a1, row_to_json(a2) AS rj FROM a2) + WITH data distributed BY (a1); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Whole-row variable +-- explain "verbose" is needed to show that the subplan is hashed +explain (verbose, costs off) select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol +from ( + select a1,rj->>'a1'::text as a2 + from a3 + )t; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: a3.a1, (CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END) + -> Seq Scan on qp_subquery.a3 + Output: a3.a1, CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END + SubPlan 1 + -> Result + Output: ((a1.a1)::text) + -> Result + Output: (a1.a1)::text, true + -> Seq Scan on qp_subquery.a1 + Output: a1.a1 + Filter: (NOT (a1.a1 IS NULL)) + Settings: optimizer = 'on' + Optimizer: GPORCA +(14 rows) + +select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol +from ( + select a1,rj->>'a1'::text as a2 + from a3 + )t; + a1 | checkcol +----+---------- + 1 | true + 5 | true + 6 | false + 9 | false + 10 | false + 2 | true + 3 | true + 4 | true + 7 | false + 8 | false +(10 rows) + +-- check various [NOT] EXISTS subqueries on materialized views +create table t (a int, b int) distributed by (a); +insert into t values (1, 1), (2, NULL), (NULL, 3); +create materialized view v as select a, b from t distributed randomly; +select * from v where exists (select a from v); + a | b +---+--- + 2 | + | 3 + 1 | 1 +(3 rows) + +select * from v where exists (select a from v limit 0); + a | b +---+--- +(0 rows) + +select * from v where exists (select a from v where a=2); + a | b +---+--- + 1 | 1 + 2 | + | 3 +(3 rows) + +select * from v where exists (select a from v where a<>2); + a | b +---+--- + 1 | 1 + 2 | + | 3 +(3 rows) + +select * from v where not exists (select a from v); + a | b +---+--- +(0 rows) + +select * from v where not exists (select a from v limit 0); + a | b +---+--- + 2 | + | 3 + 1 | 1 +(3 rows) + +select * from v where not exists (select a from v where a=2); + a | b +---+--- +(0 rows) + +select * from v where not exists (select a from v where a<>2); + a | b +---+--- +(0 rows) + +select * from v where exists (select b from v); + a | b +---+--- + 2 | + | 3 + 1 | 1 +(3 rows) + +select * from v where exists (select b from v limit 0); + a | b +---+--- +(0 rows) + +select * from v where exists (select b from v where b=2); + a | b +---+--- +(0 rows) + +select * from v where exists (select b from v where b<>2); + a | b +---+--- + 1 | 1 + 2 | + | 3 +(3 rows) + +select * from v where not exists (select b from v); + a | b +---+--- +(0 rows) + +select * from v where not exists (select b from v limit 0); + a | b +---+--- + 1 | 1 + 2 | + | 3 +(3 rows) + +select * from v where not exists (select b from v where b=2); + a | b +---+--- + 2 | + | 3 + 1 | 1 +(3 rows) + +select * from v where not exists (select b from v where b<>2); + a | b +---+--- +(0 rows) + +-- Check that a query having pattern of Select-Project-NaryJoin, +-- also containing a Select predicate condition with the same pattern nested in a subquery runs +CREATE TABLE tab1(a TEXT, b TEXT) DISTRIBUTED RANDOMLY; +INSERT INTO tab1 SELECT i,i FROM GENERATE_SERIES(1,3)i; +SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1 +WHERE EXISTS + (SELECT 1 FROM + (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2 + WHERE t2.param = t1.param); + param +------- + 2 + 1 + 3 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1 +WHERE EXISTS + (SELECT 1 FROM + (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2 + WHERE t2.param = t1.param); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (btrim(p1.b) = btrim(p1_1.b)) + -> Hash Join + Hash Cond: (p1.a = p2.a) + -> Seq Scan on tab1 p1 + Filter: (NOT (btrim(b) IS NULL)) + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on tab1 p2 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Hash Join + Hash Cond: (p1_1.a = p2_1.a) + -> Seq Scan on tab1 p1_1 + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on tab1 p2_1 + Optimizer: GPORCA +(19 rows) + +-- Check that a query having pattern of Select-Project-NaryJoin, +-- also containing a Select predicate condition with the same pattern nested in a subquery runs when subplan is enforced +SET optimizer_enforce_subplans TO on; +SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1 +WHERE EXISTS + (SELECT 1 FROM + (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2 + WHERE t2.param = t1.param); + param +------- + 2 + 1 + 3 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1 +WHERE EXISTS + (SELECT 1 FROM + (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2 + WHERE t2.param = t1.param); + QUERY PLAN +----------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (p1.a = p2_1.a) + -> Result + Filter: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on tab1 p1 + Filter: (NOT (btrim(b) IS NULL)) + SubPlan 1 + -> Result + Filter: (btrim(p1_1.b) = btrim(p1.b)) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Hash Join + Hash Cond: (p1_1.a = p2.a) + -> Seq Scan on tab1 p1_1 + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on tab1 p2 + -> Hash + -> Gather Motion 3:1 (slice4; segments: 3) + -> Seq Scan on tab1 p2_1 + Optimizer: GPORCA +(22 rows) + +reset optimizer_enforce_subplans; set client_min_messages='warning'; drop schema qp_subquery cascade; +reset optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/qp_union_intersect_optimizer.out b/contrib/pax_storage/src/test/regress/expected/qp_union_intersect_optimizer.out index 94847736a7b..da58390502c 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_union_intersect_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_union_intersect_optimizer.out @@ -653,8 +653,6 @@ SELECT COUNT(*) FROM dml_union_r; 120 (1 row) --- GPDB_12_MERGE_FIXME: ORCA doesn't produce the right intersect plan -set optimizer=off; SELECT COUNT(*) FROM (SELECT dml_union_r.* FROM dml_union_r INTERSECT (SELECT dml_union_r.* FROM dml_union_r UNION ALL SELECT dml_union_s.* FROM dml_union_s) EXCEPT SELECT dml_union_s.* FROM dml_union_s)foo; count ------- @@ -668,7 +666,6 @@ SELECT COUNT(*) FROM dml_union_r; 221 (1 row) -reset optimizer; rollback; -- @description union_test30: INSERT NON ATOMICS with union/intersect/except begin; @@ -2010,6 +2007,44 @@ select * from generate_series(100, 105); 105 (12 rows) +-- test INTERSECT/EXCEPT with General and partitioned locus, but none of the columns are hashable +CREATE TABLE p1(a int) distributed by (a); +INSERT INTO p1 select generate_series(1,10); +explain (costs off) +select from generate_series(1,5) intersect select from p1; + QUERY PLAN +------------------------------------------------------ + Nested Loop + Join Filter: true + -> Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on p1 + -> Aggregate + -> Function Scan on generate_series + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +select from generate_series(1,5) intersect select from p1; +-- +(1 row) + +explain (costs off) +select from generate_series(1,5) except select from p1; + QUERY PLAN +------------------------------------------------------ + Nested Loop Anti Join + Join Filter: true + -> Function Scan on generate_series + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on p1 + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +select from generate_series(1,5) except select from p1; +-- +(0 rows) + -- -- Test for creation of MergeAppend paths. -- @@ -2026,13 +2061,13 @@ select null, null, array_dims(array_agg(x)) from mergeappend_test r order by 1,2; a | b | array_dims ---+---+------------ + | | [1:500] 0 | 0 | [1:99] 1 | 1 | [1:100] 2 | 2 | [1:100] 3 | 3 | [1:100] 4 | 4 | [1:100] 5 | 5 | [1:1] - | | [1:500] (7 rows) -- Check that it's using a MergeAppend diff --git a/contrib/pax_storage/src/test/regress/expected/qp_with_clause.out b/contrib/pax_storage/src/test/regress/expected/qp_with_clause.out index b87845ca82b..d44d814f251 100644 --- a/contrib/pax_storage/src/test/regress/expected/qp_with_clause.out +++ b/contrib/pax_storage/src/test/regress/expected/qp_with_clause.out @@ -384,18 +384,18 @@ with diversecountries as from diversecountries,city where city.id = diversecountries.capital; name | name | cnt --------------------+------------------+----- - Austria | Wien | 8 Russian Federation | Moscow | 12 - Italy | Roma | 8 + South Africa | Pretoria | 11 Myanmar | Rangoon (Yangon) | 8 + Iran | Teheran | 10 China | Peking | 12 - Denmark | Kobenhavn | 7 Canada | Ottawa | 12 + Italy | Roma | 8 + India | New Delhi | 12 United States | Washington | 12 - South Africa | Pretoria | 11 - Iran | Teheran | 10 + Austria | Wien | 8 + Denmark | Kobenhavn | 7 Australia | Canberra | 8 - India | New Delhi | 12 (12 rows) --query5 -Using a CTE in the select list @@ -430,18 +430,18 @@ select from country,city where country.capital = city.id) FOO where FOO.CNT is not null; cnt | country | capital -----+--------------------+------------------ + 8 | Australia | Canberra + 12 | India | New Delhi 8 | Austria | Wien - 12 | Russian Federation | Moscow - 12 | China | Peking - 8 | Myanmar | Rangoon (Yangon) + 7 | Denmark | Kobenhavn + 12 | United States | Washington 10 | Iran | Teheran + 12 | China | Peking 11 | South Africa | Pretoria - 12 | United States | Washington + 12 | Russian Federation | Moscow 8 | Italy | Roma 12 | Canada | Ottawa - 7 | Denmark | Kobenhavn - 8 | Australia | Canberra - 12 | India | New Delhi + 8 | Myanmar | Rangoon (Yangon) (12 rows) --queries Using a CTE in the HAVING clause @@ -820,18 +820,18 @@ with capitals as select * from capitals where id < 100; code | id | name | code ------+----+------------------+------ - NLD | 5 | Amsterdam | NLD + ATG | 63 | Saint Johns | ATG ALB | 34 | Tirana | ALB - DZA | 35 | Alger | DZA + AGO | 56 | Luanda | AGO + AFG | 1 | Kabul | AFG + ASM | 54 | Fagatogo | ASM AND | 55 | Andorra la Vella | AND + NLD | 5 | Amsterdam | NLD + ANT | 33 | Willemstad | ANT AIA | 62 | The Valley | AIA - ATG | 63 | Saint Johns | ATG - AFG | 1 | Kabul | AFG ARE | 65 | Abu Dhabi | ARE + DZA | 35 | Alger | DZA ARG | 69 | Buenos Aires | ARG - ANT | 33 | Willemstad | ANT - ASM | 54 | Fagatogo | ASM - AGO | 56 | Luanda | AGO (12 rows) with allofficiallanguages as @@ -840,46 +840,16 @@ with allofficiallanguages as select * from allofficiallanguages where language like 'A%'; countrycode | countrycode | language -------------+-------------+------------- - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - ARM | ARM | Armenian - AZE | AZE | Azerbaijani - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - IRQ | IRQ | Arabic - KWT | KWT | Arabic - KWT | KWT | Arabic - SOM | SOM | Arabic - DJI | DJI | Arabic - BHR | BHR | Arabic - JOR | JOR | Arabic - JOR | JOR | Arabic - JOR | JOR | Arabic - JOR | JOR | Arabic - OMN | OMN | Arabic - OMN | OMN | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SDN | SDN | Arabic - SDN | SDN | Arabic - SDN | SDN | Arabic - TCD | TCD | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans @@ -895,9 +865,13 @@ select * from allofficiallanguages where language like 'A%'; ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans - ARE | ARE | Arabic - ARE | ARE | Arabic - ARE | ARE | Arabic + LBY | LBY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic + EGY | EGY | Arabic EGY | EGY | Arabic EGY | EGY | Arabic EGY | EGY | Arabic @@ -907,63 +881,6 @@ select * from allofficiallanguages where language like 'A%'; EGY | EGY | Arabic EGY | EGY | Arabic EGY | EGY | Arabic - YEM | YEM | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - TUN | TUN | Arabic - ISR | ISR | Arabic - ISR | ISR | Arabic - ISR | ISR | Arabic - ISR | ISR | Arabic - BOL | BOL | Aimara - BOL | BOL | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - DZA | DZA | Arabic - ARM | ARM | Armenian - IRQ | IRQ | Arabic - LBY | LBY | Arabic - LBY | LBY | Arabic - OMN | OMN | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SDN | SDN | Arabic - SDN | SDN | Arabic - SDN | SDN | Arabic ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans @@ -979,9 +896,10 @@ select * from allofficiallanguages where language like 'A%'; ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans ZAF | ZAF | Afrikaans - ALB | ALB | Albaniana - EGY | EGY | Arabic - EGY | EGY | Arabic + ZAF | ZAF | Afrikaans + LBY | LBY | Arabic + LBY | LBY | Arabic + BHR | BHR | Arabic EGY | EGY | Arabic EGY | EGY | Arabic EGY | EGY | Arabic @@ -994,119 +912,188 @@ select * from allofficiallanguages where language like 'A%'; EGY | EGY | Arabic EGY | EGY | Arabic EGY | EGY | Arabic - YEM | YEM | Arabic - YEM | YEM | Arabic + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + ZAF | ZAF | Afrikaans + LBY | LBY | Arabic + ESH | ESH | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + ARE | ARE | Arabic + ARE | ARE | Arabic + ARM | ARM | Armenian + ARM | ARM | Armenian + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + JOR | JOR | Arabic + LBN | LBN | Arabic LBN | LBN | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - MAR | MAR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - TUN | TUN | Arabic - TUN | TUN | Arabic - TUN | TUN | Arabic - TUN | TUN | Arabic - TUN | TUN | Arabic - ISR | ISR | Arabic - ISR | ISR | Arabic - ISR | ISR | Arabic - BOL | BOL | Aimara - BOL | BOL | Aimara - BOL | BOL | Aimara - BOL | BOL | Aimara - PER | PER | Aimara - PER | PER | Aimara PER | PER | Aimara PER | PER | Aimara PER | PER | Aimara PER | PER | Aimara + SYR | SYR | Arabic + SYR | SYR | Arabic DZA | DZA | Arabic DZA | DZA | Arabic DZA | DZA | Arabic DZA | DZA | Arabic - ARM | ARM | Armenian - AZE | AZE | Azerbaijani - AZE | AZE | Azerbaijani - AZE | AZE | Azerbaijani - IRQ | IRQ | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + ARE | ARE | Arabic + ARE | ARE | Arabic + DJI | DJI | Arabic IRQ | IRQ | Arabic IRQ | IRQ | Arabic IRQ | IRQ | Arabic IRQ | IRQ | Arabic IRQ | IRQ | Arabic - KWT | KWT | Arabic - ESH | ESH | Arabic - SOM | SOM | Arabic - SOM | SOM | Arabic + YEM | YEM | Arabic + YEM | YEM | Arabic JOR | JOR | Arabic - LBY | LBY | Arabic - LBY | LBY | Arabic + KWT | KWT | Arabic OMN | OMN | Arabic OMN | OMN | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SAU | SAU | Arabic - SDN | SDN | Arabic + OMN | OMN | Arabic + OMN | OMN | Arabic + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + SYR | SYR | Arabic + SYR | SYR | Arabic + SYR | SYR | Arabic + SYR | SYR | Arabic + SYR | SYR | Arabic + SYR | SYR | Arabic + DZA | DZA | Arabic + DZA | DZA | Arabic + ARE | ARE | Arabic + ARM | ARM | Armenian + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + IRQ | IRQ | Arabic + YEM | YEM | Arabic + YEM | YEM | Arabic + YEM | YEM | Arabic + YEM | YEM | Arabic + JOR | JOR | Arabic + JOR | JOR | Arabic + JOR | JOR | Arabic + KWT | KWT | Arabic + KWT | KWT | Arabic + OMN | OMN | Arabic + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + PER | PER | Aimara + SYR | SYR | Arabic + SYR | SYR | Arabic + SYR | SYR | Arabic + ALB | ALB | Albaniana + AZE | AZE | Azerbaijani + AZE | AZE | Azerbaijani + BOL | BOL | Aimara + BOL | BOL | Aimara + ISR | ISR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SOM | SOM | Arabic + SOM | SOM | Arabic + SDN | SDN | Arabic SDN | SDN | Arabic SDN | SDN | Arabic SDN | SDN | Arabic SDN | SDN | Arabic SDN | SDN | Arabic TCD | TCD | Arabic - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ZAF | ZAF | Afrikaans - ARE | ARE | Arabic - ARE | ARE | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - EGY | EGY | Arabic - YEM | YEM | Arabic - YEM | YEM | Arabic - YEM | YEM | Arabic - LBN | LBN | Arabic + TUN | TUN | Arabic + TUN | TUN | Arabic + BOL | BOL | Aimara + BOL | BOL | Aimara + BOL | BOL | Aimara + ISR | ISR | Arabic + ISR | ISR | Arabic + ISR | ISR | Arabic + ISR | ISR | Arabic + ISR | ISR | Arabic MAR | MAR | Arabic MAR | MAR | Arabic MAR | MAR | Arabic MAR | MAR | Arabic - QAT | QAT | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic - SYR | SYR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SOM | SOM | Arabic + SDN | SDN | Arabic + SDN | SDN | Arabic + SDN | SDN | Arabic + SDN | SDN | Arabic + SDN | SDN | Arabic + TCD | TCD | Arabic + TUN | TUN | Arabic TUN | TUN | Arabic TUN | TUN | Arabic + AZE | AZE | Azerbaijani + AZE | AZE | Azerbaijani + BOL | BOL | Aimara + BOL | BOL | Aimara + BOL | BOL | Aimara ISR | ISR | Arabic ISR | ISR | Arabic ISR | ISR | Arabic @@ -1114,14 +1101,27 @@ select * from allofficiallanguages where language like 'A%'; ISR | ISR | Arabic ISR | ISR | Arabic ISR | ISR | Arabic - BOL | BOL | Aimara - BOL | BOL | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara - PER | PER | Aimara + ISR | ISR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + MAR | MAR | Arabic + QAT | QAT | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SAU | SAU | Arabic + SDN | SDN | Arabic + TUN | TUN | Arabic + TUN | TUN | Arabic + TUN | TUN | Arabic (282 rows) with capitals(code,id,name,code) as @@ -1130,18 +1130,18 @@ with capitals(code,id,name,code) as select * from capitals where id < 100; code | id | name | code ------+----+------------------+------ - NLD | 5 | Amsterdam | NLD + ATG | 63 | Saint Johns | ATG ALB | 34 | Tirana | ALB - DZA | 35 | Alger | DZA + AGO | 56 | Luanda | AGO + AFG | 1 | Kabul | AFG + ASM | 54 | Fagatogo | ASM AND | 55 | Andorra la Vella | AND + NLD | 5 | Amsterdam | NLD + ANT | 33 | Willemstad | ANT AIA | 62 | The Valley | AIA - ATG | 63 | Saint Johns | ATG - AFG | 1 | Kabul | AFG ARE | 65 | Abu Dhabi | ARE + DZA | 35 | Alger | DZA ARG | 69 | Buenos Aires | ARG - ANT | 33 | Willemstad | ANT - ASM | 54 | Fagatogo | ASM - AGO | 56 | Luanda | AGO (12 rows) -- query1 CTE referencing itself @@ -1184,238 +1184,238 @@ with capitals(code,id) as select * from capitals; code | id | name ------+------+----------------------------------- - AIA | 62 | The Valley - ARE | 65 | Abu Dhabi - AUS | 135 | Canberra - BHS | 148 | Nassau - BHR | 149 | al-Manama - BGD | 150 | Dhaka - BLZ | 185 | Belmopan - BWA | 204 | Gaborone + AGO | 56 | Luanda + BEL | 179 | Bruxelles [Brussel] CYM | 553 | George Town - CHL | 554 | Santiago de Chile - COK | 583 | Avarua CRI | 584 | San Jose - DJI | 585 | Djibouti - DMA | 586 | Roseau - ERI | 652 | Asmara - ETH | 756 | Addis Abeba - GAB | 902 | Libreville GEO | 905 | Tbilisi - GIB | 915 | Gibraltar - GLP | 919 | Basse-Terre - GUM | 921 | Agaaa + GIN | 926 | Conakry HTI | 929 | Port-au-Prince - SJM | 938 | Longyearbyen - IDN | 939 | Jakarta - IND | 1109 | New Delhi - IRN | 1380 | Teheran - JAM | 1530 | Kingston - JPN | 1532 | Tokyo - CAF | 1889 | Bangui - COM | 2295 | Moroni - COD | 2298 | Kinshasa - CYP | 2430 | Nicosia - LVA | 2434 | Riga - LBN | 2438 | Beirut - LIE | 2446 | Vaduz - MAC | 2454 | Macao - MWI | 2462 | Lilongwe - MLI | 2482 | Bamako - MLT | 2484 | Valletta - MAR | 2486 | Rabat - MTQ | 2508 | Fort-de-France - MRT | 2509 | Nouakchott - MCO | 2695 | Monaco-Ville - MOZ | 2698 | Maputo - NPL | 2729 | Kathmandu - NIU | 2805 | Alofi - NFK | 2806 | Kingston - CIV | 2814 | Yamoussoukro - PAK | 2831 | Islamabad - PLW | 2881 | Koror - PER | 2890 | Lima - GNQ | 2972 | Malabo - QAT | 2973 | Doha - GUF | 3014 | Cayenne + IRL | 1447 | Dublin + TMP | 1522 | Dili + CPV | 1859 | Praia + LSO | 2437 | Maseru + MHL | 2507 | Dalap-Uliga-Darrit + MDA | 2690 | Chisinau + NRU | 2728 | Yaren SHN | 3063 | Jamestown - LCA | 3065 | Castries - VCT | 3066 | Kingstown - STP | 3172 | Sao Tome - SAU | 3173 | Riyadh - SEN | 3198 | Dakar - SVN | 3212 | Ljubljana - SUR | 3243 | Paramaribo - SWZ | 3244 | Mbabane - SYR | 3250 | Damascus - THA | 3320 | Bangkok - TKL | 3333 | Fakaofo - TON | 3334 | Nukualofa TTO | 3336 | Port-of-Spain - TUN | 3349 | Tunis - TUR | 3358 | Ankara - TKM | 3419 | Ashgabat - UKR | 3426 | Kyiv - NCL | 3493 | Noumea + BLR | 3520 | Minsk WLF | 3536 | Mata-Utu EST | 3791 | Tallinn - AFG | 1 | Kabul - ANT | 33 | Willemstad - ALB | 34 | Tirana - DZA | 35 | Alger ATG | 63 | Saint Johns - ARG | 69 | Buenos Aires - BRB | 174 | Bridgetown - BEN | 187 | Porto-Novo - BTN | 192 | Thimphu - BOL | 194 | La Paz - GBR | 456 | London - VGB | 537 | Road Town - BRN | 538 | Bandar Seri Begawan + AUS | 135 | Canberra + AZE | 144 | Baku + BGR | 539 | Sofija + BFA | 549 | Ouagadougou BDI | 552 | Bujumbura DOM | 587 | Santo Domingo de Guzman - ECU | 594 | Quito - EGY | 608 | Cairo - SLV | 645 | San Salvador - PHL | 766 | Manila - GMB | 904 | Banjul + FRO | 901 | Torshavn GRD | 916 | Saint Georges - GTM | 922 | Ciudad de Guatemala - HND | 933 | Tegucigalpa - IRQ | 1365 | Baghdad + HKG | 937 | Victoria ISR | 1450 | Jerusalem - ITA | 1464 | Roma - AUT | 1523 | Wien - YEM | 1780 | Sanaa CXR | 1791 | Flying Fish Cove - YUG | 1792 | Beograd - KAZ | 1864 | Astana - COG | 2296 | Brazzaville CCK | 2317 | West Island - GRC | 2401 | Athenai - HRV | 2409 | Zagreb - LBR | 2440 | Monrovia - LBY | 2441 | Tripoli - LUX | 2452 | Luxembourg [Luxemburg/Letzebuerg] - MKD | 2460 | Skopje + MAC | 2454 | Macao + MDV | 2463 | Male MYS | 2464 | Kuala Lumpur - MUS | 2511 | Port-Louis - FSM | 2689 | Palikir - MSR | 2697 | Plymouth - NAM | 2726 | Windhoek - NRU | 2728 | Yaren - NIC | 2734 | Managua + MOZ | 2698 | Maputo NGA | 2754 | Abuja - PNG | 2884 | Port Moresby - PRY | 2885 | Asuncion + QAT | 2973 | Doha + SLB | 3161 | Honiara + STP | 3172 | Sao Tome + FIN | 3236 | Helsinki [Helsingfors] + SUR | 3243 | Paramaribo + TGO | 3332 | Lome + TON | 3334 | Nukualofa + HUN | 3483 | Budapest + NCL | 3493 | Noumea + NZL | 3499 | Wellington + VEN | 3539 | Caracas + NLD | 5 | Amsterdam + ANT | 33 | Willemstad + AIA | 62 | The Valley + BWA | 204 | Gaborone + VGB | 537 | Road Town + ECU | 594 | Quito + GUY | 928 | Georgetown + SJM | 938 | Longyearbyen + ISL | 1449 | Reykjavik + YEM | 1780 | Sanaa + JOR | 1786 | Amman + KOR | 2331 | Seoul + KWT | 2429 | Kuwait + LVA | 2434 | Riga + LIE | 2446 | Vaduz + FSM | 2689 | Palikir + OMN | 2821 | Masqat + SWE | 3048 | Stockholm + SEN | 3198 | Dakar + URY | 3492 | Montevideo + ASM | 54 | Fagatogo + AND | 55 | Andorra la Vella + BTN | 192 | Thimphu + BRA | 211 | Brasilia + GRL | 917 | Nuuk + GUM | 921 | Agaaa + CHN | 1891 | Peking + COL | 2257 | Santafe de Bogota + HRV | 2409 | Zagreb + LAO | 2432 | Vientiane PCN | 2912 | Adamstown - POL | 2928 | Warszawa - FRA | 2974 | Paris - REU | 3017 | Saint-Denis - ROM | 3018 | Bucuresti - SPM | 3067 | Saint-Pierre - WSM | 3169 | Apia + GUF | 3014 | Cayenne + DEU | 3068 | Berlin SMR | 3171 | San Marino SGP | 3208 | Singapore SVK | 3209 | Bratislava - LKA | 3217 | Colombo + VAT | 3538 | Citta del Vaticano + VIR | 4067 | Charlotte Amalie + ALB | 34 | Tirana + BEN | 187 | Porto-Novo + GBR | 456 | London + DMA | 586 | Roseau + GAB | 902 | Libreville + IND | 1109 | New Delhi + AUT | 1523 | Wien + KIR | 2256 | Bairiki + MDG | 2455 | Antananarivo + MKD | 2460 | Skopje + MLI | 2482 | Bamako + MLT | 2484 | Valletta + PAN | 2882 | Ciudad de Panama + REU | 3017 | Saint-Denis + LCA | 3065 | Castries + VCT | 3066 | Kingstown + SOM | 3214 | Mogadishu SDN | 3225 | Khartum CHE | 3248 | Bern - TGO | 3332 | Lome + DNK | 3315 | Kobenhavn TCD | 3337 | NDjamena - CZE | 3339 | Praha + TUN | 3349 | Tunis TCA | 3423 | Cockburn Town TUV | 3424 | Funafuti - URY | 3492 | Montevideo - NZL | 3499 | Wellington VUT | 3537 | Port-Vila - VAT | 3538 | Citta del Vaticano - VEN | 3539 | Caracas - RUS | 3580 | Moscow - VNM | 3770 | Hanoi - NLD | 5 | Amsterdam - ASM | 54 | Fagatogo - AND | 55 | Andorra la Vella - AGO | 56 | Luanda + USA | 3813 | Washington + PSE | 4074 | Gaza + ARE | 65 | Abu Dhabi ARM | 126 | Yerevan ABW | 129 | Oranjestad - AZE | 144 | Baku - BEL | 179 | Bruxelles [Brussel] + BLZ | 185 | Belmopan + ERI | 652 | Asmara + ETH | 756 | Addis Abeba + GHA | 910 | Accra + GLP | 919 | Basse-Terre + ITA | 1464 | Roma + YUG | 1792 | Beograd + CAN | 1822 | Ottawa + COD | 2298 | Kinshasa + LBN | 2438 | Beirut + MCO | 2695 | Monaco-Ville + PAK | 2831 | Islamabad + MNP | 2913 | Garapan + GNQ | 2972 | Malabo + PYF | 3016 | Papeete + SYR | 3250 | Damascus + TZA | 3306 | Dodoma + THA | 3320 | Bangkok + UKR | 3426 | Kyiv + AFG | 1 | Kabul + BHR | 149 | al-Manama BMU | 191 | Hamilton - BIH | 201 | Sarajevo - BRA | 211 | Brasilia - BGR | 539 | Sofija - BFA | 549 | Ouagadougou - ESP | 653 | Madrid + COK | 583 | Avarua + EGY | 608 | Cairo ZAF | 716 | Pretoria FLK | 763 | Stanley - FJI | 764 | Suva - FRO | 901 | Torshavn - GHA | 910 | Accra - GRL | 917 | Nuuk - GIN | 926 | Conakry - GNB | 927 | Bissau - GUY | 928 | Georgetown - HKG | 937 | Victoria - IRL | 1447 | Dublin - ISL | 1449 | Reykjavik - TMP | 1522 | Dili - JOR | 1786 | Amman + IDN | 939 | Jakarta KHM | 1800 | Phnom Penh - CMR | 1804 | Yaounde - CAN | 1822 | Ottawa - CPV | 1859 | Praia - KEN | 1881 | Nairobi - CHN | 1891 | Peking + KAZ | 1864 | Astana KGZ | 2253 | Bishkek - KIR | 2256 | Bairiki - COL | 2257 | Santafe de Bogota - PRK | 2318 | Pyongyang - KOR | 2331 | Seoul - CUB | 2413 | La Habana - KWT | 2429 | Kuwait - LAO | 2432 | Vientiane - LSO | 2437 | Maseru + CYP | 2430 | Nicosia + LBY | 2441 | Tripoli LTU | 2447 | Vilnius ESH | 2453 | El-Aaiun - MDG | 2455 | Antananarivo - MDV | 2463 | Male - MHL | 2507 | Dalap-Uliga-Darrit + MTQ | 2508 | Fort-de-France + MUS | 2511 | Port-Louis MYT | 2514 | Mamoutzou + MNG | 2696 | Ulan Bator + MSR | 2697 | Plymouth + NAM | 2726 | Windhoek + NPL | 2729 | Kathmandu + NIC | 2734 | Managua + NER | 2738 | Niamey + NIU | 2805 | Alofi + PLW | 2881 | Koror + ROM | 3018 | Bucuresti + LKA | 3217 | Colombo + TKM | 3419 | Ashgabat + RUS | 3580 | Moscow + ZWE | 4068 | Harare + BHS | 148 | Nassau + BOL | 194 | La Paz + BIH | 201 | Sarajevo + BRN | 538 | Bandar Seri Begawan + SLV | 645 | San Salvador + FJI | 764 | Suva + GTM | 922 | Ciudad de Guatemala + GNB | 927 | Bissau + HND | 933 | Tegucigalpa + IRN | 1380 | Teheran + JAM | 1530 | Kingston + CMR | 1804 | Yaounde + KEN | 1881 | Nairobi + CAF | 1889 | Bangui + COM | 2295 | Moroni + CUB | 2413 | La Habana + MAR | 2486 | Rabat MEX | 2515 | Ciudad de Mexico - MDA | 2690 | Chisinau - MNG | 2696 | Ulan Bator - MMR | 2710 | Rangoon (Yangon) - NER | 2738 | Niamey NOR | 2807 | Oslo - OMN | 2821 | Masqat - PAN | 2882 | Ciudad de Panama - MNP | 2913 | Garapan + PNG | 2884 | Port Moresby PRT | 2914 | Lisboa PRI | 2919 | San Juan - PYF | 3016 | Papeete + POL | 2928 | Warszawa + FRA | 2974 | Paris RWA | 3047 | Kigali - SWE | 3048 | Stockholm - KNA | 3064 | Basseterre - DEU | 3068 | Berlin - SLB | 3161 | Honiara - ZMB | 3162 | Lusaka + SAU | 3173 | Riyadh SYC | 3206 | Victoria SLE | 3207 | Freetown - SOM | 3214 | Mogadishu - FIN | 3236 | Helsinki [Helsingfors] - TJK | 3261 | Dushanbe + SWZ | 3244 | Mbabane TWN | 3263 | Taipei - TZA | 3306 | Dodoma - DNK | 3315 | Kobenhavn + TKL | 3333 | Fakaofo + TUR | 3358 | Ankara UGA | 3425 | Kampala - HUN | 3483 | Budapest UZB | 3503 | Toskent - BLR | 3520 | Minsk - USA | 3813 | Washington - VIR | 4067 | Charlotte Amalie - ZWE | 4068 | Harare - PSE | 4074 | Gaza + DZA | 35 | Alger + ARG | 69 | Buenos Aires + BGD | 150 | Dhaka + BRB | 174 | Bridgetown + CHL | 554 | Santiago de Chile + DJI | 585 | Djibouti + ESP | 653 | Madrid + PHL | 766 | Manila + GMB | 904 | Banjul + GIB | 915 | Gibraltar + IRQ | 1365 | Baghdad + JPN | 1532 | Tokyo + COG | 2296 | Brazzaville + PRK | 2318 | Pyongyang + GRC | 2401 | Athenai + LBR | 2440 | Monrovia + LUX | 2452 | Luxembourg [Luxemburg/Letzebuerg] + MWI | 2462 | Lilongwe + MRT | 2509 | Nouakchott + MMR | 2710 | Rangoon (Yangon) + NFK | 2806 | Kingston + CIV | 2814 | Yamoussoukro + PRY | 2885 | Asuncion + PER | 2890 | Lima + KNA | 3064 | Basseterre + SPM | 3067 | Saint-Pierre + ZMB | 3162 | Lusaka + WSM | 3169 | Apia + SVN | 3212 | Ljubljana + TJK | 3261 | Dushanbe + CZE | 3339 | Praha + VNM | 3770 | Hanoi (232 rows) --query 2 @@ -2030,6 +2030,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -2139,6 +2141,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -2869,6 +2873,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -2978,6 +2984,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -3271,6 +3279,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -3331,170 +3341,170 @@ select ),* from official_languages; max | code | name | language -----+------+------------------------+------------------ - 16 | AFG | Afghanistan | Dari + 16 | ANT | Netherlands Antilles | Papiamento + 16 | ARE | United Arab Emirates | Arabic 16 | ARM | Armenia | Armenian - 16 | AUT | Austria | German - 16 | BMU | Bermuda | English - 16 | BRB | Barbados | English - 16 | CHE | Switzerland | Romansh - 16 | CHE | Switzerland | German - 16 | COL | Colombia | Spanish - 16 | CRI | Costa Rica | Spanish - 16 | CZE | Czech Republic | Czech - 16 | ECU | Ecuador | Spanish - 16 | EGY | Egypt | Arabic - 16 | FIN | Finland | Finnish - 16 | GTM | Guatemala | Spanish - 16 | GUM | Guam | Chamorro - 16 | HND | Honduras | Spanish - 16 | HRV | Croatia | Serbo-Croatian - 16 | IDN | Indonesia | Malay - 16 | IND | India | Hindi - 16 | IRQ | Iraq | Arabic - 16 | ISR | Israel | Arabic - 16 | KAZ | Kazakstan | Kazakh - 16 | KOR | South Korea | Korean - 16 | LIE | Liechtenstein | German - 16 | LKA | Sri Lanka | Tamil - 16 | LTU | Lithuania | Lithuanian - 16 | MDG | Madagascar | French - 16 | MKD | Macedonia | Macedonian - 16 | MLT | Malta | English - 16 | MTQ | Martinique | French - 16 | NCL | New Caledonia | French - 16 | NIC | Nicaragua | Spanish - 16 | PAN | Panama | Spanish - 16 | PHL | Philippines | Pilipino - 16 | PRY | Paraguay | Guarani - 16 | ROM | Romania | Romanian - 16 | ROM | Romania | Romani - 16 | RWA | Rwanda | French - 16 | SDN | Sudan | Arabic - 16 | SEN | Senegal | Wolof - 16 | SGP | Singapore | Tamil - 16 | SVK | Slovakia | Slovak + 16 | CHL | Chile | Spanish + 16 | ITA | Italy | Italian + 16 | YEM | Yemen | Arabic + 16 | YUG | Yugoslavia | Serbo-Croatian + 16 | PRK | North Korea | Korean + 16 | MWI | Malawi | Chichewa + 16 | PER | Peru | Spanish + 16 | SVN | Slovenia | Slovene 16 | SYR | Syria | Arabic - 16 | TUR | Turkey | Turkish - 16 | TWN | Taiwan | Mandarin Chinese - 16 | TZA | Tanzania | Swahili - 16 | URY | Uruguay | Spanish 16 | VNM | Vietnam | Vietnamese - 16 | ZAF | South Africa | Afrikaans - 16 | ANT | Netherlands Antilles | Papiamento - 16 | ARE | United Arab Emirates | Arabic - 16 | AZE | Azerbaijan | Azerbaijani - 16 | BEL | Belgium | French - 16 | BIH | Bosnia and Herzegovina | Serbo-Croatian - 16 | BLR | Belarus | Belorussian - 16 | BLR | Belarus | Russian - 16 | BOL | Bolivia | Spanish - 16 | BOL | Bolivia | Aimara - 16 | BRA | Brazil | Portuguese 16 | CAN | Canada | French - 16 | CAN | Canada | English - 16 | CHE | Switzerland | French - 16 | CHL | Chile | Spanish - 16 | COM | Comoros | Comorian - 16 | CYP | Cyprus | Greek - 16 | DEU | Germany | German - 16 | DOM | Dominican Republic | Spanish - 16 | DZA | Algeria | Arabic + 16 | ANT | Netherlands Antilles | Dutch + 16 | PER | Peru | Aimara + 16 | ARG | Argentina | Spanish + 16 | BGD | Bangladesh | Bengali 16 | ESP | Spain | Spanish - 16 | HKG | Hong Kong | English - 16 | HTI | Haiti | French - 16 | HUN | Hungary | Hungarian - 16 | IRL | Ireland | English - 16 | IRL | Ireland | Irish - 16 | IRN | Iran | Persian + 16 | PHL | Philippines | Pilipino + 16 | IRQ | Iraq | Arabic 16 | ISL | Iceland | Icelandic - 16 | ITA | Italy | Italian - 16 | JOR | Jordan | Arabic 16 | JPN | Japan | Japanese - 16 | KGZ | Kyrgyzstan | Russian - 16 | LBY | Libyan Arab Jamahiriya | Arabic + 16 | JOR | Jordan | Arabic + 16 | KWT | Kuwait | Arabic + 16 | LBN | Lebanon | Arabic + 16 | LIE | Liechtenstein | German 16 | LUX | Luxembourg | Luxembourgish - 16 | LUX | Luxembourg | German + 16 | SWE | Sweden | Swedish + 16 | TJK | Tajikistan | Tadzhik + 16 | CZE | Czech Republic | Czech + 16 | UKR | Ukraine | Ukrainian + 16 | BRB | Barbados | English + 16 | PER | Peru | Ketdua + 16 | TZA | Tanzania | Swahili 16 | LUX | Luxembourg | French - 16 | MAR | Morocco | Arabic - 16 | MDG | Madagascar | Malagasy - 16 | MWI | Malawi | Chichewa - 16 | MYS | Malaysia | Malay + 16 | LUX | Luxembourg | German 16 | NLD | Netherlands | Dutch - 16 | NZL | New Zealand | English - 16 | PER | Peru | Ketdua - 16 | PER | Peru | Aimara - 16 | PRT | Portugal | Portuguese + 16 | DZA | Algeria | Arabic + 16 | ECU | Ecuador | Spanish + 16 | CAN | Canada | English + 16 | KOR | South Korea | Korean + 16 | GRC | Greece | Greek + 16 | LVA | Latvia | Latvian + 16 | MMR | Myanmar | Burmese 16 | PRY | Paraguay | Spanish - 16 | RUS | Russian Federation | Russian - 16 | RWA | Rwanda | Rwanda + 16 | SEN | Senegal | Wolof + 16 | THA | Thailand | Thai + 16 | URY | Uruguay | Spanish + 16 | GLP | Guadeloupe | French + 16 | PRY | Paraguay | Guarani + 16 | PAK | Pakistan | Urdu + 16 | AFG | Afghanistan | Pashto + 16 | BMU | Bermuda | English + 16 | BRA | Brazil | Portuguese + 16 | CYM | Cayman Islands | English + 16 | CRI | Costa Rica | Spanish + 16 | ZAF | South Africa | Zulu + 16 | KAZ | Kazakstan | Kazakh + 16 | CHN | China | Chinese + 16 | KGZ | Kyrgyzstan | Kirgiz + 16 | LTU | Lithuania | Lithuanian + 16 | MDA | Moldova | Romanian + 16 | ROM | Romania | Romanian + 16 | DEU | Germany | German + 16 | EST | Estonia | Estonian + 16 | GUM | Guam | Chamorro + 16 | IRL | Ireland | Irish + 16 | CYP | Cyprus | Turkish + 16 | MTQ | Martinique | French 16 | SGP | Singapore | Malay + 16 | LKA | Sri Lanka | Tamil + 16 | BEL | Belgium | Dutch + 16 | KHM | Cambodia | Khmer + 16 | COL | Colombia | Spanish + 16 | HRV | Croatia | Serbo-Croatian + 16 | LAO | Laos | Lao 16 | SGP | Singapore | Chinese + 16 | LKA | Sri Lanka | Singali + 16 | TKM | Turkmenistan | Turkmenian + 16 | BLR | Belarus | Belorussian + 16 | BEL | Belgium | French + 16 | HTI | Haiti | French + 16 | KGZ | Kyrgyzstan | Russian + 16 | ZAF | South Africa | Afrikaans + 16 | IDN | Indonesia | Malay + 16 | ROM | Romania | Romani + 16 | BEL | Belgium | German + 16 | BHR | Bahrain | Arabic + 16 | EGY | Egypt | Arabic + 16 | GEO | Georgia | Georgiana + 16 | GUM | Guam | English + 16 | IRL | Ireland | English + 16 | CYP | Cyprus | Greek + 16 | LBY | Libyan Arab Jamahiriya | Arabic + 16 | NPL | Nepal | Nepali + 16 | NIC | Nicaragua | Spanish + 16 | SVK | Slovakia | Slovak + 16 | RUS | Russian Federation | Russian + 16 | AFG | Afghanistan | Dari + 16 | AND | Andorra | Catalan + 16 | ZAF | South Africa | Xhosa + 16 | BLR | Belarus | Russian + 16 | SGP | Singapore | Tamil + 16 | ZWE | Zimbabwe | English + 16 | ZAF | South Africa | English + 16 | ALB | Albania | Albaniana + 16 | BIH | Bosnia and Herzegovina | Serbo-Croatian 16 | SLV | El Salvador | Spanish - 16 | SWE | Sweden | Swedish + 16 | GTM | Guatemala | Spanish + 16 | HND | Honduras | Spanish + 16 | IND | India | Hindi + 16 | ISR | Israel | Hebrew + 16 | COM | Comoros | Comorian + 16 | MDG | Madagascar | Malagasy + 16 | MKD | Macedonia | Macedonian + 16 | POL | Poland | Polish + 16 | SDN | Sudan | Arabic + 16 | NZL | New Zealand | English + 16 | VEN | Venezuela | Spanish + 16 | ISR | Israel | Arabic + 16 | MAC | Macao | Portuguese + 16 | RWA | Rwanda | French + 16 | CHE | Switzerland | French 16 | TCD | Chad | Arabic - 16 | TGO | Togo | Kabye - 16 | TGO | Togo | Ewe - 16 | TUN | Tunisia | Arabic - 16 | USA | United States | English - 16 | UZB | Uzbekistan | Uzbek - 16 | ZAF | South Africa | English - 16 | ZAF | South Africa | Zulu - 16 | AFG | Afghanistan | Pashto - 16 | ALB | Albania | Albaniana - 16 | AND | Andorra | Catalan - 16 | ANT | Netherlands Antilles | Dutch - 16 | ARG | Argentina | Spanish - 16 | AUS | Australia | English - 16 | BEL | Belgium | German - 16 | BEL | Belgium | Dutch - 16 | BGD | Bangladesh | Bengali - 16 | BGR | Bulgaria | Bulgariana - 16 | BHR | Bahrain | Arabic - 16 | BOL | Bolivia | Ketdua + 16 | NCL | New Caledonia | French + 16 | BOL | Bolivia | Aimara 16 | CHE | Switzerland | Italian - 16 | CHN | China | Chinese + 16 | BOL | Bolivia | Spanish + 16 | BGR | Bulgaria | Bulgariana + 16 | FJI | Fiji Islands | Fijian 16 | CUB | Cuba | Spanish - 16 | CYM | Cayman Islands | English - 16 | CYP | Cyprus | Turkish + 16 | MYS | Malaysia | Malay + 16 | MAR | Morocco | Arabic + 16 | PAN | Panama | Spanish + 16 | RWA | Rwanda | Rwanda + 16 | CHE | Switzerland | German 16 | DNK | Denmark | Danish - 16 | EST | Estonia | Estonian + 16 | HUN | Hungary | Hungarian + 16 | UZB | Uzbekistan | Uzbek + 16 | USA | United States | English + 16 | MDG | Madagascar | French 16 | FIN | Finland | Swedish - 16 | FJI | Fiji Islands | Fijian - 16 | FRA | France | French + 16 | TWN | Taiwan | Mandarin Chinese + 16 | CHE | Switzerland | Romansh + 16 | AUS | Australia | English + 16 | AZE | Azerbaijan | Azerbaijani 16 | GBR | United Kingdom | English - 16 | GEO | Georgia | Georgiana - 16 | GLP | Guadeloupe | French - 16 | GRC | Greece | Greek - 16 | GUM | Guam | English - 16 | ISR | Israel | Hebrew - 16 | KGZ | Kyrgyzstan | Kirgiz - 16 | KHM | Cambodia | Khmer - 16 | KWT | Kuwait | Arabic - 16 | LAO | Laos | Lao - 16 | LBN | Lebanon | Arabic - 16 | LKA | Sri Lanka | Singali - 16 | LVA | Latvia | Latvian - 16 | MAC | Macao | Portuguese - 16 | MDA | Moldova | Romanian - 16 | MEX | Mexico | Spanish + 16 | DOM | Dominican Republic | Spanish + 16 | IRN | Iran | Persian + 16 | AUT | Austria | German 16 | MLT | Malta | Maltese - 16 | MMR | Myanmar | Burmese + 16 | MEX | Mexico | Spanish 16 | NOR | Norway | Norwegian - 16 | NPL | Nepal | Nepali - 16 | PAK | Pakistan | Urdu - 16 | PER | Peru | Spanish - 16 | POL | Poland | Polish - 16 | PRK | North Korea | Korean - 16 | SVN | Slovenia | Slovene - 16 | THA | Thailand | Thai - 16 | TJK | Tajikistan | Tadzhik - 16 | TKM | Turkmenistan | Turkmenian - 16 | UKR | Ukraine | Ukrainian - 16 | VEN | Venezuela | Spanish - 16 | YEM | Yemen | Arabic - 16 | YUG | Yugoslavia | Serbo-Croatian - 16 | ZAF | South Africa | Xhosa - 16 | ZWE | Zimbabwe | English + 16 | PRT | Portugal | Portuguese + 16 | FRA | France | French + 16 | FIN | Finland | Finnish + 16 | TGO | Togo | Ewe + 16 | TUN | Tunisia | Arabic + 16 | TUR | Turkey | Turkish + 16 | BOL | Bolivia | Ketdua + 16 | HKG | Hong Kong | English + 16 | MLT | Malta | English + 16 | TGO | Togo | Kabye (164 rows) --query 6 Use CTE in the main query and subqueries within the main query @@ -4483,6 +4493,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -4596,6 +4608,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -5278,6 +5292,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -5391,6 +5407,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -6120,6 +6138,8 @@ and CITY_CNT/LANG_CNT > (select max(CITY_CNT/LANG_CNT) from allcountrystats,co WHERE allcountrystats.code = country.code and FOO.region = country.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_cnt | region_lang_cnt | region -----------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -6229,6 +6249,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -7009,6 +7031,8 @@ and CITY_AO_CNT/LANG_CNT > (select max(CITY_AO_CNT/LANG_CNT) from allcountry_a WHERE allcountry_aostats.code = country_ao.code and FOO.region = country_ao.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_ao_cnt | region_lang_cnt | region --------------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -7122,6 +7146,8 @@ where longlivingregions.region = denseregions.region and allcountry_aostats.code and country_ao.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_ao_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region -------------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -7896,6 +7922,8 @@ and CITY_CO_CNT/LANG_CNT > (select max(CITY_CO_CNT/LANG_CNT) from allcountry_c WHERE allcountry_costats.code = country_co.code and FOO.region = country_co.region group by FOO.region order by FOO.region; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer region_city_co_cnt | region_lang_cnt | region --------------------+-----------------+--------------------------- 840 | 192 | Caribbean @@ -8009,6 +8037,8 @@ where longlivingregions.region = denseregions.region and allcountry_costats.code and country_co.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_co_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region -------------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -8246,6 +8276,8 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_SURFACE_AREA | REGION_LIFETIME | REGION_POP | lang_count | REGION_GNP | region ----------+----------+---------------------------------------+---------------------+------------------+------------+------------+------------+--------------------------- 4 | 5 | Afghanistan | 90749795.00 | 61.3500003814697 | 1490776000 | 54 | 810604.00 | Southern and Central Asia @@ -8520,21 +8552,21 @@ where capitals."C" = country.code and country.code = lang_total.cc; name | C | ID | CAP | lc ---------------------------------+-----+------+------------------+---- - Austria | AUT | 1523 | Wien | 8 - Venezuela | VEN | 3539 | Caracas | 3 - Russian Federation | RUS | 3580 | Moscow | 12 - Bosnia and Herzegovina | BIH | 201 | Sarajevo | 1 - Brazil | BRA | 211 | Brasilia | 5 - Madagascar | MDG | 2455 | Antananarivo | 2 - Mexico | MEX | 2515 | Ciudad de Mexico | 6 + Micronesia, Federated States of | FSM | 2689 | Palikir | 6 Argentina | ARG | 69 | Buenos Aires | 3 - Azerbaijan | AZE | 144 | Baku | 4 - United States | USA | 3813 | Washington | 12 Yugoslavia | YUG | 1792 | Beograd | 6 - Micronesia, Federated States of | FSM | 2689 | Palikir | 6 - Nigeria | NGA | 2754 | Abuja | 10 + Brazil | BRA | 211 | Brasilia | 5 Germany | DEU | 3068 | Berlin | 6 + Russian Federation | RUS | 3580 | Moscow | 12 + Austria | AUT | 1523 | Wien | 8 + Azerbaijan | AZE | 144 | Baku | 4 + Madagascar | MDG | 2455 | Antananarivo | 2 + Nigeria | NGA | 2754 | Abuja | 10 + Bosnia and Herzegovina | BIH | 201 | Sarajevo | 1 + United States | USA | 3813 | Washington | 12 + Venezuela | VEN | 3539 | Caracas | 3 India | IND | 1109 | New Delhi | 12 + Mexico | MEX | 2515 | Ciudad de Mexico | 6 (15 rows) -- query 2 Check case sensitivity for quoted names in column list. This should error out @@ -8602,52 +8634,52 @@ select (select min(GNP_IDX) from gnp_index_compare) MIN_COUNTRY_GNP_IDX, (select from city,country where city.id = country.capital and country.continent='Europe'; min_country_gnp_idx | max_reg_gnp_idx | capital | country ------------------------+--------------------+-----------------------------------+------------------------------- + 0.39526143942847043283 | 1.2055929013175585 | Sarajevo | Bosnia and Herzegovina + 0.39526143942847043283 | 1.2055929013175585 | Oslo | Norway + 0.39526143942847043283 | 1.2055929013175585 | Lisboa | Portugal + 0.39526143942847043283 | 1.2055929013175585 | Warszawa | Poland + 0.39526143942847043283 | 1.2055929013175585 | Paris | France + 0.39526143942847043283 | 1.2055929013175585 | Madrid | Spain 0.39526143942847043283 | 1.2055929013175585 | Gibraltar | Gibraltar + 0.39526143942847043283 | 1.2055929013175585 | Athenai | Greece + 0.39526143942847043283 | 1.2055929013175585 | Luxembourg [Luxemburg/Letzebuerg] | Luxembourg + 0.39526143942847043283 | 1.2055929013175585 | Ljubljana | Slovenia + 0.39526143942847043283 | 1.2055929013175585 | Praha | Czech Republic + 0.39526143942847043283 | 1.2055929013175585 | Vilnius | Lithuania + 0.39526143942847043283 | 1.2055929013175585 | Bucuresti | Romania + 0.39526143942847043283 | 1.2055929013175585 | Moscow | Russian Federation + 0.39526143942847043283 | 1.2055929013175585 | Sofija | Bulgaria + 0.39526143942847043283 | 1.2055929013175585 | Torshavn | Faroe Islands + 0.39526143942847043283 | 1.2055929013175585 | Helsinki [Helsingfors] | Finland + 0.39526143942847043283 | 1.2055929013175585 | Budapest | Hungary + 0.39526143942847043283 | 1.2055929013175585 | Amsterdam | Netherlands 0.39526143942847043283 | 1.2055929013175585 | Longyearbyen | Svalbard and Jan Mayen - 0.39526143942847043283 | 1.2055929013175585 | Tallinn | Estonia - 0.39526143942847043283 | 1.2055929013175585 | Kyiv | Ukraine + 0.39526143942847043283 | 1.2055929013175585 | Reykjavik | Iceland 0.39526143942847043283 | 1.2055929013175585 | Riga | Latvia 0.39526143942847043283 | 1.2055929013175585 | Vaduz | Liechtenstein - 0.39526143942847043283 | 1.2055929013175585 | Valletta | Malta - 0.39526143942847043283 | 1.2055929013175585 | Monaco-Ville | Monaco - 0.39526143942847043283 | 1.2055929013175585 | Ljubljana | Slovenia + 0.39526143942847043283 | 1.2055929013175585 | Stockholm | Sweden + 0.39526143942847043283 | 1.2055929013175585 | Bruxelles [Brussel] | Belgium + 0.39526143942847043283 | 1.2055929013175585 | Dublin | Ireland + 0.39526143942847043283 | 1.2055929013175585 | Chisinau | Moldova + 0.39526143942847043283 | 1.2055929013175585 | Minsk | Belarus + 0.39526143942847043283 | 1.2055929013175585 | Tallinn | Estonia 0.39526143942847043283 | 1.2055929013175585 | Tirana | Albania - 0.39526143942847043283 | 1.2055929013175585 | Luxembourg [Luxemburg/Letzebuerg] | Luxembourg - 0.39526143942847043283 | 1.2055929013175585 | Praha | Czech Republic - 0.39526143942847043283 | 1.2055929013175585 | Citta del Vaticano | Holy See (Vatican City State) 0.39526143942847043283 | 1.2055929013175585 | London | United Kingdom - 0.39526143942847043283 | 1.2055929013175585 | Roma | Italy - 0.39526143942847043283 | 1.2055929013175585 | Beograd | Yugoslavia - 0.39526143942847043283 | 1.2055929013175585 | Athenai | Greece - 0.39526143942847043283 | 1.2055929013175585 | Zagreb | Croatia - 0.39526143942847043283 | 1.2055929013175585 | Warszawa | Poland - 0.39526143942847043283 | 1.2055929013175585 | Bucuresti | Romania - 0.39526143942847043283 | 1.2055929013175585 | Bratislava | Slovakia 0.39526143942847043283 | 1.2055929013175585 | Wien | Austria 0.39526143942847043283 | 1.2055929013175585 | Skopje | Macedonia - 0.39526143942847043283 | 1.2055929013175585 | Paris | France - 0.39526143942847043283 | 1.2055929013175585 | San Marino | San Marino + 0.39526143942847043283 | 1.2055929013175585 | Valletta | Malta 0.39526143942847043283 | 1.2055929013175585 | Bern | Switzerland - 0.39526143942847043283 | 1.2055929013175585 | Moscow | Russian Federation - 0.39526143942847043283 | 1.2055929013175585 | Amsterdam | Netherlands - 0.39526143942847043283 | 1.2055929013175585 | Sofija | Bulgaria - 0.39526143942847043283 | 1.2055929013175585 | Dublin | Ireland - 0.39526143942847043283 | 1.2055929013175585 | Oslo | Norway - 0.39526143942847043283 | 1.2055929013175585 | Lisboa | Portugal + 0.39526143942847043283 | 1.2055929013175585 | Kobenhavn | Denmark + 0.39526143942847043283 | 1.2055929013175585 | Roma | Italy + 0.39526143942847043283 | 1.2055929013175585 | Beograd | Yugoslavia + 0.39526143942847043283 | 1.2055929013175585 | Monaco-Ville | Monaco + 0.39526143942847043283 | 1.2055929013175585 | Kyiv | Ukraine 0.39526143942847043283 | 1.2055929013175585 | Andorra la Vella | Andorra - 0.39526143942847043283 | 1.2055929013175585 | Bruxelles [Brussel] | Belgium - 0.39526143942847043283 | 1.2055929013175585 | Madrid | Spain - 0.39526143942847043283 | 1.2055929013175585 | Vilnius | Lithuania - 0.39526143942847043283 | 1.2055929013175585 | Stockholm | Sweden + 0.39526143942847043283 | 1.2055929013175585 | Zagreb | Croatia 0.39526143942847043283 | 1.2055929013175585 | Berlin | Germany - 0.39526143942847043283 | 1.2055929013175585 | Helsinki [Helsingfors] | Finland - 0.39526143942847043283 | 1.2055929013175585 | Kobenhavn | Denmark - 0.39526143942847043283 | 1.2055929013175585 | Minsk | Belarus - 0.39526143942847043283 | 1.2055929013175585 | Sarajevo | Bosnia and Herzegovina - 0.39526143942847043283 | 1.2055929013175585 | Torshavn | Faroe Islands - 0.39526143942847043283 | 1.2055929013175585 | Reykjavik | Iceland - 0.39526143942847043283 | 1.2055929013175585 | Chisinau | Moldova - 0.39526143942847043283 | 1.2055929013175585 | Budapest | Hungary + 0.39526143942847043283 | 1.2055929013175585 | San Marino | San Marino + 0.39526143942847043283 | 1.2055929013175585 | Bratislava | Slovakia + 0.39526143942847043283 | 1.2055929013175585 | Citta del Vaticano | Holy See (Vatican City State) (46 rows) --query2 using the CTE in the where clause(initplan) of the main query. One CTE using another CTE in it's where clause as it's initplan @@ -8673,43 +8705,43 @@ group by country.name,country.code where FOO.LANG_CNT between (select min(CNT) from notdiversecountries) AND (select max(CNT) from diversecountries); lang_cnt | name ----------+---------------------------------- - 4 | Aruba - 2 | El Salvador - 1 | Saint Pierre and Miquelon + 6 | Panama + 6 | Mexico + 4 | Nicaragua 2 | Dominica 2 | Jamaica - 6 | Panama - 3 | Virgin Islands, U.S. - 1 | Anguilla - 4 | Belize + 1 | Turks and Caicos Islands 3 | Trinidad and Tobago - 1 | Cayman Islands - 2 | Bahamas - 2 | Dominican Republic - 2 | Greenland - 2 | Martinique - 2 | Saint Vincent and the Grenadines + 2 | Haiti + 1 | Anguilla 2 | Saint Kitts and Nevis - 4 | Nicaragua - 2 | Saint Lucia - 12 | United States - 12 | Canada - 3 | Netherlands Antilles - 6 | Mexico + 5 | Guatemala + 4 | Aruba + 1 | Montserrat + 3 | Virgin Islands, U.S. 4 | Costa Rica - 1 | Turks and Caicos Islands + 2 | Guadeloupe + 2 | Antigua and Barbuda 2 | Puerto Rico - 1 | Virgin Islands, British - 5 | Guatemala + 2 | Bahamas 1 | Cuba + 2 | Greenland + 2 | El Salvador 1 | Bermuda - 1 | Montserrat - 2 | Barbados - 2 | Haiti + 2 | Saint Lucia + 2 | Saint Vincent and the Grenadines + 1 | Virgin Islands, British + 3 | Netherlands Antilles + 1 | Saint Pierre and Miquelon 1 | Grenada - 2 | Guadeloupe + 1 | Cayman Islands 4 | Honduras - 2 | Antigua and Barbuda + 4 | Belize + 2 | Dominican Republic + 2 | Martinique + 12 | Canada + 2 | Barbados + 12 | United States (37 rows) --query3 using CTE more than once in the same initplan and also more than once in the main query @@ -9209,6 +9241,8 @@ group by OUTERMOST_FOO.region order by OUTERMOST_FOO.region ) cm where bad_headofstates.region = 'Caribbean'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: DXL-to-PlStmt Translation: Attribute number 212 not found in project list select * from bad_headofstates order by avg,region,headofstate; avg | region | headofstate @@ -9484,6 +9518,20 @@ where longlivingregions.region = denseregions.region and allcountrystats.code = and country.indepyear > 1900 ); \d+ view_with_shared_scans; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables View "qp_with_clause.view_with_shared_scans" Column | Type | Collation | Nullable | Default | Storage | Description ---------------------+------------------+-----------+----------+---------+----------+------------- @@ -9586,6 +9634,8 @@ UNION ALL WHERE longlivingregions.region = denseregions.region AND allcountrystats.code = country.code AND country.region = longlivingregions.region AND country.indepyear > 1900; select city_cnt,lang_cnt,name,region from view_with_shared_scans order by name LIMIT 50; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | region ----------+----------+---------------------------------------+--------------------------- 4 | 5 | Afghanistan | Southern and Central Asia @@ -9641,18 +9691,20 @@ select city_cnt,lang_cnt,name,region from view_with_shared_scans order by name L (50 rows) select city_cnt,lang_cnt,name,"REGION_POP","REGION_GNP",region from view_with_shared_scans where region = 'Eastern Europe'; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer city_cnt | lang_cnt | name | REGION_POP | REGION_GNP | region ----------+----------+--------------------+------------+------------+---------------- - 189 | 12 | Russian Federation | 307026000 | 659980.00 | Eastern Europe - 3 | 5 | Slovakia | 307026000 | 659980.00 | Eastern Europe 10 | 8 | Czech Republic | 307026000 | 659980.00 | Eastern Europe - 44 | 4 | Poland | 307026000 | 659980.00 | Eastern Europe + 57 | 7 | Ukraine | 307026000 | 659980.00 | Eastern Europe + 29 | 6 | Romania | 307026000 | 659980.00 | Eastern Europe + 4 | 5 | Moldova | 307026000 | 659980.00 | Eastern Europe + 3 | 5 | Slovakia | 307026000 | 659980.00 | Eastern Europe 16 | 4 | Belarus | 307026000 | 659980.00 | Eastern Europe + 189 | 12 | Russian Federation | 307026000 | 659980.00 | Eastern Europe 10 | 4 | Bulgaria | 307026000 | 659980.00 | Eastern Europe - 29 | 6 | Romania | 307026000 | 659980.00 | Eastern Europe - 57 | 7 | Ukraine | 307026000 | 659980.00 | Eastern Europe + 44 | 4 | Poland | 307026000 | 659980.00 | Eastern Europe 9 | 6 | Hungary | 307026000 | 659980.00 | Eastern Europe - 4 | 5 | Moldova | 307026000 | 659980.00 | Eastern Europe (10 rows) drop view view_with_shared_scans; @@ -9675,9 +9727,9 @@ with cte as select code from tbl87 t where 1= (select count(*) from cte where cte.code::text=t.code::text or cte.code::text = t.code::text); code ------ + def abc xyz - def (3 rows) with cte as @@ -9706,9 +9758,9 @@ with cte as select code from tbl87 t where 1= (select count(*) from cte); code ------ + def abc xyz - def (3 rows) --start_ignore @@ -11118,34 +11170,35 @@ EXPLAIN (COSTS OFF) WITH q AS (SELECT * FROM (WITH cte AS (SELECT * FROM car) SE Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on car Filter: (a > 7) - Optimizer: Postgres-based planner + Optimizer: GPORCA (4 rows) WITH q AS (SELECT * FROM (WITH cte AS (SELECT * FROM car) SELECT * FROM car WHERE a > 7) t) SELECT * FROM q; a | b ----+---- - 8 | 9 9 | 10 10 | 11 + 8 | 9 (3 rows) -- start_ignore drop schema qp_with_clause cascade; -NOTICE: drop cascades to table zoo -NOTICE: drop cascades to table car -NOTICE: drop cascades to table manager -NOTICE: drop cascades to table emp -NOTICE: drop cascades to table bar -NOTICE: drop cascades to table foo -NOTICE: drop cascades to table tbl87 -NOTICE: drop cascades to table countrylanguage_co -NOTICE: drop cascades to table country_co -NOTICE: drop cascades to table city_co -NOTICE: drop cascades to table countrylanguage_ao -NOTICE: drop cascades to table country_ao -NOTICE: drop cascades to table city_ao -NOTICE: drop cascades to table countrylanguage -NOTICE: drop cascades to table country -NOTICE: drop cascades to table city +NOTICE: drop cascades to 16 other objects +DETAIL: drop cascades to table city +drop cascades to table country +drop cascades to table countrylanguage +drop cascades to table city_ao +drop cascades to table country_ao +drop cascades to table countrylanguage_ao +drop cascades to table city_co +drop cascades to table country_co +drop cascades to table countrylanguage_co +drop cascades to table tbl87 +drop cascades to table foo +drop cascades to table bar +drop cascades to table emp +drop cascades to table manager +drop cascades to table car +drop cascades to table zoo -- end_ignore RESET optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/rangefuncs_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rangefuncs_optimizer.out index 4a24ea41c3f..144044fa256 100644 --- a/contrib/pax_storage/src/test/regress/expected/rangefuncs_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/rangefuncs_optimizer.out @@ -149,10 +149,10 @@ select * from vw_ord; select definition from pg_views where viewname='vw_ord'; definition ---------------------------------------------------------------------------------------- - SELECT z.a, + - z.b, + - z.c + + b, + + c + FROM UNNEST(ARRAY[10, 20], ARRAY['foo'::text, 'bar'::text], ARRAY[1.0]) z(a, b, c); + SELECT a, + (1 row) drop view vw_ord; @@ -167,9 +167,9 @@ select * from vw_ord; select definition from pg_views where viewname='vw_ord'; definition ---------------------------------------------------------------------------------------- - SELECT z.a, + - z.b, + - z.c + + SELECT a, + + b, + + c + FROM UNNEST(ARRAY[10, 20], ARRAY['foo'::text, 'bar'::text], ARRAY[1.0]) z(a, b, c); (1 row) @@ -185,10 +185,10 @@ select * from vw_ord; select definition from pg_views where viewname='vw_ord'; definition ---------------------------------------------------------------------------------------------------------------------- - SELECT z.a, + - z.b, + - z.c + + b, + + c + FROM ROWS FROM(unnest(ARRAY[10, 20]), unnest(ARRAY['foo'::text, 'bar'::text]), generate_series(1, 2)) z(a, b, c); + SELECT a, + (1 row) drop view vw_ord; @@ -610,15 +610,15 @@ select * from vw_rngfunc; select pg_get_viewdef('vw_rngfunc'); pg_get_viewdef ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - SELECT t1.a, + - t1.b, + - t1.c, + - t1.d, + - t1.e, + - t1.f, + - t1.g, + - t1.n + + b, + + c, + + d, + + e, + + f, + + g, + + n + FROM ROWS FROM(getrngfunc9(1), getrngfunc7(1) AS (rngfuncid integer, rngfuncsubid integer, rngfuncname text), getrngfunc1(1)) WITH ORDINALITY t1(a, b, c, d, e, f, g, n); + SELECT a, + (1 row) drop view vw_rngfunc; @@ -635,8 +635,9 @@ DROP FUNCTION rngfunct(int); DROP TABLE rngfunc2; DROP TABLE rngfunc; -- Rescan tests -- -CREATE TEMPORARY SEQUENCE rngfunc_rescan_seq1; -CREATE TEMPORARY SEQUENCE rngfunc_rescan_seq2; +-- GPDB sets the cache to 1 to ensure consistency in tests +CREATE TEMPORARY SEQUENCE rngfunc_rescan_seq1 CACHE 1; +CREATE TEMPORARY SEQUENCE rngfunc_rescan_seq2 CACHE 1; CREATE TYPE rngfunc_rescan_t AS (i integer, s bigint); CREATE FUNCTION rngfunc_sql(int,int) RETURNS setof rngfunc_rescan_t AS 'SELECT i, nextval(''rngfunc_rescan_seq1'') FROM generate_series($1,$2) i;' LANGUAGE SQL; -- plpgsql functions use materialize mode @@ -1602,7 +1603,10 @@ DROP FUNCTION rngfunc(); -- -- some tests on SQL functions with RETURNING -- -create temp table tt(f1 serial, data text); +-- GPDB: use a sequence column instead of serial to enforce a cache size for consistent results +create temporary sequence tt_seq cache 1; +create temp table tt(f1 int NOT NULL DEFAULT nextval('tt_seq'), data text); +alter sequence tt_seq owned by tt.f1; -- GPDB: The tests below which throw NOTICEs, throw them in indeterminate -- order, if the rows are hashed to different segments. Force the rows -- that have problem to be hashed to the same segment, using a custom hash @@ -1712,20 +1716,15 @@ select * from tt; (10 rows) -- triggers will fire, too --- Pax have not implements tuple_fetch_row_version --- create function noticetrigger() returns trigger as $$ --- begin --- raise notice 'noticetrigger % %', new.f1, new.data; --- return null; --- end $$ language plpgsql; --- create trigger tnoticetrigger after insert on tt for each row --- execute procedure noticetrigger(); +create function noticetrigger() returns trigger as $$ +begin + raise notice 'noticetrigger % %', new.f1, new.data; + return null; +end $$ language plpgsql; +create trigger tnoticetrigger after insert on tt for each row +execute procedure noticetrigger(); select insert_tt2('foolme','barme') limit 1; - insert_tt2 ------------- - 11 -(1 row) - +ERROR: not implemented yet on pax relations: TupleFetchRowVersion select * from tt; f1 | data ----+---------- @@ -1739,20 +1738,40 @@ select * from tt; 8 | quux 9 | foolish 10 | barrish - 11 | foolme - 12 | barme -(12 rows) +(10 rows) -- and rules work --- Pax have not implements tuple_fetch_row_version --- create temp table tt_log(f1 int, data text); --- create rule insert_tt_rule as on insert to tt do also --- insert into tt_log values(new.*); --- select insert_tt2('foollog','barlog') limit 1; --- select * from tt; +create temp table tt_log(f1 int, data text); +create rule insert_tt_rule as on insert to tt do also + insert into tt_log values(new.*); +select insert_tt2('foollog','barlog') limit 1; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +select * from tt; + f1 | data +----+---------- + 1 | foo + 2 | bar + 3 | fool + 4 | foolfool + 5 | foolish + 6 | barrish + 7 | baz + 8 | quux + 9 | foolish + 10 | barrish +(10 rows) + -- note that nextval() gets executed a second time in the rule expansion, -- which is expected. --- select * from tt_log; +-- GPDB: Only select data here. With triggers and rules, some may execute in +-- different orders depending on which segment triggers first--causing the +-- sequence number to be different. Therefore, we only select the data here to +-- ensure consistency in the tests +select data from tt_log; + data +------ +(0 rows) + -- test case for a whole-row-variable bug create function rngfunc1(n integer, out a text, out b text) returns setof record @@ -2193,15 +2212,47 @@ select * from usersview; id2 | 2 | email2 | 12 | t | 11 | 2 (2 rows) +alter table users drop column moredrop; -- fail, view has reference +DETAIL: view usersview depends on column moredrop of table users +ERROR: cannot drop column moredrop of table users because other objects depend on it +HINT: Use DROP ... CASCADE to drop the dependent objects too. +-- We used to have a bug that would allow the above to succeed, posing +-- hazards for later execution of the view. Check that the internal +-- defenses for those hazards haven't bit-rotted, in case some other +-- bug with similar symptoms emerges. begin; -alter table users drop column moredrop; +-- destroy the dependency entry that prevents the DROP: +delete from pg_depend where + objid = (select oid from pg_rewrite + where ev_class = 'usersview'::regclass and rulename = '_RETURN') + and refobjsubid = 5 +returning pg_describe_object(classid, objid, objsubid) as obj, + pg_describe_object(refclassid, refobjid, refobjsubid) as ref, + deptype; +ERROR: permission denied: "pg_depend" is a system catalog +alter table users drop column moredrop cascade; +ERROR: current transaction is aborted, commands ignored until end of transaction block select * from usersview; -- expect clean failure -ERROR: attribute 5 of type record has been dropped +ERROR: current transaction is aborted, commands ignored until end of transaction block rollback; -alter table users alter column seq type numeric; +alter table users alter column seq type numeric; -- fail, view has reference +DETAIL: rule _RETURN on view usersview depends on column "seq" +ERROR: cannot alter type of a column used by a view or rule +-- likewise, check we don't crash if the dependency goes wrong +begin; +-- destroy the dependency entry that prevents the ALTER: +delete from pg_depend where + objid = (select oid from pg_rewrite + where ev_class = 'usersview'::regclass and rulename = '_RETURN') + and refobjsubid = 2 +returning pg_describe_object(classid, objid, objsubid) as obj, + pg_describe_object(refclassid, refobjid, refobjsubid) as ref, + deptype; +ERROR: permission denied: "pg_depend" is a system catalog +-- alter table users alter column seq type numeric; select * from usersview; -- expect clean failure -ERROR: attribute 2 of type record has wrong type -DETAIL: Table has type numeric, but query expects integer. +ERROR: current transaction is aborted, commands ignored until end of transaction block +rollback; drop view usersview; drop function get_first_user(); drop function get_users(); @@ -2474,3 +2525,19 @@ select * from [{"id": "1"}] | 1 (1 row) +-- check detection of mismatching record types with a const-folded expression +with a(b) as (values (row(1,2,3))) +select * from a, coalesce(b) as c(d int, e int); -- fail +DETAIL: Returned row contains 3 attributes, but query expects 2. +ERROR: function return row and query-specified return row do not match +with a(b) as (values (row(1,2,3))) +select * from a, coalesce(b) as c(d int, e int, f int, g int); -- fail +DETAIL: Returned row contains 3 attributes, but query expects 4. +ERROR: function return row and query-specified return row do not match +with a(b) as (values (row(1,2,3))) +select * from a, coalesce(b) as c(d int, e int, f float); -- fail +DETAIL: Returned type integer at ordinal position 3, but query expects double precision. +ERROR: function return row and query-specified return row do not match +select * from int8_tbl, coalesce(row(1)) as (a int, b int); -- fail +DETAIL: Returned row contains 1 attribute, but query expects 2. +ERROR: function return row and query-specified return row do not match diff --git a/contrib/pax_storage/src/test/regress/expected/rangetypes_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rangetypes_optimizer.out index 46f072abc41..7c4a1a580e8 100644 --- a/contrib/pax_storage/src/test/regress/expected/rangetypes_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/rangetypes_optimizer.out @@ -1,13 +1,7 @@ -- Tests for range data types. --- start_matchsubs --- m/NOTICE: One or more columns in the following table\(s\) do not have statistics: / --- s/.//gs --- m/HINT: For non-partitioned tables, run analyze .+\. For partitioned tables, run analyze rootpartition .+\. See log for columns missing statistics\./ --- s/.//gs --- end_matchsubs -create type textrange as range (subtype=text, collation="C"); -- -- test input parser +-- (type textrange was already made in test_setup.sql) -- -- negative tests; should fail select ''::textrange; @@ -181,6 +175,73 @@ select '(a,a)'::textrange; empty (1 row) +-- Also try it with non-error-throwing API +select pg_input_is_valid('(1,4)', 'int4range'); + pg_input_is_valid +------------------- + t +(1 row) + +select pg_input_is_valid('(1,4', 'int4range'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('(1,4', 'int4range'); + message | detail | hint | sql_error_code +---------------------------------+--------------------------+------+---------------- + malformed range literal: "(1,4" | Unexpected end of input. | | 22P02 +(1 row) + +select pg_input_is_valid('(4,1)', 'int4range'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('(4,1)', 'int4range'); + message | detail | hint | sql_error_code +-------------------------------------------------------------------+--------+------+---------------- + range lower bound must be less than or equal to range upper bound | | | 22000 +(1 row) + +select pg_input_is_valid('(4,zed)', 'int4range'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('(4,zed)', 'int4range'); + message | detail | hint | sql_error_code +----------------------------------------------+--------+------+---------------- + invalid input syntax for type integer: "zed" | | | 22P02 +(1 row) + +select pg_input_is_valid('[1,2147483647]', 'int4range'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('[1,2147483647]', 'int4range'); + message | detail | hint | sql_error_code +----------------------+--------+------+---------------- + integer out of range | | | 22003 +(1 row) + +select pg_input_is_valid('[2000-01-01,5874897-12-31]', 'daterange'); + pg_input_is_valid +------------------- + f +(1 row) + +select * from pg_input_error_info('[2000-01-01,5874897-12-31]', 'daterange'); + message | detail | hint | sql_error_code +-------------------+--------+------+---------------- + date out of range | | | 22008 +(1 row) + -- -- create some test data and test the operators -- @@ -784,7 +845,6 @@ select daterange('2000-01-01'::date, 'infinity'::date, '[]'); -- test GiST index that's been built incrementally create table test_range_gist(ir int4range); --- PAX not support gist/spgist/brin indexes create index test_range_gist_idx on test_range_gist using gist (ir); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) insert into test_range_gist select int4range(g, g+10) from generate_series(1,2000) g; @@ -1050,7 +1110,6 @@ select count(*) from test_range_gist where ir -|- int4multirange(int4range(100,2 -- now check same queries using a bulk-loaded index drop index test_range_gist_idx; ERROR: index "test_range_gist_idx" does not exist --- PAX not support gist/spgist/brin indexes create index test_range_gist_idx on test_range_gist using gist (ir); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) select count(*) from test_range_gist where ir @> 'empty'::int4range; @@ -1453,6 +1512,45 @@ select count(*) from test_range_elem where int4range(i,i+10) <@ int4range(10,30) RESET enable_seqscan; drop table test_range_elem; +-- +-- Btree_gist is not included by default, so to test exclusion +-- constraints with range types, use singleton int ranges for the "=" +-- portion of the constraint. +-- +create table test_range_excl( + id int4, + room int4range, + speaker int4range, + during tsrange, + exclude using gist (room with =, during with &&), + exclude using gist (speaker with =, during with &&) +) DISTRIBUTED REPLICATED; +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:###) +insert into test_range_excl + values(1, int4range(123, 123, '[]'), int4range(1, 1, '[]'), '[2010-01-02 10:00, 2010-01-02 11:00)'); +ERROR: relation "test_range_excl" does not exist +LINE 1: insert into test_range_excl + ^ +insert into test_range_excl + values(1, int4range(123, 123, '[]'), int4range(2, 2, '[]'), '[2010-01-02 11:00, 2010-01-02 12:00)'); +ERROR: relation "test_range_excl" does not exist +LINE 1: insert into test_range_excl + ^ +insert into test_range_excl + values(1, int4range(123, 123, '[]'), int4range(3, 3, '[]'), '[2010-01-02 10:10, 2010-01-02 11:00)'); +ERROR: relation "test_range_excl" does not exist +LINE 1: insert into test_range_excl + ^ +insert into test_range_excl + values(1, int4range(124, 124, '[]'), int4range(3, 3, '[]'), '[2010-01-02 10:10, 2010-01-02 11:10)'); +ERROR: relation "test_range_excl" does not exist +LINE 1: insert into test_range_excl + ^ +insert into test_range_excl + values(1, int4range(125, 125, '[]'), int4range(1, 1, '[]'), '[2010-01-02 10:10, 2010-01-02 11:00)'); +ERROR: relation "test_range_excl" does not exist +LINE 1: insert into test_range_excl + ^ -- test bigint ranges select int8range(10000000000::int8, 20000000000::int8,'(]'); int8range @@ -1476,12 +1574,11 @@ LINE 1: select '[2010-01-01 01:00:00 -08, 2010-01-01 02:00:00 -05)':... set timezone to default; -- -- Test user-defined range of floats +-- (type float8range was already made in test_setup.sql) -- --should fail -create type float8range as range (subtype=float8, subtype_diff=float4mi); +create type bogus_float8range as range (subtype=float8, subtype_diff=float4mi); ERROR: function float4mi(double precision, double precision) does not exist ---should succeed -create type float8range as range (subtype=float8, subtype_diff=float8mi); select '[123.001, 5.e9)'::float8range @> 888.882::float8; ?column? ---------- @@ -1666,8 +1763,6 @@ select array[1,3] <@ arrayrange(array[1,2], array[2,1]); t (1 row) --- start_ignore --- GPDB_94_MERGE_FIXME: orca can not run the test green. -- -- Check behavior when subtype lacks a hash function -- @@ -1680,13 +1775,12 @@ select '(2,5)'::cashrange except select '(5,6)'::cashrange; (1 row) reset enable_sort; --- end_ignore -- -- Ranges of composites -- create type two_ints as (a int, b int); create type two_ints_range as range (subtype = two_ints); --- with force_parallel_mode on, this exercises tqueue.c's range remapping +-- with debug_parallel_query on, this exercises tqueue.c's range remapping select *, row_to_json(upper(t)) as u from (values (two_ints_range(row(1,2), row(3,4))), (two_ints_range(row(5,6), row(7,8)))) v(t); diff --git a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out index da828356739..0a2a17de3fc 100644 --- a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out @@ -1480,9 +1480,9 @@ ERROR: infinite recursion detected in policy for relation "rec1" -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE s1 (a int, b text); -INSERT INTO s1 (SELECT x, md5(x::text) FROM generate_series(-10,10) x); +INSERT INTO s1 (SELECT x, public.fipshash(x::text) FROM generate_series(-10,10) x); CREATE TABLE s2 (x int, y text); -INSERT INTO s2 (SELECT x, md5(x::text) FROM generate_series(-6,6) x); +INSERT INTO s2 (SELECT x, public.fipshash(x::text) FROM generate_series(-6,6) x); GRANT SELECT ON s1, s2 TO regress_rls_bob; CREATE POLICY p1 ON s1 USING (a in (select x from s2 where y like '%2f%')); CREATE POLICY p2 ON s2 USING (x in (select a from s1 where b like '%22%')); @@ -1500,13 +1500,11 @@ DROP POLICY p3 on s1; ALTER POLICY p2 ON s2 USING (x % 2 = 0); SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM s1 WHERE f_leak(b); -- OK -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c - a | b ----+---------------------------------- - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c -(2 rows) +NOTICE: f_leak => 03b26944890929ff751653acb2f2af79 + a | b +----+---------------------------------- + -6 | 03b26944890929ff751653acb2f2af79 +(1 row) EXPLAIN (COSTS OFF) SELECT * FROM only s1 WHERE f_leak(b); QUERY PLAN @@ -1525,13 +1523,11 @@ SET SESSION AUTHORIZATION regress_rls_alice; ALTER POLICY p1 ON s1 USING (a in (select x from v2)); -- using VIEW in RLS policy SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM s1 WHERE f_leak(b); -- OK -NOTICE: f_leak => 0267aaf632e87a63288a08331f22c7c3 -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc +NOTICE: f_leak => 03b26944890929ff751653acb2f2af79 a | b ----+---------------------------------- - -4 | 0267aaf632e87a63288a08331f22c7c3 - 6 | 1679091c5a880faf6fb5e6087eb1b2dc -(2 rows) + -6 | 03b26944890929ff751653acb2f2af79 +(1 row) EXPLAIN (COSTS OFF) SELECT * FROM s1 WHERE f_leak(b); QUERY PLAN @@ -1549,10 +1545,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM s1 WHERE f_leak(b); SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; xx | x | y ----+----+---------------------------------- - -6 | -6 | 596a3d04481816330f07e4f97510c28f - -4 | -4 | 0267aaf632e87a63288a08331f22c7c3 - 2 | 2 | c81e728d9d4c2f636f067f89cc14862c -(3 rows) + -4 | -4 | e5e0093f285a4fb94c3fcc2ad7fd04ed +(1 row) EXPLAIN (COSTS OFF) SELECT (SELECT x FROM s1 LIMIT 1) xx, * FROM s2 WHERE y like '%28%'; QUERY PLAN @@ -1878,15 +1872,15 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2; -> Update on t2 t2_1 -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) -> Hash Join - Hash Cond: (t2_2.b = t2_1.b) + Hash Cond: (t2_1.b = t2_2.b) -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: t2_2.b - -> Seq Scan on t2 t2_2 + Hash Key: t2_1.b + -> Seq Scan on t2 t2_1 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) -> Hash -> Redistribute Motion 3:3 (slice4; segments: 3) - Hash Key: t2_1.b - -> Seq Scan on t2 t2_1 + Hash Key: t2_2.b + -> Seq Scan on t2 t2_2 Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b)) Optimizer: Postgres query optimizer (15 rows) @@ -1994,15 +1988,20 @@ EXPLAIN (COSTS OFF) DELETE FROM t1 WHERE f_leak(b); Filter: (((a % 2) = 0) AND f_leak(b)) (11 rows) --- pax not support tuple_fetch_row_version --- DELETE FROM only t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; --- DELETE FROM t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; +DELETE FROM only t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; +NOTICE: f_leak => bbbbbb_updt +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +DELETE FROM t1 WHERE f_leak(b) RETURNING tableoid::regclass, *, t1; +NOTICE: f_leak => bbbbbb_updt +NOTICE: f_leak => bcdbcd +NOTICE: f_leak => defdef +ERROR: not implemented yet on pax relations: TupleFetchRowVersion -- -- S.b. view on top of Row-level security -- SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE b1 (a int, b text); -INSERT INTO b1 (SELECT x, md5(x::text) FROM generate_series(-10,10) x); +INSERT INTO b1 (SELECT x, public.fipshash(x::text) FROM generate_series(-10,10) x); CREATE POLICY p1 ON b1 USING (a % 2 = 0); ALTER TABLE b1 ENABLE ROW LEVEL SECURITY; GRANT ALL ON b1 TO regress_rls_bob; @@ -2022,18 +2021,18 @@ EXPLAIN (COSTS OFF) SELECT * FROM bv1 WHERE f_leak(b); (6 rows) SELECT * FROM bv1 WHERE f_leak(b); -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 +NOTICE: f_leak => 2c624232cdd221771294dfbb310aca00 +NOTICE: f_leak => 4a44dc15364204a80fe80e9039455cc1 +NOTICE: f_leak => 4b227777d4dd1fc61c6f884f48641d02 +NOTICE: f_leak => d4735e3a265e16eee03f59718b9b5d03 +NOTICE: f_leak => e7f6c011776e8db7cd330b54174fd76f a | b ----+---------------------------------- - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 (5 rows) INSERT INTO bv1 VALUES (-1, 'xxx'); -- should fail view WCO @@ -2050,7 +2049,7 @@ EXPLAIN (COSTS OFF) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND f_leak(b); (3 rows) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND f_leak(b); -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c +NOTICE: f_leak => 4b227777d4dd1fc61c6f884f48641d02 EXPLAIN (COSTS OFF) DELETE FROM bv1 WHERE a = 6 AND f_leak(b); QUERY PLAN ------------------------------------------------------------------------- @@ -2061,30 +2060,30 @@ EXPLAIN (COSTS OFF) DELETE FROM bv1 WHERE a = 6 AND f_leak(b); (4 rows) DELETE FROM bv1 WHERE a = 6 AND f_leak(b); -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc +NOTICE: f_leak => e7f6c011776e8db7cd330b54174fd76f SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM b1; a | b -----+---------------------------------- - -10 | 1b0fd9efa5279c4203b7c70233f86dbf - -9 | 252e691406782824eec43d7eadc3d256 - -8 | a8d2ec85eaf98407310b72eb73dda247 - -7 | 74687a12d3915d3c4d83f1af7b3683d5 - -6 | 596a3d04481816330f07e4f97510c28f - -5 | 47c1b025fa18ea96c33fbb6718688c0f - -4 | 0267aaf632e87a63288a08331f22c7c3 - -3 | b3149ecea4628efd23d2f86e5a723472 - -2 | 5d7b9adcbe1c629ec722529dd12e5129 - -1 | 6bb61e3b7bce0931da574d19d1d82c88 - 0 | cfcd208495d565ef66e7dff9f98764da - 1 | c4ca4238a0b923820dcc509a6f75849b - 2 | c81e728d9d4c2f636f067f89cc14862c - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 - 7 | 8f14e45fceea167a5a36dedd4bea2543 - 8 | c9f0f895fb98ab9159f51fd0297e236d - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 10 | d3d9446802a44259755d38e6d163e820 + -10 | c171d4ec282b23db89a99880cd624e9b + -9 | d5c534fde62beb89c745a59952c8efed + -8 | e91592205d3881e3ea35d66973bb4898 + -7 | a770d3270c9dcdedf12ed9fd70444f7c + -6 | 03b26944890929ff751653acb2f2af79 + -5 | 37aa1ccf80e481832b2db282d4d4f895 + -4 | e5e0093f285a4fb94c3fcc2ad7fd04ed + -3 | 615bdd17c2556f82f384392ea8557f8c + -2 | cf3bae39dd692048a8bf961182e6a34d + -1 | 1bad6b8cf97131fceab8543e81f77571 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 1 | 6b86b273ff34fce19d6b804eff5a3f57 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 3 | 4e07408562bedb8b60ce05c1decfe3ad + 5 | ef2d127de37b942baad06145e54b0c61 + 7 | 7902699be42c8a8e46fbbb4501726517 + 8 | 2c624232cdd221771294dfbb310aca00 + 9 | 19581e27de7ced00ff1ce50b2047e7a5 + 10 | 4a44dc15364204a80fe80e9039455cc1 12 | xxx 4 | yyy (21 rows) @@ -2110,44 +2109,50 @@ SELECT * FROM document WHERE did = 2; -- ...so violates actual WITH CHECK OPTION within UPDATE (not INSERT, since -- alternative UPDATE path happens to be taken): --- Pax not support insert conflict --- INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, dauthor = EXCLUDED.dauthor; +INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, dauthor = EXCLUDED.dauthor; +ERROR: new row violates row-level security policy for table "document" -- Violates USING qual for UPDATE policy p3. -- -- UPDATE path is taken, but UPDATE fails purely because *existing* row to be -- updated is not a "novel"/cid 11 (row is not leaked, even though we have -- SELECT privileges sufficient to see the row in this instance): --- pax not support this trigger --- INSERT INTO document VALUES (33, 22, 1, 'regress_rls_bob', 'okay science fiction'); -- preparation for next statement --- INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'Some novel, replaces sci-fi') -- takes UPDATE path --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; +INSERT INTO document VALUES (33, 22, 1, 'regress_rls_bob', 'okay science fiction'); -- preparation for next statement +INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'Some novel, replaces sci-fi') -- takes UPDATE path + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; +ERROR: not implemented yet on pax relations: TupleLock -- Fine (we UPDATE, since INSERT WCOs and UPDATE security barrier quals + WCOs -- not violated): --- INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +ERROR: not implemented yet on pax relations: TupleLock -- Fine (we INSERT, so "cid = 33" ("technology") isn't evaluated): --- INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- Fine (same query, but we UPDATE, so "cid = 33", ("technology") is not the -- case in respect of *existing* tuple): --- INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- Same query a third time, but now fails due to existing tuple finally not -- passing quals: --- INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'some technology novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- Don't fail just because INSERT doesn't satisfy WITH CHECK option that -- originated as a barrier/USING() qual from the UPDATE. Note that the UPDATE -- path *isn't* taken, and so UPDATE-related policy does not apply: --- INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- But this time, the same statement fails, because the UPDATE path is taken, -- and updating the row just inserted falls afoul of security barrier qual -- (enforced as WCO) -- what we might have updated target tuple to is -- irrelevant, in fact. --- INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- Test default USING qual enforced as WCO SET SESSION AUTHORIZATION regress_rls_alice; DROP POLICY p1 ON document; @@ -2167,14 +2172,16 @@ SET SESSION AUTHORIZATION regress_rls_bob; -- a USING qual for the purposes of RLS in general, as opposed to an explicit -- USING qual that is ordinarily a security barrier. We leave it up to the -- UPDATE to make this fail: --- INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'technology book, can only insert') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *; +ERROR: new row violates row-level security policy for table "document" -- UPDATE path is taken here. Existing tuple passes, since its cid -- corresponds to "novel", but default USING qual is enforced against -- post-UPDATE tuple too (as always when updating with a policy that lacks an -- explicit WCO), and so this fails: --- INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET cid = EXCLUDED.cid, dtitle = EXCLUDED.dtitle RETURNING *; +INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'technology'), 1, 'regress_rls_bob', 'my first novel') + ON CONFLICT (did) DO UPDATE SET cid = EXCLUDED.cid, dtitle = EXCLUDED.dtitle RETURNING *; +ERROR: new row violates row-level security policy for table "document" SET SESSION AUTHORIZATION regress_rls_alice; DROP POLICY p3_with_default ON document; -- @@ -2186,15 +2193,235 @@ CREATE POLICY p3_with_all ON document FOR ALL WITH CHECK (dauthor = current_user); SET SESSION AUTHORIZATION regress_rls_bob; -- Fails, since ALL WCO is enforced in insert path: --- INSERT INTO document VALUES (80, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33; +INSERT INTO document VALUES (80, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_carol', 'my first novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33; +ERROR: new row violates row-level security policy for table "document" -- Fails, since ALL policy USING qual is enforced (existing, target tuple is in -- violation, since it has the "manga" cid): --- INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; +INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') + ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle; +ERROR: not implemented yet on pax relations: TupleLock -- Fails, since ALL WCO are enforced: --- INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') --- ON CONFLICT (did) DO UPDATE SET dauthor = 'regress_rls_carol'; +INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel') + ON CONFLICT (did) DO UPDATE SET dauthor = 'regress_rls_carol'; +ERROR: not implemented yet on pax relations: TupleLock +-- +-- MERGE +-- +RESET SESSION AUTHORIZATION; +DROP POLICY p3_with_all ON document; +ALTER TABLE document ADD COLUMN dnotes text DEFAULT ''; +-- all documents are readable +CREATE POLICY p1 ON document FOR SELECT USING (true); +-- one may insert documents only authored by them +CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user); +-- one may only update documents in 'novel' category and new dlevel must be > 0 +CREATE POLICY p3 ON document FOR UPDATE + USING (cid = (SELECT cid from category WHERE cname = 'novel')) + WITH CHECK (dlevel > 0); +-- one may only delete documents in 'manga' category +CREATE POLICY p4 ON document FOR DELETE + USING (cid = (SELECT cid from category WHERE cname = 'manga')); +SELECT * FROM document; + did | cid | dlevel | dauthor | dtitle | dnotes +-----+-----+--------+-------------------+-------------------------+-------- + 1 | 11 | 1 | regress_rls_bob | my first novel | + 2 | 11 | 2 | regress_rls_bob | my second novel | + 3 | 22 | 2 | regress_rls_bob | my science fiction | + 4 | 44 | 1 | regress_rls_bob | my first manga | + 5 | 44 | 2 | regress_rls_bob | my second manga | + 6 | 22 | 1 | regress_rls_carol | great science fiction | + 7 | 33 | 2 | regress_rls_carol | great technology book | + 8 | 44 | 1 | regress_rls_carol | great manga | + 9 | 22 | 1 | regress_rls_dave | awesome science fiction | + 10 | 33 | 2 | regress_rls_dave | awesome technology book | + 11 | 33 | 1 | regress_rls_carol | hoge | + 33 | 22 | 1 | regress_rls_bob | okay science fiction | +(12 rows) + +SET SESSION AUTHORIZATION regress_rls_bob; +-- Fails, since update violates WITH CHECK qual on dlevel +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge1 ', dlevel = 0; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- Should be OK since USING and WITH CHECK quals pass +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge2 '; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- Even when dlevel is updated explicitly, but to the existing value +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge3 ', dlevel = 1; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- There is a MATCH for did = 3, but UPDATE's USING qual does not allow +-- updating an item in category 'science fiction' +MERGE INTO document d +USING (SELECT 3 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge '; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- The same thing with DELETE action, but fails again because no permissions +-- to delete items in 'science fiction' category that did 3 belongs to. +MERGE INTO document d +USING (SELECT 3 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + DELETE; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- Document with did 4 belongs to 'manga' category which is allowed for +-- deletion. But this fails because the UPDATE action is matched first and +-- UPDATE policy does not allow updation in the category. +MERGE INTO document d +USING (SELECT 4 as sdid) s +ON did = s.sdid +WHEN MATCHED AND dnotes = '' THEN + UPDATE SET dnotes = dnotes || ' notes added by merge ' +WHEN MATCHED THEN + DELETE; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- UPDATE action is not matched this time because of the WHEN qual. +-- DELETE still fails because role regress_rls_bob does not have SELECT +-- privileges on 'manga' category row in the category table. +MERGE INTO document d +USING (SELECT 4 as sdid) s +ON did = s.sdid +WHEN MATCHED AND dnotes <> '' THEN + UPDATE SET dnotes = dnotes || ' notes added by merge ' +WHEN MATCHED THEN + DELETE; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- OK if DELETE is replaced with DO NOTHING +MERGE INTO document d +USING (SELECT 4 as sdid) s +ON did = s.sdid +WHEN MATCHED AND dnotes <> '' THEN + UPDATE SET dnotes = dnotes || ' notes added by merge ' +WHEN MATCHED THEN + DO NOTHING; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +SELECT * FROM document WHERE did = 4; + did | cid | dlevel | dauthor | dtitle | dnotes +-----+-----+--------+-----------------+----------------+-------- + 4 | 44 | 1 | regress_rls_bob | my first manga | +(1 row) + +-- Switch to regress_rls_carol role and try the DELETE again. It should succeed +-- this time +RESET SESSION AUTHORIZATION; +SET SESSION AUTHORIZATION regress_rls_carol; +MERGE INTO document d +USING (SELECT 4 as sdid) s +ON did = s.sdid +WHEN MATCHED AND dnotes <> '' THEN + UPDATE SET dnotes = dnotes || ' notes added by merge ' +WHEN MATCHED THEN + DELETE; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- Switch back to regress_rls_bob role +RESET SESSION AUTHORIZATION; +SET SESSION AUTHORIZATION regress_rls_bob; +-- Try INSERT action. This fails because we are trying to insert +-- dauthor = regress_rls_dave and INSERT's WITH CHECK does not allow +-- that +MERGE INTO document d +USING (SELECT 12 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES (12, 11, 1, 'regress_rls_dave', 'another novel'); +ERROR: new row violates row-level security policy for table "document" +-- This should be fine +MERGE INTO document d +USING (SELECT 12 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + DELETE +WHEN NOT MATCHED THEN + INSERT VALUES (12, 11, 1, 'regress_rls_bob', 'another novel'); +-- ok +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge4 ' +WHEN NOT MATCHED THEN + INSERT VALUES (12, 11, 1, 'regress_rls_bob', 'another novel'); +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- drop and create a new SELECT policy which prevents us from reading +-- any document except with category 'novel' +RESET SESSION AUTHORIZATION; +DROP POLICY p1 ON document; +CREATE POLICY p1 ON document FOR SELECT + USING (cid = (SELECT cid from category WHERE cname = 'novel')); +SET SESSION AUTHORIZATION regress_rls_bob; +-- MERGE can no longer see the matching row and hence attempts the +-- NOT MATCHED action, which results in unique key violation +MERGE INTO document d +USING (SELECT 7 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge5 ' +WHEN NOT MATCHED THEN + INSERT VALUES (12, 11, 1, 'regress_rls_bob', 'another novel'); +ERROR: duplicate key value violates unique constraint "document_pkey" +-- UPDATE action fails if new row is not visible +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge6 ', + cid = (SELECT cid from category WHERE cname = 'technology'); +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- but OK if new row is visible +MERGE INTO document d +USING (SELECT 1 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge7 ', + cid = (SELECT cid from category WHERE cname = 'novel'); +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +-- OK to insert a new row that is not visible +MERGE INTO document d +USING (SELECT 13 as sdid) s +ON did = s.sdid +WHEN MATCHED THEN + UPDATE SET dnotes = dnotes || ' notes added by merge8 ' +WHEN NOT MATCHED THEN + INSERT VALUES (13, 44, 1, 'regress_rls_bob', 'new manga'); +RESET SESSION AUTHORIZATION; +-- drop the restrictive SELECT policy so that we can look at the +-- final state of the table +DROP POLICY p1 ON document; +-- Just check everything went per plan +SELECT * FROM document; + did | cid | dlevel | dauthor | dtitle | dnotes +-----+-----+--------+-------------------+-------------------------+-------- + 1 | 11 | 1 | regress_rls_bob | my first novel | + 2 | 11 | 2 | regress_rls_bob | my second novel | + 3 | 22 | 2 | regress_rls_bob | my science fiction | + 4 | 44 | 1 | regress_rls_bob | my first manga | + 5 | 44 | 2 | regress_rls_bob | my second manga | + 6 | 22 | 1 | regress_rls_carol | great science fiction | + 7 | 33 | 2 | regress_rls_carol | great technology book | + 8 | 44 | 1 | regress_rls_carol | great manga | + 9 | 22 | 1 | regress_rls_dave | awesome science fiction | + 10 | 33 | 2 | regress_rls_dave | awesome technology book | + 11 | 33 | 1 | regress_rls_carol | hoge | + 12 | 11 | 1 | regress_rls_bob | another novel | + 13 | 44 | 1 | regress_rls_bob | new manga | + 33 | 22 | 1 | regress_rls_bob | okay science fiction | +(14 rows) + -- -- ROLE/GROUP -- @@ -2211,6 +2438,8 @@ INSERT INTO z1 VALUES CREATE POLICY p1 ON z1 TO regress_rls_group1 USING (a % 2 = 0); CREATE POLICY p2 ON z1 TO regress_rls_group2 USING (a % 2 = 1); ALTER TABLE z1 ENABLE ROW LEVEL SECURITY; +analyze z1; +analyze z2; SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM z1 WHERE f_leak(b); NOTICE: f_leak => bbb @@ -2260,17 +2489,18 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2; PREPARE plancache_test3 AS WITH q AS MATERIALIZED (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b); EXPLAIN (COSTS OFF) EXECUTE plancache_test3; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z2 + -> Materialize -> Seq Scan on z1 Filter: (((a % 2) = 0) AND f_leak(b)) - -> Seq Scan on z2 Optimizer: GPORCA -(8 rows) +(9 rows) SET ROLE regress_rls_group1; SELECT * FROM z1 WHERE f_leak(b); @@ -2318,17 +2548,18 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2; (12 rows) EXPLAIN (COSTS OFF) EXECUTE plancache_test3; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z2 + -> Materialize -> Seq Scan on z1 Filter: (((a % 2) = 0) AND f_leak(b)) - -> Seq Scan on z2 Optimizer: GPORCA -(8 rows) +(9 rows) SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM z1 WHERE f_leak(b); @@ -2376,17 +2607,18 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2; (12 rows) EXPLAIN (COSTS OFF) EXECUTE plancache_test3; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z2 + -> Materialize -> Seq Scan on z1 Filter: (((a % 2) = 1) AND f_leak(b)) - -> Seq Scan on z2 Optimizer: GPORCA -(8 rows) +(9 rows) SET ROLE regress_rls_group2; SELECT * FROM z1 WHERE f_leak(b); @@ -2434,17 +2666,18 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2; (12 rows) EXPLAIN (COSTS OFF) EXECUTE plancache_test3; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Nested Loop Join Filter: true -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z2 + -> Materialize -> Seq Scan on z1 Filter: (((a % 2) = 1) AND f_leak(b)) - -> Seq Scan on z2 Optimizer: GPORCA -(8 rows) +(9 rows) -- -- Views should follow policy for view owner. @@ -2557,6 +2790,7 @@ ERROR: permission denied for view rls_view -- Query as role that is not the owner of the table or view with permissions. SET SESSION AUTHORIZATION regress_rls_bob; GRANT SELECT ON rls_view TO regress_rls_carol; +SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM rls_view; NOTICE: f_leak => bbb NOTICE: f_leak => dad @@ -2567,7 +2801,7 @@ NOTICE: f_leak => dad (2 rows) EXPLAIN (COSTS OFF) SELECT * FROM rls_view; - QUERY PLAN + QUERY PLAN ----------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on z1 @@ -2575,6 +2809,283 @@ EXPLAIN (COSTS OFF) SELECT * FROM rls_view; Optimizer: Postgres query optimizer (4 rows) +-- Policy requiring access to another table. +SET SESSION AUTHORIZATION regress_rls_alice; +CREATE TABLE z1_blacklist (a int); +INSERT INTO z1_blacklist VALUES (3), (4); +CREATE POLICY p3 ON z1 AS RESTRICTIVE USING (a NOT IN (SELECT a FROM z1_blacklist)); +-- Query as role that is not owner of table but is owner of view without permissions. +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +-- Query as role that is not the owner of the table or view without permissions. +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +-- Query as role that is not owner of table but is owner of view with permissions. +SET SESSION AUTHORIZATION regress_rls_alice; +GRANT SELECT ON z1_blacklist TO regress_rls_bob; +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; +NOTICE: f_leak => bbb + a | b +---+----- + 2 | bbb +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: ((NOT (hashed SubPlan 1)) AND ((a % 2) = 0) AND f_leak(b)) + SubPlan 1 + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z1_blacklist + Optimizer: Postgres query optimizer +(7 rows) + +-- Query as role that is not the owner of the table or view with permissions. +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; +NOTICE: f_leak => bbb + a | b +---+----- + 2 | bbb +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: ((NOT (hashed SubPlan 1)) AND ((a % 2) = 0) AND f_leak(b)) + SubPlan 1 + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z1_blacklist + Optimizer: Postgres query optimizer +(7 rows) + +SET SESSION AUTHORIZATION regress_rls_alice; +REVOKE SELECT ON z1_blacklist FROM regress_rls_bob; +DROP POLICY p3 ON z1; +SET SESSION AUTHORIZATION regress_rls_bob; +DROP VIEW rls_view; +-- +-- Security invoker views should follow policy for current user. +-- +-- View and table owner are the same. +SET SESSION AUTHORIZATION regress_rls_alice; +CREATE VIEW rls_view WITH (security_invoker) AS + SELECT * FROM z1 WHERE f_leak(b); +GRANT SELECT ON rls_view TO regress_rls_bob; +GRANT SELECT ON rls_view TO regress_rls_carol; +-- Query as table owner. Should return all records. +SELECT * FROM rls_view; +NOTICE: f_leak => aba +NOTICE: f_leak => bbb +NOTICE: f_leak => ccc +NOTICE: f_leak => dad + a | b +---+----- + 1 | aba + 2 | bbb + 3 | ccc + 4 | dad +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: f_leak(b) + Optimizer: GPORCA +(4 rows) + +-- Queries as other users. +-- Should return records based on current user's policies. +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; +NOTICE: f_leak => bbb +NOTICE: f_leak => dad + a | b +---+----- + 2 | bbb + 4 | dad +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: (((a % 2) = 0) AND f_leak(b)) + Optimizer: GPORCA +(4 rows) + +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; +NOTICE: f_leak => aba +NOTICE: f_leak => ccc + a | b +---+----- + 1 | aba + 3 | ccc +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: (((a % 2) = 1) AND f_leak(b)) + Optimizer: GPORCA +(4 rows) + +-- View and table owners are different. +SET SESSION AUTHORIZATION regress_rls_alice; +DROP VIEW rls_view; +SET SESSION AUTHORIZATION regress_rls_bob; +CREATE VIEW rls_view WITH (security_invoker) AS + SELECT * FROM z1 WHERE f_leak(b); +GRANT SELECT ON rls_view TO regress_rls_alice; +GRANT SELECT ON rls_view TO regress_rls_carol; +-- Query as table owner. Should return all records. +SET SESSION AUTHORIZATION regress_rls_alice; +SELECT * FROM rls_view; +NOTICE: f_leak => aba +NOTICE: f_leak => bbb +NOTICE: f_leak => ccc +NOTICE: f_leak => dad + a | b +---+----- + 1 | aba + 2 | bbb + 3 | ccc + 4 | dad +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: f_leak(b) + Optimizer: GPORCA +(4 rows) + +-- Queries as other users. +-- Should return records based on current user's policies. +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; +NOTICE: f_leak => bbb +NOTICE: f_leak => dad + a | b +---+----- + 2 | bbb + 4 | dad +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: (((a % 2) = 0) AND f_leak(b)) + Optimizer: GPORCA +(4 rows) + +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; +NOTICE: f_leak => aba +NOTICE: f_leak => ccc + a | b +---+----- + 1 | aba + 3 | ccc +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: (((a % 2) = 1) AND f_leak(b)) + Optimizer: GPORCA +(4 rows) + +-- Policy requiring access to another table. +SET SESSION AUTHORIZATION regress_rls_alice; +CREATE POLICY p3 ON z1 AS RESTRICTIVE USING (a NOT IN (SELECT a FROM z1_blacklist)); +-- Query as role that is not owner of table but is owner of view without permissions. +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +-- Query as role that is not the owner of the table or view without permissions. +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +-- Query as role that is not owner of table but is owner of view with permissions. +SET SESSION AUTHORIZATION regress_rls_alice; +GRANT SELECT ON z1_blacklist TO regress_rls_bob; +SET SESSION AUTHORIZATION regress_rls_bob; +SELECT * FROM rls_view; +NOTICE: f_leak => bbb + a | b +---+----- + 2 | bbb +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: ((NOT (hashed SubPlan 1)) AND ((a % 2) = 0) AND f_leak(b)) + SubPlan 1 + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z1_blacklist + Optimizer: Postgres query optimizer +(7 rows) + +-- Query as role that is not the owner of the table or view without permissions. +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied. +ERROR: permission denied for table z1_blacklist +-- Query as role that is not the owner of the table or view with permissions. +SET SESSION AUTHORIZATION regress_rls_alice; +GRANT SELECT ON z1_blacklist TO regress_rls_carol; +SET SESSION AUTHORIZATION regress_rls_carol; +SELECT * FROM rls_view; +NOTICE: f_leak => aba + a | b +---+----- + 1 | aba +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM rls_view; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on z1 + Filter: ((NOT (hashed SubPlan 1)) AND ((a % 2) = 1) AND f_leak(b)) + SubPlan 1 + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on z1_blacklist + Optimizer: Postgres query optimizer +(7 rows) + SET SESSION AUTHORIZATION regress_rls_bob; DROP VIEW rls_view; -- @@ -2668,7 +3179,10 @@ NOTICE: f_leak => fgh_updt 8 | fgh_updt_updt | regress_rls_carol (6 rows) --- DELETE FROM x1 WHERE f_leak(b) RETURNING *; +DELETE FROM x1 WHERE f_leak(b) RETURNING *; +NOTICE: f_leak => cde_updt +NOTICE: f_leak => fgh_updt_updt +ERROR: not implemented yet on pax relations: TupleFetchRowVersion -- -- Duplicate Policy Names -- @@ -2718,42 +3232,42 @@ DROP VIEW rls_sbv; -- Expression structure -- SET SESSION AUTHORIZATION regress_rls_alice; -INSERT INTO y2 (SELECT x, md5(x::text) FROM generate_series(0,20) x); +INSERT INTO y2 (SELECT x, public.fipshash(x::text) FROM generate_series(0,20) x); ANALYZE y2; CREATE POLICY p2 ON y2 USING (a % 3 = 0); CREATE POLICY p3 ON y2 USING (a % 4 = 0); SET SESSION AUTHORIZATION regress_rls_bob; SELECT * FROM y2 WHERE f_leak(b); -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => 5feceb66ffc86f38d952786c6d696c79 +NOTICE: f_leak => d4735e3a265e16eee03f59718b9b5d03 +NOTICE: f_leak => 4e07408562bedb8b60ce05c1decfe3ad +NOTICE: f_leak => 4b227777d4dd1fc61c6f884f48641d02 +NOTICE: f_leak => e7f6c011776e8db7cd330b54174fd76f +NOTICE: f_leak => 2c624232cdd221771294dfbb310aca00 +NOTICE: f_leak => 19581e27de7ced00ff1ce50b2047e7a5 +NOTICE: f_leak => 4a44dc15364204a80fe80e9039455cc1 +NOTICE: f_leak => 6b51d431df5d7f141cbececcf79edf3d +NOTICE: f_leak => 8527a891e224136950ff32ca212b45bc +NOTICE: f_leak => e629fa6598d732768f7c726b4b621285 +NOTICE: f_leak => b17ef6d19c7a5b1ee83b907c595526dc +NOTICE: f_leak => 4ec9599fc203d176a301536c2e091a19 +NOTICE: f_leak => f5ca38f748a1d6eaf726b8a42fb575c3 a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 3 | 4e07408562bedb8b60ce05c1decfe3ad + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 9 | 19581e27de7ced00ff1ce50b2047e7a5 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 15 | e629fa6598d732768f7c726b4b621285 + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 (14 rows) EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak(b); @@ -2792,20 +3306,20 @@ NOTICE: f_leak => abc NOTICE: f_leak => abc a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 3 | 4e07408562bedb8b60ce05c1decfe3ad + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 9 | 19581e27de7ced00ff1ce50b2047e7a5 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 15 | e629fa6598d732768f7c726b4b621285 + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 (14 rows) EXPLAIN (COSTS OFF) SELECT * FROM y2 WHERE f_leak('abc'); @@ -2845,20 +3359,20 @@ EXPLAIN (COSTS OFF) SELECT * FROM y2 JOIN test_qual_pushdown ON (b = abc) WHERE (10 rows) SELECT * FROM y2 JOIN test_qual_pushdown ON (b = abc) WHERE f_leak(b); -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => eccbc87e4b5ce2fe28308fd9f2a7baf3 -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => 45c48cce2e2d7fbdea1afc51c7c6ad26 -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => 9bf31c7ff062936a96d3c8bd1f8f2ff3 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => 5feceb66ffc86f38d952786c6d696c79 +NOTICE: f_leak => d4735e3a265e16eee03f59718b9b5d03 +NOTICE: f_leak => 4e07408562bedb8b60ce05c1decfe3ad +NOTICE: f_leak => 4b227777d4dd1fc61c6f884f48641d02 +NOTICE: f_leak => e7f6c011776e8db7cd330b54174fd76f +NOTICE: f_leak => 2c624232cdd221771294dfbb310aca00 +NOTICE: f_leak => 19581e27de7ced00ff1ce50b2047e7a5 +NOTICE: f_leak => 4a44dc15364204a80fe80e9039455cc1 +NOTICE: f_leak => 6b51d431df5d7f141cbececcf79edf3d +NOTICE: f_leak => 8527a891e224136950ff32ca212b45bc +NOTICE: f_leak => e629fa6598d732768f7c726b4b621285 +NOTICE: f_leak => b17ef6d19c7a5b1ee83b907c595526dc +NOTICE: f_leak => 4ec9599fc203d176a301536c2e091a19 +NOTICE: f_leak => f5ca38f748a1d6eaf726b8a42fb575c3 a | b | abc ---+---+----- (0 rows) @@ -2938,33 +3452,33 @@ CREATE TABLE t1 (a integer, b text); CREATE POLICY p1 ON t1 USING (a % 2 = 0); ALTER TABLE t1 ENABLE ROW LEVEL SECURITY; GRANT ALL ON t1 TO regress_rls_bob; -INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x); +INSERT INTO t1 (SELECT x, public.fipshash(x::text) FROM generate_series(0,20) x); SET SESSION AUTHORIZATION regress_rls_bob; WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; -NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da -NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c -NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c -NOTICE: f_leak => 1679091c5a880faf6fb5e6087eb1b2dc -NOTICE: f_leak => c9f0f895fb98ab9159f51fd0297e236d -NOTICE: f_leak => d3d9446802a44259755d38e6d163e820 -NOTICE: f_leak => c20ad4d76fe97759aa27a0c99bff6710 -NOTICE: f_leak => aab3238922bcc25a6f606eb525ffdc56 -NOTICE: f_leak => c74d97b01eae257e44aa9d5bade97baf -NOTICE: f_leak => 6f4922f45568161a8cdf4ad2299f6d23 -NOTICE: f_leak => 98f13708210194c475687be6106a3b84 +NOTICE: f_leak => 5feceb66ffc86f38d952786c6d696c79 +NOTICE: f_leak => d4735e3a265e16eee03f59718b9b5d03 +NOTICE: f_leak => 4b227777d4dd1fc61c6f884f48641d02 +NOTICE: f_leak => e7f6c011776e8db7cd330b54174fd76f +NOTICE: f_leak => 2c624232cdd221771294dfbb310aca00 +NOTICE: f_leak => 4a44dc15364204a80fe80e9039455cc1 +NOTICE: f_leak => 6b51d431df5d7f141cbececcf79edf3d +NOTICE: f_leak => 8527a891e224136950ff32ca212b45bc +NOTICE: f_leak => b17ef6d19c7a5b1ee83b907c595526dc +NOTICE: f_leak => 4ec9599fc203d176a301536c2e091a19 +NOTICE: f_leak => f5ca38f748a1d6eaf726b8a42fb575c3 a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 (11 rows) EXPLAIN (COSTS OFF) @@ -2985,17 +3499,17 @@ ERROR: new row violates row-level security policy for table "t1" WITH cte1 AS (UPDATE t1 SET a = a RETURNING *) SELECT * FROM cte1; --ok a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 (11 rows) WITH cte1 AS (INSERT INTO t1 VALUES (21, 'Fail') RETURNING *) SELECT * FROM cte1; --fail @@ -3049,17 +3563,17 @@ EXPLAIN (COSTS OFF) INSERT INTO t2 (SELECT * FROM t1); SELECT * FROM t2; a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 20 | Success (12 rows) @@ -3075,17 +3589,17 @@ CREATE TABLE t3 AS SELECT * FROM t1; SELECT * FROM t3; a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 20 | Success (12 rows) @@ -3093,17 +3607,17 @@ SELECT * INTO t4 FROM t1; SELECT * FROM t4; a | b ----+---------------------------------- - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 20 | Success (12 rows) @@ -3176,27 +3690,27 @@ RESET SESSION AUTHORIZATION; SELECT * FROM t1; a | b ----+---------------------------------- - 1 | c4ca4238a0b923820dcc509a6f75849b - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 - 7 | 8f14e45fceea167a5a36dedd4bea2543 - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 11 | 6512bd43d9caa6e02c990b0a82652dca - 13 | c51ce410c124a10e0db5e4b97fc2af39 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 17 | 70efdf2ec9b086079795c442636b55fb - 19 | 1f0e3dad99908345f7439f8ffabdffc4 - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 1 | 6b86b273ff34fce19d6b804eff5a3f57 + 3 | 4e07408562bedb8b60ce05c1decfe3ad + 5 | ef2d127de37b942baad06145e54b0c61 + 7 | 7902699be42c8a8e46fbbb4501726517 + 9 | 19581e27de7ced00ff1ce50b2047e7a5 + 11 | 4fc82b26aecb47d2868c4efbe3581732 + 13 | 3fdba35f04dc8c462986c992bcf87554 + 15 | e629fa6598d732768f7c726b4b621285 + 17 | 4523540f1504cd17100c4835e85b7eef + 19 | 9400f1b21cb527d7fa3d3eabba93557a + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 20 | Success (22 rows) @@ -3213,27 +3727,27 @@ SET SESSION AUTHORIZATION regress_rls_alice; SELECT * FROM t1; a | b ----+---------------------------------- - 1 | c4ca4238a0b923820dcc509a6f75849b - 3 | eccbc87e4b5ce2fe28308fd9f2a7baf3 - 5 | e4da3b7fbbce2345d7772b0674a318d5 - 7 | 8f14e45fceea167a5a36dedd4bea2543 - 9 | 45c48cce2e2d7fbdea1afc51c7c6ad26 - 11 | 6512bd43d9caa6e02c990b0a82652dca - 13 | c51ce410c124a10e0db5e4b97fc2af39 - 15 | 9bf31c7ff062936a96d3c8bd1f8f2ff3 - 17 | 70efdf2ec9b086079795c442636b55fb - 19 | 1f0e3dad99908345f7439f8ffabdffc4 - 0 | cfcd208495d565ef66e7dff9f98764da - 2 | c81e728d9d4c2f636f067f89cc14862c - 4 | a87ff679a2f3e71d9181a67b7542122c - 6 | 1679091c5a880faf6fb5e6087eb1b2dc - 8 | c9f0f895fb98ab9159f51fd0297e236d - 10 | d3d9446802a44259755d38e6d163e820 - 12 | c20ad4d76fe97759aa27a0c99bff6710 - 14 | aab3238922bcc25a6f606eb525ffdc56 - 16 | c74d97b01eae257e44aa9d5bade97baf - 18 | 6f4922f45568161a8cdf4ad2299f6d23 - 20 | 98f13708210194c475687be6106a3b84 + 1 | 6b86b273ff34fce19d6b804eff5a3f57 + 3 | 4e07408562bedb8b60ce05c1decfe3ad + 5 | ef2d127de37b942baad06145e54b0c61 + 7 | 7902699be42c8a8e46fbbb4501726517 + 9 | 19581e27de7ced00ff1ce50b2047e7a5 + 11 | 4fc82b26aecb47d2868c4efbe3581732 + 13 | 3fdba35f04dc8c462986c992bcf87554 + 15 | e629fa6598d732768f7c726b4b621285 + 17 | 4523540f1504cd17100c4835e85b7eef + 19 | 9400f1b21cb527d7fa3d3eabba93557a + 0 | 5feceb66ffc86f38d952786c6d696c79 + 2 | d4735e3a265e16eee03f59718b9b5d03 + 4 | 4b227777d4dd1fc61c6f884f48641d02 + 6 | e7f6c011776e8db7cd330b54174fd76f + 8 | 2c624232cdd221771294dfbb310aca00 + 10 | 4a44dc15364204a80fe80e9039455cc1 + 12 | 6b51d431df5d7f141cbececcf79edf3d + 14 | 8527a891e224136950ff32ca212b45bc + 16 | b17ef6d19c7a5b1ee83b907c595526dc + 18 | 4ec9599fc203d176a301536c2e091a19 + 20 | f5ca38f748a1d6eaf726b8a42fb575c3 20 | Success (22 rows) @@ -3287,35 +3801,35 @@ CREATE TABLE copy_t (a integer, b text); CREATE POLICY p1 ON copy_t USING (a % 2 = 0); ALTER TABLE copy_t ENABLE ROW LEVEL SECURITY; GRANT ALL ON copy_t TO regress_rls_bob, regress_rls_exempt_user; -INSERT INTO copy_t (SELECT x, md5(x::text) FROM generate_series(0,10) x); +INSERT INTO copy_t (SELECT x, public.fipshash(x::text) FROM generate_series(0,10) x); -- Check COPY TO as Superuser/owner. RESET SESSION AUTHORIZATION; SET row_security TO OFF; COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; -0,cfcd208495d565ef66e7dff9f98764da -1,c4ca4238a0b923820dcc509a6f75849b -2,c81e728d9d4c2f636f067f89cc14862c -3,eccbc87e4b5ce2fe28308fd9f2a7baf3 -4,a87ff679a2f3e71d9181a67b7542122c -5,e4da3b7fbbce2345d7772b0674a318d5 -6,1679091c5a880faf6fb5e6087eb1b2dc -7,8f14e45fceea167a5a36dedd4bea2543 -8,c9f0f895fb98ab9159f51fd0297e236d -9,45c48cce2e2d7fbdea1afc51c7c6ad26 -10,d3d9446802a44259755d38e6d163e820 +0,5feceb66ffc86f38d952786c6d696c79 +1,6b86b273ff34fce19d6b804eff5a3f57 +2,d4735e3a265e16eee03f59718b9b5d03 +3,4e07408562bedb8b60ce05c1decfe3ad +4,4b227777d4dd1fc61c6f884f48641d02 +5,ef2d127de37b942baad06145e54b0c61 +6,e7f6c011776e8db7cd330b54174fd76f +7,7902699be42c8a8e46fbbb4501726517 +8,2c624232cdd221771294dfbb310aca00 +9,19581e27de7ced00ff1ce50b2047e7a5 +10,4a44dc15364204a80fe80e9039455cc1 SET row_security TO ON; COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; -0,cfcd208495d565ef66e7dff9f98764da -1,c4ca4238a0b923820dcc509a6f75849b -2,c81e728d9d4c2f636f067f89cc14862c -3,eccbc87e4b5ce2fe28308fd9f2a7baf3 -4,a87ff679a2f3e71d9181a67b7542122c -5,e4da3b7fbbce2345d7772b0674a318d5 -6,1679091c5a880faf6fb5e6087eb1b2dc -7,8f14e45fceea167a5a36dedd4bea2543 -8,c9f0f895fb98ab9159f51fd0297e236d -9,45c48cce2e2d7fbdea1afc51c7c6ad26 -10,d3d9446802a44259755d38e6d163e820 +0,5feceb66ffc86f38d952786c6d696c79 +1,6b86b273ff34fce19d6b804eff5a3f57 +2,d4735e3a265e16eee03f59718b9b5d03 +3,4e07408562bedb8b60ce05c1decfe3ad +4,4b227777d4dd1fc61c6f884f48641d02 +5,ef2d127de37b942baad06145e54b0c61 +6,e7f6c011776e8db7cd330b54174fd76f +7,7902699be42c8a8e46fbbb4501726517 +8,2c624232cdd221771294dfbb310aca00 +9,19581e27de7ced00ff1ce50b2047e7a5 +10,4a44dc15364204a80fe80e9039455cc1 -- Check COPY TO as user with permissions. SET SESSION AUTHORIZATION regress_rls_bob; SET row_security TO OFF; @@ -3323,40 +3837,40 @@ COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; --fail ERROR: query would be affected by row-level security policy for table "copy_t" SET row_security TO ON; COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; --ok -0,cfcd208495d565ef66e7dff9f98764da -2,c81e728d9d4c2f636f067f89cc14862c -4,a87ff679a2f3e71d9181a67b7542122c -6,1679091c5a880faf6fb5e6087eb1b2dc -8,c9f0f895fb98ab9159f51fd0297e236d -10,d3d9446802a44259755d38e6d163e820 +0,5feceb66ffc86f38d952786c6d696c79 +2,d4735e3a265e16eee03f59718b9b5d03 +4,4b227777d4dd1fc61c6f884f48641d02 +6,e7f6c011776e8db7cd330b54174fd76f +8,2c624232cdd221771294dfbb310aca00 +10,4a44dc15364204a80fe80e9039455cc1 -- Check COPY TO as user with permissions and BYPASSRLS SET SESSION AUTHORIZATION regress_rls_exempt_user; SET row_security TO OFF; COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; --ok -0,cfcd208495d565ef66e7dff9f98764da -1,c4ca4238a0b923820dcc509a6f75849b -2,c81e728d9d4c2f636f067f89cc14862c -3,eccbc87e4b5ce2fe28308fd9f2a7baf3 -4,a87ff679a2f3e71d9181a67b7542122c -5,e4da3b7fbbce2345d7772b0674a318d5 -6,1679091c5a880faf6fb5e6087eb1b2dc -7,8f14e45fceea167a5a36dedd4bea2543 -8,c9f0f895fb98ab9159f51fd0297e236d -9,45c48cce2e2d7fbdea1afc51c7c6ad26 -10,d3d9446802a44259755d38e6d163e820 +0,5feceb66ffc86f38d952786c6d696c79 +1,6b86b273ff34fce19d6b804eff5a3f57 +2,d4735e3a265e16eee03f59718b9b5d03 +3,4e07408562bedb8b60ce05c1decfe3ad +4,4b227777d4dd1fc61c6f884f48641d02 +5,ef2d127de37b942baad06145e54b0c61 +6,e7f6c011776e8db7cd330b54174fd76f +7,7902699be42c8a8e46fbbb4501726517 +8,2c624232cdd221771294dfbb310aca00 +9,19581e27de7ced00ff1ce50b2047e7a5 +10,4a44dc15364204a80fe80e9039455cc1 SET row_security TO ON; COPY (SELECT * FROM copy_t ORDER BY a ASC) TO STDOUT WITH DELIMITER ','; --ok -0,cfcd208495d565ef66e7dff9f98764da -1,c4ca4238a0b923820dcc509a6f75849b -2,c81e728d9d4c2f636f067f89cc14862c -3,eccbc87e4b5ce2fe28308fd9f2a7baf3 -4,a87ff679a2f3e71d9181a67b7542122c -5,e4da3b7fbbce2345d7772b0674a318d5 -6,1679091c5a880faf6fb5e6087eb1b2dc -7,8f14e45fceea167a5a36dedd4bea2543 -8,c9f0f895fb98ab9159f51fd0297e236d -9,45c48cce2e2d7fbdea1afc51c7c6ad26 -10,d3d9446802a44259755d38e6d163e820 +0,5feceb66ffc86f38d952786c6d696c79 +1,6b86b273ff34fce19d6b804eff5a3f57 +2,d4735e3a265e16eee03f59718b9b5d03 +3,4e07408562bedb8b60ce05c1decfe3ad +4,4b227777d4dd1fc61c6f884f48641d02 +5,ef2d127de37b942baad06145e54b0c61 +6,e7f6c011776e8db7cd330b54174fd76f +7,7902699be42c8a8e46fbbb4501726517 +8,2c624232cdd221771294dfbb310aca00 +9,19581e27de7ced00ff1ce50b2047e7a5 +10,4a44dc15364204a80fe80e9039455cc1 -- Check COPY TO as user without permissions. SET row_security TO OFF; SET SESSION AUTHORIZATION regress_rls_carol; SET row_security TO OFF; @@ -3372,15 +3886,15 @@ CREATE TABLE copy_rel_to (a integer, b text); CREATE POLICY p1 ON copy_rel_to USING (a % 2 = 0); ALTER TABLE copy_rel_to ENABLE ROW LEVEL SECURITY; GRANT ALL ON copy_rel_to TO regress_rls_bob, regress_rls_exempt_user; -INSERT INTO copy_rel_to VALUES (1, md5('1')); +INSERT INTO copy_rel_to VALUES (1, public.fipshash('1')); -- Check COPY TO as Superuser/owner. RESET SESSION AUTHORIZATION; SET row_security TO OFF; COPY copy_rel_to TO STDOUT WITH DELIMITER ','; -1,c4ca4238a0b923820dcc509a6f75849b +1,6b86b273ff34fce19d6b804eff5a3f57 SET row_security TO ON; COPY copy_rel_to TO STDOUT WITH DELIMITER ','; -1,c4ca4238a0b923820dcc509a6f75849b +1,6b86b273ff34fce19d6b804eff5a3f57 -- Check COPY TO as user with permissions. SET SESSION AUTHORIZATION regress_rls_bob; SET row_security TO OFF; @@ -3392,10 +3906,47 @@ COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok SET SESSION AUTHORIZATION regress_rls_exempt_user; SET row_security TO OFF; COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok -1,c4ca4238a0b923820dcc509a6f75849b +1,6b86b273ff34fce19d6b804eff5a3f57 SET row_security TO ON; COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok -1,c4ca4238a0b923820dcc509a6f75849b +1,6b86b273ff34fce19d6b804eff5a3f57 +-- Check COPY TO as user without permissions. SET row_security TO OFF; +SET SESSION AUTHORIZATION regress_rls_carol; +SET row_security TO OFF; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --fail - permission denied +ERROR: permission denied for table copy_rel_to +SET row_security TO ON; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --fail - permission denied +ERROR: permission denied for table copy_rel_to +-- Check behavior with a child table. +RESET SESSION AUTHORIZATION; +SET row_security TO ON; +CREATE TABLE copy_rel_to_child () INHERITS (copy_rel_to); +INSERT INTO copy_rel_to_child VALUES (1, 'one'), (2, 'two'); +-- Check COPY TO as Superuser/owner. +RESET SESSION AUTHORIZATION; +SET row_security TO OFF; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; +1,6b86b273ff34fce19d6b804eff5a3f57 +SET row_security TO ON; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; +1,6b86b273ff34fce19d6b804eff5a3f57 +-- Check COPY TO as user with permissions. +SET SESSION AUTHORIZATION regress_rls_bob; +SET row_security TO OFF; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --fail - would be affected by RLS +ERROR: query would be affected by row-level security policy for table "copy_rel_to" +SET row_security TO ON; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok +2,two +-- Check COPY TO as user with permissions and BYPASSRLS +SET SESSION AUTHORIZATION regress_rls_exempt_user; +SET row_security TO OFF; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok +1,6b86b273ff34fce19d6b804eff5a3f57 +SET row_security TO ON; +COPY copy_rel_to TO STDOUT WITH DELIMITER ','; --ok +1,6b86b273ff34fce19d6b804eff5a3f57 -- Check COPY TO as user without permissions. SET row_security TO OFF; SET SESSION AUTHORIZATION regress_rls_carol; SET row_security TO OFF; @@ -3434,6 +3985,7 @@ ERROR: permission denied for table copy_t RESET SESSION AUTHORIZATION; DROP TABLE copy_t; DROP TABLE copy_rel_to CASCADE; +NOTICE: drop cascades to table copy_rel_to_child -- Check WHERE CURRENT OF SET SESSION AUTHORIZATION regress_rls_alice; CREATE TABLE current_check (currentid int, payload text, rlsuser text); @@ -3630,29 +4182,6 @@ DROP ROLE regress_rls_frank; -- succeeds ROLLBACK TO q; ROLLBACK; -- cleanup -- --- Converting table to view --- -BEGIN; -CREATE TABLE t (c int); -CREATE POLICY p ON t USING (c % 2 = 1); -ALTER TABLE t ENABLE ROW LEVEL SECURITY; -SAVEPOINT q; -CREATE RULE "_RETURN" AS ON SELECT TO t DO INSTEAD - SELECT * FROM generate_series(1,5) t0(c); -- fails due to row-level security enabled -ERROR: cannot convert non-heap table "t" to a view -ROLLBACK TO q; -ALTER TABLE t DISABLE ROW LEVEL SECURITY; -SAVEPOINT q; -CREATE RULE "_RETURN" AS ON SELECT TO t DO INSTEAD - SELECT * FROM generate_series(1,5) t0(c); -- fails due to policy p on t -ERROR: cannot convert non-heap table "t" to a view -ROLLBACK TO q; -DROP POLICY p ON t; -CREATE RULE "_RETURN" AS ON SELECT TO t DO INSTEAD - SELECT * FROM generate_series(1,5) t0(c); -- error, cannot convert non-heap table "t" to a view -ERROR: cannot convert non-heap table "t" to a view -ROLLBACK; --- -- Policy expression handling -- BEGIN; @@ -4109,7 +4638,7 @@ RESET SESSION AUTHORIZATION; -- RESET SESSION AUTHORIZATION; DROP SCHEMA regress_rls_schema CASCADE; -NOTICE: drop cascades to 29 other objects +NOTICE: drop cascades to 30 other objects DETAIL: drop cascades to function f_leak(text) drop cascades to table uaccount drop cascades to table category @@ -4127,6 +4656,7 @@ drop cascades to table b1 drop cascades to view bv1 drop cascades to table z1 drop cascades to table z2 +drop cascades to table z1_blacklist drop cascades to table x1 drop cascades to table y1 drop cascades to table y2 diff --git a/contrib/pax_storage/src/test/regress/expected/rowtypes.out b/contrib/pax_storage/src/test/regress/expected/rowtypes.out index 3eb99224118..32d93a68e4d 100644 --- a/contrib/pax_storage/src/test/regress/expected/rowtypes.out +++ b/contrib/pax_storage/src/test/regress/expected/rowtypes.out @@ -1341,12 +1341,13 @@ select (ss.a).x, (ss.a).n from (select information_schema._pg_expandarray(array[1,2]) AS a) ss; QUERY PLAN ------------------------------------------------------------------------ - Subquery Scan on ss - Output: (ss.a).x, (ss.a).n + Result + Output: ((information_schema._pg_expandarray('{1,2}'::integer[]))).x, ((information_schema._pg_expandarray('{1,2}'::integer[]))).n -> ProjectSet Output: information_schema._pg_expandarray('{1,2}'::integer[]) -> Result -(5 rows) + Output: true +(8 rows) explain (verbose, costs off) select (ss.a).x, (ss.a).n from @@ -1355,9 +1356,13 @@ where false; QUERY PLAN -------------------------- Result - Output: (a).f1, (a).f2 - One-Time Filter: false -(3 rows) + Output: ((information_schema._pg_expandarray('{1,2}'::integer[]))).x, ((information_schema._pg_expandarray('{1,2}'::integer[]))).n + -> ProjectSet + Output: information_schema._pg_expandarray('{1,2}'::integer[]) + -> Result + Output: NULL::boolean + One-Time Filter: false +(9 rows) explain (verbose, costs off) with cte(c) as materialized (select row(1, 2)), diff --git a/contrib/pax_storage/src/test/regress/expected/rpt_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rpt_optimizer.out index 0113aab9fdf..8a6fe7788d1 100644 --- a/contrib/pax_storage/src/test/regress/expected/rpt_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/rpt_optimizer.out @@ -1211,7 +1211,10 @@ explain (costs off) select * from rep_tab; (3 rows) set optimizer_enable_replicated_table=off; +set optimizer_trace_fallback=on; explain (costs off) select * from rep_tab; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Use optimizer_enable_replicated_table to enable replicated tables QUERY PLAN ------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) @@ -1219,12 +1222,65 @@ explain (costs off) select * from rep_tab; Optimizer: Postgres query optimizer (3 rows) +reset optimizer_trace_fallback; reset optimizer_enable_replicated_table; +-- Ensure plan with Gather Motion node is generated. +drop table if exists t; +NOTICE: table "t" does not exist, skipping +create table t (i int, j int) distributed replicated; +insert into t values (1, 2); +explain (costs off) select j, (select j) AS "Correlated Field" from t; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t + SubPlan 1 + -> Result + -> Result + Optimizer: GPORCA +(6 rows) + +select j, (select j) AS "Correlated Field" from t; + j | Correlated Field +---+------------------ + 2 | 2 +(1 row) + +explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on t + -> Materialize + -> Result + Optimizer: GPORCA +(7 rows) + +select j, (select 5) AS "Uncorrelated Field" from t; + j | Uncorrelated Field +---+-------------------- + 2 | 5 +(1 row) + -- start_ignore drop schema rpt cascade; -NOTICE: drop cascades to 7 other objects +NOTICE: drop cascades to 16 other objects DETAIL: drop cascades to table foo drop cascades to table bar +drop cascades to view v_foo drop cascades to table baz drop cascades to table qux +drop cascades to table cursor_update +drop cascades to table minmaxtest +drop cascades to table t_hashdist +drop cascades to table t_replicate_volatile +drop cascades to sequence seq_for_insert_replicated_table +drop cascades to table dist_tab +drop cascades to table rep_tab +drop cascades to table rand_tab +drop cascades to table t1_13532 +drop cascades to table t2_13532 +drop cascades to table t -- end_ignore diff --git a/contrib/pax_storage/src/test/regress/expected/select_distinct_optimizer.out b/contrib/pax_storage/src/test/regress/expected/select_distinct_optimizer.out index 59ce7ab4d3c..b2e634d564e 100644 --- a/contrib/pax_storage/src/test/regress/expected/select_distinct_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/select_distinct_optimizer.out @@ -187,6 +187,27 @@ SET jit_above_cost TO DEFAULT; CREATE TABLE distinct_group_2 AS SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. +SET enable_seqscan = 0; +-- Check to see we get an incremental sort plan +EXPLAIN (costs off) +SELECT DISTINCT hundred, two FROM tenk1; + QUERY PLAN +----------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: hundred, two + -> Sort + Sort Key: hundred, two + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: hundred, two + -> GroupAggregate + Group Key: hundred, two + -> Sort + Sort Key: hundred, two + -> Seq Scan on tenk1 +(13 rows) + +RESET enable_seqscan; SET enable_hashagg=TRUE; SET optimizer_enable_hashagg=TRUE; -- Produce results with hash aggregation. @@ -230,6 +251,157 @@ DROP TABLE distinct_hash_1; DROP TABLE distinct_hash_2; DROP TABLE distinct_group_1; DROP TABLE distinct_group_2; +-- Test parallel DISTINCT +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=2; +-- Ensure we get a parallel plan +EXPLAIN (costs off) +SELECT DISTINCT four FROM tenk1; + QUERY PLAN +----------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: four + -> Sort + Sort Key: four + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: four + -> Streaming HashAggregate + Group Key: four + -> Seq Scan on tenk1 +(11 rows) + +-- Ensure the parallel plan produces the correct results +SELECT DISTINCT four FROM tenk1; + four +------ + 0 + 1 + 2 + 3 +(4 rows) + +CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ + BEGIN + RETURN a; + END; +$$ LANGUAGE plpgsql PARALLEL UNSAFE; +-- Ensure we don't do parallel distinct with a parallel unsafe function +EXPLAIN (COSTS OFF) +SELECT DISTINCT distinct_func(1) FROM tenk1; + QUERY PLAN +--------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: (distinct_func(1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (distinct_func(1)) + -> Seq Scan on tenk1 +(7 rows) + +-- make the function parallel safe +CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ + BEGIN + RETURN a; + END; +$$ LANGUAGE plpgsql PARALLEL SAFE; +-- Ensure we do parallel distinct now that the function is parallel safe +EXPLAIN (COSTS OFF) +SELECT DISTINCT distinct_func(1) FROM tenk1; + QUERY PLAN +--------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: (distinct_func(1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (distinct_func(1)) + -> Seq Scan on tenk1 +(7 rows) + +RESET max_parallel_workers_per_gather; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + QUERY PLAN +----------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: four + -> Sort + Sort Key: four + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: four + -> Streaming HashAggregate + Group Key: four + -> Seq Scan on tenk1 + Filter: (four = 0) +(12 rows) + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + four +------ + 0 +(1 row) + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: four + -> Sort + Sort Key: four + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: four + -> Streaming HashAggregate + Group Key: four + -> Seq Scan on tenk1 + Filter: ((four = 0) AND (two <> 0)) +(12 rows) + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + four +------ +(0 rows) + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + QUERY PLAN +----------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: four, (1), (2), (3) + -> Sort + Sort Key: four, (1), (2), (3) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: four, (1), (2), (3) + -> Streaming HashAggregate + Group Key: four, 1, 2, 3 + -> Seq Scan on tenk1 + Filter: (four = 0) +(12 rows) + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + four | ?column? | ?column? | ?column? +------+----------+----------+---------- + 0 | 1 | 2 | 3 +(1 row) + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. @@ -535,3 +707,218 @@ DROP TABLE capitals; DROP TABLE cities; set gp_statistics_pullup_from_child_partition to off; -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on +create table t_distinct_sort(a int, b int, c int); +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; +analyze t_distinct_sort; +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Sort + Output: (count(a)), (sum(b)) + Sort Key: (sum(t_distinct_sort.b)), (count(t_distinct_sort.a)) + -> Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b +(13 rows) + +select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b +(10 rows) + +select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Result + Output: (count(a)), (sum(b)) + -> Sort + Output: (count(a)), (sum(b)), (sum(a)), (count(c)) + Sort Key: (sum(t_distinct_sort.a)), (count(t_distinct_sort.c)) + -> Finalize Aggregate + Output: count(a), sum(b), sum(a), count(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL sum(a)), (PARTIAL count(c)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL sum(a), PARTIAL count(c) + -> Seq Scan on public.t_distinct_sort + Output: a, b, c +(15 rows) + +select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct count(a), sum(b) from t_distinct_sort ; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(a), sum(b) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b) + -> Seq Scan on public.t_distinct_sort + Output: a, b +(10 rows) + +select distinct count(a), sum(b) from t_distinct_sort ; + count | sum +-------+----- + 30 | 195 +(1 row) + +-- should keep distinct clause +explain(verbose, costs off) +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Unique + Output: (count(a)), (sum(b)), (count(random())) + Group Key: (count(random())) + -> Sort + Output: (count(a)), (sum(b)), (count(random())) + Sort Key: (count(random())) + -> Finalize Aggregate + Output: count(a), sum(b), count(random()) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random())) + -> Partial Aggregate + Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random()) + -> Seq Scan on public.t_distinct_sort + Output: a, b +(16 rows) + +select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; + count | sum +-------+----- + 30 | 195 +(1 row) + +explain(verbose, costs off) +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(t_distinct_sort_1.a) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(t_distinct_sort_1.a)) + -> Partial Aggregate + Output: PARTIAL count(t_distinct_sort_1.a) + -> Nested Loop + Output: t_distinct_sort_1.a + Join Filter: true + -> Seq Scan on public.t_distinct_sort t_distinct_sort_1 + Output: t_distinct_sort_1.a + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> Result + -> ProjectSet + Output: generate_series(0, 2) + -> Finalize Aggregate + Output: count(*) + -> Gather Motion 3:1 (slice3; segments: 3) + Output: (PARTIAL count(*)) + -> Partial Aggregate + Output: PARTIAL count(*) + -> Seq Scan on public.t_distinct_sort +(25 rows) + +select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( +0, 2) from t_distinct_sort)as xx; + count +------- + 90 +(1 row) + +drop table t_distinct_sort; +explain(verbose, costs off) +select distinct(count(a)) from generate_series(0, 1) as a; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(generate_series) + -> Function Scan on pg_catalog.generate_series + Output: generate_series + Function Call: generate_series(0, 1) +(7 rows) + +select distinct(count(a)) from generate_series(0, 1) as a; + count +------- + 2 +(1 row) + +explain(verbose, costs off) +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(*) + -> Nested Loop + Join Filter: true + -> Function Scan on pg_catalog.generate_series generate_series_1 + Output: generate_series_1.generate_series + Function Call: generate_series(0, 1) + -> Function Scan on pg_catalog.generate_series + Output: generate_series.generate_series + Function Call: generate_series(0, 2) +(12 rows) + +select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; + count +------- + 6 +(1 row) + +-- please refer to https://github.com/greenplum-db/gpdb/issues/15033 +CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3)); +CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3)); +INSERT INTO t1_issue_15033 VALUES(10); +SELECT * FROM t1_issue_15033; + c +---- + 10 +(1 row) + +INSERT INTO t2_issue_15033 VALUES(10); +DETAIL: Failing row contains (10). +ERROR: new row for relation "t2_issue_15033" violates check constraint "t2_issue_15033_check" +SELECT * FROM t2_issue_15033; + c +--- +(0 rows) + diff --git a/contrib/pax_storage/src/test/regress/expected/select_into_optimizer.out b/contrib/pax_storage/src/test/regress/expected/select_into_optimizer.out index c8442b79a3c..90e10a3c2ab 100644 --- a/contrib/pax_storage/src/test/regress/expected/select_into_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/select_into_optimizer.out @@ -202,9 +202,9 @@ DROP TABLE easi, easi2; -- -- Disallowed uses of SELECT ... INTO. All should fail -- -DECLARE foo CURSOR FOR SELECT 1 INTO b; +DECLARE foo CURSOR FOR SELECT 1 INTO int4_tbl; ERROR: SELECT ... INTO is not allowed here -LINE 1: DECLARE foo CURSOR FOR SELECT 1 INTO b; +LINE 1: DECLARE foo CURSOR FOR SELECT 1 INTO int4_tbl; ^ COPY (SELECT 1 INTO frak UNION SELECT 2) TO 'blob'; ERROR: COPY (SELECT INTO) is not supported @@ -212,12 +212,31 @@ SELECT * FROM (SELECT 1 INTO f) bar; ERROR: SELECT ... INTO is not allowed here LINE 1: SELECT * FROM (SELECT 1 INTO f) bar; ^ -CREATE VIEW foo AS SELECT 1 INTO b; +CREATE VIEW foo AS SELECT 1 INTO int4_tbl; ERROR: views must not contain SELECT INTO -INSERT INTO b SELECT 1 INTO f; +INSERT INTO int4_tbl SELECT 1 INTO f; ERROR: SELECT ... INTO is not allowed here -LINE 1: INSERT INTO b SELECT 1 INTO f; - ^ +LINE 1: INSERT INTO int4_tbl SELECT 1 INTO f; + ^ +-- +-- Empty target list +-- +explain (costs off) select into empty_tl from generate_series(0, 10); + QUERY PLAN +------------------------------------------------------------- + Result + -> Redistribute Motion 3:3 (slice1; segments: 3) + -> Result + One-Time Filter: (gp_execution_segment() = ###) + -> Function Scan on generate_series +(6 rows) + +select into empty_tl from generate_series(0, 10); +select * from empty_tl; +-- +(11 rows) + +drop table empty_tl; -- Test CREATE TABLE AS ... IF NOT EXISTS CREATE TABLE ctas_ine_tbl AS SELECT 1; CREATE TABLE ctas_ine_tbl AS SELECT 1 / 0; -- error diff --git a/contrib/pax_storage/src/test/regress/expected/select_optimizer.out b/contrib/pax_storage/src/test/regress/expected/select_optimizer.out index 9a3592bea03..e54e00f5430 100644 --- a/contrib/pax_storage/src/test/regress/expected/select_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/select_optimizer.out @@ -293,10 +293,6 @@ RESET enable_seqscan; RESET enable_bitmapscan; RESET enable_sort; RESET optimizer_enable_tablescan; -SELECT two, stringu1, ten, string4 - INTO TABLE tmp - FROM onek; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. -- -- awk '{print $1,$2;}' person.data | -- awk '{if(NF!=2){print $3,$2;}else{print;}}' - emp.data | diff --git a/contrib/pax_storage/src/test/regress/expected/select_views_optimizer.out b/contrib/pax_storage/src/test/regress/expected/select_views_optimizer.out index b4f5ce7faa7..929b95390d0 100644 --- a/contrib/pax_storage/src/test/regress/expected/select_views_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/select_views_optimizer.out @@ -1,11 +1,16 @@ -- -- SELECT_VIEWS + -- test the views defined in CREATE_VIEWS + -- -set pax_enable_sparse_filter to off; + SELECT * FROM street; + name | thepath | cname + ------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------- + Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland @@ -1587,6 +1592,8 @@ NOTICE: f_leak => hamburger (seg1 slice1 127.0.0.1:7003 pid=16817) (1 row) -- Cleanup. + RESET SESSION AUTHORIZATION; + DROP ROLE regress_alice; -reset pax_enable_sparse_filter; + diff --git a/contrib/pax_storage/src/test/regress/expected/select_views_optimizer_1.out b/contrib/pax_storage/src/test/regress/expected/select_views_optimizer_1.out index 3c2a066a798..65f73fac3af 100644 --- a/contrib/pax_storage/src/test/regress/expected/select_views_optimizer_1.out +++ b/contrib/pax_storage/src/test/regress/expected/select_views_optimizer_1.out @@ -340,7 +340,7 @@ SELECT * FROM street; 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette (333 rows) -SELECT name, #thepath FROM iexit ORDER BY 1, 2; +SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; name | ?column? ------------------------------------+---------- I- 580 | 2 @@ -467,20 +467,6 @@ SELECT name, #thepath FROM iexit ORDER BY 1, 2; I- 580 | 21 I- 580 | 22 I- 580 | 22 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 2 - I- 580/I-680 Ramp | 4 - I- 580/I-680 Ramp | 4 - I- 580/I-680 Ramp | 4 - I- 580/I-680 Ramp | 4 - I- 580/I-680 Ramp | 5 - I- 580/I-680 Ramp | 6 - I- 580/I-680 Ramp | 6 - I- 580/I-680 Ramp | 6 I- 580 Ramp | 2 I- 580 Ramp | 2 I- 580 Ramp | 2 @@ -731,6 +717,20 @@ SELECT name, #thepath FROM iexit ORDER BY 1, 2; I- 580 Ramp | 8 I- 580 Ramp | 8 I- 580 Ramp | 8 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 2 + I- 580/I-680 Ramp | 4 + I- 580/I-680 Ramp | 4 + I- 580/I-680 Ramp | 4 + I- 580/I-680 Ramp | 4 + I- 580/I-680 Ramp | 5 + I- 580/I-680 Ramp | 6 + I- 580/I-680 Ramp | 6 + I- 580/I-680 Ramp | 6 I- 680 | 2 I- 680 | 2 I- 680 | 2 @@ -1325,27 +1325,21 @@ SET SESSION AUTHORIZATION regress_alice; -- scenario: if a qualifier with tiny-cost is given, it shall be launched -- prior to the security policy of the view. -- --- start_ignore --- GPDB_92_MERGE_FIXME: ORCA doesn't seem to order the predicates based on the --- cost of the function, do we want to do that? --- end_ignore SELECT * FROM my_property_normal WHERE f_leak(passwd); -NOTICE: f_leak => passwd123 (seg0 slice1 10.64.4.155:25432 pid=8483) +NOTICE: f_leak => beafsteak +NOTICE: f_leak => hamburger +NOTICE: f_leak => passwd123 cid | name | tel | passwd -----+---------------+------------------+----------- 101 | regress_alice | +81-12-3456-7890 | passwd123 (1 row) --- start_ignore --- GPDB_92_MERGE_FIXME: ORCA doesn't seem to order the predicates based on the --- cost of the function, do we want to do that? --- end_ignore EXPLAIN (COSTS OFF) SELECT * FROM my_property_normal WHERE f_leak(passwd); QUERY PLAN --------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on customer - Filter: ((name = 'regress_alice'::text) AND f_leak(passwd)) + Filter: (f_leak(passwd) AND (name = 'regress_alice'::name)) Optimizer: Pivotal Optimizer (GPORCA) version 2.59.1 (4 rows) @@ -1363,7 +1357,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_property_secure WHERE f_leak(passwd); -> Subquery Scan on my_property_secure Filter: f_leak(my_property_secure.passwd) -> Seq Scan on customer - Filter: (name = 'regress_alice'::text) + Filter: (name = 'regress_alice'::name) Optimizer: Postgres query optimizer (6 rows) @@ -1373,8 +1367,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_property_secure WHERE f_leak(passwd); -- SELECT * FROM my_property_normal v WHERE f_leak('passwd') AND f_leak(passwd); -NOTICE: f_leak => passwd (seg1 slice1 127.0.0.1:25433 pid=36802) -NOTICE: f_leak => passwd123 (seg1 slice1 127.0.0.1:25433 pid=36802) +NOTICE: f_leak => beafsteak +NOTICE: f_leak => hamburger +NOTICE: f_leak => passwd +NOTICE: f_leak => passwd123 cid | name | tel | passwd -----+---------------+------------------+----------- 101 | regress_alice | +81-12-3456-7890 | passwd123 @@ -1386,7 +1382,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_property_normal v ------------------------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) -> Seq Scan on customer - Filter: ((name = 'regress_alice'::text) AND f_leak('passwd'::text) AND f_leak(passwd)) + Filter: (f_leak('passwd'::text) AND f_leak(passwd) AND (name = 'regress_alice'::name)) Optimizer: Pivotal Optimizer (GPORCA) version 3.44.0 (4 rows) @@ -1409,7 +1405,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_property_secure v -> Subquery Scan on v Filter: f_leak(v.passwd) -> Seq Scan on customer - Filter: (f_leak('passwd'::text) AND (name = 'regress_alice'::text)) + Filter: (f_leak('passwd'::text) AND (name = 'regress_alice'::name)) Optimizer: Postgres query optimizer (6 rows) @@ -1431,13 +1427,13 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN ------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Nested Loop - Join Filter: true - -> Seq Scan on credit_card + -> Hash Join + Hash Cond: ((r.cid = l.cid) AND (r.dist_key = l.dist_key)) + -> Seq Scan on credit_card r Filter: f_leak(cnum) - -> Index Scan using customer_dist_key_cid_key on customer - Index Cond: ((dist_key = credit_card.dist_key) AND (cid = credit_card.cid)) - Filter: (name = 'regress_alice'::text) + -> Hash + -> Seq Scan on customer l + Filter: (name = 'regress_alice'::name) Optimizer: Pivotal Optimizer (GPORCA) version 2.67.0 (9 rows) @@ -1459,7 +1455,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); -> Seq Scan on credit_card r -> Hash -> Seq Scan on customer l - Filter: (name = 'regress_alice'::text) + Filter: (name = 'regress_alice'::name) Optimizer: Postgres query optimizer (10 rows) @@ -1496,7 +1492,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Seq Scan on credit_card r_1 -> Hash -> Seq Scan on customer l_1 - Filter: (name = 'regress_alice'::text) + Filter: (name = 'regress_alice'::name) Optimizer: Postgres query optimizer (16 rows) @@ -1533,7 +1529,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_card r_1 -> Hash -> Seq Scan on customer l - Filter: (name = 'regress_alice'::text) + Filter: (name = 'regress_alice'::name) Optimizer: Postgres query optimizer (19 rows) @@ -1544,11 +1540,9 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure PREPARE p1 AS SELECT * FROM my_property_normal WHERE f_leak(passwd); PREPARE p2 AS SELECT * FROM my_property_secure WHERE f_leak(passwd); EXECUTE p1; --- start_ignore --- GPDB_92_MERGE_FIXME: ORCA doesn't seem to order the predicates based on the --- cost of the function, do we want to do that? --- end_ignore -NOTICE: f_leak => passwd123 (seg0 slice1 127.0.0.1:25434 pid=892) +NOTICE: f_leak => beafsteak +NOTICE: f_leak => hamburger +NOTICE: f_leak => passwd123 cid | name | tel | passwd -----+---------------+------------------+----------- 101 | regress_alice | +81-12-3456-7890 | passwd123 @@ -1573,11 +1567,9 @@ NOTICE: f_leak => passwd123 (1 row) EXECUTE p2; -- To be perform as a view without security-barrier --- start_ignore --- GPDB_92_MERGE_FIXME: ORCA doesn't seem to order the predicates based on the --- cost of the function, do we want to do that? --- end_ignore -NOTICE: f_leak => passwd123 (seg0 slice1 127.0.0.1:25434 pid=892) +NOTICE: f_leak => beafsteak +NOTICE: f_leak => hamburger +NOTICE: f_leak => passwd123 cid | name | tel | passwd -----+---------------+------------------+----------- 101 | regress_alice | +81-12-3456-7890 | passwd123 diff --git a/contrib/pax_storage/src/test/regress/expected/stats_ext_optimizer.out b/contrib/pax_storage/src/test/regress/expected/stats_ext_optimizer.out index 46a9c4b4273..fd317cc062a 100644 --- a/contrib/pax_storage/src/test/regress/expected/stats_ext_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/stats_ext_optimizer.out @@ -160,10 +160,9 @@ SELECT stxname, stxdndistinct, stxddependencies, stxdmcv FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxname = 'ab1_a_b_stats' AND d.stxoid = s.oid; - stxname | stxdndistinct | stxddependencies | stxdmcv ----------------+---------------+------------------+--------- - ab1_a_b_stats | | | -(1 row) + stxname | stxdndistinct | stxddependencies | stxdmcv +---------+---------------+------------------+--------- +(0 rows) ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; \d+ ab1 @@ -224,7 +223,7 @@ VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2; SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2'); estimated | actual -----------+-------- - 400 | 150 + 150 | 150 (1 row) -- Dependencies are applied at individual relations (within append), so @@ -309,14 +308,18 @@ CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO NOTICE: table has parent, setting distribution columns to match parent table CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t; CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti; -ERROR: relation "ti" is not a table, foreign table, or materialized view +DETAIL: This operation is not supported for indexes. +ERROR: cannot define statistics for relation "ti" CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s; -ERROR: relation "s" is not a table, foreign table, or materialized view +DETAIL: This operation is not supported for sequences. +ERROR: cannot define statistics for relation "s" CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v; -ERROR: relation "v" is not a table, foreign table, or materialized view +DETAIL: This operation is not supported for views. +ERROR: cannot define statistics for relation "v" CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv; CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty; -ERROR: relation "ty" is not a table, foreign table, or materialized view +DETAIL: This operation is not supported for composite types. +ERROR: cannot define statistics for relation "ty" CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f; CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt; CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1; diff --git a/contrib/pax_storage/src/test/regress/expected/subselect_gp_indexes_optimizer.out b/contrib/pax_storage/src/test/regress/expected/subselect_gp_indexes_optimizer.out index 713b577e48b..f925f66b638 100644 --- a/contrib/pax_storage/src/test/regress/expected/subselect_gp_indexes_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/subselect_gp_indexes_optimizer.out @@ -119,7 +119,7 @@ explain select t1.id1, (select count(*) from choose_seqscan_t2 t2 where t2.id1 = (13 rows) -- Test using a join within the subplan. It could perhaps use an Nested Loop Join + --- Index Scan to do the join, but at the memont, the planner doesn't consider distributing +-- Index Scan to do the join, but at the moment, the planner doesn't consider distributing -- the Function Scan. select t1.id1, (select count(*) from generate_series(1,5) g, choose_seqscan_t2 t2 where t1.id1 = t2.id1 and t2.id2 = g) from choose_seqscan_t1 t1 where t1.id1 < 10; id1 | count diff --git a/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out b/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out index 41ab2a59a70..9bd62cbd4d2 100644 --- a/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/subselect_gp_optimizer.out @@ -66,6 +66,45 @@ select csq_t1.x, (select bar.x from csq_t1 bar where bar.x = csq_t1.x) as sum fr 4 | 4 (3 rows) +-- +-- Another case correlations in the targetlist: PlaceHolderVar +-- +drop table if exists phv_t; +create table phv_t(a int, b int) distributed by (a); +insert into phv_t values(1,1),(2,2); +explain(costs off) select *, (select ss.y as z from phv_t as t3 limit 1) from phv_t t1 left join +(select a as x, 42 as y from phv_t t2) ss on t1.b = ss.x order by 1,2; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.a, t1.b + -> Result + -> Sort + Sort Key: t1.a, t1.b + -> Hash Left Join + Hash Cond: (t1.b = t2.a) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t1.b + -> Seq Scan on phv_t t1 + -> Hash + -> Seq Scan on phv_t t2 + SubPlan 1 + -> Limit + -> Result + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on phv_t t3 + Optimizer: GPORCA +(19 rows) + +select *, (select ss.y as z from phv_t as t3 limit 1) from phv_t t1 left join +(select a as x, 42 as y from phv_t t2) ss on t1.b = ss.x order by 1,2; + a | b | x | y | z +---+---+---+----+---- + 1 | 1 | 1 | 42 | 42 + 2 | 2 | 2 | 42 | 42 +(2 rows) + -- -- CSQs with partitioned tables -- @@ -239,6 +278,66 @@ select * from mrs_u1 join mrs_u2 on mrs_u1.a=mrs_u2.a where mrs_u1.a in (1,11) o drop table if exists mrs_u1; drop table if exists mrs_u2; -- +-- Set right motion type to subquery +-- +drop table if exists gs_tab; +create table gs_tab(a int, b int, c int) distributed by (a); +insert into gs_tab values (1,1,1),(2,2,2); +explain(costs off) +select a from gs_tab t1 where b in + (select b from gs_tab t2 where c in + (select c from gs_tab t3) + or (c >= 2)) + or (b <= 3) +order by a; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.a + -> Sort + Sort Key: t1.a + -> Result + Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((t1.b = t2.b) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END OR (t1.b <= 3)) + -> GroupAggregate + Group Key: t1.a, t1.b, t1.ctid, t1.gp_segment_id + -> Nested Loop Left Join + Join Filter: ((t1.b = t2.b) IS NOT FALSE) + -> Sort + Sort Key: t1.a, t1.b, t1.ctid, t1.gp_segment_id + -> Seq Scan on gs_tab t1 + -> Materialize + -> Result + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Result + Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((t2.c = t3.c) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END OR (t2.c >= 2)) + -> GroupAggregate + Group Key: t2.a, t2.b, t2.c, t2.ctid, t2.gp_segment_id + -> Nested Loop Left Join + Join Filter: ((t2.c = t3.c) IS NOT FALSE) + -> Sort + Sort Key: t2.a, t2.b, t2.c, t2.ctid, t2.gp_segment_id + -> Seq Scan on gs_tab t2 + -> Materialize + -> Result + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on gs_tab t3 + Optimizer: GPORCA +(30 rows) + +select a from gs_tab t1 where b in + (select b from gs_tab t2 where c in + (select c from gs_tab t3) + or (c >= 2)) + or (b <= 3) +order by a; + a +--- + 1 + 2 +(2 rows) + +drop table if exists gs_tab; +-- -- MPP-13758 -- drop table if exists csq_m1; @@ -1403,9 +1502,6 @@ SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE -- Same as above, but with another subquery, so it must use a SubPlan. There -- are two references to the same SubPlan in the plan, on different slices. --- GPDB_96_MERGE_FIXME: this EXPLAIN output should become nicer-looking once we --- merge upstream commit 4d042999f9, to suppress the SubPlans from being --- printed twice. explain SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4); QUERY PLAN ----------------------------------------------------------------------------------------------------------------------- @@ -1636,6 +1732,80 @@ EXPLAIN SELECT '' AS five, f1 AS "Correlated Field" Optimizer: Postgres query optimizer (13 rows) +-- Test simplify group-by/order-by inside subquery if sublink pull-up is possible +EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 GROUP BY f2); + QUERY PLAN +-------------------------------------------------------------------------------------------- + Result (cost=0.00..862.00 rows=8 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=8) + -> Hash Semi Join (cost=0.00..862.00 rows=3 width=8) + Hash Cond: ((upper.f1 = subselect_tbl.f1) AND (upper.f1 = subselect_tbl.f2)) + -> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=8) + -> Hash (cost=431.00..431.00 rows=3 width=8) + -> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(8 rows) + +EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 GROUP BY f2 LIMIT 3); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Result (cost=0.00..1324038.68 rows=8 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324038.68 rows=8 width=8) + -> Seq Scan on subselect_tbl upper (cost=0.00..1324038.68 rows=3 width=8) + Filter: (SubPlan 1) + SubPlan 1 + -> Limit (cost=0.00..431.01 rows=1 width=4) + -> HashAggregate (cost=0.00..431.01 rows=1 width=4) + Group Key: subselect_tbl.f2 + -> Result (cost=0.00..431.01 rows=1 width=4) + Filter: (subselect_tbl.f1 = upper.f1) + -> Materialize (cost=0.00..431.00 rows=8 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=8 width=8) + -> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(14 rows) + +EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 ORDER BY f2); + QUERY PLAN +-------------------------------------------------------------------------------------------- + Result (cost=0.00..862.00 rows=8 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=8) + -> Hash Semi Join (cost=0.00..862.00 rows=3 width=8) + Hash Cond: ((upper.f1 = subselect_tbl.f1) AND (upper.f1 = subselect_tbl.f2)) + -> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=8) + Filter: (NOT (f1 IS NULL)) + -> Hash (cost=431.00..431.00 rows=3 width=8) + -> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(9 rows) + +EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 ORDER BY f2 LIMIT 3); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Result (cost=0.00..1324039.05 rows=8 width=16) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324039.05 rows=8 width=8) + -> Seq Scan on subselect_tbl upper (cost=0.00..1324039.05 rows=3 width=8) + Filter: ((NOT (f1 IS NULL)) AND (SubPlan 1)) + SubPlan 1 + -> Limit (cost=0.00..431.01 rows=1 width=4) + -> Sort (cost=0.00..431.01 rows=1 width=4) + Sort Key: subselect_tbl.f2 + -> Result (cost=0.00..431.01 rows=1 width=4) + Filter: (subselect_tbl.f1 = upper.f1) + -> Materialize (cost=0.00..431.00 rows=8 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=8 width=8) + -> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8) + Optimizer: GPORCA +(14 rows) + -- -- Test cases to catch unpleasant interactions between IN-join processing -- and subquery pullup. @@ -1755,20 +1925,22 @@ EXPLAIN select count(distinct ss.ten) from -- we should see 2 subplans in the explain -- EXPLAIN SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------------------- - Limit (cost=0.00..865.45 rows=1 width=1) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..865.45 rows=1 width=1) - -> Limit (cost=0.00..865.45 rows=1 width=1) - -> Hash Left Join (cost=0.00..865.42 rows=3334 width=8) + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Limit (cost=0.00..865.49 rows=1 width=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..865.49 rows=1 width=1) + -> Limit (cost=0.00..865.49 rows=1 width=1) + -> Hash Left Join (cost=0.00..865.46 rows=3334 width=8) Hash Cond: (tenk2.unique1 = tenk1.unique1) - -> Seq Scan on tenk2 (cost=0.00..431.51 rows=3334 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.58 rows=3334 width=4) + Hash Key: tenk2.unique1 + -> Seq Scan on tenk2 (cost=0.00..431.51 rows=3334 width=4) -> Hash (cost=431.96..431.96 rows=3334 width=12) -> HashAggregate (cost=0.00..431.96 rows=3334 width=12) Group Key: tenk1.unique1 -> Seq Scan on tenk1 (cost=0.00..431.51 rows=3334 width=4) Optimizer: GPORCA -(11 rows) +(13 rows) SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1; exists @@ -3094,3 +3266,910 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu 1 | 10002 (1 row) +-- case 3, check correlated DISTINCT ON +explain select * from issue_12656 a where (i, j) in +(select distinct on (i) i, j from issue_12656 b where a.i=b.i order by i, j asc); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..895.00 rows=43050 width=8) + -> Seq Scan on issue_12656 a (cost=0.00..321.00 rows=14350 width=8) + Filter: (SubPlan 1) + SubPlan 1 + -> Unique (cost=9818.00..10033.25 rows=1 width=8) + -> Sort (cost=9818.00..10033.25 rows=86100 width=8) + Sort Key: b.j + -> Result (cost=0.00..2760.50 rows=86100 width=8) + Filter: (a.i = b.i) + -> Materialize (cost=0.00..1899.50 rows=86100 width=8) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8) + -> Seq Scan on issue_12656 b (cost=0.00..321.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(13 rows) + +select * from issue_12656 a where (i, j) in +(select distinct on (i) i, j from issue_12656 b where a.i=b.i order by i, j asc); + i | j +---+------- + 1 | 10001 +(1 row) + +--- +--- Test param info is preserved when bringing a path to OuterQuery locus +--- +drop table if exists param_t; +create table param_t (i int, j int); +insert into param_t select i, i from generate_series(1,10)i; +analyze param_t; +explain (costs off) +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on param_t a + Filter: (SubPlan 1) + SubPlan 1 + -> Aggregate + -> Nested Loop + -> Nested Loop + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on param_t c + -> Materialize + -> Result + Filter: (d.i = a.i) + -> Limit + -> Result + Filter: (d.j = c.j) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on param_t d + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on param_t b + Optimizer: Postgres query optimizer +(23 rows) + +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + i | j +----+---- + 10 | 10 +(1 row) + +drop table if exists param_t; +-- A guard test case for gpexpand's populate SQL +-- Some simple notes and background is: we want to compute +-- table size efficiently, it is better to avoid invoke +-- pg_relation_size() in serial on QD, since this function +-- will dispatch for each tuple. The bad pattern SQL is like +-- select pg_relation_size(oid) from pg_class where xxx +-- The idea is force pg_relations_size is evaluated on each +-- segment and the sum the result together to get the final +-- result. To make sure correctness, we have to evaluate +-- pg_relation_size before any motion. The skill here is +-- to wrap this in a subquery, due to volatile of pg_relation_size, +-- this subquery won't be pulled up. Plus the skill of +-- gp_dist_random('pg_class') we can achieve this goal. +-- the below test is to verify the plan, we should see pg_relation_size +-- is evaludated on each segment and then motion then sum together. The +-- SQL pattern is a catalog join a table size "dict". +set gp_enable_multiphase_agg = on; +-- force nestloop join to make test stable since we +-- are testing plan and do not care about where we +-- put hash table. +set enable_hashjoin = off; +set enable_nestloop = on; +set enable_indexscan = off; +set enable_bitmapscan = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: optimizer = 'on', gp_enable_multiphase_agg = 'on', enable_hashjoin = 'off', enable_nestloop = 'on', enable_indexscan = 'off', enable_bitmapscan = 'off' + Optimizer: Postgres query optimizer +(22 rows) + +set gp_enable_multiphase_agg = off; +explain (verbose on, costs off) +with cte(table_oid, size) as +( + select + table_oid, + sum(size) size + from ( + select oid, + pg_relation_size(oid) + from gp_dist_random('pg_class') + ) x(table_oid, size) + group by table_oid +) +select pc.relname, ts.size +from pg_class pc, cte ts +where pc.oid = ts.table_oid; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> Nested Loop + Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + Join Filter: (pc.oid = pg_class.oid) + -> Redistribute Motion 1:3 (slice2) + Output: pc.relname, pc.oid + Hash Key: pc.oid + -> Seq Scan on pg_catalog.pg_class pc + Output: pc.relname, pc.oid + -> Materialize + Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))) + -> HashAggregate + Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))) + Group Key: pg_class.oid + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text)) + Hash Key: pg_class.oid + -> Seq Scan on pg_catalog.pg_class + Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text) + Settings: optimizer = 'on', gp_enable_multiphase_agg = 'off', enable_hashjoin = 'off', enable_nestloop = 'on', enable_indexscan = 'off', enable_bitmapscan = 'off' + Optimizer: Postgres query optimizer +(22 rows) + +reset gp_enable_multiphase_agg; +reset enable_hashjoin; +reset enable_nestloop; +reset enable_indexscan; +reset enable_bitmapscan; +create table sublink_outer_table(a int, b int) distributed by(b); +create table sublink_inner_table(x int, y bigint) distributed by(y); +set optimizer to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=432.67..1307.44 rows=28700 width=8) + -> Hash Join (cost=432.67..924.77 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=428.50..428.50 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(10 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=433.78..1308.55 rows=28700 width=8) + -> Hash Join (cost=433.78..925.89 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=429.61..429.61 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=423.50..429.61 rows=333 width=40) + -> HashAggregate (cost=423.50..428.50 rows=333 width=40) + Group Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(11 rows) + +set enable_hashagg to off; +explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2401.51..3276.28 rows=28700 width=8) + -> Hash Join (cost=2401.51..2893.62 rows=9567 width=8) + Hash Cond: (t.b = sublink_inner_table.y) + Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x)))) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2397.34..2397.34 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: sublink_inner_table.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: sublink_inner_table.y + -> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(12 rows) + +explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2402.62..3277.39 rows=28700 width=8) + -> Hash Join (cost=2402.62..2894.73 rows=9567 width=8) + Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0) + Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1) + -> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=2398.45..2398.45 rows=333 width=40) + -> Subquery Scan on "Expr_SUBQUERY" (cost=2197.59..2398.45 rows=333 width=40) + -> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40) + Group Key: r.y + -> Sort (cost=2197.59..2262.51 rows=25967 width=12) + Sort Key: r.y + -> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12) + Optimizer: Postgres query optimizer +(13 rows) + +drop table sublink_outer_table; +drop table sublink_inner_table; +reset optimizer; +reset enable_hashagg; +-- Ensure sub-queries with order by outer reference can be decorrelated and executed correctly. +create table r(a int, b int, c int) distributed by (a); +create table s(a int, b int, c int) distributed by (a); +insert into r values (1,2,3); +insert into s values (1,2,10); +explain (costs off) select * from r where b in (select b from s where c=10 order by r.c); + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (r.b = s.b) + -> Seq Scan on r + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(9 rows) + +select * from r where b in (select b from s where c=10 order by r.c); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +explain (costs off) select * from r where b in (select b from s where c=10 order by r.c limit 2); + QUERY PLAN +------------------------------------------------------------ + Hash Semi Join + Hash Cond: (r.b = s.b) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on r + -> Hash + -> Limit + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(10 rows) + +select * from r where b in (select b from s where c=10 order by r.c limit 2); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +explain (costs off) select * from r where b in (select b from s where c=10 order by r.c, b); + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (r.b = s.b) + -> Seq Scan on r + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(9 rows) + +select * from r where b in (select b from s where c=10 order by r.c, b); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +explain (costs off) select * from r where b in (select b from s where c=10 order by r.c, b limit 2); + QUERY PLAN +------------------------------------------------------------ + Hash Semi Join + Hash Cond: (r.b = s.b) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on r + -> Hash + -> Limit + -> Gather Motion 3:1 (slice2; segments: 3) + Merge Key: s.b + -> Sort + Sort Key: s.b + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(13 rows) + +select * from r where b in (select b from s where c=10 order by r.c, b limit 2); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +explain (costs off) select * from r where b in (select b from s where c=10 order by c); + QUERY PLAN +--------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (r.b = s.b) + -> Seq Scan on r + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(9 rows) + +select * from r where b in (select b from s where c=10 order by c); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +explain (costs off) select * from r where b in (select b from s where c=10 order by c limit 2); + QUERY PLAN +------------------------------------------------------------ + Hash Semi Join + Hash Cond: (r.b = s.b) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on r + -> Hash + -> Limit + -> Gather Motion 3:1 (slice2; segments: 3) + Merge Key: s.c + -> Sort + Sort Key: s.c + -> Seq Scan on s + Filter: (c = 10) + Optimizer: GPORCA +(13 rows) + +select * from r where b in (select b from s where c=10 order by c limit 2); + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +-- Test nested query with aggregate inside a sublink, +-- ORCA should correctly normalize the aggregate expression inside the +-- sublink's nested query and the column variable accessed in aggregate should +-- be accessible to the aggregate after the normalization of query. +-- If the query is not supported, ORCA should gracefully fallback to postgres +explain (COSTS OFF) with t0 AS ( + SELECT + ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x)) + AS c + FROM r + JOIN s ON true + JOIN s as t ON true + ) +SELECT c FROM t0; + QUERY PLAN +--------------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Nested Loop + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Nested Loop + -> Seq Scan on r + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on s + -> Materialize + -> Seq Scan on s t + SubPlan 1 + -> Result + Optimizer: Postgres query optimizer +(15 rows) + +-- Test push predicate into subquery +-- more details could be found at https://github.com/greenplum-db/gpdb/issues/8429 +CREATE TABLE foo_predicate_pushdown (a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE bar_predicate_pushdown (c int, d int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +explain (costs off) select * from ( + select distinct (select bar.c from bar_predicate_pushdown bar where c = foo.b) as ss from foo_predicate_pushdown foo +) ABC where ABC.ss = 5; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: ((SubPlan 1)) + -> Sort + Sort Key: ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: ((SubPlan 1)) + -> Result + Filter: (((SubPlan 1)) = 5) + -> Seq Scan on foo_predicate_pushdown foo + SubPlan 1 + -> Result + Filter: (bar.c = foo.b) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on bar_predicate_pushdown bar + Optimizer: GPORCA +(17 rows) + +DROP TABLE foo_predicate_pushdown; +DROP TABLE bar_predicate_pushdown; +-- +-- Test case for ORCA semi join with random table +-- See https://github.com/greenplum-db/gpdb/issues/16611 +-- +--- case for random distribute +create table table_left (l1 int, l2 int) distributed by (l1); +create table table_right (r1 int, r2 int) distributed randomly; +create index table_right_idx on table_right(r1); +insert into table_left values (1,1); +insert into table_right select i, i from generate_series(1, 300) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +--- make sure the same value (1,1) rows are inserted into different segments +select count(distinct gp_segment_id) > 1 from table_right where r1 = 1; + ?column? +---------- + t +(1 row) + +analyze table_left; +analyze table_right; +-- two types of semi join tests +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + Optimizer: GPORCA +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: table_right.r1 + -> Seq Scan on table_right + Optimizer: GPORCA +(9 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for replicate distribute +alter table table_right set distributed replicated; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> GroupAggregate + Group Key: table_right.r1 + -> Index Only Scan using table_right_idx on table_right + Index Cond: (r1 = table_left.l1) + Optimizer: GPORCA +(11 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Nested Loop + Join Filter: true + -> Broadcast Motion 3:1 (slice2; segments: 3) + -> Seq Scan on table_left + -> GroupAggregate + Group Key: table_right.r1 + -> Index Only Scan using table_right_idx on table_right + Index Cond: (r1 = table_left.l1) + Optimizer: GPORCA +(10 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +--- case for partition table with random distribute +drop table table_right; +create table table_right (r1 int, r2 int) distributed randomly partition by range (r1) ( start (0) end (300) every (100)); +create index table_right_idx on table_right(r1); +insert into table_right select i, i from generate_series(1, 299) i; +insert into table_right select 1, 1 from generate_series(1, 100) i; +analyze table_right; +explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1); + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + Filter: (NOT (l1 IS NULL)) + -> Hash + -> Result + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: table_right.r1 + -> Dynamic Seq Scan on table_right + Number of partitions to scan: 3 (out of 3) + Optimizer: GPORCA +(12 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +explain (costs off) select * from table_left where l1 in (select r1 from table_right); + QUERY PLAN +---------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Semi Join + Hash Cond: (table_left.l1 = table_right.r1) + -> Seq Scan on table_left + -> Hash + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: table_right.r1 + -> Dynamic Seq Scan on table_right + Number of partitions to scan: 3 (out of 3) + Optimizer: GPORCA +(10 rows) + +select * from table_left where exists (select 1 from table_right where l1 = r1); + l1 | l2 +----+---- + 1 | 1 +(1 row) + +-- clean up +drop table table_left; +drop table table_right; +-- test cross params of initplan +-- https://github.com/greenplum-db/gpdb/issues/16268 +create table tmp (a varchar, b varchar, c varchar); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select (SELECT EXISTS + (SELECT + FROM pg_views + WHERE schemaname = a)) from tmp; + exists +-------- +(0 rows) + +drop table tmp; +-- Test LEAST() and GREATEST() with an embedded subquery +drop table if exists foo; +create table foo (a int, b int) distributed by(a); +insert into foo values (1, 2), (2, 3), (3, 4); +analyze foo; +explain (costs off) select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1), NULL); + QUERY PLAN +------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on foo + Filter: (a <= LEAST(b, (SubPlan 1), NULL::integer)) + SubPlan 1 + -> Result + Optimizer: GPORCA +(6 rows) + +select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1), NULL); + a +--- + 1 +(1 row) + +explain (costs off) select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT 1), NULL); + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on foo + Filter: (a <= GREATEST(b, (SubPlan 1), NULL::integer)) + SubPlan 1 + -> Result + Optimizer: GPORCA +(6 rows) + +select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT 1), NULL); + a +--- + 1 + 2 + 3 +(3 rows) + +explain (costs off) select least((select 5), greatest(b, NULL, (select 1)), a) from foo; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on foo + -> Materialize + -> Result + -> Materialize + -> Result + Optimizer: GPORCA +(11 rows) + +select least((select 5), greatest(b, NULL, (select 1)), a) from foo; + least +------- + 1 + 2 + 3 +(3 rows) + +drop table foo; +-- Test subquery within ScalarArrayRef or ScalarArrayRefIndexList +drop table if exists bar; +create table bar (a int[], b int[][]) distributed by(a); +insert into bar values (ARRAY[1, 2, 3], ARRAY[[1, 2, 3], [4, 5, 6]]); +analyze bar; +explain (costs off) select (select a from bar)[1] from bar; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on bar bar_1 + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> WindowAgg + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on bar + Optimizer: GPORCA +(12 rows) + +select (select a from bar)[1] from bar; + a +--- + 1 +(1 row) + +explain (costs off) select (select a from bar)[(select 1)] from bar; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on bar bar_1 + -> Materialize + -> Result + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> WindowAgg + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on bar + Optimizer: GPORCA +(16 rows) + +select (select a from bar)[(select 1)] from bar; + a +--- + 1 +(1 row) + +explain (costs off) select (select b from bar)[1][1:3] from bar; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on bar bar_1 + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> WindowAgg + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on bar + Optimizer: GPORCA +(12 rows) + +select (select b from bar)[1][1:3] from bar; + b +----------- + {{1,2,3}} +(1 row) + +explain (costs off) select (select b from bar)[(select 1)][1:3] from bar; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + Join Filter: true + -> Nested Loop Left Join + Join Filter: true + -> Seq Scan on bar bar_1 + -> Materialize + -> Result + -> Assert + Assert Cond: ((row_number() OVER (?)) = 1) + -> Materialize + -> Broadcast Motion 1:3 (slice2) + -> WindowAgg + -> Gather Motion 3:1 (slice3; segments: 3) + -> Seq Scan on bar + Optimizer: GPORCA +(16 rows) + +select (select b from bar)[(select 1)][1:3] from bar; + b +----------- + {{1,2,3}} +(1 row) + +drop table bar; +create table outer_foo(a int primary key, b int); +create table inner_bar(a int, b int) distributed randomly; +insert into outer_foo values (generate_series(1,20), generate_series(11,30)); +insert into inner_bar values (generate_series(1,20), generate_series(25,44)); +set optimizer to off; +explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.a + -> Index Scan using outer_foo_pkey on outer_foo t1 + -> Materialize + -> HashAggregate + Group Key: t2.a + -> Result + Filter: (t1.b = t2.b) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on inner_bar t2 + Optimizer: Postgres query optimizer +(13 rows) + +explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on outer_foo t1 + -> Materialize + -> HashAggregate + Group Key: t2.a + -> Result + Filter: (t1.b = t2.b) + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on inner_bar t2 + Optimizer: Postgres query optimizer +(12 rows) + +select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1; + a +---- + 15 + 16 + 17 + 18 + 19 + 20 +(6 rows) + +create table t(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +explain (costs off) with cte(x) as (select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1) +select * from t where a > (select count(1) from cte where x > t.a + random()); + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t + Filter: (a > (SubPlan 1)) + SubPlan 1 + -> Aggregate + -> Result + Filter: ((t1.a)::double precision > ((t.a)::double precision + random())) + -> Nested Loop + -> Materialize + -> Sort + Sort Key: t1.a + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Index Scan using outer_foo_pkey on outer_foo t1 + -> Materialize + -> HashAggregate + Group Key: t2.a + -> Result + Filter: (t1.b = t2.b) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on inner_bar t2 + Optimizer: Postgres query optimizer +(22 rows) + +with cte(x) as (select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1) +select * from t where a > (select count(1) from cte where x > t.a + random()); + a | b +---+--- +(0 rows) + +reset optimizer; +drop table outer_foo; +drop table inner_bar; +drop table t; diff --git a/contrib/pax_storage/src/test/regress/expected/subselect_optimizer.out b/contrib/pax_storage/src/test/regress/expected/subselect_optimizer.out index 12035c92509..2feda989103 100644 --- a/contrib/pax_storage/src/test/regress/expected/subselect_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/subselect_optimizer.out @@ -30,6 +30,12 @@ SELECT * FROM ((SELECT 1 AS x)) ss; 1 (1 row) +SELECT * FROM ((SELECT 1 AS x)), ((SELECT * FROM ((SELECT 2 AS y)))); + x | y +---+--- + 1 | 2 +(1 row) + (SELECT 2) UNION SELECT 2; ?column? ---------- @@ -196,6 +202,69 @@ SELECT f1 AS "Correlated Field" 3 (5 rows) +-- Subselects without aliases +SELECT count FROM (SELECT COUNT(DISTINCT name) FROM road); + count +------- + 2911 +(1 row) + +SELECT COUNT(*) FROM (SELECT DISTINCT name FROM road); + count +------- + 2911 +(1 row) + +SELECT * FROM (SELECT * FROM int4_tbl), (VALUES (123456)) WHERE f1 = column1; + f1 | column1 +--------+--------- + 123456 | 123456 +(1 row) + +CREATE VIEW view_unnamed_ss AS +SELECT * FROM (SELECT * FROM (SELECT abs(f1) AS a1 FROM int4_tbl)), + (SELECT * FROM int8_tbl) + WHERE a1 < 10 AND q1 > a1 ORDER BY q1, q2; +SELECT * FROM view_unnamed_ss; + a1 | q1 | q2 +----+------------------+------------------- + 0 | 123 | 456 + 0 | 123 | 4567890123456789 + 0 | 4567890123456789 | 123 + 0 | 4567890123456789 | 4567890123456789 + 0 | 4567890123456789 | -4567890123456789 +(5 rows) + +\sv view_unnamed_ss +CREATE OR REPLACE VIEW public.view_unnamed_ss AS + SELECT unnamed_subquery.a1, + unnamed_subquery_1.q1, + unnamed_subquery_1.q2 + FROM ( SELECT unnamed_subquery_2.a1 + FROM ( SELECT abs(int4_tbl.f1) AS a1 + FROM int4_tbl) unnamed_subquery_2) unnamed_subquery, + ( SELECT int8_tbl.q1, + int8_tbl.q2 + FROM int8_tbl) unnamed_subquery_1 + WHERE unnamed_subquery.a1 < 10 AND unnamed_subquery_1.q1 > unnamed_subquery.a1 + ORDER BY unnamed_subquery_1.q1, unnamed_subquery_1.q2 +DROP VIEW view_unnamed_ss; +-- Test matching of locking clause to correct alias +CREATE VIEW view_unnamed_ss_locking AS +SELECT * FROM (SELECT * FROM int4_tbl), int8_tbl AS unnamed_subquery + WHERE f1 = q1 + FOR UPDATE OF unnamed_subquery; +\sv view_unnamed_ss_locking +CREATE OR REPLACE VIEW public.view_unnamed_ss_locking AS + SELECT unnamed_subquery.f1, + unnamed_subquery_1.q1, + unnamed_subquery_1.q2 + FROM ( SELECT int4_tbl.f1 + FROM int4_tbl) unnamed_subquery, + int8_tbl unnamed_subquery_1 + WHERE unnamed_subquery.f1 = unnamed_subquery_1.q1 + FOR UPDATE OF unnamed_subquery_1 +DROP VIEW view_unnamed_ss_locking; -- -- Use some existing tables in the regression test -- @@ -738,15 +807,21 @@ from -- -- Test case for subselect within UPDATE of INSERT...ON CONFLICT DO UPDATE -- --- pax not support TupleInsertSpeculative --- create temp table upsert(key int4 primary key, val text); --- insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen'; --- insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 order by f1 limit 1)::text; --- select * from upsert; --- with aa as (select 'int4_tbl' u from int4_tbl limit 1) --- insert into upsert values (1, 'x'), (999, 'y') --- on conflict (key) do update set val = (select u from aa) --- returning *; +create temp table upsert(key int4 primary key, val text); +insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen'; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative +insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 order by f1 limit 1)::text; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative +select * from upsert; + key | val +-----+----- +(0 rows) + +with aa as (select 'int4_tbl' u from int4_tbl limit 1) +insert into upsert values (1, 'x'), (999, 'y') +on conflict (key) do update set val = (select u from aa) +returning *; +ERROR: not implemented yet on pax relations: TupleInsertSpeculative -- -- Test case for cross-type partial matching in hashed subplan (bug #7597) -- @@ -850,31 +925,86 @@ ERROR: operator does not exist: bigint = text LINE 1: select * from int8_tbl where q1 in (select c1 from inner_tex... ^ HINT: No operator matches the given name and argument types. You might need to add explicit type casts. --- It's a known bug in PAX --- it will use row reader and exec the sub plan with same motion --- begin; --- -- make an operator to allow it to succeed --- create function bogus_int8_text_eq(int8, text) returns boolean --- language sql as 'select $1::text = $2'; --- create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text); --- explain (costs off) --- select * from int8_tbl where q1 in (select c1 from inner_text); --- select * from int8_tbl where q1 in (select c1 from inner_text); --- -- inlining of this function results in unusual number of hash clauses, --- -- which we can still cope with --- create or replace function bogus_int8_text_eq(int8, text) returns boolean --- language sql as 'select $1::text = $2 and $1::text = $2'; --- explain (costs off) --- select * from int8_tbl where q1 in (select c1 from inner_text); --- select * from int8_tbl where q1 in (select c1 from inner_text); --- -- inlining of this function causes LHS and RHS to be switched, --- -- which we can't cope with, so hashing should be abandoned --- create or replace function bogus_int8_text_eq(int8, text) returns boolean --- language sql as 'select $2 = $1::text'; --- explain (costs off) --- select * from int8_tbl where q1 in (select c1 from inner_text); --- select * from int8_tbl where q1 in (select c1 from inner_text); --- rollback; -- to get rid of the bogus operator +begin; +-- make an operator to allow it to succeed +create function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2'; +create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text); +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl + SubPlan 1 + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on inner_text + Optimizer: GPORCA +(9 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +-- inlining of this function results in unusual number of hash clauses, +-- which we can still cope with +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2 and $1::text = $2'; +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl + SubPlan 1 + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on inner_text + Optimizer: GPORCA +(9 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +-- inlining of this function causes LHS and RHS to be switched, +-- which we can't cope with, so hashing should be abandoned +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $2 = $1::text'; +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------------------------------- + Result + Filter: (SubPlan 1) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on int8_tbl + SubPlan 1 + -> Materialize + -> Gather Motion 3:1 (slice2; segments: 3) + -> Seq Scan on inner_text + Optimizer: GPORCA +(9 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +rollback; -- to get rid of the bogus operator -- -- Test resolution of hashed vs non-hashed implementation of EXISTS subplan -- @@ -1584,16 +1714,10 @@ set optimizer to off; -- Test that LIMIT can be pushed to SORT through a subquery that just projects -- columns. We check for that having happened by looking to see if EXPLAIN -- ANALYZE shows that a top-N sort was used. We must suppress or filter away --- all the non-invariant parts of the EXPLAIN ANALYZE output. --- --- GPDB_12_MERGE_FIXME: we need to revisit the following test because it is not --- testing what it advertized in the above comment. Specificly, we don't --- execute top-N sort for the planner plan. Orca on the other hand never honors --- ORDER BY in a subquery, as permitted by the SQL spec. Consider rewriting --- the test using a replicated table so that we get the plan stucture like --- this: Limit -> Subquery -> Sort +-- all the non-invariant parts of the EXPLAIN ANALYZE output. Use a replicated +-- table to genarate a plan like: Limit -> Subquery -> Sort -- -create table sq_limit (pk int primary key, c1 int, c2 int); +create table sq_limit (pk int primary key, c1 int, c2 int) distributed replicated; insert into sq_limit values (1, 1, 1), (2, 2, 2), @@ -1619,18 +1743,17 @@ begin end; $$; select * from explain_sq_limit(); - explain_sq_limit ----------------------------------------------------------------------------- - Limit (actual rows=3 loops=1) - -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=1) - -> Limit (actual rows=3 loops=1) - -> Subquery Scan on x (actual rows=3 loops=1) - -> Sort (actual rows=3 loops=1) - Sort Key: sq_limit.c1, sq_limit.pk - Sort Method: quicksort Memory: xxx - -> Seq Scan on sq_limit (actual rows=5 loops=1) + explain_sq_limit +---------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (actual rows=3 loops=1) + -> Limit (actual rows=3 loops=1) + -> Subquery Scan on x (actual rows=3 loops=1) + -> Sort (actual rows=3 loops=1) + Sort Key: sq_limit.c1, sq_limit.pk + Sort Method: top-N heapsort Memory: xxx + -> Seq Scan on sq_limit (actual rows=8 loops=1) Optimizer: Postgres query optimizer -(9 rows) +(8 rows) -- a subpath is sorted under a subqueryscan. however, the subqueryscan is not. -- whether the order of subpath can applied to the subqueryscan is up-to-implement. @@ -1663,6 +1786,159 @@ fetch backward all in c1; ERROR: backward scan is not supported in this version of Apache Cloudberry commit; --end_ignore +-- +-- Verify that we correctly flatten cases involving a subquery output +-- expression that doesn't need to be wrapped in a PlaceHolderVar +-- +explain (costs off) +select tname, attname from ( +select relname::information_schema.sql_identifier as tname, * from + (select * from pg_class c) ss1) ss2 + right join pg_attribute a on a.attrelid = ss2.oid +where tname = 'tenk1' and attnum = 1; + QUERY PLAN +------------------------------------------------------------------------------ + Hash Join + Hash Cond: (a.attrelid = c.oid) + -> Seq Scan on pg_attribute a + Filter: (attnum = 1) + -> Hash + -> Index Scan using pg_class_relname_nsp_index on pg_class c + Index Cond: (relname = 'tenk1'::name) +(8 rows) + +select tname, attname from ( +select relname::information_schema.sql_identifier as tname, * from + (select * from pg_class c) ss1) ss2 + right join pg_attribute a on a.attrelid = ss2.oid +where tname = 'tenk1' and attnum = 1; + tname | attname +-------+--------- + tenk1 | unique1 +(1 row) + +-- Check behavior when there's a lateral reference in the output expression +explain (verbose, costs off) +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.ten, (sum((t1.ten + t2.ten))) + Merge Key: t1.ten + -> Sort + Output: t1.ten, (sum((t1.ten + t2.ten))) + Sort Key: t1.ten + -> Finalize HashAggregate + Output: t1.ten, sum((t1.ten + t2.ten)) + Group Key: t1.ten + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: t1.ten, (PARTIAL sum((t1.ten + t2.ten))) + Hash Key: t1.ten + -> Partial HashAggregate + Output: t1.ten, PARTIAL sum((t1.ten + t2.ten)) + Group Key: t1.ten + -> Hash Right Join + Output: t1.ten, t2.ten + Hash Cond: (t2.fivethous = t1.unique1) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: t2.ten, t2.fivethous + Hash Key: t2.fivethous + -> Seq Scan on public.tenk1 t2 + Output: t2.ten, t2.fivethous + -> Hash + Output: t1.ten, t1.unique1 + -> Seq Scan on public.tenk1 t1 + Output: t1.ten, t1.unique1 +(29 rows) + +select t1.ten, sum(x) from + tenk1 t1 left join lateral ( + select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2 + ) ss on t1.unique1 = ss.fivethous +group by t1.ten +order by t1.ten; + ten | sum +-----+------- + 0 | 0 + 1 | 2000 + 2 | 4000 + 3 | 6000 + 4 | 8000 + 5 | 10000 + 6 | 12000 + 7 | 14000 + 8 | 16000 + 9 | 18000 +(10 rows) + +explain (verbose, costs off) +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q1+t3.q1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2) + on t1.q2 = t2.q2 +order by 1, 2; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: t1.q1, ((t2.q1 + t3.q1)) + Merge Key: t1.q1, ((t2.q1 + t3.q1)) + -> Sort + Output: t1.q1, ((t2.q1 + t3.q1)) + Sort Key: t1.q1, ((t2.q1 + t3.q1)) + -> Hash Right Join + Output: t1.q1, (t2.q1 + t3.q1) + Hash Cond: (t2.q2 = t1.q2) + -> Hash Left Join + Output: t2.q1, t2.q2, t3.q1 + Hash Cond: (t2.q2 = t3.q2) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: t2.q1, t2.q2 + Hash Key: t2.q2 + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Hash + Output: t3.q1, t3.q2 + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: t3.q1, t3.q2 + Hash Key: t3.q2 + -> Seq Scan on public.int8_tbl t3 + Output: t3.q1, t3.q2 + -> Hash + Output: t1.q1, t1.q2 + -> Redistribute Motion 3:3 (slice4; segments: 3) + Output: t1.q1, t1.q2 + Hash Key: t1.q2 + -> Seq Scan on public.int8_tbl t1 + Output: t1.q1, t1.q2 +(33 rows) + +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q1+t3.q1 as x, * from int8_tbl t3) t3 on t2.q2 = t3.q2) + on t1.q2 = t2.q2 +order by 1, 2; + q1 | x +------------------+------------------ + 123 | 246 + 123 | 246 + 123 | 4567890123456912 + 123 | 4567890123456912 + 123 | 9135780246913578 + 4567890123456789 | 246 + 4567890123456789 | 4567890123456912 + 4567890123456789 | 4567890123456912 + 4567890123456789 | 9135780246913578 + 4567890123456789 | 9135780246913578 + 4567890123456789 | 9135780246913578 +(11 rows) + -- -- Tests for CTE inlining behavior -- @@ -1713,10 +1989,8 @@ select * from x where f1 = 1; (7 rows) -- Volatile functions prevent inlining --- GPDB_12_MERGE_FIXME: inlining happens on GPDB: But the plan seems OK --- nevertheless. Is the GPDB planner smart, and notices that this is --- ok to inline, or is it doing something that would be unsafe in more --- complicated queries? Investigte +-- Prevent inlining happens on GPDB, inlining may cause wrong results. +-- For example, nextval() function. explain (verbose, costs off) with x as (select * from (select f1, random() from subselect_tbl) ss) select * from x where f1 = 1; @@ -1738,8 +2012,35 @@ select * from x where f1 = 1; Output: share0_ref2.f1, share0_ref2.random Optimizer: Pivotal Optimizer (GPORCA) Settings: gp_cte_sharing=on, optimizer=on -(18 rows) +(16 rows) + +create temporary sequence ts; +create table vol_test(a int, b int); +explain (verbose, costs off) +with x as (select * from (select a, nextval('ts') from vol_test) ss) +select * from x where a = 1; + QUERY PLAN +----------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + Output: share0_ref2.a, share0_ref2.nextval + -> Sequence + Output: share0_ref2.a, share0_ref2.nextval + -> Shared Scan (share slice:id 1:0) + Output: share0_ref1.a, share0_ref1.nextval + -> Seq Scan on public.vol_test + Output: vol_test.a, nextval('ts'::regclass) + Filter: (vol_test.a = 1) + -> Result + Output: share0_ref2.a, share0_ref2.nextval + Filter: (share0_ref2.a = 1) + -> Shared Scan (share slice:id 1:0) + Output: share0_ref2.a, share0_ref2.nextval + Optimizer: Pivotal Optimizer (GPORCA) + Settings: gp_cte_sharing=on, optimizer=on +(16 rows) +drop sequence ts; +drop table vol_test; -- SELECT FOR UPDATE cannot be inlined -- GPDB: select statement with locking clause is not easy to fully supported -- in greenplum. The following case even with GDD enabled greenplum will still @@ -1815,11 +2116,6 @@ select * from x, x x2 where x.n = x2.n; (19 rows) -- Multiply-referenced CTEs can't be inlined if they contain outer self-refs --- start_ignore --- GPDB_12_MERGE_FIXME: This currenty produces incorrect results on GPDB. --- It's not a new issue, but it was exposed by this new upstream test case --- with the PostgreSQL v12 merge. --- See https://github.com/greenplum-db/gpdb/issues/10014 explain (verbose, costs off) with recursive x(a) as ((values ('a'), ('b')) @@ -1838,13 +2134,11 @@ select * from x; Join Filter: (length((x.a || x_1.a)) < 5) -> WorkTable Scan on x Output: x.a - -> Materialize + -> WorkTable Scan on x x_1 Output: x_1.a - -> WorkTable Scan on x x_1 - Output: x_1.a + Settings: optimizer = 'on', gp_cte_sharing = 'on' Optimizer: Postgres query optimizer - Settings: gp_cte_sharing=on, optimizer=on -(14 rows) +(12 rows) with recursive x(a) as ((values ('a'), ('b')) @@ -1853,13 +2147,32 @@ with recursive x(a) as select z.a || z1.a as a from z cross join z as z1 where length(z.a || z1.a) < 5)) select * from x; - a ---- + a +------ a + aa + aaaa + aaab + aaba + aabb + ab + abaa + abab + abba + abbb b -(2 rows) + ba + baaa + baab + baba + babb + bb + bbaa + bbab + bbba + bbbb +(22 rows) --- end_ignore explain (verbose, costs off) with recursive x(a) as ((values ('a'), ('b')) @@ -1954,3 +2267,134 @@ select * from x for update; (6 rows) set gp_cte_sharing to off; +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, which contains attributes referenced in query's GROUP BY clause. +-- Due to presence of non-grouping columns in targetList, ORCA performs query +-- normalization, during which ORCA establishes a correspondence between vars +-- from targetlist entries to grouping attributes. And this process should +-- correctly handle nested structures. The inner part of SubPlan in the test +-- should contain only t.j. +-- start_ignore +drop table if exists t; +NOTICE: table "t" does not exist, skipping +-- end_ignore +create table t (i int, j int) distributed by (i); +insert into t values (1, 2); +explain (verbose, costs off) +select j, +(select j from (select j) q2) +from t +group by i, j; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: j, ((SubPlan 1)) + -> GroupAggregate + Output: j, (SubPlan 1) + Group Key: t.i, t.j + -> Sort + Output: i, j + Sort Key: t.i, t.j + -> Seq Scan on public.t + Output: i, j + SubPlan 1 + -> Result + Output: t.j + -> Result + Output: true + Settings: optimizer = 'on', gp_cte_sharing = 'off' + Optimizer: GPORCA +(17 rows) + +select j, +(select j from (select j) q2) +from t +group by i, j; + j | j +---+--- + 2 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + QUERY PLAN +------------------------------------------------------------------------ + Result + Output: j, 1, ((SubPlan 1)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: j, ((SubPlan 1)) + -> GroupAggregate + Output: j, ((SubPlan 1)) + Group Key: t.j, ((SubPlan 1)) + -> Sort + Output: j, ((SubPlan 1)) + Sort Key: t.j, ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: j, ((SubPlan 1)) + Hash Key: j, ((SubPlan 1)) + -> Seq Scan on public.t + Output: j, (SubPlan 1) + SubPlan 1 + -> Result + Output: t.j + -> Result + Output: true + Settings: optimizer = 'on', gp_cte_sharing = 'off' + Optimizer: GPORCA +(22 rows) + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + j | c | q1 +---+---+---- + 2 | 1 | 2 +(1 row) + +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + QUERY PLAN +---------------------------------------------------- + Finalize Aggregate + Output: (SubPlan 2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL max((SubPlan 1))) + -> Partial Aggregate + Output: PARTIAL max((SubPlan 1)) + -> Seq Scan on public.t + Output: i + SubPlan 1 + -> Result + Output: t.i + SubPlan 2 + -> Result + Output: max((SubPlan 1)) + -> Result + Output: true + Settings: optimizer = 'on', gp_cte_sharing = 'off' + Optimizer: GPORCA +(18 rows) + +select (select max((select t.i))) from t; + max +----- + 1 +(1 row) + +drop table t; diff --git a/contrib/pax_storage/src/test/regress/expected/table_functions_optimizer.out b/contrib/pax_storage/src/test/regress/expected/table_functions_optimizer.out index e0e5d25fdce..e13d3f4e7db 100644 --- a/contrib/pax_storage/src/test/regress/expected/table_functions_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/table_functions_optimizer.out @@ -40,37 +40,39 @@ CREATE FUNCTION scalar_tf_5(IN a int) RETURNS SETOF RECORD CREATE FUNCTION scalar_tf_6(IN a anyelement) RETURNS SETOF example AS $$ SELECT a+$1, b from example $$ LANGUAGE SQL READS SQL DATA; /* CREATE some multiset input table functions */ +\getenv abs_builddir PG_ABS_BUILDDIR +\set regress_dll :abs_builddir '/regress.so' /* scalar value outputs */ CREATE FUNCTION multiset_scalar_null(anytable) RETURNS int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_scalar_null' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_scalar_null' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_scalar_value(anytable) RETURNS int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_scalar_value' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_scalar_value' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_scalar_tuple(anytable) RETURNS example - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_scalar_tuple' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_scalar_tuple' LANGUAGE C READS SQL DATA; /* set value outputs */ CREATE FUNCTION multiset_setof_null(anytable) RETURNS setof int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_setof_null' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_setof_null' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_setof_value(anytable) RETURNS setof int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_setof_value' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_setof_value' LANGUAGE C READS SQL DATA; /* Bunches of different ways of saying "returns a setof rows */ CREATE FUNCTION multiset_materialize_good(anytable) RETURNS TABLE(a int, b text) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_materialize_good' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_materialize_good' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_materialize_bad(anytable) RETURNS TABLE(a int, b text) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_materialize_bad' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_materialize_bad' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_1(a anytable) RETURNS TABLE(a int, b text) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_2(a anytable) RETURNS TABLE(a int, b text) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_3(a anytable, out a int, out b text) RETURNS SETOF RECORD - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_4(a anytable) RETURNS SETOF RECORD - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_5(a anytable) RETURNS SETOF example - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; CREATE FUNCTION multiset_6(a anytable) RETURNS SETOF record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; /* Negative test cases around CREATE FUNCTION */ /* ERROR: TABLE output not allowed with OUT parameters */ CREATE FUNCTION error(OUT a int) RETURNS TABLE(a int, b text) @@ -102,7 +104,7 @@ CREATE FUNCTION error() RETURNS TABLE(a setof example) ERROR: functions cannot accept set arguments /* ERROR: anytable cannot have default value */ CREATE FUNCTION error(a anytable DEFAULT TABLE(select 1,'test')) RETURNS TABLE(a int, b text) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C; + AS :'regress_dll', 'multiset_example' LANGUAGE C; ERROR: anytable parameter cannot have default value /* Negative test cases around the "anytable" type */ CREATE TABLE fail(x anytable); @@ -1366,7 +1368,7 @@ LINE 1: SELECT * from nameres(TABLE(SELECT 5)); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. CREATE FUNCTION nameres(anytable) RETURNS int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_scalar_value' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_scalar_value' LANGUAGE C READS SQL DATA; SELECT * from nameres(5); -- should work nameres --------- @@ -2247,8 +2249,8 @@ SELECT * FROM v1 order by a, b; a | integer | | | | plain | b | text | | | | extended | View definition: - SELECT tf.a, - tf.b + SELECT a, + b FROM multiset_2(TABLE( SELECT example.a, example.b FROM example)) tf; @@ -2256,8 +2258,8 @@ View definition: SELECT pg_get_viewdef('v1'::regclass); pg_get_viewdef --------------------------------------------- - SELECT tf.a, + - tf.b + + SELECT a, + + b + FROM multiset_2(TABLE( SELECT example.a,+ example.b + FROM example)) tf; @@ -2287,8 +2289,8 @@ SELECT * FROM v2 order by a, b; a | integer | | | | plain | b | text | | | | extended | View definition: - SELECT tf.a, - tf.b + SELECT a, + b FROM multiset_2(TABLE( SELECT example.a, example.b FROM example @@ -2297,8 +2299,8 @@ View definition: SELECT pg_get_viewdef('v2'::regclass); pg_get_viewdef -------------------------------------------------------- - SELECT tf.a, + - tf.b + + SELECT a, + + b + FROM multiset_2(TABLE( SELECT example.a, + example.b + FROM example + @@ -2329,8 +2331,8 @@ SELECT * FROM v3 order by a, b; a | integer | | | | plain | b | text | | | | extended | View definition: - SELECT tf.a, - tf.b + SELECT a, + b FROM multiset_2(TABLE( SELECT example.a, example.b FROM example @@ -2339,8 +2341,8 @@ View definition: SELECT pg_get_viewdef('v3'::regclass); pg_get_viewdef --------------------------------------------- - SELECT tf.a, + - tf.b + + SELECT a, + + b + FROM multiset_2(TABLE( SELECT example.a,+ example.b + FROM example + @@ -2354,7 +2356,7 @@ DROP view v3; -- Interaction with set returning functions -- ======================================== CREATE FUNCTION multi_args(a anytable, x int) RETURNS SETOF example - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'multiset_example' LANGUAGE C READS SQL DATA; + AS :'regress_dll', 'multiset_example' LANGUAGE C READS SQL DATA; -- In select list requires some extra setrefs logic in planning SELECT *, generate_series(1,2) FROM multi_args( TABLE(SELECT 1::int, 'hello'::text), 2); a | b | generate_series @@ -2449,11 +2451,11 @@ ERROR: PL/Python functions cannot accept type anytable -- Create a function and a describe method CREATE FUNCTION sessionize_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'describe' + AS :'regress_dll', 'describe' LANGUAGE C READS SQL DATA; CREATE FUNCTION sessionize(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA; -- No dependency yet, all three queries should return 0 rows SELECT * FROM pg_proc_callback where profnoid = 'sessionize'::regproc; @@ -2476,19 +2478,19 @@ DROP FUNCTION sessionize_describe(internal); -- Should fail, no such function CREATE OR REPLACE FUNCTION sessionize(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); ERROR: function sessionize_describe(internal) does not exist -- Recreate describe function CREATE OR REPLACE FUNCTION sessionize_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'describe' + AS :'regress_dll', 'describe' LANGUAGE C READS SQL DATA; -- Alter the existing function to add the describe callback CREATE OR REPLACE FUNCTION sessionize(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); -- Observe the relationship now recorded in pg_proc_callback @@ -2529,11 +2531,11 @@ SELECT * FROM pg_proc_callback where procallback not in (select oid from pg_proc -- Recreate both functions CREATE OR REPLACE FUNCTION sessionize_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'describe' + AS :'regress_dll', 'describe' LANGUAGE C READS SQL DATA; CREATE OR REPLACE FUNCTION sessionize(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); -- Check the dependency again, drop should fail @@ -2550,7 +2552,7 @@ SELECT * FROM pg_proc_callback where profnoid = 'sessionize'::regproc; -- Alter existing function to remove the describe callback CREATE OR REPLACE FUNCTION sessionize(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA; -- Check the dependency again, drop should succeed DROP FUNCTION sessionize_describe(internal); @@ -2574,19 +2576,19 @@ SELECT * FROM pg_proc_callback where procallback not in (select oid from pg_proc -- One more time, creating without using "OR REPLACE" CREATE FUNCTION sessionize_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'describe' + AS :'regress_dll', 'describe' LANGUAGE C READS SQL DATA; CREATE FUNCTION sessionize_plain(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA; CREATE FUNCTION sessionize_static(anytable, interval) RETURNS TABLE(id integer, "time" timestamp, sessionnum integer) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA; CREATE FUNCTION sessionize_dynamic(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); -- Check catalog for the new functions, should only see sessionize_describe @@ -2871,7 +2873,7 @@ FROM sessionize_plain( '1 minute' ) as sessionize(id integer, "time" timestamp, sessionnum integer); CREATE OR REPLACE FUNCTION sessionize_plain(anytable, interval) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); -- ERROR: views exist ERROR: cannot add DESCRIBE callback to function used in view(s) @@ -2881,11 +2883,11 @@ DROP VIEW supported; -- ======================== CREATE FUNCTION project_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'project_describe' + AS :'regress_dll', 'project_describe' LANGUAGE C READS SQL DATA; CREATE FUNCTION project(anytable, integer) RETURNS setof record - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'project' + AS :'regress_dll', 'project' LANGUAGE C READS SQL DATA WITH (describe = project_describe); SELECT * FROM project( TABLE( SELECT * FROM history ), 1) order by 1; @@ -2995,8 +2997,7 @@ SELECT * FROM project( TABLE( SELECT * FROM pg_am WHERE amname not in ('pax') ), spgist brin bitmap - heap2 -(11 rows) +(10 rows) SELECT * FROM project( TABLE( SELECT * FROM pg_am WHERE amname not in ('pax') ), CASE WHEN 3 IS NOT NULL AND 1 IN (1, 2) THEN floor(NULLIF(2, 3))::int END); @@ -3012,8 +3013,7 @@ SELECT * FROM project( TABLE( SELECT * FROM pg_am WHERE amname not in ('pax') ), spgist brin bitmap - heap2 -(11 rows) +(10 rows) -- ERROR: invalid operations demonstrating different return types SELECT extract(day from id) FROM project( TABLE( SELECT * FROM history ), 1); @@ -3056,10 +3056,10 @@ LINE 1: ... project( TABLE( SELECT * FROM pg_am ), (ROW(1, '')::example... -- User data exmaple -- ======================== CREATE FUNCTION ud_describe(internal) RETURNS internal - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'userdata_describe' + AS :'regress_dll', 'userdata_describe' LANGUAGE C READS SQL DATA; CREATE FUNCTION ud_project(anytable) RETURNS setof RECORD - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'userdata_project' + AS :'regress_dll', 'userdata_project' LANGUAGE C READS SQL DATA WITH (describe = ud_describe); SELECT * FROM ud_project( TABLE( SELECT * FROM history ) ); @@ -3089,7 +3089,7 @@ SELECT * FROM ud_project( TABLE( SELECT * FROM history ) ); -- Passing input without modification -- ======================== CREATE FUNCTION noop_project(anytable) RETURNS setof RECORD - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so' + AS :'regress_dll' LANGUAGE C READS SQL DATA; SELECT * FROM noop_project( TABLE( SELECT * FROM history ) ) AS s (id integer, time timestamp); id | time @@ -3160,25 +3160,25 @@ SELECT * FROM noop_project( TABLE( SELECT count(*) FROM history GROUP BY time SC -- ======================== -- explicit return type not suitable for dynamic type resolution CREATE FUNCTION x() returns int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); ERROR: DESCRIBE only supported for functions returning "record" -- explicit return type (setof) not suitable for dynamic type resolution CREATE FUNCTION x() returns setof int - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); ERROR: DESCRIBE only supported for functions returning "record" -- explicit return type (TABLE) not suitable for dynamic type resolution CREATE FUNCTION x() returns TABLE(id integer, "time" timestamp, sessionnum integer) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); ERROR: DESCRIBE is not supported for functions that return TABLE -- explicit return type (OUT PARAMS) not suitable for dynamic type resolution CREATE FUNCTION x(OUT id integer, OUT "time" timestamp, OUT sessionnum integer) - AS '/home/gpadmin/cloudberry/contrib/pax_storage/src/test/regress/regress.so', 'sessionize' + AS :'regress_dll', 'sessionize' LANGUAGE C READS SQL DATA WITH (describe = sessionize_describe); ERROR: DESCRIBE is not supported for functions with OUT parameters diff --git a/contrib/pax_storage/src/test/regress/expected/tsearch_optimizer.out b/contrib/pax_storage/src/test/regress/expected/tsearch_optimizer.out index c4434add1b0..1241cc08013 100644 --- a/contrib/pax_storage/src/test/regress/expected/tsearch_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/tsearch_optimizer.out @@ -1,4 +1,6 @@ set optimizer_print_missing_stats = off; +-- directory paths are passed to us in environment variables +\getenv abs_srcdir PG_ABS_SRCDIR -- -- Sanity checks for text search catalogs -- @@ -56,6 +58,14 @@ WHERE -------+-------+--------+--------------+----------+--------- (0 rows) +-- Load some test data +CREATE TABLE test_tsvector( + t text, + a tsvector +); +\set filename :abs_srcdir '/data/tsearch.data' +COPY test_tsvector FROM :'filename'; +ANALYZE test_tsvector; -- test basic text search behavior without indexes, then with SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; count @@ -201,7 +211,6 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; 450 (1 row) --- PAX not support gist/spgist/brin indexes create index wowidx on test_tsvector using gist (a); ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) SET enable_seqscan=OFF; @@ -520,7 +529,6 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; (1 row) -- Test siglen parameter of GiST tsvector_ops --- PAX not support gist/spgist/brin indexes CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1)); ERROR: unrecognized parameter "foo" CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=0)); @@ -542,8 +550,7 @@ ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736 t | text | | | a | tsvector | | | -DROP INDEX wowidx; -ERROR: index "wowidx" does not exist +-- DROP INDEX wowidx; EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; QUERY PLAN --------------------------------------------------------------- @@ -699,10 +706,9 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; 450 (1 row) -DROP INDEX wowidx2; -ERROR: index "wowidx2" does not exist +-- DROP INDEX wowidx2; CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484)); -ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:736) +ERROR: pax only support btree/hash/gin/bitmap indexes (pax_access_handle.cc:###) \d test_tsvector Table "public.test_tsvector" Column | Type | Collation | Nullable | Default @@ -868,8 +874,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; -DROP INDEX wowidx; -ERROR: index "wowidx" does not exist +-- DROP INDEX wowidx; CREATE INDEX wowidx ON test_tsvector USING gin (a); SET enable_seqscan=OFF; -- GIN only supports bitmapscan, so no need to test plain indexscan @@ -1088,6 +1093,7 @@ SELECT * FROM ts_stat('SELECT a FROM test_tsvector', 'AB') ORDER BY ndoc DESC, n DFG | 1 | 2 (1 row) +DROP INDEX wowidx; --dictionaries and to_tsvector SELECT ts_lexize('english_stem', 'skies'); ts_lexize @@ -1817,13 +1823,131 @@ Water, water, every where Water, water, every where, Nor any drop to drink. S. T. Coleridge (1772-1834) -', phraseto_tsquery('english', 'painted Ocean')); - ts_headline ---------------------------------------- - painted Ship + - Upon a painted Ocean.+ - Water, water, every where + +', to_tsquery('english', 'day & drink')); + ts_headline +------------------------------------ + And all the boards did shrink; + + Nor any drop + Upon a painted Ocean. + + We stuck, nor breath nor motion,+ + day, + + As idle as a painted Ship + + Water, water, every where + + Water, water, every where, + +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', to_tsquery('english', 'day | drink')); + ts_headline +----------------------------------------------------------- + We stuck, nor breath nor motion, + + Day after day, day after day,+ + As idle as a painted +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', to_tsquery('english', 'day | !drink')); + ts_headline +----------------------------------------------------------- + We stuck, nor breath nor motion, + + Day after day, day after day,+ + As idle as a painted +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', to_tsquery('english', 'painted <-> Ship & drink')); + ts_headline +---------------------------------- + And all the boards did shrink;+ + Nor any drop to drink + Upon a painted Ocean. + + painted Ship + + Water, water, every where + + Water, water, every where, + +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', to_tsquery('english', 'painted <-> Ship | drink')); + ts_headline +--------------------------------- And all the boards did shrink + Upon a painted Ocean. + + painted Ship + + Water, water, every where + +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', to_tsquery('english', 'painted <-> Ship | !drink')); + ts_headline +--------------------------------- + painted Ship + + Upon a painted Ocean. + + Water, water, every where + + And all the boards did shrink +(1 row) + +SELECT ts_headline('english', ' +Day after day, day after day, + We stuck, nor breath nor motion, +As idle as a painted Ship + Upon a painted Ocean. +Water, water, every where + And all the boards did shrink; +Water, water, every where, + Nor any drop to drink. +S. T. Coleridge (1772-1834) +', phraseto_tsquery('english', 'painted Ocean')); + ts_headline +---------------------------------- + painted Ocean. + + Water, water, every where + + And all the boards did shrink;+ + Water, water, every (1 row) SELECT ts_headline('english', ' @@ -1854,6 +1978,15 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), Lorem ipsum urna. Nullam nullam ullamcorper urna (1 row) +SELECT ts_headline('english', +'Lorem ipsum urna. Nullam nullam ullamcorper urna.', +phraseto_tsquery('english','ullamcorper urna'), +'MaxWords=100, MinWords=5'); + ts_headline +------------------------------------------------------------- + urna. Nullam nullam ullamcorper urna. +(1 row) + SELECT ts_headline('english', ' @@ -1896,9 +2029,9 @@ SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1 (1 row) SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1'); - ts_headline ----------------------------- - 3 1 3 + ts_headline +------------------- + 1 3 (1 row) --Check if headline fragments work @@ -2003,6 +2136,23 @@ to_tsquery('english','Lorem') && phraseto_tsquery('english','ullamcorper urna'), Lorem ipsum urna. Nullam nullam ullamcorper urna (1 row) +-- Edge cases with empty query +SELECT ts_headline('english', +'', to_tsquery('english', '')); +NOTICE: text-search query doesn't contain lexemes: "" + ts_headline +------------- + +(1 row) + +SELECT ts_headline('english', +'foo bar', to_tsquery('english', '')); +NOTICE: text-search query doesn't contain lexemes: "" + ts_headline +------------- + foo bar +(1 row) + --Rewrite sub system CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); \set ECHO none @@ -2347,10 +2497,10 @@ SELECT to_tsquery('SKIES & My | booKs'); -- distribution key column. alter table test_tsvector add column distkey int4; alter table test_tsvector set distributed by (distkey); --- Pax not support current TRIGGER --- CREATE TRIGGER tsvectorupdate --- BEFORE UPDATE OR INSERT ON test_tsvector --- FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(a, 'pg_catalog.english', t); +CREATE TRIGGER tsvectorupdate +BEFORE UPDATE OR INSERT ON test_tsvector +FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(a, 'pg_catalog.english', t); +ERROR: ON UPDATE triggers are not supported on append-only tables SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); count ------- @@ -2549,12 +2699,19 @@ select websearch_to_tsquery('simple', 'abc <-> def'); 'abc' & 'def' (1 row) +-- parens are ignored, too select websearch_to_tsquery('simple', 'abc (pg or class)'); websearch_to_tsquery ------------------------ 'abc' & 'pg' | 'class' (1 row) +select websearch_to_tsquery('simple', '(foo bar) or (ding dong)'); + websearch_to_tsquery +--------------------------------- + 'foo' & 'bar' | 'ding' & 'dong' +(1 row) + -- NOT is ignored in quotes select websearch_to_tsquery('english', 'My brand new smartphone'); websearch_to_tsquery diff --git a/contrib/pax_storage/src/test/regress/expected/tuplesort_optimizer.out b/contrib/pax_storage/src/test/regress/expected/tuplesort_optimizer.out index 0797ceb2aa2..4961af70dde 100644 --- a/contrib/pax_storage/src/test/regress/expected/tuplesort_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/tuplesort_optimizer.out @@ -538,20 +538,22 @@ EXPLAIN (COSTS OFF) :qry; -> GroupAggregate Group Key: a.col12 Filter: (count(*) > 1) - -> Merge Join - Merge Cond: (a.col12 = b.col12) - -> Sort - Sort Key: a.col12 DESC - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: a.col12 - -> Seq Scan on test_mark_restore a - -> Sort - Sort Key: b.col12 DESC - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: b.col12 - -> Seq Scan on test_mark_restore b + -> Sort + Sort Key: a.col12 DESC, a.col1 + -> Merge Join + Merge Cond: (a.col12 = b.col12) + -> Sort + Sort Key: a.col12 DESC + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a.col12 + -> Seq Scan on test_mark_restore a + -> Sort + Sort Key: b.col12 DESC + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: b.col12 + -> Seq Scan on test_mark_restore b Optimizer: Postgres query optimizer -(22 rows) +(24 rows) :qry; col12 | count | count | count | count | count @@ -582,20 +584,22 @@ EXPLAIN (COSTS OFF) :qry; -> GroupAggregate Group Key: a.col12 Filter: (count(*) > 1) - -> Merge Join - Merge Cond: (a.col12 = b.col12) - -> Sort - Sort Key: a.col12 DESC - -> Redistribute Motion 3:3 (slice2; segments: 3) - Hash Key: a.col12 - -> Seq Scan on test_mark_restore a - -> Sort - Sort Key: b.col12 DESC - -> Redistribute Motion 3:3 (slice3; segments: 3) - Hash Key: b.col12 - -> Seq Scan on test_mark_restore b + -> Sort + Sort Key: a.col12 DESC, a.col1 + -> Merge Join + Merge Cond: (a.col12 = b.col12) + -> Sort + Sort Key: a.col12 DESC + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: a.col12 + -> Seq Scan on test_mark_restore a + -> Sort + Sort Key: b.col12 DESC + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: b.col12 + -> Seq Scan on test_mark_restore b Optimizer: Postgres query optimizer -(22 rows) +(24 rows) :qry; col12 | count | count | count | count | count diff --git a/contrib/pax_storage/src/test/regress/expected/union_gp_optimizer.out b/contrib/pax_storage/src/test/regress/expected/union_gp_optimizer.out index 55f940e06ed..b9f6eb52646 100644 --- a/contrib/pax_storage/src/test/regress/expected/union_gp_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/union_gp_optimizer.out @@ -1,4 +1,5 @@ -- Additional GPDB-added tests for UNION +SET optimizer_trace_fallback=on; create temp table t_union1 (a int, b int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -50,6 +51,8 @@ select 1 a, row_number() over (partition by 'a') union all (select 1 a , 2 b); -- This should preserve domain types select pg_typeof(a) from (select 'a'::information_schema.sql_identifier a union all select 'b'::information_schema.sql_identifier)a; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_typeof ----------------------------------- information_schema.sql_identifier @@ -75,6 +78,8 @@ select 'b'::information_schema.sql_identifier)a; -- Yet, we keep behaviors on text-like columns select pg_typeof(a) from(select 'foo' a union select 'foo'::name)s; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_typeof ----------- name @@ -82,6 +87,8 @@ select pg_typeof(a) from(select 'foo' a union select 'foo'::name)s; select pg_typeof(a) from(select 1 x, 'foo' a union select 1, 'foo' union select 1, 'foo'::name)s; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_typeof ----------- text @@ -89,6 +96,8 @@ select pg_typeof(a) from(select 1 x, 'foo' a union select pg_typeof(a) from(select 1 x, 'foo' a union (select 1, 'foo' union select 1, 'foo'::name))s; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation pg_typeof ----------- name @@ -293,9 +302,13 @@ create table rep3(c1 int, c2 int) distributed replicated; set allow_system_table_mods = on; update gp_distribution_policy set numsegments = 2 where localoid = 'rep2'::regclass; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables select localoid::regclass, policytype, numsegments from gp_distribution_policy where localoid::regclass in ('rep2', 'rep3'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables localoid | policytype | numsegments ----------+------------+------------- rep3 | r | 3 @@ -303,6 +316,8 @@ select localoid::regclass, policytype, numsegments (2 rows) explain select * from rep2 union all select * from rep3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data QUERY PLAN --------------------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) (cost=1922.00..1922.00 rows=172200 width=8) @@ -313,6 +328,8 @@ explain select * from rep2 union all select * from rep3; (5 rows) select * from rep2 union all select * from rep3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data c1 | c2 ----+---- (0 rows) @@ -690,6 +707,8 @@ UNION ALL (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -709,6 +728,8 @@ UNION ALL (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -728,6 +749,8 @@ UNION ALL (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -747,6 +770,8 @@ UNION ALL (select b1 from T_b2) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -968,6 +993,8 @@ UNION (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -987,6 +1014,8 @@ UNION (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1006,6 +1035,8 @@ UNION (select d1 from T_constant) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1025,6 +1056,8 @@ UNION (select b1 from T_b2) order by 1;' , 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1460,42 +1493,56 @@ UNION ALL SELECT 300, 300) -- Binary UNION ALL explain -- select count_operator('(select a1 from T_a1) UNION ALL (select b1 from T_b2) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select b1 from T_b2) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select a1 from T_a1) UNION ALL (select c1 from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select c1 from T_random) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_a1) UNION ALL (select * from T_b2) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_a1) UNION ALL (select * from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_b2) UNION ALL (select * from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 @@ -1507,6 +1554,8 @@ SELECT 100, 100 UNION ALL SELECT 200, 200 UNION ALL SELECT 300, 300) (select a1 from T_a1) UNION ALL (select d1 from T_constant) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1518,6 +1567,8 @@ SELECT 100, 100 UNION ALL SELECT 200, 200 UNION ALL SELECT 300, 300) (select d1 from T_constant) UNION ALL (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1529,6 +1580,8 @@ SELECT 100, 100 UNION ALL SELECT 200, 200 UNION ALL SELECT 300, 300) (select c1 from T_random) UNION ALL (select d1 from T_constant) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1539,6 +1592,8 @@ SELECT 100, 100 UNION ALL SELECT 200, 200 UNION ALL SELECT 300, 300) (select d1 from T_constant) UNION ALL (select c1 from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1894,42 +1949,56 @@ UNION SELECT 300, 300) -- Binary UNION explain -- select count_operator('(select a1 from T_a1) UNION (select b1 from T_b2) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select b1 from T_b2) UNION (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select a1 from T_a1) UNION (select c1 from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select c1 from T_random) UNION (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_a1) UNION (select * from T_b2) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_a1) UNION (select * from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 (1 row) select count_operator('(select * from T_b2) UNION (select * from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 1 @@ -1941,6 +2010,8 @@ SELECT 100, 100 UNION SELECT 200, 200 UNION SELECT 300, 300) (select a1 from T_a1) UNION (select d1 from T_constant) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1952,6 +2023,8 @@ SELECT 100, 100 UNION SELECT 200, 200 UNION SELECT 300, 300) (select d1 from T_constant) UNION (select a1 from T_a1) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1963,6 +2036,8 @@ SELECT 100, 100 UNION SELECT 200, 200 UNION SELECT 300, 300) (select c1 from T_random) UNION (select d1 from T_constant) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -1973,6 +2048,8 @@ SELECT 100, 100 UNION SELECT 200, 200 UNION SELECT 300, 300) (select d1 from T_constant) UNION (select c1 from T_random) order by 1;', 'APPEND'); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions count_operator ---------------- 2 @@ -2056,14 +2133,20 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur set allow_system_table_mods = on; update gp_distribution_policy set numsegments = 1 where localoid = 'union_schema.t1'::regclass::oid; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables update gp_distribution_policy set numsegments = 2 where localoid = 'union_schema.t2'::regclass::oid; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables select relname, policytype, numsegments, distkey from pg_class, gp_distribution_policy, pg_namespace ns where pg_class.oid = localoid and relnamespace = ns.oid and nspname = 'union_schema' and relname in ('t1', 't2', 't3') order by relname; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation relname | policytype | numsegments | distkey ---------+------------+-------------+--------- t1 | p | 1 | 1 @@ -2072,11 +2155,17 @@ select relname, policytype, numsegments, distkey (3 rows) insert into union_schema.t1 select i, i from generate_series(1,10)i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data insert into union_schema.t2 select i, i from generate_series(1,20)i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data analyze union_schema.t1; analyze union_schema.t2; explain select * from union_schema.t1 join union_schema.t2 on union_schema.t1.a = union_schema.t2.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data QUERY PLAN ----------------------------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) (cost=1.23..3.00 rows=10 width=16) @@ -2095,6 +2184,8 @@ explain select union_schema.t1.a, union_schema.t2.b on union_schema.t1.a = union_schema.t2.b union all select * from union_schema.t3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data QUERY PLAN ----------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=1.23..1472.30 rows=86130 width=8) @@ -2113,6 +2204,8 @@ explain select union_schema.t1.a, union_schema.t2.b select * from union_schema.t1 join union_schema.t2 on union_schema.t1.a = union_schema.t2.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data a | b | a | b ----+----+----+---- 2 | 2 | 2 | 2 @@ -2132,6 +2225,8 @@ select union_schema.t1.a, union_schema.t2.b on union_schema.t1.a = union_schema.t2.b union all select * from union_schema.t3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data a | b ----+---- 2 | 2 @@ -2148,11 +2243,17 @@ select * from union_schema.t3; truncate union_schema.t1, union_schema.t2; insert into union_schema.t1 select i, i from generate_series(1,20)i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data insert into union_schema.t2 select i, i from generate_series(1,10)i; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data analyze union_schema.t1; analyze union_schema.t2; explain select * from union_schema.t1 join union_schema.t2 on union_schema.t1.a = union_schema.t2.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data QUERY PLAN ----------------------------------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) (cost=1.32..2.90 rows=10 width=16) @@ -2171,6 +2272,8 @@ explain select union_schema.t1.a, union_schema.t2.b on union_schema.t1.a = union_schema.t2.b union all select * from union_schema.t3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data QUERY PLAN ----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=1.32..1472.20 rows=86130 width=8) @@ -2189,6 +2292,8 @@ explain select union_schema.t1.a, union_schema.t2.b select * from union_schema.t1 join union_schema.t2 on union_schema.t1.a = union_schema.t2.b; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data a | b | a | b ----+----+----+---- 1 | 1 | 1 | 1 @@ -2208,6 +2313,8 @@ select union_schema.t1.a, union_schema.t2.b on union_schema.t1.a = union_schema.t2.b union all select * from union_schema.t3; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Unknown error: Partially Distributed Data a | b ----+---- 1 | 1 @@ -2223,6 +2330,165 @@ select * from union_schema.t3; (10 rows) reset allow_system_table_mods; +-- The following tests demonstrate the plan alternative +-- where ORCA requests union all's outer child to become +-- a Non-Singleton. This alternative is at times costed +-- lower than aligning the inner child's distribution spec +-- with that delivered by the outer child. +-- +-- Replicated ∪ Hashed +create table rep (a int) distributed replicated; +insert into rep select i from generate_series (1, 10) i; +create table dist (a int); +insert into dist select i from generate_series (1, 1000) i; +analyze dist; +analyze rep; +-- It's more cost-effective to apply a duplicate-sensitive +-- random motion (non-phyiscal) on a replicated table, +-- than a broadcast motion on a distributed table. +explain select a from rep union all select a from dist; +QUERY PLAN +___________ +{ + 'child' => [ + { + 'child' => [ + { + 'child' => [ + { + 'id' => 4, + 'parent' => 3, + 'short' => 'Seq Scan on rep' + } + ], + 'id' => 3, + 'parent' => 2, + 'short' => 'Result' + }, + { + 'id' => 5, + 'parent' => 2, + 'short' => 'Seq Scan on dist' + } + ], + 'id' => 2, + 'parent' => 1, + 'short' => 'Append' + } + ], + 'id' => 1, + 'recvsize' => 1, + 'segments' => 3, + 'sendsize' => 3, + 'short' => 'Gather Motion' +} +GP_IGNORE:(7 rows) + +-- Universal ∪ Random +create table rand (a int) distributed randomly; +insert into rand select i from generate_series (1, 10000) i; +analyze rand; +-- It's more cost-effective to apply a duplicate-sensitive +-- random motion (non-physical) on a universal TVF, than a +-- gather motion on a randomly distributed table. +explain select i from generate_series(1,1000) i union all select a from rand; +QUERY PLAN +___________ +{ + 'child' => [ + { + 'child' => [ + { + 'child' => [ + { + 'id' => 4, + 'parent' => 3, + 'short' => 'Function Scan on generate_series' + } + ], + 'id' => 3, + 'parent' => 2, + 'short' => 'Result' + }, + { + 'id' => 5, + 'parent' => 2, + 'short' => 'Seq Scan on rand' + } + ], + 'id' => 2, + 'parent' => 1, + 'short' => 'Append' + } + ], + 'id' => 1, + 'recvsize' => 1, + 'segments' => 3, + 'sendsize' => 3, + 'short' => 'Gather Motion' +} +GP_IGNORE:(7 rows) + +------------------------------------------------------------------------------- +--Test case to check parallel union all with 'json' type 1st column in project list +------------------------------------------------------------------------------- +set optimizer_parallel_union to on; +drop table if exists my_table; +create table my_table ( id serial primary key, json_data json); +insert into my_table (json_data) values ('{"name": "Name1", "age": 10}'); +insert into my_table (json_data) values ('{"name": "Name2", "age": 20}'); +insert into my_table (json_data) values ('{"name": "Name3", "age": 30}'); +insert into my_table (json_data) values ('{"name": "Name4", "age": 40}'); +explain select json_data from my_table where json_data->>'age' = '30' union all select json_data from my_table where json_data->>'age' = '40' ; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8) + -> Append (cost=0.00..431.00 rows=1 width=8) + -> Result (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) + Hash Key: my_table.id + -> Seq Scan on my_table (cost=0.00..431.00 rows=1 width=12) + Filter: ((json_data ->> 'age'::text) = '30'::text) + -> Result (cost=0.00..431.00 rows=1 width=8) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=8) + Hash Key: my_table_1.id + -> Seq Scan on my_table my_table_1 (cost=0.00..431.00 rows=1 width=12) + Filter: ((json_data ->> 'age'::text) = '40'::text) + Optimizer: GPORCA +(13 rows) + +select json_data from my_table where json_data->>'age' = '30' union all select json_data from my_table where json_data->>'age' = '40' ; + json_data +------------------------------ + {"name": "Name3", "age": 30} + {"name": "Name4", "age": 40} +(2 rows) + +explain select json_data,id from my_table where json_data->>'age' = '30' union all select json_data,id from my_table where json_data->>'age' = '40' ; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Append (cost=0.00..431.00 rows=1 width=12) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) + Hash Key: my_table.id + -> Seq Scan on my_table (cost=0.00..431.00 rows=1 width=12) + Filter: ((json_data ->> 'age'::text) = '30'::text) + -> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=12) + Hash Key: my_table_1.id + -> Seq Scan on my_table my_table_1 (cost=0.00..431.00 rows=1 width=12) + Filter: ((json_data ->> 'age'::text) = '40'::text) + Optimizer: GPORCA +(11 rows) + +select json_data,id from my_table where json_data->>'age' = '30' union all select json_data,id from my_table where json_data->>'age' = '40' ; + json_data | id +------------------------------+---- + {"name": "Name3", "age": 30} | 3 + {"name": "Name4", "age": 40} | 4 +(2 rows) + +set optimizer_parallel_union to off; +drop table if exists my_table; -- -- Clean up -- diff --git a/contrib/pax_storage/src/test/regress/expected/union_optimizer.out b/contrib/pax_storage/src/test/regress/expected/union_optimizer.out index 7351e93fd83..7eb78801078 100644 --- a/contrib/pax_storage/src/test/regress/expected/union_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/union_optimizer.out @@ -1,3 +1,4 @@ +set optimizer_trace_fallback = on; -- -- UNION (also INTERSECT, EXCEPT) -- @@ -708,6 +709,8 @@ reset enable_hashagg; set enable_hashagg to on; explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------- Unique @@ -721,6 +724,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (8 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,2) @@ -730,6 +735,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------------- SetOp Intersect @@ -744,6 +751,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va (9 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,2) @@ -751,6 +760,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------------- SetOp Except @@ -765,6 +776,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value (9 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,3) @@ -775,6 +788,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value -- type is hashable. (Otherwise, this would fail at execution time.) explain (costs off) select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------- Unique @@ -788,6 +803,8 @@ select x from (values (row(100::money)), (row(200::money))) _(x) union select x (8 rows) select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ----------- ($100.00) @@ -824,6 +841,8 @@ drop type ct1; set enable_hashagg to off; explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------- Unique @@ -837,6 +856,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (8 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,2) @@ -846,6 +867,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------------- SetOp Intersect @@ -860,6 +883,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va (9 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,2) @@ -867,6 +892,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va explain (costs off) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION QUERY PLAN ----------------------------------------------------- SetOp Except @@ -881,6 +908,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value (9 rows) select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: ROW EXPRESSION x ------- (1,3) @@ -982,7 +1011,7 @@ SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1; ERROR: column "q2" does not exist LINE 1: ... int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1... ^ -HINT: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query. -- But this should work: SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))) ORDER BY 1; q1 @@ -1225,6 +1254,8 @@ explain (costs off) UNION ALL SELECT ab FROM t2) t ORDER BY 1 LIMIT 8; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables QUERY PLAN ----------------------------------------------------------------- Limit @@ -1245,6 +1276,8 @@ explain (costs off) UNION ALL SELECT ab FROM t2) t ORDER BY 1 LIMIT 8; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables ab ---- ab @@ -1273,6 +1306,8 @@ select event_id union all select event_id from other_events) ss order by event_id; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables QUERY PLAN ---------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) @@ -1297,6 +1332,8 @@ explain (costs off) UNION ALL SELECT 2 AS t, * FROM tenk1 b) c WHERE t = 2; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation QUERY PLAN ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) @@ -1379,6 +1416,8 @@ SELECT * FROM SELECT 2 AS t, 4 AS x) ss WHERE x > 3 ORDER BY x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions QUERY PLAN ------------------------------------------------------------------------------ Sort @@ -1403,6 +1442,8 @@ SELECT * FROM SELECT 2 AS t, 4 AS x) ss WHERE x > 3 ORDER BY x; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions t | x ---+--- 2 | 4 @@ -1561,3 +1602,4 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +reset optimizer_trace_fallback; diff --git a/contrib/pax_storage/src/test/regress/expected/update_gp_optimizer.out b/contrib/pax_storage/src/test/regress/expected/update_gp_optimizer.out index f96c7d0fca0..50aa3dbb064 100644 --- a/contrib/pax_storage/src/test/regress/expected/update_gp_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/update_gp_optimizer.out @@ -85,10 +85,10 @@ ERROR: can't split update for inherit table: base_tbl (preptlist.c:138) Update on child_a base_tbl_1 Update on child_b base_tbl_2 -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) - -> Split + -> Split Update -> Seq Scan on base_tbl -> Explicit Redistribute Motion 3:3 (slice2; segments: 3) - -> Split + -> Split Update -> Seq Scan on child_a base_tbl_1 -> Seq Scan on child_b base_tbl_2 Optimizer: Postgres query optimizer @@ -435,7 +435,7 @@ EXPLAIN (COSTS OFF ) UPDATE tab3 SET C1 = C1 + 1, C5 = C5+1; -> Result -> Redistribute Motion 3:3 (slice1; segments: 3) Hash Key: c1, c2, c3 - -> Split + -> Split Update -> Seq Scan on tab3 Optimizer: GPORCA (9 rows) @@ -690,7 +690,7 @@ explain update nosplitupdate set a=0 where a=1 and a<1; One-Time Filter: true -> Result (cost=0.00..0.00 rows=0 width=22) -> Result (cost=0.00..0.00 rows=0 width=18) - -> Split (cost=0.00..0.00 rows=0 width=18) + -> Split Update (cost=0.00..0.00 rows=0 width=18) -> Result (cost=0.00..0.00 rows=0 width=18) One-Time Filter: false Optimizer: Pivotal Optimizer (GPORCA) @@ -713,7 +713,7 @@ explain update tsplit_entry set c = s.a from (select count(*) as a from gp_segme -> Redistribute Motion 1:3 (slice1) (cost=0.00..882689.42 rows=4 width=22) Hash Key: tsplit_entry_1.c -> Result (cost=0.00..882689.42 rows=4 width=22) - -> Split (cost=0.00..882689.42 rows=4 width=18) + -> Split Update (cost=0.00..882689.42 rows=4 width=18) -> Nested Loop (cost=0.00..882689.42 rows=2 width=22) Join Filter: true -> Aggregate (cost=0.00..0.00 rows=1 width=8) @@ -813,6 +813,97 @@ SELECT tableoid::regclass, * FROM update_gp_rangep WHERE b = 1; update_gp_rangep_10_to_20 | 11 | 1 | 4 (3 rows) +-- Test for update with LASJ_NOTIN +-- See Issue: https://github.com/greenplum-db/gpdb/issues/13265 +-- Actually master branch does not have the above issue even master +-- does have the same problematic code (other parts of code are +-- refactored). Also cherry-pick the case to master and keep it +-- since more test cases do no harm. +create table t1_13265(a int, b int, c int, d int) distributed by (a); +create table t2_13265(a int, b int, c int, d int) distributed by (a); +insert into t1_13265 values (1, null, 1, 1); +insert into t2_13265 values (2, null, 2, 2); +explain (verbose, costs off) +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Update on public.t1_13265 + -> Nested Loop Left Anti Semi (Not-In) Join + Output: 2, t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.*, t2_13265.ctid + Join Filter: ((t1_13265.c = t2_13265.c) AND (t1_13265.d = t2_13265.d)) + -> Seq Scan on public.t1_13265 + Output: t1_13265.ctid, t1_13265.gp_segment_id, t1_13265.*, t1_13265.c, t1_13265.d + -> Materialize + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Broadcast Motion 1:3 (slice1; segments: 1) + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + -> Seq Scan on public.t2_13265 + Output: t2_13265.ctid, t2_13265.c, t2_13265.d + Filter: (t2_13265.a = 2) +(15 rows) + +update t1_13265 set b = 2 where +(c, d) not in (select c, d from t2_13265 where a = 2); +select * from t1_13265; + a | b | c | d +---+---+---+--- + 1 | 2 | 1 | 1 +(1 row) + +-- test for update on partition table +CREATE TABLE into_table ( + a numeric(10,0) NOT NULL, + b numeric(10,0) NOT NULL, + c numeric(10,0) NOT NULL, + d character varying(4), + e character varying(10), + f int +) DISTRIBUTED BY (a, b, c) PARTITION BY RANGE(f) (start (1) end(5) every(1)); +CREATE TABLE from_table ( + a numeric(10,0) NOT NULL, + b numeric(10,0) NOT NULL, + c numeric(10,0) NOT NULL, + d character varying(4), + e character varying(10), + f int +) DISTRIBUTED BY (a); +insert into into_table select i*1.5,i*2,i*3,'dd'||i,'ee'||i, i from generate_series(1,4) i; +insert into from_table select i*1.5,i*2,i*3,'xx'||i,'yy'||i, i+1 from generate_series(1,3) i; +explain (costs off) +update into_table set d=from_table.d, e=from_table.e, f=from_table.f from from_table +where into_table.a=from_table.a and into_table.b=from_table.b and into_table.c=from_table.c; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Update on into_table + Update on into_table_1_prt_1 into_table_1 + Update on into_table_1_prt_2 into_table_2 + Update on into_table_1_prt_3 into_table_3 + Update on into_table_1_prt_4 into_table_4 + -> Hash Join + Hash Cond: ((into_table.a = from_table.a) AND (into_table.b = from_table.b) AND (into_table.c = from_table.c)) + -> Append + -> Seq Scan on into_table_1_prt_1 into_table_1 + -> Seq Scan on into_table_1_prt_2 into_table_2 + -> Seq Scan on into_table_1_prt_3 into_table_3 + -> Seq Scan on into_table_1_prt_4 into_table_4 + -> Hash + -> Redistribute Motion 3:3 (slice1; segments: 3) + Hash Key: from_table.a, from_table.b, from_table.c + -> Seq Scan on from_table +(17 rows) + +update into_table set d=from_table.d, e=from_table.e, f=from_table.f from from_table +where into_table.a=from_table.a and into_table.b=from_table.b and into_table.c=from_table.c; +select * from into_table order by a; + a | b | c | d | e | f +---+---+----+-----+-----+--- + 2 | 2 | 3 | xx1 | yy1 | 2 + 3 | 4 | 6 | xx2 | yy2 | 3 + 5 | 6 | 9 | xx3 | yy3 | 4 + 6 | 8 | 12 | dd4 | ee4 | 4 +(4 rows) + -- start_ignore drop table r; drop table s; diff --git a/contrib/pax_storage/src/test/regress/expected/update_optimizer.out b/contrib/pax_storage/src/test/regress/expected/update_optimizer.out index add985668c2..14b54ef6282 100755 --- a/contrib/pax_storage/src/test/regress/expected/update_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/update_optimizer.out @@ -177,7 +177,7 @@ UPDATE update_test t Update on public.update_test t -> Explicit Redistribute Motion 3:3 (slice1; segments: 3) Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, (DMLAction) - -> Split + -> Split Update Output: ($1), ($2), t.c, ((SubPlan 1 (returns $1,$2))), t.ctid, t.gp_segment_id, t.*, DMLAction -> Seq Scan on public.update_test t Output: $1, $2, t.c, (SubPlan 1 (returns $1,$2)), t.ctid, t.gp_segment_id, t.* @@ -529,35 +529,71 @@ ERROR: trigger "trans_inserttrig" for table "range_parted" does not exist -- the desired transition tuple format. But conversion happens when there is a -- BR trigger because the trigger can change the inserted row. So install a -- BR triggers on those child partitions where the rows will be moved. --- Pax not support this trigger --- CREATE FUNCTION func_parted_mod_b() RETURNS trigger AS $$ --- BEGIN --- NEW.b = NEW.b + 1; --- return NEW; --- END $$ language plpgsql; --- CREATE TRIGGER trig_c1_100 BEFORE UPDATE OR INSERT ON part_c_1_100 --- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); --- CREATE TRIGGER trig_d1_15 BEFORE UPDATE OR INSERT ON part_d_1_15 --- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); --- CREATE TRIGGER trig_d15_20 BEFORE UPDATE OR INSERT ON part_d_15_20 --- FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); --- :init_range_parted; --- UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end) WHERE a = 'b' and b > 10 and c >= 96; --- :show_data; --- :init_range_parted; --- UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; --- :show_data; +CREATE FUNCTION func_parted_mod_b() RETURNS trigger AS $$ +BEGIN + NEW.b = NEW.b + 1; + return NEW; +END $$ language plpgsql; +CREATE TRIGGER trig_c1_100 BEFORE UPDATE OR INSERT ON part_c_1_100 + FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +ERROR: ON UPDATE triggers are not supported on append-only tables +CREATE TRIGGER trig_d1_15 BEFORE UPDATE OR INSERT ON part_d_1_15 + FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +ERROR: ON UPDATE triggers are not supported on append-only tables +CREATE TRIGGER trig_d15_20 BEFORE UPDATE OR INSERT ON part_d_15_20 + FOR EACH ROW EXECUTE PROCEDURE func_parted_mod_b(); +ERROR: ON UPDATE triggers are not supported on append-only tables +:init_range_parted; +UPDATE range_parted set c = (case when c = 96 then 110 else c + 1 end) WHERE a = 'b' and b > 10 and c >= 96; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | + part_a_1_a_10 | a | 1 | 1 | 1 | + part_c_1_100 | b | 13 | 98 | 2 | + part_d_15_20 | b | 15 | 106 | 16 | + part_d_15_20 | b | 17 | 106 | 19 | + part_d_1_15 | b | 12 | 110 | 1 | +(6 rows) + +:init_range_parted; +UPDATE range_parted set c = c + 50 WHERE a = 'b' and b > 10 and c >= 96; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | + part_a_1_a_10 | a | 1 | 1 | 1 | + part_d_15_20 | b | 15 | 155 | 16 | + part_d_15_20 | b | 17 | 155 | 19 | + part_d_1_15 | b | 12 | 146 | 1 | + part_d_1_15 | b | 13 | 147 | 2 | +(6 rows) + -- Case where per-partition tuple conversion map array is allocated, but the -- map is not required for the particular tuple that is routed, thanks to -- matching table attributes of the partition and the target table. --- :init_range_parted; --- UPDATE range_parted set b = 15 WHERE b = 1; --- :show_data; --- DROP TRIGGER trans_updatetrig ON range_parted; --- DROP TRIGGER trig_c1_100 ON part_c_1_100; --- DROP TRIGGER trig_d1_15 ON part_d_1_15; --- DROP TRIGGER trig_d15_20 ON part_d_15_20; --- DROP FUNCTION func_parted_mod_b(); +:init_range_parted; +UPDATE range_parted set b = 15 WHERE b = 1; +:show_data; + partname | a | b | c | d | e +----------------+---+----+-----+----+--- + part_a_10_a_20 | a | 10 | 200 | 1 | + part_a_10_a_20 | a | 15 | 1 | 1 | + part_c_1_100 | b | 12 | 96 | 1 | + part_c_1_100 | b | 13 | 97 | 2 | + part_d_15_20 | b | 15 | 105 | 16 | + part_d_15_20 | b | 17 | 105 | 19 | +(6 rows) + +DROP TRIGGER trans_updatetrig ON range_parted; +ERROR: trigger "trans_updatetrig" for table "range_parted" does not exist +DROP TRIGGER trig_c1_100 ON part_c_1_100; +ERROR: trigger "trig_c1_100" for table "part_c_1_100" does not exist +DROP TRIGGER trig_d1_15 ON part_d_1_15; +ERROR: trigger "trig_d1_15" for table "part_d_1_15" does not exist +DROP TRIGGER trig_d15_20 ON part_d_15_20; +ERROR: trigger "trig_d15_20" for table "part_d_15_20" does not exist +DROP FUNCTION func_parted_mod_b(); -- RLS policies with update-row-movement ----------------------------------------- ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; @@ -810,30 +846,44 @@ UPDATE list_default set a = 'x' WHERE a = 'd'; DROP TABLE list_parted; -- Test retrieval of system columns with non-consistent partition row types. -- This is only partially supported, as seen in the results. --- Pax used virtual tuple, which without xmin/xmax --- create table utrtest (a int, b text) partition by list (a); --- create table utr1 (a int check (a in (1)), q text, b text); --- create table utr2 (a int check (a in (2)), b text); --- alter table utr1 drop column q; --- alter table utrtest attach partition utr1 for values in (1); --- alter table utrtest attach partition utr2 for values in (2); +create table utrtest (a int, b text) partition by list (a); +create table utr1 (a int check (a in (1)), q text, b text); +create table utr2 (a int check (a in (2)), b text); +alter table utr1 drop column q; +alter table utrtest attach partition utr1 for values in (1); +alter table utrtest attach partition utr2 for values in (2); -- xmin_ok is likely false, xmin and pg_current_xact_id() comes from -- data segment and master, respectively. --- insert into utrtest values (1, 'foo') --- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; --- insert into utrtest values (2, 'bar') --- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails --- insert into utrtest values (2, 'bar') --- returning *, tableoid::regclass; --- update utrtest set b = b || b from (values (1), (2)) s(x) where a = s.x --- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; --- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x --- returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails --- update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x --- returning *, tableoid::regclass; --- delete from utrtest --- returning *, tableoid::regclass, xmax = pg_current_xact_id()::xid as xmax_ok; --- drop table utrtest; +insert into utrtest values (1, 'foo') + returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; +ERROR: virtual tuple table slot does not have system attributes (execTuples.c:163) +insert into utrtest values (2, 'bar') + returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails +ERROR: virtual tuple table slot does not have system attributes (execTuples.c:163) +insert into utrtest values (2, 'bar') + returning *, tableoid::regclass; + a | b | tableoid +---+-----+---------- + 2 | bar | utr2 +(1 row) + +update utrtest set b = b || b from (values (1), (2)) s(x) where a = s.x + returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; +ERROR: virtual tuple table slot does not have system attributes (execTuples.c:163) +update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x + returning *, tableoid::regclass, xmin = pg_current_xact_id()::xid as xmin_ok; -- fails +ERROR: virtual tuple table slot does not have system attributes (execTuples.c:163) +update utrtest set a = 3 - a from (values (1), (2)) s(x) where a = s.x + returning *, tableoid::regclass; + a | b | x | tableoid +---+-----+---+---------- + 1 | bar | 2 | utr1 +(1 row) + +delete from utrtest + returning *, tableoid::regclass, xmax = pg_current_xact_id()::xid as xmax_ok; +ERROR: not implemented yet on pax relations: TupleFetchRowVersion +drop table utrtest; -------------- -- Some more update-partition-key test scenarios below. This time use list -- partitions. @@ -879,8 +929,9 @@ BEGIN NEW.b = 2; -- This is changing partition key column. return NEW; END $$ LANGUAGE plpgsql; --- CREATE TRIGGER parted_mod_b before update on sub_part1 --- for each row execute procedure func_parted_mod_b(); +CREATE TRIGGER parted_mod_b before update on sub_part1 + for each row execute procedure func_parted_mod_b(); +ERROR: ON UPDATE triggers are not supported on append-only tables SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; tableoid | a | b | c ------------+---+----+---- @@ -902,7 +953,8 @@ SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; sub_part2 | 1 | 2 | 10 (4 rows) --- DROP TRIGGER parted_mod_b ON sub_part1; +DROP TRIGGER parted_mod_b ON sub_part1; +ERROR: trigger "parted_mod_b" for table "sub_part1" does not exist -- If BR DELETE trigger prevented DELETE from happening, we should also skip -- the INSERT if that delete is part of UPDATE=>DELETE+INSERT. CREATE OR REPLACE FUNCTION func_parted_mod_b() returns trigger as $$ @@ -910,8 +962,9 @@ BEGIN raise notice 'Trigger: Got OLD row %, but returning NULL', OLD; return NULL; END $$ LANGUAGE plpgsql; --- CREATE TRIGGER trig_skip_delete before delete on sub_part2 --- for each row execute procedure func_parted_mod_b(); +CREATE TRIGGER trig_skip_delete before delete on sub_part2 + for each row execute procedure func_parted_mod_b(); +ERROR: ON DELETE triggers are not supported on append-only tables UPDATE list_parted set b = 1 WHERE c = 70; SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; tableoid | a | b | c @@ -923,7 +976,8 @@ SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; (4 rows) -- Drop the trigger. Now the row should be moved. --- DROP TRIGGER trig_skip_delete ON sub_part2; +DROP TRIGGER trig_skip_delete ON sub_part2; +ERROR: trigger "trig_skip_delete" for table "sub_part2" does not exist UPDATE list_parted set b = 1 WHERE c = 70; SELECT tableoid::regclass::text, * FROM list_parted ORDER BY 1, 2, 3, 4; tableoid | a | b | c diff --git a/contrib/pax_storage/src/test/regress/expected/with_clause_optimizer.out b/contrib/pax_storage/src/test/regress/expected/with_clause_optimizer.out index 8a017070bce..e74156c5556 100644 --- a/contrib/pax_storage/src/test/regress/expected/with_clause_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/with_clause_optimizer.out @@ -1029,7 +1029,7 @@ SELECT pg_get_viewdef('my_view'::regclass); SELECT sum(with_test1.value) AS sum+ FROM with_test1 + ) + - SELECT my_sum.total + + SELECT total + FROM my_sum; (1 row) @@ -1040,7 +1040,7 @@ SELECT pg_get_viewdef('my_view'::regclass, true); SELECT sum(with_test1.value) AS sum+ FROM with_test1 + ) + - SELECT my_sum.total + + SELECT total + FROM my_sum; (1 row) @@ -1063,7 +1063,7 @@ SELECT pg_get_viewdef('my_view'::regclass); SELECT sum(my_group_sum.total) AS sum + FROM my_group_sum + ) + - SELECT my_sum.total + + SELECT total + FROM my_sum; (1 row) @@ -1079,7 +1079,7 @@ SELECT pg_get_viewdef('my_view'::regclass, true); SELECT sum(my_group_sum.total) AS sum + FROM my_group_sum + ) + - SELECT my_sum.total + + SELECT total + FROM my_sum; (1 row) @@ -2196,6 +2196,40 @@ WITH RECURSIVE r1 AS ( ) SELECT * FROM r1 LIMIT 1; ERROR: joining nested RECURSIVE clauses is not supported +-- GPDB +-- Greenplum does not support window functions in recursive part's target list +-- See issue https://github.com/greenplum-db/gpdb/issues/13299 for details. +-- Previously the following SQL will PANIC or Assert Fail if compiled with assert. +create table t_window_ordered_set_agg_rte(a bigint, b bigint, c bigint); +insert into t_window_ordered_set_agg_rte select i,i,i from generate_series(1, 10)i; +-- should error out during parse-analyze +with recursive rcte(x,y) as +( + select a, b from t_window_ordered_set_agg_rte + union all + select (first_value(c) over (partition by b))::int, a+x + from rcte, + t_window_ordered_set_agg_rte as t + where t.b = x +) +select * from rcte limit 10; +ERROR: window functions in the target list of a recursive query is not supported +LINE 5: select (first_value(c) over (partition by b))::int, a+x + ^ +-- should error out during parse-analyze +with recursive rcte(x,y) as +( + select a, b from t_window_ordered_set_agg_rte + union all + select first_value(c) over (partition by b), a+x + from rcte, + t_window_ordered_set_agg_rte as t + where t.b = x +) +select * from rcte limit 10; +ERROR: window functions in the target list of a recursive query is not supported +LINE 5: select first_value(c) over (partition by b), a+x + ^ -- This used to deadlock, before the IPC between ShareInputScans across -- slices was rewritten. set gp_cte_sharing=on; diff --git a/contrib/pax_storage/src/test/regress/expected/workfile/sort_spill.out b/contrib/pax_storage/src/test/regress/expected/workfile/sort_spill.out index 70c9aa7b031..534cbd0c4f9 100644 --- a/contrib/pax_storage/src/test/regress/expected/workfile/sort_spill.out +++ b/contrib/pax_storage/src/test/regress/expected/workfile/sort_spill.out @@ -33,29 +33,7 @@ insert into testsort select i, i % 1000, i % 100000, i % 75 from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; set statement_mem="1MB"; set gp_resqueue_print_operator_memory_limits=on; -select avg(i2) from (select i1,i2 from testsort order by i2) foo; - avg ----------------------- - 499.5000000000000000 -(1 row) - -select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2;'); - is_workfile_created ---------------------- -(0 rows) - -select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2 limit 50000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -select avg(i2) from (select i1,i2 from testsort order by i2) foo; - avg ----------------------- - 499.5000000000000000 -(1 row) - +analyze testsort; select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2;'); is_workfile_created --------------------- diff --git a/contrib/pax_storage/src/test/regress/input/uao_dml/uao_dml.source b/contrib/pax_storage/src/test/regress/input/uao_dml/uao_dml.source index adb9f033b70..b6e7c05a8c6 100644 --- a/contrib/pax_storage/src/test/regress/input/uao_dml/uao_dml.source +++ b/contrib/pax_storage/src/test/regress/input/uao_dml/uao_dml.source @@ -70,11 +70,11 @@ select reltuples from pg_class where relname = 'ao2_@amname@'; select count(*) from ao2_@amname@; delete from ao2_@amname@ where i < 27; -- make sure the pgstat msg sends out at least PGSTAT_STAT_INTERVAL msec since last one -select pg_sleep(0.6); +SELECT gp_stat_force_next_flush(); analyze ao2_@amname@; select count(*) from ao2_@amname@; -- make sure the pgstat msg sends out at least PGSTAT_STAT_INTERVAL msec since last one -select pg_sleep(0.6); +SELECT gp_stat_force_next_flush(); select pg_stat_get_live_tuples(c.oid) as livetuples, pg_stat_get_dead_tuples(c.oid) as deadtuples from pg_class c where relname = 'ao2_@amname@'; diff --git a/contrib/pax_storage/src/test/regress/output/uao_dml/uao_dml.source b/contrib/pax_storage/src/test/regress/output/uao_dml/uao_dml.source index a3f3ced21d9..be230b0f640 100644 --- a/contrib/pax_storage/src/test/regress/output/uao_dml/uao_dml.source +++ b/contrib/pax_storage/src/test/regress/output/uao_dml/uao_dml.source @@ -147,9 +147,9 @@ select count(*) from ao2_@amname@; delete from ao2_@amname@ where i < 27; -- make sure the pgstat msg sends out at least PGSTAT_STAT_INTERVAL msec since last one -select pg_sleep(0.6); - pg_sleep ----------- +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- (1 row) @@ -161,9 +161,9 @@ select count(*) from ao2_@amname@; (1 row) -- make sure the pgstat msg sends out at least PGSTAT_STAT_INTERVAL msec since last one -select pg_sleep(0.6); - pg_sleep ----------- +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- (1 row) diff --git a/contrib/pax_storage/src/test/regress/sql/autostats.sql b/contrib/pax_storage/src/test/regress/sql/autostats.sql index b7479179ff9..bd345ebb7c2 100644 --- a/contrib/pax_storage/src/test/regress/sql/autostats.sql +++ b/contrib/pax_storage/src/test/regress/sql/autostats.sql @@ -8,6 +8,7 @@ -- m/^LOG: .*Feature not supported: Queries on master-only tables./ -- m/^LOG: .*Falling back to Postgres-based planner because GPORCA does not support the following feature: Queries on master-only tables./ -- m/^LOG:.*ERROR,"PG exception raised"/ +-- m/^LOG: An exception was encountered during the execution of statement:/ -- end_matchignore set gp_autostats_mode=on_change; set gp_autostats_on_change_threshold=9; diff --git a/contrib/pax_storage/src/test/regress/sql/gp_explain.sql b/contrib/pax_storage/src/test/regress/sql/gp_explain.sql index ca9bbdb7d69..591d8f61540 100644 --- a/contrib/pax_storage/src/test/regress/sql/gp_explain.sql +++ b/contrib/pax_storage/src/test/regress/sql/gp_explain.sql @@ -1,3 +1,7 @@ +-- start_matchsubs +-- m/Seq Scan on recursive_table_ic \(actual rows=\d+ loops=\d+\)/ +-- s/Seq Scan on recursive_table_ic \(actual rows=\d+ loops=\d+\)/Seq Scan on recursive_table_ic (actual rows=XXXX loops=1)/ +-- end_matchsubs create schema gpexplain; set search_path = gpexplain; diff --git a/contrib/pax_storage/src/test/regress/sql/workfile/sort_spill.sql b/contrib/pax_storage/src/test/regress/sql/workfile/sort_spill.sql index a5374b20731..fe49dace78f 100644 --- a/contrib/pax_storage/src/test/regress/sql/workfile/sort_spill.sql +++ b/contrib/pax_storage/src/test/regress/sql/workfile/sort_spill.sql @@ -37,11 +37,7 @@ insert into testsort select i, i % 1000, i % 100000, i % 75 from set statement_mem="1MB"; set gp_resqueue_print_operator_memory_limits=on; -select avg(i2) from (select i1,i2 from testsort order by i2) foo; -select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2;'); -select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2 limit 50000;'); - -select avg(i2) from (select i1,i2 from testsort order by i2) foo; +analyze testsort; select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2;'); select * from sort_spill.is_workfile_created('explain (analyze, verbose) select i1,i2 from testsort order by i2 limit 50000;');