apache · yjhjstz · Apr 17, 2026 · Apr 17, 2026
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
@@ -884,7 +884,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 
 			if (Gp_role == GP_ROLE_DISPATCH && GpPolicyIsPartitioned(onerel->rd_cdbpolicy))
 			{
-				stats->stadistinctbyseg = colNDVBySeg[i];
+				stats->stadistinctbyseg = colNDVBySeg[stats->attr->attnum - 1];
 			}
 
 			stats->tupDesc = onerel->rd_att;
@@ -1008,7 +1008,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 
 			if (Gp_role == GP_ROLE_EXECUTE) {
 				Assert(ctx->stadistincts);
-				ctx->stadistincts[i] = Float8GetDatum(stats->stadistinct);
+				ctx->stadistincts[stats->attr->attnum - 1] = Float8GetDatum(stats->stadistinct);
 			}
 
 			MemoryContextResetAndDeleteChildren(col_context);

diff --git a/src/test/regress/expected/analyze.out b/src/test/regress/expected/analyze.out
@@ -1314,3 +1314,30 @@ select * from pg_stats where tablename like 'part2';
 (1 row)
 
 drop table multipart cascade;
+--
+-- Test that column-specific ANALYZE indexes the per-segment NDV array by attnum-1 (not by the loop index).
+-- When ANALYZE t(b) is run, b is the only analyzed column, so the QD loop has i=0 for b (attnum=2)
+-- and attnum-1=1 != i=0. Without the fix, colNDVBySeg[i=0] reads column a's NDV
+-- instead of column b's NDV. The data below gives a 5, b 100 and c 50 distinct values.
+--
+CREATE TABLE analyze_col_ndv_drop (a int, b int, c int) DISTRIBUTED BY (a);
+INSERT INTO analyze_col_ndv_drop SELECT i%5, i, i%50 FROM generate_series(1, 100) i;
+-- Analyze only column b, so that its loop index (0) differs from attnum-1 (1).
+ANALYZE analyze_col_ndv_drop (b);
+-- The query reads stadistinctbyseg from whichever slot has stakind = 8; for b it should be 100 (all distinct), not ~5 (NDV of column a at index 0).
+SELECT a.attname,
+       CASE WHEN s.stakind1 = 8 THEN array_to_string(s.stavalues1, ',')
+            WHEN s.stakind2 = 8 THEN array_to_string(s.stavalues2, ',')
+            WHEN s.stakind3 = 8 THEN array_to_string(s.stavalues3, ',')
+            WHEN s.stakind4 = 8 THEN array_to_string(s.stavalues4, ',')
+            WHEN s.stakind5 = 8 THEN array_to_string(s.stavalues5, ',')
+       END AS stadistinctbyseg
+FROM pg_statistic s
+JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum
+WHERE s.starelid = 'analyze_col_ndv_drop'::regclass AND a.attname = 'b';
+ attname | stadistinctbyseg 
+---------+------------------
+ b       | 100
+(1 row)
+
+DROP TABLE analyze_col_ndv_drop;
diff --git a/src/test/regress/sql/analyze.sql b/src/test/regress/sql/analyze.sql
@@ -677,3 +677,26 @@ analyze verbose p2;
 select * from pg_stats where tablename like 'part2';
 
 drop table multipart cascade;
+
+--
+-- Test that column-specific ANALYZE indexes the per-segment NDV array by attnum-1 (not by the loop index).
+-- When ANALYZE t(b) is run, b is the only analyzed column, so the QD loop has i=0 for b (attnum=2)
+-- and attnum-1=1 != i=0. Without the fix, colNDVBySeg[i=0] reads column a's NDV
+-- instead of column b's NDV. The data below gives a 5, b 100 and c 50 distinct values.
+--
+CREATE TABLE analyze_col_ndv_drop (a int, b int, c int) DISTRIBUTED BY (a);
+INSERT INTO analyze_col_ndv_drop SELECT i%5, i, i%50 FROM generate_series(1, 100) i;
+-- Analyze only column b, so that its loop index (0) differs from attnum-1 (1).
+ANALYZE analyze_col_ndv_drop (b);
+-- The query reads stadistinctbyseg from whichever slot has stakind = 8; for b it should be 100 (all distinct), not ~5 (NDV of column a at index 0).
+SELECT a.attname,
+       CASE WHEN s.stakind1 = 8 THEN array_to_string(s.stavalues1, ',')
+            WHEN s.stakind2 = 8 THEN array_to_string(s.stavalues2, ',')
+            WHEN s.stakind3 = 8 THEN array_to_string(s.stavalues3, ',')
+            WHEN s.stakind4 = 8 THEN array_to_string(s.stavalues4, ',')
+            WHEN s.stakind5 = 8 THEN array_to_string(s.stavalues5, ',')
+       END AS stadistinctbyseg
+FROM pg_statistic s
+JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum
+WHERE s.starelid = 'analyze_col_ndv_drop'::regclass AND a.attname = 'b';
+DROP TABLE analyze_col_ndv_drop;