From e83f3ee17fb98d8f3b16e8d509606ec6e56d667a Mon Sep 17 00:00:00 2001 From: Peng Lu Date: Fri, 26 Jun 2026 00:45:26 +0800 Subject: [PATCH] HBASE-30259 Async WAL archiving causes TestMasterRegionWALCleaner flaky --- .../region/TestMasterRegionWALCleaner.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/region/TestMasterRegionWALCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/region/TestMasterRegionWALCleaner.java index 7ff130833ee2..546bcba29934 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/region/TestMasterRegionWALCleaner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/region/TestMasterRegionWALCleaner.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.hbase.master.region; +import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; +import java.time.Duration; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -82,14 +84,24 @@ public void test() throws IOException, InterruptedException { assertFalse(fs.exists(globalWALArchiveDir)); region.requestRollAll(); region.waitUntilWalRollFinished(); - // should have one + // archiving wal is called in a background thread when rolling WALs, so it is possible that when + // waitUntilWalRollFinished returns, the archived WAL files have not been moved to the global + // archive directory yet, so here we need to wait for the directory to be created and the files + // to be moved. + await().atMost(Duration.ofSeconds(15)).untilAsserted(() -> { + assertTrue(fs.exists(globalWALArchiveDir)); + assertEquals(1, fs.listStatus(globalWALArchiveDir).length); + }); FileStatus[] files = fs.listStatus(globalWALArchiveDir); assertEquals(1, files.length); + // Rebase the WAL mtime so the following timing assertions are not affected by the wait above. + // Cleaner TTL is based on file mtime. + fs.setTimes(files[0].getPath(), System.currentTimeMillis(), -1); Thread.sleep(2000); // should still be there assertTrue(fs.exists(files[0].getPath())); - Thread.sleep(6000); // should have been cleaned - assertEquals(0, fs.listStatus(globalWALArchiveDir).length); + await().atMost(Duration.ofSeconds(15)) + .untilAsserted(() -> assertEquals(0, fs.listStatus(globalWALArchiveDir).length)); } }