Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions hbase-server/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@
<artifactId>junit-vintage-engine</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1799,8 +1799,8 @@ private void setupWALAndReplication() throws IOException {
throw new RegionServerRunningException(
"Region server has already created directory at " + this.serverName.toString());
}
// Always create wal directory as now we need this when master restarts to find out the live
// region servers.
// Create wal directory here and we will never create it again in other places. This is
// important to make sure that our fencing way takes effect. See HBASE-29797 for more details.
if (!this.walFs.mkdirs(logDir)) {
throw new IOException("Can not create wal directory " + logDir);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,8 @@ protected AbstractFSWAL(final FileSystem fs, final Abortable abortable, final Pa
this.remoteFs = remoteFs;
this.remoteWALDir = remoteWALDir;

if (!fs.exists(walDir) && !fs.mkdirs(walDir)) {
throw new IOException("Unable to mkdir " + walDir);
}

// Here we only crate archive dir, without wal dir. This is to make sure that our fencing way
// takes effect. See HBASE-29797 for more details.
if (!fs.exists(this.walArchiveDir)) {
if (!fs.mkdirs(this.walArchiveDir)) {
throw new IOException("Unable to mkdir " + this.walArchiveDir);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hbase.regionserver.wal.ProtobufWALTailingReader;
import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
Expand Down Expand Up @@ -211,7 +212,7 @@ static WALProvider createProvider(Class<? extends WALProvider> clazz) throws IOE
public WALFactory(Configuration conf, String factoryId) throws IOException {
// default enableSyncReplicationWALProvider is true, only disable SyncReplicationWALProvider
// for HMaster or HRegionServer which take system table only. See HBASE-19999
this(conf, factoryId, null);
this(conf, factoryId, null, true);
}

/**
Expand All @@ -228,17 +229,30 @@ public WALFactory(Configuration conf, String factoryId) throws IOException {
*/
public WALFactory(Configuration conf, ServerName serverName, Abortable abortable)
throws IOException {
this(conf, serverName.toString(), abortable);
this(conf, serverName.toString(), abortable, false);
}

private static void createWALDirectory(Configuration conf, String factoryId) throws IOException {
FileSystem walFs = CommonFSUtils.getWALFileSystem(conf);
Path walRootDir = CommonFSUtils.getWALRootDir(conf);
Path walDir = new Path(walRootDir, AbstractFSWALProvider.getWALDirectoryName(factoryId));
if (!walFs.exists(walDir) && !walFs.mkdirs(walDir)) {
throw new IOException("Can not create wal directory " + walDir);
}
}

/**
* @param conf must not be null, will keep a reference to read params in later reader/writer
* instances.
* @param factoryId a unique identifier for this factory. used i.e. by filesystem implementations
* to make a directory
* @param abortable the server associated with this WAL file
* @param conf must not be null, will keep a reference to read params in later
* reader/writer instances.
* @param factoryId a unique identifier for this factory. used i.e. by filesystem
* implementations to make a directory
* @param abortable the server associated with this WAL file
* @param createWalDirectory pass {@code true} for testing purpose, to create the wal directory
* automatically. In normal code path, we should create it in
* HRegionServer setup.
*/
private WALFactory(Configuration conf, String factoryId, Abortable abortable) throws IOException {
private WALFactory(Configuration conf, String factoryId, Abortable abortable,
boolean createWalDirectory) throws IOException {
// until we've moved reader/writer construction down into providers, this initialization must
// happen prior to provider initialization, in case they need to instantiate a reader/writer.
timeoutMillis = conf.getInt("hbase.hlog.open.timeout", 300000);
Expand All @@ -259,6 +273,10 @@ private WALFactory(Configuration conf, String factoryId, Abortable abortable) th
REPLICATION_WAL_PROVIDER, this.abortable);
// end required early initialization
if (conf.getBoolean(WAL_ENABLED, true)) {
if (createWalDirectory) {
// for testing only
createWALDirectory(conf, factoryId);
}
WALProvider provider = createProvider(getProviderClass(WAL_PROVIDER, DEFAULT_WAL_PROVIDER));
provider.init(this, conf, null, this.abortable);
provider.addWALActionsListener(new MetricsWAL());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2194,8 +2194,9 @@ public static WAL createWal(final Configuration conf, final Path rootDir, final
// The WAL subsystem will use the default rootDir rather than the passed in rootDir
// unless I pass along via the conf.
Configuration confForWAL = new Configuration(conf);
confForWAL.set(HConstants.HBASE_DIR, rootDir.toString());
return new WALFactory(confForWAL, "hregion-" + RandomStringUtils.randomNumeric(8)).getWAL(hri);
CommonFSUtils.setRootDir(confForWAL, rootDir);
return new WALFactory(confForWAL, "hregion-" + RandomStringUtils.insecure().nextNumeric(8))
.getWAL(hri);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import static org.awaitility.Awaitility.await;

import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
* Testcase for HBASE-29797, where the lazy initialized WALProvider may recreate the WAL directory
* and cause our fencing way loses efficacy.
*/
@Tag(MasterTests.TAG)
@Tag(MediumTests.TAG)
public class TestWALFencing {

private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

@BeforeAll
public static void setUp() throws Exception {
UTIL.startMiniCluster(3);
UTIL.getAdmin().balancerSwitch(false, true);
}

@AfterAll
public static void tearDown() throws IOException {
UTIL.shutdownMiniCluster();
}

@Test
public void testMoveMeta() throws Exception {
HRegionServer metaRs = UTIL.getRSForFirstRegionInTable(TableName.META_TABLE_NAME);
HRegionServer otherRs = UTIL.getOtherRegionServer(metaRs);
// do fencing here, i.e, kill otherRs
Path splittingDir = UTIL.getMiniHBaseCluster().getMaster().getMasterWalManager()
.getLogDirs(Collections.singleton(otherRs.getServerName())).get(0);
for (FileStatus walFile : otherRs.getWALFileSystem().listStatus(splittingDir)) {
RecoverLeaseFSUtils.recoverFileLease(otherRs.getWALFileSystem(), walFile.getPath(),
otherRs.getConfiguration());
}
// move meta region to otherRs, which should fail and crash otherRs, and then master will try to
// assign meta region to another rs
RegionInfo metaRegionInfo = metaRs.getRegions().stream().map(Region::getRegionInfo)
.filter(RegionInfo::isMetaRegion).findAny().get();
UTIL.getAdmin().move(metaRegionInfo.getRegionName(), otherRs.getServerName());
// make sure that meta region is not on otherRs
await().during(Duration.ofSeconds(5)).atMost(Duration.ofSeconds(6))
.until(() -> otherRs.getRegions(TableName.META_TABLE_NAME).isEmpty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ protected void initialize() throws IOException {
.setColumnFamily(familyDescriptor).build();
RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();

fs.mkdirs(new Path(basedir, logName));
hlog = new FSHLog(fs, basedir, logName, conf);
hlog.init();
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ public void testLockupAroundBadAssignSync() throws IOException {
// the test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
fs.mkdirs(new Path(rootDir, getName()));
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
dodgyWAL.init();
LogRoller logRoller = new LogRoller(services);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,8 +371,9 @@ protected void doSync(long txid, boolean forceSync) throws IOException {
}

FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + "testMemstoreSnapshotSize");
MyFaultyFSLog faultyLog = new MyFaultyFSLog(fs, rootDir, "testMemstoreSnapshotSize", CONF);
Path rootDir = new Path(dir + method);
fs.mkdirs(new Path(rootDir, method));
MyFaultyFSLog faultyLog = new MyFaultyFSLog(fs, rootDir, method, CONF);
faultyLog.init();
region = initHRegion(tableName, null, null, CONF, false, Durability.SYNC_WAL, faultyLog,
COLUMN_FAMILY_BYTES);
Expand Down Expand Up @@ -414,10 +415,10 @@ private static WAL createWALCompatibleWithFaultyFileSystem(String callingMethod,

@Test
public void testMemstoreSizeAccountingWithFailedPostBatchMutate() throws IOException {
String testName = "testMemstoreSizeAccountingWithFailedPostBatchMutate";
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + testName);
FSHLog hLog = new FSHLog(fs, rootDir, testName, CONF);
Path rootDir = new Path(dir + method);
fs.mkdirs(new Path(rootDir, method));
FSHLog hLog = new FSHLog(fs, rootDir, method, CONF);
hLog.init();
region = initHRegion(tableName, null, null, CONF, false, Durability.SYNC_WAL, hLog,
COLUMN_FAMILY_BYTES);
Expand Down Expand Up @@ -1253,8 +1254,10 @@ public long getSyncedLength() {
};
}
}
FailAppendFlushMarkerWAL wal = new FailAppendFlushMarkerWAL(FileSystem.get(walConf),
CommonFSUtils.getRootDir(walConf), method, walConf);
FileSystem fs = FileSystem.get(walConf);
Path rootDir = CommonFSUtils.getRootDir(walConf);
fs.mkdirs(new Path(rootDir, method));
FailAppendFlushMarkerWAL wal = new FailAppendFlushMarkerWAL(fs, rootDir, method, walConf);
wal.init();
this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, CONF,
false, Durability.USE_DEFAULT, wal, family);
Expand Down Expand Up @@ -3355,8 +3358,9 @@ public void testScanner_DeleteOneFamilyNotAnother() throws IOException {
@Test
public void testDataInMemoryWithoutWAL() throws IOException {
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + "testDataInMemoryWithoutWAL");
FSHLog hLog = new FSHLog(fs, rootDir, "testDataInMemoryWithoutWAL", CONF);
Path rootDir = new Path(dir + method);
fs.mkdirs(new Path(rootDir, method));
FSHLog hLog = new FSHLog(fs, rootDir, method, CONF);
hLog.init();
// This chunk creation is done throughout the code base. Do we want to move it into core?
// It is missing from this test. W/o it we NPE.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ public class TestLogRoller {
private static FileSystem FS;

@Before
public void setup() throws Exception {
public void setUp() throws Exception {
CONF = TEST_UTIL.getConfiguration();
CONF.setInt("hbase.regionserver.logroll.period", LOG_ROLL_PERIOD);
CONF.setInt(HConstants.THREAD_WAKE_FREQUENCY, 300);
ROOT_DIR = TEST_UTIL.getRandomDir();
FS = FileSystem.get(CONF);
FS.mkdirs(new Path(ROOT_DIR, LOG_DIR));
RegionServerServices services = Mockito.mock(RegionServerServices.class);
Mockito.when(services.getConfiguration()).thenReturn(CONF);
ROLLER = new LogRoller(services);
Expand All @@ -77,7 +78,7 @@ public void setup() throws Exception {
public void tearDown() throws Exception {
ROLLER.close();
FS.close();
TEST_UTIL.shutdownMiniCluster();
TEST_UTIL.cleanupTestDir();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public void setUp() throws Exception {
final Path hbaseWALDir = TEST_UTIL.createWALRootDir();
DIR = new Path(hbaseWALDir, currentTest.getMethodName());
assertNotEquals(hbaseDir, hbaseWALDir);
FS.mkdirs(DIR);
}

@BeforeClass
Expand Down Expand Up @@ -393,9 +394,8 @@ public void testFindMemStoresEligibleForFlush() throws Exception {
@Test(expected = IOException.class)
public void testFailedToCreateWALIfParentRenamed()
throws IOException, CommonFSUtils.StreamLacksCapabilityException {
final String name = "testFailedToCreateWALIfParentRenamed";
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getWALRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getWALRootDir(CONF),
currentTest.getMethodName(), HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
long filenum = EnvironmentEdgeManager.currentTime();
Path path = wal.computeFilename(filenum);
wal.createWriterInstance(FS, path);
Expand Down Expand Up @@ -544,6 +544,7 @@ public void testRollWriterForClosedWAL() throws IOException {

private AbstractFSWAL<?> createHoldingWAL(String testName, AtomicBoolean startHoldingForAppend,
CountDownLatch holdAppend) throws IOException {
FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), testName));
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getRootDir(CONF), testName,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
// newWAL has already called wal.init()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,7 @@ private TableDescriptor createBasic1FamilyHTD(final TableName tableName) {
}

private MockWAL createMockWAL() throws IOException {
fs.mkdirs(new Path(hbaseRootDir, logName));
MockWAL wal = new MockWAL(fs, hbaseRootDir, logName, conf);
wal.init();
// Set down maximum recovery so we dfsclient doesn't linger retrying something
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static void tearDownAfterClass() throws Exception {
}

@Override
protected CustomAsyncFSWAL getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
protected CustomAsyncFSWAL getWAL0(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException {
CustomAsyncFSWAL wal =
new CustomAsyncFSWAL(fs, root, logDir, conf, GROUP, NioSocketChannel.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public void logRollRequested(RollRequestReason reason) {
}

};
UTIL.getTestFileSystem().mkdirs(new Path(rootDir, "log"));
WAL = new AsyncFSWAL(UTIL.getTestFileSystem(), null, rootDir, "log", "oldlog", conf,
Arrays.asList(listener), true, null, null, null, null, EVENT_LOOP_GROUP, CHANNEL_CLASS,
StreamSlowMonitor.create(conf, "monitor"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,11 @@ public static void tearDownAfterClass() throws Exception {

@Override
protected WAL createWAL(Configuration c, Path hbaseRootDir, String logName) throws IOException {
AsyncFSWAL wal = new AsyncFSWAL(FileSystem.get(c), null, hbaseRootDir, logName,
HConstants.HREGION_OLDLOGDIR_NAME, c, null, true, null, null, null, null, GROUP,
CHANNEL_CLASS, StreamSlowMonitor.create(c, "monitor"));
FileSystem fs = hbaseRootDir.getFileSystem(c);
fs.mkdirs(new Path(hbaseRootDir, logName));
AsyncFSWAL wal =
new AsyncFSWAL(fs, null, hbaseRootDir, logName, HConstants.HREGION_OLDLOGDIR_NAME, c, null,
true, null, null, null, null, GROUP, CHANNEL_CLASS, StreamSlowMonitor.create(c, "monitor"));
wal.init();
return wal;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ protected void atHeadOfRingBufferEventHandlerAppend() {
public void testSyncRunnerIndexOverflow() throws IOException, NoSuchFieldException,
SecurityException, IllegalArgumentException, IllegalAccessException {
final String name = this.name.getMethodName();
FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), name));
FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
log.init();
Expand Down Expand Up @@ -140,6 +141,7 @@ public void testUnflushedSeqIdTracking() throws IOException, InterruptedExceptio
final CountDownLatch flushFinished = new CountDownLatch(1);
final CountDownLatch putFinished = new CountDownLatch(1);

FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), name));
try (FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null)) {
log.init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class TestFSHLogDurability extends WALDurabilityTestBase<CustomFSHLog> {
HBaseClassTestRule.forClass(TestFSHLogDurability.class);

@Override
protected CustomFSHLog getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
protected CustomFSHLog getWAL0(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException {
CustomFSHLog wal = new CustomFSHLog(fs, root, logDir, conf);
wal.init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ public static void setUpBeforeClass() throws Exception {

@Override
protected WAL createWAL(Configuration c, Path hbaseRootDir, String logName) throws IOException {
FSHLog wal = new FSHLog(FileSystem.get(c), hbaseRootDir, logName, c);
FileSystem fs = hbaseRootDir.getFileSystem(c);
fs.mkdirs(new Path(hbaseRootDir, logName));
FSHLog wal = new FSHLog(fs, hbaseRootDir, logName, c);
wal.init();
// Set down maximum recovery so we dfsclient doesn't linger retrying something
// long gone.
Expand Down
Loading