Skip to content

Commit e4f530a

Browse files
committed
YARN-8606. Opportunistic scheduling does not work post RM failover. Contributed by Bibin A Chundatt.
(cherry picked from commit a48a0cc)
1 parent 0f66a0d commit e4f530a

File tree

3 files changed

+72
-13
lines changed

3 files changed

+72
-13
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.yarn.server.resourcemanager;
2020

21+
import com.google.common.annotations.VisibleForTesting;
2122
import org.apache.commons.logging.Log;
2223
import org.apache.commons.logging.LogFactory;
2324
import org.apache.hadoop.conf.Configuration;
@@ -406,7 +407,8 @@ public QueueLimitCalculator getNodeManagerQueueLimitCalculator() {
406407
return nodeMonitor.getThresholdCalculator();
407408
}
408409

409-
private synchronized List<RemoteNode> getLeastLoadedNodes() {
410+
@VisibleForTesting
411+
synchronized List<RemoteNode> getLeastLoadedNodes() {
410412
long currTime = System.currentTimeMillis();
411413
if ((currTime - lastCacheUpdateTime > cacheRefreshInterval)
412414
|| (cachedNodes == null)) {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -757,9 +757,11 @@ protected void serviceInit(Configuration configuration) throws Exception {
757757
}
758758

759759
masterService = createApplicationMasterService();
760+
createAndRegisterOpportunisticDispatcher(masterService);
760761
addService(masterService) ;
761762
rmContext.setApplicationMasterService(masterService);
762763

764+
763765
applicationACLsManager = new ApplicationACLsManager(conf);
764766

765767
queueACLsManager = createQueueACLsManager(scheduler, conf);
@@ -807,6 +809,23 @@ protected void serviceInit(Configuration configuration) throws Exception {
807809
super.serviceInit(conf);
808810
}
809811

812+
private void createAndRegisterOpportunisticDispatcher(
813+
ApplicationMasterService service) {
814+
if (!isOpportunisticSchedulingEnabled(conf)) {
815+
return;
816+
}
817+
EventDispatcher oppContainerAllocEventDispatcher = new EventDispatcher(
818+
(OpportunisticContainerAllocatorAMService) service,
819+
OpportunisticContainerAllocatorAMService.class.getName());
820+
// Add an event dispatcher for the
821+
// OpportunisticContainerAllocatorAMService to handle node
822+
// additions, updates and removals. Since the SchedulerEvent is currently
823+
// a superset of these, we register interest for it.
824+
addService(oppContainerAllocEventDispatcher);
825+
rmDispatcher
826+
.register(SchedulerEventType.class, oppContainerAllocEventDispatcher);
827+
}
828+
810829
@Override
811830
protected void serviceStart() throws Exception {
812831
RMStateStore rmStore = rmContext.getStateStore();
@@ -1335,8 +1354,7 @@ protected ClientRMService createClientRMService() {
13351354

13361355
protected ApplicationMasterService createApplicationMasterService() {
13371356
Configuration config = this.rmContext.getYarnConfiguration();
1338-
if (YarnConfiguration.isOpportunisticContainerAllocationEnabled(config)
1339-
|| YarnConfiguration.isDistSchedulingEnabled(config)) {
1357+
if (isOpportunisticSchedulingEnabled(conf)) {
13401358
if (YarnConfiguration.isDistSchedulingEnabled(config) &&
13411359
!YarnConfiguration
13421360
.isOpportunisticContainerAllocationEnabled(config)) {
@@ -1348,16 +1366,6 @@ protected ApplicationMasterService createApplicationMasterService() {
13481366
oppContainerAllocatingAMService =
13491367
new OpportunisticContainerAllocatorAMService(this.rmContext,
13501368
scheduler);
1351-
EventDispatcher oppContainerAllocEventDispatcher =
1352-
new EventDispatcher(oppContainerAllocatingAMService,
1353-
OpportunisticContainerAllocatorAMService.class.getName());
1354-
// Add an event dispatcher for the
1355-
// OpportunisticContainerAllocatorAMService to handle node
1356-
// additions, updates and removals. Since the SchedulerEvent is currently
1357-
// a superset of these, we register interest for it.
1358-
addService(oppContainerAllocEventDispatcher);
1359-
rmDispatcher.register(SchedulerEventType.class,
1360-
oppContainerAllocEventDispatcher);
13611369
this.rmContext.setContainerQueueLimitCalculator(
13621370
oppContainerAllocatingAMService.getNodeManagerQueueLimitCalculator());
13631371
return oppContainerAllocatingAMService;
@@ -1373,6 +1381,11 @@ protected RMSecretManagerService createRMSecretManagerService() {
13731381
return new RMSecretManagerService(conf, rmContext);
13741382
}
13751383

1384+
private boolean isOpportunisticSchedulingEnabled(Configuration conf) {
1385+
return YarnConfiguration.isOpportunisticContainerAllocationEnabled(conf)
1386+
|| YarnConfiguration.isDistSchedulingEnabled(conf);
1387+
}
1388+
13761389
/**
13771390
* Create RMDelegatedNodeLabelsUpdater based on configuration.
13781391
*/

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818

1919
package org.apache.hadoop.yarn.server.resourcemanager;
2020

21+
import com.google.common.base.Supplier;
22+
import org.apache.hadoop.test.GenericTestUtils;
23+
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
24+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
2125
import static org.junit.Assert.assertEquals;
2226
import static org.junit.Assert.assertFalse;
2327
import static org.junit.Assert.assertTrue;
@@ -658,6 +662,46 @@ protected Dispatcher createDispatcher() {
658662
assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
659663
}
660664

665+
@Test
666+
public void testOpportunisticAllocatorAfterFailover() throws Exception {
667+
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
668+
configuration.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
669+
Configuration conf = new YarnConfiguration(configuration);
670+
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
671+
conf.setBoolean(
672+
YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
673+
// 1. start RM
674+
rm = new MockRM(conf);
675+
rm.init(conf);
676+
rm.start();
677+
678+
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
679+
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
680+
// 2. Transition to active
681+
rm.adminService.transitionToActive(requestInfo);
682+
// 3. Transition to standby
683+
rm.adminService.transitionToStandby(requestInfo);
684+
// 4. Transition to active
685+
rm.adminService.transitionToActive(requestInfo);
686+
687+
MockNM nm1 = rm.registerNode("h1:1234", 8 * 1024);
688+
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
689+
rmNode1.getRMContext().getDispatcher().getEventHandler()
690+
.handle(new NodeUpdateSchedulerEvent(rmNode1));
691+
OpportunisticContainerAllocatorAMService appMaster =
692+
(OpportunisticContainerAllocatorAMService) rm.getRMContext()
693+
.getApplicationMasterService();
694+
GenericTestUtils.waitFor(new Supplier<Boolean>() {
695+
@Override
696+
public Boolean get() {
697+
return appMaster.getLeastLoadedNodes().size() == 1;
698+
}
699+
}, 100, 3000);
700+
rm.stop();
701+
Assert.assertEquals(1, appMaster.getLeastLoadedNodes().size());
702+
703+
}
704+
661705
@Test
662706
public void testResourceProfilesManagerAfterRMWentStandbyThenBackToActive()
663707
throws Exception {

0 commit comments

Comments
 (0)