@@ -50,12 +50,8 @@ private[spark] class KubernetesClusterSchedulerBackend(
 
   private val EXECUTOR_ID_COUNTER = new AtomicLong(0L)
   private val RUNNING_EXECUTOR_PODS_LOCK = new Object
-  // Indexed by executor IDs
   @GuardedBy("RUNNING_EXECUTOR_PODS_LOCK")
   private val runningExecutorsToPods = new mutable.HashMap[String, Pod]
-  // Indexed by executor pod names
-  @GuardedBy("RUNNING_EXECUTOR_PODS_LOCK")
-  private val runningPodsToExecutors = new mutable.HashMap[String, String]
   private val executorPodsByIPs = new ConcurrentHashMap[String, Pod]()
   private val podsWithKnownExitReasons = new ConcurrentHashMap[String, ExecutorExited]()
   private val disconnectedPodsByExecutorIdPendingRemoval = new ConcurrentHashMap[String, Pod]()
@@ -117,7 +113,6 @@ private[spark] class KubernetesClusterSchedulerBackend(
         } else if (currentTotalExpectedExecutors <= runningExecutorsToPods.size) {
           logDebug("Maximum allowed executor limit reached. Not scaling up further.")
         } else {
-          val nodeToLocalTaskCount = getNodesWithLocalTaskCounts
           for (i <- 0 until math.min(
             currentTotalExpectedExecutors - runningExecutorsToPods.size, podAllocationSize)) {
             val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString
@@ -127,7 +122,16 @@ private[spark] class KubernetesClusterSchedulerBackend(
               driverUrl,
               conf.getExecutorEnv,
               driverPod,
-              nodeToLocalTaskCount)
+              currentNodeToLocalTaskCount)
+            require(executorPod.getMetadata.getLabels.containsKey(SPARK_EXECUTOR_ID_LABEL),
+              s"Illegal internal state for pod with name ${executorPod.getMetadata.getName} - all " +
+                s"executor pods must contain the label $SPARK_EXECUTOR_ID_LABEL.")
+            val resolvedExecutorIdLabel = executorPod.getMetadata.getLabels.get(
+              SPARK_EXECUTOR_ID_LABEL)
+            require(resolvedExecutorIdLabel == executorId,
+              s"Illegal internal state for pod with name ${executorPod.getMetadata.getName} - all " +
+                s"executor pods must map the label with key ${SPARK_EXECUTOR_ID_LABEL} to the " +
+                s"executor's ID. This label mapped instead to: $resolvedExecutorIdLabel.")
             executorsToAllocate(executorId) = executorPod
             logInfo(
               s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}")
@@ -143,8 +147,6 @@ private[spark] class KubernetesClusterSchedulerBackend(
           case (executorId, attemptedAllocatedExecutor) =>
             attemptedAllocatedExecutor.map { successfullyAllocatedExecutor =>
               runningExecutorsToPods.put(executorId, successfullyAllocatedExecutor)
-              runningPodsToExecutors.put(
-                successfullyAllocatedExecutor.getMetadata.getName, executorId)
             }
         }
       }
@@ -166,11 +168,12 @@ private[spark] class KubernetesClusterSchedulerBackend(
             // We keep around executors that have exit conditions caused by the application. This
             // allows them to be debugged later on. Otherwise, mark them as to be deleted from the
             // the API server.
-            if (!executorExited.exitCausedByApp) {
+            if (executorExited.exitCausedByApp) {
+              logInfo(s"Executor $executorId exited because of the application.")
+              deleteExecutorFromDataStructures(executorId)
+            } else {
               logInfo(s"Executor $executorId failed because of a framework error.")
               deleteExecutorFromClusterAndDataStructures(executorId)
-            } else {
-              logInfo(s"Executor $executorId exited because of the application.")
             }
           }
       }
@@ -187,19 +190,20 @@ private[spark] class KubernetesClusterSchedulerBackend(
     }
 
     def deleteExecutorFromClusterAndDataStructures(executorId: String): Unit = {
+      deleteExecutorFromDataStructures(executorId)
+        .foreach(pod => kubernetesClient.pods().delete(pod))
+    }
+
+    def deleteExecutorFromDataStructures(executorId: String): Option[Pod] = {
       disconnectedPodsByExecutorIdPendingRemoval.remove(executorId)
       executorReasonCheckAttemptCounts -= executorId
-      podsWithKnownExitReasons -= executorId
-      val maybeExecutorPodToDelete = RUNNING_EXECUTOR_PODS_LOCK.synchronized {
-        runningExecutorsToPods.remove(executorId).map { pod =>
-          runningPodsToExecutors.remove(pod.getMetadata.getName)
-          pod
-        }.orElse {
+      podsWithKnownExitReasons.remove(executorId)
+      RUNNING_EXECUTOR_PODS_LOCK.synchronized {
+        runningExecutorsToPods.remove(executorId).orElse {
           logWarning(s"Unable to remove pod for unknown executor $executorId")
           None
         }
       }
-      maybeExecutorPodToDelete.foreach(pod => kubernetesClient.pods().delete(pod))
     }
   }
 
@@ -231,14 +235,10 @@ private[spark] class KubernetesClusterSchedulerBackend(
     super.stop()
 
     // then delete the executor pods
-    // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context.
-    // When using Utils.tryLogNonFatalError some of the code fails but without any logs or
-    // indication as to why.
     Utils.tryLogNonFatalError {
       val executorPodsToDelete = RUNNING_EXECUTOR_PODS_LOCK.synchronized {
         val runningExecutorPodsCopy = Seq(runningExecutorsToPods.values.toSeq: _*)
         runningExecutorsToPods.clear()
-        runningPodsToExecutors.clear()
         runningExecutorPodsCopy
       }
       kubernetesClient.pods().delete(executorPodsToDelete: _*)
@@ -288,7 +288,6 @@ private[spark] class KubernetesClusterSchedulerBackend(
         val maybeRemovedExecutor = runningExecutorsToPods.remove(executor)
         maybeRemovedExecutor.foreach { executorPod =>
           disconnectedPodsByExecutorIdPendingRemoval.put(executor, executorPod)
-          runningPodsToExecutors.remove(executorPod.getMetadata.getName)
           podsToDelete += executorPod
         }
         if (maybeRemovedExecutor.isEmpty) {
@@ -300,11 +299,6 @@ private[spark] class KubernetesClusterSchedulerBackend(
     true
   }
 
-  def getExecutorPodByIP(podIP: String): Option[Pod] = {
-    val pod = executorPodsByIPs.get(podIP)
-    Option(pod)
-  }
-
   private class ExecutorPodsWatcher extends Watcher[Pod] {
 
     private val DEFAULT_CONTAINER_FAILURE_EXIT_STATUS = -1
@@ -316,21 +310,33 @@ private[spark] class KubernetesClusterSchedulerBackend(
         val clusterNodeName = pod.getSpec.getNodeName
         logInfo(s"Executor pod $pod ready, launched at $clusterNodeName as IP $podIP.")
         executorPodsByIPs.put(podIP, pod)
-      } else if ((action == Action.MODIFIED && pod.getMetadata.getDeletionTimestamp != null) ||
-          action == Action.DELETED || action == Action.ERROR) {
+      } else if (action == Action.DELETED || action == Action.ERROR) {
+        val executorId = pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL)
+        require(executorId != null, "Unexpected pod metadata; expected all executor pods" +
+          s" to have label $SPARK_EXECUTOR_ID_LABEL.")
         val podName = pod.getMetadata.getName
         val podIP = pod.getStatus.getPodIP
         logDebug(s"Executor pod $podName at IP $podIP was at $action.")
         if (podIP != null) {
           executorPodsByIPs.remove(podIP)
         }
-        if (action == Action.ERROR) {
+        val executorExitReason = if (action == Action.ERROR) {
           logWarning(s"Received pod $podName exited event. Reason: " + pod.getStatus.getReason)
-          handleErroredPod(pod)
+          executorExitReasonOnError(pod)
         } else if (action == Action.DELETED) {
           logWarning(s"Received delete pod $podName event. Reason: " + pod.getStatus.getReason)
-          handleDeletedPod(pod)
+          executorExitReasonOnDelete(pod)
+        } else {
+          throw new IllegalStateException(
+            s"Unknown action that should only be DELETED or ERROR: $action")
+        }
+        podsWithKnownExitReasons.put(pod.getMetadata.getName, executorExitReason)
+        if (!disconnectedPodsByExecutorIdPendingRemoval.containsKey(executorId)) {
+          log.warn(s"Executor with id $executorId was not marked as disconnected, but the " +
+            s"watch received an event of type $action for this executor. The executor may " +
+            s"have failed to start in the first place and never registered with the driver.")
         }
+        disconnectedPodsByExecutorIdPendingRemoval.put(executorId, pod)
       }
     }
 
@@ -356,15 +362,16 @@ private[spark] class KubernetesClusterSchedulerBackend(
     }
 
     def isPodAlreadyReleased(pod: Pod): Boolean = {
+      val executorId = pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL)
       RUNNING_EXECUTOR_PODS_LOCK.synchronized {
-        !runningPodsToExecutors.contains(pod.getMetadata.getName)
+        !runningExecutorsToPods.contains(executorId)
       }
     }
 
-    def handleErroredPod(pod: Pod): Unit = {
+    def executorExitReasonOnError(pod: Pod): ExecutorExited = {
       val containerExitStatus = getExecutorExitStatus(pod)
       // container was probably actively killed by the driver.
-      val exitReason = if (isPodAlreadyReleased(pod)) {
+      if (isPodAlreadyReleased(pod)) {
         ExecutorExited(containerExitStatus, exitCausedByApp = false,
           s"Container in pod ${pod.getMetadata.getName} exited from explicit termination " +
             "request.")
@@ -373,18 +380,16 @@ private[spark] class KubernetesClusterSchedulerBackend(
           s"exited with exit status code $containerExitStatus."
         ExecutorExited(containerExitStatus, exitCausedByApp = true, containerExitReason)
       }
-      podsWithKnownExitReasons.put(pod.getMetadata.getName, exitReason)
     }
 
-    def handleDeletedPod(pod: Pod): Unit = {
+    def executorExitReasonOnDelete(pod: Pod): ExecutorExited = {
       val exitMessage = if (isPodAlreadyReleased(pod)) {
         s"Container in pod ${pod.getMetadata.getName} exited from explicit termination request."
       } else {
         s"Pod ${pod.getMetadata.getName} deleted or lost."
       }
-      val exitReason = ExecutorExited(
+      ExecutorExited(
         getExecutorExitStatus(pod), exitCausedByApp = false, exitMessage)
-      podsWithKnownExitReasons.put(pod.getMetadata.getName, exitReason)
     }
   }
 