
Commit 15345cb

jeremyjliu authored and Robert Kruszewski committed
## Upstream SPARK-XXXXX ticket and PR link (if not applicable, explain)

Not filed upstream; this touches conda-specific code.

## What changes were proposed in this pull request?

rLibDir contains a sequence of possible paths for the SparkR package on the executor and is passed to the R daemon via the SPARKR_RLIBDIR environment variable. This PR filters rLibDir down to paths that exist before setting SPARKR_RLIBDIR, retaining the existing behavior of preferring a YARN or local SparkR install over conda when both are present. See daemon.R: https://github.com/palantir/spark/blob/master/R/pkg/inst/worker/daemon.R#L23

Fixes apache-spark-on-k8s#456

## How was this patch tested?

Manually tested, cherry-picked onto an older version.
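The core of the change is a simple existence filter over the candidate library directories. Below is a minimal, self-contained sketch of that selection logic; names like `selectExistingRLibDirs` and the example paths are illustrative, not from the patch:

```scala
import java.io.File

object RLibDirFilterSketch {
  // Illustrative helper: keep only candidate directories that exist on this
  // machine. Order is preserved, so an earlier YARN/local SparkR install is
  // still preferred over a conda one when both are present.
  def selectExistingRLibDirs(candidateDirs: Seq[String]): Seq[String] =
    candidateDirs.filter(dir => new File(dir).exists)

  def main(args: Array[String]): Unit = {
    // Hypothetical paths, mirroring the YARN/local-then-conda ordering.
    val candidates = Seq(
      "/opt/spark/R/lib",
      "/opt/conda/envs/myenv/lib/R/library")
    val existing = selectExistingRLibDirs(candidates)
    if (existing.isEmpty) {
      sys.error("SparkR package is not installed on executor.")
    }
    println(existing.mkString(","))
  }
}
```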
1 parent 0d3f5cb · commit 15345cb

File tree

2 files changed: +14 -8 lines changed

2 files changed

+14
-8
lines changed

core/src/main/scala/org/apache/spark/api/r/RRunner.scala

Lines changed: 8 additions & 4 deletions

@@ -360,10 +360,14 @@ private[r] object RRunner {
     val rConnectionTimeout = sparkConf.getInt(
       "spark.r.backendConnectionTimeout", SparkRDefaults.DEFAULT_CONNECTION_TIMEOUT)
     val rOptions = "--vanilla"
-    val rLibDir = condaEnv.map { conda =>
-      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + "/lib/R/library")
-    }.getOrElse(RUtils.sparkRPackagePath(isDriver = false))
-    val rExecScript = RUtils.sparkRInstallLocation(rLibDir, "/SparkR/worker/" + script)
+    val rLibDir = condaEnv.map(conda =>
+      RUtils.sparkRPackagePath(isDriver = false) :+ (conda.condaEnvDir + "/lib/R/library"))
+      .getOrElse(RUtils.sparkRPackagePath(isDriver = false))
+      .filter(dir => new File(dir).exists)
+    if (rLibDir.isEmpty) {
+      throw new SparkException("SparkR package is not installed on executor.")
+    }
+    val rExecScript = RUtils.getSparkRScript(rLibDir, "/SparkR/worker/" + script)
     val pb = new ProcessBuilder(Arrays.asList(rCommand, rOptions, rExecScript))
     // Activate the conda environment by setting the right env variables if applicable.
     condaEnv.map(_.activatedEnvironment()).map(_.asJava).foreach(pb.environment().putAll)
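Not shown in the hunk: the filtered rLibDir is what ultimately reaches daemon.R through SPARKR_RLIBDIR. A hedged sketch of that hand-off, assuming the upstream convention of comma-joining the paths:

```scala
// Sketch only; `pb` and `rLibDir` are the values from the hunk above, and
// the comma-joined format is an assumption based on upstream daemon.R,
// which splits SPARKR_RLIBDIR on ",".
pb.environment().put("SPARKR_RLIBDIR", rLibDir.mkString(","))
```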

core/src/main/scala/org/apache/spark/api/r/RUtils.scala

Lines changed: 6 additions & 4 deletions

@@ -97,10 +97,12 @@ private[spark] object RUtils {
     }
   }

-  /** Finds the rLibDir with SparkR installed on it. */
-  def sparkRInstallLocation(rLibDir: Seq[String], scriptPath: String): String = {
-    rLibDir.find(dir => new File(dir + scriptPath).exists)
-      .getOrElse(throw new SparkException("SparkR package not installed on executor.")) + scriptPath
+  /** Finds a script in a sequence of possible SparkR installation directories. */
+  def getSparkRScript(rLibDir: Seq[String], scriptPath: String): String = {
+    rLibDir.find(dir => new File(dir + scriptPath).exists).getOrElse(
+      throw new SparkException(
+        s"Script $scriptPath not found in any SparkR installation directory.")
+    ) + scriptPath
   }

   /** Check if R is installed before running tests that use R commands. */
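For reference, a hypothetical call to the renamed helper; the directories and script name here are made up, while in RRunner the script path is built as "/SparkR/worker/" + script:

```scala
// Hypothetical usage; returns the first "<dir>/SparkR/worker/daemon.R"
// that exists, or throws SparkException naming the missing script.
val rLibDir = Seq("/opt/spark/R/lib", "/opt/conda/envs/myenv/lib/R/library")
val rExecScript = RUtils.getSparkRScript(rLibDir, "/SparkR/worker/daemon.R")
```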
