if [ -f "${user_conf_dir}/spark-env.sh" ]; then # Promote all variable declarations to environment (exported) variables set -a . "${user_conf_dir}/spark-env.sh" set +a fi fi
if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2 echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2 exit 1 fi
if [ -d "$ASSEMBLY_DIR2" ]; then export SPARK_SCALA_VERSION="2.11" else export SPARK_SCALA_VERSION="2.10" fi fi
# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
  RUNNER="${JAVA_HOME}/bin/java"
else
  if [ "$(command -v java)" ]; then
    RUNNER="java"
  else
    echo "JAVA_HOME is not set" >&2
    exit 1
  fi
fi
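# Usage note: JAVA_HOME can also be pinned per-invocation (path is illustrative):
#   JAVA_HOME=/usr/lib/jvm/java-8-openjdk ./bin/spark-class org.apache.spark.deploy.master.Master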
# Find assembly jar
# Locate the spark-assembly-1.5.0-hadoop2.4.0.jar file (using Spark 1.5.0 as the example).
# Because the assembly jar is resolved here, jobs do not need to bundle it at submission time.
SPARK_ASSEMBLY_JAR=
if [ -f "${SPARK_HOME}/RELEASE" ]; then
  ASSEMBLY_DIR="${SPARK_HOME}/lib"
else
  ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
fi
# Clear GREP_OPTIONS so user settings (e.g. --color=always) cannot alter grep's output
GREP_OPTIONS=
num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
# Check whether Spark has been built at all
if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a "$SPARK_PREPEND_CLASSES" != "1" ]; then
  echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build Spark before running this program." 1>&2
  exit 1
fi
# Exactly one Spark assembly jar may be present
if [ -d "$ASSEMBLY_DIR" ]; then
  ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
  if [ "$num_jars" -gt "1" ]; then
    echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
    echo "$ASSEMBLY_JARS" 1>&2
    echo "Please remove all but one jar." 1>&2
    exit 1
  fi
fi
# Build the full path to the assembly jar
SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
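# An illustrative failure this guards against: building against two Hadoop
# profiles leaves two assemblies behind, and spark-class refuses to pick one blindly:
#   $ ls "$SPARK_HOME"/lib
#   spark-assembly-1.5.0-hadoop2.4.0.jar  spark-assembly-1.5.0-hadoop2.6.0.jar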
LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi
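# Usage note: developers typically pair this with an incremental build so that
# freshly compiled launcher classes shadow the ones in the assembly, e.g.:
#   SPARK_PREPEND_CLASSES=1 ./bin/spark-shell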
export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"
# For tests
if [[ -n "$SPARK_TESTING" ]]; then
  unset YARN_CONF_DIR
  unset HADOOP_CONF_DIR
fi
# The launcher library will print arguments separated by a NULL character, to allow arguments with
# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
# an array that will be used to exec the final command.
#
# The exit code of the launcher is appended to the output, so the parent shell removes it from the
# command array and checks the value to see if the launcher succeeded.
build_command() {
  "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
  printf "%d\0" $?
}
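# A self-contained sketch of the same NUL-delimited handshake, independent of
# Spark; fake_launcher is a hypothetical stand-in for org.apache.spark.launcher.Main:
#   fake_launcher() { printf '%s\0' java -cp "a b.jar" Main; printf '%d\0' 0; }
#   OUT=()
#   while IFS= read -d '' -r tok; do OUT+=("$tok"); done < <(fake_launcher)
#   # OUT = (java -cp "a b.jar" Main 0); the embedded space survives intact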
# Turn off posix mode since it does not allow process substitution
set +o posix
CMD=()  # create the array
# Append each NUL-delimited token emitted by build_command to the CMD array
while IFS= read -d '' -r ARG; do
  CMD+=("$ARG")
done < <(build_command "$@")
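# The upstream script next peels the trailing exit-code token off the array;
# without this step, LAUNCHER_EXIT_CODE, used below, would never be set:
COUNT=${#CMD[@]}
LAST=$((COUNT - 1))
LAUNCHER_EXIT_CODE=${CMD[$LAST]}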
# Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes
# the code that parses the output of the launcher to get confused. In those cases, check if the
# exit code is an integer, and if it's not, handle it as a special error case.
if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then
  echo "${CMD[@]}" | head -n-1 1>&2
  exit 1
fi
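# For example, if the JVM aborts at startup, messages such as
# "Error occurred during initialization of VM" land on stdout with no NUL
# separators, so the final token read into CMD is error text fused with the
# exit code rather than a bare integer, and the check above trips.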
# If the launcher returned a non-zero exit code, exit with that same code
if [ $LAUNCHER_EXIT_CODE != 0 ]; then
  exit $LAUNCHER_EXIT_CODE
fi
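# Finally, the upstream script drops the exit-code token and execs the command
# the launcher built (LAST comes from the extraction step above):
CMD=("${CMD[@]:0:$LAST}")
exec "${CMD[@]}"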