Spark on YARN Configuration Notes

What to install:

  • Scala 2.12.13
  • Spark 3.1.2
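
These notes assume a working Hadoop/YARN cluster and a JDK already in place; the spark-env.sh below points at GraalVM Java 8 under /opt/java and Hadoop 3.3.1 under /opt/hadoop. A quick sanity check, assuming those paths:

# Verify the Java and Hadoop installs that spark-env.sh will reference
/opt/java/graalvm-ce-java8-20.3.2/bin/java -version
/opt/hadoop/hadoop-3.3.1/bin/hadoop version

Scala is installed via SDKMAN, so bootstrap SDKMAN first: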

curl -s "https://get.sdkman.io" | bash
source ~/.bashrc

sdk install scala 2.12.13
sudo mkdir /opt/scala
sudo cp -r ~/.sdkman/candidates/scala/2.12.13 /opt/scala/
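
Copying the SDKMAN-managed install into /opt/scala gives a fixed, user-independent path for spark-env.sh to point at. To confirm the copy works from there:

# Assumes the /opt/scala layout created above
/opt/scala/2.12.13/bin/scala -version

Next, download and install Spark: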

cd $HOME
wget https://mirror.sjtu.edu.cn/apache/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz
tar -xzf ./spark-3.1.2-bin-hadoop3.2.tgz
sudo mkdir /opt/spark
sudo mv ./spark-3.1.2-bin-hadoop3.2 /opt/spark/
sudo chown -R wjadmin:hadoop /opt/spark/
sudo setfacl -R -m g:hadoop:rwx /opt/spark/
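
A quick check that the unpacked distribution runs:

/opt/spark/spark-3.1.2-bin-hadoop3.2/bin/spark-submit --version

Then create spark-env.sh from the bundled template: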

cd /opt/spark/spark-3.1.2-bin-hadoop3.2/conf/
cp spark-env.sh.template spark-env.sh

spark-env.sh

export SCALA_HOME="/opt/scala/2.12.13"
export JAVA_HOME="/opt/java/graalvm-ce-java8-20.3.2"
export HADOOP_HOME="/opt/hadoop/hadoop-3.3.1"
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
SPARK_MASTER_HOST=node98
SPARK_LOCAL_DIRS="/opt/spark/localdirs"
SPARK_DRIVER_MEMORY=20G
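
SPARK_LOCAL_DIRS is scratch space for shuffle and spill files. If the directory does not exist yet, creating it up front with the same ownership scheme avoids permission surprises (a small sketch, assuming the wjadmin:hadoop convention used earlier):

# Scratch directory referenced by SPARK_LOCAL_DIRS in spark-env.sh
sudo mkdir -p /opt/spark/localdirs
sudo chown -R wjadmin:hadoop /opt/spark/localdirs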

Add SPARK_HOME (e.g. in ~/.bashrc):

export SPARK_HOME="/opt/spark/spark-3.1.2-bin-hadoop3.2"
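
Optionally, putting the Spark scripts on PATH in the same file saves typing full paths later (not part of the original setup, just a common companion line):

export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"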

The same goes for the other nodes. On each one, add SPARK_HOME:

export SPARK_HOME="/opt/spark/spark-3.1.2-bin-hadoop3.2"

Copy the files over from the already-configured node:

scp -r wjadmin@192.168.131.198:/opt/scala /opt/scala
scp -r wjadmin@192.168.131.198:/opt/spark /opt/spark
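
On a fresh node /opt is typically writable only by root, in which case the scp calls above cannot create /opt/scala and /opt/spark. One workaround is to stage into the home directory first and move the trees into place with sudo (a sketch under that assumption):

# Stage in $HOME, then move into /opt with elevated rights
scp -r wjadmin@192.168.131.198:/opt/scala ~/scala
scp -r wjadmin@192.168.131.198:/opt/spark ~/spark
sudo mv ~/scala /opt/scala
sudo mv ~/spark /opt/spark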

Set permissions:

sudo chown -R wjadmin:hadoop /opt/spark/
sudo setfacl -R -m g:hadoop:rwx /opt/spark/

Start the Spark daemons (run from $SPARK_HOME on node98):

./sbin/start-all.sh
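
Note that start-all.sh launches Spark's standalone master and workers; a job submitted with --master yarn runs in YARN containers and does not need them, so this step only matters if the standalone cluster is wanted too. Either way, jps shows whether the daemons came up:

# Master should appear on node98 and Worker on the worker nodes, alongside the Hadoop daemons
jps

With YARN running, submit the bundled SparkPi example: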

./bin/spark-submit --class org.apache.spark.examples.SparkPi \
    --master yarn \
    --deploy-mode cluster \
    --driver-memory 4g \
    --executor-memory 2g \
    --executor-cores 1 \
    examples/jars/spark-examples*.jar \
    10
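
In cluster deploy mode the driver runs inside YARN, so the "Pi is roughly ..." result line goes to the application logs rather than the local console. It can be retrieved afterwards with the application ID that spark-submit prints (the ID below is a placeholder):

# Replace <applicationId> with the real ID, e.g. from the ResourceManager UI
yarn logs -applicationId <applicationId> | grep "Pi is roughly"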

Screenshot: https://cdn.nlark.com/yuque/0/2021/png/368236/1626687264473-2b1aba03-f093-4598-a31a-6278a4cb4fe4.png