Hadoop Ecosystem Setup (3 Nodes) - 10. Spark Configuration
# ==================================================================Install Scala
# https://www.scala-lang.org/download/2.12.4.html

# ==================================================================Install Spark
# http://archive.apache.org/dist/spark/spark-2.3.0/
tar -zxf ~/spark-2.3.0-bin-hadoop2.7.tgz -C /usr/local
mv /usr/local/spark-2.3.0-bin-hadoop2.7 /usr/local/spark-2.3.0
rm -r ~/spark-2.3.0-bin-hadoop2.7.tgz

# Environment variables
# ==================================================================node1 node2 node3
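The /etc/profile edit this step refers to is not shown in the source; below is a minimal sketch of the additions it would need on all three nodes, assuming the install path used above.

# Append to /etc/profile (assumed placement; adjust to the existing profile layout)
export SPARK_HOME=/usr/local/spark-2.3.0
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin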
# ==================================================================node1
# Apply the environment variables
source /etc/profile

# Check the result
echo $SPARK_HOME

# ==================================================================node1
cp $SPARK_HOME/conf/docker.properties.template $SPARK_HOME/conf/docker.properties
vi $SPARK_HOME/conf/docker.properties

spark.mesos.executor.home: /usr/local/spark-2.3.0

cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties

cp $SPARK_HOME/conf/slaves.template $SPARK_HOME/conf/slaves
vi $SPARK_HOME/conf/slaves

node1
node2
node3

cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
vi $SPARK_HOME/conf/spark-defaults.conf

spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://appcluster/spark/eventslog
# Directory the history UI reads from; event logging must be enabled and its directory set first (the two lines above)
spark.history.fs.logDirectory    hdfs://appcluster/spark
spark.eventLog.compress          true

# To let the YARN ResourceManager link to the Spark History Server, add one line:
# spark.yarn.historyServer.address http://node1:19888

cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
vi $SPARK_HOME/conf/spark-env.sh

export SPARK_MASTER_PORT=7077           # Port for submitting jobs; default 7077
export SPARK_MASTER_WEBUI_PORT=8070     # Master web UI port; changed from the default 8080 to 8070
export SPARK_WORKER_CORES=1             # Number of cores each worker node may use
export SPARK_WORKER_MEMORY=1g           # Amount of memory each worker node may use
export SPARK_WORKER_PORT=7078           # Port of each worker node (optional)
export SPARK_WORKER_WEBUI_PORT=8071     # Web UI port of each worker node (optional)
export SPARK_WORKER_INSTANCES=1         # Number of worker instances per node (optional)

export JAVA_HOME=/usr/java/jdk1.8.0_111
export SCALA_HOME=/usr/local/scala-2.12.4
export HADOOP_HOME=/usr/local/hadoop-2.7.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_PID_DIR=/usr/local/spark-2.3.0/pids
export SPARK_LOCAL_DIR=/usr/local/spark-2.3.0/tmp
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node1:2181,node2:2181,node3:2181 -Dspark.deploy.zookeeper.dir=/spark"

vi $SPARK_HOME/sbin/start-master.sh

SPARK_MASTER_WEBUI_PORT=8070

cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $SPARK_HOME/conf/

vi $HADOOP_HOME/etc/hadoop/log4j.properties

log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node2:$HADOOP_HOME/etc/hadoop/
scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node3:$HADOOP_HOME/etc/hadoop/
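An optional sanity check, not part of the original walkthrough: before copying the installation to the other nodes, confirm the edits above actually landed in the generated config files.

grep -E "eventLog|history" $SPARK_HOME/conf/spark-defaults.conf
grep -E "WEBUI_PORT|zookeeper" $SPARK_HOME/conf/spark-env.sh
cat $SPARK_HOME/conf/slaves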
# ==================================================================node1
scp -r $SPARK_HOME node2:/usr/local/
scp -r $SPARK_HOME node3:/usr/local/

# ==================================================================node2 node3
# Apply the environment variables
source /etc/profile

# Check the result
echo $SPARK_HOME
# Start
# ==================================================================node1 node2 node3
# Start ZooKeeper and HDFS first
zkServer.sh start
zkServer.sh status

# ==================================================================node1
zkCli.sh create /spark ''

$HADOOP_HOME/sbin/start-all.sh

$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ==================================================================node2
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
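An optional check, not in the original post: jps on each node should list the ZooKeeper, HDFS and YARN daemons appropriate to that node before Spark is started.

# Run on node1, node2 and node3
jps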
# Start Spark
# ==================================================================node1
$SPARK_HOME/sbin/start-master.sh
$SPARK_HOME/sbin/start-slaves.sh

# ==================================================================node2
$SPARK_HOME/sbin/start-master.sh

# ==================================================================node1
# Check the HDFS safe-mode status:
hdfs dfsadmin -safemode get

# Enter safe mode
# hdfs dfsadmin -safemode enter

# Leave safe mode
# hdfs dfsadmin -safemode leave

hdfs dfs -mkdir -p /spark/eventslog

$SPARK_HOME/bin/spark-shell

# http://node1:4040
# http://node1:8070

> :quit
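Before the word-count test below, an optional smoke test (not in the original post) is to submit the bundled SparkPi example to the standalone master; the jar path assumes the stock spark-2.3.0-bin-hadoop2.7 layout.

$SPARK_HOME/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://node1:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.3.0.jar 10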
# test

# Make sure the output directory does not already exist on HDFS
# hdfs dfs -mkdir -p /spark/output
# hdfs dfs -rmr /spark/output

vi ~/sparkdata.txt

hello man
what are you doing now
my running
hello kevin
hi man

hdfs dfs -mkdir -p /usr/file/input
hdfs dfs -put ~/sparkdata.txt /usr/file/input
hdfs dfs -ls /usr/file/input

# In spark-shell:
val file1 = sc.textFile("file:///root/sparkdata.txt")
val count1 = file1.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
count1.saveAsTextFile("hdfs://node1:8020/spark/output1")

val file = sc.textFile("hdfs://appcluster/usr/file/input/sparkdata.txt")
val count = file.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
count.saveAsTextFile("hdfs://node1:8020/spark/output")

hdfs dfs -ls /spark/output

hdfs dfs -cat /spark/output/part-00000
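The configuration above enables event logging and a history log directory, but the walkthrough never starts the history server; an optional follow-up, assuming the default history-server port 18080:

# node1
$SPARK_HOME/sbin/start-history-server.sh
# http://node1:18080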
# Stop the processes that were started
# ==================================================================node1
$SPARK_HOME/sbin/stop-slaves.sh
$SPARK_HOME/sbin/stop-master.sh
$HADOOP_HOME/sbin/stop-all.sh

# ==================================================================node1 node2 node3
# Stop ZooKeeper
zkServer.sh stop

# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

shutdown -h now

# Snapshot the VM: spark
Reposted from: https://www.cnblogs.com/zcf5522/p/9775651.html