Installing Hive on Ubuntu
Download Hive
# Download and extract Hive
wget http://mirrors.cnnic.cn/apache/hive/hive-2.0.1/apache-hive-2.0.1-bin.tar.gz
tar xvzf apache-hive-2.0.1-bin.tar.gz
cd apache-hive-2.0.1-bin/
Install Hive
# Create the install directory and give ownership to the Hadoop user
sudo mkdir /usr/local/hive
sudo chown -R hduser:hadoop /usr/local/hive
# Move the extracted files into the install directory
mv * /usr/local/hive
# Add Hive to the PATH
vim ~/.bashrc
# Append the following at the end of the file
export HIVE_HOME=/usr/local/hive
export PATH=$HIVE_HOME/bin:$PATH
# Apply the changes
source ~/.bashrc
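With the PATH updated, a quick sanity check helps; note that Hive 2.x also expects the metastore schema to be initialized before first use. A minimal sketch, assuming the default embedded Derby metastore:
# Confirm the hive command resolves from the new PATH
hive --version
# Initialize the metastore schema (embedded Derby by default; run once as hduser)
schematool -dbType derby -initSchema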
Install Spark
This guide assumes Hadoop is already installed, so we use the pre-built "without-hadoop" Spark package, which does not bundle its own Hadoop libraries.
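Before continuing, it is worth confirming that the existing Hadoop installation is where the rest of this guide expects it (assumed to be /usr/local/hadoop throughout):
# Check the local Hadoop installation
/usr/local/hadoop/bin/hadoop version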
Download Spark
wget http://mirrors.cnnic.cn/apache/spark/spark-1.4.1/spark-1.4.1-bin-without-hadoop.tgz
tar xvzf spark-1.4.1-bin-without-hadoop.tgz
cd spark-1.4.1-bin-without-hadoop/
Create the Spark install directory
sudo mkdir /usr/local/spark
sudo chown -R hduser:hadoop /usr/local/spark/
Move the files into the install directory
mv * /usr/local/spark/
Add Spark to the PATH
vim ~/.bashrc
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin
source ~/.bashrc
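A quick check that the Spark scripts are now visible on the PATH:
# Both should point into /usr/local/spark
which spark-shell
echo $SPARK_HOME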
Edit the Spark configuration
cd /usr/local/spark/conf
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export SCALA_HOME=/usr/lib/scala
export SPARK_MASTER_IP=10.58.21.225
export SPARK_WORKER_MEMORY=4g
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
# Alternatively, if the hadoop command is already on the PATH:
#export SPARK_DIST_CLASSPATH=$(hadoop classpath)
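SPARK_DIST_CLASSPATH is what lets the "without-hadoop" build find the Hadoop jars at runtime, so it is useful to inspect what the hadoop classpath command actually returns (the output depends on your Hadoop layout):
# Inspect the classpath Spark will inherit
/usr/local/hadoop/bin/hadoop classpath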
Configure the Spark worker nodes
cp slaves.template slaves
vim slaves
# List the worker nodes, one per line
Master
Slave1
Slave2
# For a single-node setup, use only localhost
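On a real cluster, the same Spark directory and configuration must exist on every node listed in slaves. A minimal sketch using scp, assuming /usr/local/spark has already been created on each worker with the same hduser:hadoop ownership as on the master:
# Copy the Spark installation (including conf/) to each worker
scp -r /usr/local/spark/* hduser@Slave1:/usr/local/spark/
scp -r /usr/local/spark/* hduser@Slave2:/usr/local/spark/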
Start Spark
# Start Hadoop first
start-all.sh
# Start Spark
cd /usr/local/spark/sbin/
./start-all.sh
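To confirm the standalone cluster came up, jps should now list a Master process (plus a Worker on every node in slaves), and the master serves a status page on its default web UI port:
# Check the running JVM processes
jps
# Master web UI (default port 8080)
# http://Master:8080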
Test Spark
cd /usr/local/spark
./bin/spark-shell
After entering the Spark shell, run a small test:
# Upload a file to HDFS
hadoop fs -copyFromLocal README.md /
# Read it from the Spark shell
val file = sc.textFile("hdfs://Master:54310/README.md") // the HDFS address may differ between single-node and cluster setups
val sparks = file.filter(line => line.contains("Spark"))
sparks.count
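The same count can be cross-checked outside Spark with plain HDFS tools, which confirms the shell is reading the file you expect (same HDFS address as above; adjust for a single-node setup):
# Count lines containing "Spark" directly from HDFS
hadoop fs -cat hdfs://Master:54310/README.md | grep -c Spark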