1. Installation
- Unpack hadoop-XXX.tar.gz
[hadoop@master packages]$ tar -zxvf hadoop-2.7.7.tar.gz
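The paths below assume the unpacked directory lives at /home/hadoop/software/hadoop. If you also want the hadoop commands on PATH, a minimal ~/.bashrc sketch (a hypothetical convenience, not part of the original steps, assuming that install path):
# Hypothetical ~/.bashrc entries, assuming the install path used throughout this post
export HADOOP_HOME=/home/hadoop/software/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin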
- Configure hadoop-env.sh
# Set JAVA_HOME
export JAVA_HOME=/home/hadoop/software/jdk1.8.0_211
# Set HADOOP_CONF_DIR (the stock default on the next line keeps this value,
# since ${VAR:-default} only substitutes when VAR is unset)
export HADOOP_CONF_DIR=/home/hadoop/software/hadoop/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Set HADOOP_HEAPSIZE to cap the default heap size (in MB) of Hadoop daemons
export HADOOP_HEAPSIZE=256
# Set HADOOP_NAMENODE_OPTS to cap the NameNode heap
export HADOOP_NAMENODE_OPTS="-Xmx128m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
# Set HADOOP_DATANODE_OPTS to cap the DataNode heap
export HADOOP_DATANODE_OPTS="-Xmx256m -Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
# Set HADOOP_SECONDARYNAMENODE_OPTS to cap the SecondaryNameNode heap
export HADOOP_SECONDARYNAMENODE_OPTS="-Xmx128m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
# Set HADOOP_CLIENT_OPTS to cap the heap of client processes (e.g. the hadoop fs CLI)
export HADOOP_CLIENT_OPTS="-Xmx96m $HADOOP_CLIENT_OPTS"
# Set HADOOP_LOG_DIR to choose where logs are written
export HADOOP_LOG_DIR=/home/hadoop/software/hadoop/logs/$USER
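HADOOP_HEAPSIZE and the role-specific -Xmx values above can coexist: in the 2.x launch scripts, the *_OPTS variables are appended after the default -Xmx derived from HADOOP_HEAPSIZE, and the JVM honors the last -Xmx on its command line, so the NameNode above really gets 128 MB, not 256 MB. A quick way to verify the last-one-wins rule:
# Prints MaxHeapSize = 134217728 (128 MB): the later -Xmx flag wins
java -Xmx256m -Xmx128m -XX:+PrintFlagsFinal -version 2>/dev/null | grep MaxHeapSize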
- Configure yarn-env.sh
# Set JAVA_HOME
export JAVA_HOME=/home/hadoop/software/jdk1.8.0_211
# Set JAVA_HEAP_MAX (default max heap for YARN daemons)
JAVA_HEAP_MAX=-Xmx256m
# Set YARN_OPTS (extra JVM options for YARN daemons)
YARN_OPTS="$YARN_OPTS -Xmx128m -Dhadoop.log.dir=$YARN_LOG_DIR"
- Configure core-site.xml
<configuration>
  <property>
    <!-- RPC address of the HDFS NameNode -->
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <!-- Base directory for Hadoop temporary files (note: /tmp may be cleared on reboot) -->
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop</value>
  </property>
</configuration>
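fs.defaultFS is what lets HDFS paths be written without a scheme; once the cluster is up, for example:
# Both commands address hdfs://master:9000 thanks to fs.defaultFS
hdfs dfs -ls /
hdfs dfs -mkdir -p /user/hadoop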
- Configure hdfs-site.xml
<configuration>
  <property>
    <!-- HDFS replication factor -->
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <!-- Where the NameNode keeps its metadata -->
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop/software/hadoop/data/dfs_name</value>
  </property>
  <property>
    <!-- Where the DataNode keeps its block data -->
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop/software/hadoop/data/dfs_data</value>
  </property>
</configuration>
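Property names are not validated by Hadoop, so a misspelled key (e.g. dfs.relication instead of dfs.replication) is silently ignored and the default replication factor of 3 applies. A quick sanity check after editing:
hdfs getconf -confKey dfs.replication    # should print 2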
- Configure yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:18040</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:18030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:18025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:18141</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:18088</value>
  </property>
</configuration>
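With yarn.resourcemanager.webapp.address moved to 18088, the ResourceManager web UI answers on that port once YARN is running; a quick probe from the shell (or open http://master:18088 in a browser):
# Should print 200 when the ResourceManager is up
curl -s -o /dev/null -w "%{http_code}\n" http://master:18088/cluster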
- Configure mapred-site.xml (note the file name: mapred-site.xml, not mapreduce-site.xml)
<configuration>
  <property>
    <!-- Run MapReduce jobs on the YARN framework -->
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
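A stock 2.7.7 tarball does not include mapred-site.xml itself, only mapred-site.xml.template; copy it before editing (the directory listing in section 2 reflects the state after this copy):
# Run from the Hadoop install directory
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml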
- Configure slaves (hosts listed here are where the start scripts launch DataNode and NodeManager daemons)
slave1
- Copy the configured package to slave1
scp -r hadoop/ slave1:~/software/
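Both scp and the start scripts below assume the hadoop user can SSH from master to the slaves (and to master itself) without a password; if that is not yet set up, a minimal sketch:
# Run once as the hadoop user on master
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
ssh-copy-id hadoop@slave1
ssh-copy-id hadoop@master    # the start scripts also ssh to the local node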
- Format the NameNode
hdfs namenode -format
- Start Hadoop
start-all.sh
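start-all.sh is deprecated in Hadoop 2.x; it simply runs start-dfs.sh followed by start-yarn.sh, which can also be invoked separately. Either way, jps on each node should then show the expected daemons; with slaves containing only slave1, roughly:
# On master: NameNode, SecondaryNameNode, ResourceManager (plus Jps itself)
# On slave1: DataNode, NodeManager
jps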
2. Configuration files
[hadoop@master hadoop]$ ls
capacity-scheduler.xml hadoop-policy.xml kms-log4j.properties ssl-client.xml.example
configuration.xsl hdfs-site.xml kms-site.xml ssl-server.xml.example
container-executor.cfg httpfs-env.sh log4j.properties yarn-env.cmd
core-site.xml httpfs-log4j.properties mapred-env.cmd yarn-env.sh
hadoop-env.cmd httpfs-signature.secret mapred-env.sh yarn-site.xml
hadoop-env.sh httpfs-site.xml mapred-queues.xml.template
hadoop-metrics2.properties kms-acls.xml mapred-site.xml
hadoop-metrics.properties kms-env.sh slaves
Linux notes:
How do you check memory usage?
free -h
Memory actually in use: used - buffers - cache
Memory actually available: free + buffers + cache
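These formulas describe the classic free layout, where reclaimable buffers and cache are counted inside used. An illustrative run (numbers are made up; newer procps-ng versions instead show a single buff/cache column plus an available column that already accounts for reclaimable memory):
$ free -h
             total       used       free     shared    buffers     cached
Mem:          7.7G       6.1G       1.6G       8.0M       210M       3.2G
-/+ buffers/cache:       2.7G       5.0G
Swap:         2.0G         0B       2.0G
# in use:    6.1G - 210M - 3.2G ≈ 2.7G  (the "-/+ buffers/cache" used column)
# available: 1.6G + 210M + 3.2G ≈ 5.0G  (the "-/+ buffers/cache" free column)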