[Hadoop] (2) Hadoop: Installation and Configuration

1. Installation

  1. Extract hadoop-XXX.tar.gz
[hadoop@master packages]$ tar -zxvf hadoop-2.7.7.tar.gz 
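For the hdfs and start-all.sh commands used in later steps to resolve from any directory, Hadoop's bin and sbin directories have to be on the PATH. A minimal sketch, assuming the unpacked hadoop-2.7.7 directory was moved (or symlinked) to /home/hadoop/software/hadoop, as the paths in the rest of this post imply; append to ~/.bashrc on both nodes:

export HADOOP_HOME=/home/hadoop/software/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin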
  2. Configure hadoop-env.sh
# Set JAVA_HOME
export JAVA_HOME=/home/hadoop/software/jdk1.8.0_211

# Set HADOOP_CONF_DIR; the second export is the stock default and keeps the value set on the first line
export HADOOP_CONF_DIR=/home/hadoop/software/hadoop/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Set HADOOP_HEAPSIZE to cap the default heap size, in MB, of Hadoop daemons
export HADOOP_HEAPSIZE=256

# Set HADOOP_NAMENODE_OPTS to cap the NameNode heap size
export HADOOP_NAMENODE_OPTS="-Xmx128m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"

# Set HADOOP_DATANODE_OPTS to cap the DataNode heap size
export HADOOP_DATANODE_OPTS="-Xmx256m -Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

# Set HADOOP_SECONDARYNAMENODE_OPTS to cap the SecondaryNameNode heap size
export HADOOP_SECONDARYNAMENODE_OPTS="-Xmx128m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

# Set HADOOP_CLIENT_OPTS to cap the heap size of client-side commands
export HADOOP_CLIENT_OPTS="-Xmx96m $HADOOP_CLIENT_OPTS"

# Set HADOOP_LOG_DIR to choose where logs are written
export HADOOP_LOG_DIR=/home/hadoop/software/hadoop/logs/$USER
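Once the daemons are up (step 11), these caps can be spot-checked on the live Java command lines; a quick one-liner, assuming Linux procps ps:

# List the -Xmx flags of every running Java process
ps -C java -o args= | tr ' ' '\n' | grep -- '-Xmx'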
  3. Configure yarn-env.sh
# Set JAVA_HOME
export JAVA_HOME=/home/hadoop/software/jdk1.8.0_211

# Set JAVA_HEAP_MAX to cap the heap size of YARN daemons
JAVA_HEAP_MAX=-Xmx256m

# Set YARN_OPTS, extra JVM options passed to YARN commands
YARN_OPTS="$YARN_OPTS -Xmx128m -Dhadoop.log.dir=$YARN_LOG_DIR"
  4. Configure core-site.xml
<configuration>
	<property>
		<!-- RPC address of HDFS (the NameNode) -->
		<name>fs.defaultFS</name>
		<value>hdfs://master:9000</value>
	</property>
	<property>
		<!-- Base directory for Hadoop temporary files -->
		<name>hadoop.tmp.dir</name>
		<value>/tmp/hadoop</value>
	</property>
</configuration>
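Whether the file is actually being picked up can be checked without starting any daemon, since hdfs getconf reads the configuration directly:

# Should print hdfs://master:9000 and /tmp/hadoop respectively
hdfs getconf -confKey fs.defaultFS
hdfs getconf -confKey hadoop.tmp.dir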
  5. Configure hdfs-site.xml
<configuration>
	<property>
		<!-- HDFS replication factor; with only one DataNode (slave1), blocks will stay under-replicated -->
		<name>dfs.replication</name>
		<value>2</value>
	</property>
	<property>
		<!-- Where the NameNode stores its metadata -->
		<name>dfs.namenode.name.dir</name>
		<value>/home/hadoop/software/hadoop/data/dfs_name</value>
	</property>
	<property>
		<!-- Where DataNodes store block data -->
		<name>dfs.datanode.data.dir</name>
		<value>/home/hadoop/software/hadoop/data/dfs_data</value>
	</property>
</configuration>
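The format step (step 10) creates the NameNode directory itself, and DataNodes create theirs on first start, but creating both up front on the right hosts avoids permission surprises:

mkdir -p /home/hadoop/software/hadoop/data/dfs_name   # on master
mkdir -p /home/hadoop/software/hadoop/data/dfs_data   # on slave1 (every DataNode)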
  6. Configure yarn-site.xml
<configuration>
	<!-- Site specific YARN configuration properties -->
	<property>
		<name>yarn.nodemanager.aux-services</name>
		<value>mapreduce_shuffle</value>
	</property>
	<property>
		<name>yarn.resourcemanager.address</name>
		<value>master:18040</value>
	</property>
	<property>
		<name>yarn.resourcemanager.scheduler.address</name>
		<value>master:18030</value>
	</property>
	<property>
		<name>yarn.resourcemanager.resource-tracker.address</name>
		<value>master:18025</value>
	</property>
	<property>
		<name>yarn.resourcemanager.admin.address</name>
		<value>master:18141</value>
	</property>
	<property>
		<name>yarn.resourcemanager.webapp.address</name>
		<value>master:18088</value>
	</property>
</configuration>
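Once the cluster is up (step 11), the non-default webapp port is easy to confirm from any node:

# Expect 200 if the ResourceManager web UI is listening on the configured port
curl -s -o /dev/null -w '%{http_code}\n' http://master:18088/cluster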
  7. Configure mapred-site.xml
<configuration>
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>
</configuration>
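A stock Hadoop 2.7.7 unpack ships this file only as mapred-site.xml.template, so it has to be created once before editing:

[hadoop@master hadoop]$ cp mapred-site.xml.template mapred-site.xml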
  8. Configure slaves (one worker hostname per line; with only slave1 listed, master itself runs no DataNode or NodeManager)
slave1
  9. Copy the Hadoop directory to slave1
scp -r hadoop/ slave1:~/software/
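start-all.sh in step 11 launches the remote daemons over SSH, so the hadoop user on master needs passwordless login to slave1 (and to master itself). If that is not already in place, a typical setup:

mkdir -p ~/.ssh && chmod 700 ~/.ssh       # in case ~/.ssh does not exist yet
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa  # generate a passphrase-less key pair
ssh-copy-id hadoop@master                 # master also SSHes to itself
ssh-copy-id hadoop@slave1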
  10. Format the NameNode (do this only once; reformatting wipes existing HDFS metadata)
hdfs namenode -format
  11. Start Hadoop (start-all.sh is deprecated in 2.x but still works; start-dfs.sh followed by start-yarn.sh is the equivalent)
start-all.sh
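A quick sanity check is jps on each node. With this layout, master should be running NameNode, SecondaryNameNode, and ResourceManager, and slave1 should be running DataNode and NodeManager (Jps itself also shows up in both lists):

[hadoop@master ~]$ jps
[hadoop@slave1 ~]$ jps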

2. Configuration Files

For reference, everything that ships under etc/hadoop (the files edited above: hadoop-env.sh, yarn-env.sh, core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml, and slaves):

[hadoop@master hadoop]$ ls
capacity-scheduler.xml      hadoop-policy.xml        kms-log4j.properties        ssl-client.xml.example
configuration.xsl           hdfs-site.xml            kms-site.xml                ssl-server.xml.example
container-executor.cfg      httpfs-env.sh            log4j.properties            yarn-env.cmd
core-site.xml               httpfs-log4j.properties  mapred-env.cmd              yarn-env.sh
hadoop-env.cmd              httpfs-signature.secret  mapred-env.sh               yarn-site.xml
hadoop-env.sh               httpfs-site.xml          mapred-queues.xml.template
hadoop-metrics2.properties  kms-acls.xml             mapred-site.xml
hadoop-metrics.properties   kms-env.sh               slaves

Linux notes:
How to check memory usage?
free -h
Memory actually in use: used - buffers - cache
Memory actually available: free + buffers + cache
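On the older procps free layout (a Mem: row with separate buffers and cached columns; newer versions print an available column that already accounts for them), both formulas can be computed in one line:

free -m | awk '/^Mem:/ {printf "really used: %d MB, really free: %d MB\n", $3-$6-$7, $4+$6+$7}'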

