java连接hdfs（hdfs的API）

一：创建maven项目

导入maven

 <dependencies>
        <!-- Hadoop HDFS artifact; keep its version identical to the other Hadoop artifacts below -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.6</version>
        </dependency>
        <!-- Hadoop common artifact -->
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.6</version>
        </dependency>
        <!-- Hadoop client artifact -->
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.6</version>
        </dependency>


        <!-- JUnit 4: provides the @Test / @Before annotations used by the examples -->
        <!-- https://mvnrepository.com/artifact/junit/junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
        </dependency>
    </dependencies>

junit 是用于编写和运行单元测试的包；
其余三个（hadoop-hdfs、hadoop-common、hadoop-client）是连接 hdfs 所需要的包，版本号需要保持一致。

二:相关操作

1：目录操作

相关操作：1：mkdirs 创建目录。2：delete 删除文件或目录。3：listStatus 列出目录的内容。4：getFileStatus 显示文件系统的目录和文件的元数据信息。5：getFileBlockLocations 显示文件存储位置

（1：创建一个目录

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;


/**
 * JUnit examples showing basic HDFS directory operations through the Hadoop
 * {@link FileSystem} API.
 */
public class Test1 {
    /** HDFS client handle; opened before each test and closed after it. */
    FileSystem fs;

    /**
     * Connects to HDFS before each test.
     *
     * @throws URISyntaxException if the NameNode URI is malformed
     * @throws IOException if the file system cannot be obtained
     */
    @Before
    public void conn() throws URISyntaxException, IOException {
        // Hadoop configuration object; HDFS settings are picked up automatically.
        Configuration conf = new Configuration();
        // Use a replication factor of 1.
        conf.set("dfs.replication", "1");
        // "master" is the host name (an IP address also works when the host name
        // is not mapped); 9000 is the port taken from the Hadoop configuration.
        URI uri = new URI("hdfs://master:9000");
        // The FileSystem object plays the role of the HDFS client.
        fs = FileSystem.get(uri, conf);
    }

    /**
     * Releases the HDFS client after each test.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void close() throws IOException {
        // conn() may have failed before assigning fs.
        if (fs != null) {
            fs.close();
        }
    }

    /**
     * Creates the directory {@code /data1}, replacing any existing one.
     *
     * @throws IOException if HDFS reports an error or the directory is not created
     */
    @Test
    public void mkdir() throws IOException {
        // HDFS path of the directory to create.
        Path path = new Path("/data1");
        // Remove a previous copy so the test always ends with an empty directory.
        if (fs.exists(path)) {
            // The single-argument delete(Path) is deprecated; request a
            // recursive delete explicitly.
            fs.delete(path, true);
        }
        // mkdirs reports failure through its return value, so check it instead
        // of letting the test pass silently.
        if (!fs.mkdirs(path)) {
            throw new IOException("Failed to create directory " + path);
        }
    }

}

可以通过web界面查看有没有创建成功（master：50070）

（2：获取文件列表

 @Test
    public void filestatus() throws IOException {
        //获取根目录下的文件列表
        FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
        //遍历 fileStatuses
        for (FileStatus fileStatus : fileStatuses) {
            System.out.println(fileStatus);
        }

注意：1：fs.mkdirs 创建目录时本身就是递归的，会自动创建路径中所有不存在的父目录，不需要额外的布尔参数；而 fs.delete 的第二个布尔参数为 true 时表示递归删除目录及其中的内容

2：利用fs还可以进行目录的其他操作，这里就不一一介绍了

2：文件操作

（1：将本地数据(磁盘中）上传到hdfs中

 /**
     * Uploads a file from the local disk into the HDFS root directory.
     *
     * @throws IOException if the copy fails
     */
    @Test
    public void put() throws IOException {
        // Source: the data file on the local disk.
        Path localSource = new Path("d:/students.txt");
        // Destination: the HDFS root directory.
        Path hdfsTarget = new Path("/");
        fs.copyFromLocalFile(localSource, hdfsTarget);
    }

（2：读取hdfs上的数据

 @Test
    public void open() throws IOException {
        //想要查看数据的路径
        Path path = new Path("/students.txt");
        //获得输入字节流
        FSDataInputStream open = fs.open(path);
        //将输入字节流通过转换流转化为字符流并读出
        BufferedReader br = new BufferedReader(new InputStreamReader(open));
        String line=null;
        while((line=br.readLine())!=null){
            System.out.println(line);
        }

（3：向hdfs上指定文件写数据

/**
     * Writes two lines of text to {@code /test.txt} on HDFS.
     *
     * @throws IOException if the file cannot be created or written
     */
    @Test
    public void write() throws IOException {
        // try-with-resources flushes and closes both streams even if a write
        // fails. The charset is fixed to UTF-8 so the stored bytes do not depend
        // on the default encoding of the machine running the test.
        try (FSDataOutputStream fsDataOutputStream = fs.create(new Path("/test.txt"));
             BufferedWriter bufferedWriter = new BufferedWriter(
                     new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
            bufferedWriter.write("你好");
            bufferedWriter.newLine();
            bufferedWriter.write("世界");
        }
    }

（4：将hdfs上的数据下载下来

/**
 * Downloads {@code /student} from HDFS to the local disk.
 *
 * @throws Exception if the copy fails
 */
@Test
public void load() throws Exception {
    // Source on HDFS and destination on the local disk.
    Path hdfsSource = new Path("/student");
    Path localTarget = new Path("D:\\student");
    fs.copyToLocalFile(hdfsSource, localTarget);
}

三：关于测试包

我用的是junit测试，这样可以不用写main方法，需要运行哪个方法就运行哪个：每个@Test对应一个测试方法（函数），在IDE中每个@Test旁边都有一个运行按钮，可以单独运行该方法。除此之外还有@Before、@After等注解。@Before标注的方法会在每个@Test运行之前执行，常用来建立连接；@After则相反，会在每个@Test运行之后执行，常用在关闭连接的方法上。

原文链接：https://blog.csdn.net/weixin_50691399/article/details/121955946