Java 批量提取 PDF、OFD 文件内容并保存到数据库

批量读取指定文件夹下 PDF 和 OFD 文件的文本内容,并保存到数据库中。

Controller层

/**
     *  Test endpoint that imports the text of every PDF/OFD file in a folder into the database.
     *
     *  Original author: sunzhaoqi, 2022/7/1 15:07.
     *
     *  @param filePath folder containing the files to import; when it is missing or blank,
     *                  the hard-coded sample folder below is used instead
     */
    @GetMapping("/loadPDFTest")
    public void loadPDF(String filePath){
        // Only fall back to the sample folder when the caller supplied nothing.
        // Previously the request parameter was always overwritten and therefore ignored.
        if (filePath == null || filePath.trim().isEmpty()) {
            filePath = "C:\\Users\\15954\\Desktop\\PDF";
        }

        // NOTE(review): filePath comes straight from the HTTP request. Before exposing this
        // outside a test environment, restrict it to an allowed base directory.
        ossFileService.loadPDF(filePath);
    }

Service层

 /**
  * Reads the text content of the PDF and OFD files found in the given folder and
  * stores one record per file in the database.
  *
  * @param filePath path of the folder that contains the files to import
  */
 void loadPDF(String filePath);

ServiceImpl实现层

/**
     *  Reads the text of every PDF and OFD file located directly inside {@code filePath}
     *  and batch-inserts one {@code OssFileBak} row per file.
     *
     *  Original author: sunzhaoqi, 2022/10/11 15:08.
     *
     *  Files are recognised by their extension ({@code .pdf} / {@code .ofd}, case-insensitive);
     *  sub-directories and files with any other extension are skipped.
     *
     *  @param filePath folder containing the files to import
     *  @throws IllegalArgumentException if {@code filePath} is null, is not a directory,
     *                                   or cannot be listed
     *  @throws RuntimeException if any PDF or OFD file cannot be parsed; nothing is inserted
     *                           in that case because the insert happens after all files are read
     */
    @Override
    public void loadPDF(String filePath) {
        if (filePath == null) {
            throw new IllegalArgumentException("文件夹地址不能为空");
        }
        // listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] files = new File(filePath).listFiles();
        if (files == null) {
            throw new IllegalArgumentException("文件夹不存在或无法读取: " + filePath);
        }

        List<OssFileBak> ossFiles = new ArrayList<>();
        for (File candidate : files) {
            if (!candidate.isFile()) {
                continue;
            }
            String fileName = candidate.getName();

            String content;
            if (hasExtension(fileName, ".pdf")) {
                content = extractPdfText(candidate);
            } else if (hasExtension(fileName, ".ofd")) {
                content = extractOfdText(candidate);
            } else {
                // Neither PDF nor OFD: nothing to import.
                continue;
            }

            OssFileBak ossFileBak = new OssFileBak();
            ossFileBak.setFileName(fileName);
            ossFileBak.setFileContent(content);
            // Placeholder values kept from the original test code; replace before production use.
            ossFileBak.setId(1);
            ossFileBak.setFileStatus(1);
            ossFiles.add(ossFileBak);
        }

        // Skip the batch insert when nothing matched, so no statement is issued for an empty list.
        if (!ossFiles.isEmpty()) {
            ossFileBakMapper.insertList(ossFiles);
        }
    }

    /** Returns true when {@code fileName} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String fileName, String extension) {
        // regionMatches returns false for a negative offset, i.e. when the name is shorter
        // than the extension.
        return fileName.regionMatches(true, fileName.length() - extension.length(),
                extension, 0, extension.length());
    }

    /** Extracts the full text of a PDF file, always closing the document afterwards. */
    private String extractPdfText(File pdfFile) {
        try (PDDocument document = PDDocument.load(pdfFile)) {
            return new PDFTextStripper().getText(document);
        } catch (IOException e) {
            // Keep the cause so the underlying parse error is not lost.
            throw new RuntimeException(pdfFile.getName() + "解析异常!", e);
        }
    }

    /** Extracts and concatenates the text of all pages of an OFD file, always closing the reader. */
    private String extractOfdText(File ofdFile) {
        try (OFDReader reader = new OFDReader(ofdFile.getAbsolutePath())) {
            StringBuilder text = new StringBuilder();
            for (String pageText : new ContentExtractor(reader).extractAll()) {
                text.append(pageText);
            }
            return text.toString();
        } catch (IOException e) {
            throw new RuntimeException(ofdFile.getName() + "解析异常!", e);
        }
    }

OssFileBak实体类

package com.shitu.project.domain;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import javax.persistence.Id;
import javax.persistence.Table;

/**
 * Entity mapped to the {@code oss_file_bak} table; each instance describes one file
 * together with its extracted text content.
 *
 * Lombok generates the getters/setters, {@code equals}/{@code hashCode}/{@code toString}
 * and both the no-args and all-args constructors. The all-args constructor takes the
 * fields in declaration order, so do not reorder the fields without checking its callers.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Table(name = "oss_file_bak")
public class OssFileBak {
    // NOTE(review): not annotated with @Id even though it is named "id" — confirm its role.
    private Integer id;
    // Primary-key field as far as the persistence mapping is concerned (carries @Id).
    @Id
    private Integer fuId;
    // presumably the storage URL of the file — TODO confirm
    private String fileUrl;
    // File name including its extension.
    private String fileName;
    // presumably the file type/extension — TODO confirm
    private String fileType;
    // Status code; the meaning of the individual values is not defined here — TODO confirm
    private Integer fileStatus;
    // presumably an error message recorded when processing fails — TODO confirm
    private String errMsg;
    // presumably the URL used to fetch the file — TODO confirm
    private String getUrl;
    // Text content extracted from the file.
    private String fileContent;
}

pom.xml 依赖(Maven)

<!-- OFD-to-PDF conversion module (ofdrw-converter) -->

		<dependency>
			<groupId>org.ofdrw</groupId>
			<artifactId>ofdrw-converter</artifactId>
			<version>1.17.18</version>
		</dependency>


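	<!-- OFDRW aggregate artifact (ofdrw-full). NOTE(review): it presumably already brings in ofdrw-converter, and the two versions declared here differ (1.17.18 vs 1.19.0) - confirm and align them. -->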
		<dependency>
			<groupId>org.ofdrw</groupId>
			<artifactId>ofdrw-full</artifactId>
			<version>1.19.0</version>
		</dependency>

       <dependency> 
          <groupId>commons-io</groupId> 
          <artifactId>commons-io</artifactId> 
          <version>2.8.0</version> 
       </dependency>

如对代码有疑问,请联系博主,博主会及时更新完善。


版权声明:本文为weixin_44151041原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。