Java读取ORC文件的内容

import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

/**
 * Sample program that reads an ORC file from HDFS through Hive's
 * {@code OrcInputFormat} and prints every row to standard output, with the
 * column values separated by {@code |}.
 */
public class HDFSSample {

    /** ORC file read by the no-argument {@link #orcFileRead()}. */
    private static final String DEFAULT_ORC_PATH =
            "/ol/ol_zx_empowerment_project/dt=20180508/000000_0";

    /** Separator written between column values of a printed row. */
    private static final String FIELD_SEPARATOR = "|";

    /**
     * Entry point: reads the default ORC file and prints its rows.
     *
     * @param args ignored
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        orcFileRead();
    }

    /**
     * Reads the default ORC file and prints each row.
     *
     * @throws Exception if the file cannot be opened or read
     */
    public static void orcFileRead() throws Exception {
        orcFileRead(DEFAULT_ORC_PATH);
    }

    /**
     * Reads the ORC file at {@code path} and prints each row as
     * {@code col0|col1|...|colN}.
     *
     * <p>Every split returned by the input format is read, and each record
     * reader is closed once its split has been consumed (also on failure).
     * If the input format returns no splits, nothing is printed.
     *
     * @param path location of the ORC file on the configured file system
     * @throws Exception if the file cannot be opened or read
     */
    // Raw InputFormat/RecordReader are kept on purpose: the concrete key/value
    // classes are not imported by this file, and the values are only ever
    // handed back to the reader and the object inspector.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void orcFileRead(String path) throws Exception {
        JobConf conf = new JobConf();
        conf.set("fs.default.name", "hdfs://master:9000");

        // Describe the row schema to the serde so it can build an inspector.
        Properties schema = new Properties();
        schema.setProperty("columns",
                "date_id,referrer_type,referrer_shopid,is_test,referrer_id,user_unique,apply_mobile");
        schema.setProperty("columns.types", "string,string,string,int,string,string,string");
        OrcSerde serde = new OrcSerde();
        serde.initialize(conf, schema);
        StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector();
        List<? extends StructField> fields = inspector.getAllStructFieldRefs();

        InputFormat in = new OrcInputFormat();
        FileInputFormat.setInputPaths(conf, new Path(path));
        InputSplit[] splits = in.getSplits(conf, 1);

        // Original author's note: hive.io.file.readcolumn.ids defaults to empty;
        // without a column id an empty value is produced and
        // Integer.parseInt(element) then fails.
        conf.set("hive.io.file.readcolumn.ids", "1");

        // Read every split, not just the first one, so no rows are dropped and
        // an empty split array does not cause an index-out-of-bounds error.
        for (InputSplit split : splits) {
            RecordReader reader = in.getRecordReader(split, conf, Reporter.NULL);
            try {
                Object key = reader.createKey();
                Object value = reader.createValue();
                while (reader.next(key, value)) {
                    System.out.println(formatRow(inspector, fields, value));
                }
            } finally {
                // Always release the underlying file handle.
                reader.close();
            }
        }
    }

    /** Joins the values of all {@code fields} of {@code row} with {@code |}. */
    private static String formatRow(StructObjectInspector inspector,
                                    List<? extends StructField> fields,
                                    Object row) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < fields.size(); i++) {
            if (i > 0) {
                line.append(FIELD_SEPARATOR);
            }
            // append(Object) renders null as "null", matching string concatenation.
            line.append(inspector.getStructFieldData(row, fields.get(i)));
        }
        return line.toString();
    }
}

版权声明:本文为weixin_42001459原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。