import java.util.List; import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; public class HDFSSample { public static void main(String[] args) throws Exception { orcFileRead();
}
public static void orcFileRead() throws Exception { String path="/ol/ol_zx_empowerment_project/dt=20180508/000000_0"; JobConf conf=new JobConf(); conf.set("fs.default.name","hdfs://master:9000"); OrcSerde serde=new OrcSerde(); Properties p=new Properties(); p.setProperty("columns", "date_id,referrer_type,referrer_shopid,is_test,referrer_id,user_unique,apply_mobile"); p.setProperty("columns.types", "string,string,string,int,string,string,string"); serde.initialize(conf, p); StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector(); InputFormat in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, new Path(path)); InputSplit[] splits = in.getSplits(conf, 1); conf.set("hive.io.file.readcolumn.ids", "1");//hive.io.file.readcolumn.ids的默认值是空,如果没有字段名 ,就会产生空值,在Integer.parseInt(element)就会报错。 RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); Object key = reader.createKey(); Object value = reader.createValue(); List<? extends StructField> fields = inspector.getAllStructFieldRefs(); long offset = reader.getPos(); while(reader.next(key, value)) { Object date_id = inspector.getStructFieldData(value, fields.get(0)); Object referrer_type = inspector.getStructFieldData(value, fields.get(1)); Object referrer_shopid = inspector.getStructFieldData(value, fields.get(2)); Object is_test = inspector.getStructFieldData(value, fields.get(3)); Object referrer_id = inspector.getStructFieldData(value, fields.get(4)); Object user_unique = inspector.getStructFieldData(value, fields.get(5)); Object apply_mobile = inspector.getStructFieldData(value, fields.get(6)); offset = reader.getPos(); System.out.println(date_id + "|" + referrer_type + "|" + referrer_shopid + "|" + is_test+ "|" + referrer_id+ "|" + user_unique+ "|" + apply_mobile); } } }
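// Copyright notice: this is an original article by weixin_42001459, licensed under CC 4.0 BY-SA. When reposting, include a link to the original source and this notice.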