记工作中一次读取.lzo_deflate文件的经历
作者:互联网
关于lzo及.lzo_deflate文件读取写入
1.在MR Job中:
如果输出格式是TextOutputFormat,要用LzopCodec(生成.lzo文件),相应地,读取这个输出的输入格式是LzoTextInputFormat。
如果输出格式是SequenceFileOutputFormat,要用LzoCodec(生成.lzo_deflate文件),相应地,读取这个输出的输入格式是SequenceFileInputFormat。
//获取文件格式 String inputfileformat = ConfigurationManager.getProperty("inputfileformat").trim(); if(null!=inputfileformat && inputfileformat.equalsIgnoreCase("lzo")){ //lzop(.lzo文件)输入 //job.setInputFormatClass(LzoTextInputFormat.class); //lzo(.lzo_deflate)输入 job.setInputFormatClass(SequenceFileInputFormat.class); } |
String outputfileformat = ConfigurationManager.getProperty("outputfileformat").trim(); if(null!=outputfileformat && outputfileformat.equalsIgnoreCase("lzo")){ FileOutputFormat.setCompressOutput(job, true); //lzop(.lzo文件)输出 FileOutputFormat.setOutputCompressorClass(job, LzopCodec.class); //lzo(.lzo_deflate)输出 FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class); } |
2. Java读取.lzo_deflate文件(针对本地)
// Authenticate with Kerberos before touching the cluster.
AuthKrb5.authKrb5();

String inputPath = ConfigurationManager.getProperty("inputpath").trim();
Path seqFile = new Path(inputPath);
Configuration conf = new Configuration();

// Local copies of the cluster configuration files, fetched from any node of
// the Hadoop cluster, e.g. /home/fate/FSDataAudit/auth/conf/core-site.xml
String coreSite = ConfigurationManager.getProperty("coreSite").trim();
log.info("coreSite:" + coreSite);
// e.g. /home/fate/FSDataAudit/auth/conf/hdfs-site.xml
String hdfsSite = ConfigurationManager.getProperty("hdfsSite").trim();
log.info("hdfsSite:" + hdfsSite);

// Wrap the locations in Path: Configuration.addResource(String) looks the
// name up on the classpath, whereas addResource(Path) reads the file from
// the local filesystem, which is what these settings point at.
conf.addResource(new Path(coreSite));
conf.addResource(new Path(hdfsSite));

final LongWritable outputKey = new LongWritable();
final Text outputValue = new Text();

// try-with-resources closes the reader even when next() throws.
try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFile))) {
    // Read every record and print it.
    while (reader.next(outputKey, outputValue)) {
        System.out.println("outputKey: " + outputKey);
        System.out.println("outputValue:" + outputValue);
    }
    // Alternative: write each value to a local file named after its key.
    // Only the first outputValue.getLength() bytes of getBytes() are valid.
    // while (reader.next(outputKey, outputValue)) {
    //     final File file = new File("/usr/" + outputKey.toString());
    //     FileUtils.writeByteArrayToFile(file, outputValue.getBytes());
    // }
}
标签:outputKey,conf,deflate,lzo,reader,new,outputValue,读取 来源: https://blog.csdn.net/qq_37359656/article/details/112130299