对于某些要求,我想将文本文件(分隔)转换为ORC(优化行列)格式.因为我必须定期运行它,所以我想编写一个java程序来执行此操作.我不想使用Hive临时表解决方法.有人可以帮我做吗?以下是我的尝试
/*ORCMapper.java*/
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.hive.ql.io.orc.*;
import org.apache.hadoop.io.*;
public class ORCMapper extends MapReduceBase implements
Mapper<LongWritable, Text, NullWritable, Writable>{
OrcSerde serde;
@Override
public void configure(JobConf job) {
serde = new OrcSerde();
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<NullWritable, Writable> output, Reporter reporter)
throws IOException {
output.collect(NullWritable.get(),serde.serialize(value, null));
}
}
/*ORCReducer.java*/
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class ORCReducer …Run Code Online (Sandbox Code Playgroud)