Javaでgz圧縮されたHDFSファイルを読み込む

847 ワード

HDFSファイルの拡張子(接尾辞)から圧縮形式を自動的に判別し、解凍して読み込みます。
 
		// Open the HDFS path given as the first program argument and wrap it in a
		// BufferedReader, transparently decompressing when the file name suffix
		// maps to a registered compression codec (e.g. ".gz").
		Path hdfsPath = new Path(args[0]);
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(hdfsPath.toUri(), conf);

		// The factory selects a codec from the path's suffix; the null check below
		// treats "no codec" as "read the file as plain text".
		CompressionCodecFactory factory = new CompressionCodecFactory(conf);
		CompressionCodec codec = factory.getCodec(hdfsPath);

		FSDataInputStream inputStream = fs.open(hdfsPath);
		BufferedReader reader = null;

		try {
			// Decode explicitly as UTF-8 instead of relying on the JVM's platform
			// default charset, which varies between machines.
			if (codec == null) {
				reader = new BufferedReader(new InputStreamReader(
						inputStream, java.nio.charset.StandardCharsets.UTF_8));
			} else {
				CompressionInputStream comInputStream = codec.createInputStream(inputStream);
				reader = new BufferedReader(new InputStreamReader(
						comInputStream, java.nio.charset.StandardCharsets.UTF_8));
			}
		} catch (Exception e) {
			// Setting up the reader failed, so nothing owns the raw stream yet:
			// close it here to avoid leaking the open HDFS handle. reader stays null.
			try {
				inputStream.close();
			} catch (Exception closeFailure) {
				e.addSuppressed(closeFailure);
			}
			e.printStackTrace();
		}