Hadoopで単語の出現回数を集計する例

8260 ワード

hadoop map-reduce

Hadoopの中核は、MapReduce処理とHadoop分散ファイルシステム(HDFS)です。

ステップ1:Map処理の定義


  
  
  
  
   
   
   
   /** 
   
   
   
    * 
   
   
   
    * Description: 
   
   
   
    * 
   
   
   
    * @author charles.wang 
   
   
   
    * @created Mar 12, 2012 1:41:57 PM 
   
   
   
    *  
   
   
   
    */ 
   
   
   
   public class MyMap extends Mapper<Object, Text, Text, IntWritable> { 
   
   
   
        
   
   
   
       private static final IntWritable one = new IntWritable(1); 
   
   
   
       private Text word; 
   
   
   
        
   
   
   
        
   
   
   
       public void map(Object key ,Text value,Context context)  
   
   
   
               throws IOException,InterruptedException{ 
   
   
   
            
   
   
   
           String line=value.toString(); 
   
   
   
           StringTokenizer tokenizer = new StringTokenizer(line); 
   
   
   
           while(tokenizer.hasMoreTokens()){ 
   
   
   
               word = new Text(); 
   
   
   
               word.set(tokenizer.nextToken()); 
   
   
   
               context.write(word, one); 
   
   
   
           } 
   
   
   
            
   
   
   
       } 
   
   
   
    
   
   
   
   }

ステップ2:Reduce処理の定義


  
  
  
  
   
   
   
   /** 
   
   
   
    * 
   
   
   
    * Description: 
   
   
   
    * 
   
   
   
    * @author charles.wang 
   
   
   
    * @created Mar 12, 2012 1:48:18 PM 
   
   
   
    *  
   
   
   
    */ 
   
   
   
   public class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> { 
   
   
   
        
   
   
   
       public void reduce (Text key,Iterable<IntWritable> values,Context context) 
   
   
   
           throws IOException ,InterruptedException{ 
   
   
   
            
   
   
   
           int sum=0; 
   
   
   
           for(IntWritable val: values){ 
   
   
   
               sum+=val.get(); 
   
   
   
           } 
   
   
   
            
   
   
   
           context.write(key, new IntWritable(sum)); 
   
   
   
       } 
   
   
   
    
   
   
   
   }

ステップ3:Driverを作成してMapReduceジョブを実行


  
  
  
  
   
   
   
   public class MyDriver { 
   
   
   
    
   
   
   
       public static void main(String [] args) throws Exception{ 
   
   
   
               
   
   
   
           Configuration conf = new Configuration(); 
   
   
   
           conf.set("hadoop.job.ugi", "root,root123"); 
   
   
   
            
   
   
   
           Job job = new Job(conf,"Hello,hadoop! ^_^"); 
   
   
   
            
   
   
   
           job.setJarByClass(MyDriver.class); 
   
   
   
           job.setMapOutputKeyClass(Text.class); 
   
   
   
           job.setMapOutputValueClass(IntWritable.class); 
   
   
   
           job.setMapperClass(MyMap.class); 
   
   
   
           job.setCombinerClass(MyReduce.class); 
   
   
   
           job.setReducerClass(MyReduce.class); 
   
   
   
           job.setInputFormatClass(TextInputFormat.class); 
   
   
   
           job.setOutputFormatClass(TextOutputFormat.class); 
   
   
   
            
   
   
   
           FileInputFormat.setInputPaths(job, new Path(args[0])); 
   
   
   
           FileOutputFormat.setOutputPath(job,new Path(args[1])); 
   
   
   
            
   
   
   
           job.waitForCompletion(true); 
   
   
   
       } 
   
   
   
   }

本記事は「平行線の凝集」ブログからの転載です。転載する場合は作者に連絡してください。

EasyMockとPowerMockで静的メソッド(static method)をモックする

[深潜学習]2週目(2)