ScalaでMapReduceの古典的な例であるWordCountを実装する

8317 ワード

前置きは省いて、早速コードを示します(詳細な手順版と簡略版の両方を含みます):
/** Classic MapReduce-style word count over an in-memory list of lines.
  *
  * Two equivalent implementations are provided: [[countWordsStepByStep]] names
  * every intermediate collection, while [[countWords]] is the same pipeline
  * written as a single chained expression.
  */
object WordCount {
    /** Counts the words of a fixed two-line sample with both implementations
      * and prints each result.
      *
      * @param args command-line arguments (unused)
      */
    def main(args: Array[String]): Unit = {
        val lines = List("qiusuo zhao hello spark", "zhao spark scala qiusuo zhao hello")

        // Detailed version: every stage of the pipeline is a named value.
        println(countWordsStepByStep(lines))

        // Simplified version: the same pipeline as one chained expression.
        // The label text is reproduced exactly as it appears in the original program.
        val res: List[(String, Int)] = countWords(lines)
        println("   : res=" + res)
    }

    /** Word count written out one stage at a time.
      *
      * @param lines input lines; words are separated by whitespace
      * @return (word, occurrences) pairs sorted by ascending count, ties broken
      *         by the word itself so the order is deterministic
      */
    def countWordsStepByStep(lines: List[String]): List[(String, Int)] = {
        // 1. Split every line into words and flatten into one list.
        val words: List[String] = lines.flatMap((line: String) => tokenize(line))
        // 2. Map each word to a pair: word => (word, 1).
        val pairs: List[(String, Int)] = words.map((word: String) => (word, 1))
        // 3. Group the pairs by word.
        val grouped: Map[String, List[(String, Int)]] =
            pairs.groupBy((pair: (String, Int)) => pair._1)
        // 4. Count the number of pairs in each group.
        val counts: Map[String, Int] =
            grouped.map((entry: (String, List[(String, Int)])) => (entry._1, entry._2.size))
        // 5. Sort by count; the word is a secondary key because the iteration
        //    order of the map alone would leave ties in an unspecified order.
        counts.toList.sortBy((entry: (String, Int)) => (entry._2, entry._1))
    }

    /** Word count as a single chained expression; same result as
      * [[countWordsStepByStep]].
      *
      * @param lines input lines; words are separated by whitespace
      * @return (word, occurrences) pairs sorted by ascending count, ties broken
      *         by the word itself
      */
    def countWords(lines: List[String]): List[(String, Int)] =
        lines.flatMap(line => tokenize(line)).map((_, 1)).groupBy(_._1)
            .map(x => (x._1, x._2.size)).toList.sortBy(x => (x._2, x._1))

    /** Splits a line on runs of whitespace and drops empty tokens, so leading,
      * trailing or repeated blanks (and empty lines) never yield "" as a word.
      */
    private def tokenize(line: String): List[String] =
        line.split("\\s+").filter(_.nonEmpty).toList
}