ScalaでMapReduceの古典的な例であるWordCountを実装する
前置きは省いて、早速コードを示します(手順を追った詳細版と簡略版の両方を含みます):
/** Word-count demo in the map/reduce style.
  *
  * The same hard-coded input is counted twice: first step by step, with every
  * intermediate value named and typed, and then as one chained expression.
  */
object WordCount {

  /** Prints both word-count results to standard output.
    *
    * @param args command-line arguments; not read by this program
    */
  def main(args: Array[String]): Unit = {
    val lines = List("qiusuo zhao hello spark", "zhao spark scala qiusuo zhao hello")

    // Step 1: split every line on a single space and flatten into one word list.
    val words: List[String] = lines.flatMap(line => line.split(" "))
    // Step 2: pair each word with an initial count of 1, i.e. word => (word, 1).
    val pairs: List[(String, Int)] = words.map(word => (word, 1))
    // Step 3: bucket the pairs by their word.
    val grouped: Map[String, List[(String, Int)]] = pairs.groupBy { case (word, _) => word }
    // Step 4: a word's frequency is the number of pairs that landed in its bucket.
    val counts: Map[String, Int] = grouped.map { case (word, bucket) => (word, bucket.size) }
    // Step 5: turn the map into a list ordered by ascending frequency.
    val ordered: List[(String, Int)] = counts.toList.sortBy { case (_, count) => count }
    // Step 6: print the step-by-step result.
    println(ordered)

    // Condensed version: the same pipeline written as a single expression.
    val res: List[(String, Int)] =
      lines
        .flatMap(line => line.split(" "))
        .map(word => word -> 1)
        .groupBy(pair => pair._1)
        .map { case (word, bucket) => word -> bucket.size }
        .toList
        .sortBy(pair => pair._2)
    println(" : res=" + res)
  }
}