import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark job that prints, for every key found in a text file, that key's
 * largest integer values in descending order.
 *
 * Each input line is expected to have the form `<key> <intValue>`, with the
 * fields separated by a single space. Only the first two fields of a line are
 * used. Lines that have fewer than two fields, or whose second field is not
 * an integer, are skipped instead of aborting the whole job.
 *
 * Usage: `TopN [inputPath] [topCount]`
 *  - `inputPath` defaults to `./data/topN`
 *  - `topCount` defaults to `3`
 */
object TopN {
  import scala.util.Try

  /** Input path used when no command-line argument is supplied. */
  private val DefaultInputPath = "./data/topN"

  /** Number of values kept per key when no command-line argument is supplied. */
  private val DefaultTopCount = 3

  /**
   * Parses one `<key> <intValue>` line.
   *
   * @param line raw line of the input file
   * @return the key paired with its integer value, or `None` when the line
   *         has fewer than two space-separated fields or the second field is
   *         not a valid `Int`
   */
  private def parseLine(line: String): Option[(String, Int)] =
    line.split(" ") match {
      case Array(key, value, _*) => Try(value.toInt).toOption.map(parsed => key -> parsed)
      case _                     => None
    }

  /**
   * Returns the `limit` largest elements of `values`, largest first.
   *
   * @param values candidate values, in any order
   * @param limit  maximum number of elements to keep
   */
  private def keepLargest(values: List[Int], limit: Int): List[Int] =
    values.sorted(Ordering[Int].reverse).take(limit)

  /**
   * Entry point: reads the input file, computes the top values per key and
   * prints one `(key, List(values...))` tuple per key.
   *
   * @param args optional `inputPath` followed by optional `topCount`
   * @throws NumberFormatException    if `topCount` is given but is not an integer
   * @throws IllegalArgumentException if `topCount` is not positive
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val topCount = args.lift(1).map(_.toInt).getOrElse(DefaultTopCount)
    require(topCount > 0, s"topCount must be positive, got $topCount")

    val conf = new SparkConf().setMaster("local[4]").setAppName("TopN")
    val sc: SparkContext = new SparkContext(conf)
    try {
      val records: RDD[(String, Int)] =
        sc.textFile(inputPath).flatMap(line => parseLine(line).iterator)

      // aggregateByKey carries at most `topCount` values per key through the
      // aggregation, instead of collecting every value of a key the way
      // groupByKey does before sorting.
      val topPerKey: RDD[(String, List[Int])] =
        records.aggregateByKey(List.empty[Int])(
          (best, value) => keepLargest(value :: best, topCount),
          (left, right) => keepLargest(left ++ right, topCount)
        )

      topPerKey.foreach(println)
    } finally {
      // Release the context even when the job above fails.
      sc.stop()
    }
  }
}