Jsoupによるmeta info scraping sample


MetaInfoScrapeSample.scala
package com.rikima.data.practice

import org.jsoup.Jsoup

/**
  * Created by mrikitoku on 2016/01/01.
  */

/**
  * Page metadata collected from a single HTML document.
  *
  * @param url         address the document was fetched from
  * @param title       text of the page's `title` element
  * @param keywords    `content` attribute of the `meta[name=keywords]` element
  * @param description `content` attribute of the `meta[name=description]` element
  */
case class MetaInfo(
  url: String,
  title: String,
  keywords: String,
  description: String
)

/**
  * Sample program that fetches a fixed list of pages with jsoup and prints
  * the title, keywords and description found in each page's markup.
  */
object MetaInfoScrapeSample {

  import scala.util.control.NonFatal

  private val UserAgent =
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0"
  private val Referrer = "http://www.google.com"
  private val TimeoutMillis = 12000

  /**
    * Scrapes every sample URL and prints the resulting [[MetaInfo]].
    *
    * A failure on one URL (network error, malformed response, ...) is
    * reported on stderr and does not prevent the remaining URLs from
    * being processed.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val urls = Seq(
      "http://www.orbis.co.jp/",
      "http://b.hatena.ne.jp/",
      "http://www.sunmarie.com/"
    )

    urls.foreach { url =>
      try println(scrape(url))
      catch {
        case NonFatal(e) =>
          Console.err.println(s"failed to scrape $url: $e")
      }
    }
  }

  /**
    * Downloads `url` and extracts its meta information.
    *
    * Elements that are missing from the page yield an empty string rather
    * than an exception.
    *
    * @param url address of the page to fetch
    * @return the meta information found in the page
    */
  private def scrape(url: String): MetaInfo = {
    val doc = Jsoup.connect(url)
      .ignoreContentType(true)
      .ignoreHttpErrors(true)
      .userAgent(UserAgent)
      .referrer(Referrer)
      .timeout(TimeoutMillis)
      .followRedirects(true)
      .execute()
      .parse()

    // `first()` returns null when the selector matches nothing, so wrap it
    // in Option before dereferencing.
    def firstContent(selector: String): String =
      Option(doc.select(selector).first()).map(_.attr("content")).getOrElse("")

    val title = Option(doc.select("title").first()).map(_.text()).getOrElse("")

    MetaInfo(
      url,
      title,
      firstContent("meta[name=keywords]"),
      firstContent("meta[name=description]")
    )
  }
}