# 大丈夫です。ニュースを見てください。
# encoding: utf-8
require 'rubygems'
require 'cgi'
require 'uri'
require 'mechanize'
# Searches Baidu News for a keyword and scrapes the result entries
# (title, link, source, date) out of the returned HTML with regular
# expressions.
class FetchNews
  # The page body is cut into chunks on every occurrence of this literal;
  # only chunks that also contain TITLE_MARKER are treated as entries.
  RESULT_DELIMITER = 'result'.freeze
  TITLE_MARKER     = 'class="c-title"'.freeze

  # Captures (1) the href and (2) the inner HTML of the headline anchor.
  # The href stops at the first closing quote, the rest of the opening tag
  # may span several lines, and the inner HTML may not run past a "</a>",
  # so an unrelated earlier anchor can never be glued onto the headline.
  LINK_PATTERN = %r{><a href="([^"]+)"[^>]*>((?:(?!</a>).)*)</a></h3>}m

  # Captures (1) the source name and (2) the YYYY-MM-DD date that follow
  # the author paragraph.
  # NOTE(review): the original pattern separated source and date with a
  # literal space; "&nbsp;" entities are accepted as well in case the
  # markup uses them -- confirm against a live page.
  BYLINE_PATTERN = /<p class="c-author">(.+?)(?:[[:space:]]|&nbsp;)+(\d{4}-\d{2}-\d{2})/

  # Keyword-highlighting tags wrapped around matched words in the title.
  EM_TAG = %r{</?em>}

  # Downloads the search result page for +keyword+.
  #
  # @param keyword [#to_s] search term; it is form-encoded before being
  #   placed in the query string
  # @return [Mechanize::Page] whatever Mechanize#get returns for the URL
  def self.get_page(keyword)
    agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }
    # URI.encode was removed in Ruby 3.0; component encoding also escapes
    # "&" and "=" so the keyword cannot break out of the word parameter.
    query = URI.encode_www_form_component(keyword)
    agent.get("http://news.baidu.com/ns?word=#{query}&tn=news&from=news&cl=2&rn=20&ct=1")
  end

  # Extracts the news entries from a fetched page.
  #
  # @param page [#body] object whose +body+ is the raw HTML
  # @return [Array<Hash>] one hash per entry with the keys :title, :url,
  #   :date and :source; chunks lacking a title, link or date are skipped
  def self.parse_html(page)
    # Re-tag a copy: force_encoding mutates its receiver, and the body
    # string belongs to the caller (it may even be frozen).
    html = page.body.to_s.dup.force_encoding('UTF-8')

    html.split(RESULT_DELIMITER).each_with_object([]) do |chunk, entries|
      next unless chunk.include?(TITLE_MARKER)

      entry = parse_entry(chunk)
      entries << entry if entry
    end
  end

  # Fetches the result page for +keyword+ and parses it in one step.
  #
  # @param keyword [#to_s] search term
  # @return [Array<Hash>] see parse_html
  def self.get_touch_news(keyword)
    parse_html(get_page(keyword))
  end

  # Builds the entry hash for one chunk, or returns nil when the chunk
  # does not contain a usable title, link and date.
  def self.parse_entry(chunk)
    link   = LINK_PATTERN.match(chunk)
    byline = BYLINE_PATTERN.match(chunk)
    return nil unless link && byline

    url   = link[1].strip
    # Drop only the <em> highlight tags, then decode HTML entities.
    title = CGI.unescapeHTML(link[2].gsub(EM_TAG, '')).strip
    return nil if url.empty? || title.empty?

    {
      :title  => title,
      :url    => url,
      :date   => byline[2],
      :source => byline[1].strip.gsub(' ', '')
    }
  end
  private_class_method :parse_entry
end
# start = Time.now
# news = FetchNews.get_touch_news("rails")
# puts news
# puts "cost #{Time.now - start}"
# WeChat公式アカウントをフォローして、Ruby & Rails 関連の技術記事をもっとご覧ください。