Python urllib2を使用してCSDNブログリストをローカルにダウンロード
806 ワード
# -*- coding: utf-8 -*-
import string, urllib2
# CSDN
def load_csdn(url, page):
    """Download one page of a blog article list and save it locally.

    The page is fetched from ``<url>/<page>`` and the raw response body is
    written to a file in the current working directory whose name is the
    page number zero-padded to five digits plus ``.html`` (for example
    ``00002.html``).

    Args:
        url: Base URL of the article list, without the page number,
            e.g. ``http://blog.csdn.net/Geek_ymv/article/list``.
        page: Page number to download.

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` is
            raised for an error HTTP status).
        IOError: If the output file cannot be written.
    """
    name = str(page).zfill(5) + '.html'
    print(' ' + str(page) + ' , ' + name)

    # Drop trailing slashes so the joined URL never contains '//'.
    url = url.rstrip('/') + '/' + str(page)
    print(url)

    # Identify as a desktop browser; presumably the site rejects urllib2's
    # default User-Agent -- TODO confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36'
    }
    req = urllib2.Request(url=url, headers=headers)

    # Download before touching the disk so that a failed request does not
    # leave an empty output file behind.
    response = urllib2.urlopen(req)
    try:
        body = response.read()
    finally:
        response.close()

    # Binary mode: the body is raw bytes, and text mode would rewrite
    # newlines on Windows. The with-block closes the file even on error.
    with open(name, 'wb') as f:
        f.write(body)
# Script entry point: ask for the list URL and the page number, then
# download that page. Guarded so that importing this module does not prompt
# on stdin or trigger a download.
if __name__ == '__main__':
    # Example of a complete page URL (list URL + '/' + page number):
    #   http://blog.csdn.net/Geek_ymv/article/list/2
    # raw_input already returns a string; strip stray whitespace from a
    # pasted URL.
    url = raw_input(u' :').strip()
    page = int(raw_input(u' :'))
    # Fetch the requested page and save it to disk.
    load_csdn(url, page)