Pythonクローラーで大学ランキングを取得
2591 ワード
#!/usr/bin/env python
# -*- coding: utf_8 -*-
import bs4
import requests
from bs4 import BeautifulSoup
'''
Python? 【 】 Python !
'''
# 1. Fetch the HTML text of the ranking page
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, or an HTTP error status).
    """
    try:
        # Give up after 30 seconds rather than blocking indefinitely.
        r = requests.get(url, timeout=30)
        # Convert 4xx/5xx status codes into an exception.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: report failure as "" but let KeyboardInterrupt,
        # SystemExit and programming errors propagate.
        return ""
# 2. Parse the table rows of the page into the university list
def fillUnivList(ulist, html):
    """Append the first four cell strings of every table row in ``html``.

    Args:
        ulist: List that is extended in place; each appended item is a
            four-element list holding the ``.string`` of the row's first
            four ``<td>`` cells.
        html: HTML text to parse.

    Returns:
        None. ``ulist`` is left unchanged when ``html`` contains no
        ``<tbody>`` element.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body (for example getHTMLText returned "" after a failed
        # request): nothing to collect.
        return
    for tr in tbody.children:
        # .children also yields text nodes between rows; keep only tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        # Calling a Tag is shorthand for find_all().
        tds = tr('td')
        if len(tds) < 4:
            # Malformed or spacer row without the four expected cells.
            continue
        ulist.append([td.string for td in tds[:4]])
# 3. Print the universities that belong to the requested province
def printUnivList(ulist, province):
    """Print a banner, a header row and the rows of ``ulist`` for ``province``.

    Args:
        ulist: Sequence of rows; each row must have at least four items and
            its third item (index 2) is compared with ``province``.
        province: Value a row's third item must equal for it to be printed.

    Returns:
        None. Everything is written to standard output.
    """
    # Substitute the province before centering so the banner is centered
    # within 45 columns whatever the length of ``province``.
    print(" 2019({} )".format(province).center(45, '-'))
    # Field {4} is the fill character for the second column.
    tplt = "{0:^10}\t{1:{4}^10}\t{2:^10}\t{3:^10}"
    # chr(12288) is U+3000, the full-width (ideographic) space.
    fill = chr(12288)
    print(tplt.format(" ", " ", " ", " ", fill))
    if province == ' ':
        print(tplt.format(1, ' ', ' ', 99.9, fill))
    for u in ulist:
        if u[2] == province:
            print(tplt.format(u[0], u[1], u[2], u[3], fill))
#
def main(province=' '):
    """Fetch the ranking page, parse it and print the rows for ``province``.

    Args:
        province: Passed through to printUnivList to select which rows
            are printed.
    """
    ranking_url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
    rows = []
    # fillUnivList extends ``rows`` in place with the parsed table rows.
    fillUnivList(rows, getHTMLText(ranking_url))
    printUnivList(rows, province=province)


main(province=' ')
知識点をまとめる: