# Python crawler challenge (heibanke.com) - level 1
# 3159 words
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Author: LiTianle
# Time:2019/9/24 15:36
'''
53639
10963.
'''
import requests,re
def get_num(s, fetch=None):
    """Follow the chain of numbered challenge pages starting at ``s``.

    Each page is downloaded and searched for a number. That number is
    appended to the fixed level URL to form the next page address, which is
    printed and then fetched in turn. When a page yields no number, the
    closing message is printed and the walk stops.

    Args:
        s: URL of the first page to download.
        fetch: Optional callable that takes a URL and returns the page text.
            When omitted, pages are downloaded with ``requests.get`` using a
            browser-like User-Agent header and a 10 second timeout.

    Returns:
        None. Progress is reported through ``print`` only.

    Raises:
        RuntimeError: If the chain leads back to a URL that was already
            fetched, which would otherwise never terminate.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    }
    base_url = 'http://www.heibanke.com/lesson/crawler_ex00/'

    # NOTE(review): the pattern literal in the original was garbled (the
    # string was split across two lines and could not be parsed). This
    # reconstruction assumes the number sits inside an <h3> element --
    # confirm against the live page before relying on it.
    number_pattern = re.compile(r'<h3>[^<]*?(\d+)[^<]*</h3>')

    def download(url):
        # A timeout keeps a stalled server from hanging the script forever.
        return requests.get(url=url, headers=headers, timeout=10).text

    fetch_page = download if fetch is None else fetch

    # Iterate instead of recursing: one stack frame per page would hit the
    # interpreter's recursion limit on a long chain.
    seen = set()
    current_url = s
    while True:
        seen.add(current_url)
        page_text = fetch_page(current_url)
        match = number_pattern.search(page_text)
        if match is None:
            break
        # Build the address of the next page from the number just found.
        current_url = base_url + match.group(1)
        print(current_url)
        if current_url in seen:
            raise RuntimeError('page chain loops back to ' + current_url)

    # NOTE(review): the message prefix and the empty pattern are reproduced
    # as found. An empty pattern always matches the empty string, so nothing
    # is appended here. Presumably the original pattern extracted a link
    # from the final page -- confirm and restore.
    print(' , :http://www.heibanke.com' + re.findall('', page_text, re.S)[0])
if __name__ == '__main__':
    # Script entry point: start the crawl from the level's landing page.
    get_num('http://www.heibanke.com/lesson/crawler_ex00/')