Python 黒板客(heibanke)クローラーチャレンジ突破 - 第一関門

3159 ワード

#!/usr/bin/python
# -*- coding:utf-8 -*-
# Author: LiTianle
# Time:2019/9/24 15:36
'''
Crawler for the heibanke "crawler_ex00" exercise.

Starting from the exercise URL, every fetched page is searched for a number;
that number is appended to the base URL to form the next page to fetch.
The walk stops at the first page in which no number is found.

Sample values kept from the original notes:

53639

10963.

'''
import re

import requests

BASE_URL = 'http://www.heibanke.com/lesson/crawler_ex00/'
SITE_ROOT = 'http://www.heibanke.com'

# Seconds to wait for the server; without a timeout a stalled connection
# would block the walk forever.
REQUEST_TIMEOUT = 10

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
}

# NOTE(review): the HTML tags of this pattern were stripped from the published
# listing (only `.* ?(\d+)` survived).  `<h3>...</h3>` is a reconstruction --
# confirm against the live page.  `[^<]` keeps the match inside one element so
# digits elsewhere in the document cannot be picked up by accident.
NUMBER_RE = re.compile(r'<h3>[^<]*?(\d+)[^<]*</h3>')

# NOTE(review): the original pattern was lost entirely (it survived as the
# empty string, which always "matches" and yields '').  This reconstruction
# assumes the final page links to the next exercise with a site-relative
# href -- confirm against the live page.
NEXT_LEVEL_RE = re.compile(r'href="(/lesson/crawler_ex\d+/?)"')


def get_num(s):
    '''Follow the chain of numbered pages starting at URL ``s``.

    Each page is downloaded and searched for a number.  When one is found the
    next URL (``BASE_URL`` + number) is printed and fetched in turn.  When no
    number is found the chain is considered finished and the address of the
    next level is printed.

    Args:
        s: URL of the first page to fetch.

    Returns:
        None.  All results are reported with ``print``.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    '''
    current = s
    # Iterate instead of recursing once per page: the chain length is decided
    # by the server and must not be bounded by the interpreter recursion limit.
    while True:
        response = requests.get(url=current, headers=HEADERS,
                                timeout=REQUEST_TIMEOUT)
        # An error page must not be mistaken for the "no number" end page.
        response.raise_for_status()
        page_text = response.text

        found = NUMBER_RE.search(page_text)
        if found is None:
            break

        current = BASE_URL + found.group(1)
        print(current)

    link = NEXT_LEVEL_RE.search(page_text)
    # Fall back to the bare site root instead of raising when the link is
    # missing.
    next_path = link.group(1) if link else ''
    # NOTE(review): the label in front of the URL was lost in the published
    # listing (only ' , :' remains); it is kept verbatim here.
    print(' , :' + SITE_ROOT + next_path)


if __name__ == '__main__':
    url = BASE_URL
    get_num(url)