PythonでWebページのハイパーリンクをクロールする


bs4のBeautifulSoupでページを解析する
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Fetch the blog index page and parse it. The response is used as a context
# manager so the underlying HTTP connection is closed even if parsing fails.
with urlopen('https://blog.csdn.net/zzc15806/') as html:
    bs = BeautifulSoup(html, 'html.parser')  # parse with the stdlib HTML parser

# Collect every <a> element in the document.
hyperlink = bs.find_all('a')

# Print the href of each anchor; anchors without an href attribute yield None.
for h in hyperlink:
    hh = h.get('href')
    print(hh)

結果は次のとおりです。
https://blog.csdn.net/zzc15806
javascript:void(0);
https://blog.csdn.net/zzc15806?orderby=UpdateTime
https://blog.csdn.net/zzc15806?orderby=ViewCount
https://blog.csdn.net/zzc15806/rss/list
https://blog.csdn.net/yoyo_liyy/article/details/82762601
https://blog.csdn.net/yoyo_liyy/article/details/82762601
https://blog.csdn.net/zzc15806/article/details/84996039
https://blog.csdn.net/zzc15806/article/details/84996039
https://blog.csdn.net/zzc15806/article/details/84975709
https://blog.csdn.net/zzc15806/article/details/84975709
https://blog.csdn.net/zzc15806/article/details/84975539
https://blog.csdn.net/zzc15806/article/details/84975539
https://blog.csdn.net/zzc15806/article/details/84975137
https://blog.csdn.net/zzc15806/article/details/84975137
https://blog.csdn.net/zzc15806/article/details/84974458
https://blog.csdn.net/zzc15806/article/details/84974458
https://blog.csdn.net/zzc15806/article/details/84973370
https://blog.csdn.net/zzc15806/article/details/84973370
https://blog.csdn.net/zzc15806/article/details/84972108
https://blog.csdn.net/zzc15806/article/details/84972108
https://blog.csdn.net/zzc15806/article/details/84971215
https://blog.csdn.net/zzc15806/article/details/84971215
https://blog.csdn.net/zzc15806/article/details/84875070
https://blog.csdn.net/zzc15806/article/details/84875070
https://blog.csdn.net/zzc15806/article/details/84779131
https://blog.csdn.net/zzc15806/article/details/84779131
https://blog.csdn.net/zzc15806/article/details/84137013
https://blog.csdn.net/zzc15806/article/details/84137013
https://blog.csdn.net/zzc15806/article/details/84067017
https://blog.csdn.net/zzc15806/article/details/84067017
https://blog.csdn.net/zzc15806/article/details/83999940
https://blog.csdn.net/zzc15806/article/details/83999940
https://blog.csdn.net/zzc15806/article/details/83999668
https://blog.csdn.net/zzc15806/article/details/83999668
https://blog.csdn.net/zzc15806/article/details/83540661
https://blog.csdn.net/zzc15806/article/details/83540661
https://blog.csdn.net/zzc15806/article/details/83504130
https://blog.csdn.net/zzc15806/article/details/83504130
https://blog.csdn.net/zzc15806/article/details/83474661
https://blog.csdn.net/zzc15806/article/details/83474661
https://blog.csdn.net/zzc15806/article/details/83472329
https://blog.csdn.net/zzc15806/article/details/83472329
https://blog.csdn.net/zzc15806/article/details/83448761
https://blog.csdn.net/zzc15806/article/details/83448761
https://blog.csdn.net/zzc15806/article/details/83447006
https://blog.csdn.net/zzc15806/article/details/83447006
https://me.csdn.net/zzc15806
https://me.csdn.net/zzc15806
None
https://blog.csdn.net/zzc15806?t=1
https://blog.csdn.net/zzc15806?t=1
https://blog.csdn.net/home/help.html#level
https://blog.csdn.net/zzc15806/column/info/25194
https://blog.csdn.net/zzc15806/column/info/25194
https://blog.csdn.net/zzc15806/column/info/30921
https://blog.csdn.net/zzc15806/column/info/30921
https://blog.csdn.net/zzc15806/column/info/30926
https://blog.csdn.net/zzc15806/column/info/30926
https://blog.csdn.net/zzc15806/article/category/6989201
https://blog.csdn.net/zzc15806/article/category/7255220
https://blog.csdn.net/zzc15806/article/category/7422481
https://blog.csdn.net/zzc15806/article/category/7515657
https://blog.csdn.net/zzc15806/article/category/7534232
https://blog.csdn.net/zzc15806/article/category/7548654
https://blog.csdn.net/zzc15806/article/category/7549573
https://blog.csdn.net/zzc15806/article/category/7731524
https://blog.csdn.net/zzc15806/article/category/7732152
https://blog.csdn.net/zzc15806/article/category/7740409
https://blog.csdn.net/zzc15806/article/category/7749247
https://blog.csdn.net/zzc15806/article/category/7776199
https://blog.csdn.net/zzc15806/article/category/7830103
https://blog.csdn.net/zzc15806/article/category/7842074
https://blog.csdn.net/zzc15806/article/category/7936547
https://blog.csdn.net/zzc15806/article/category/8489572
None
https://blog.csdn.net/zzc15806/article/month/2018/12
https://blog.csdn.net/zzc15806/article/month/2018/11
https://blog.csdn.net/zzc15806/article/month/2018/10
https://blog.csdn.net/zzc15806/article/month/2018/09
https://blog.csdn.net/zzc15806/article/month/2018/08
https://blog.csdn.net/zzc15806/article/month/2018/07
https://blog.csdn.net/zzc15806/article/month/2018/06
https://blog.csdn.net/zzc15806/article/month/2018/05
https://blog.csdn.net/zzc15806/article/month/2018/04
https://blog.csdn.net/zzc15806/article/month/2018/03
https://blog.csdn.net/zzc15806/article/month/2018/02
https://blog.csdn.net/zzc15806/article/month/2018/01
https://blog.csdn.net/zzc15806/article/month/2017/10
https://blog.csdn.net/zzc15806/article/month/2017/06
None
https://blog.csdn.net/zzc15806/article/details/73662491
https://blog.csdn.net/zzc15806/article/details/79711114
https://blog.csdn.net/zzc15806/article/details/79603994
https://blog.csdn.net/zzc15806/article/details/79246716
https://blog.csdn.net/zzc15806/article/details/79615426
https://blog.csdn.net/zzc15806/article/details/79592577#comments
https://my.csdn.net/qq_35300611
https://blog.csdn.net/zzc15806/article/details/79592577#comments
https://my.csdn.net/zzc15806
https://blog.csdn.net/zzc15806/article/details/79592577#comments
https://my.csdn.net/qq_35300611
https://blog.csdn.net/zzc15806/article/details/79615426#comments
https://my.csdn.net/qq254271304
https://blog.csdn.net/zzc15806/article/details/80712320#comments
https://my.csdn.net/zzc15806
None
None
None
None
None

上の結果を見ると、取得したリンクは雑多なので、フィルタリングすることができます。たとえば、すべてのブログ記事のリンクだけを取得し、「txt」ファイルに保存します。
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Fetch the blog index page and parse it. The response is used as a context
# manager so the underlying HTTP connection is closed even if parsing fails.
with urlopen('https://blog.csdn.net/zzc15806/') as html:
    bs = BeautifulSoup(html, 'html.parser')  # parse with the stdlib HTML parser

# Collect every <a> element in the document.
hyperlink = bs.find_all('a')

# Write the filtered links to "blog.txt", one per line. The `with` block
# guarantees the file is flushed and closed even if an error occurs mid-loop.
with open('./blog.txt', 'w', encoding='utf-8') as file:
    for h in hyperlink:
        hh = h.get('href')
        # Keep only article pages: skip anchors without an href (None) and
        # links that merely point at an article's comment section.
        if hh and '/article/details/' in hh and '#comments' not in hh:
            print(hh)
            file.write(hh)    # save the link to "blog.txt"
            file.write('\n')  # newline so each link is on its own line

結果は次のとおりです。
https://blog.csdn.net/yoyo_liyy/article/details/82762601
https://blog.csdn.net/yoyo_liyy/article/details/82762601
https://blog.csdn.net/zzc15806/article/details/84996039
https://blog.csdn.net/zzc15806/article/details/84996039
https://blog.csdn.net/zzc15806/article/details/84975709
https://blog.csdn.net/zzc15806/article/details/84975709
https://blog.csdn.net/zzc15806/article/details/84975539
https://blog.csdn.net/zzc15806/article/details/84975539
https://blog.csdn.net/zzc15806/article/details/84975137
https://blog.csdn.net/zzc15806/article/details/84975137
https://blog.csdn.net/zzc15806/article/details/84974458
https://blog.csdn.net/zzc15806/article/details/84974458
https://blog.csdn.net/zzc15806/article/details/84973370
https://blog.csdn.net/zzc15806/article/details/84973370
https://blog.csdn.net/zzc15806/article/details/84972108
https://blog.csdn.net/zzc15806/article/details/84972108
https://blog.csdn.net/zzc15806/article/details/84971215
https://blog.csdn.net/zzc15806/article/details/84971215
https://blog.csdn.net/zzc15806/article/details/84875070
https://blog.csdn.net/zzc15806/article/details/84875070
https://blog.csdn.net/zzc15806/article/details/84779131
https://blog.csdn.net/zzc15806/article/details/84779131
https://blog.csdn.net/zzc15806/article/details/84137013
https://blog.csdn.net/zzc15806/article/details/84137013
https://blog.csdn.net/zzc15806/article/details/84067017
https://blog.csdn.net/zzc15806/article/details/84067017
https://blog.csdn.net/zzc15806/article/details/83999940
https://blog.csdn.net/zzc15806/article/details/83999940
https://blog.csdn.net/zzc15806/article/details/83999668
https://blog.csdn.net/zzc15806/article/details/83999668
https://blog.csdn.net/zzc15806/article/details/83540661
https://blog.csdn.net/zzc15806/article/details/83540661
https://blog.csdn.net/zzc15806/article/details/83504130
https://blog.csdn.net/zzc15806/article/details/83504130
https://blog.csdn.net/zzc15806/article/details/83474661
https://blog.csdn.net/zzc15806/article/details/83474661
https://blog.csdn.net/zzc15806/article/details/83472329
https://blog.csdn.net/zzc15806/article/details/83472329
https://blog.csdn.net/zzc15806/article/details/83448761
https://blog.csdn.net/zzc15806/article/details/83448761
https://blog.csdn.net/zzc15806/article/details/83447006
https://blog.csdn.net/zzc15806/article/details/83447006
https://blog.csdn.net/zzc15806/article/details/73662491
https://blog.csdn.net/zzc15806/article/details/79711114
https://blog.csdn.net/zzc15806/article/details/79603994
https://blog.csdn.net/zzc15806/article/details/79246716
https://blog.csdn.net/zzc15806/article/details/79615426