Pythonクローラー:筆趣閣(Biquge)

2531 ワード

ふと思い立って、筆趣閣の小説をクロールしてみたくなりました。なにしろ私は昔からの本の虫なので。前置きはこのくらいにして、コードを載せます。主に requests と BeautifulSoup を使用しています。
from urllib.request import quote, unquote
import requests
from bs4 import BeautifulSoup
import sys
import time

# Read the search keyword from the user (presumably the novel title; the
# original prompt text was lost, so the literal is kept exactly as found).
content = input('            :')
initial_content = content

# The keyword is percent-encoded as GB2312 before being appended to the
# search URL.
keyword = quote(initial_content, encoding='gb2312')
url = 'http://www.biquge.com.tw/modules/article/soshu.php?searchkey=' + keyword
print(url)

# A timeout keeps the script from hanging forever on an unresponsive server.
# The response is named `search_response` rather than `re` so it cannot be
# confused with (or later shadow) the standard-library `re` module.
search_response = requests.get(url, timeout=30)
print(search_response.status_code)
# Fail fast on 4xx/5xx instead of parsing an error page as search results.
search_response.raise_for_status()
# Let requests guess the real charset from the body before decoding the text.
search_response.encoding = search_response.apparent_encoding
html = search_response.text
soup = BeautifulSoup(html, 'html.parser')

# Output file is named after the keyword and opened in append mode; it stays
# open for the rest of the script and is closed by the final file.close().
fileName = '/Users/john/Desktop/  /' + initial_content + '.txt'
print(fileName)
file = open(fileName, 'a', encoding='utf-8')

# Pull the elements with id="info" and id="list" out of the parsed page.
info = soup.find_all(id='info')
chapters = soup.find_all(id='list')

# Write the text of every id="info" element at the top of the output file.
for info_block in info:
    file.write(info_block.get_text())

# Re-parse the stringified id="list" result so its <dl> can be reached
# directly as download_soup.dl.
download_soup = BeautifulSoup(str(chapters), 'html.parser')

# Collect the text of every <dl> child that contains an <a> link; the count
# is used later as the denominator of the progress percentage.
arr = [
    entry.get_text()
    for entry in download_soup.dl.children
    if hasattr(entry, 'href') and entry.a is not None
]
numbel = len(arr)
print(numbel)
# Download every chapter linked from the <dl> list and append it to the
# output file, printing a progress percentage after each chapter and the
# total elapsed time at the end.
index = 1
time1 = time.time()  # start of the download, in seconds since the epoch
try:
    for child in download_soup.dl.children:
        # Only <dl> children that contain an <a> link are chapters.
        # NOTE(review): the hasattr() check presumably filters out bare text
        # nodes between tags - confirm against the bs4 version in use.
        if not (hasattr(child, 'href') and child.a is not None):
            continue

        # Chapter title followed by a separator line.
        file.write(child.get_text() + '\n' + '-----------------------------------------------' + '\n')

        url = 'http://www.biquge.com.tw/' + child.a['href']
        # A timeout keeps one stalled chapter request from hanging the run.
        reponse_dl = requests.get(url, timeout=30)
        # Decode with the charset detected from the body.
        reponse_dl.encoding = reponse_dl.apparent_encoding
        soup_dl = BeautifulSoup(reponse_dl.text, 'html.parser')

        # The chapter text lives in the element(s) with id="content".
        for link in soup_dl.find_all(id='content'):
            file.write(link.get_text() + '\n\n')

        # Progress as a percentage of the chapter count computed earlier.
        print(" :%.3f%%" % float(index / numbel * 100))
        index += 1

    time2 = time.time()
    tt = time2 - time1
    print(' :' + str(tt) + ' ')
finally:
    # Always release the output file, even if a request or parse step raised.
    file.close()
     ,                ,   macOS  ,Windows                  。        BeautifulSoup,           ,    。