UnicodeDecodeError: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence
1785 ワード
Python を使用中に次の UnicodeDecodeError が発生しました: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence
#
from urllib import request
import chardet
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Download ``pageUrl`` and print its body as text.

    The character encoding is guessed with ``chardet`` and the body is
    decoded with the default (strict) error handling, so any byte that is
    invalid in the guessed encoding raises ``UnicodeDecodeError``.
    """
    payload = request.urlopen(pageUrl).read()
    guess = chardet.detect(payload)
    # Strict decode: no ``errors`` argument is passed.
    text = payload.decode(guess['encoding'])
    print(text)
getHtml(page1_url)
次のように修正してください。このエラーは、ページに不正な文字(検出されたエンコーディングではデコードできないバイト)が含まれているという意味なので、decode の引数に 'ignore' を追加してください。#
from urllib import request
import chardet
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Fetch ``pageUrl``, decode the body leniently, and print it.

    The encoding is guessed with ``chardet``. Undecodable bytes are dropped
    (``errors='ignore'``) instead of raising ``UnicodeDecodeError``.

    Args:
        pageUrl: URL of the page to download.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Close the HTTP response deterministically once the body is read.
    with request.urlopen(pageUrl) as response:
        raw_html = response.read()

    # chardet reports ``None`` when it cannot guess (e.g. an empty body);
    # fall back to UTF-8 so ``bytes.decode`` is never handed ``None``.
    encoding = chardet.detect(raw_html)['encoding'] or 'utf-8'

    # Content detected as GB2312 often contains GBK/GB18030-only bytes
    # (such as lead byte 0x88). Decoding with the GB18030 superset keeps
    # those characters instead of silently dropping them.
    if encoding.lower() in ('gb2312', 'gbk'):
        encoding = 'gb18030'

    # 'ignore' remains as a last resort for bytes that are still invalid.
    src = raw_html.decode(encoding, 'ignore')
    print(src)
getHtml(page1_url)