UnicodeDecodeError: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence

1785 ワード

Pythonでウェブページを取得してデコードする際に、次のUnicodeDecodeErrorに遭遇しました: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence
#     
from urllib import request
import chardet

# URL of the example page that this snippet downloads and decodes.
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Download *pageUrl*, decode it with the chardet-detected codec, and print it.

    The decode is strict: no error handler is passed, so any byte sequence
    that is invalid in the detected encoding raises ``UnicodeDecodeError``.

    Args:
        pageUrl: URL of the page to fetch.
    """
    reply = request.urlopen(pageUrl)
    payload = reply.read()
    # chardet.detect returns a dict; only its 'encoding' entry is used here.
    detection = chardet.detect(payload)
    codec_name = detection['encoding']
    # Strict decoding -- undecodable bytes raise instead of being dropped.
    print(payload.decode(encoding=codec_name))

# Fetch and print the example page; with the strict decode in getHtml this is
# the call that fails with the UnicodeDecodeError quoted above.
getHtml(page1_url)
どのように対処すればよいでしょうか?このエラーは、検出されたエンコーディングではデコードできない不正なバイトがウェブページに含まれていることを意味します。decode() の第2引数に 'ignore' を指定して、不正なバイトを無視してください。
#     
from urllib import request
import chardet

# URL of the example page that this snippet downloads and decodes.
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Fetch *pageUrl*, decode it leniently, and print the resulting text.

    The encoding is guessed with ``chardet``. Bytes that are still invalid
    in the chosen codec are skipped (``errors='ignore'``) rather than
    raising ``UnicodeDecodeError``.

    Args:
        pageUrl: URL of the page to fetch.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # The context manager closes the HTTP response even if read() raises.
    with request.urlopen(pageUrl) as response:
        raw_html = response.read()

    # chardet reports None when it cannot guess (e.g. an empty body); fall
    # back to UTF-8 so decode() is never handed a non-string encoding.
    encoding = chardet.detect(raw_html)['encoding'] or 'utf-8'

    # Pages detected as GB2312 often contain GBK/GB18030 characters (such as
    # lead byte 0x88), which the strict gb2312 codec rejects. GB18030 is a
    # superset of both, so it decodes those characters instead of losing them.
    if encoding.lower() in ('gb2312', 'gbk'):
        encoding = 'gb18030'

    # 'ignore' stays as a last resort for bytes invalid even in the superset.
    src = raw_html.decode(encoding, 'ignore')
    print(src)

# Fetch and print the example page; getHtml decodes with the 'ignore' error
# handler, so undecodable bytes are skipped instead of raising.
getHtml(page1_url)