lxmlのエンコード変換による文字化け問題

262 ワード

from lxml.html.clean import Cleaner
# Decode the raw response body as UTF-8 before handing it to lxml, so the
# cleaner works on text instead of undecoded bytes. bytes.decode() replaces
# the Python 2-only unicode(..., "utf-8") builtin and runs on Python 2 and 3.
# NOTE(review): assumes response.content is a byte string — confirm.
response = response.content.decode("utf-8")

# Strip CSS and scripts from the document while leaving the page structure
# and element attributes untouched.
cleaner = Cleaner(
    style=True,
    scripts=True,
    page_structure=False,
    safe_attrs_only=False,
)
# NOTE(review): `etree` is not imported in the visible snippet — presumably
# `from lxml import etree` exists elsewhere; confirm.
response = etree.HTML(cleaner.clean_html(response))