PythonでWeChat公式アカウントの記事を画像付きHTMLとしてダウンロードする

14743 ワード

PythonでWeChat公式アカウントの記事を画像付きHTMLとしてダウンロードする
PythonでWeChat公式アカウントの記事を画像付きHTMLとしてダウンロードする
Pythonを学んでみると、コードを書くのがとても便利で、機能も強力で、書いたコードも非常に簡潔になります。もともとJavaで書いていたコードをPythonに書き直したところ、一度で完成しました ε=(・д・`*)ハァ…
import os
import time
import datetime
from urllib.request import Request, urlopen
from urllib import request
import re
# Matches every character that is NOT a CJK ideograph (U+4E00-U+9FA5), an
# ASCII digit or an ASCII letter; used to turn titles and URLs into names
# that are safe to use as file or directory names.
_UNSAFE_NAME_CHARS = re.compile(r"[^\u4e00-\u9fa50-9A-Za-z]")

# The title slice in the original code ([7:-8]) corresponds exactly to the
# lengths of "<title>" and "</title>", so the tag is matched without
# attributes.
_TITLE_RE = re.compile(r"<title>([\s\S]*?)</title>")

# Lazily loaded images keep their real address in a ``data-src`` attribute.
# NOTE(review): assumes double-quoted attribute values -- confirm against
# the pages this is used on.
_LAZY_IMAGE_RE = re.compile(r'data-src="([^"]+)"')


class DownWx():
    """Save an article page as local HTML together with its images.

    The page is written to ``<dirurl>(YYYYMMDD)<title>/<title>.html`` and the
    images it references through ``data-src`` attributes are stored in the
    ``img/`` directory next to it.
    """

    dirurl = 'Uploads/'  # output root; concatenated as-is, so keep the trailing slash

    def __init__(self):
        super().__init__()

    def put_file_img(self, dir, image_url):
        """Download one image into *dir* and return its page-relative path.

        The file extension is guessed from the last four characters of the
        URL (``jpeg``, ``=png``, ``=jpg``, ``=gif`` or a plain ``.png``).
        The base name is the URL with everything except CJK ideographs and
        ASCII letters/digits removed.

        Args:
            dir: Directory the image is written to. (The name shadows the
                builtin but is kept because it is part of the signature.)
            image_url: Address of the image to download.

        Returns:
            ``"img/<name>.<ext>"``. The download is best-effort: failures
            are reported on stdout and the path is returned regardless, so
            it may refer to a file that was not written.
        """
        tail = image_url[-4:]
        # Drop the "=" of "wx_fmt=png"-style endings and the dot of a plain
        # ".png" ending so that exactly one dot precedes the extension.
        extension = "." + tail.replace("=", "").lstrip(".")
        filename = _UNSAFE_NAME_CHARS.sub("", image_url) + extension
        try:
            request.urlretrieve(image_url, os.path.join(dir, filename))
        except Exception as exc:  # best-effort: one bad image must not abort the page
            print(image_url, "download failed:", exc)
        return "img/" + filename

    def get_file_article(self, url, qcode=False):
        """Fetch *url* and store it, with its images, below ``dirurl``.

        Args:
            url: Address of the article page; decoded as UTF-8 with
                undecodable bytes ignored.
            qcode: Accepted for backward compatibility; not used.

        Returns:
            Path of the HTML file that was written.

        Raises:
            urllib.error.URLError: If the page itself cannot be fetched.
            OSError: If the output directory or file cannot be created.
        """
        with urlopen(Request(url)) as response:
            htmls = response.read().decode('utf-8', 'ignore')

        found = _TITLE_RE.search(htmls)
        title = found.group(1).lstrip() if found else ""
        print(title)
        # Reduce the title to characters that are safe in a path; fall back
        # to a fixed name so a missing or symbol-only title still works.
        title = _UNSAFE_NAME_CHARS.sub("", title) or "untitled"
        print(title)

        stamp = datetime.datetime.now().strftime('%Y%m%d')
        article_dir = self.dirurl + "(" + stamp + ")" + title
        img_dir = os.path.join(article_dir, "img")
        if os.path.exists(img_dir):
            print('directory already exists:', img_dir)
        else:
            print('creating directory:', img_dir)
            os.makedirs(img_dir, exist_ok=True)

        htmls = self._localize_images(htmls, img_dir)

        html_path = os.path.join(article_dir, title + ".html")
        with open(html_path, 'w', encoding='utf-8') as fp:
            fp.write(htmls)
        return html_path

    def _localize_images(self, htmls, img_dir):
        """Download all ``data-src`` images and point the markup at them."""
        # NOTE(review): the original restricted the search to the article's
        # content container, but that pattern was lost; the whole page is
        # scanned instead -- confirm this is acceptable.
        urls = set(_LAZY_IMAGE_RE.findall(htmls))
        # Longest first, so a URL that is a prefix of another one cannot
        # corrupt the longer URL before it has been replaced.
        for remote in sorted(urls, key=lambda u: (-len(u), u)):
            htmls = htmls.replace(remote, self.put_file_img(img_dir, remote))
        # Browsers only load ``src``, so activate the lazy attributes.
        return htmls.replace('data-src', 'src')


if __name__ == '__main__':
    w = DownWx()
    url = 'https://mp.weixin.qq.com/s/itDfmseKPFRagprtOOwWcA'
    w.get_file_article(url, True)