# Notes on scraping meme / sticker images ("doutu") from doutula.com.
#
# Part 1

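# Key points:
# 1. xpath: read an attribute of a matched element with get, e.g. img.get("data-original")
# 2. os.path.splitext(url)[1] returns the file extension of url
# 3. re.sub() replaces the characters matched by a regular expression
# 4. request: request.urlretrieve(url, filename) downloads a URL to a local file
# 5. format: string formatting (the page URL is built with "%d" % page)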

import requests
from lxml import etree
from urllib import request
import os
import re

# HTTP request headers sent with every listing-page request; the User-Agent
# string identifies the client as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.70 Safari/537.36'
    ),
}

def parse_index(url):
    """Download every image referenced on one listing page into ``image/``.

    The page at *url* is fetched with the module-level ``headers`` and parsed
    with lxml. Each ``<img>`` found under the ``page-content text-center``
    container is saved as ``image/<alt text><extension>``.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the listing page cannot be fetched
            (including when the request times out).
        OSError: If an image cannot be downloaded or written to disk.
    """
    # Local import: the file only imports ``urllib.request`` at module level.
    from urllib.parse import urlsplit

    # A timeout keeps one stalled connection from hanging the whole run.
    resp = requests.get(url, headers=headers, timeout=10)
    e = etree.HTML(resp.text)
    imgs = e.xpath('//div[@class="page-content text-center"]//img')

    # urlretrieve does not create missing directories.
    os.makedirs('image', exist_ok=True)

    for img in imgs:
        # Image address; <img> tags without this attribute have nothing to
        # download, so skip them instead of crashing on None.
        img_url = img.get('data-original')
        if not img_url:
            continue

        # Take the extension from the URL path only, so a query string or
        # fragment never ends up inside the file name.
        url_path = urlsplit(img_url).path
        stem, suffix = os.path.splitext(os.path.basename(url_path))

        # The alt text is the file name; drop path separators and other
        # characters that are not allowed in file names.
        alt = img.get('alt') or ''
        name = re.sub(r'[\\/:*?"<>|\r\n\t]', '', alt).strip()
        if not name:
            # No usable alt text: fall back to the name used in the URL.
            name = stem
        if not name:
            continue

        # Save the image.
        request.urlretrieve(img_url, os.path.join('image', name + suffix))

def main():
    """Scrape listing pages 1 through 99, one after another."""
    page_template = 'https://www.doutula.com/photo/list/?page=%d'
    # range's upper bound is exclusive, so the last page requested is 99.
    for page_url in (page_template % number for number in range(1, 100)):
        parse_index(page_url)

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()