漫画の家

7611 ワード

[注意]: あくまで一例として取り上げるだけなので、サーバーリソースを悪用しないでください。漫画はオンラインで読むほうが、むしろ体験として快適です。
[分析]: 漫画の家(動漫之家, dmzj.com)のクローラー対策は主に User-Agent のチェックに頼っているので、headers を1つ付ければ十分です。ここでは主に img_urls(画像URLの一覧)を取得する方法について説明します。
漫画の第1章の第1ページを分析すると、ページ内の最初の script タグのスクリプトの中で、すでにこの章のすべての画像リンクが生成されていることが分かります(途中にある暗号化(難読化)関数をざっと見たところ、おそらく単純な置換方式です。興味が湧いたら後でさらに調べます)。console で arr_pages と入力すると、やはりすべての画像のパスが取得でき、画像サーバーのURLの先頭 https://images.dmzj.com/ を補えば、すべての画像リンクが得られます。具体的には、BeautifulSoup または lxml で script を取得し、execjs でスクリプトを実行して arr_pages の値を取り出せばクロールできます(Referer ヘッダーを付けてください)。
 

        // Start of the chapter page's first inline script, kept as captured
        // from the site apart from rejoining the payload string (see below).
        var arr_img = new Array();
        var page = '';
        // The call below uses the p,a,c,k,e,d packer format: each base-53 token
        // in the payload is replaced by the matching entry of the '|'-separated
        // dictionary. The unpacked source is
        //     var pages=pages='["h\/%E9%BB%91...\/001.jpg", ... ,"...\/029.jpg"]';
        // i.e. it defines `pages` as a string holding an array literal of the
        // 29 relative image paths of this chapter (ch01).
        // The payload must stay on ONE physical line: a single-quoted string
        // literal cannot contain a raw line break, and the previously wrapped
        // form was a SyntaxError.
        // NOTE(review): this evals packed third-party code; only run it on
        // pages you trust.
        eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('v m=m=\'["h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/x.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/y.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/u.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/A.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/z.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/B.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/r.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/p.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/o.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/q.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/s.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/t.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/n.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/w.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/J.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/M.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/L.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/O.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/P.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/Q.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/N.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/K.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/E.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/D.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/C.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/F.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/G.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/I.j","h\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%k\\/%3%2%0%1%5%4%9%a%0%d%b%7%6%8%c-%f%i%g%3%2%0%1%5%4%6%e%l\\/H.j"]\';',53,53,'91|E4|BB|E9|8B|B9|E8|A6|80|E5|A5|BA|85|E7|8A|E6|86||BC|jpg|B1|B1_ch01|pages|013|009|008|010|007|011|012|003|var|014|001|002|005|004|006|025|024|023|026|027|029|028|015|022|017|016|021|018|019|020'.split('|'),0,{}))

        ;
        // Per-chapter globals emitted by the page script. The Python scraper
        // later in this file only reads `arr_pages` from this script.
        // NOTE(review): the two title strings below contain only spaces and
        // ASCII; presumably their non-ASCII characters were stripped during
        // extraction — confirm against the live page.
        var g_comic_name = "     -    ";
        var g_chapter_name = " 01 ";
        var g_comic_url = "hzqyzqhzh/";
        var g_chapter_url = "hzqyzqhzh/3505.shtml";
        var g_current_page = 1;
        // Equals the number of entries (29) in the packed `pages` list that
        // precedes these declarations.
        var g_max_pic_count = 29;
        var g_page_base = '';
        // Chained assignments: besides the declared g_* variable, these also
        // assign the undeclared names res_id and chapter_id.
        var g_comic_id = res_id = '1499';
        var g_chapter_id = chapter_id = '3505';
        var g_comic_code = '9237261211e72bf656b9093c28f90dff';
        // `pages` is the string produced by the packed eval earlier in this
        // script; evaluating it yields the array of relative image paths.
        var arr_pages = eval(pages);
        // Same format as `pages`, but unpacked: three paths under ..._ch02/.
        var next_chapter_pages = '["h\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1_ch02\/001.jpg","h\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1_ch02\/002.jpg","h\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1\/%E9%BB%91%E4%B9%8B%E5%A5%91%E7%BA%A6%E8%80%85-%E6%BC%86%E9%BB%91%E4%B9%8B%E8%8A%B1_ch02\/003.jpg"]';
        var arr_nextchapter_pages = eval(next_chapter_pages);
        // The query string repeats sns_sys_id and sns_view_point_token below.
        var final_page_url = "/hzqyzqhzh/jump.shtml?1499_3505&fbee072bb6c4a008aae02970f932a1fc";
        var sns_sys_id = '1499_3505';
        var sns_view_point_token = 'fbee072bb6c4a008aae02970f932a1fc';
        var is_hot_comic = false;
        var is_fast_comic = true;
        var server_name = 0;
        var page_site_root = '/';
        var res_type = 1;
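# Python scraper: image download helper and chapter loop (the original comment text was lost in extraction)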

def download_img(url):
    """Download one image and save it in the current working directory.

    Args:
        url: Image path relative to ``img_host`` (e.g. one entry of the
            page's ``arr_pages``). Its last ``/``-separated segment is used
            as the local file name.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    res = requests.get(img_host + '/' + url, headers=headers, timeout=3)

    if res.status_code != 200:
        # The original referenced ``raise_for_status`` without calling it,
        # so failed downloads were silently ignored.
        res.raise_for_status()
        return

    # ``res.content`` is raw bytes, so no text encoding has to be set and a
    # plain write-only binary mode is enough.
    with open(url.split('/')[-1], 'wb') as f:
        f.write(res.content)


# Request headers sent with every fetch: a desktop-browser User-Agent plus a
# Referer pointing at the series page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/74.0.3729.131 Safari/537.36'
    ),
    'Referer': 'https://manhua.dmzj.com/shiling/',
}

# Chapter links: every <a> whose href matches '/shiling' inside the first
# element with class "cartoon_online_border".
chapters = soup(class_="cartoon_online_border")[0].find_all('a', href=re.compile('/shiling'))

# Resolve the series directory once, before any chdir: the loop below changes
# the working directory, so a relative `path` would otherwise be re-resolved
# inside the previous chapter's folder from the second chapter onward.
series_dir = os.path.abspath(os.path.join(path, title))

for chapter in chapters:
    current_path = os.path.join(series_dir, chapter.string)
    # makedirs also creates the missing parent (series) directory and does
    # not fail when the chapter directory already exists.
    os.makedirs(current_path, exist_ok=True)
    # download_img() writes into the current working directory.
    os.chdir(current_path)
    js = get_html(arr_url + chapter['href']).find('script')
    # Run the page's first <script> and read the image list it defines.
    # NOTE(review): this executes JavaScript fetched from the remote site;
    # only point it at pages you trust.
    ctx = execjs.compile(js.string)
    imgs = ctx.eval('arr_pages')

    for img in imgs:
        download_img(img)

接続・ダウンロードの速度は十分なので、例外処理は特に考慮していません。