# Multi-threaded crawler that downloads kx1d pictures


# -*-coding:utf-8-*-
import os
import shutil
import threading

import lxml.html
import requests

# Shared accumulator of image URLs (the ``src`` values of scraped <img> tags).
# get_url_first_download() and get_url_download() extend it; the __main__
# section then slices it to hand each download thread its share.
list_href = []


class Download(object):
    """Download a list of image URLs into a single output directory.

    Attributes:
        output: Directory the downloaded files are written to.
        hf_list: Iterable of image URLs this instance is responsible for.
    """

    # Class-level counter; nothing in this class reads or updates it.
    current_num = 0

    def __init__(self, output, hf_list):
        """Remember the target directory and the URLs to fetch.

        Args:
            output: Path of an existing directory to save files into.
            hf_list: Iterable of image URLs.
        """
        self.output = output
        self.hf_list = hf_list
        # Created per instance; no method of this class acquires it.
        self._value_lock = threading.Lock()

    def downJpgList(self):
        """Fetch every URL in ``self.hf_list`` and save it under ``self.output``.

        Each file is named after the last path component of its URL, so two
        URLs with the same basename overwrite each other.
        """
        for jpg_url in self.hf_list:
            print(jpg_url)
            res = requests.get(jpg_url)
            # Use the instance's own directory rather than a module-level
            # ``output`` global, which only exists when run as a script.
            target = os.path.join(self.output, os.path.basename(jpg_url))
            # ``with`` closes the file even if a write fails part-way.
            with open(target, 'wb') as image_file:
                for chunk in res.iter_content(100000):
                    image_file.write(chunk)


def get_url_download(url_page, current_num, total_num):
    """Collect image URLs from the follow-up pages of a paginated article.

    Follow-up page ``n`` is addressed by inserting ``_n`` before the file
    extension of ``url_page`` (``a.html`` -> ``a_2.html``). Pages
    ``current_num + 1`` through ``total_num`` are fetched, and the ``src`` of
    every ``<img>`` matched by ``.articleBody a img`` is appended to the
    module-level ``list_href``.

    Args:
        url_page: URL of the first page; must contain a ``.`` to split on.
        current_num: Number of the page already handled (integer).
        total_num: Number of the last page (integer).
    """
    global list_href
    url_page_arr = url_page.rsplit('.', maxsplit=1)
    # The previous ``while current_num <= total_num`` loop incremented before
    # fetching, so it also requested page ``total_num + 1``, which is past the
    # end of the article. Stop at ``total_num`` instead.
    for page_no in range(current_num + 1, total_num + 1):
        # Format from the parts directly so a literal '%' in the URL cannot
        # be misread as a format directive.
        url_page_new = '{}_{}.{}'.format(url_page_arr[0], page_no, url_page_arr[1])
        s_content = requests.get(url_page_new)
        tree_html = lxml.html.fromstring(s_content.text)
        href = [img.get('src') for img in tree_html.cssselect('.articleBody  a   img')]
        list_href.extend(href)


def get_url_first_download(url_page):
    """Collect the image URLs found on the page at ``url_page``.

    The ``src`` attribute of every ``<img>`` matched by
    ``.articleBody a img`` is appended to the module-level ``list_href``.
    """
    global list_href
    response = requests.get(url_page)
    document = lxml.html.fromstring(response.text)
    sources = []
    for image in document.cssselect('.articleBody  a   img'):
        sources.append(image.get('src'))
    list_href.extend(sources)


def rand_generate():
    """Return 12 distinct characters drawn at random from a-z, A-Z and 0-9."""
    import random

    lowercase = [chr(code) for code in range(ord('a'), ord('z') + 1)]
    uppercase = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    digits = [str(digit) for digit in range(10)]
    alphabet = lowercase + uppercase + digits
    # sample() picks without replacement, so no character repeats.
    picked = random.sample(alphabet, 12)
    return ''.join(picked)


def view_bar(num=1, sum=100, bar_word=':'):
    """Draw a one-line text progress bar on standard output.

    Writes a carriage return, ``%<percent>:`` and then ``bar_word`` repeated
    ``num`` times directly to file descriptor 1 as GBK-encoded bytes.

    Args:
        num: Units completed; also the number of bar characters drawn.
        sum: Total number of units. The name shadows the builtin but is kept
            so existing keyword callers continue to work.
        bar_word: Text drawn once per completed unit.

    Raises:
        ZeroDivisionError: If ``sum`` is zero.
    """
    # Imported here because the module header does not import ``sys``.
    import sys

    rate = float(num) / float(sum)
    rate_num = int(rate * 100)
    os.write(1, bytes('\r%{}:'.format(rate_num), 'gbk'))
    for _ in range(0, num):
        os.write(1, bytes(bar_word, 'gbk'))
    sys.stdout.flush()


# Script entry point. The code stays at module level (not inside a function)
# so that names such as ``output`` remain module globals.
if __name__ == '__main__':
    import getopt
    import random
    import sys

    usage = 'test.py -n -o url_page'

    # Default to a random folder name; -o/--output overrides it.
    outputfile = rand_generate()
    # Parsed from -n/--name but not used further in this script.
    model_name = None

    try:
        # Long option names must be declared WITHOUT the leading '--'.
        opts, args = getopt.getopt(sys.argv[1:], "hn:o:", ["name=", "output="])
    except getopt.GetoptError as err:
        print(err)
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-n", "--name"):
            model_name = arg
        elif opt in ("-o", "--output"):
            outputfile = arg

    if not args:
        # Without this guard a missing URL surfaced as a bare IndexError.
        print(usage)
        sys.exit(2)
    url_page = args[0]
    print('outputfile ', outputfile, 'url_page ', url_page)

    # Raw string: same value as before, without relying on '\c' and '\d'
    # being left alone as invalid escape sequences.
    output = os.path.join(r'I:\chuan\down', outputfile)
    if os.path.exists(output):
        shutil.rmtree(output)
    # makedirs also creates the base directory when it does not exist yet.
    os.makedirs(output)

    ss = requests.get(url_page)
    ss.encoding = 'utf-8'
    tree = lxml.html.fromstring(ss.text)
    # The pager text is split on '/' into "<current>/<total>".
    num = tree.cssselect('.pages > span')[0].text_content()
    nums = str(num).split("/")
    current_num = int(nums[0])
    total_num = int(nums[1])

    get_url_first_download(url_page)
    get_url_download(url_page, current_num, total_num)

    # Deal the collected URLs round-robin across five downloader threads.
    downloadThreads = []
    for i in range(5):
        download = Download(output, list_href[i::5])
        downloadThread = threading.Thread(target=download.downJpgList)
        downloadThreads.append(downloadThread)
        downloadThread.start()
    for downloadThread in downloadThreads:
        downloadThread.join()
    print('ok')