PythonでBaidu贴吧(Tieba)の画像をダウンロードする

7651 ワード

メインプログラム
#!/usr/bin/python
# -*-coding:utf-8-*-
import urllib.parse, urllib.request, http.cookiejar, re, time
import tools
import threading
from db import db


class tieba(threading.Thread):
    '      '
    # http://tieba.baidu.com/p/4519246742?see_lz=1&pn=1
    url = 'http://tieba.baidu.com/p/%s?see_lz=1&pn=%s'

    def __init__(self, tieid=4690733195, page=1):
        threading.Thread.__init__(self)
        self.tieid = tieid
        self.page = page

    @staticmethod
    def getEndPage(tieid):
        """Fetch page 1 of a thread and return the page number parsed from it.

        :param tieid: id substituted into ``tieba.url``.
        :return: int, the second number captured by the pattern below, or 1
            when the pattern does not occur in the page.
        """
        url = tieba.url % (tieid, 1)
        # Use the response as a context manager so the connection is closed
        # even if reading or decoding fails.
        with urllib.request.urlopen(url) as res:
            text = res.read().decode('utf-8')
        # NOTE(review): the blanks in this pattern look like placeholders for
        # text lost from the literal; presumably group 2 is the total page
        # count -- confirm against the live page markup.
        match = re.search(r'(\d+)   , (\d+) ', text)
        return int(match.group(2)) if match else 1

    def run(self):
        '  '
        url = tieba.url % (self.tieid, self.page)
        res = urllib.request.urlopen(url)
        text = res.read().decode('utf-8')
        pattern = r' endpage:
            print("    ,    !")
            exit()
        start = data['EndPage'] + 1
        end = start + size - 1
    else:
        start = 1
        end = 5
    sql = """
        INSERT INTO `test`.`tieba` (
        `tieid`,
        `StartPage`,
        `EndPage`)
        VALUES('%s','%s', '%s');
        """ % (tieID, start, end)
    mysql.execute(sql)
    threads = []
    for page in range(start, end + 1):
        if page > endpage:
            print("    ,    !!")
            break
        thread = tieba(tieID, page)
        thread.start()
        threads.append(thread)

    for t in threads:
        #         
        t.join()
    print("     ")


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
tools.py
#!/usr/bin/python
# -*-coding:utf-8-*-
import time, random
import urllib.request
import os.path, re

'''
       ,tools.py
'''


def runTime(file='test.log'):
    """Build a decorator that times every call of the decorated function.

    Each call writes a line to the log before the function runs and a line
    with the elapsed seconds afterwards; the elapsed-time line is also printed.

    :param file: path of the log file passed through to ``log``.
    :return: a decorator; the wrapped function returns whatever the
        decorated function returns.
    """
    import functools  # local import: the module header does not import it

    def _runTime(func):
        """Wrap ``func`` so that each call is timed and logged."""

        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def newFunc(*args, **kwargs):
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed intervals.
            start = time.perf_counter()
            log('    ', file)
            res = func(*args, **kwargs)
            end = time.perf_counter()
            msg = "    ,   : %f  " % (end - start)
            log(msg, file)
            print(msg)
            return res

        return newFunc

    return _runTime


def log(content, file='test.log', type=1):
    """Write one timestamped line to a log file.

    :param content: str, message text placed after the timestamp.
    :param file: path of the log file (opened as UTF-8).
    :param type: 1 appends to the file; any other value truncates it first.
    """
    mode = 'a+' if type == 1 else 'w+'
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # NOTE(review): each entry ends with a bare carriage return rather than a
    # newline; kept as-is because existing log readers may depend on it.
    line = stamp + ' : ' + content + '\r'
    # The context manager flushes and closes the handle on every call instead
    # of leaving that to garbage collection.
    with open(file, mode, encoding='utf-8') as f:
        f.write(line)


def download(url, filename='', foldername='', useOldName=False):
    """
    Download ``url`` into a local folder.

    :param url: str, address of the file; a falsy value makes the call a no-op.
    :param filename: str, name to save under; when empty a name of the form
        yyyymmddHHMMSS + 3 random digits + the extension of the URL is generated.
    :param foldername: str, target directory, created when missing; when empty
        it defaults to "year-month-day-hour" of the current local time
        (no zero padding), relative to the working directory.
    :param useOldName: when true, save under the file name taken from the URL
        path, overriding ``filename``.
    :return: path of the saved file, or None when ``url`` is empty.
    """
    if not url:
        return None
    from urllib.parse import urlparse  # local import: not in the module header

    # Take the name from the URL *path* so a query string or fragment never
    # ends up in the file name or its extension.
    oldFileName = os.path.basename(urlparse(url).path)
    # splitext yields '' for names without a dot instead of failing.
    suffix = os.path.splitext(oldFileName)[1]

    if foldername == '':
        now = time.localtime()
        foldername = '%d-%d-%d-%d' % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour)
    # exist_ok avoids the check-then-create race when several threads
    # download into the same folder at once.
    os.makedirs(foldername, exist_ok=True)

    if filename == '':
        filename = time.strftime("%Y%m%d%H%M%S", time.localtime()) + str(random.randint(100, 999)) + suffix
    if useOldName and oldFileName:
        filename = oldFileName

    # os.path.join uses the platform's separator rather than a hard-coded
    # backslash, so the file lands inside the folder on every OS.
    target = os.path.join(foldername, filename)
    urllib.request.urlretrieve(url, target)
    return target
db.py
# -*- coding: utf-8 -*-
import pymysql


class db:
    """Small convenience wrapper around one PyMySQL connection.

    The constructor does not raise on connection failure: the message is kept
    in ``error`` and every query method returns that message instead of
    touching the database. Rows come back as dicts because the connection is
    opened with ``DictCursor``.
    """
    dbconnect = ''  # open connection object, or '' while not connected
    error = ''  # text of the most recently recorded failure

    def __init__(self, host, username, password, db='', port=3306):
        """Open the connection.

        :param host: server host name or address.
        :param username: login user.
        :param password: login password.
        :param db: default database name ('' for none).
        :param port: server TCP port.
        """
        try:
            # Keyword arguments: current PyMySQL no longer accepts these
            # positionally, and ``port`` must actually be forwarded.
            self.dbconnect = pymysql.connect(host=host, user=username, password=password,
                                             database=db, port=port, charset='utf8',
                                             cursorclass=pymysql.cursors.DictCursor)
        except pymysql.Error as e:
            self.error = str(e)

    def __del__(self):
        """Close the connection when the wrapper is garbage collected."""
        self.close()

    def execute(self, sql):
        """Run a data-changing statement and commit it.

        On failure the transaction is rolled back and the message is stored
        in ``error``; the call still returns the cursor.

        :param sql: statement text.
        :return: the cursor (e.g. for ``rowcount``), or ``error`` when there
            is no connection.
        """
        if not self.dbconnect:
            return self.error
        conn = self.dbconnect
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        except Exception as e:
            # Best effort, as before, but keep the reason and let
            # KeyboardInterrupt/SystemExit propagate.
            conn.rollback()
            self.error = str(e)
        return cursor

    def queryAll(self, sql):
        """Run a select and return every row.

        :param sql: statement text.
        :return: all fetched rows, or ``error`` when there is no connection.
        """
        if not self.dbconnect:
            return self.error
        cursor = self.dbconnect.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()

    def queryRow(self, sql):
        """Run a select and return the first row.

        :param sql: statement text.
        :return: the first row, None when the result is empty, or ``error``
            when there is no connection.
        """
        if not self.dbconnect:
            return self.error
        cursor = self.dbconnect.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchone()
        finally:
            cursor.close()

    def queryScalar(self, sql):
        """Run a select and return the first column of the first row.

        :param sql: statement text.
        :return: the value, '' when the result is empty, or ``error`` when
            there is no connection.
        """
        if not self.dbconnect:
            return self.error
        row = self.queryRow(sql)
        if not row:
            # An empty result used to crash on ``None.values()``.
            return ''
        return next(iter(row.values()), '')

    def close(self):
        """Close the connection; calling it again is a no-op."""
        # Drop the reference first so a later close() or __del__ cannot try
        # to close the same connection twice.
        conn, self.dbconnect = self.dbconnect, ''
        if conn:
            conn.close()


if __name__ == '__main__':
    # Manual smoke test against a live server. It runs a DELETE and an UPDATE,
    # so only execute it against data that may be modified.
    conn = db('172.23.16.91', 'unipei', 'jiaparts', 'jpd')

    # Read-only queries first.
    first_user = conn.queryRow('select * from jpd.jpd_user limit 1')
    organs = conn.queryAll('select * from jpd.jpd_organ limit 10')
    organ_count = conn.queryScalar('select count(*) from jpd.jpd_organ')

    # Then the two data-changing statements.
    deleted = conn.execute('delete from pap.pap_evaluation_system_history limit 100')
    updated = conn.execute('update jpd.jpd_user set lastvisittime=1 where id=61')

    print(first_user)
    print(organs)
    print(organ_count)
    print(deleted.rowcount)
    print(updated.rowcount)
sql
-- Bookkeeping table for the downloader: the main program's INSERT writes
-- `tieid`, `StartPage` and `EndPage` for each run and reads `EndPage` back
-- to decide where the next run starts.
-- NOTE(review): `CreateTime` is not set by the INSERT shown in the main
-- program; presumably a Unix timestamp -- confirm before relying on it.
CREATE TABLE `tieba` (
  `ID` int(11) NOT NULL AUTO_INCREMENT COMMENT '  ',
  `TieID` bigint(11) DEFAULT NULL,
  `StartPage` int(11) DEFAULT NULL COMMENT '    ',
  `EndPage` int(11) DEFAULT NULL COMMENT '    ',
  `CreateTime` int(13) DEFAULT NULL COMMENT '    ',
  PRIMARY KEY (`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=31 DEFAULT CHARSET=utf8;