「クローラー」--Webページ情報をMySQLデータベースに保存

3246 ワード

特殊なコンテンツを格納するには、次の2つの方法があります.
1つ目:repr関数の使用
repr()関数は、オブジェクトをインタプリタが読み取れる文字列形式に変換します。
import json
import requests
import pymysql


class mysql_conn(object):
    """Thin wrapper around a pymysql connection for running write statements.

    Opens the connection and cursor eagerly in ``__init__`` and closes both
    when the instance is garbage-collected.
    """

    def __init__(self):
        # Open the connection and create a cursor up front.
        # NOTE(review): credentials are hard-coded; move to config for real use.
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                                  port=3306, database='python')
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql):
        """Execute a modifying statement (INSERT/UPDATE/DELETE) and commit."""
        self.cursor.execute(sql)
        self.db.commit()

    def __del__(self):
        # Guard each attribute: if __init__ raised (e.g. connection refused),
        # self.cursor / self.db may never have been created, and an
        # AttributeError here would be noisily swallowed by the interpreter.
        if hasattr(self, 'cursor'):
            self.cursor.close()
        if hasattr(self, 'db'):
            self.db.close()

mc = mysql_conn()

headers = {
    'Cookie': '***********',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Xueqiu public-timeline JSON endpoint.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'

response = requests.get(url, headers=headers)

res_dict = json.loads(response.text)

list_list = res_dict['list']

for list_item_dict in list_list:
    # Each item's 'data' field is itself a JSON-encoded string, so it needs
    # a second json.loads pass.
    data_str = list_item_dict['data']
    data_dic = json.loads(data_str)

    # 'item_id' rather than 'id' — avoid shadowing the builtin.
    item_id = int(data_dic['id'])
    title = data_dic['title']
    description = data_dic['description']
    target = data_dic['target']

    # Parameterized query: the driver escapes the values itself, so quotes,
    # backslashes etc. in the scraped text can neither break the statement
    # nor inject SQL. The old approach ("%d,%s,..." % (..., repr(title), ...))
    # was injectable and stored Python repr text (with literal quote
    # characters) in the database.
    sql = "insert into xue_sql(id,title,description,target) values (%s,%s,%s,%s);"
    mc.cursor.execute(sql, (item_id, title, description, target))
    mc.db.commit()




2つ目の方法:
タプル形式で値を渡し、プレースホルダに組み込む
import json
import requests
import pymysql


class mysql_conn(object):
    """Thin wrapper around a pymysql connection for running write statements.

    Unlike the first variant, ``execute_modify_mysql`` accepts an optional
    parameter tuple so the driver performs the value escaping.
    """

    def __init__(self):
        # Open the connection and create a cursor up front.
        # NOTE(review): credentials are hard-coded; move to config for real use.
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                                  port=3306, database='python')
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, data=None):
        """Execute a modifying statement and commit.

        ``data`` is an optional sequence of values bound to the ``%s``
        placeholders in ``sql`` by the driver (safe escaping).
        """
        self.cursor.execute(sql, data)
        self.db.commit()

    def __del__(self):
        # Guard each attribute: if __init__ raised (e.g. connection refused),
        # self.cursor / self.db may never have been created, and an
        # AttributeError here would be noisily swallowed by the interpreter.
        if hasattr(self, 'cursor'):
            self.cursor.close()
        if hasattr(self, 'db'):
            self.db.close()

mc = mysql_conn()

headers = {
    'Cookie': '*******',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Xueqiu public-timeline JSON endpoint.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'

response = requests.get(url, headers=headers)

res_dict = json.loads(response.text)

list_list = res_dict['list']

for list_item_dict in list_list:
    # Each item's 'data' field is itself a JSON-encoded string, so it needs
    # a second json.loads pass.
    data_str = list_item_dict['data']
    data_dic = json.loads(data_str)

    # 'item_id' rather than 'id' — avoid shadowing the builtin.
    item_id = int(data_dic['id'])
    title = data_dic['title']
    description = data_dic['description']
    target = data_dic['target']

    # Pass the raw values: the driver escapes them when binding to the %s
    # placeholders. Wrapping them in repr() (as the original did) would
    # double-quote the text, storing literal '...' quotes in the database.
    data = (item_id, title, description, target)

    sql = "insert into new_xue(id,title,description,target) values (%s,%s,%s,%s);"
    mc.execute_modify_mysql(sql, data=data)