AI-trainer exercise: scraping air-quality (weather) data

3476 words

Ready to use directly. To fetch air-quality data, call get_air_quality('成都', year=2019, month=1); see the docstrings of the individual functions for details.
import time
import requests
from bs4 import BeautifulSoup
import re
import csv

# Root URL of the tianqihoubao.com air-quality (AQI) section; every page
# URL built in this script is relative to this prefix.
BASE_URL = 'http://www.tianqihoubao.com/aqi/'


def get_city_coding():
    """Scrape the AQI index page and map city display names to URL slugs.

    :return: dict like {'杭州': 'hangzhou', '广州': 'guangzhou'}
    """
    response = requests.get(BASE_URL)

    # Re-encode with the (often mis-detected) response encoding and let
    # BeautifulSoup sniff the real charset from the raw bytes.
    soup = BeautifulSoup(response.text.encode(response.encoding), features="html.parser")
    # BUGFIX: the original pattern ended with a bare lazy group '(.*?)',
    # which always matches the empty string, so every city name came back
    # as ''. Anchoring on the closing '</a>' (and escaping the '.') makes
    # the second group capture the visible city name.
    all_city = re.findall(r'/aqi/(\w*)\.html">(.*?)</a>', str(soup.select(".citychk")[0]))

    city_coding = {}
    for slug, display_name in all_city:
        city_coding[display_name.strip()] = slug.strip()
    return city_coding


def build_url(city, year=None, month=None):
    """Compose the request URL for a city, optionally for a specific month.

    :param city: URL slug of the city, e.g. 'chongqing'
    :param year: optional 4-digit year
    :param month: optional month number (1-12)
    :return: http://www.tianqihoubao.com/aqi/chongqing-201907.html when
             both year and month are given, otherwise
             http://www.tianqihoubao.com/aqi/chongqing.html
    """
    # Without a full year/month pair we can only point at the overview page.
    if year is None or month is None:
        return '{}{}.html'.format(BASE_URL, city)
    padded_month = str(month) if month >= 10 else '0' + str(month)
    return '{}{}-{}{}.html'.format(BASE_URL, city, year, padded_month)


def get_http_content(url):
    """Download *url* and return the children of the first table in the page.

    :param url: page URL to fetch
    :return: list of child nodes of the page's first <table> element
    """
    page = requests.get(url)
    # Re-encode to bytes so BeautifulSoup detects the page charset itself
    # instead of trusting requests' guessed encoding.
    raw_bytes = page.text.encode(page.encoding)
    parsed = BeautifulSoup(raw_bytes, features="html.parser")
    return parsed.table.contents


def parse(contents, city):
    """Turn scraped table nodes into rows of cell values.

    The first text-bearing node is treated as the table header and gets a
    placeholder label in column 0; every following row is prefixed with the
    city name.

    :param contents: children of the scraped <table> element
    :param city: city name used to label each data row
    :return: list of rows, each a list of strings
    """
    # Skip nodes without a .text attribute (e.g. bare whitespace strings).
    text_nodes = [node for node in contents if hasattr(node, 'text')]
    rows = []
    for row_index, node in enumerate(text_nodes):
        label = '  ' if row_index == 0 else city
        rows.append([label] + node.text.split())
    return rows


def save(data, filename, mode='w'):
    """Write rows to ./<filename>.csv.

    :param data: iterable of rows (each an iterable of cell values)
    :param filename: base file name, without the .csv extension
    :param mode: file open mode, 'w' to overwrite or 'a' to append
    :return: None
    """
    # BUGFIX: the original opened the file without ever closing it, leaking
    # the handle; 'with' guarantees the buffer is flushed and closed.
    with open(r'./' + filename + '.csv', mode=mode, encoding='utf-8', newline='') as csv_file:
        csv.writer(csv_file).writerows(data)


def get_air_quality(city=None, year=None, month=None):
    """Download air-quality data and persist it to CSV.

    :param city: city display name; when None, the first 10 cities from the
                 index page are scraped and appended to all_city.csv
    :param year: optional year; both year and month must be given to fetch
                 a monthly page, otherwise the city overview page is used
    :param month: optional month (1-12), used together with year
    :return: None (writes CSV files as a side effect)
    """
    city_coding = get_city_coding()
    if city is None:
        # Demo mode: scrape only the first 10 cities.
        for index, request_city in enumerate(city_coding.keys()):
            if index >= 10:
                # BUGFIX: stop here instead of idling through every
                # remaining city with a no-op iteration.
                break
            url = build_url(city_coding[request_city], year, month)
            contents = get_http_content(url)
            data = parse(contents, request_city)
            if index != 0:
                data.pop(0)  # drop the duplicate header row when appending
            save(data, 'all_city', 'a')
            time.sleep(1)  # throttle requests to be polite to the server
    else:
        request_city_coding = city_coding[city]
        url = build_url(request_city_coding, year, month)
        contents = get_http_content(url)
        data = parse(contents, city)
        save(data, request_city_coding)


"""
        ,   csv  
"""
get_air_quality('  ', year=2019, month=1)