人工知能トレーナー(気象データを登る)
The script below can be used as-is. To fetch weather data, call get_air_quality('成都', year=2019, month=1); see each function's docstring for the details.
import time
import requests
from bs4 import BeautifulSoup
import re
import csv
BASE_URL = 'http://www.tianqihoubao.com/aqi/'
def get_city_coding():
    """
    Scrape the city index page and map each city name to its URL slug.
    :return: dict such as {'杭州': 'hangzhou', '广州': 'guangzhou'}
    """
    response = requests.get(BASE_URL)
    # Re-encode to bytes and let BeautifulSoup re-detect the charset; the site is not UTF-8.
    soup = BeautifulSoup(response.text.encode(response.encoding), features="html.parser")
    # Capture the slug and the link text of every city link inside the city list block.
    all_city = re.findall(r'/aqi/(\w+)\.html">(.*?)</a>', str(soup.select(".citychk")[0]))
    city_coding = {}
    for item in all_city:
        city_coding[item[1].strip()] = item[0].strip()
    return city_coding
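# Sample result (hypothetical entries; the real keys come from the site's city index):
# {'成都': 'chengdu', '杭州': 'hangzhou', '广州': 'guangzhou', ...}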
def build_url(city, year=None, month=None):
    """
    Build the request URL for a city, optionally scoped to a year and month.
    :param city: city URL slug, e.g. 'chongqing'
    :param year: four-digit year
    :param month: month number; zero-padded in the URL
    :return: http://www.tianqihoubao.com/aqi/chongqing-201907.html or http://www.tianqihoubao.com/aqi/chongqing.html
    """
    if year is not None and month is not None:
        return (BASE_URL + '{}-{}{}.html').format(city, year,
                                                  '0' + str(month) if month < 10 else month)
    else:
        return (BASE_URL + '{}.html').format(city)
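# e.g. build_url('chongqing', 2019, 7) -> 'http://www.tianqihoubao.com/aqi/chongqing-201907.html'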
def get_http_content(url):
    """
    Fetch a page and return the child nodes of its first table.
    :param url: page URL built by build_url()
    :return: list of the table's child nodes (one row per day)
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text.encode(response.encoding), features="html.parser")
    return soup.table.contents
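# Note: soup.table grabs the first <table> on the page, which is assumed here to be the AQI table.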
def parse(contents, city):
    """
    Turn the soup table rows into plain lists, prefixing each data row with the city name.
    :param contents: table child nodes from get_http_content()
    :param city: city name used to label each row
    :return: list of rows, the first being the header row
    """
    data = []
    count = 0
    for item in contents:
        if hasattr(item, 'text'):  # skip non-tag nodes such as whitespace strings
            # The first row is the header: label its extra leading column 'city'.
            data.append((['city'] + item.text.split()) if count == 0 else [city] + item.text.split())
            count += 1
    return data
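# The returned rows mirror the page's table: parse(contents, '成都') yields a header
# row starting with 'city' followed by daily rows starting with '成都'.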
def save(data, filename, mode='w'):
    """
    Write a list of rows to a CSV file in the current directory.
    :param data: list of rows from parse()
    :param filename: file name without the .csv extension
    :param mode: 'w' to overwrite, 'a' to append
    :return: None
    """
    with open(r'./' + filename + '.csv', mode=mode, encoding='utf-8', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        for item in data:
            csv_writer.writerow(item)
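# mode='w' (the default) overwrites the target file; the all-city loop below
# passes mode='a' so every city's rows land in one shared CSV.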
def get_air_quality(city=None, year=None, month=None):
    """
    Fetch air-quality data and save it as CSV.
    :param city: city name; when None, data for multiple cities is fetched into one file
    :param year: optional; year and month must be given together, otherwise the city's default page is fetched
    :param month: optional; year and month must be given together
    :return: None
    """
    city_coding = get_city_coding()
    if city is None:
        for index, request_city in enumerate(city_coding.keys()):
            if index < 10:  # demo cap: only the first 10 cities, crawling the full list is slow
                url = build_url(city_coding[request_city], year, month)
                contents = get_http_content(url)
                data = parse(contents, request_city)
                if index != 0:
                    data.pop(0)  # keep the header row from the first city only
                save(data, 'all_city', 'a')
                time.sleep(1)  # be polite: pause between requests
    else:
        request_city_coding = city_coding[city]
        url = build_url(request_city_coding, year, month)
        contents = get_http_content(url)
        data = parse(contents, city)
        save(data, request_city_coding)
"""
, csv
"""
get_air_quality(' ', year=2019, month=1)
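Calling get_air_quality() without a city argument crawls the scraped city list instead (capped at the first 10 cities by the loop above) and appends every city's rows to a single file:

get_air_quality(year=2019, month=1)  # first 10 cities -> all_city.csv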