IDWR速報データのインフルエンザ定点当たり報告数・都道府県別をスクレイピング


国立感染症研究所のサイトに同じデータのCSVが公開されているので、そのCSVへのリンクをスクレイピングで取得する

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Page that is scraped for the link to the "-teiten.csv" file.
url = "https://www.niid.go.jp/niid/ja/data.html"

# Browser-like User-Agent sent with the page request.
# NOTE(review): presumably the site rejects the default client UA — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Without an explicit timeout, requests waits indefinitely on a server that
# accepts the connection but never answers.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# First anchor under the expected table layout whose href ends in
# "-teiten.csv".
tag = soup.select_one(
    'div.leading-0 > table > tbody > tr > td > p.body1 > a[href$="-teiten.csv"]'
)

# select_one() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if tag is None:
    raise RuntimeError(
        f"No link ending in '-teiten.csv' was found on {url}; "
        "the page layout may have changed."
    )

# Resolve the href against the page URL in case it is relative.
link = urljoin(url, tag.get("href"))

import pandas as pd

# Load the CSV found above straight from its URL.
# NOTE(review): presumably column 0 holds the prefecture names and columns
# 1-2 the influenza figures — verify against the actual CSV layout.
df = pd.read_csv(
    link,
    encoding="cp932",  # the file is decoded as cp932 (Shift_JIS superset)
    skiprows=3,  # skip the first three lines of the file
    header=0,  # the first remaining line supplies the column names
    usecols=[0, 1, 2],  # keep only the first three columns
    index_col=0,  # and use the first of them as the row index
    na_values="-",  # "-" cells are read as missing values
)

# Discard rows whose index label is missing.
df1 = df.loc[~df.index.isna()]