"""大阪府の Go To EAT の加盟店舗をスクレイピングし CSV に変換する。

Scrape the member restaurants of the Go To Eat Osaka campaign and
write them out as a CSV file.
"""

import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# One dict per restaurant, accumulated across every result page.
result = []

# Search URL with all genre/area facets pre-selected (first page of results).
url = "https://goto-eat.weare.osaka-info.jp/?search_element_0_0=2&search_element_0_1=3&search_element_0_2=4&search_element_0_3=5&search_element_0_4=6&search_element_0_5=7&search_element_0_6=8&search_element_0_7=9&search_element_0_8=10&search_element_0_9=11&search_element_0_cnt=10&search_element_1_0=12&search_element_1_1=13&search_element_1_2=14&search_element_1_3=15&search_element_1_4=16&search_element_1_5=17&search_element_1_6=18&search_element_1_7=19&search_element_1_8=20&search_element_1_9=21&search_element_1_10=22&search_element_1_11=23&search_element_1_12=24&search_element_1_13=25&search_element_1_14=26&search_element_1_15=27&search_element_1_16=28&search_element_1_17=29&search_element_1_cnt=18&searchbutton=%E5%8A%A0%E7%9B%9F%E5%BA%97%E8%88%97%E3%82%92%E6%A4%9C%E7%B4%A2%E3%81%99%E3%82%8B&csp=search_add&feadvns_max_line_0=2&fe_form_no=0"

while True:
    # Timeout guards against hanging forever on an unresponsive server;
    # raise_for_status turns HTTP errors into exceptions instead of
    # silently parsing an error page.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

    # Each <li> in the search-result box is one restaurant card.
    for li in soup.select("div.search_result_box > ul > li"):

        data = {}
        data["店舗名"] = li.select_one("p.name").get_text(strip=True)
        # The tag list carries exactly two strings: genre, then area.
        data["ジャンル"], data["地域"] = li.select_one("ul.tag_list").stripped_strings

        # Detail table rows: <th> is the field label, <td> the value.
        for tr in li.table.select("tr"):

            k = tr.th.get_text(strip=True)

            if k == "住所":
                # Address cell: first string is the postal code, the last
                # is the street address (inner whitespace collapsed).
                v = list(tr.td.stripped_strings)

                data["郵便番号"] = v[0]
                data[k] = " ".join(v[-1].split())
            else:
                data[k] = tr.td.get_text(strip=True)

        result.append(data)

    # "Next page" link of the wp-pagenavi pager; absent on the last page.
    tag = soup.select_one("div.wp-pagenavi > a.nextpostslink")

    if not tag:
        break

    # urljoin tolerates both absolute and relative hrefs.
    url = urljoin(url, tag.get("href"))

    # Be polite to the server between page fetches.
    time.sleep(1)

# Fix the CSV column order; reindex drops any unexpected keys and fills
# missing fields with NaN so every row has the same shape.
df = pd.DataFrame(result).reindex(
    columns=["店舗名", "ジャンル", "地域", "郵便番号", "住所", "TEL", "営業時間", "定休日"]
)

# utf_8_sig writes a BOM so Excel opens the Japanese text correctly;
# index=False keeps the meaningless row counter out of the export.
df.to_csv("osaka.csv", encoding="utf_8_sig", index=False)