JFLの試合結果から得点ランキングを作成
スクレイピング
import time
import unicodedata
from urllib.parse import urljoin
import re
import requests
from bs4 import BeautifulSoup
def cleaning(info, team, html):
    """Normalize the rows of a goal table into flat records.

    Args:
        info: list of match-level fields prepended to every record.
        team: team name inserted after the match info.
        html: iterable of <tr> elements; each row is expected to hold
            [minute, shirt number, player name] cells.

    Returns:
        A list of records, each ``info + [team] + [minute, number, name]``.
    """
    result = []
    for trs in html:
        data = [i.get_text(strip=True) for i in trs.select("th, td")]
        # Strip the trailing "分" (minutes) and add up stoppage time,
        # e.g. "45+2分" -> 47.  Summing the parts explicitly replaces the
        # original eval(), which must never run on scraped, untrusted text.
        data[0] = sum(int(part) for part in data[0].rstrip("分").split("+"))
        # Normalize to NFKC, then drop parenthesised notes such as "(PK)".
        # Raw string avoids the invalid-escape warning of "\(.+\)".
        data[2] = re.sub(r"\(.+\)", "", unicodedata.normalize("NFKC", data[2])).strip()
        result.append(info + [team] + data)
    return result
def scraping(n, url):
    """Scrape one JFL match-detail page and return its goal records.

    Args:
        n: sequential match number recorded with every row.
        url: URL of the match detail page.

    Returns:
        A list of goal records (home scorers first, then away), or
        ``None`` when the page has no goal table (e.g. a 0-0 match).
    """
    # Timeout so a dead server cannot hang the whole crawl.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html5lib")
    # Round number, e.g. "第1節" -> 1
    score_season = soup.select_one(
        "div.score-header > h2.score-meta > span.score-season"
    ).get_text(strip=True)
    score_season = int(score_season.strip("第節"))
    # Date and kick-off time, split into two fields
    score_date = (
        soup.select_one("div.score-header > h2.score-meta > span.score-date")
        .get_text(strip=True)
        .split()
    )
    # Team names
    score_table = soup.select_one("table.score-table")
    home_team = score_table.select_one("th.score-team1").get_text(strip=True)
    away_team = score_table.select_one("th.score-team2").get_text(strip=True)
    # Match-level fields shared by every goal record
    game_info = [n, score_season] + score_date + [home_team, away_team]
    # The goal tables sit under the "得 点" heading; `string=` replaces
    # the deprecated `text=` keyword of BeautifulSoup's find().
    tag = soup.find("h3", string="得 点")
    if tag:
        # select() already returns a list, so the original wrapping
        # comprehensions were redundant.
        table_home = tag.parent.select(
            "div.score-frame > div.score-left > table > tbody > tr"
        )
        home_data = cleaning(game_info, home_team, table_home)
        table_away = tag.parent.select(
            "div.score-frame > div.score-right > table > tbody > tr"
        )
        away_data = cleaning(game_info, away_team, table_away)
        return home_data + away_data
    return None
# Fixture list page: collect the "詳細" (detail) link of every match.
url = "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1542&f=2020A001_spc.html"
r = requests.get(url)
r.raise_for_status()
soup = BeautifulSoup(r.content, "html5lib")
links = [
    urljoin(url, anchor.get("href"))
    for anchor in soup.select("td.detail-link > a")
    if anchor.text == "詳細"
]

# Scrape every match page, pausing between requests to be polite.
result = []
for match_no, detail_url in enumerate(links):
    rows = scraping(match_no, detail_url)
    if rows:
        result.extend(rows)
    time.sleep(1)
データラングリング
import pandas as pd
# Build one DataFrame from the scraped goal records.
df = pd.DataFrame(
    result,
    columns=["試合", "節", "日付", "時刻", "ホーム", "アウェイ", "チーム名", "時間", "背番号", "選手名"],
)
# Each record represents exactly one goal.
df["得点"] = 1

# Total goals per (player, team, shirt number); own goals ("オウンゴール")
# are excluded from the scorer ranking.
pivot = df.pivot_table(
    values="得点",
    index=["選手名", "チーム名", "背番号"],
    aggfunc=sum,
    fill_value=0,
)
pv_goal = pivot.drop(["オウンゴール"]).reset_index()
pv_goal["背番号"] = pv_goal["背番号"].astype(int)

# Competition-style ranking: tied scorers share the lowest rank number.
pv_goal["順位"] = pv_goal["得点"].rank(ascending=False, method="min").astype(int)

# 2020 JFL clubs in official order; used as a secondary sort key.
jfl_2020 = [
    "Honda FC",
    "ソニー仙台FC",
    "東京武蔵野シティFC",
    "テゲバジャーロ宮崎",
    "ホンダロックSC",
    "ヴェルスパ大分",
    "FC大阪",
    "MIOびわこ滋賀",
    "ヴィアティン三重",
    "FCマルヤス岡崎",
    "鈴鹿ポイントゲッターズ",
    "ラインメール青森",
    "奈良クラブ",
    "松江シティFC",
    "いわきFC",
    "高知ユナイテッドSC",
]
team = {name: club_id for club_id, name in enumerate(jfl_2020, 1)}
pv_goal["チームID"] = pv_goal["チーム名"].map(team)

# Order by rank, then club, then shirt number; drop the helper columns.
pv_goal = pv_goal.sort_values(["順位", "チームID", "背番号"])
pv_goal = pv_goal.drop(["チームID", "背番号"], axis=1)
pv_goal = pv_goal.set_index("順位")
pv_goal.to_csv("goal.csv")
ランキング
# Official scorer-ranking page: take the first table, first column as index.
tables = pd.read_html("http://www.jfl.or.jp/jfl-pc/view/s.php?a=1544", index_col=0, header=0)
df_rank = tables[0]
# Unify full-width/half-width characters in player names.
df_rank["選手名"] = df_rank["選手名"].str.normalize("NFKC")
df_rank.to_csv("ranking.csv")
import time
import unicodedata
from urllib.parse import urljoin
import re
import requests
from bs4 import BeautifulSoup
def cleaning(info, team, html):
    """Normalize the rows of a goal table into flat records.

    Args:
        info: list of match-level fields prepended to every record.
        team: team name inserted after the match info.
        html: iterable of <tr> elements; each row is expected to hold
            [minute, shirt number, player name] cells.

    Returns:
        A list of records, each ``info + [team] + [minute, number, name]``.
    """
    result = []
    for trs in html:
        data = [i.get_text(strip=True) for i in trs.select("th, td")]
        # Strip the trailing "分" (minutes) and add up stoppage time,
        # e.g. "45+2分" -> 47.  Summing the parts explicitly replaces the
        # original eval(), which must never run on scraped, untrusted text.
        data[0] = sum(int(part) for part in data[0].rstrip("分").split("+"))
        # Normalize to NFKC, then drop parenthesised notes such as "(PK)".
        # Raw string avoids the invalid-escape warning of "\(.+\)".
        data[2] = re.sub(r"\(.+\)", "", unicodedata.normalize("NFKC", data[2])).strip()
        result.append(info + [team] + data)
    return result
def scraping(n, url):
    """Scrape one JFL match-detail page and return its goal records.

    Args:
        n: sequential match number recorded with every row.
        url: URL of the match detail page.

    Returns:
        A list of goal records (home scorers first, then away), or
        ``None`` when the page has no goal table (e.g. a 0-0 match).
    """
    # Timeout so a dead server cannot hang the whole crawl.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html5lib")
    # Round number, e.g. "第1節" -> 1
    score_season = soup.select_one(
        "div.score-header > h2.score-meta > span.score-season"
    ).get_text(strip=True)
    score_season = int(score_season.strip("第節"))
    # Date and kick-off time, split into two fields
    score_date = (
        soup.select_one("div.score-header > h2.score-meta > span.score-date")
        .get_text(strip=True)
        .split()
    )
    # Team names
    score_table = soup.select_one("table.score-table")
    home_team = score_table.select_one("th.score-team1").get_text(strip=True)
    away_team = score_table.select_one("th.score-team2").get_text(strip=True)
    # Match-level fields shared by every goal record
    game_info = [n, score_season] + score_date + [home_team, away_team]
    # The goal tables sit under the "得 点" heading; `string=` replaces
    # the deprecated `text=` keyword of BeautifulSoup's find().
    tag = soup.find("h3", string="得 点")
    if tag:
        # select() already returns a list, so the original wrapping
        # comprehensions were redundant.
        table_home = tag.parent.select(
            "div.score-frame > div.score-left > table > tbody > tr"
        )
        home_data = cleaning(game_info, home_team, table_home)
        table_away = tag.parent.select(
            "div.score-frame > div.score-right > table > tbody > tr"
        )
        away_data = cleaning(game_info, away_team, table_away)
        return home_data + away_data
    return None
# Fixture list page: collect the "詳細" (detail) link of every match.
url = "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1542&f=2020A001_spc.html"
r = requests.get(url)
r.raise_for_status()
soup = BeautifulSoup(r.content, "html5lib")
links = [
    urljoin(url, anchor.get("href"))
    for anchor in soup.select("td.detail-link > a")
    if anchor.text == "詳細"
]

# Scrape every match page, pausing between requests to be polite.
result = []
for match_no, detail_url in enumerate(links):
    rows = scraping(match_no, detail_url)
    if rows:
        result.extend(rows)
    time.sleep(1)
import pandas as pd
# Build one DataFrame from the scraped goal records.
df = pd.DataFrame(
    result,
    columns=["試合", "節", "日付", "時刻", "ホーム", "アウェイ", "チーム名", "時間", "背番号", "選手名"],
)
# Each record represents exactly one goal.
df["得点"] = 1

# Total goals per (player, team, shirt number); own goals ("オウンゴール")
# are excluded from the scorer ranking.
pivot = df.pivot_table(
    values="得点",
    index=["選手名", "チーム名", "背番号"],
    aggfunc=sum,
    fill_value=0,
)
pv_goal = pivot.drop(["オウンゴール"]).reset_index()
pv_goal["背番号"] = pv_goal["背番号"].astype(int)

# Competition-style ranking: tied scorers share the lowest rank number.
pv_goal["順位"] = pv_goal["得点"].rank(ascending=False, method="min").astype(int)

# 2020 JFL clubs in official order; used as a secondary sort key.
jfl_2020 = [
    "Honda FC",
    "ソニー仙台FC",
    "東京武蔵野シティFC",
    "テゲバジャーロ宮崎",
    "ホンダロックSC",
    "ヴェルスパ大分",
    "FC大阪",
    "MIOびわこ滋賀",
    "ヴィアティン三重",
    "FCマルヤス岡崎",
    "鈴鹿ポイントゲッターズ",
    "ラインメール青森",
    "奈良クラブ",
    "松江シティFC",
    "いわきFC",
    "高知ユナイテッドSC",
]
team = {name: club_id for club_id, name in enumerate(jfl_2020, 1)}
pv_goal["チームID"] = pv_goal["チーム名"].map(team)

# Order by rank, then club, then shirt number; drop the helper columns.
pv_goal = pv_goal.sort_values(["順位", "チームID", "背番号"])
pv_goal = pv_goal.drop(["チームID", "背番号"], axis=1)
pv_goal = pv_goal.set_index("順位")
pv_goal.to_csv("goal.csv")
ランキング
# Official scorer-ranking page: take the first table, first column as index.
tables = pd.read_html("http://www.jfl.or.jp/jfl-pc/view/s.php?a=1544", index_col=0, header=0)
df_rank = tables[0]
# Unify full-width/half-width characters in player names.
df_rank["選手名"] = df_rank["選手名"].str.normalize("NFKC")
df_rank.to_csv("ranking.csv")
# Official scorer-ranking page: take the first table, first column as index.
tables = pd.read_html("http://www.jfl.or.jp/jfl-pc/view/s.php?a=1544", index_col=0, header=0)
df_rank = tables[0]
# Unify full-width/half-width characters in player names.
df_rank["選手名"] = df_rank["選手名"].str.normalize("NFKC")
df_rank.to_csv("ranking.csv")
Author And Source
この問題について(JFLの試合結果から得点ランキングを作成)、我々はより多くの情報をここで見つけました: https://qiita.com/barobaro/items/9b391717e127cef469d2
著者帰属: 元の著者の情報は元のURLに含まれています。著作権は原作者に帰属します。
Content is automatically searched and collected through network algorithms. If there is a violation, please contact us and we will adjust it (correct the author information or delete the content) as soon as possible.