[PYTHON] Create a score ranking from JFL match results

Scraping

import time
import unicodedata
from urllib.parse import urljoin
import re

import requests
from bs4 import BeautifulSoup

def cleaning(info, team, html):
    """Build one flat record per row of a scoring table.

    Args:
        info: List of match-level fields prepended to every record.
        team: Team name inserted between ``info`` and the row's cells.
        html: Iterable of table-row elements. Each must provide
            ``select("th, td")`` returning cells that support
            ``get_text(strip=True)``, and must yield at least three cells.

    Returns:
        A list of lists, each ``info + [team] + cells``, where cell 0 has
        been converted to an ``int`` (the sum of its ``+``-separated parts)
        and cell 2 has been NFKC-normalised with any parenthesised text
        removed.

    Raises:
        ValueError: If the time cell is not a ``+``-separated list of
            integers once the trailing unit characters are removed.
        IndexError: If a row has fewer than three cells.
    """
    # Raw string: "\(" inside a normal literal is an invalid escape sequence.
    # Compiled once because the pattern is the same for every row.
    note_pattern = re.compile(r"\(.+\)")

    result = []

    for trs in html:
        data = [cell.get_text(strip=True) for cell in trs.select("th, td")]

        # The time looks like "90+3Minutes": drop the trailing unit characters
        # and add up the parts (regular time + added time). The text comes
        # from a scraped page, so it is parsed with int() rather than eval(),
        # which would execute whatever expression the page contained.
        minutes = data[0].rstrip("Minutes")
        data[0] = sum(int(part) for part in minutes.split("+"))

        # Normalise full-width characters, then remove "(...)" annotations
        # (e.g. the PK marker) from the player name.
        normalized_name = unicodedata.normalize("NFKC", data[2])
        data[2] = note_pattern.sub("", normalized_name).strip()

        result.append(info + [team] + data)

    return result

def scraping(n, url, timeout=10):
    """Download one match-detail page and return its scoring records.

    Args:
        n: Match identifier stored as the first field of every record
            (the caller passes the index of the link).
        url: URL of the match-detail page.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        A list of records (home team rows followed by away team rows) as
        produced by ``cleaning``, or ``None`` when the page has no
        ``<h3>`` whose text is ``"score"``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.

    NOTE(review): the header and table lookups are not guarded; a page
    without those elements would fail on the ``None`` returned by
    ``select_one``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html5lib")

    # Section: the remaining text after stripping the label characters is
    # parsed as an integer.
    score_season = soup.select_one(
        "div.score-header > h2.score-meta > span.score-season"
    ).get_text(strip=True)
    score_season = int(score_season.strip("Section"))

    # Date and time: whitespace-separated parts of the date span.
    score_date = (
        soup.select_one("div.score-header > h2.score-meta > span.score-date")
        .get_text(strip=True)
        .split()
    )

    # Team names
    score_table = soup.select_one("table.score-table")
    home_team = score_table.select_one("th.score-team1").get_text(strip=True)
    away_team = score_table.select_one("th.score-team2").get_text(strip=True)

    # Match information shared by every scoring record of this match
    game_info = [n, score_season] + score_date + [home_team, away_team]

    # Heading of the scoring section; "string" is the current name of the
    # keyword formerly called "text".
    tag = soup.find("h3", string="score")

    # No scoring heading means there is nothing to extract
    if not tag:
        return None

    frame = tag.parent
    home_rows = frame.select(
        "div.score-frame > div.score-left > table > tbody > tr"
    )
    away_rows = frame.select(
        "div.score-frame > div.score-right > table > tbody > tr"
    )

    home_data = cleaning(game_info, home_team, home_rows)
    away_data = cleaning(game_info, away_team, away_rows)

    return home_data + away_data

# Page listing the matches; links labelled "Details" lead to per-match pages.
url = "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1542&f=2020A001_spc.html"

# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, timeout=10)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html5lib")

# Absolute URLs of every match-detail page
links = [
    urljoin(url, link.get("href"))
    for link in soup.select("td.detail-link > a")
    if link.text == "Details"
]

# Scoring records collected across all matches
result = []

for i, link in enumerate(links):

    score_data = scraping(i, link)

    # scraping() returns None (or an empty list) when there are no goals
    if score_data:
        result.extend(score_data)

    # Pause between requests so the server is not hit in a tight loop
    time.sleep(1)

Data wrangling

import pandas as pd

# One row per scraped scoring record
df = pd.DataFrame(
    result,
    columns=[
        "match",
        "section",
        "date",
        "Times of Day",
        "home",
        "Away",
        "Team name",
        "time",
        "Uniform number",
        "Player name",
    ],
)

df

# Every row counts as one goal, so summing this column gives goal totals
df["score"] = 1

# Goal number ranking: total goals per player.
# - aggfunc is the string "sum"; passing the builtin is flagged by newer pandas.
# - errors="ignore" keeps the script working when no own goal was recorded;
#   without it the drop raises KeyError.
pv_goal = (
    df.pivot_table(
        values="score",
        index=["Player name", "Team name", "Uniform number"],
        aggfunc="sum",
        fill_value=0,
    )
    .drop(["Own goal"], errors="ignore")
    .reset_index()
)

# Convert to int so the sort below is numeric rather than lexicographic
pv_goal["Uniform number"] = pv_goal["Uniform number"].astype(int)

# Ranking: ties share the best (lowest) rank
pv_goal["Ranking"] = pv_goal["score"].rank(ascending=False, method="min").astype(int)

# Teams, in the order used to break ties within the same rank
jfl_2020 = [
    "Honda FC",
    "Sony Sendai FC",
    "Tokyo Musashino City FC",
    "Tegevajaro Miyazaki",
    "Honda Lock SC",
    "Verspah Oita",
    "FC Osaka",
    "MIO Biwako Shiga",
    "Veertien Mie",
    "FC Maruyasu Okazaki",
    "Suzuka Point Getters",
    "Line mail Aomori",
    "Nara club",
    "Matsue City FC",
    "Iwaki FC",
    "Kochi United SC",
]

# Team name -> 1-based position in the list above
team = {name: i for i, name in enumerate(jfl_2020, 1)}

pv_goal["Team ID"] = pv_goal["Team name"].map(team)

# Ascending order by ranking, team ID, uniform number; the two helper
# columns are only needed for sorting and are dropped afterwards.
pv_goal = (
    pv_goal.sort_values(["Ranking", "Team ID", "Uniform number"])
    .drop(columns=["Team ID", "Uniform number"])
    .set_index("Ranking")
)

pv_goal.to_csv("goal.csv")

Ranking

# Download the ranking page and keep its first table, using the first row
# as the header and the first column as the index.
ranking_tables = pd.read_html(
    "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1544",
    index_col=0,
    header=0,
)
df_rank = ranking_tables[0]

# Apply NFKC normalisation to the player names
normalized_names = df_rank["Player name"].str.normalize("NFKC")
df_rank["Player name"] = normalized_names

df_rank.to_csv("ranking.csv")

Recommended Posts

Create a score ranking from JFL match results
Create a standings from JFL match results
Create a pandas Dataframe from a string.
How to create a clone from Github
Create a deb file from a python package
How to create a repository from media
Create a dataframe from excel using pandas
Edit Excel from Python to create a PivotTable
How to create a function object from a string
Create a C array from a Python> Excel sheet
Create a game UI from scratch with pygame2!
Create a New Todoist Task from Python Script
Create a phylogenetic tree from Biopython using ClustalW2
Create a decision tree from 0 with Python (1. Overview)
Create a datetime object from a string in Python (Python 3.3)
Create a Word Cloud from an academic program
Python script to create a JSON file from a CSV file
How to create a kubernetes pod from python code
Create a machine learning environment from scratch with Windows 10