The National Institute of Infectious Diseases (NIID) publishes a CSV of the same data, so we scrape it.
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# NIID data page that links to the sentinel (teiten) surveillance CSV
url = "https://www.niid.go.jp/niid/ja/data.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

r = requests.get(url, headers=headers)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Pick out the anchor whose href ends with "-teiten.csv" and resolve it to an absolute URL
tag = soup.select_one(
    'div.leading-0 > table > tbody > tr > td > p.body1 > a[href$="-teiten.csv"]'
)
link = urljoin(url, tag.get("href"))

# Read the CSV with cp932 (Shift_JIS) encoding, skip the three leading title rows,
# keep only the first three columns, and treat "-" as a missing value
df = pd.read_csv(
    link,
    encoding="cp932",
    skiprows=3,
    index_col=0,
    header=0,
    usecols=[0, 1, 2],
    na_values="-",
)

# Keep only the rows that actually have an index value (drops trailing blank/note rows)
df1 = df[df.index.notna()]
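
To confirm the download and parsing worked, here is a minimal follow-up sketch. It assumes the code above has already run and df1 holds the parsed table; the output file name is just an example, not part of the original post.

# Quick sanity check on the parsed table (assumes df1 from the code above)
print(df1.head())    # first few data rows
print(df1.dtypes)    # column types; numeric columns should not be object
# Save a UTF-8 re-encoded local copy (file name is arbitrary)
df1.to_csv("niid_teiten.csv", encoding="utf-8")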