Last time, the list had been released as a PDF:
Convert PDF of list of Go To EAT member stores in Niigata prefecture to CSV https://qiita.com/barobaro/items/74fb5bdedbf1ae7267a0
This time the PDF cannot be found, so the list is created by scraping the website instead.
import re
import time
import requests
from bs4 import BeautifulSoup
# Scrape every page of the Niigata Go To Eat shop directory into `result`,
# building one dict per shop. Pagination follows the "next" link until the
# site stops offering one.
url = "https://niigata-gte.com/shop/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Latitude/longitude are the two comma-separated values between "@" and the
# ",<zoom>z" suffix of a Google Maps link.
GPS_RE = re.compile(r"(?<=@)(.+?),(.+?)(?=,\d{1,2}z)")
# URL of the following page, as embedded in the "next" link's onclick text.
NEXT_PAGE_RE = re.compile(r"https://niigata-gte\.com/shop/page/\d+/")

result = []

while True:
    # A timeout keeps a stalled connection from hanging the whole run.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    for shop in soup.select("div#result > div.cont"):
        data = {}

        # Keep only the text after the first ":" of the number label.
        data["Dealer code"] = (
            shop.select_one("div.no").get_text(strip=True).split(":", 1)[-1]
        )

        # The first two tag spans are read as area and genre; an optional
        # third span holds icons whose alt texts become "○"-flagged columns.
        span = shop.select("div.tag > span")
        data["area"] = span[0].get_text(strip=True)
        data["Genre"] = span[1].get_text(strip=True)
        if len(span) > 2:
            data.update({i.get("alt"): "○" for i in span[2].select("img")})

        h4 = shop.select_one("h4")
        data["Store name"] = h4.get_text(strip=True)
        # Record the home page only when the heading links to a non-empty URL.
        if h4.select_one("a"):
            link = h4.a.get("href")
            if link:
                data["home page"] = link

        # First child of p.add is "<postcode> <address>" text; the second
        # child wraps the Google Maps link.
        p_add = shop.select_one("p.add").contents
        postcode, address = p_add[0].split(sep=None, maxsplit=1)

        # Extract latitude / longitude from the Google Maps link.
        gps = GPS_RE.search(p_add[1].a.get("href"))
        if gps:
            data["latitude"] = float(gps.group(1))
            data["longitude"] = float(gps.group(2))

        data["Postal code"] = postcode.strip()
        data["location"] = address.strip()
        data["phone number"] = shop.select_one("p.tel").get_text(strip=True)

        result.append(data)

    # Find the next page. Stop when there is no "next" item, or when its
    # onclick text does not contain a page URL; without this stop the same
    # page would be refetched forever and its rows duplicated.
    tag = soup.select_one("li.next")
    anchor = tag.a if tag is not None else None
    onclick = (anchor.get("onclick") or "") if anchor is not None else ""
    m = NEXT_PAGE_RE.search(onclick)
    if not m:
        break
    url = m.group(0)

    # Pause between page requests to go easy on the server.
    time.sleep(3)
import pandas as pd
# Turn the scraped shop records into a table, number the rows from 1 instead
# of pandas' default 0, and export the table as CSV.
df = pd.DataFrame(data=result)
df.index = df.index + 1
# The utf_8_sig codec prepends a UTF-8 byte-order mark to the output file.
df.to_csv(path_or_buf="niigata.csv", encoding="utf_8_sig")
        Recommended Posts