Use python to scrape the BicCamera website.
I couldn't find any good articles on scraping, so I wrote this code and made a YouTube video as practice toward becoming a YouTuber who surpasses Hikakin.
[Link] https://www.youtube.com/watch?v=SZuNFDzJndA&list=PLzPCqF-heFHyFb_aoqnXc8GrECL6yMdvZ
Scrape carefully and at your own risk, because incidents like the following can occur. Okazaki Municipal Central Library case: https://ja.wikipedia.org/wiki/%E5%B2%A1%E5%B4%8E%E5%B8%82%E7%AB%8B%E4%B8%AD%E5%A4%AE%E5%9B%B3%E6%9B%B8%E9%A4%A8%E4%BA%8B%E4%BB%B6
Also, if BicCamera changes its site UI, this code will very likely stop working, so please leave a comment if it doesn't work. I may fix it.
scraping_biccamera.py
from bs4 import BeautifulSoup as bs
from datetime import datetime
import pandas as pd
import requests
import urllib
import time
import re
def get_html(url):
    """Download *url* and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup object built from the raw response bytes using the
        built-in "html.parser" backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # requests applies no timeout by default; without one a stalled server
    # would block the scraper indefinitely.
    res = requests.get(url, timeout=30)
    return bs(res.content, "html.parser")
def next_url(res_bs):
    """Return the absolute URL of the next result page, or None.

    Scans every element with class "bcs_l" in *res_bs*; the first one whose
    text contains "Next" and which holds an ``<a>`` with an ``href`` supplies
    the link.

    Args:
        res_bs: Parsed HTML of the current result page (a BeautifulSoup
            object, or anything exposing the same ``find_all`` API).

    Returns:
        The next page's URL resolved against the BicCamera domain, or None
        when no such link is found.
    """
    from urllib.parse import urljoin

    domain = "https://www.biccamera.com/"
    # NOTE(review): this label is matched against the page text; confirm it
    # is the wording the live site actually uses for its pager.
    for cell in res_bs.find_all(class_="bcs_l"):
        if "Next" not in cell.text:
            continue
        link = cell.find("a")
        if link is None:
            continue
        href = link.get("href")
        if href:
            # urljoin copes with relative ("bc/..."), root-relative
            # ("/bc/...") and absolute hrefs; plain concatenation produced a
            # double slash or a broken URL for the latter two.
            return urljoin(domain, href)
    return None
def product_df(res_bs):
    """Build a DataFrame of product rows from a BicCamera result page.

    Args:
        res_bs: Parsed HTML of a result page (a BeautifulSoup object, or
            anything exposing the same ``find_all`` / ``find`` API).

    Returns:
        A pandas DataFrame with one row per product and the columns
        item_url, title, picture, maker, price, point, stock, ratings and
        terms. All values are the raw strings found in the page. When the
        page contains no product the DataFrame is empty.

    Raises:
        AttributeError: If a product lacks its link/image, maker or price
            element; these are treated as mandatory.
    """
    # Compile the class patterns once per call rather than once per product.
    # NOTE(review): "*" is a regex quantifier on the preceding character, not
    # a wildcard (e.g. "label_*" means "label" plus zero or more "_"). The
    # patterns are kept unchanged so the same elements keep matching.
    item_pattern = re.compile(r"prod_box sku*")
    price_pattern = re.compile(r"bcs_price*")
    stock_pattern = re.compile(r"label_*")

    def text_or(node, default):
        # Optional elements fall back to a placeholder string when absent.
        return default if node is None else node.text

    output = []
    for item in res_bs.find_all(class_=item_pattern):
        # The "cssopa" anchor carries the detail URL and wraps the thumbnail,
        # whose alt text doubles as the product title.
        link = item.find(class_="cssopa")
        image = link.find("img")
        # The review count lives in a link inside the star block; either the
        # block or the link may be missing.
        star = item.find(class_="bcs_star")
        review_link = star.find("a") if star is not None else None
        output.append({
            "item_url": link.get("href"),
            "title": image.get("alt"),
            "picture": image.get("src"),
            "maker": item.find(class_="bcs_maker").text,
            "price": item.find(class_=price_pattern).text,
            "point": text_or(item.find(class_="bcs_point"), "0"),
            "stock": text_or(item.find(class_=stock_pattern), "0"),
            "ratings": text_or(review_link, "0"),
            "terms": text_or(item.find(class_="bcs_ship"), "no ship info"),
        })
    return pd.DataFrame(output)
def get_product_list(url, pages=10):
    """Collect product rows from a result page and the pages that follow it.

    Starting at *url*, each page is fetched with ``get_html``, converted with
    ``product_df`` and the "next page" link from ``next_url`` is followed
    until no further page exists or *pages* pages have been read.

    Args:
        url: Address of the first result page.
        pages: Maximum number of pages to fetch.

    Returns:
        A pandas DataFrame holding the rows of every fetched page, in fetch
        order. Each page keeps its own row index, so index labels repeat
        across pages. An empty DataFrame is returned when no page is fetched.
    """
    frames = []
    total = 0
    for page_no in range(pages):
        res_bs = get_html(url)
        df = product_df(res_bs)
        frames.append(df)
        # Running row count, printed so progress can be followed.
        total += len(df)
        print("all_df:", total)
        # next_url returns None on the last page.
        url = next_url(res_bs)
        if url is None:
            print("break")
            break
        # Pause only when another request will follow, to avoid overloading
        # the target site without wasting time after the final page.
        if page_no + 1 < pages:
            print(f"sleeping.....{page_no}Time")
            time.sleep(5)
    if not frames:
        # pd.concat raises ValueError on an empty list (e.g. pages=0).
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; a single concat replaces it
    # and avoids re-copying the accumulated rows on every page.
    return pd.concat(frames)
if __name__ == "__main__":
    # `import urllib` alone does not guarantee that the `parse` submodule is
    # loaded, so the needed functions are imported explicitly.
    from urllib.parse import parse_qs, urlparse

    # Result page to start scraping from.
    url = "https://www.biccamera.com/bc/category/?q=laptop&rowPerPage=100#bcs_resultTxt"
    # Follow the result pages (up to the default page limit) and collect rows.
    # Pass pages=N to change the limit, e.g. get_product_list(url, pages=20).
    df = get_product_list(url)
    # Take the search word from the "q" query parameter to label the output;
    # fall back to a placeholder when the URL carries no "q".
    query = parse_qs(urlparse(url).query).get("q", ["unknown"])[0]
    # Save the collected rows as a timestamped CSV file.
    today = datetime.today().strftime("%Y%m%d_%H%M%S")
    df.to_csv(f"{today}_biccamera_{query}.csv", index=False)
I will not explain the above code from the beginning, so if you are interested, please watch the video.
Also, BicCamera is a slightly harder site to scrape (it requires re.compile), which makes it good teaching material, so I hope you will understand the principle and enjoy your scraping life.
As a YouTuber, I will continue with data preprocessing, saving, visualization, and so on. I will make "living by doing what you like" a reality.
Recommended Posts