As a programming beginner learning web scraping, I'm writing down this memo — a minimal example that scrapes content and downloads the results to a local file — before I forget it.
# Sample: scrape article titles and URLs from a website and save the results
# to a CSV file on the local PC.

# Import required modules
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Column names for the output data frame.
columns = ["Article title", "URL"]

# Fetch the page with requests and parse the response body with Beautiful Soup.
res = requests.get("https:~~ URL of the website you want to scrape ~~")
soup = BeautifulSoup(res.content, 'html.parser')  # Beautiful Soup initialization
# The tag name and class vary depending on the website; replace XXXXX / YYYYYY.
tags = soup.find_all("XXXXX", {"class": "YYYYYY"})

# Collect (article title, URL) pairs.
# NOTE: the original used DataFrame.append() per row, which was removed in
# pandas 2.0 (and was O(n^2) anyway). Accumulate plain rows and build the
# DataFrame once at the end instead.
rows = []
for tag in tags:
    article = tag.a.string          # text of the first <a> inside the tag
    url = tag.a.get("href")         # link target of that <a>
    rows.append([article, url])
df = pd.DataFrame(rows, columns=columns)

# Save the csv file (via to_csv) in the same folder where this script lives.
df.to_csv("./news.csv")
print("End")
Recommended Posts