[PYTHON] Extraits (scraping) enregistrés dans Google Colaboratory

Beautifulsoup4

base

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

url = "http://example.jp"

# Send a browser-like User-Agent (an Internet Explorer 11 string) instead of
# the default one that requests would use.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; fail after 10 seconds instead.
r = requests.get(url, headers=headers, timeout=10)
# Raise requests.HTTPError on a 4xx/5xx status rather than parsing an error page.
r.raise_for_status()

# Parse the raw response bytes with the standard-library HTML parser.
soup = BeautifulSoup(r.content, "html.parser")


# Resolve a relative link against the page URL.
urljoin(url, "index.html")

session

# Use a Session as a context manager so it is closed when the block exits.
with requests.Session() as s:

    # requests applies no timeout by default; give up after 10 seconds
    # rather than blocking indefinitely on an unresponsive server.
    r = s.get("http://example.jp", headers=headers, timeout=10)
    # Raise requests.HTTPError on a 4xx/5xx status.
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

Pandas

import pandas as pd

# pd.read_html returns a list of DataFrames (one per table found on the page);
# the first row is used as the header and the first column as the index.
df = pd.read_html(
    "http://example.jp",
    index_col=0,
    header=0,
)

Selenium

# Notebook shell escapes ("!" runs the rest of the line as a shell command).
# Install the chromium-chromedriver package with apt.
!apt install chromium-chromedriver
# Copy the chromedriver binary into /usr/bin.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
# Install the Selenium Python bindings with pip.
!pip install selenium

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options

import time

from selenium.webdriver.common.by import By

# Run Chrome headless, with the sandbox and /dev/shm usage disabled.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Current Selenium releases no longer accept the driver path as the first
# positional argument; chromedriver is expected to be reachable on PATH.
driver = webdriver.Chrome(options=options)
# Wait up to 10 seconds when locating elements before giving up.
driver.implicitly_wait(10)

# Main window: remember its handle so it can be switched back to later.
parent_window = driver.current_window_handle

driver.get("http://example.jp")

# Print the current URL
print(driver.current_url)

time.sleep(3)

# Click the link. The find_element_by_* helpers were removed in Selenium 4,
# so use find_element with a By locator instead.
driver.find_element(By.LINK_TEXT, "XXXXX").click()

# Switch to the most recently opened window
driver.switch_to.window(driver.window_handles[-1])

# NOTE: call driver.quit() once finished to shut down the browser process.

Recommended Posts

Extraits (scraping) enregistrés dans Google Colaboratory

Extraits enregistrés dans Google Colaboratory (conversion de texte PDF)

Google Colaboratory

[Débutant] Scraping Web en Python facile à comprendre à l'aide de Google Colaboratory

Utilisez cartopy avec Google Colaboratory sans souffrir de bugs

Cheat sheet lors du scraping avec Google Colaboratory (Colab)

Résumé de la configuration de Google Colaboratory

Comment charger des fichiers dans Google Drive avec Google Colaboratory

Comment utiliser le modèle japonais Spacy avec Google Colaboratory

Je ne peux pas utiliser la commande darknet dans Google Colaboratory!

Extraits de code souvent utilisés lors du traitement de vidéos avec Google Colaboratory