[PYTHON] In Google Colaboratory registrierte Snippets (Scraping)

Beautifulsoup4

Base

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Page to fetch; also used below as the base for resolving relative links.
url = "http://example.jp"

# Send a browser User-Agent string instead of the python-requests default.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=headers, timeout=10)
# Raise requests.HTTPError on 4xx/5xx instead of parsing an error page.
r.raise_for_status()

# Pass the raw bytes so BeautifulSoup detects the encoding itself.
soup = BeautifulSoup(r.content, "html.parser")


# Resolve a relative link against the page URL; as the last expression of a
# notebook cell, the result is displayed.
urljoin(url, "index.html")

Session

# Fetch the page through a Session, which is closed when the block exits.
# `headers` is the User-Agent dict defined in the Base snippet above.
with requests.Session() as s:

    # Without a timeout, the request can block forever on a stalled server.
    r = s.get("http://example.jp", headers=headers, timeout=10)
    # Raise requests.HTTPError on 4xx/5xx responses.
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

Pandas

import pandas as pd

# read_html returns a list of DataFrames (one per parsed table), so `df` holds
# a list here. header=0 takes the first row as column labels; index_col=0
# takes the first column as the index.
df = pd.read_html(
    io="http://example.jp",
    header=0,
    index_col=0,
)

Selenium

!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Run Chrome without a display, without the sandbox, and without relying on
# /dev/shm for shared memory.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# chromedriver was copied into /usr/bin above, so it is found on PATH.
# Selenium 4 no longer accepts the driver path as the first positional argument.
driver = webdriver.Chrome(options=options)
# Let element lookups poll for up to 10 seconds before failing.
driver.implicitly_wait(10)

# Main window
parent_window = driver.current_window_handle

driver.get("http://example.jp")

# Show the current URL
print(driver.current_url)

time.sleep(3)

# Click the link; the find_element_by_* helpers were removed in Selenium 4.3,
# so use find_element with a By locator instead.
driver.find_element(By.LINK_TEXT, "XXXXX").click()

# Switch to the last window handle (presumably the window the click opened)
driver.switch_to.window(driver.window_handles[-1])

Recommended Posts

In Google Colaboratory registrierte Snippets (Scraping)

In Google Colaboratory registrierte Snippets (PDF-Textkonvertierung)

Google Colaboratory

[Anfänger] Leicht verständliches Python-Web-Scraping mit Google Colaboratory

Verwenden Sie Cartopy mit Google Colaboratory, ohne an Fehlern zu leiden

Cheat Sheet für das Scraping mit Google Colaboratory (Colab)

Zusammenfassung der Einrichtung von Google Colaboratory

So laden Sie Dateien in Google Drive mit Google Colaboratory

Verwendung des japanischen Spacy-Modells mit Google Colaboratory

Ich kann den Darknet-Befehl in Google Colaboratory nicht verwenden!

Codefragmente, die häufig bei der Verarbeitung von Videos mit Google Colaboratory verwendet werden