environment:
This is useful when you want to run automated tests or operate a website programmatically to retrieve something. This time, I used the Selenium API. The documentation is here. I used Firefox for crawling. It apparently also works with Chrome and other browsers, but third-party browsers do not seem to be supported.
sample.py
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Walkthrough of common Selenium WebDriver operations against github.com.
# Bare expressions such as ``driver.title`` only evaluate the value; wrap them
# in print() (or run the lines in a REPL) to actually see the results.

# Launch the browser
driver = webdriver.Firefox()
# Maximize the window
driver.maximize_window()
# Open the website
driver.get("https://github.com")
# Get the title of the page
driver.title

# Locate the element whose name attribute is "q".
# Here, it is the keyword input form for repository search.
form_textfield = driver.find_element(By.NAME, "q")
# Enter "hoge" in the input form
form_textfield.send_keys("hoge")
# Delete the entered content
form_textfield.clear()

# Inspect the input form *before* submitting: once the form is submitted the
# page changes and this element reference can no longer be used.
# Position of the input form
form_textfield.location
# Size of the input form
form_textfield.size
# Tag name of the input form (the attribute is ``tag_name``, not ``tag``)
form_textfield.tag_name
# Text of the element (as found in the HTML source)
form_textfield.text

# Re-enter the keyword (it was cleared above) and perform the search
form_textfield.send_keys("hoge")
form_textfield.submit()

# Locate an element by XPath.
# Here, of the pagination buttons labeled 1, 2, ..., n at the bottom of the
# page, this refers to the button labeled 2.
next_button = driver.find_element(
    By.XPATH,
    "//*[@id=\"container\"]/div[2]/div/div[2]/div[2]/div/a[1]",
)
# This element is clickable, so click it
next_button.click()

# Locate an element by XPath.
# Here, narrow the results down to Python repositories and search again.
refined_search_python = driver.find_element(
    By.XPATH,
    "//*[@id=\"container\"]/div[2]/div/div[1]/ul/li[9]/a",
)
refined_search_python.click()

# Set the timeout (page load), in seconds <- not tried yet by the author.
# NOTE(review): 1 second is very short; later navigations may time out.
driver.set_page_load_timeout(1)
# Set the timeout (script execution), in seconds <- not tried yet by the author
driver.set_script_timeout(1)

# Go back one page
# Method 1: send the Backspace key. key_down()/key_up() are meant for
# modifier keys, so a plain send_keys() is used for Backspace.
ActionChains(driver).send_keys(Keys.BACKSPACE).perform()
# Method 2: use the browser history directly
driver.back()
# Go forward one page
driver.forward()

# Take a screenshot (no trailing space in the file name)
driver.get_screenshot_as_file("./hogehoge.png")
# Get the position of the window
driver.get_window_position()
# Get the size of the window
driver.get_window_size()
# Refresh the page
driver.refresh()
# Get the current URL
driver.current_url
# Find out which browser is currently in use
driver.name
# Get the source of the currently open page
driver.page_source

# Close the browser.
# quit() ends the whole session. The alternative, close(), closes only the
# current window; it must not be called after quit() because the session no
# longer exists at that point.
driver.quit()
Recommended Posts