To address these four points, I wrote a subclass of Selenium's Chrome driver so that it is easy to use.
It takes more initialization arguments than the standard class, but for now it is convenient to use. If I have time, I will add an explanation of each step at a later date.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import selenium.webdriver
import os
import time
import shutil
from tempfile import TemporaryDirectory
from os.path import isdir, exists, join as pathjoin
class _tmpdir(TemporaryDirectory):
    """Temporary directory that is also cleaned up when the object is destroyed.

    Adds a ``__del__`` hook on top of ``TemporaryDirectory`` that calls the
    inherited ``cleanup()``.
    """

    def __del__(self):
        """Remove the directory by calling ``cleanup()``."""
        self.cleanup()
class ChromeOpen(selenium.webdriver.Chrome):
    """Chrome driver with frequently used settings gathered into ``__init__``.

    On top of ``selenium.webdriver.Chrome`` this class:

    * applies page-load and script timeouts (*1),
    * stages downloads in a private temporary directory and moves finished
      files into ``download_dir`` (*2),
    * adds a fixed set of Chrome command-line switches (*3),
    * offers ``xpath()`` as a short alias for an XPath element lookup (*4),
    * optionally navigates to ``init_url`` right after start-up.
    """

    # Suffix Chrome gives to a download that is still being written.
    _PARTIAL_SUFFIX = ".crdownload"
    # Seconds to sleep between polls of the staging directory.
    _POLL_INTERVAL = 0.5

    def __init__(self, init_url=None, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 keep_alive=True,
                 download_dir=None, background=False, timeout=300,
                 disable_extensions=True, maximized=False,
                 proxy_direct=True):
        """Start Chrome and optionally open ``init_url``.

        Args:
            init_url: URL loaded with ``get()`` once the browser is up; skipped
                when falsy.
            executable_path, port, service_args, desired_capabilities,
            service_log_path, keep_alive: passed through unchanged to
                ``selenium.webdriver.Chrome``.
            options: ``ChromeOptions`` to extend; a new instance is created
                when ``None``. The object is modified in place.
            download_dir: existing directory that finished downloads are
                moved into. When falsy, no download handling is set up.
            background: add ``--headless``.
            timeout: page-load and script timeout in seconds; not applied when
                ``None`` or not positive.
            disable_extensions: add ``--disable-extensions``.
            maximized: add ``--start-maximized``.
            proxy_direct: add the direct-proxy / bypass-all switches.

        Raises:
            FileNotFoundError: ``download_dir`` does not exist.
            NotADirectoryError: ``download_dir`` is not a directory.
        """
        self.init_url = init_url
        self._tmpdir = None
        if options is None:
            options = selenium.webdriver.ChromeOptions()
        self.options = options  # *3
        addarg = self.options.add_argument
        addarg('--ignore-certificate-errors-spki-list')
        addarg('--ignore-certificate-errors')
        addarg('--ignore-ssl-errors')
        if background:
            addarg('--headless')
        if disable_extensions:
            addarg('--disable-extensions')
        if maximized:
            addarg('--start-maximized')
        if proxy_direct:
            addarg('--proxy-server="direct://"')
            addarg('--proxy-bypass-list=*')

        # Start from any prefs the caller already put on ``options`` so they
        # are not silently discarded; the keys set below take precedence.
        self.prefs = dict(self.options.experimental_options.get("prefs") or {})
        self.prefs.update({
            # NOTE(review): presumably makes PDFs download instead of opening
            # in the built-in viewer -- confirm against Chrome's preferences.
            "plugins.plugins_list":
                [{"enabled": False,
                  "name": "Chrome PDF Viewer"}],
            "download.extensions_to_open": "application/pdf",
        })

        self.download_dir = download_dir
        if self.download_dir:
            if not exists(self.download_dir):
                raise FileNotFoundError("Not Found Directory {}".format(self.download_dir))
            if not isdir(self.download_dir):
                raise NotADirectoryError("Not a Download directory {}".format(self.download_dir))
            # Chrome writes into the staging directory; finished files are
            # moved to download_dir by organize_download_files().
            self.prefs.update({"download.default_directory": self.tmpdir.name})  # *2
        self.options.add_experimental_option("prefs", self.prefs)

        try:
            super().__init__(
                executable_path=executable_path,
                port=port,
                options=self.options,
                service_args=service_args,
                desired_capabilities=desired_capabilities,
                service_log_path=service_log_path,
                keep_alive=keep_alive,
            )
        except BaseException:
            # The browser never started; only the staging directory exists.
            self._discard_tmpdir()
            raise

        try:
            if timeout is not None and timeout > 0:
                self.set_page_load_timeout(timeout)  # *1
                self.set_script_timeout(timeout)  # *1
            if self.init_url:
                self.get(self.init_url)
        except BaseException:
            # The browser is already running but the caller will never get
            # this object back, so shut it down here instead of orphaning it.
            self._abort()
            raise

    def _discard_tmpdir(self):
        """Remove the staging directory, if any, and forget it."""
        staging, self._tmpdir = self._tmpdir, None
        if staging is not None:
            staging.cleanup()

    def _abort(self):
        """Shut the browser down without waiting for downloads."""
        try:
            super().quit()
        finally:
            self._discard_tmpdir()

    def xpath(self, xpath):  # *4
        """Return the first element matching the XPath expression ``xpath``."""
        return self.find_element(by="xpath", value=xpath)

    @property
    def tmpdir(self):  # *2
        """Staging directory for downloads, created on first access."""
        if self._tmpdir is None:
            self._tmpdir = _tmpdir()
        return self._tmpdir

    def pagetop(self):
        """Scroll the window to the top of the page."""
        self.execute_script("window.scrollTo(0, 0);")

    def pageend(self):
        """Scroll the window to the bottom of the page."""
        self.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    def organize_download_files(self):  # *2
        """Move finished downloads from the staging directory to ``download_dir``.

        Files ending in ``.crdownload`` are left in place. An existing file or
        directory of the same name in ``download_dir`` is replaced.

        Returns:
            True if at least one ``.crdownload`` file is still present,
            False otherwise (including when no ``download_dir`` is configured).
        """
        if not self.download_dir:
            return False
        staging = self.tmpdir.name
        pending = False
        for fn in os.listdir(staging):
            if fn.endswith(self._PARTIAL_SUFFIX):
                pending = True
                continue
            src = pathjoin(staging, fn)
            dst = pathjoin(self.download_dir, fn)
            if isdir(dst):
                shutil.rmtree(dst)
            elif exists(dst):
                os.remove(dst)
            shutil.move(src, dst)
        return pending

    def wait_for_downloads(self, timeout=None):  # *2
        """Block until the staging directory is empty.

        Args:
            timeout: maximum number of seconds to wait for in-progress
                downloads; ``None`` (the default) waits indefinitely.

        Raises:
            TimeoutError: ``timeout`` elapsed while ``.crdownload`` files
                were still present.
        """
        if not self.download_dir or self._tmpdir is None:
            return
        deadline = None if timeout is None else time.monotonic() + timeout
        while os.listdir(self._tmpdir.name):
            if not self.organize_download_files():
                # Everything seen was moved; re-check for late arrivals.
                continue
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    "Downloads still in progress after {} seconds".format(timeout))
            time.sleep(self._POLL_INTERVAL)

    def quit(self):
        """Wait for downloads, close the browser and remove the staging directory.

        The browser is closed even when waiting for downloads fails or is
        interrupted, and calling ``quit()`` more than once is safe with
        respect to the staging directory.
        """
        try:
            if self.download_dir and self._tmpdir is not None:
                self.wait_for_downloads()  # *2
        finally:
            try:
                super().quit()
            finally:
                self._discard_tmpdir()
Because I almost always call driver.get(url) immediately after creating the driver, the first argument is the URL to display initially. Frequently used arguments are gathered in __init__ and given default values. With that, usage is a single step, as follows.
# Usage example: open Google, search for "hoge", and scroll to the bottom.
with ChromeOpen("https://www.google.com", download_dir="C:/temp/hoge") as driver:
    query_box = driver.xpath('//*[@name="q"]')  # the search input field
    query_box.send_keys("hoge")
    query_box.submit()
    driver.pageend()  # scroll to the last line of the search results
    time.sleep(3)  # the browser closes in an instant otherwise, so wait 3 seconds
Calling send_keys is also tedious, so I would like to simplify that as well.
Recommended Posts