**What I wanted to do** I want to take a screenshot of a web page on Heroku and crop it to a specific HTML element.
**Problem** When PhantomJS is driven through Selenium, there is no dependable method for getting an element's position (see the Stack Overflow link at the end: `element.location` returns the wrong location).
**Solution**
Execute JavaScript with the `execute_script` function provided by the
`selenium.webdriver.PhantomJS` class.
** Python library **
screenshot_crop.py
from PIL import Image
from selenium import webdriver
# File names must not carry stray whitespace: Pillow chooses the output format
# from the file extension, and ".png " (with a trailing space) is not a
# registered extension, so saving the cropped image would raise ValueError.
SCREENSHOT_PATH = "screenshot.png"
CROPPED_PATH = "screenshot_crop.png"

driver = webdriver.PhantomJS()
try:
    driver.get("https://www.yahoo.co.jp")  # (1)
    driver.save_screenshot(SCREENSHOT_PATH)  # (2)
    element_type = "Id"  # (3)
    element_name = "topicsboxbd"  # (4)
    # The element type and name are passed as script arguments instead of being
    # spliced into the JavaScript source, so quotes in the name cannot break
    # the script. The scroll offset converts the viewport-relative rectangle
    # into page coordinates.
    rect_script = """
        var element = document['getElementBy' + arguments[0]](arguments[1]);
        var rect = element.getBoundingClientRect();
        return {
            left: rect.left + (window.pageXOffset || 0),
            top: rect.top + (window.pageYOffset || 0),
            width: rect.width,
            height: rect.height
        };
    """  # (5)
    # One round-trip returns all four values of the bounding rectangle.
    rect = driver.execute_script(rect_script, element_type, element_name)  # (6)
finally:
    # quit() ends the whole browser session, even if a step above failed.
    driver.quit()

left = rect["left"]  # (6)
top = rect["top"]  # (6)
right = rect["width"] + left  # (7)
bottom = rect["height"] + top  # (7)

# crop() returns a new image, so the opened file is released by the context
# manager rather than by closing the cropped copy.
with Image.open(SCREENSHOT_PATH) as im:  # (8)
    cropped = im.crop((left, top, right, bottom))  # (9)
    cropped.save(CROPPED_PATH)  # (10)
(1) -- Specify the URL of the page to take the screenshot of.
(2) -- Save a screenshot of the entire page.
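(3) -- Specify the kind of lookup (`Id`, etc.) in `element_type`. The value is appended to `getElementBy`, so any XX for which JavaScript has a `document.getElementByXX` method is acceptable, and the string must therefore start with an uppercase letter. (Note: in the standard DOM, `getElementById` is the only method with exactly this prefix; `getElementsByClassName` and similar methods are spelled `getElements…` and return a collection, so they do not work with this snippet as written.)
(4) -- Specify, in `element_name`, the value of the attribute chosen in (3) (for example, the `main` part of `id="main"`).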
(5) -- The JavaScript code that locates the element and reads its bounding rectangle.
(6) (7) -- The JavaScript code is executed with the `driver.execute_script` function to get the coordinates of the upper-left and lower-right corners of the element.
(8) -- Open the screenshot saved in (2).
(9) -- Crop the original screenshot using the coordinates obtained in (6) and (7).
(10) -- Save the cropped screenshot.
`screenshot.png`: screenshot of the entire page.
`screenshot_crop.png`: `screenshot.png` cropped to the element with `id="topicsboxbd"`.
When PhantomJS is deployed on Heroku as-is and takes a screenshot, Japanese text is not rendered in the saved image.
Creating a `.fonts`
directory in the application's root directory and placing a TTF (or OTF) font file that supports Japanese in it makes the Japanese text display correctly.
Using phantomjs on Heroku | Program Memo
exphantom.py
from PIL import Image
from selenium import webdriver
class ScreenShot:
    """
    Take page screenshots with PhantomJS and optionally crop them to one element.

    The instance owns one PhantomJS session. Release it with ``close()`` or by
    using the instance as a context manager; ``__del__`` is only a fallback.

    NOTE(review): ``webdriver.PhantomJS`` is believed to be unavailable in
    Selenium 4 and later -- confirm the pinned Selenium version.
    """

    # Returns the element's bounding rectangle in page coordinates. The lookup
    # type and name arrive as script arguments, so they are never spliced into
    # the JavaScript source. getBoundingClientRect() is viewport-relative, hence
    # the scroll offsets.
    _RECT_SCRIPT = """
        var element = document['getElementBy' + arguments[0]](arguments[1]);
        var rect = element.getBoundingClientRect();
        return {
            left: rect.left + (window.pageXOffset || 0),
            top: rect.top + (window.pageYOffset || 0),
            width: rect.width,
            height: rect.height
        };
    """

    def __init__(self, file_name_: str = "screenshot.png"):
        """
        Start a PhantomJS session with a 1024x768 window.

        :param file_name_: path the screenshot is written to; surrounding
            whitespace is stripped
        """
        # Assigned first so close()/__del__ stay safe if the driver fails to start.
        self._driver = None
        self._filename = self._clean_file_name(file_name_)
        self._crop_margin = 0
        self._driver = webdriver.PhantomJS()
        self._driver.set_window_size(1024, 768)

    @staticmethod
    def _clean_file_name(file_name_) -> str:
        """Return the file name without surrounding whitespace.

        The image library chooses the output format from the extension, and an
        extension such as ".png " (trailing space) is not recognised.
        """
        return str(file_name_).strip()

    @staticmethod
    def _crop_box(rect, margin):
        """Return ``(left, top, right, bottom)`` for *rect* grown by *margin* on every side.

        :param rect: mapping with ``left``, ``top``, ``width`` and ``height``
        :param margin: number of pixels added around the rectangle
        """
        left = rect["left"] - margin
        top = rect["top"] - margin
        right = rect["left"] + rect["width"] + margin
        bottom = rect["top"] + rect["height"] + margin
        return left, top, right, bottom

    def screen_shot(self, url_: str) -> bool:
        """
        Take a screenshot of the specified url.

        :return: Success is True, Fail is False
        :param url_: the webpage to save screenshot
        """
        try:
            self._driver.get(url_)
            saved = self._driver.save_screenshot(self._filename)
        except Exception as e:
            print(e)
            return False
        # save_screenshot signals a failed write by returning False, not by raising.
        return saved is not False

    def screen_shot_crop(self, url_: str, search_element_name: str, search_element_type: str = "Id") -> bool:
        """
        Take a screenshot of the specified url and crop it to one element.

        The element is looked up with ``document.getElementBy<type>(<name>)``.
        The saved screenshot file is overwritten with the cropped image, which
        is enlarged by the configured crop margin on every side.

        :return: Success is True, Fail is False (page load, screenshot or
            element lookup failed)
        :param url_: the webpage to save screenshot
        :param search_element_name: value passed to the lookup method
        :param search_element_type: suffix of the lookup method, e.g. "Id"
        :raises: errors from opening, cropping or saving the image propagate
        """
        # Without a fresh screenshot there is nothing valid to crop.
        if not self.screen_shot(url_):
            return False
        try:
            rect = self._driver.execute_script(
                self._RECT_SCRIPT, search_element_type, search_element_name
            )
        except Exception as e:
            print(e)
            return False
        box = self._crop_box(rect, self._crop_margin)
        # crop() returns a new image; the context manager closes the opened file.
        with Image.open(self._filename) as full_page:
            cropped = full_page.crop(box)
            cropped.save(self._filename)
        return True

    def set_file_name(self, filename_: str):
        """Change the path that later screenshots are written to."""
        self._filename = self._clean_file_name(filename_)

    def set_window_size(self, width_: int, height_: int):
        """Resize the browser window used for rendering."""
        self._driver.set_window_size(width=width_, height=height_)

    def get_window_size(self) -> dict:
        """Return the driver's current window size."""
        return self._driver.get_window_size()

    def set_crop_margin(self, crop_margin_: int):
        """Set the margin, in pixels, added around the element when cropping."""
        self._crop_margin = crop_margin_

    def get_crop_margin(self) -> int:
        """Return the margin, in pixels, added around the element when cropping."""
        return self._crop_margin

    def ger_crop_margin(self) -> int:
        """Misspelled alias of ``get_crop_margin``, kept for existing callers."""
        return self.get_crop_margin()

    def close(self):
        """End the browser session. Safe to call more than once."""
        driver = getattr(self, "_driver", None)
        if driver is None:
            return
        self._driver = None
        # quit() shuts the whole session down; close() would only close the window.
        driver.quit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Best-effort fallback: the interpreter may already be shutting down,
        # so a failure to quit here is deliberately ignored.
        try:
            self.close()
        except Exception:
            pass
if __name__ == "__main__":
    # URL of the page to take a screenshot of
    screen_url = "https://www.yahoo.co.jp"
    # Lookup type of the element to crop to (suffix of getElementBy...)
    element_type = "Id"
    # Name (attribute value) of the element to crop to
    element_name = "topicsboxbd"
    # The save destination is given when the instance is created. The name has
    # no trailing space: the image library picks the format from the extension.
    ss = ScreenShot("screenshot.png")
    # Save a screenshot of screen_url
    if not ss.screen_shot(screen_url):
        print("could not save the screenshot of " + screen_url)
    # Change the save destination file name
    ss.set_file_name("screenshot_crop.png")
    # Save a screenshot cropped to the element selected by element_type / element_name
    if not ss.screen_shot_crop(screen_url, element_name, element_type):
        print("could not save the cropped screenshot of " + screen_url)
    # Drop the instance; ScreenShot.__del__ closes the browser
    del ss
**Actual use example** [Unofficial] Miyazaki University Support Division Notice BOT
python selenium phantomJS element.location returns wrong location - Stack Overflow
Recommended Posts