[PYTHON] How to convert Web pages to PDF, PNG, JPG with VBA (Excel) (Selenium Basic)

How to convert Web pages to PDF, PNG, JPG with VBA (Excel)

-Overview- I made it because I often used it in business.

Selenium is a tool that easily automates browser operations. I often hear how to use Selenium with Python or JavaScript, but in fact it can be run with Visual Basic.

Especially in Japan, I think that many companies are highly dependent on Excel, so using Excel as a GUI may not be psychologically resistant. (I think I have a lot to say ...)

-procedure-

  1. Download Selenium Basic from Selenium Release.
  2. Install the above file.
  3. Download the driver for your browser. (This time, I will use Chrome) Chrome Driver
  4. Update chromedriver.exe of C: \ Users \ [user name] \ AppData \ Local \ Selenium Basic with chromedriver.exe downloaded in 3. (Just overwrite the file)
  5. write vba

-Details- 1.2. URL etc. may change. Please check each one. You can install it by default. 3. Again, the URL may change. Please check each one. For Chrome, please download the same version of Chrome you are using. 4. Just replace. 5.

If you go up to 4, Selenium Type Library will appear in Tools-> Reference Settings in the Excel development environment, so let's check it. If you don't do this, you won't be able to use Selenium. Also, check the reference settings such as Microsoft Scripting Runtime.

Below is an example. to ~ (save directory, save file name, web page URL, sheet line number (for checking or writing the save directory), sheet to manage whether saved or not)

For PDF

Option Explicit

Sub toPDF(ByVal directory, ByVal filename, ByVal url, ByVal i, ByVal sheetn As String)
  On Error GoTo myerror:
    Dim sheet1 As Worksheet
    Set sheet1 = Sheets(sheetn)
    Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    If (directory = "") Then directory = sPath
    If (Right(directory, 1) <> "\") Then directory = directory & "\"
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim savepath As String
    savepath = directory & filename & ".pdf"
    Dim driver As New Selenium.ChromeDriver
    driver.SetPreference "download.default_directory", directory
    driver.SetPreference "download.directory_upgrade", True
    driver.SetPreference "download.prompt_for_download", False
    driver.SetPreference "safebrowsing.enabled", True
    driver.SetPreference "plugins.plugins_disabled", Array("Chrome PDF Viewer")
    driver.AddArgument "headless"
    driver.AddArgument "disable-gpu"
    driver.AddArgument "hide-scrollbars"
    Dim w As Long
    Dim h As Long

    driver.Start
    driver.Get url

    w = driver.ExecuteScript("return document.body.scrollWidth")
    h = driver.ExecuteScript("return document.body.scrollHeight")
    
    Dim pdf As Object
    
    driver.Window.SetSize w, h
    
    Set pdf = CreateObject("Selenium.PdfFile")
    pdf.SetPageSize 210, 297, "mm"
    pdf.AddImage driver.TakeScreenshot, True
    pdf.SaveAs savepath
    sheet1.Cells(i, 5).Value = 1
    sheet1.Cells(i, 6).Value = savepath
    driver.Quit
    Exit Sub
myerror:
    MsgBox "no"
    sheet1.Cells(i, 5).Value = 0
End Sub
Sub dopdf()
  Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    Dim directory As String
    Dim filename As String
    directory = sPath
    If (Right(Len(directory), 1) <> "\") Then directory = directory & "\"
    
  Dim sheet1 As String
  Dim i As Long
  
  sheet1 = "For pdf"
  
  Dim sheetn As Worksheet
  Set sheetn = Sheets(sheet1)
  Dim r As Long
  r = sheetn.Cells(Rows.Count, 4).End(xlUp).Row
  For i = 2 To r
    If (sheetn.Cells(i, 4).Value = "") Then
      GoTo a1:
    End If
    filename = sheetn.Cells(i, 3).Text
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim result
    result = Dir(sheetn.Cells(i, 2).Text, vbDirectory)
    If (directory <> "" Or result <> True) Then sPath = sheetn.Cells(i, 2).Text
    Call toPDF(sPath, filename, sheetn.Cells(i, 4).Text, i, sheet1)
a1:
  Next i
End Sub

For JPG

Option Explicit

Sub toJPG(ByVal directory, ByVal filename, ByVal url, ByVal i, ByVal sheetn As String)
  On Error GoTo myerror:
    Dim sheet1 As Worksheet
    Set sheet1 = Sheets(sheetn)
    Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    If (directory = "") Then directory = sPath
    If (Right(directory, 1) <> "\") Then directory = directory & "\"
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim savepath As String
    savepath = directory & filename & ".jpg "
    Dim driver As New Selenium.ChromeDriver
    driver.SetPreference "download.default_directory", directory
    driver.SetPreference "download.directory_upgrade", True
    driver.SetPreference "download.prompt_for_download", False
    driver.SetPreference "safebrowsing.enabled", True
    driver.SetPreference "plugins.plugins_disabled", Array("Chrome PDF Viewer")
    driver.AddArgument "headless"
    driver.AddArgument "disable-gpu"
    driver.AddArgument "hide-scrollbars"
    Dim w As Long
    Dim h As Long
    
    driver.Start
    driver.Get url
    driver.FindElementByClass("tab02").Click
    driver.ExecuteScript ("this.document.getElementById('tab01').setAttribute('class','tabContent01');")
    driver.ExecuteScript ("this.document.getElementById('tab03').setAttribute('class','tabContent03');")
    w = driver.ExecuteScript("return document.body.scrollWidth")
    h = driver.ExecuteScript("return document.body.scrollHeight")
    driver.Window.SetSize w, h
    driver.TakeScreenshot.SaveAs savepath
    sheet1.Cells(i, 5).Value = 1
    sheet1.Cells(i, 6).Value = savepath
    driver.Quit
    Exit Sub
myerror:
    
    sheet1.Cells(i, 5).Value = 0
End Sub
Sub dojpg()
  Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    Dim directory As String
    Dim filename As String
    directory = sPath
    If (Right(Len(directory), 1) <> "\") Then directory = directory & "\"
    
  Dim sheet1 As String
  Dim i As Long
  
  sheet1 = "For jpg"
  
  Dim sheetn As Worksheet
  Set sheetn = Sheets(sheet1)
  Dim r As Long
  r = sheetn.Cells(Rows.Count, 4).End(xlUp).Row
  For i = 2 To r
    If (sheetn.Cells(i, 4).Value = "") Then
      GoTo a1:
    End If
    filename = sheetn.Cells(i, 3).Text
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim result
    result = Dir(sheetn.Cells(i, 2).Text, vbDirectory)
    If (directory <> "" Or result <> True) Then sPath = sheetn.Cells(i, 2).Text
    Call toJPG(sPath, filename, sheetn.Cells(i, 4).Text, i, sheet1)
a1:
  Next i
End Sub

For PNG

Option Explicit

Sub toPNG(ByVal directory, ByVal filename, ByVal url, ByVal i, ByVal sheetn As String)
  On Error GoTo myerror:
    Dim sheet1 As Worksheet
    Set sheet1 = Sheets(sheetn)
    Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    If (directory = "") Then directory = sPath
    If (Right(directory, 1) <> "\") Then directory = directory & "\"
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim savepath As String
    savepath = directory & filename & ".png "
    Dim driver As New Selenium.ChromeDriver
    driver.SetPreference "download.default_directory", directory
    driver.SetPreference "download.directory_upgrade", True
    driver.SetPreference "download.prompt_for_download", False
    driver.SetPreference "safebrowsing.enabled", True
    driver.SetPreference "plugins.plugins_disabled", Array("Chrome PDF Viewer")
    driver.AddArgument "headless"
    driver.AddArgument "disable-gpu"
    driver.AddArgument "hide-scrollbars"
    Dim w As Long
    Dim h As Long
    
    driver.Start
    driver.Get url
    driver.FindElementByClass("tab02").Click
    driver.ExecuteScript ("this.document.getElementById('tab01').setAttribute('class','tabContent01');")
    driver.ExecuteScript ("this.document.getElementById('tab03').setAttribute('class','tabContent03');")
    w = driver.ExecuteScript("return document.body.scrollWidth")
    h = driver.ExecuteScript("return document.body.scrollHeight")
    driver.Window.SetSize w, h
    driver.TakeScreenshot.SaveAs savepath
    sheet1.Cells(i, 5).Value = 1
    sheet1.Cells(i, 6).Value = savepath
    driver.Quit
    Exit Sub
myerror:
    
    sheet1.Cells(i, 5).Value = 0
End Sub
Sub dopng()
  Dim sPath As String, WSH As Variant
    Set WSH = CreateObject("WScript.Shell")
    sPath = WSH.SpecialFolders("Desktop") & "\"
    Dim directory As String
    Dim filename As String
    directory = sPath
    If (Right(Len(directory), 1) <> "\") Then directory = directory & "\"
    
  Dim sheet1 As String
  Dim i As Long
  
  sheet1 = "For png"
  
  Dim sheetn As Worksheet
  Set sheetn = Sheets(sheet1)
  Dim r As Long
  r = sheetn.Cells(Rows.Count, 4).End(xlUp).Row
  For i = 2 To r
    If (sheetn.Cells(i, 4).Value = "") Then
      GoTo a1:
    End If
    filename = sheetn.Cells(i, 3).Text
    If (filename = "") Then filename = Format(Now(), "yyyy-mm-dd-hh-mm-ss")
    Dim result
    result = Dir(sheetn.Cells(i, 2).Text, vbDirectory)
    If (directory <> "" Or result <> True) Then sPath = sheetn.Cells(i, 2).Text
    Call toPNG(sPath, filename, sheetn.Cells(i, 4).Text, i, sheet1)
a1:
  Next i
End Sub

The book structure consists of three "pdf", "png", and "jpg" sheets.

B, C, D columns after the second row Enter the save destination, save name, and URL It is saved when you move the macro.

By the way, you can also manipulate the DOM with driver.executescirpts etc., so you can capture the web page after playing with it.

sleepy. .. .. Also, I will attach a photo later.

Recommended Posts

How to convert Web pages to PDF, PNG, JPG with VBA (Excel) (Selenium Basic)
Convert PDF files to PNG files with GIMP
How to convert SVG to PDF and PNG [Python]
Convert A4 PDF to A3 every 2 pages
Convert PDF to image with ImageMagick
Made it possible to convert PNG to JPG with Pillow of Python
Convert from PDF to CSV with pdfplumber
Convert Excel data to JSON with python
Convert HEIC files to PNG files with Python
Convert DICOM to PNG with Ascending and Descending
[Python] How to read excel file with pandas
Convert svg file to png / ico with Python
How to deal with SessionNotCreatedException when using Selenium
Convert multiple jpg files to one PDF file
How to convert (32,32,3) to 4D tensor (1,32,32,1) with ndarray type
Automatic login to ServiceNow with Selenium Web Driver
How to convert / restore a string with [] in python
How to convert horizontally held data to vertically held data with pandas
How to convert a class object to a dictionary with SQLAlchemy
How to convert JSON file to CSV file with Python Pandas
How to convert Json file to CSV format or EXCEL format
How to switch to smartphone mode with Python + Selenium + Chrome
How to operate Firefox with selenium on Windows Memo
Convert 202003 to 2020-03 with pandas
How to debug selenium
Convert json to excel
How to deploy a web app made with Flask to Heroku
How to put a hyperlink to "file: // hogehoge" with sphinx-> pdf
How to convert an array to a dictionary with Python [Application]
How to output a document in pdf format with Sphinx
How to not load images when using PhantomJS with Selenium
How to manipulate the DOM in an iframe with Selenium
Convert garbled scanned images to PDF with Pillow and PyPDF
Introduction to Python for VBA users-Calling Python from Excel with xlwings-
How to scrape at speed per second with Python Selenium
How to read an Excel file (.xlsx) with Pandas [Python]