This is a memo on automating one small but tedious task.
Because I needed to pull data from a site that requires a login this time, I used Selenium for the first time; it turned out to be convenient.
On the project I am on as a second-tier subcontractor, I submit an Excel sheet summarizing my working hours to the prime contractor at the end of each month. The script below automates filling in the attendance and departure times.
python WorkingTimeGet.py
Python 3.8.5, selenium 3.141.0, beautifulsoup4 4.9.3, pandas 1.1.3, openpyxl 3.0.5, lxml (the parser the script passes to BeautifulSoup)
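If any of these are missing, they can be installed with pip, for example (a sketch; chromedriver-binary should match the installed Chrome version):

pip install selenium==3.141.0 beautifulsoup4==4.9.3 pandas==1.1.3 openpyxl==3.0.5 lxml chromedriver-binary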
WorkingTimeGet.py
import sys
import os
import re #Regular expressions
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import chromedriver_binary #Importing this puts the bundled chromedriver on the PATH
from bs4 import BeautifulSoup
import pandas as pd
import openpyxl as excel
loginpage = '{Login page URL}'
targetpage = '{URL of the working-hours inquiry page shown after login}'
idtxt = '{ID}'
passtxt = '{PASS}'
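# Note: hardcoding credentials is risky if the script is ever shared. A minimal
# alternative (a sketch using only the standard library) is to prompt for them
# at run time instead:
#   import getpass
#   idtxt = input('ID: ')
#   passtxt = getpass.getpass('PASS: ')  # input is not echoed to the screen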
print('Start processing')
print('Please enter the file name.')
file = input()
#File existence check
if not os.path.isfile(file):
    print('The file does not exist.')
    sys.exit()
# ================================================================================
#Get data from the target web page
# ================================================================================
#Open chrome
option = Options()
option.add_argument('--headless')
driver = webdriver.Chrome(options=option)
driver.get(loginpage)
#Set maximum wait time for element to be found
driver.implicitly_wait(5)
#Enter ID/PASS on the login screen
id_field = driver.find_element_by_id('{id of the ID input element}')
id_field.send_keys(idtxt)
password = driver.find_element_by_id('{id of the PASS input element}')
password.send_keys(passtxt)
#Click the login button (located by its name attribute)
login_button = driver.find_element_by_name('{name of the login button element}')
login_button.click()
print('Data acquisition ...')
time.sleep(1)
#Open the working hours inquiry screen
driver.get(targetpage)
#Get html
html = driver.page_source
driver.quit() #Close browser
print('Data acquisition completed')
# ================================================================================
#Data processing
# ================================================================================
print('Data processing ...')
#Parse HTML
soup = BeautifulSoup(html, 'lxml')
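# Note: as an alternative to the manual loop below, pandas can parse <table>
# elements directly; pd.read_html(html) returns a list of DataFrames, one per
# table found in the page. The loop is kept here to control the column names
# and whitespace stripping explicitly.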
#Create a DataFrame
columns = ["date", "Classification", "Attendance", "Leave work", "break", "total", "overtime", "Deduction", "project", "task", "Work type", "time"]
df = pd.DataFrame(columns = columns)
#Get table
table = soup.findAll("table", {"name": "{Selector name}"})[0]
trs = table.findAll("tr")
#Add each table row to the DataFrame
for tr in trs:
    tds = tr.findAll("td")
    if len(tds) < len(columns):
        continue  #Skip header or blank rows that have no <td> cells
    values = [td.get_text().strip() for td in tds[:len(columns)]]
    df2 = pd.DataFrame(data=[values], columns=columns)
    df = df.append(df2, ignore_index=True)  #DataFrame.append exists in pandas 1.1.3
print('Data processing completed')
# ================================================================================
#Write to excel
# ================================================================================
print('Start writing to Excel ...')
#Open the Excel workbook
wb = excel.load_workbook(file, keep_vba=True)  #keep_vba=True preserves macros in .xlsm files
ws = wb['{Sheet name}']
row = 1
for workfrom, workto in zip(df['Attendance'], df['Leave work']):
    #Set the attendance and departure times
    ws.cell(row=row, column=2).value = workfrom
    ws.cell(row=row, column=3).value = workto
    #Omitted ... (other processing)
    row += 1
wb.save(file)
print('Excel writing completed')
print('Processing finished')
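One possible improvement: the fixed time.sleep(1) after the login click can fail on a slow connection or waste time on a fast one. A more robust sketch (assuming Selenium 3.x and a hypothetical element id 'main' that appears once the post-login page has loaded) uses an explicit wait instead:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Block for up to 10 seconds until the post-login page has rendered,
# then continue immediately; 'main' is a placeholder element id.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, 'main'))
)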