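Last time, I used Selenium to get all the elements of a list page and to move on to the next page.
This time, I click each link on the list page, grab the title from the page it opens, go back to the list, and then move on to the next page.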
test.py
import os, re
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Scrape titles from a paginated list: on each list page, click the first few
# item links one by one, collect the <h1> text of the page each link opens,
# navigate back, print the collected titles, then follow the "next page" link.
DRIVER_PATH = os.path.join(os.path.dirname(__file__), 'chromedriver')
# NOTE(review): passing the driver path positionally is the Selenium 3 call
# style; newer Selenium releases expect a Service object instead -- confirm
# against the installed Selenium version.
browser = webdriver.Chrome(DRIVER_PATH)
url = 'https://wwwXXXX'

PAGES_TO_VISIT = 3   # number of list pages to walk through
ITEMS_PER_PAGE = 5   # number of <li> positions tried on each list page
# XPath of the link inside the N-th list item (li indices are 1-based).
ITEM_LINK_XPATH = '//*[@id="main"]/ul/li[{}]/div[1]/a'
# XPath of the heading read after an item link has been clicked.
TITLE_XPATH = '//*[@id="main"]/div[1]/h1'
SEPARATOR = "\n+++++++++++++++++++++++++++++++++++++++++++++++++++"

try:
    browser.get(url)
    time.sleep(5)  # fixed pause after loading the first list page
    for _ in range(PAGES_TO_VISIT):
        try:
            name_list = []
            for position in range(1, ITEMS_PER_PAGE + 1):
                path = ITEM_LINK_XPATH.format(position)
                # find_elements returns an empty list when the li position
                # does not exist, so missing items are skipped silently.
                for link in browser.find_elements(By.XPATH, path):
                    link.click()
                    name_list.extend(
                        heading.text
                        for heading in browser.find_elements(By.XPATH, TITLE_XPATH)
                    )
                    browser.back()
            # Print each title as plain text (not wrapped in a 1-tuple).
            for name_title in name_list:
                print(name_title, SEPARATOR)
            # find_element raises when there is no next-page link; that case
            # is reported by the handler below.
            link_elem = browser.find_element(By.CLASS_NAME, 'nextpostslink')
            link_elem.click()
            time.sleep(5)  # fixed pause after moving to the next list page
        except WebDriverException:
            # Only Selenium failures are treated as "not found"; anything
            # else (Ctrl-C, programming errors) now propagates.
            print('not found!')
finally:
    # quit() ends the whole driver session, and runs even if loading or
    # scraping raised, so no browser/driver process is left behind.
    browser.quit()
The code is not pretty at all, but in the end I settled on simply incrementing the li index in the XPath to reach each link in turn. That said, it does almost everything I set out to do, so I'm calling it done.
Recommended Posts