Python, Selenium. Как парсить таблицу?
Сайт e-library, дохожу до получения таблицы на сайте, но проблема, не могу её спарсить, не могу найти хорошую документацию по selenium'у. К тому же, таблица может меняться в зависимости от указанных фильтров. Моя цель: чтобы селениум парсил любую таблицу, которую даст сайт, либо выдавал сообщение *данных нету, но на процессе парсинга застрял( p.s.Делал по чужому примеру
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common import keys
from selenium.webdriver.support.select import Select
import time
url = 'https://elibrary.ru/'
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
binary_yandex_driver_file = 'yandexdriver.exe' # path to YandexDriver
driver = webdriver.Chrome(binary_yandex_driver_file, options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
original_window = driver.current_window_handle
assert len(driver.window_handles) == 1
try:
xpath = '/html/body/table/tbody/tr/td/table[1]/tbody/tr/td[1]/table/tbody/tr[2]/td/div/div/table[2]/tbody/tr/td[2]/a'
elem = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xpath)))
elem.click()
xpath2 = '/html/body/table/tbody/tr/td/table/tbody/tr/td[2]/table/tbody/tr/td/table[6]/tbody/tr[1]/td[3]/a'
elem2 = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xpath2)))
elem2.click()
wait.until(EC.number_of_windows_to_be(2))
for window_handle in driver.window_handles:
if window_handle != original_window:
driver.switch_to.window(window_handle)
break
form_xpath = '/html/body/center/form/table[1]/tbody/tr/td[1]/input'
elem3 = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, form_xpath)))
print(elem3)
elem3.send_keys('Кромина Л А')
xpath4 = '/html/body/center/form/table[1]/tbody/tr/td[2]/table/tbody/tr/td[1]/a'
elem4 = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xpath4)))
elem4.click()
time.sleep(4)
driver.switch_to.window(original_window)
time.sleep(4)
select_element = driver.find_element(By.NAME, 'begin_year')
select = Select(select_element)
option_list = select.options
select.select_by_value('2021')
select_element = driver.find_element(By.NAME, 'end_year')
select = Select(select_element)
option_list = select.options
select.select_by_value('2024')
time.sleep(4)
xpath5 = '/html/body/table/tbody/tr/td/table/tbody/tr/td[2]/table/tbody/tr/td/table[11]/tbody/tr/td[6]/a' #поиск
elem5 = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xpath5)))
elem5.click()
#ПЫТАЮСЬ ПАРСИТЬ (ДАЛЕЕ ВНИЗ)
rows = 1+len(driver.find_elements(By.XPATH,
"/html/body/div[3]/table/tbody/tr/td/table/tbody/tr/td[2]/table/tbody/tr[2]/td/table/tbody/tr[2]"))
# Obtain the number of columns in table
cols = len(driver.find_elements(By.XPATH,
"/html/body/div[3]/table/tbody/tr/td/table/tbody/tr/td[2]/table/tbody/tr[2]/td/table/tbody/tr[2]/td[2]"))
# Print rows and columns
print(rows)
print(cols)
# Printing the table headers
print("Locators "+" Description")
for r in range(2, rows+1):
for p in range(1, cols+1):
# obtaining the text from each column of the table
value = driver.find_element(By.XPATH,
"/html/body/div[3]/table/tbody/tr/td/table/tbody/tr/td[2]/table/tbody/tr[2]/td/table/tbody/tr[2]["+str(r)+"]/td[2]["+str(p)+"]").text
print(value, end=' ')
print()
# Printing the data of the table
finally:
driver.quit()