Python. Парсинг. Как сделать парсинг всех страниц сайта?
import requests
from bs4 import BeautifulSoup
import csv
import openpyxl
import pandas as pd
import xlsxwriter
import xlrd
from openpyxl import load_workbook
import numpy as np
from csv import writer
from lxml import html
headers = {
'accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 OPR/94.0.0.0 (Edition Yx 02)'
}
url = 'https://www.divan.ru/category/krovati'
response = requests.get(url) # обработка запроса метотдом get
soup = BeautifulSoup(response.text, 'lxml') # создаем суп
item = soup.find_all('div', {'class' : 'LlPhw'})
nami = soup.find_all('a', {'class': 'ImmXq dpmhZ b8BqN ProductName'})
pagination = soup.find('div', class_='dqBvL').find_all('a')
pages = pagination[-2].text
print('Всего страниц: ' + pages)
data = []
for page in range(1, int(pages)+1):
response = requests.get(url, headers=headers, params={'PAGEN_1': page})
html = response.text
soup = BeautifulSoup(html, 'html.parser')
blocks = soup.find('div', class_= 'Lei8X').find_all('div', class_='LlPhw')
for block in blocks:
title = item.find('a').get_text(strip=True)
print(block)
for index, i in enumerate(item, start=1):
blocks = soup.find('div', class_='dqBvL').find_all('a')
nami = i.find('a', {'class': 'ImmXq dpmhZ b8BqN ProductName'}).text.strip()
print(f'{index}. {nami}')
with xlsxwriter.Workbook('m.xlsx') as workbook:
worksheet = workbook.add_worksheet()
content = ["N", "Наименование"]
worksheet.write_row(0, 0, content)
for index, i in enumerate(item, start=1):
nami = i.find('a', {'class': 'ImmXq dpmhZ b8BqN ProductName'}).text.strip()
row = [index, nami]
worksheet.write_row(index, 0, row)
Подскажите пожалуйста, как сделать парсинг всех страниц ? То, что у меня, выдает ошибку:
Traceback (most recent call last):
File "C:\Users\Максим\PycharmProjects\pythonProject4\main.py", line 39, in <module>
title = item.find('a').get_text(strip=True)
^^^^^^^^^
File "C:\Users\Максим\.virtualenvs\pythonProject4\Lib\site-packages\bs4\element.py", line 2289, in __getattr__
raise AttributeError(
AttributeError: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
введите сюда код
Источник: Stack Overflow на русском