Ускорение парсинга с помощью asyncio
Мне нужно как-то ускорить парсинг до 3 секунд. Возможно ли это? Всего там 3000 объявлений: по 1000 в каждой из 3 категорий.
import aiohttp
import json
import time
import asyncio
# HTTP headers sent with every request to the search feed.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Accept": "application/json, text/plain, */*",
    "device": "pc",
}

# Categories to scrape: "id" goes into the request URL, "name" is copied
# into every ad record collected for that category.
categories = [
    {"id": 5830, "name": "Продажа автомобилей"},
    {"id": 2043, "name": "Аренда квартир"},
    {"id": 2046, "name": "Продажа квартир"},
]

# Number of ads to collect for each category.
total_ads_limit_per_category = 1000
_ADS_PER_PAGE = 40  # page size requested from the search feed


def _extract_ad(item, category_name):
    """Reduce one raw feed item to the fields that are forwarded downstream."""
    return {
        "title": item["title"],
        "description": item["description"],
        "price": item["price"],
        "currency": item["currency"],
        "city": item["city"],
        "mobile": item["mobile"],
        "images": [image["original_url"] for image in item["images"]],
        "category": category_name,
        "author": item["user"]["username"],
    }


async def _fetch_page(session, category_id, page):
    """Request one page of a category feed and return its list of raw items."""
    url = f"http://lalafo.kg/api/search/v3/feed/search?expand=url&per-page={_ADS_PER_PAGE}&category_id={category_id}&page={page}"
    async with session.get(url, headers=headers) as response:
        response_data = await response.json()
    # A payload without "items" (or with a null value) counts as an empty page.
    items = response_data.get("items") or []
    return items[:_ADS_PER_PAGE]


async def get_ads(session, category):
    """Collect up to ``total_ads_limit_per_category`` ads for one category.

    Pages are requested concurrently instead of one after another, so the
    total time is roughly that of the slowest request rather than the sum
    of all of them.

    Args:
        session: An object with an aiohttp-style ``get(url, headers=...)``
            method usable as an async context manager.
        category: Mapping with the keys ``"id"`` (used in the request URL)
            and ``"name"`` (copied into every returned record).

    Returns:
        A list of ad dictionaries in page order, never longer than
        ``total_ads_limit_per_category``. It is shorter when the feed runs
        out of ads first.

    Raises:
        KeyError: If a feed item lacks one of the expected fields.
    """
    category_id = category["id"]
    category_name = category["name"]
    ads_data = []
    next_page = 1

    while len(ads_data) < total_ads_limit_per_category:
        remaining = total_ads_limit_per_category - len(ads_data)
        # Ceiling division: the number of full pages needed to cover the rest.
        batch_size = -(-remaining // _ADS_PER_PAGE)
        pages = range(next_page, next_page + batch_size)
        # gather() returns results in argument order, so page order is kept.
        batches = await asyncio.gather(
            *(_fetch_page(session, category_id, page) for page in pages)
        )
        next_page += batch_size

        exhausted = False
        for items in batches:
            if not items:
                # An empty page means the feed has no more ads; without this
                # stop the loop would never end for small categories.
                exhausted = True
                break
            ads_data.extend(_extract_ad(item, category_name) for item in items)
        if exhausted:
            break

    # Short pages can trigger an extra batch that overshoots the limit.
    return ads_data[:total_ads_limit_per_category]
async def send_ads_data(data):
    """POST ``data`` as a JSON body to the local receiver.

    A fresh client session is opened for the single request and closed
    again before returning.

    Args:
        data: JSON-serialisable payload to send.

    Returns:
        The response body as text.
    """
    endpoint = 'http://127.0.0.1:8000'
    async with aiohttp.ClientSession() as client, client.post(endpoint, json=data) as reply:
        body = await reply.text()
    return body
async def main():
    """Scrape every configured category concurrently and forward the ads.

    One shared client session (connector created with ``limit=1000``) is
    used for all categories. The per-category results are flattened into a
    single list, in the order of ``categories``, and sent with
    ``send_ads_data``.
    """
    connector = aiohttp.TCPConnector(limit=1000)
    async with aiohttp.ClientSession(connector=connector) as session:
        per_category = await asyncio.gather(
            *(get_ads(session, category) for category in categories)
        )

    # Flatten the list of per-category lists into one payload.
    collected = [ad for batch in per_category for ad in batch]
    await send_ads_data(collected)
if __name__ == "__main__":
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # intervals; time.time() follows the wall clock and may jump if the
    # system time is adjusted while the script runs.
    start_time = time.perf_counter()
    asyncio.run(main())
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"Время выполнения скрипта: {elapsed_time} секунд")
Источник: Stack Overflow на русском