一、核心技术
import aiohttp
import asyncio
async def fetch(session, url):
    """Request ``url`` through ``session`` and return the response body text.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            that yields a response exposing an awaitable ``text()``.
            ``main`` passes an ``aiohttp.ClientSession``.
        url: Address to request.

    Returns:
        Whatever awaiting ``response.text()`` produces.
    """
    # The request context is exited as soon as the body has been read,
    # and also when reading the body raises.
    async with session.get(url) as response:
        return await response.text()
async def main(urls):
    """Download every URL in ``urls`` concurrently over one shared session.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A list with one ``fetch`` result per URL, in the same order as
        ``urls``.
    """
    # One ClientSession is shared by all requests and closed when the
    # ``async with`` block exits.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        # gather() schedules the coroutines concurrently and keeps the
        # results in input order.
        return await asyncio.gather(*tasks)
# Demo run: download the two sample pages concurrently and keep their
# bodies in ``results`` (same order as ``urls``).
urls = [
    'http://example.com/1',
    'http://example.com/2',
]
results = asyncio.run(main(urls))
二、完整案例
class AsyncSpider:
    """Crawl URLs concurrently while capping the number of in-flight requests.

    ``crawl`` calls ``self.fetch(url)`` (awaited) and ``self.parse(data)``.
    Neither is defined in this class body, so they must be supplied
    elsewhere, for example by a subclass.
    """

    def __init__(self, concurrency=10):
        """Set up the concurrency limiter.

        Args:
            concurrency: Maximum number of ``crawl`` calls allowed inside
                the throttled section at once. Must be at least 1.

        Raises:
            ValueError: If ``concurrency`` is less than 1.
        """
        # A semaphore that starts at 0 never admits any task, so run()
        # would wait forever; fail fast instead.
        if concurrency < 1:
            raise ValueError(
                f"concurrency must be at least 1, got {concurrency!r}"
            )
        self.semaphore = asyncio.Semaphore(concurrency)

    async def crawl(self, url):
        """Fetch and parse one URL while holding a semaphore slot.

        Args:
            url: Address handed to ``self.fetch``.

        Returns:
            The value returned by ``self.parse`` for the fetched data.
        """
        async with self.semaphore:
            # Throttle requests: at most ``concurrency`` tasks get past
            # this point at the same time.
            data = await self.fetch(url)
            return self.parse(data)

    async def run(self, urls):
        """Crawl every URL in ``urls`` concurrently.

        Args:
            urls: Iterable of URLs.

        Returns:
            A list of ``crawl`` results in the same order as ``urls``.
        """
        tasks = [self.crawl(url) for url in urls]
        return await asyncio.gather(*tasks)