Python: 检查URL是否能正常访问
2025-02-17
URL 检查方法
方法 1: urllib
import urllib.request
import time
def check_url(url, timeout=10):
    """Check whether a single URL is reachable.

    Args:
        url: The URL to request.
        timeout: Socket timeout in seconds (added so a stalled server
            cannot block the call forever).

    Returns:
        True if the request completed without raising, False otherwise.
    """
    opener = urllib.request.build_opener()
    # Some servers reject the default Python User-Agent; mimic a browser.
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
    try:
        # Use a context manager so the response/socket is always closed
        # (the original leaked the response object).
        with opener.open(url, timeout=timeout):
            pass
        print(f"{url} 访问正常")
        return True
    except urllib.error.HTTPError as e:
        # HTTPError first: it is a subclass of URLError.
        print(f"{url} HTTP错误: {e.code}")
        return False
    except urllib.error.URLError as e:
        print(f"{url} 访问失败: {e.reason}")
        return False
def check_urls_from_file(filename):
"""从文件读取并检查多个 URL"""
with open(filename, 'r') as f:
urls = [line.strip() for line in f]方法 2: requests
import requests
from typing import List
def check_url(url: str) -> bool:
    """Return True when *url* answers with HTTP 200 within 5 seconds."""
    try:
        resp = requests.get(url, timeout=5)
    except requests.RequestException as e:
        # Any network-level failure (DNS, connection, timeout) lands here.
        print(f"访问 {url} 失败: {e}")
        return False
    return resp.status_code == 200
def check_urls(urls: List[str]) -> dict:
"""批量检查 URLs"""
results = {}
for url in urls:
results[url] = check_url(url)
return results方法 3: 异步检查
import aiohttp
import asyncio
from typing import List
async def check_url(url: str, timeout: float = 5.0) -> bool:
    """Asynchronously check whether *url* answers with HTTP 200.

    Args:
        url: URL to request.
        timeout: Total request timeout in seconds; without one, a stalled
            server would block the coroutine (and any gather over it)
            indefinitely.

    Returns:
        True on HTTP 200, False on any client error or timeout.
    """
    # NOTE(review): one ClientSession per URL works but is wasteful when
    # checking many URLs — consider sharing a session across calls.
    async with aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=timeout)
    ) as session:
        try:
            async with session.get(url) as response:
                return response.status == 200
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # aiohttp timeouts raise asyncio.TimeoutError, which is NOT a
            # ClientError subclass — it must be caught explicitly.
            return False
async def check_urls(urls: List[str]):
    """Run check_url() concurrently for every URL; return {url: bool}."""
    statuses = await asyncio.gather(*(check_url(u) for u in urls))
    return dict(zip(urls, statuses))

最佳实践
异常处理
- 设置超时时间
- 处理各种网络异常
- 记录错误信息
性能优化
- 使用异步请求
- 设置并发限制
- 添加重试机制
注意:
- 遵守网站的爬虫规则
- 添加适当的请求间隔
- 使用合适的 User-Agent