Using Proxy IPs in Python: Examples
Python Code Examples
Using a Proxy with the Python requests Library
Install dependencies
pip install requests
Basic usage example
import requests

# Fetch a proxy IP from the API
api_url = "http://api.mayihttp.com/getapi"
params = {
    "key": "YOUR_API_KEY",
    "num": 1,
    "type": "HTTP",
    "time": 15
}
response = requests.get(api_url, params=params)
proxy_data = response.json()

if proxy_data["code"] == 0:
    proxy_info = proxy_data["data"][0]
    proxy = {
        "http": f"http://{proxy_info['ip']}:{proxy_info['port']}",
        "https": f"http://{proxy_info['ip']}:{proxy_info['port']}"
    }
    # Use the proxy to visit the target site
    target_url = "http://httpbin.org/ip"
    result = requests.get(target_url, proxies=proxy, timeout=10)
    print(result.text)
else:
    print(f"Failed to get a proxy: {proxy_data['msg']}")
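A freshly issued proxy is not guaranteed to be live. Below is a minimal verification sketch; verify_proxy is a hypothetical helper, and it relies on the fact that http://httpbin.org/ip echoes the caller's origin IP:

import requests


def verify_proxy(proxy, expected_ip):
    """Return True if traffic actually exits through the proxy.

    httpbin.org/ip echoes the caller's origin IP, so the response
    should contain the proxy's IP when the proxy is working.
    """
    try:
        resp = requests.get("http://httpbin.org/ip", proxies=proxy, timeout=10)
        return expected_ip in resp.json().get("origin", "")
    except requests.RequestException:
        return False


# e.g. verify_proxy(proxy, proxy_info["ip"]) with the variables above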
Advanced usage example
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


class ProxySession:
    def __init__(self, api_key):
        self.api_key = api_key
        self.session = self._create_session()

    def _create_session(self):
        # Session with automatic retries on connection and read errors
        session = requests.Session()
        retry = Retry(
            total=3,
            read=3,
            connect=3,
            backoff_factor=0.3
        )
        adapter = HTTPAdapter(max_retries=retry)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    def get_proxy(self):
        """Fetch a proxy IP from the API."""
        api_url = "http://api.mayihttp.com/getapi"
        params = {
            "key": self.api_key,
            "num": 1,
            "type": "HTTP"
        }
        response = self.session.get(api_url, params=params)
        data = response.json()
        if data["code"] == 0:
            proxy_info = data["data"][0]
            return {
                "http": f"http://{proxy_info['ip']}:{proxy_info['port']}",
                "https": f"http://{proxy_info['ip']}:{proxy_info['port']}"
            }
        else:
            raise Exception(f"Failed to get a proxy: {data['msg']}")

    def fetch(self, url, **kwargs):
        """Fetch a URL through a freshly acquired proxy."""
        proxy = self.get_proxy()
        kwargs["proxies"] = proxy
        kwargs["timeout"] = kwargs.get("timeout", 10)
        return self.session.get(url, **kwargs)


# Usage example
proxy_session = ProxySession("YOUR_API_KEY")
response = proxy_session.fetch("http://httpbin.org/ip")
print(response.text)
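Note that fetch() pulls a brand-new proxy on every call, which can exhaust a metered plan quickly. Since the time parameter above suggests proxies stay valid for a while, a variant that reuses the current proxy until a request fails may be preferable. A sketch under that assumption; RotatingProxySession is a hypothetical extension of the class above:

import requests


class RotatingProxySession(ProxySession):
    """Hypothetical extension of ProxySession: keep one proxy until it fails."""

    def __init__(self, api_key):
        super().__init__(api_key)
        self._current = None

    def fetch(self, url, max_attempts=3, **kwargs):
        kwargs.setdefault("timeout", 10)
        for _ in range(max_attempts):
            if self._current is None:
                self._current = self.get_proxy()  # only when no live proxy is cached
            try:
                return self.session.get(url, proxies=self._current, **kwargs)
            except requests.RequestException:
                self._current = None  # drop the dead proxy, try a fresh one
        raise Exception(f"All {max_attempts} attempts failed for {url}")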
Scrapy Framework Configuration
1. Middleware configuration
Create a proxy middleware in middlewares.py:
import random

from mayihttp import Client


class ProxyMiddleware:
    def __init__(self):
        self.client = Client("YOUR_API_KEY")
        self.proxy_pool = []

    def get_proxy(self):
        # Refill the pool in batches of 10, then pick a proxy at random
        if not self.proxy_pool:
            proxies = self.client.get_proxies(num=10)
            self.proxy_pool = [f"http://{p['ip']}:{p['port']}" for p in proxies]
        return random.choice(self.proxy_pool)

    def process_request(self, request, spider):
        request.meta["proxy"] = self.get_proxy()
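The pool above never evicts a proxy that has gone bad. Scrapy downloader middlewares may also implement process_exception(request, exception, spider); a hedged sketch of a method you could add inside ProxyMiddleware so the retried request goes out through a different proxy:

    # Add inside ProxyMiddleware:
    def process_exception(self, request, exception, spider):
        # Evict the failing proxy so it is not picked again; Scrapy's
        # RetryMiddleware will reschedule the request with a new proxy.
        proxy = request.meta.get("proxy")
        if proxy in self.proxy_pool:
            self.proxy_pool.remove(proxy)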
2. Configuration in settings.py
# Enable the proxy middleware
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.ProxyMiddleware": 350,
}

# Download timeout (seconds)
DOWNLOAD_TIMEOUT = 10

# Number of retries
RETRY_TIMES = 3
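By default Scrapy only retries network errors and a fixed set of status codes. If blocked or exhausted proxies surface as HTTP errors, extending RETRY_HTTP_CODES (a standard Scrapy setting) makes those responses retryable as well; the code list below is illustrative, not a recommendation:

# Also retry status codes that often indicate a blocked or dead proxy
RETRY_HTTP_CODES = [403, 407, 429, 500, 502, 503, 504]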
Asynchronous Request Example
import asyncio

import aiohttp


async def fetch_with_proxy(session, url, proxy):
    try:
        async with session.get(url, proxy=proxy,
                               timeout=aiohttp.ClientTimeout(total=10)) as response:
            return await response.text()
    except Exception as e:
        print(f"Request failed: {e}")
        return None


async def main():
    async with aiohttp.ClientSession() as session:
        # Fetch a batch of proxies from the API
        api_url = "http://api.mayihttp.com/getapi"
        params = {"key": "YOUR_API_KEY", "num": 5}
        async with session.get(api_url, params=params) as response:
            data = await response.json()

        if data["code"] == 0:
            # Hit the target URL through each proxy concurrently
            tasks = []
            for proxy_info in data["data"]:
                proxy = f"http://{proxy_info['ip']}:{proxy_info['port']}"
                tasks.append(fetch_with_proxy(session, "http://httpbin.org/ip", proxy))
            results = await asyncio.gather(*tasks)
            for result in results:
                if result:
                    print(result)


# Run the async entry point
asyncio.run(main())
Best Practices
1. Error handling
Always handle the errors the API may return (see the sketch after this list):
- Network timeouts
- Authentication failures
- Quota exhaustion
- Invalid parameters
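A minimal sketch of such handling, assuming the code/msg response fields used throughout this article; get_proxy_safely is a hypothetical helper:

import requests


def get_proxy_safely(api_key):
    """Hypothetical helper: fetch one proxy and surface each failure mode."""
    try:
        resp = requests.get(
            "http://api.mayihttp.com/getapi",
            params={"key": api_key, "num": 1, "type": "HTTP"},
            timeout=10,
        )
        resp.raise_for_status()  # HTTP-level errors (e.g. 5xx)
        data = resp.json()
    except requests.Timeout:
        raise Exception("Network timeout while calling the proxy API")
    except requests.RequestException as e:
        raise Exception(f"Proxy API request failed: {e}")
    if data["code"] != 0:
        # Covers authentication failures, quota exhaustion, bad parameters
        raise Exception(f"Proxy API error: {data.get('msg')}")
    return data["data"][0]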
2. Connection pool management
Reuse connections with a Session object to improve performance. For example:
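requests lets you size the underlying pool via HTTPAdapter; pool_connections and pool_maxsize are part of its API, and the sizes here are illustrative:

import requests
from requests.adapters import HTTPAdapter

session = requests.Session()
# Tune the pool to your concurrency level
adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
session.mount("http://", adapter)
session.mount("https://", adapter)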
3. Retry mechanism
Implement automatic retries to ride out transient network failures.
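The ProxySession above already does this with urllib3's Retry; the same mechanism can also retry on server-side status codes via status_forcelist, which is part of urllib3's Retry API:

from urllib3.util.retry import Retry

retry = Retry(
    total=3,
    backoff_factor=0.3,  # exponential backoff between attempts
    status_forcelist=[500, 502, 503, 504],  # also retry these status codes
)
# Mount it as in the advanced example:
# session.mount("http://", HTTPAdapter(max_retries=retry))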
4. Concurrency control
Cap the number of concurrent requests so you do not overload the target server.
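With aiohttp, a semaphore is the simplest way to enforce such a cap; this sketch reuses the fetch pattern from the async example, and the limit of 5 is illustrative:

import asyncio

import aiohttp


async def fetch_limited(sem, session, url, proxy):
    async with sem:  # at most N requests in flight at once
        async with session.get(url, proxy=proxy,
                               timeout=aiohttp.ClientTimeout(total=10)) as resp:
            return await resp.text()


async def crawl(urls, proxy):
    sem = asyncio.Semaphore(5)
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_limited(sem, session, u, proxy) for u in urls]
        return await asyncio.gather(*tasks, return_exceptions=True)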