基于Requests库的爬虫代理配置

发布时间: 2023-02-03 10:45:38

在进行爬虫项目时,如果没有针对特定网站制定相应的爬虫策略,那么会很容易造成自己的IP被网站封锁。

下面使用Requests库进行代理IP的配置和使用,同时也包括新建IP池列表的操作。

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

import requests

# 随机选择一个代理

import random

# 生成代理IP池

# Pool of candidate proxies, each written as "host:port".
proxy_list = [
    'xxx.xxx.x.xx:3128',
    'xx.xx.x.xx:8888',
    'xxx.xxx.xxx.xxx:3128',
]

# Pick one proxy at random from the pool.
proxy = random.choice(proxy_list)

# Show which proxy was selected.
print(proxy)

# The dict keys select which *target* URL scheme the proxy applies to; the
# values are the URL used to reach the proxy itself. A plain HTTP proxy is
# contacted over http:// even when it carries HTTPS traffic, so both entries
# use the http:// scheme. Writing 'https://' here would make the client try
# to open a TLS connection to the proxy, which ordinary HTTP proxies reject.
# NOTE(review): assumes the pool holds plain HTTP proxies; if a proxy really
# does accept TLS connections itself, give it an https:// URL instead.
proxies = {
    'http': 'http://' + proxy,
    'https': 'http://' + proxy,
}

# -----------------------------------------------------------------------------------

# 返回一个随机的请求头 headers

def getheaders():
    """Return request headers carrying a randomly chosen User-Agent.

    Returns:
        dict: A single-entry mapping ``{'User-Agent': <string>}`` whose value
        is picked at random from the list of browser identifiers below.
    """
    # Every entry needs its own trailing comma: adjacent string literals with
    # no comma between them are silently concatenated into one string, which
    # would yield a malformed User-Agent made of two identifiers glued together.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    ]

    return {'User-Agent': random.choice(user_agent_list)}

# ---------------------------------------------------------------------------------------

# Build the randomized request headers up front; this is local work and does
# not need to sit inside the network error handler.
headers = getheaders()
print(headers)

try:
    # Send a test request through the selected proxy, giving up after
    # 5 seconds so an unresponsive proxy cannot hang the script.
    response = requests.get(
        'http://httpbin.org/get',
        proxies=proxies,
        headers=headers,
        timeout=5,
    )
except requests.exceptions.RequestException as e:
    # RequestException is the base class of ConnectionError, so everything
    # that was caught before is still caught. It additionally covers read
    # timeouts raised because of the timeout above, which are not
    # ConnectionError subclasses and would otherwise end the script with an
    # unhandled traceback.
    print('Error', e.args)
else:
    # Only reached when the request succeeded: print the response body.
    print(response.text)

若运行结果中的 origin 字段显示的是代理的 IP(而不是本机 IP),则说明代理已经设置成功。

倘若需要使用优质代理IP,可以试用全民HTTP代理,可用率高,安全稳定,操作简单,也有专业技术人员在线指导,是代理IP软件的不二之选。

长效静态套餐提取步骤
爬虫IP代理使用常见错误分析