Scrapy spider:设置随机请求头和随机代理IP
作者:互联网
#创建爬虫
scrapy genspider randomIp_spider "taobao.com"
#把需要请求的url放到一个url列表中,每次随机选取其中一个作为起始链接,避免总是访问同一个页面而被监测到
import random

# Pool of item pages to crawl. One is picked at random on each run so the
# crawler does not always hit the same page.
url_list = [
    'https://detail.tmall.com/item.htm?id=522194707780&ali_refid=a3_430583_1006:1109696291:N:%E6%B6%88%E9%98%B2%E5%BA%94%E6%80%A5%E7%81%AF:eb9682757281a9ec406cb4647d3f584a&ali_trackid=1_eb9682757281a9ec406cb4647d3f584a&spm=a230r.1.14.3',
    'https://item.taobao.com/item.htm?spm=a219r.lmn002.14.1.f3b87156TcpPbp&id=587398066660&ns=1&abbucket=16',
    'https://item.taobao.com/item.htm?spm=a230r.1.14.50.1af3248cr0GGyM&id=576997844987&ns=1&abbucket=16#detail',
]

# Pick one URL at random. Scrapy iterates over ``start_urls``, so it has to be
# a list: a bare string would be walked character by character and every
# "URL" would be rejected for lacking a scheme.
start_urls = [random.choice(url_list)]
#到middlewares.py文件中去
import random


class UserAgentDownloadMiddleware:
    """Downloader middleware that sets a random User-Agent on every request."""

    # Candidate User-Agent strings. Entries are unique so that each one is
    # equally likely to be chosen.
    USER_AGENTS = [
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; pl-PL; rv:1.0.1) Gecko/20021111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021104 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030111 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030109 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021220 Chimera/0.6',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021216 Chimera/0.6',
    ]

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header with a random entry.

        Args:
            request: The outgoing request; its ``headers`` mapping is mutated.
            spider: The spider that issued the request (unused).

        Returns:
            None, so the request continues through the remaining middlewares.
        """
        request.headers['User-Agent'] = random.choice(self.USER_AGENTS)
import json
import logging
import random

logger = logging.getLogger(__name__)


class IPProxyDownloadMiddleware:
    """Downloader middleware that routes every request through a random proxy."""

    # Candidate proxies as ``host:port``.
    # NOTE(review): the original list also contained '60.167.23.29.205:44728',
    # which has five octets and is not a valid IPv4 address. It is left out
    # because the intended address cannot be determined from here.
    PROXIES = [
        '222.190.163.141:45334',
        '183.143.73.146:31998',
        '115.216.58.182:43060',
        '116.209.129.167:27158',
    ]

    def process_request(self, request, spider):
        """Store a randomly chosen proxy URL in ``request.meta['proxy']``.

        Args:
            request: The outgoing request; its ``meta`` mapping is mutated.
            spider: The spider that issued the request (unused).

        Returns:
            None, so the request continues through the remaining middlewares.
        """
        proxy = random.choice(self.PROXIES)
        # Scrapy documents the ``proxy`` meta value as a full URL such as
        # ``http://host:port``, so add a scheme when the entry has none.
        if '://' not in proxy:
            proxy = 'http://' + proxy
        logger.debug('Using proxy %s', proxy)
        request.meta['proxy'] = proxy


def _load_base_size(path="texr.json"):
    """Return the ``size`` entry under ``BaseSettings`` in the JSON file at *path*.

    This logic trailed the proxy middleware in the original snippet, where it
    leaked the file handle and threw its result away. It is kept here as a
    standalone helper and is not called by the middleware.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``BaseSettings`` or ``size`` is missing.
    """
    with open(path, encoding='utf-8') as f:
        setting = json.load(f)
    return setting['BaseSettings']['size']
#到settings.py文件中去,修改如下配置
# Enable the two custom downloader middlewares. Each value is the
# middleware's order number in the downloader middleware chain.
DOWNLOADER_MIDDLEWARES = {
    # Random User-Agent header.
    'taobao_for_attack.middlewares.UserAgentDownloadMiddleware': 543,
    # Random proxy IP.
    'taobao_for_attack.middlewares.IPProxyDownloadMiddleware': 124,
}
标签:rv,1.0,ip,Mozilla,spider,0.6,随机,OS,Macintosh 来源: https://www.cnblogs.com/shaoqizhi/p/10485835.html