爬虫之遇到521,破解cookie之js监测到无头浏览器
作者:互联网
使用python、scrapy
import execjs# 安装pip install PyExecJS 用这种方式只是为了调试,实际使用中还是要用js引擎v8
看了不少博客,都说要用 PhantomJS、Chrome 之类的模拟浏览器来处理,但其实很多网站的 JS 能检测出这种操作。
下面就以一个会检测无头浏览器的网站为例(具体网站不公开!):
<script>var x="attachEvent@String@@@if@Sun@e@PqwgnPCMHw@781@@@fromCharCode@chars@match@@@@substr@new@@GMT@@onreadystatechange@@@@innerHTML@@@@challenge@@location@3@@DOMContentLoaded@D@Expires@0xFF@Mar@for@__cdn_clearance@@addEventListener@@href@div@@13@else@@join@@@@2@document@@cookie@https@@pathname@@@d@while@@@31@@firstChild@charCodeAt@parseInt@try@@a@Path@@catch@@1@@window@function@g@@15@false@reverse@MLPxP5@@RegExp@replace@return@headless@search@f@1554042973@charAt@@split@@@@length@0xEDB88320@36@BGZKFSTk@1500@createElement@toString@@@8@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@setTimeout@eval@@var@JgSe0upZ@@19@captcha@Array@@toLowerCase@0@".replace(/@*$/,"").split("@"),y="54 36=3f(){51('1a.20=1a.2g+1a.44.41(/[\\?|&]58-18/,\\'\\')',4h);2b.2d='1j=46.9|5c|'+(3f(){54 36=59(+[[1b+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),2m=['1e',(-~{}/~~[]+[]).47(~~[])+[3e.43+[]+[[]][5c]][5c].47(~~''),'4g%',[-~[[2a]*(-~~~[]+(+!+[])+(+!+[]))]],'8%',[2a],'3l',[(-~[]|2a)]];1i(54 1c=5c;1c<2m.4d;1c++){36.3k()[1c]=2m[1c]};42 36.26('')})()+';1f=6, 30-1h-57 3i:4f:23 l;38=/;'};5((3f(){35{42 !!3e.1l;}3a(7){42 3j;}})()){2b.1l('1d',36,3j)}24{2b.1('10',36)}",f=function(x,y){var a=0,b=0,c=0;x=x.split("");y=y||99;while((a=x.shift())&&(b=a.charCodeAt(0)-77.5))c=(Math.abs(b)<13?(b+48.5):parseInt(a,36))+y*c;return c},z=f(y.match(/\w/g).sort(function(x,y){return f(x)-f(y)}).pop());while(z++)try{eval(y.replace(/\b\w+\b/g, function(y){return x[f(y,z)-1]||("_"+y)}));break}catch(_){}</script>
美化:
<script>
// Bootstrap decoder of the challenge page (beautified form of the inline script).
// x: word table. The "@"-separated string is stripped of trailing "@" characters
//    and split into an array; empty slots stay as empty strings.
var x = "attachEvent@String@@@if@Sun@e@PqwgnPCMHw@781@@@fromCharCode@chars@match@@@@substr@new@@GMT@@onreadystatechange@@@@innerHTML@@@@challenge@@location@3@@DOMContentLoaded@D@Expires@0xFF@Mar@for@__cdn_clearance@@addEventListener@@href@div@@13@else@@join@@@@2@document@@cookie@https@@pathname@@@d@while@@@31@@firstChild@charCodeAt@parseInt@try@@a@Path@@catch@@1@@window@function@g@@15@false@reverse@MLPxP5@@RegExp@replace@return@headless@search@f@1554042973@charAt@@split@@@@length@0xEDB88320@36@BGZKFSTk@1500@createElement@toString@@@8@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@setTimeout@eval@@var@JgSe0upZ@@19@captcha@Array@@toLowerCase@0@".replace(/@*$/, "").split("@"),
// y: the packed program. Every word-like token (\w+) is a number written in an
//    as-yet-unknown base; it is later swapped for the matching entry of x.
y = "54 36=3f(){51('1a.20=1a.2g+1a.44.41(/[\\?|&]58-18/,\\'\\')',4h);2b.2d='1j=46.9|5c|'+(3f(){54 36=59(+[[1b+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),2m=['1e',(-~{}/~~[]+[]).47(~~[])+[3e.43+[]+[[]][5c]][5c].47(~~''),'4g%',[-~[[2a]*(-~~~[]+(+!+[])+(+!+[]))]],'8%',[2a],'3l',[(-~[]|2a)]];1i(54 1c=5c;1c<2m.4d;1c++){36.3k()[1c]=2m[1c]};42 36.26('')})()+';1f=6, 30-1h-57 3i:4f:23 l;38=/;'};5((3f(){35{42 !!3e.1l;}3a(7){42 3j;}})()){2b.1l('1d',36,3j)}24{2b.1('10',36)}",
// f(x, y): reads string x as a number written in base y (99 when y is falsy).
//   Digit values: characters with char codes 65..90 ('A'..'Z') map to 36..61
//   (code - 77.5 + 48.5); every other character goes through parseInt(a, 36),
//   i.e. '0'..'9' -> 0..9 and 'a'..'z' -> 10..35.
f = function (x, y) {
var a = 0,
b = 0,
c = 0;
x = x.split("");
y = y || 99;
// b is an integer char code minus 77.5, so it is never 0; the loop therefore
// ends only when x.shift() runs out of characters. c accumulates digit + base * c.
while ((a = x.shift()) && (b = a.charCodeAt(0) - 77.5)) c = (Math.abs(b) < 13 ? (b + 48.5) : parseInt(a, 36)) + y * c;
return c
},
// z: value of the largest single-character digit that occurs in y. The characters
//    are sorted ascending by digit value and the last one is decoded.
z = f(y.match(/\w/g).sort(function (x, y) {
return f(x) - f(y)
}).pop());
// Try successive bases, starting at (largest digit + 1) because z is incremented
// before the body runs. For each base every token of y is replaced by
// x[value - 1], or by "_" + token when that entry is empty or missing, and the
// result is eval'ed. A throwing eval is swallowed and the next base is tried;
// the first base that evaluates without throwing ends the loop.
while (z++) try {
eval(y.replace(/\b\w+\b/g, function (y) {
return x[f(y, z) - 1] || ("_" + y)
}));
break
} catch (_) {}
</script>
可以发现关键就在最后的 eval 这里:把 eval 替换成 alert 或 console.log,就能看到解密后的结果。不多说,上代码:
# First cookie value: the text before the first ';' of the Set-Cookie header.
# NOTE(review): assumes the header value is a str; confirm for the response type in use.
__jsluid = response.headers["Set-Cookie"].split(';')[0]
cookie1 = __jsluid
# Decrypt: take the body of the first inline <script> and turn its eval(...) call
# into return(...), so the decoded source is handed back instead of executed.
# Only the call site "eval(" is rewritten; a plain replace of "eval" would also
# rewrite the "eval" entry inside the script's "@"-separated word table.
get_js = re.findall(r'<script>(.*?)</script>', resp_body)[0].replace('eval(', 'return(')
# Wrap the script in a function so the injected return statement is legal.
resHtml = "function getClearance(){" + get_js + "};"
ctx = execjs.compile(resHtml)
# First-stage decryption result (this is itself JavaScript source text).
temp1 = ctx.call('getClearance')
结果返回的又是一段js:
var _36=function(){setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')',1500);document.cookie='__cdn_clearance=1554042973.781|0|'+(function(){var _36=Array(+[[3+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),_2m=['D',(-~{}/~~[]+[]).charAt(~~[])+[window.headless+[]+[[]][0]][0].charAt(~~''),'BGZKFSTk%',[-~[[2]*(-~~~[]+(+!+[])+(+!+[]))]],'PqwgnPCMHw%',[2],'MLPxP5',[(-~[]|2)]];for(var _1c=0;_1c<_2m.length;_1c++){_36.reverse()[_1c]=_2m[_1c]};return _36.join('')})()+';Expires=Sun, 31-Mar-19 15:36:13 GMT;Path=/;'};if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',_36,false)}else{document.attachEvent('onreadystatechange',_36)}
美化:
// Second-stage script produced by the bootstrap decoder, formatted for reading.
// _36 is the handler that sets the clearance cookie and schedules a reload.
var _36 = function () {
    // After 1500 ms, navigate to the current path with the
    // "captcha-challenge" query parameter removed from location.search.
    setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')', 1500);
    // The cookie value is a fixed prefix plus a string assembled by the inner function.
    document.cookie = '__cdn_clearance=1554042973.781|0|' + (function () {
        var _36 = Array(+[
                [3 + (+!+[]) + ((+!+[]) + [~~{}]) / [(+!+[]) + (+!+[])]]
            ]),
            _2m = [
                'D',
                // (-~{} / ~~[] + []) is 1 / 0 coerced to the string "Infinity".
                // window.headless + [] is "undefined" when the property is not
                // defined, so this piece depends on the browser environment.
                (-~{} / ~~[] + []).charAt(~~[]) + [window.headless + [] + [[]][0]][0].charAt(~~''),
                'BGZKFSTk%',
                [-~[[2] * (-~~~[] + (+!+[]) + (+!+[]))]],
                'PqwgnPCMHw%',
                [2],
                'MLPxP5',
                [(-~[] | 2)]
            ];
        // The target array is reversed in place before every single write.
        for (var _1c = 0; _1c < _2m.length; _1c++) {
            _36.reverse()[_1c] = _2m[_1c]
        };
        return _36.join('')
    })() + ';Expires=Sun, 31-Mar-19 15:36:13 GMT;Path=/;'
};
// Register the handler with addEventListener when window exposes it,
// otherwise fall back to attachEvent.
if ((function () {
        try {
            return !!window.addEventListener;
        } catch (e) {
            return false;
        }
    })()) {
    document.addEventListener('DOMContentLoaded', _36, false)
} else {
    document.attachEvent('onreadystatechange', _36)
}
可以看到代码里用到了浏览器的 window 对象,其中 window.headless 就是用来检测无头浏览器的,所以说现在仅靠无头浏览器已经不能解决问题了!
截取有用:
// Excerpt of the cookie assignment from the decoded script. The trailing
// ';Expires=...;Path=/;' suffix of the full statement is left out here.
document.cookie = '__cdn_clearance=1554042973.781|0|' + (function () {
    var _36 = Array(+[
            [3 + (+!+[]) + ((+!+[]) + [~~{}]) / [(+!+[]) + (+!+[])]]
        ]),
        _2m = [
            'D',
            // This piece reads window.headless, so it depends on the browser environment.
            (-~{} / ~~[] + []).charAt(~~[]) + [window.headless + [] + [[]][0]][0].charAt(~~''),
            'BGZKFSTk%',
            [-~[[2] * (-~~~[] + (+!+[]) + (+!+[]))]],
            'PqwgnPCMHw%',
            [2],
            'MLPxP5',
            [(-~[] | 2)]
        ];
    // The target array is reversed in place before every single write.
    for (var _1c = 0; _1c < _2m.length; _1c++) {
        _36.reverse()[_1c] = _2m[_1c]
    };
    return _36.join('')
})();
具体的破解我就不公开了。。。。
注:这是工作中遇到的一些问题,我的方法不一定是最好的,欢迎大家一起交流,扣扣:571848990
标签:function,36,js,1c,window,cookie,521,var,location 来源: https://blog.csdn.net/qq_24760381/article/details/88937185