Python爬虫实战(十三)JS逆向必会基础案例 | 百度翻译参数破解
作者:互联网
前情回顾:有道翻译参数破解
在上一篇博客中,我们系统地分析了有道翻译的JS逆向过程。不难看出,JS逆向其实就是用Python重新复写JavaScript代码。但当JS代码很长时,弄懂其中的逻辑关系就相当耗时,这时就需要execjs库来帮忙了(它是一个可以在Python中执行JS代码的库,执行时需借助本机的JS运行环境,如Node.js),今天就以百度翻译的参数破解为例进行说明。
目录
爬取网址为百度翻译
一、分析参数构成
抓包的过程与有道翻译类似,在此不再赘述。通过比较不同的请求可知,参数query和sign是动态变化的,其中参数query为翻译的词,那么我们只需要搜索sign参数即可。如下图所示,共出现10个JS文件。依据经验,我们可确定sign是在第一个JS文件中构成的。
搜索sign值,并在7047行打下断点进行调试,结果如下
分析可知,参数n为翻译的词,而参数sign则是通过f(n)得到的。此时,我们点开f函数(其定义在源码中写作function e(r),因此后文用execjs调用时使用的函数名是e),复制全部代码,其内容如下,并将其存为baidu.js
// Raw copy of the signing routine, exactly as lifted from the page.
// Returns a string of the form "<p>.<p ^ m>" computed from the input text r.
// NOTE: this snippet is incomplete on its own. It reads `i`, and calls `n(...)`
// and `a(...)`, none of which are defined here, and it may read `window`.
function e(r) {
// o: every UTF-16 surrogate pair found in r, or null when there is none.
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
// Longer than 30 code units: keep the first 10, the middle 10 and the last 10.
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
// Rebuild the text as a list f in which each surrogate pair is a single element,
// so the 10/10/10 truncation below never cuts through a pair.
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
"" !== e[C] && f.push.apply(f, a(e[C].split(""))),
C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
var u = void 0
// l is the string "gtk" (char codes 103, 116, 107).
, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
// Use i when it is not null; otherwise fall back to window["gtk"] (or "").
u = null !== i ? i : (i = window[l] || "") || "";
// u is split on "." into two numbers, m and s. The loop then encodes r as UTF-8
// byte values into S (1 to 4 bytes per character, surrogate pairs combined first).
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
S[c++] = A >> 18 | 240,
S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
S[c++] = A >> 6 & 63 | 128),
S[c++] = 63 & A | 128)
}
// F is the string "+-a^+6" and D is "+-3^+b+-f"; both are passed to n as its second argument.
// Starting from m, each byte is added to p and the sum is passed through n(p, F).
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
p += S[b],
p = n(p, F);
// Final steps: n(p, D), XOR with s, make a negative value non-negative,
// reduce modulo 1e6, then append "." and p ^ m.
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
二、Python执行JS代码
不同于有道翻译,若人为地去拆解其内在逻辑再用Python复写较为耗时,这里我们使用Python中的execjs库来直接执行JS代码。
import execjs
def get_sign(word):
    """Return the value produced by the JS function ``e`` for ``word``.

    Reads the JavaScript source saved at the hard-coded path below,
    compiles it with ``execjs`` and calls its function ``e`` with ``word``
    as the only argument.

    :param word: the text passed to the JS function.
    :return: whatever the JS function ``e`` returns.
    """
    with open('c:/users/dell/desktop/baidu.js','r') as f:
        jscode = f.read()
    sign = execjs.compile(jscode).call('e',word)
    return sign


get_sign('tall')
执行结果如下
根据程序报错结果可知,我们复制的JS代码中缺少参数i的定义,这里也告诉我们:JS逆向的过程中,复制下来的JS代码或多或少是不完整的,这时候只能缺啥补啥,挨个去补全。
三、完善JS代码
3.1 i 值构建
这里依旧是通过打断点进行i值的寻找。在2535行处打下断点,将鼠标放置在i上,即可显示其值"320305.131321201"(或者直接在console控制台输入i)。
知道参数i的值后,我们完善一下之前的JS代码,如下所示
/**
 * Compute the sign string for the text r.
 *
 * Relies on a helper n(value, ops) that is NOT defined in this snippet;
 * calling e() before n is available throws a ReferenceError.
 *
 * @param {string} r - Text to sign.
 * @returns {string} A string of the form "<p>.<p ^ m>".
 */
function e(r) {
    // Value of i observed in the browser debugger, added by hand.
    var i = "320305.131321201";
    // o: every UTF-16 surrogate pair found in r, or null when there is none.
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        // Longer than 30 code units: keep the first 10, the middle 10 and the last 10.
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10));
    } else {
        // Rebuild the text as a list f in which each surrogate pair is one element,
        // so the truncation below never cuts through a pair.
        // split("") already returns an array, so the page's array-copy helper `a`
        // (which was never copied into this file) is not needed.
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
            "" !== e[C] && f.push.apply(f, e[C].split("")),
            C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""));
    }
    var u = void 0
      , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); // "gtk"
    // i is always set above, so the window["gtk"] fallback is never evaluated here.
    u = null !== i ? i : (i = window[l] || "") || "";
    // u is split on "." into two numbers, m and s. The loop then encodes r as UTF-8
    // byte values into S (1 to 4 bytes per character, surrogate pairs combined first).
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
        S[c++] = A >> 18 | 240,
        S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
        S[c++] = A >> 6 & 63 | 128),
        S[c++] = 63 & A | 128);
    }
    // F is "+-a^+6" and D is "+-3^+b+-f"; both are passed to n as its second argument.
    // Starting from m, each byte is added to p and the sum is passed through n(p, F).
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
        p += S[b],
        p = n(p, F);
    // Final steps: n(p, D), XOR with s, make a negative value non-negative,
    // reduce modulo 1e6, then append "." and p ^ m.
    return p = n(p, D),
    p ^= s,
    0 > p && (p = (2147483647 & p) + 2147483648),
    p %= 1e6,
    p.toString() + "." + (p ^ m);
}
此时,再次利用execjs库调用JS代码,运行结果如下
从结果来看:缺少对象。检查JS代码可知,p = n(p, F)中的函数n尚未定义。接下来,就是在源码中寻找函数n的定义。
3.2 函数n构建
同样,需要在我们想要知道的函数n下方打断点进行调试,如下图所示将鼠标放置函数n上即可点击链接进行跳转。(注意:如果不打断点,是无法跳转的!)
跳转函数n的结果如下
其实,就是在我们一开始找到的e函数正上方!此时,再将函数n的定义加入我们的JS代码中进行完善,代码如下
// Definition of helper n.
/**
 * Apply a sequence of shift-and-combine steps to the number r.
 *
 * The string o is read three characters at a time:
 *   - o[t + 2] is the shift amount: a character >= "a" means its char code
 *     minus 87 (so "a" is 10, "b" is 11, ...), anything else is taken as a
 *     decimal digit.
 *   - o[t + 1] selects the shift: "+" is an unsigned right shift (>>>),
 *     anything else is a left shift (<<).
 *   - o[t] selects how the shifted value is combined with r: "+" adds it and
 *     truncates to 32 bits, anything else XORs it.
 *
 * @param {number} r - Starting value.
 * @param {string} o - Operation string, e.g. "+-a^+6".
 * @returns {number} The transformed value (r unchanged when o has fewer than 3 characters).
 */
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var a = o.charAt(t + 2);
        a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a);
        a = "+" === o.charAt(t + 1) ? r >>> a : r << a;
        // `+` binds tighter than `&`: this is (r + a) & 0xFFFFFFFF.
        r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a;
    }
    return r;
}
/**
 * Compute the sign string for the text r.
 *
 * Calls the helper n(value, ops), which must be defined in the same file.
 *
 * @param {string} r - Text to sign.
 * @returns {string} A string of the form "<p>.<p ^ m>".
 */
function e(r) {
    // Definition of i: the value observed in the browser debugger.
    var i = "320305.131321201";
    // o: every UTF-16 surrogate pair found in r, or null when there is none.
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        // Longer than 30 code units: keep the first 10, the middle 10 and the last 10.
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10));
    } else {
        // Rebuild the text as a list f in which each surrogate pair is one element,
        // so the truncation below never cuts through a pair.
        // split("") already returns an array, so the page's array-copy helper `a`
        // (which was never copied into this file) is not needed.
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
            "" !== e[C] && f.push.apply(f, e[C].split("")),
            C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""));
    }
    var u = void 0
      , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); // "gtk"
    // i is always set above, so the window["gtk"] fallback is never evaluated here.
    u = null !== i ? i : (i = window[l] || "") || "";
    // u is split on "." into two numbers, m and s. The loop then encodes r as UTF-8
    // byte values into S (1 to 4 bytes per character, surrogate pairs combined first).
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
        S[c++] = A >> 18 | 240,
        S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
        S[c++] = A >> 6 & 63 | 128),
        S[c++] = 63 & A | 128);
    }
    // F is "+-a^+6" and D is "+-3^+b+-f"; both are passed to n as its second argument.
    // Starting from m, each byte is added to p and the sum is passed through n(p, F).
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
        p += S[b],
        p = n(p, F);
    // Final steps: n(p, D), XOR with s, make a negative value non-negative,
    // reduce modulo 1e6, then append "." and p ^ m.
    return p = n(p, D),
    p ^= s,
    0 > p && (p = (2147483647 & p) + 2147483648),
    p %= 1e6,
    p.toString() + "." + (p ^ m);
}
添加完成后,再次运行execjs库调用baidu.js文件,结果如下
成功返回sign参数!
四、全部代码
参数sign破解后,剩下就是利用requests传参即可。全部代码如下
import execjs
import requests
def get_sign(word):
    """Return the value produced by the JS function ``e`` for ``word``.

    Reads ``baidu.js`` (path relative to the current working directory),
    compiles it with ``execjs`` and calls its function ``e`` with ``word``
    as the only argument.

    :param word: the text passed to the JS function.
    :return: whatever the JS function ``e`` returns.
    """
    with open('baidu.js','r') as f:
        jscode = f.read()
    sign = execjs.compile(jscode).call('e',word)
    return sign
def main(word):
    """POST ``word`` to the translation endpoint and print the first result.

    The form carries ``word`` as ``query`` and ``get_sign(word)`` as ``sign``.
    The first entry of ``trans_result.data`` in the JSON reply is printed.

    NOTE(review): ``token`` and ``cookie`` below are hard-coded values;
    presumably they are tied to one browser session and need replacing
    with your own -- confirm against a fresh capture.

    :param word: the text to translate.
    :raises requests.HTTPError: if the server answers with an error status.
    :raises KeyError: if the JSON reply has no ``trans_result`` entry.
    """
    form_data = {
        'from': 'en',
        'to': 'zh',
        'query': word,
        'transtype': 'realtime',
        'simple_means_flag': '3',
        'sign': get_sign(word),
        'token': '5822d89edd8120552250bd957d623139',
        'domain': 'common'
    }
    headers = {
        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
        'cookie':'PSTM=1596245797; BIDUPSID=F0E7203595DAE0CCDB3B84642A11DE00; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=jU2ZDhUZVFMaDM1aFJGQnBNMkJnOG92d1dtNllaU20wek9Qb2hQN3VkSWoxdWxmRVFBQUFBJCQAAAAAAAAAAAEAAADgH7kfNzI5NzU3OTE1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACNJwl8jScJfd; BDUSS_BFESS=jU2ZDhUZVFMaDM1aFJGQnBNMkJnOG92d1dtNllaU20wek9Qb2hQN3VkSWoxdWxmRVFBQUFBJCQAAAAAAAAAAAEAAADgH7kfNzI5NzU3OTE1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACNJwl8jScJfd; H_WISE_SIDS=107319_110085_127969_131423_144966_151532_155689_155933_156286_158995_161422_162079_162155_162898_163233_163274_163321_163390_163805_163933_164109_164163_164215_164545_164692_164869_164940_164946_164954_164961_165048_165086_165133_165135_165144_165328_165565_165689_165736_165801_165963_166055_166148_166167_166174_166176_166181_166184_166209_166214_166312_166450_166570_166631_166692_166696_166826_167305_167388_167393_167405; BAIDUID=444709A0C93D294B855A4CF6C022A543:FG=1; __yjs_duid=1_a7791c0f7d2021de3208effb4940d4de1611904747262; BAIDUID_BFESS=444709A0C93D294B855A4CF6C022A543:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1613629191; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1613631158; __yjsv5_shitong=1.0_7_16675289e0373770030fa50e129a31867a16_300_1613631148848_120.243.195.4_43f8de8f; ab_sr=1.0.0_YmEwNjI3ZjEzMDEyYWM5Mzc0ZTJmMDMxNDdmMWFiYzhkZWM0MDViYjg1MTQ0ZGY0YzA3OWZkNzhhMjg5MzRiMzgwYzg4ZjY1MGE1ZTQ4MTc5M2M0MTAyNTkyZDgzY2Mz'
    }
    url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.post(url,headers=headers,data=form_data,timeout=10)
    # Surface HTTP errors directly instead of failing later while parsing the body.
    r.raise_for_status()
    data = r.json()['trans_result']['data'][0]['dst']
    print('输入的词为{} , 翻译为{}'.format(word,data))
# Run the demo translation only when this file is executed as a script.
if __name__ =='__main__':
    main('ensure')
JS逆向基础总结
通过有道翻译和百度翻译的逆向过程,我们不难看出JS逆向就是慢慢"扣代码"。遇到简单点的JS代码,可以利用Python进行复写;若JS代码较为复杂,可以借助execjs库来代替执行,但执行过程必定不是一帆风顺的,通常会缺少某个参数,这时我们遵循缺啥补啥的原则,挨个慢慢进行即可。
以上就是本次分享的全部内容~
标签:String,Python,爬虫,JS,length,&&,var,fromCharCode 来源: https://blog.csdn.net/shine4869/article/details/113859186