身份证名字转四六级成绩爬虫
作者:互联网
来源:吾爱破解的ai酸的博文
https://www.52pojie.cn/thread-1112199-1-1.html
修改了一下用xlwt和xlrd批量读写只为以后方便用,详情还是上上面这个网址找原作..因为没论坛账号找不到作者..
目前时间:2020-2-22 20:04:25
#score.py
# @Time:2020.02.20
# @Author:ai酸的博文
from zkzh import *
from urllib.parse import quote
import requests,re,json
#import x
import xlwt,xlrd
from xlutils.copy import copy
class score:
    """Query a CET-4/CET-6 score online and append it to ``res.xls``.

    The instance keeps one ``requests`` session, the landing-page URL, the
    score-query URL *template* and the headers used for each request.
    """

    def __init__(self):
        # A single session is reused for both requests (presumably so that
        # cookies set by the landing page accompany the score query).
        self.s = requests.Session()
        # CET score query landing page.
        self.url1 = 'http://cet.neea.edu.cn/cet/query_c.html'
        # Template: the three {} are the level (4 or 6), the admission
        # ticket number and the URL-encoded name, in that order.
        self.url2 = 'http://cachecloud.neea.cn/cet/query?data=CET{}_192_DANGCI%2C{}%2C{}'
        self.headers1 = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'}
        self.headers2 = {'Referer': 'http://cet.neea.edu.cn/cet',
                         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
                         'Host': 'cachecloud.neea.cn'}

    def gScore(self, name, grade, zkzhNum):
        """Fetch one candidate's score, append it to ``res.xls`` and print it.

        Args:
            name: Candidate name, or ``0`` when the ticket lookup failed
                (in which case nothing is done).
            grade: Exam level substituted into the query URL (4 or 6).
            zkzhNum: Admission ticket number.

        Returns:
            None. When a score field cannot be found in the response, the
            raw response is printed and the candidate is skipped.
        """
        if name == 0:
            return
        self.s.get(url=self.url1, headers=self.headers1)
        print('test!!', name, zkzhNum)
        # Format into a local variable: assigning the result back to
        # self.url2 would destroy the template, so every later call on this
        # instance would silently reuse the first candidate's URL.
        # quote() URL-encodes the (non-ASCII) name.
        query_url = self.url2.format(grade, zkzhNum, quote(name))
        response = self.s.get(url=query_url, headers=self.headers2)
        html = response.content.decode()

        # Pull total / listening / reading / writing+translation out of the
        # response text; bail out instead of raising IndexError when a field
        # is absent so that one failed query does not abort a batch run.
        fields = []
        for key in ('s', 'l', 'r', 'w'):
            found = re.findall(key + ':(.*?),', html)
            if not found:
                print(name, '查询失败,返回内容:', html)
                return
            fields.append(found[0])
        total, tingLi, yueDu, xieZuoFanYi = fields

        # xlrd workbooks are read-only: open the existing file, copy it into
        # a writable workbook, append one row and save over the same path.
        data = xlrd.open_workbook(r"res.xls")
        nrows = data.sheets()[0].nrows  # current row count == index of the new row
        excel = copy(wb=data)
        sheet = excel.get_sheet(0)
        for col, value in enumerate((total, tingLi, yueDu, xieZuoFanYi, name)):
            sheet.write(nrows, col, value)
        excel.save('res.xls')

        print(name)
        print('英语' + str(grade) + '级成绩:', total)
        print('听力得分:', tingLi)
        print('阅读得分:', yueDu)
        print('写作和翻译得分:', xieZuoFanYi)

    def run_(self, ZKZH, name, num):
        """Look up the admission ticket via ``ZKZH`` and then query the score.

        Args:
            ZKZH: Object whose ``run_(name, num)`` returns the 4-tuple
                ``(name, num, grade, zkzhNum)``.
            name: Candidate name.
            num: Candidate ID-card number.
        """
        # Call the method on the object that was passed in rather than
        # through the star-imported class name.
        name, num, grade, zkzhNum = ZKZH.run_(name, num)
        self.gScore(name=name, grade=grade, zkzhNum=zkzhNum)
if __name__ == '__main__':
    # Stand-alone entry point for a single candidate. run_() requires the
    # name and ID-card number (calling it with only the zkzh object raises
    # TypeError), so ask for them on the console.
    z = zkzh()
    s = score()
    s.run_(z, input('输入姓名:'), input('输入身份证号:'))
#main.py
from zkzh import *
from score import *
import xlrd
# Batch driver: read (name, ID number) pairs from info.xls and query each one.
workbook = xlrd.open_workbook(r'info.xls')  # workbook holding ID numbers and names
sheet2 = workbook.sheet_by_index(0)
# A for loop supplies the row index; the previous `while index<...` loop
# used `index` without ever initialising it and failed with NameError.
for index in range(sheet2.nrows):
    rows = sheet2.row_values(index)
    name = rows[2]  # third column: name
    num = rows[3]   # fourth column: ID-card number (expected to be a text cell)
    if not num.startswith("44"):  # skip ID numbers that are not from Guangdong
        print(name, num, '跳过')
        continue
    print(name + '--' + num)
    # Fresh helper objects (and therefore fresh HTTP sessions) per candidate.
    z = zkzh()
    s = score()
    s.run_(z, name, num)
#zkzh.py
# @Time:2020.02.20
# @Author:ai酸的博文
import requests,random,subprocess,time,json,re,sys
class zkzh:
    """Look up a candidate's CET admission ticket number from name + ID number.

    The lookup needs a captcha, which is downloaded, shown to the user and
    typed in by hand.
    """

    def __init__(self, province_code='44'):
        """Create the HTTP session and request settings.

        Args:
            province_code: Province code sent with the lookup. Defaults to
                '44' (Guangdong); per the original author the other codes
                are listed in the source of the ticket query page.
        """
        self.s = requests.Session()
        self.url = 'http://cet-kw.neea.edu.cn/Home/ToQueryTestTicket'
        self.headers = {'Referer': 'http://cet-kw.neea.edu.cn/Home/QueryTestTicket',
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
                        'X-Requested-With': 'XMLHttpRequest',
                        'Origin': 'http://cet-kw.neea.edu.cn',
                        'Host': 'cet-kw.neea.edu.cn'}
        self.province_code = province_code

    def get(self, num, name, img):
        """Submit one ticket lookup.

        Args:
            num: ID-card number.
            name: Candidate name.
            img: Captcha text typed by the user.

        Returns:
            ``(exam_name, ticket_number)`` on success; ``(0, 1)`` when the
            server message mentions the captcha (caller should retry);
            ``(0, 2)`` when the candidate cannot be looked up (message
            mentions registration, or any unrecognised result).
        """
        data = {'provinceCode': self.province_code,  # province code
                'IDTypeCode': '1',        # ID document type (value taken from the page source)
                'IDNumber': num,          # ID-card number
                'Name': name,             # candidate name
                'verificationCode': img}  # captcha text
        response = self.s.post(self.url, headers=self.headers, data=data)
        json_html = json.loads(response.content.decode())
        ExceuteResult = json_html['ExceuteResultType']
        if ExceuteResult == 1:
            # The quoted substrings of Message carry the result:
            # index 1 is the exam name, index 3 the ticket number.
            List = re.findall('"(.*?)"', json_html['Message'])
            return List[1], List[3]
        if ExceuteResult == -1:
            print(json_html['Message'])
            if '验证码' in json_html['Message']:
                return 0, 1
            if '报名' in json_html['Message']:
                return 0, 2
        # Any other outcome used to fall through and return None, which made
        # the tuple unpacking in run_() raise TypeError. Report it and use
        # the "skip this candidate" code instead.
        print('未知错误!')
        return 0, 2

    def gImg(self):
        """Download the captcha image, open it and return what the user types."""
        urlImg = 'http://cet-kw.neea.edu.cn/Home/VerifyCodeImg'
        response = self.s.get(url=urlImg)
        # Save the image locally.
        with open('./img.png', 'wb') as f:
            f.write(response.content)
        # Hand the file name to the shell so it opens in the default viewer.
        # NOTE(review): this relies on the shell's file association
        # (presumably Windows) - confirm on other platforms.
        subprocess.Popen('img.png', shell=True)
        return input('输入验证码:')

    def run_(self, name, num):
        """Resolve a candidate's ticket number, retrying on captcha errors.

        Args:
            name: Candidate name.
            num: ID-card number.

        Returns:
            ``(name, num, grade, zkzhNum)`` with ``grade`` 4 or 6 on success,
            or ``(0, 0, 0, 0)`` when the candidate has to be skipped.
        """
        zkzhNum = 1
        while zkzhNum == 1:  # 1 == captcha rejected, ask again
            img = self.gImg()
            grade, zkzhNum = self.get(num=num, name=name, img=img)
        if zkzhNum == 2:
            return 0, 0, 0, 0
        print('您的' + grade + '准考证号是:' + zkzhNum)
        if grade == '英语六级笔试':
            grade = 6
        elif grade == '英语四级笔试':
            grade = 4
        else:
            # An unrecognised exam name cannot be turned into a level for the
            # score query, so skip the candidate rather than returning it.
            print('未知错误!')
            return 0, 0, 0, 0
        return name, num, grade, zkzhNum
if __name__ == '__main__':
    # Stand-alone ticket lookup. The class has no run() method; the entry
    # point is run_(name, num), so read both values from the console.
    a = zkzh()
    a.run_(input('输入姓名:'), input('输入身份证号:'))
ptgood 发布了17 篇原创文章 · 获赞 9 · 访问量 3万+ 私信 关注
标签:name,grade,self,爬虫,四六级,html,身份证,print,zkzhNum 来源: https://blog.csdn.net/ptgood/article/details/104449497