爬取意林杂志 && Python 操作 Excel
作者:互联网
前言
刘太太对一篇意林的文章印象很深,但现在只记得两句话。
想着爬完所有意林文章应该就能找到,
于是就写了个小玩意。
BTW
刘太太已经是我npy啦!
import re
import requests
from bs4 import BeautifulSoup
import os
def GetLinkSum():
    """Collect the links found in the table cells of the Yilin home page.

    Returns:
        list[dict]: one ``{'link': href}`` entry for every ``td>a`` anchor
        that carries an ``href`` attribute, in page order.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    url = 'https://www.yilinzazhi.com/'
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of silently parsing an error page into zero links.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # Anchors without an href would later break URL concatenation, so they
    # are dropped here.
    result = [
        {'link': anchor.get('href')}
        for anchor in soup.select('td>a')
        if anchor.get('href')
    ]
    print('read:', len(result))
    return result
def articaltitle(aim):
    """Walk every issue index page and pass each article URL to ``artical``.

    Args:
        aim: list of ``{'link': href}`` dicts (the shape returned by
            ``GetLinkSum``); each link is relative to the site root.

    Returns:
        None. Progress is reported on stdout.
    """
    site_root = 'https://www.yilinzazhi.com/'
    for i, issue in enumerate(aim):
        issue_link = issue['link']
        # Everything before 'index.html' is reused as the prefix for the
        # article hrefs found on the issue page.
        issue_prefix = issue_link.split('index.html')[0]
        # Timeout so one unresponsive issue page cannot hang the whole run.
        response = requests.get(site_root + issue_link, timeout=30)
        soup = BeautifulSoup(response.text, 'lxml')
        for anchor in soup.select('span > a'):
            href = anchor.get('href')
            if not href:
                # An anchor without href cannot be turned into a URL.
                continue
            artical(site_root + issue_prefix + href)
            print('第{0}期第{1}篇'.format(issue_prefix, href))
        print('i:', i)
def artical(url, save_root="F:\\PyCode\\Notes\\YiLin",
            keywords=('异乡的情侣', '共同的兰州', '兰州')):
    """Download one article, report keyword hits, and save its text.

    Args:
        url: absolute article URL. The fourth ``/``-separated component
            (``url.split('/')[3]``) is used as the output sub-directory.
        save_root: directory under which the per-issue folders are created.
        keywords: phrases to look for in the article markup; a hit prints
            the markup followed by ``strN: <url>`` where N is the 1-based
            position of the phrase.

    Raises:
        requests.RequestException: on network failure or timeout.
        IndexError: if ``url`` has fewer than four ``/``-separated parts.
    """
    response = requests.get(url, timeout=30)
    # The original re-encoded response.text as ISO-8859-1 and decoded it as
    # UTF-8; decoding the raw bytes gives the same result without raising
    # UnicodeEncodeError when requests guesses another encoding.
    html = response.content.decode('utf-8', errors='replace')
    soup = BeautifulSoup(html, 'lxml')
    paragraphs = soup.select('div> p')

    # Build the searchable markup once instead of once per keyword.
    markup = str(paragraphs)
    for number, keyword in enumerate(keywords, start=1):
        if keyword in markup:
            print(markup)
            print('str{0}:'.format(number), url)

    directory = url.split('/')[3]

    headings = soup.select('body>div.wrap>div>div.blkContainer>div>h1')
    title = headings[0].get_text(strip=True) if headings else ''
    if not title:
        # No usable <h1>: fall back to the page's own file name.
        title = os.path.splitext(url.rsplit('/', 1)[-1])[0]
    # Replace characters that are illegal in Windows file names.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title) or 'untitled'

    folder = os.path.join(save_root, directory)
    os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, '{0}.txt'.format(safe_title))
    with open(target, "w", encoding="utf-8") as f:
        # One paragraph of plain text per line, without the list brackets
        # and separators that splitting str(list) used to leak into the file.
        f.writelines(paragraph.get_text() + "\n" for paragraph in paragraphs)
if __name__ == '__main__':
    # Crawl entry point: fetch the issue links from the home page, then
    # visit every issue and save its articles. articaltitle() returns
    # nothing, so its result is not bound to a name.
    issue_links = GetLinkSum()
    articaltitle(issue_links)
import xlwt
def ReadParameter():
    """Prompt for the rack and valve counts of the three sections.

    All six integers are read first (rack count, then valves per rack, for
    each section in turn); afterwards one summary line per section is
    printed.

    Returns:
        list[list[int]]: ``[[racks, valves], [racks, valves], [racks, valves]]``
        in the order the sections were asked for.

    Raises:
        ValueError: if any answer is not a valid integer.
    """
    prompts = (
        ("请输入加密段台架个数:", "请输入加密段每台架水阀数:"),
        ("请输入常规冷却段台架个数:", "请输入常规冷却段每台架水阀数:"),
        ("请输入微调段台架个数:", "请输入微调段每台架水阀个数:"),
    )
    summaries = (
        "加密段台架个数:{0},水阀个数{1}",
        "常规冷却段台架个数:{0},水阀个数{1}",
        "微调段台架个数:{0},水阀个数{1}",
    )
    # List items are evaluated left to right, so the prompt order is kept.
    sections = [
        [int(input(rack_prompt)), int(input(valve_prompt))]
        for rack_prompt, valve_prompt in prompts
    ]
    for template, (rack_count, valve_count) in zip(summaries, sections):
        print(template.format(rack_count, valve_count))
    return sections
def change8(demo):
    """Advance a bit counter by one, restarting at 0 once it reaches 8.

    Returns ``demo + 1``, or 0 when that value is 8 or more.
    """
    advanced = demo + 1
    return 0 if advanced >= 8 else advanced
def change16(demo):
    """Advance a 1-based counter by one, restarting at 1 after 16.

    Returns ``demo + 1``, or 1 when that value exceeds 16.
    """
    advanced = demo + 1
    return 1 if advanced > 16 else advanced
def First(base, name="DaDongHai"):
    """Write the L1/L2 (CTC) communication variable workbook.

    Each rack gets a block of 16 rows on the 'L1->L2(CTC)' sheet: the first
    ``valves`` rows describe top-spray valid signals, the rest are marked
    as reserved. Byte offsets, bit numbers, the L2 array index, and the
    WinCC ``spr`` suffix run continuously across all racks.

    Args:
        base: three ``[rack_count, valves_per_rack]`` pairs, one per
            section (the shape returned by ``ReadParameter``).
        name: project name inserted into the output file name.

    Side effects:
        Saves ``./层冷L1-L2通讯变量表(<name>).xls`` in the working directory.
    """
    rows_per_rack = 16
    # Rows 0-2 hold the banner lines, row 3 the column titles, row 4 the
    # packet-ID row; signal rows start at row 5.
    startpoint = 5

    xls = xlwt.Workbook()
    sht1 = xls.add_sheet('L1->L2(CTC)', cell_overwrite_ok=True)
    # The reverse-direction sheet is created but not populated yet.
    xls.add_sheet('L2->L1(CTC)', cell_overwrite_ok=True)

    tittle = ['Group Name', 'Pin Comment', '管脚注释', 'Type', 'Unit', 'Offset',
              'L2 Var Name', 'Wincc Var Name', 'Note']
    firstdata = ['', '数据包ID', '', 'INT', '', '0', '', '', 'IOMASTER读配置文件,自动添加']

    for column, width in ((0, 200), (1, 450), (6, 450), (7, 450), (8, 400)):
        sht1.col(column).width = width * 20

    sht1.write_merge(startpoint - 5, startpoint - 5, 0, 8,
                     'DC TO CTC Data.Communication Table(CTC)')
    sht1.write_merge(startpoint - 4, startpoint - 4, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:1611 二级服务器主机IP:10.{0}.10.184 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))
    sht1.write_merge(startpoint - 3, startpoint - 3, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:2611 二级服务器主机IP:10.{0}.10.186 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))

    # Column titles and the fixed packet-ID row.
    for column, (heading, first_value) in enumerate(zip(tittle, firstdata)):
        sht1.write(startpoint - 2, column, heading)
        sht1.write(startpoint - 1, column, first_value)

    # Top-spray signals. Flatten the three sections into one valve count
    # per rack; this replaces three copies of the same row-writing code
    # that differed only in which section's valve count they read.
    valves_per_rack = [
        section[1] for section in base[:3] for _ in range(section[0])
    ]

    offset = 2   # next byte offset to assign
    spr = 0      # next index into the L2 spr[0][...] array
    bit = 0      # bit position inside the current byte (0-7)
    CTCL2 = 1    # WinCC spr suffix, cycles 1-16

    for rack, valve_count in enumerate(valves_per_rack):
        first_row = startpoint + rack * rows_per_rack
        sht1.write_merge(first_row, first_row + rows_per_rack - 1, 0, 0, '')
        # A rack always fills its 16 rows; a valve count above 16 spills
        # into the following rows, exactly as the per-section loops did.
        for slot in range(max(valve_count, rows_per_rack)):
            row = first_row + slot
            in_use = slot < valve_count
            if in_use:
                sht1.write(row, 1, 'BANK {0}-{1}上喷有效信号'.format(rack + 1, slot + 1))
            else:
                sht1.write(row, 1, '预留')
            if bit == 0:
                # First bit of a byte: this row carries the BYTE type and
                # the byte offset.
                sht1.write(row, 3, 'BYTE')
                sht1.write(row, 5, offset)
                offset += 1
            else:
                sht1.write(row, 3, '---')
            sht1.write(row, 4, 'BIT{0}'.format(bit))
            bit = change8(bit)
            if in_use:
                # Only real valves map onto an L2 variable.
                sht1.write(row, 6, 'pIO->ctcRead.rtsAvl.spr[0][{0}]'.format(spr))
                spr += 1
            sht1.write(row, 7, 'CTCL2_OUT_Bank{0}_Top.spr{1}'.format(rack + 1, CTCL2))
            CTCL2 = change16(CTCL2)

    # TODO: bottom-spray signals are not generated yet.
    xls.save('./层冷L1-L2通讯变量表({0}).xls'.format(name))
if __name__ == '__main__':
    # Fixed layout of [rack count, valves per rack] for the three sections.
    # Call ReadParameter() instead to enter the layout interactively.
    base = [
        [8, 10],
        [8, 4],
        [2, 8],
    ]
    First(base)
标签:sht1,write,16,Python,demo,CTCL2,excel,爬取,startpoint 来源: https://blog.csdn.net/weixin_47241488/article/details/118024571