# 黄页88 -- 字体解密
# 作者:互联网
# https://kekee000.github.io/fonteditor/
import re
import requests
from io import BytesIO
import base64
from fontTools.ttLib import TTFont
from lxml import etree
# Glyph name (as it appears in the font's cmap) -> the character that glyph
# stands for. The digit names are listed in ascending order, so each name's
# position in the tuple is the digit it represents.
NUMBER_MAP = {
    **{
        glyph_name: str(digit)
        for digit, glyph_name in enumerate((
            'zero', 'one', 'two', 'three', 'four',
            'five', 'six', 'seven', 'eight', 'nine',
        ))
    },
    # Non-digit symbols handled by the table.
    'asterisk': '*',
    'plus': '+',
    'hyphen': '-',
    'slash': '/',
    'numbersign': '#',
}
def handle_ttfont(data):
    """Build an HTML-entity -> character lookup from a base64-encoded font.

    The font is decoded and loaded with fontTools, its best Unicode cmap is
    read, and every code point whose glyph name appears in ``NUMBER_MAP`` is
    turned into an entry ``'&#x<hex code point>;' -> character``.

    Side effect: the decoded font is also written to ``font.ttf`` and dumped
    as ``font.xml`` in the current working directory.

    Args:
        data: Base64 text (str or bytes) of the font file.

    Returns:
        dict mapping lowercase hexadecimal character references such as
        ``'&#xe001;'`` to the character they stand for. Empty when the font
        has no Unicode cmap or none of its glyph names are known.
    """
    font = TTFont(BytesIO(base64.b64decode(data)))
    # Keep on-disk copies so the font can be inspected by hand.
    font.save('font.ttf')
    font.saveXML('font.xml')

    # getBestCmap() returns None when the font carries no Unicode cmap
    # subtable; treat that the same as an empty mapping instead of crashing.
    best_cmap = font.getBestCmap() or {}

    font_map = {}
    for code_point, glyph_name in best_cmap.items():
        char = NUMBER_MAP.get(glyph_name)
        if char is None:
            # Glyph outside the known table: leave its entity undecoded
            # rather than aborting the whole decode with a KeyError.
            continue
        # '%x' yields the same lowercase hex digits as hex() without '0x'.
        font_map['&#x%x;' % code_point] = char
    return font_map
if __name__ == '__main__':
    # Fetch one company contact page, undo the font-based obfuscation of the
    # digits and print the phone number found on the page.
    url = 'http://b2b.huangye88.com/qiye10000/company_contact.html'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'}
    # A timeout keeps the script from hanging forever on a stalled server;
    # raise_for_status() stops early instead of parsing an error page.
    r = requests.get(url=url, headers=headers, timeout=10)
    r.raise_for_status()
    content = r.text

    # Save the raw page for inspection. The encoding is explicit so that
    # writing non-ASCII text does not depend on the platform's locale.
    with open('./font.html', 'w', encoding='utf-8') as fl:
        fl.write(content)

    # The font is looked up as the base64 payload between ';base64,' and the
    # closing '")' (presumably a CSS url("data:...") reference).
    font_re = re.search(r';base64,(.*?)"\)', content)
    if font_re is None:
        raise SystemExit('no embedded base64 font found in the page')

    font_map = handle_ttfont(font_re.group(1))
    print(font_map)

    # Swap every obfuscated character reference for the real character
    # before the HTML parser gets to resolve the entities itself.
    for k, v in font_map.items():
        content = content.replace(k, v)

    html = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    phone_nodes = html.xpath('//li[contains(label, "手机:")]/span[@class="secret"]/text()')
    if not phone_nodes:
        raise SystemExit('phone number element not found in the page')
    phone_str = phone_nodes[0]
    print(phone_str)
# 标签:map,font,--,content,re,88,str,import,黄页
# 来源: https://www.cnblogs.com/wangshx666/p/16333262.html