其他分享
首页 > 其他分享 > 黄页88 -- 字体解密

黄页88 -- 字体解密

作者:互联网

# https://kekee000.github.io/fonteditor/

import re
import requests
from io import BytesIO
import base64
from fontTools.ttLib import TTFont
from lxml import etree


# Lookup table: glyph name (as found in the font's cmap values) -> the literal
# character that glyph stands for. Covers the ten digits and a few symbols.
NUMBER_MAP = dict(
    zero='0',
    one='1',
    two='2',
    three='3',
    four='4',
    five='5',
    six='6',
    seven='7',
    eight='8',
    nine='9',
    asterisk='*',
    plus='+',
    hyphen='-',
    slash='/',
    numbersign='#',
)


def handle_ttfont(data):
    """Build an HTML-entity -> character map from a base64-encoded font.

    The font's best Unicode cmap is read, and every code point whose glyph
    name is listed in ``NUMBER_MAP`` becomes one entry keyed by the
    hexadecimal character reference ``&#x<hex>;`` (lowercase hex digits).

    The decoded font is also written to ``font.ttf`` and ``font.xml`` in the
    current working directory.

    Args:
        data: Base64 text of the font file, as accepted by
            ``base64.b64decode``.

    Returns:
        dict mapping entity strings of the form ``'&#x<hex>;'`` to the
        matching character from ``NUMBER_MAP``. Empty when the font has no
        Unicode cmap or none of its glyph names are known.
    """
    font = TTFont(BytesIO(base64.b64decode(data)))
    # Dumps kept so the font can still be inspected by hand after a run.
    font.save('font.ttf')
    font.saveXML('font.xml')

    # getBestCmap() returns None when the font has no Unicode cmap subtable;
    # treat that as "nothing to map" instead of crashing on .items().
    best_cmap = font.getBestCmap() or {}

    font_map = {}
    for code_point, glyph_name in best_cmap.items():
        char = NUMBER_MAP.get(glyph_name)
        if char is None:
            # A glyph name outside the known table used to abort the whole
            # decode with KeyError; skip it so the remaining glyphs still map.
            continue
        font_map['&#x{:x};'.format(code_point)] = char

    return font_map


def main():
    """Fetch the contact page, undo the font obfuscation and print the phone.

    Steps: download the page, save a copy to ``./font.html``, extract the
    base64 font embedded in the page, turn it into an entity -> character map
    with ``handle_ttfont``, substitute those entities in the HTML, then print
    the first text node selected by the phone XPath.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    url = 'http://b2b.huangye88.com/qiye10000/company_contact.html'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'}

    # Without a timeout requests may block indefinitely on a stalled server.
    r = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    content = r.text

    # Explicit encoding: the page contains non-ASCII text, and the platform
    # default encoding is not guaranteed to be able to represent it.
    with open('./font.html', 'w', encoding='utf-8') as fl:
        fl.write(content)

    font_re = re.search(r';base64,(.*?)"\)', content)
    if not font_re:
        print('No embedded base64 font found in the page.')
        return

    font_map = handle_ttfont(font_re.group(1))
    print(font_map)

    # Swap every obfuscated entity for the character its glyph draws.
    for k, v in font_map.items():
        content = content.replace(k, v)

    html = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    phones = html.xpath('//li[contains(label, "手机:")]/span[@class="secret"]/text()')
    if not phones:
        # Indexing [0] on an empty result used to raise IndexError.
        print('Phone number element not found in the page.')
        return
    print(phones[0])


if __name__ == '__main__':
    main()

标签:map,font,--,content,re,88,str,import,黄页
来源: https://www.cnblogs.com/wangshx666/p/16333262.html