pyquery的中文编码问题
作者:互联网
# coding=UTF-8
#
# Demo: extract a Chinese title from a locally saved HTML page with pyquery.
#
# The page is read as raw bytes first so chardet can guess its encoding, and
# is then re-opened in text mode with that encoding before being handed to
# pyquery.  The commented-out experiments at the bottom record the approaches
# that failed (or produced mojibake) together with the output that was noted
# for each of them.
import urllib.request
import pyquery
import requests
import time
import json
import pymysql
import sys
import math
from datetime import datetime
import time
import csv
from time import sleep
import random
from bs4 import BeautifulSoup
import asyncio
from pyppeteer import launch
import pyppeteer
from pyquery import PyQuery as pq
import chardet
import codecs

path = 'D:/code-py-download/02fang/xqxq_demo_utf8.html'

# Read the raw bytes inside a context manager so the handle is always closed.
with open(path, 'rb') as raw_file:
    txt = raw_file.read()

# chardet reports None when it cannot guess an encoding; passing None to
# open() would silently fall back to the locale codec (the 'gbk' failures
# recorded below), so fall back to UTF-8 explicitly instead.
encodings = chardet.detect(txt)['encoding'] or 'utf-8'

with open(path, "r", encoding=encodings) as f:
    content = f.read()

doc = pq(content)
name = doc('.tit.clearfix h1>strong')
title = name.text()
print(title)  # recorded output: 扬州天下花园

# --- Experiment 1: open() without an explicit encoding ----------------------
# path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
# with open(path, "r")as f:
#     content = f.read()
# doc = pq(content)
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)
# recorded result: UnicodeDecodeError: 'gbk' codec can't decode byte

# --- Experiment 2: ANSI-encoded copy of the page -----------------------------
# path='D:/code-py-download/02fang/小区详情demo.txt'
# (original note: not even the ANSI-encoded file works)
# with open(path, "r")as f:
#     content = f.read()
# doc = pq(content)
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)
# recorded output: 扬州天下花园

# --- Experiment 3: let pyquery read the UTF-8 file itself --------------------
# path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
# doc = pq(filename=path, encoding='utf-8')
# (original note: reading the file directly does not work either)
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)
# recorded result: UnicodeDecodeError: 'gbk' codec can't decode byte

# --- Experiment 4: pyquery reads the ANSI file, encoding='utf-8' -------------
# path='D:/code-py-download/02fang/小区详情demo.txt'
# doc = pq(filename=path, encoding='utf-8')
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)
# recorded output (mojibake): æ¬å·¤©ä¸è±å

# --- Experiment 5: pyquery reads the ANSI file, encoding='gbk' ---------------
# path='D:/code-py-download/02fang/小区详情demo.txt'
# doc = pq(filename=path, encoding='gbk')
# name=doc('.tit.clearfix h1>strong')
# title=name.text()
# print(title)
# recorded output (mojibake): æ¬å·¤©ä¸è±å
标签:编码,中文,pq,pyquery,name,title,doc,import,path 来源: https://www.cnblogs.com/yansc/p/15512731.html