其他分享
首页 > 其他分享 > 词云

词云

作者:互联网

from wordcloud import WordCloud,ImageColorGenerator
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Render a word cloud from the text of '围城.txt'.
# Only the read happens inside the `with`, so the file is closed before the
# (blocking) plot window is shown.
with open('围城.txt','r',encoding='utf-8') as f1:
    data=f1.read()

# Segment the text with jieba (cut_all=False) and join the tokens with commas
# so WordCloud receives one delimited string of words.
wordlist=jieba.cut(data,cut_all=False)
data=','.join(wordlist)

# Raw string: the backslashes in a Windows path must not be read as escape
# sequences ('\W', '\F', '\S' are invalid escapes in a normal string literal).
# NOTE(review): the path is Windows-specific; presumably a font with Chinese
# glyphs is required here so the words render correctly - confirm on other OSes.
font=r'C:\Windows\Fonts\SIMLI.TTF'

wc1=WordCloud(font_path=font).generate(data)

plt.imshow(wc1,interpolation='bilinear')
plt.axis('off')
plt.show()
    
import jieba as j
# Count how often each multi-character word occurs in '围城.txt' and print
# the (up to) 15 most frequent ones as an aligned two-column table.

# Words to drop from the ranking; empty by default.
excludes=set()
# NOTE(review): the same file is opened as utf-8 earlier in this script but as
# gb18030 here - confirm the file's real encoding and use it in both places.
with open('围城.txt','r',encoding='gb18030') as f2:
    txt=f2.read()
words=j.lcut(txt)
# Different names used for the same character are merged under one key.
aliases={'柔嘉':'孙柔嘉','孙小姐':'孙柔嘉','鸿渐':'方鸿渐'}
counts={}
for word in words:
    # Single-character tokens are skipped entirely.
    if len(word)==1:
        continue
    rword=aliases.get(word,word)
    counts[rword]=counts.get(rword,0)+1
# Remove each excluded word itself (not a stale loop variable); pop() with a
# default tolerates excluded words that never occurred in the text.
for excluded in excludes:
    counts.pop(excluded,None)
items=list(counts.items())
# Highest count first; the stable sort keeps first-seen order among ties.
items.sort(key=lambda x:x[1],reverse=True)
# Slicing avoids an IndexError when fewer than 15 distinct words exist.
for word,count in items[:15]:
    print('{0:<10}{1:>5}'.format(word,count))

标签:rword,word,plt,词云,import,counts,txt
来源: https://www.cnblogs.com/sk822911/p/14673393.html