# Python:用 jieba 进行分词并可视化
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import pandas as pd
# Word-frequency script: tokenize news.txt with jieba, save the
# space-separated tokens to results.txt, then chart the 80 most frequent
# multi-character words as a bar chart.

# Read the whole input; the context manager closes the file even if the
# read fails.
with open("news.txt", encoding='UTF-8') as source:
    txt = source.read()

words = jieba.lcut(txt)

# Write the tokens immediately so the output is on disk (and the handle
# released) before the blocking plot window opens.
with open('results.txt', 'w', encoding='UTF-8') as result:
    result.write(' '.join(words))

# Count every token except those exactly one character long (single
# characters and punctuation marks are skipped).
counts = Counter(word for word in words if len(word) != 1)

# most_common() sorts by count, descending; words with equal counts keep
# their first-seen order.
items = counts.most_common()
df = pd.DataFrame(items, columns=['word', 'count'])
print(df)

# Pick a CJK-capable font before any text is created so the Chinese tick
# labels render as glyphs.
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(20, 8), dpi=80)
top = df[:80]  # only the 80 most frequent words are plotted
a = top["word"]
b = top["count"]
positions = range(len(a))
plt.bar(positions, b, width=0.3)  # draw the bars and set their width
plt.xticks(positions, a, rotation=55)  # label each bar with its word
plt.show()
# 版权声明:本文为 liudaoqun 原创文章,遵循 CC BY-SA 4.0 版权协议,转载请附上原文出处链接和本声明。