编写程序，统计《红楼梦》中出场次数最多的前20位人物。
# a6.6CalRedChamber
from collections import Counter

import jieba
# Forms of address registered with jieba's dictionary (via ``add_word``)
# before the novel is segmented.
custom_words = (
    "二姑娘", "三姑娘", "四姑娘", "林姑娘", "史姑娘", "邢姑娘", "琴姑娘",
)

# Frequent tokens that must not appear in the ranking; they are dropped from
# the tally.
excludes = {
    "什么", "一个", "我们", "那里", "你们", "如今", "说道", "知道", "起来", "姑娘",
    "这里", "出来", "他们", "众人", "自己", "一面", "太太", "只见", "怎么", "奶奶",
    "两个", "没有", "不是", "不知", "这个", "听见", "这样", "进来", "咱们", "告诉",
    "就是", "东西", "回来", "只是", "大家", "老爷", "只得", "丫头", "这些", "不敢",
    "出去", "所以", "不过", "的话", "不好", "姐姐", "一时", "不能", "过来", "心里",
    "二爷", "如此", "今日", "银子", "几个", "答应", "二人", "还有", "只管", "这么",
    "说话", "一回", "那边", "这话", "外头", "打发", "自然", "今儿", "罢了", "屋里",
    "那些", "听说", "小丫头", "如何", "问道", "看见", "妹妹", "人家", "不用", "媳妇",
}

# Alternative names folded into one canonical name before counting.
aliases = {
    "老太太": "贾母",
    "凤姐儿": "凤姐",
    "黛玉": "林黛玉",
    "林姑娘": "林黛玉",
    "三姑娘": "探春",
}


def count_characters(words, excluded=None):
    """Tally how often each name occurs in a sequence of tokens.

    Single-character tokens are skipped, aliases are merged into their
    canonical name, and tokens whose canonical form is in *excluded* are
    ignored.

    Args:
        words: Iterable of token strings.
        excluded: Collection of tokens to leave out of the tally. Defaults
            to the module-level ``excludes`` set.

    Returns:
        A ``collections.Counter`` mapping canonical name to occurrence count.
    """
    if excluded is None:
        excluded = excludes
    counts = Counter()
    for word in words:
        if len(word) == 1:
            continue
        name = aliases.get(word, word)
        # Filtering here instead of deleting afterwards means an excluded
        # word that never occurs in the text cannot raise KeyError.
        if name not in excluded:
            counts[name] += 1
    return counts


def main(path="redchamber.txt", top_n=20):
    """Segment the text at *path* and print the *top_n* most frequent names.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: Maximum number of ranking rows to print.
    """
    for word in custom_words:
        jieba.add_word(word)
    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as novel:
        text = novel.read()
    counts = count_characters(jieba.lcut(text))
    # most_common() returns at most top_n rows, so a short text with fewer
    # distinct names no longer raises IndexError.
    for name, count in counts.most_common(top_n):
        print(f"{name:<10}{count:>5}")


if __name__ == "__main__":
    main()
版权声明：本文为Ashley_hello原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接和本声明。