資源簡介
用 Python 分別讀取了哈姆雷特和三國演義的 txt 文本文件,並結合 Python 第三方庫 jieba 對三國演義中的人物出場次數進行了統計和排序。

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  4 15:48:05 2018
@author: weilangao
"""
#def getText():
#    txt=open("hamlet.txt","r").read()
#    txt=txt.lower()
#    for ch in '!@#$%^&*():";<>.?/{[]}\+-=~|':#去掉特殊字符
#       txt=txt.replace(ch,"")
#    return txt
#
#hamletTxt=getText()
#words=hamletTxt.split()
#counts={}
#for word in words:
#    counts[word]=counts.get(word,0)+1
#items=list(counts.items())
#items.sort(key=lambda x:x[1],reverse=True)#按照列表的鍵值對的第二個值進行排序
#for i in range(10):
#    word,count=items[i]
#    print("{0:<10}{1:>5}".format(word,count))
from collections import Counter

# Frequent tokens that are not character names; they are removed from the
# ranking after counting.
# NOTE(review): these literals (and the alias keys below) are reproduced as
# they appear in this file; they only match if the input text uses the same
# script (Traditional vs. Simplified) -- confirm against threekingdoms.txt.
EXCLUDES = frozenset({
    "將軍", "卻說", "荊州", "二人", "不可", "不能",
    "如此", "如何", "主公", "軍士", "左右",
})

# Alternative names (and tokens fused with "曰") folded into one canonical
# name so that a character's mentions are counted together.
ALIASES = {
    "諸葛亮": "孔明",
    "孔明曰": "孔明",
    "關公": "關羽",
    "云長": "關羽",
    "玄德": "劉備",
    "玄德曰": "劉備",
    "孟德": "曹操",
    "丞相": "曹操",
}


def count_names(words, excludes=EXCLUDES, aliases=ALIASES):
    """Count word occurrences, merging aliases and dropping excluded words.

    Args:
        words: Iterable of string tokens (e.g. the output of ``jieba.lcut``).
        excludes: Words removed from the result after counting.
        aliases: Mapping from an alternative token to its canonical name.

    Returns:
        A ``collections.Counter`` mapping each remaining word to its count.
        Tokens of length 1 are skipped.
    """
    counts = Counter(
        aliases.get(word, word) for word in words if len(word) != 1
    )
    for word in excludes:
        # pop() with a default: an excluded word may never occur in the
        # text, in which case ``del counts[word]`` would raise KeyError.
        counts.pop(word, None)
    return counts


def main(path="threekingdoms.txt", top=15):
    """Print the ``top`` most frequent words of the text file at ``path``.

    The file is read as UTF-8 and segmented with ``jieba.lcut``. Each output
    line is the word left-aligned in 10 columns followed by its count
    right-aligned in 5 columns.
    """
    # Imported here so count_names() stays usable without jieba installed.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as f:
        txt = f.read()

    counts = count_names(jieba.lcut(txt))
    # most_common() returns at most ``top`` entries, so a short text with
    # fewer distinct words no longer raises IndexError.
    for word, count in counts.most_common(top):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        1483  2018-09-04 16:39  5.py
     文件      180768  2017-03-17 08:38  hamlet.txt
     文件     1767830  2017-03-17 08:38  threekingdoms.txt
- 上一篇:西電軟院算法上機1代碼
- 下一篇:人工魚群算法求函數的最大值
評論
共有 條評論