資源簡介
python實現基于詞典的文本情感分析,包含測試數據和實現代碼

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Sun May 14 16:04:08 2017
wordcloud requires C++ 14.0
running on python 3.5
@author: wangmin
"""
import collections

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
# Read the review data, the negative/positive sentiment lexicons and the
# stop-word list, then merge the two lexicons.
def _read_second_column(path):
    """Return the second comma-separated field of every line in *path*.

    Lines are split naively on ',' (no CSV quoting rules), exactly like the
    original per-file loops.  Blank lines are skipped; a non-blank line
    without a second field still raises IndexError so malformed data is not
    silently ignored.
    """
    values = []
    # The platform default text encoding is used, as in the original
    # open(path, 'r') calls.
    with open(path, 'r') as handle:
        for line in handle:
            line = line.rstrip()
            if not line:
                continue
            values.append(line.split(',')[1])
    return values


# The first entry of evaluation.csv is discarded (presumably the header
# row -- confirm against the data file).
evaluation = _read_second_column("evaluation.csv")[1:]
neg = _read_second_column("negative.csv")
pos = _read_second_column("positive.csv")
mydict = pos + neg
stopwords = _read_second_column("stopwords.csv")
?
# Tokenise every review with jieba (cut_all=False) and keep one token list
# per review, in the same order as `evaluation`.
eva = [list(jieba.cut(text, cut_all=False)) for text in evaluation]
????????
# Drop single-character tokens from every review.
# A fresh list of lists is built so `eva` keeps the unfiltered tokens, and
# every review is processed regardless of how many there are (the previous
# loop was hard-coded to exactly 3321 reviews).
new_eva = [[word for word in words if len(word) >= 2] for words in eva]
??
# Remove stop words (words that carry no meaning for the analysis)
# for word in stopwords:
??????????
# Sentiment classification helper
def GetScore(list, neg_words=None, pos_words=None):
    """Classify a tokenised review as 'NEGATIVE', 'POSITIVE' or 'NEUTRAL'.

    Each token found in the negative lexicon adds one to the negative count;
    otherwise, if it is found in the positive lexicon, it adds one to the
    positive count.  A token present in both lexicons therefore counts as
    negative only.

    Args:
        list: iterable of tokens (words) of one review.  The name shadows
            the builtin but is kept so existing callers keep working.
        neg_words: container of negative words; defaults to the module-level
            ``neg`` lexicon.
        pos_words: container of positive words; defaults to the module-level
            ``pos`` lexicon.

    Returns:
        'NEGATIVE' if negative hits outnumber positive hits, 'POSITIVE' if
        positive hits outnumber negative hits, 'NEUTRAL' on a tie (including
        no hits at all).
    """
    if neg_words is None:
        neg_words = neg
    if pos_words is None:
        pos_words = pos

    neg_s = 0
    pos_s = 0
    for w in list:
        if w in neg_words:
            neg_s += 1
        elif w in pos_words:
            pos_s += 1

    if neg_s > pos_s:
        return 'NEGATIVE'
    if neg_s < pos_s:
        return 'POSITIVE'
    return 'NEUTRAL'
????????
# Compute the sentiment label of every review (same order as `new_eva`).
Score = [GetScore(words) for words in new_eva]
????
def find_all_index(arr, item):
    """Return the indices of every element of *arr* that equals *item*."""
    return [i for i, a in enumerate(arr) if a == item]


# The per-label counts below were disabled in the original script and are
# kept disabled here.
# NEG = find_all_index(Score, 'NEGATIVE')
# POS = find_all_index(Score, 'POSITIVE')
# NEU = find_all_index(Score, 'NEUTRAL')
# print(len(NEG))
# print(len(POS))
# print(len(NEU))
????
# Count how often each word occurs across all filtered reviews.
wf = {}
for words in new_eva:
    for word in words:
        wf[word] = wf.get(word, 0) + 1
def Sort_by_count(d):
    """Return an OrderedDict of *d*'s items sorted by value, largest first.

    Items with equal values keep their relative order from *d* because
    ``sorted`` is stable.  *d* itself is not modified.
    """
    return collections.OrderedDict(sorted(d.items(), key=lambda t: -t[1]))
# Order the frequency table by descending count and collect its
# (word, count) pairs.
wf = Sort_by_count(wf)
top_key = list(wf.items())
# NOTE(review): top_word skips the most frequent entry ([1:51]) while the
# print shows the first 49 entries ([0:49]); both slices are kept exactly as
# in the original -- confirm which range is intended.
top_word = top_key[1:51]
print(top_key[0:49])
# for key, values in wf.items():
#     print(key + "%d" % values)
# Build the text for the word cloud.
# Each review's token list in `new_eva` is replaced in place by its
# space-joined string (for all reviews, not a hard-coded 3322), and the
# strings are then concatenated after the initial 'a'.
# NOTE(review): reviews are concatenated without a separator, so the last
# word of one review is glued to the first word of the next -- kept as in
# the original; confirm whether a space was intended.
for i in range(len(new_eva)):
    new_eva[i] = " ".join(new_eva[i])
word_space_split = 'a' + "".join(new_eva)

word_space_split = word_space_split.replace('word', '')
?????
?
??
abel_mask?=?np.array(Image.open(‘C:/Users/wangmin/Pictures/aaa/ab
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-11-04 13:47  python實現基于詞典的文本情感分析\
     文件        3902  2017-05-16 01:37  python實現基于詞典的文本情感分析\python\python實現.py
     文件      124695  2017-05-14 16:35  python實現基于詞典的文本情感分析\python\positive.csv
     文件      110411  2017-05-14 16:33  python實現基于詞典的文本情感分析\python\negative.csv
     文件      254017  2017-05-14 16:10  python實現基于詞典的文本情感分析\python\evaluation.csv
     文件       33619  2017-05-15 17:23  python實現基于詞典的文本情感分析\python\Rplot.png
     目錄           0  2019-11-04 13:47  python實現基于詞典的文本情感分析\python\
     文件      124468  2017-05-14 22:33  python實現基于詞典的文本情感分析\python\stopwords.csv
     目錄           0  2017-05-16 01:37  python\
     文件       33619  2017-05-15 17:23  python\Rplot.png
     文件      254017  2017-05-14 16:10  python\evaluation.csv
     文件      110411  2017-05-14 16:33  python\negative.csv
     文件      124695  2017-05-14 16:35  python\positive.csv
     文件        3902  2017-05-16 01:37  python\python實現.py
     文件      124468  2017-05-14 22:33  python\stopwords.csv
評論
共有 條評論