資源簡介
裡面包含情感詞典、否定詞、停用詞、程度副詞等詞表,還有 Python 代碼,使用的 IDE 是 PyCharm。
代碼片段和文件信息
import codecs
import os
import re
from collections import defaultdict

import jieba
def seg_word(sentence):
    """Segment *sentence* with jieba and drop stop words.

    Stop words are read from ``stopwords.txt`` (UTF-8, one entry per line,
    surrounding whitespace stripped) in the current working directory on
    every call.

    Args:
        sentence: Text to segment.

    Returns:
        A list of the tokens yielded by ``jieba.cut`` that do not appear in
        the stop-word file, in their original order.

    Raises:
        OSError: If ``stopwords.txt`` cannot be opened.
    """
    tokens = list(jieba.cut(sentence))

    # The context manager closes the file even if reading fails part-way.
    with open('stopwords.txt', 'r', encoding='utf-8') as stop_file:
        stopwords = {line.strip() for line in stop_file}

    return [token for token in tokens if token not in stopwords]
def _load_score_table(path, separator, encoding):
    """Read ``word<separator>score`` lines from *path* into a dict of strings."""
    table = {}
    with open(path, 'r', encoding=encoding) as table_file:
        for line in table_file:
            fields = line.split(separator)
            # Lines without a separator carry no score and are skipped.
            if len(fields) > 1:
                table[fields[0]] = fields[1]
    return table


def classify_words(word_dict):
    """Split words into sentiment words, negation words and degree adverbs.

    Three lexicon files are read from the current working directory:

    * ``BosonNLP_sentiment_score.txt`` (UTF-8): ``word score`` per line.
    * ``notDic.txt`` (UTF-8): one negation word per line.
    * ``degree.txt`` (GBK): ``word,value`` per line.

    Args:
        word_dict: Mapping of word -> index, as produced by ``list_to_dict``.

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by the
        word's index. ``sen_word`` and ``degree_word`` hold the raw string
        that follows the separator in the lexicon line (including a trailing
        newline when the line has exactly two fields); ``not_word`` maps
        every negation word's index to ``-1``.

    Raises:
        OSError: If any lexicon file cannot be opened.
    """
    # NOTE(review): the separator literals were lost in the extracted source;
    # ' ' and ',' are reconstructed -- confirm against the data files.
    sen_dict = _load_score_table('BosonNLP_sentiment_score.txt', ' ', 'utf-8')

    # Negation words have no score, so a set is enough for membership tests.
    with open('notDic.txt', 'r', encoding='utf-8') as not_word_file:
        not_words = {line.strip('\n') for line in not_word_file}

    degree_dic = _load_score_table('degree.txt', ',', 'gbk')

    sen_word = {}
    not_word = {}
    degree_word = {}
    # Precedence when a word is in several lexicons:
    # negation word > degree adverb > sentiment word.
    for word, index in word_dict.items():
        if word in not_words:
            not_word[index] = -1
        elif word in degree_dic:
            degree_word[index] = degree_dic[word]
        elif word in sen_dict:
            sen_word[index] = sen_dict[word]

    return sen_word, not_word, degree_word
def list_to_dict(word_list):
    """Map each word in *word_list* to its index in the list.

    The index stands for the word's position in the segmented document.
    When a word occurs more than once, the index of its last occurrence
    wins because later entries overwrite earlier ones.

    Args:
        word_list: Sequence of segmented words.

    Returns:
        A dict of word -> index; empty when *word_list* is empty.
    """
    return {word: index for index, word in enumerate(word_list)}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       6276  2007-10-21 16:16  正面情感詞語(中文).txt
     文件      14190  2007-10-21 16:16  正面情感詞語(英文).txt
     文件      30409  2007-10-21 16:16  正面評價詞語(中文).txt
     文件      61667  2007-10-21 16:16  正面評價詞語(英文).txt
     文件       1480  2007-10-21 16:16  程度級別詞語(中文).txt
     文件       2114  2007-10-21 16:16  程度級別詞語(英文).txt
     文件       9952  2007-10-21 16:16  負面情感詞語(中文).txt
     文件      18511  2007-10-21 16:16  負面情感詞語(英文).txt
     文件      26101  2007-10-21 16:16  負面評價詞語(中文).txt
     文件      57704  2007-10-21 16:16  負面評價詞語(英文).txt
     文件        289  2007-10-21 16:16  主張詞語(中文).txt
     文件        451  2007-10-21 16:16  主張詞語(英文).txt
     文件        181  2018-12-22 19:41  sentiment\.idea\encodings.xml
     文件        300  2018-12-22 18:21  sentiment\.idea\misc.xml
     文件        270  2018-12-22 18:21  sentiment\.idea\modules.xml
     文件        466  2018-12-22 18:21  sentiment\.idea\sentiment.iml
     文件      21273  2019-01-16 21:27  sentiment\.idea\workspace.xml
     文件       6472  2019-01-16 17:21  sentiment\1_1.py
     文件        714  2019-01-14 22:26  sentiment\1_2.py
     文件    2528956  2019-01-15 15:57  sentiment\BosonNLP_sentiment_score.txt
     文件       2070  2019-01-15 15:29  sentiment\degree.txt
     文件        553  2017-09-03 15:38  sentiment\notDic.txt
     文件       9213  2019-01-15 16:42  sentiment\stopwords.txt
     文件         55  2018-12-22 18:21  sentiment\venv\Lib\site-packages\easy-install.pth
     文件       1403  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\analyzer.py
     文件    6200957  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\idf.txt
     文件       3772  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\textrank.py
     文件       4310  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\tfidf.py
     文件        501  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\__init__.py
     文件    5071852  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\dict.txt
............此處省略438個文件信息
評論
共有 條評論