資源簡介
這是一個用 Python 編寫的情感文本分析程序,定義了兩種 term weight 實現,分別為 TF 和 BOOL,並實現了特徵選擇算法。文件夾中附帶數據集。

代碼片段和文件信息
# coding=gbk
import re
import numpy as np
from numpy import *
#################################文本處理############################################
def?testTextParse(filename):
????text?=?open(filename).read()
????pattern?=?‘(.*?) ‘
????str_list?=?re.findall(pattern?text?re.S)
????doc_list?=?[]
????ptn?=?re.compile(‘\\s*‘)
????for?doc?in?str_list:
????????doc?=?ptn.split(doc)
????????doc_list.append([term?for?term?in?doc?if?len(term)>=1?and?term?!=?‘‘and?term?!=?‘.‘and?term?!=?‘!‘and?term?!=?‘?‘and?term?!=?‘(‘and?term?!=?‘)‘
?????????????????????????and?term?!=?‘\“‘and?term?!=?‘\‘‘
?????????????????????????and?term?!=?‘\xa1\xa3‘?and?term?!=?‘\xa3\xac‘?and?term?!=?‘\xa3\xbf‘and?term?!=?‘\xa3\xa1‘and?term?!=?‘\xa3\xbb‘
?????????????????????????and?term?!=?‘\xa3\xba‘and?term?!=?‘\xa1\xb0‘and?term?!=?‘\xa1\xb1‘and?term?!=?‘\xa1\xae‘and?term?!=?‘\xa1\xaf‘
?????????????????????????and?term?!=?‘\xa3\xa8‘and?term?!=?‘\xa3\xa9‘and?term?!=?‘\xa1\xa2‘
?????????????????????????])?
????return?doc_list
def?cvTextParse(filenamestartend):????#用于交叉驗證的文檔解析
????text?=?open(filename).read()
????pattern?=?‘(.*?) ‘
????str_list?=?re.findall(pattern?text?re.S)
????doc_list?=?[]
????start_index?=?0
????end_index?=?start
????ptn?=?re.compile(‘\\s*‘)
????for?doc?in?str_list:
????????start_index?+=1
????????
????????if?start_index?>=?start:
????????????end_index?+=1
????????????if?end_index?<=?end:
????????????????doc?=?ptn.split(doc)
????????????????doc_list.append([term?for?term?in?doc?if?len(term)>=1?and?term?!=?‘‘and?term?!=?‘.‘and?term?!=?‘!‘and?term?!=?‘?‘and?term?!=?‘(‘and?term?!=?‘)‘
?????????????????????????????????and?term?!=?‘\“‘and?term?!=?‘\‘‘
?????????????????????????????????and?term?!=?‘\xa1\xa3‘?and?term?!=?‘\xa3\xac‘?and?term?!=?‘\xa3\xbf‘and?term?!=?‘\xa3\xa1‘and?term?!=?‘\xa3\xbb‘
?????????????????????????????????and?term?!=?‘\xa3\xba‘and?term?!=?‘\xa1\xb0‘and?term?!=?‘\xa1\xb1‘and?term?!=?‘\xa1\xae‘and?term?!=?‘\xa1\xaf‘
?????????????????????????????????and?term?!=?‘\xa3\xa8‘and?term?!=?‘\xa3\xa9‘and?term?!=?‘\xa1\xa2‘
?????????????????????????????????])
????????????????
????return?doc_list
def outputTextParse(filename):
    """Return the content of *filename* with all whitespace removed.

    The file is read as bytes so that multi-byte encoded text passes through
    untouched; only whitespace bytes are stripped.

    The original pattern was ``| |\\s*``.  Its leading empty alternative
    matches the empty string at every position, so the substitution never
    removed anything; the pattern used here removes whitespace runs, which is
    what the remaining alternatives describe.
    NOTE(review): the empty alternative (and the lone space) look like the
    remains of markup delimiters lost when the listing was extracted --
    restore them here if the dataset wraps its text in such delimiters.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a byte string without any whitespace.
    """
    with open(filename, 'rb') as handle:
        text = handle.read()
    return re.sub(b'\\s+', b'', text)
##############################類別向量生成#############################################
def gen_class_list_n(k):
    """Build the class-label list for negative reviews.

    Args:
        k: Number of labels to generate.

    Returns:
        A list of ``k`` zeros (empty when ``k`` is 0 or negative).
    """
    return [0] * k
def gen_class_list_p(k):
    """Build the class-label list for positive reviews.

    Args:
        k: Number of labels to generate.

    Returns:
        A list of ``k`` ones (empty when ``k`` is 0 or negative).
    """
    return [1] * k
##############################詞條向量生成#############################################
def createTermSet(doc_list):
    """Collect every distinct term that occurs in any document.

    Args:
        doc_list: Iterable of documents, each an iterable of hashable terms.

    Returns:
        A list holding each distinct term exactly once.  The order is that of
        set iteration and is therefore unspecified.
    """
    terms = set()
    for doc in doc_list:
        # Update in place instead of building a new union set per document.
        terms.update(doc)
    return list(terms)
def?saveTermSet(te
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2014-01-10 11:07  樸素貝葉斯文本分類\
     目錄           0  2014-01-10 11:07  樸素貝葉斯文本分類\dataset\
     文件      934320  2013-12-09 22:09  樸素貝葉斯文本分類\dataset\negative.txt
     文件      600153  2013-12-09 22:09  樸素貝葉斯文本分類\dataset\positive.txt
     文件          20  2013-12-13 20:39  樸素貝葉斯文本分類\dataset\testDataset0.txt
     文件          99  2013-12-17 10:00  樸素貝葉斯文本分類\dataset\testDataset1.txt
     文件       15973  2013-12-18 22:05  樸素貝葉斯文本分類\nbayes.py
     文件          87  2013-12-18 13:27  樸素貝葉斯文本分類\nbayes_calssify.py
     文件         108  2013-12-18 09:43  樸素貝葉斯文本分類\nbayes_validate.py
     文件         457  2013-12-18 16:26  樸素貝葉斯文本分類\代碼說明.txt
評論
共有 條評論