-
大小: 1.89MB　文件類型: .rar　金幣: 2　下載: 0 次　發布日期: 2023-08-17
- 語言: C/C++
- 標籤: 文本分類 數據挖掘
資源簡介
基於文本內容的垃圾郵件過濾程序，VC++6.0，利用貝葉斯算法進行中文文本分類，過濾垃圾郵件

代碼片段和文件信息
//?BYS.cpp:?implementation?of?the?BYS?class.
//
//////////////////////////////////////////////////////////////////////
#include?“stdafx.h“
#include?“Mail.h“
#include?“BYS.h“
#ifdef?_DEBUG
#undef?THIS_FILE
static?char?THIS_FILE[]=__FILE__;
#define?new?DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
//?Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Puts the object into its "not loaded" state: no tables allocated, both
// mail counters at zero, and the mode flag cleared.
BYS::BYS()
{
	// Counters first, then the four table pointers, then the flag.
	goodsum = badsum = 0;
	arrword = arrbad = arrgood = arrgl = NULL;
	mode = FALSE;
}
// Releases the four tables allocated with new[] in Create().
// delete[] on a null pointer is a no-op, so no null checks are needed for
// an object on which Create() was never called.
BYS::~BYS()
{
	delete[] arrword;
	delete[] arrbad;
	delete[] arrgood;
	delete[] arrgl;
}
void?BYS::Create(CString?path)
{
CString?strpath=path;
strpath+=“dir\\Result.txt“;
strbysdbpathsum=strpath;
CString?strLineFirstStr;
//讀取郵件總數(shù)
CStdioFile?csFile(strpathCFile::modeRead);?
for(int?i=0;i<2;i++)
{
csFile.ReadString(strLine);
strLine.TrimLeft();???
strLine.TrimRight();?
//獲取
if(strLine.Left(4)==“good“)
{
strLine=strLine.Mid(4strLine.GetLength()-4);
strLine.TrimLeft();???
goodsum=atoi(strLine);
}
if(strLine.Left(4)==“spam“)
{
strLine=strLine.Mid(4strLine.GetLength()-4);
strLine.TrimLeft();???
badsum=atoi(strLine);
}
}
csFile.Close();
strpath=path;
strpath+=“dir\\WordList.txt“;
strbysdbpath=strpath;
CStdioFile?csFileList(strpathCFile::modeRead);?
//建立哈希表
int?size=72*94;
arrword=new?CStringArray[size];
arrgood=new?CStringArray[size];
arrbad?=new?CStringArray[size];
arrgl??=new?CStringArray[size];
int?indexpos;
CString?tmp;
while(csFileList.ReadString(strLine))
{
strLine.TrimLeft();???
strLine.TrimRight();?
FirstStr=strLine.Left(2);
index=GetCharIndex(FirstStr);
if(index>72*94-1)?
{
continue;
}
if(-1!=index)
{
/* for(int?k=0;k {
char?a=strLine.GetAt(k);
}*/
//加載詞/字
pos=strLine.Find(90);
tmp=strLine.Left(pos);
arrword[index].Add(tmp);
strLine=strLine.Right(strLine.GetLength()-pos-1);
strLine.TrimLeft();
//加載good
pos=strLine.Find(90);
tmp=strLine.Left(pos);
arrgood[index].Add(tmp);
strLine=strLine.Right(strLine.GetLength()-pos-1);
strLine.TrimLeft();
//加載bad
pos=strLine.Find(90);
tmp=strLine.Left(pos);
arrbad[index].Add(tmp);
strLine=strLine.Right(strLine.GetLength()-pos-1);
strLine.TrimLeft();
//加載gl
arrgl[index].Add(strLine);
}
}
mode=TRUE;
}
int?BYS::GetCharIndex(CString?strName)//獲取hash下標
{
TBYTE?ucHigh?ucLow;
????int??nCode=-1;
if(strName.GetLength()>0)
{
if?((TBYTE)strName[0]?>?0x80?)
{
ucHigh?=?(TBYTE)strName[0];
ucLow??=?(TBYTE)strName[1];
if?(!(ucHigh?0xa1?||?ucLow?0xa1))?nCode?=(ucHigh?-176)*94+ucLow-161;//hash函數(shù)
}
}
return?nCode;
}
float?BYS::bysjs(CStringArray?*arr)
{
CStringArray?tot
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       6422  2008-05-22 12:05  Mail\BYS.cpp
     文件       1070  2008-05-26 18:57  Mail\BYS.h
     文件       1523  2008-05-22 12:28  Mail\DlgLogin.cpp
     文件       1227  2008-05-17 10:49  Mail\DlgLogin.h
     文件        517  2008-05-17 11:25  Mail\Email.cpp
     文件        482  2008-05-17 11:25  Mail\Email.h
     文件      28060  2012-10-11 00:18  Mail\Mail.aps
     文件       3286  2012-10-11 01:42  Mail\Mail.clw
     文件       2330  2008-05-17 12:38  Mail\Mail.cpp
     文件       5022  2008-05-20 17:17  Mail\Mail.dsp
     文件        533  2008-05-17 09:12  Mail\Mail.dsw
     文件       1357  2008-05-17 09:22  Mail\Mail.h
     文件     205824  2012-10-11 01:42  Mail\Mail.ncb
     文件      51712  2012-10-11 01:42  Mail\Mail.opt
     文件       2764  2012-10-11 00:19  Mail\Mail.plg
     文件       8839  2008-05-23 13:09  Mail\Mail.rc
     文件      14249  2008-07-24 20:35  Mail\MailDlg.cpp
     文件       2358  2008-05-26 19:04  Mail\MailDlg.h
     文件       2089  2008-05-20 10:39  Mail\MailShow.cpp
     文件       1308  2008-05-20 03:43  Mail\MailShow.h
     文件      14344  2008-05-26 19:03  Mail\MyEMail.cpp
     文件       1001  2008-05-26 19:00  Mail\MyEMail.h
     文件       9252  2008-05-26 18:54  Mail\MyPOP3.cpp
     文件       1443  2008-05-26 18:47  Mail\MyPOP3.h
     文件       3543  2008-05-17 09:12  Mail\ReadMe.txt
     文件      16703  2012-10-11 00:19  Mail\Release\BYS.obj
     文件      10312  2012-10-11 00:19  Mail\Release\DlgLogin.obj
     文件     270336  2012-10-11 00:19  Mail\Release\Mail.exe
     文件      14565  2012-10-11 00:19  Mail\Release\Mail.obj
     文件    5657800  2012-10-11 00:19  Mail\Release\Mail.pch
............此處省略28個文件信息
評論
共有 條評論