資源簡介
這是我研究生階段的一個作業,要用貝葉斯分類器實現垃圾郵件的分類。
第一次是用 C 語言實現。
第二次用 Java,並且使用了哈希表,以保證計算速度。

代碼片段和文件信息
/* NOTE(review): the original header names were lost in extraction (seven bare
 * #include lines). The four below are the ones the visible code requires;
 * the remaining three could not be recovered - confirm against filter.c. */
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Shorthand for the list node type, and its size in bytes as passed to malloc. */
#define TYPE struct words
#define LEN sizeof(struct words)

/* Forward declaration so the prototypes below refer to the file-scope type. */
struct words;

/* Builds a word list from an already-open file; `classify` is the class label
 * given by the caller ("ham", "spam" or "unknow" in main). */
TYPE *create_list(FILE *fp, char classify[]);
/* Prints a word list. */
void print_list(TYPE *);
/* Called by main with the training list first and the test-file list second. */
void classification(TYPE *, TYPE *);
/* One node of a singly linked list of words read from a mail file. */
struct words {
    char word[5000];      /* the word text */
    int count;            /* number of times this word appears in its file */
    char classify[10];    /* class label of this word */
    double pHam;          /* probability of class ham for this word */
    double pSpam;         /* probability of class spam for this word */
    struct words *next;   /* next node in the list, or NULL at the tail */
};
//int TotCount = 0;
//int DifCount = 1;
int numSpamWords = 0;  /* all distinct words of class spam */
int numHamWords = 0;   /* all distinct words of class ham */
char finalClass[5];    /* final class of the test file; printed by main */
int numHamFile = 0;    /* number of ham training files */
int numSpamFile = 0;   /* number of spam training files */
int?main(int?argc?char?*argv[?])
{
TYPE?*head=NULL*temp?*last?*head1;
???? FILE?*fp?*fp1;
DIR?*dirptr?=?NULL;
int?count?=?0;????//count?that?the?amount?of?files
struct?dirent?*entry;
char?trainDir[300]?testFile[300]?trainFile[300];
strcpy(trainDirargv[1]);
strcpy(testFileargv[2]);
if((dirptr=opendir(trainDir))==NULL)
{
printf(“There?are?no?this?dir“);
return?1;
}
else
{
while((entry=readdir(dirptr))!=NULL)
{
if((entry->d_type)==8)
{
strcpy(trainFile?trainDir);
strcat(trainFile?“/“);
strcat(trainFileentry->d_name);
if((fp?=?fopen(trainFile“r“))==NULL)
{
printf(“Can‘t?open?this?file\n“);
exit?(1);
}
if(strstr(entry->d_name?“ham“))
{
temp=create_list(fp“ham“);
numHamFile++;
}
if(strstr(entry->d_name?“spam“))
{
temp=create_list(fp“spam“);
numSpamFile++;
}
fclose(fp);
count++;
if(count==1)
head=temp;
else
last->next=temp;
while((temp->next)!=NULL)
temp=temp->next;
last=temp;
}
}
closedir(dirptr);
}
if((fp1=fopen(testFile“r“))==NULL)
{
printf(“Can?not?open?this?test?file\n“);
exit?(1);
}
head1=create_list(fp1“unknow“);
???? fclose(fp1);
//print_list(head4);
classification(headhead1);????//classify?the?test?file
printf(“%s\n“finalClass);
}
TYPE?*create_list(FILE?*fp?char?classify[10])
{
TYPE?*in*last*head=NULL*temp;
char?ch;
char?chr[5000];
int?n?=?0;
/*int?TotCount?=?0;
int?DifCount?=?1;*/
ch?=?fgetc(fp);???????????????????????????????????//get?the?first?char?in?the?file?
while(ch==‘?‘)
ch?=?fgetc(fp);
while(ch!=EOF)
????{
//if?the?‘ch‘?is?‘?‘jump?it?if?the?‘ch‘?is?EOF?exit?this?loop
int?i?=?0;
if(ch?==?EOF)
break;
//create?a?node
if((in?=?malloc(LEN))==NULL)???????
{
printf(“out?of?memory!\n“);
exit(0);
}
//get?words?from?file
???? while(((ch>=‘a‘)&&(ch<=‘z‘)
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       5558  2007-11-20 06:32  垃圾郵件分類器(c&Java)\filter.c
     文件       9879  2007-12-18 02:12  垃圾郵件分類器(c&Java)\filter.java
     文件    1467809  2007-12-15 05:36  垃圾郵件分類器(c&Java)\knowledge.txt
     文件        773  2007-12-11 13:59  垃圾郵件分類器(c&Java)\Word.java
     目錄          0  2009-05-28 08:39  垃圾郵件分類器(c&Java)
----------- ---------  ---------- -----  ----
              1484019                    5
評論
共有 條評論