資源簡介
使用貝葉斯分類器實現文本文件的分類判別。
1、可以指定訓練集進行文本訓練;
2、使用訓練好的特徵值進行未知文件類型的判別。

代碼片段和文件信息
package?MyTextClassify;
import?java.io.File;
import?java.io.FileInputStream;
import?java.io.FileNotFoundException;
import?java.io.IOException;
import?java.io.InputStreamReader;
import?java.io.StringReader;
import?java.util.HashMap;
import?java.util.HashSet;
import?java.util.Iterator;
import?java.util.Map;
import?java.util.Set;
import?java.util.Vector;
import?jeasy.analysis.MMAnalyzer;
import?org.apache.lucene.analysis.Analyzer;
import?org.apache.lucene.analysis.Token;
import?org.apache.lucene.analysis.TokenStream;
public?class?BeyesClassification{
private?String?label=null;
private?long?trainTime=0;
public?String[]?labelsName=null;
public?Vector?labels=null;
public?Set?vocabulary=new?HashSet();
public?String?trainPath=null;
public?String?testPath=null;
public?int?findMax(double[]?values){
double?max=values[0];
int?mark=0;
for(int?i=0;i if(values[i]>max){
max=values[i];
mark=i;
}
}
return?mark;
}
public?String[]?sort(String[]?pData?int?left?int?right){
String?middlestrTemp;
int?i?=?left;
int?j?=?right;
middle?=?pData[(left+right)/2];
do{
while((pData[i].compareTo(middle)<0)?&&?(i i++;
while((pData[j].compareTo(middle)>0)?&&?(j>left))
j--;
if(i<=j){
strTemp?=?pData[i];
pData[i]?=?pData[j];
pData[j]?=?strTemp;
i++;
j--;
}
// for(int?k=0;k // System.out.print(pData[k]+“?“);
// }
// System.out.println();
}while(i if(left sort(pDataleftj);?//遞歸調用
if(right>i)
sort(pDatairight);?//遞歸調用
return?pData;
}
?Vector?readFile(String?fileName)?throws?IOException?FileNotFoundException{
File?f=new?File(fileName);
InputStreamReader?isr=new?InputStreamReader(new?FileInputStream(f)“GBK“);
char[]?cbuf=new?char[(int)?f.length()];
isr.read(cbuf);
Analyzer?analyzer=new?MMAnalyzer();
TokenStream?tokens=analyzer.tokenStream(“Contents“?new?StringReader(new?String(cbuf)));
Token?token=null;
Vector?v=new?Vector();
while((token=tokens.next(new?Token()))!=null){
v.add(token.term());
}
return?v;
// String[]?words=new?String[v.size()];
// for(int?i=0;i // words[i]=v.elementAt(i);
// sort(words0v.size()-1);
// for(int?i=0;i // System.out.println(words[i]);
// }
}
// public static void main(String[] args) throws IOException{
// long startTrain=System.currentTimeMillis();
// String folderPath="文本分類語料庫";
// //process(folderPath);
// //readFile("52.txt");
// long endTrain=System.currentTimeMillis();
// System.out.println("Traing costs "+(endTrain-startTrain)/1000+"s");
// long startTest=System.currentTimeMillis();
// String testPath="軍事.txt";
// //test(testPath);
// long endTest=System.currentTimeMillis();
// System.out.println(“Test?costs?“+(endTest-startTest)/1000+“s
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2010-05-04 21:10  BayesClassifier\src\
     目錄           0  2014-06-03 20:57  BayesClassifier\src\MyTextClassify\
     文件        7666  2010-05-04 20:27  BayesClassifier\src\MyTextClassify\BeyesClassification.java
     文件        3360  2010-05-04 20:37  BayesClassifier\src\MyTextClassify\GUI.java
     文件     2173952  2010-05-04 20:48  BayesClassifier\樸素貝葉斯分類器.exe
     目錄           0  2014-06-03 20:58  BayesClassifier\
評論
共有 條評論