資源簡介
特征選擇DF方法實現源代碼
要求要先自行分好詞
代碼中有詳細(xì)注釋
代碼片段和文件信息
package?df;
import?java.io.BufferedReader;
import?java.io.BufferedWriter;
import?java.io.File;
import?java.io.FileInputStream;
import?java.io.FileNotFoundException;
import?java.io.FileOutputStream;
import?java.io.InputStreamReader;
import?java.io.OutputStreamWriter;
import?java.nio.CharBuffer;
public?class?DF?{
public?static?void?main(String[]?args)?{
//?TODO?Auto-generated?method?stub
int?termNum?=?1111;??//詞的數目,也就是沒選擇前特征數目
String?[]?terms;??//用來記錄每個詞
String?termFile=“KeyWordsDf.txt“;?//要求先分好詞,存放詞的文件,每一行一個詞
int?[]?w?;?//用來記錄每個詞的權重
int?k=100;?//取權(quán)重最高的前K個feature
String?termWeighFile=“termWeigh.txt“;??//用于保存全部《特征?,權(quán)重》對
String?topFeatureFile=“top“?+?k?+?“Feature.txt“;??//保存前k個《特征?,權(quán)重》對
String?dirName=“F:\\\\ben\\TextCategory\\“;??//存放所有語料集的目錄地址,如C://
//初始化w
w=new?int[termNum];
//把keywords導進來保存在terms數組里面
terms?=?new?String[termNum];
//把語料集全部列出來
File?dir?=?new?File(dirName);??
File[]?files?=?dir.listFiles();?
try?{
FileInputStream?fis2?=?new?FileInputStream(termFile);
InputStreamReader?isr2?=?new?InputStreamReader(fis2);
BufferedReader?br2?=?new?BufferedReader(isr2);
for(int?i=0;i terms[i]?=?br2.readLine();
}
br2.close();
isr2.close();
fis2.close();
}?catch?(Exception?e)?{
System.out.println(e.getMessage());
}
//使用DF方法進行特征選擇
df_count(filestermsw);
//輸出《特征,權(quán)重》對
try?{
FileOutputStream?fos?=?new?FileOutputStream(termWeighFile);
OutputStreamWriter?osw?=?new?OutputStreamWriter(fos);
BufferedWriter?bw?=?new?BufferedWriter(osw);
for(int?i=0;i bw.write(terms[i]+“??“);
bw.write(String.valueOf(w[i]));
bw.newLine();
}
bw.close();
osw.close();
fos.close();
}?catch?(Exception?e)?{
System.out.println(e.getMessage());
}?
//對《特征,權(quán)重》進行排序
top_feature(ter
評論
共有 條評論