資源簡介
基于內容的文本分類系統
(這是一個完整的分類系統,用java寫的,分詞是中科院64位的分詞)
詳情:http://blog.csdn.net/yinchuandong2/article/details/17717449
使用libsvm 進行分類
使用中科院的分詞器ICTCLAS對訓練集進行分詞
代碼片段和文件信息
import?libsvm.*;
import?java.io.*;
import?java.util.*;
class?svm_predict?{
private?static?svm_print_interface?svm_print_null?=?new?svm_print_interface()
{
public?void?print(String?s)?{}
};
private?static?svm_print_interface?svm_print_stdout?=?new?svm_print_interface()
{
public?void?print(String?s)
{
System.out.print(s);
}
};
private?static?svm_print_interface?svm_print_string?=?svm_print_stdout;
static?void?info(String?s)?
{
svm_print_string.print(s);
}
private?static?double?atof(String?s)
{
return?Double.valueOf(s).doubleValue();
}
private?static?int?atoi(String?s)
{
return?Integer.parseInt(s);
}
private?static?void?predict(BufferedReader?input?DataOutputStream?output?svm_model?model?int?predict_probability)?throws?IOException
{
int?correct?=?0;
int?total?=?0;
double?error?=?0;
double?sumv?=?0?sumy?=?0?sumvv?=?0?sumyy?=?0?sumvy?=?0;
int?svm_type=svm.svm_get_svm_type(model);
int?nr_class=svm.svm_get_nr_class(model);
double[]?prob_estimates=null;
if(predict_probability?==?1)
{
if(svm_type?==?svm_parameter.EPSILON_SVR?||
???svm_type?==?svm_parameter.NU_SVR)
{
svm_predict.info(“Prob.?model?for?test?data:?target?value?=?predicted?value?+?z\nz:?Laplace?distribution?e^(-|z|/sigma)/(2sigma)sigma=“+svm.svm_get_svr_probability(model)+“\n“);
}
else
{
int[]?labels=new?int[nr_class];
svm.svm_get_labels(modellabels);
prob_estimates?=?new?double[nr_class];
output.writeBytes(“l(fā)abels“);
for(int?j=0;j output.writeBytes(“?“+labels[j]);
output.writeBytes(“\n“);
}
}
while(true)
{
String?line?=?input.readLine();
if(line?==?null)?break;
StringTokenizer?st?=?new?StringTokenizer(line“?\t\n\r\f:“);
double?target?=?atof(st.nextToken());
int?m?=?st.countTokens()/2;
svm_node[]?x?=?new?svm_node[m];
for(int?j=0;j {
x[j]?=?new?svm_node();
x[j].index?=?atoi(st.nextToken());
x[j].value?=?atof(st.nextToken());
}
double?v;
if?(predict_probability==1?&&?(svm_type==svm_parameter.C_SVC?||?svm_type==svm_parameter.NU_SVC))
{
v?=?svm.svm_predict_probability(modelxprob_estimates);
output.writeBytes(v+“?“);
for(int?j=0;j output.writeBytes(prob_estimates[j]+“?“);
output.writeBytes(“\n“);
}
else
{
v?=?svm.svm_predict(modelx);
output.writeBytes(v+“\n“);
}
if(v?==?target)
++correct;
error?+=?(v-target)*(v-target);
sumv?+=?v;
sumy?+=?target;
sumvv?+=?v*v;
sumyy?+=?target*target;
sumvy?+=?v*target;
++total;
}
if(svm_type?==?svm_parameter.EPSILON_SVR?||
???svm_type?==?svm_parameter.NU_SVR)
{
svm_predict.info(“Mean?squared?error?=?“+error/total+“?(regression)\n“);
svm_predict.info(“Squared?correlation?coefficient?=?“+
?((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
?((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))+
?“?(regression)\n“);
}
else
svm_predict.info(“Accuracy?=?“
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        2425  2013-12-29 22:31  .classpath
     文件         383  2013-12-29 16:52  .fatjar
     文件         390  2013-12-16 15:34  .project
     目錄           0  2013-12-17 11:50  .settings\
     文件         598  2013-12-16 15:34  .settings\org.eclipse.jdt.core.prefs
     文件     9720077  2013-12-31 14:36  Classification_fat.jar
     文件        1042  2013-10-19 11:37  Configure.xm
     目錄           0  2013-12-30 21:37  Data\
     文件     3520144  2013-10-19 11:37  Data\BiWord.big
     文件     1696620  2013-10-19 11:37  Data\CoreDict.pdat
     文件     1786424  2013-10-19 11:37  Data\CoreDict.pos
     文件      478168  2013-10-19 11:37  Data\CoreDict.unig
     文件      262236  2013-10-19 11:37  Data\FieldDict.pdat
     文件          72  2013-10-19 11:37  Data\FieldDict.pos
     文件     1978128  2013-10-19 11:37  Data\GranDict.pdat
     文件     1778776  2013-10-19 11:37  Data\GranDict.pos
     文件       37253  2013-10-19 11:37  Data\ICTCLAS30.ctx
     文件         288  2013-10-19 11:37  Data\ICTCLAS_First.map
     文件         406  2013-10-19 11:37  Data\ICTPOS.map
     文件         307  2013-10-19 11:37  Data\PKU.map
     文件         288  2013-10-19 11:37  Data\PKU_First.map
     文件           0  2013-12-30 21:37  Data\UserDict.map
     文件      262560  2013-12-30 21:37  Data\UserDict.pdat
     文件      524280  2013-10-19 11:37  Data\character.idx
     文件       65540  2013-10-19 11:37  Data\character.type
     文件        2213  2013-10-19 11:37  Data\nr.ctx
     文件        3008  2013-10-19 11:37  Data\nr.fsa
     文件     1757200  2013-10-19 11:37  Data\nr.role
     文件         127  2013-12-31 16:27  ICTCLAS.log
     文件      240640  2013-10-19 11:37  ICTCLAS50.dll
     文件        9898  2013-10-19 11:37  ICTCLAS50.h
............此處省略161個文件信息
評論
共有 條評論