資源簡介
Java實現的文本分類程序
代碼片段和文件信息
/**
?*?
?*/
package?com.tassemble.classify.svm;
import?java.io.FileOutputStream;
import?java.io.IOException;
import?java.io.objectOutputStream;
import?java.io.UnsupportedEncodingException;
import?java.net.URLDecoder;
import?java.util.ArrayList;
import?java.util.HashMap;
import?java.util.Map;
import?java.util.Set;
import?org.apache.log4j.Logger;
import?org.apache.log4j.PropertyConfigurator;
import?libsvm.svm_model;
import?libsvm.svm_node;
import?libsvm.svm_parameter;
import?libsvm.svm_problem;
import?com.tassemble.analyzer.ArticleProcessor;
import?com.tassemble.constants.Constant;
import?com.tassemble.feature.Character;
import?com.tassemble.feature.FeatureSelector;
import?com.tassemble.feature.Selector;
import?com.tassemble.tfidf.Category;
import?com.tassemble.tfidf.TFIDF;
import?com.tassemble.vsm.VSM;
/**
?*?@author?chen-hongqin@163.com?2011-3-23
?*?
?*/
public?class?App?{
static?Logger?logger?=?Logger.getLogger(App.class);
void?predict()?{
}
/**
?*?
?*?@param?args
?*?@throws?IOException
?*?????????????return?void
?*?
?*/
public?static?void?main(String[]?args)?throws?IOException?{
PropertyConfigurator.configure(Constant.ROOT_PATH
+?“configures/classificationLog4j.properties“);
/**
?*?1.?classifier.jar?-train?c:\語料庫2\?
?*?2.?classifier.jar?-predict?c:\predictTest\?
?*?3.?classifier.jar?-check?c:\predictTest\
?*/
if?(args[0].equals(“-train“))?{
train(args[1]);
}?else?if?(args[0].equals(“-predict“))?{
logger.info(“l(fā)oad?model?please?waiting?...“);
svm_model?model?=?(svm_model)?Constant.load(“model“);
logger.info(“l(fā)oad?model?completely!“);
Map?map?=?(HashMap)?Constant.load(“map“);
Predictor?p?=?new?Predictor(model?map);
p.predict(args[1]?““);
}?else?if?(args[0].equals(“-test“))?{
logger.info(“l(fā)oad?model?please?waiting?...“);
svm_model?model?=?(svm_model)?Constant.load(“model“);
logger.info(“l(fā)oad?model?completely!“);
Map?map?=?(HashMap)?Constant.load(“map“);
ArticleProcessor?processor?=?new?ArticleProcessor();
ArrayList?categories?=?processor.getCategories(args[1]);
VSM?vsm?=?new?VSM();
svm_problem?problem?=?vsm.convertToVSM(categories?map);
SVMScale?svmScale?=?new?SVMScale();
logger.info(“scale?problems“);
problem?=?svmScale.scale(problem?Constant.PREDICT_SCALE_ARGV);
logger.info(“end?scale?problems“);
Classifier?t?=?new?Classifier();
logger.info(“start?predict?...“);
testPredict(t?problem?model);
}
}
/**
?*?
?*?return?void
?*?
?*?@throws?IOException
?*?
?*/
private?static?void?test()?throws?IOException?{
//?TODO?Auto-generated?method?stub
Classifier?t?=?new?Classifier();
ArrayList?categories?=?new?TFIDF().process();
Selector?selector?=?new?FeatureSelector(categories);
HashMap>?map?=?selector
.select(Constant.DEFAULT_NUMBER_OF_TOTAL_FE
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件????????867??2011-03-28?20:15??Classification\.classpath
?????文件????????390??2011-03-19?20:59??Classification\.project
?????文件????????246??2011-03-19?20:59??Classification\.settings\.svn\all-wcprops
?????文件????????368??2011-03-28?20:15??Classification\.settings\.svn\entries
?????文件????????629??2011-03-19?20:59??Classification\.settings\.svn\text-ba
?????文件????????629??2011-03-19?20:59??Classification\.settings\org.eclipse.jdt.core.prefs
?????文件???????1356??2011-03-28?20:15??Classification\.svn\all-wcprops
?????文件???????2130??2011-03-28?20:16??Classification\.svn\entries
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:58??Classification\.svn\prop-ba
?????文件?????????53??2011-03-19?20:59??Classification\.svn\prop-ba
?????文件?????????53??2011-03-20?23:38??Classification\.svn\prop-ba
?????文件????????867??2011-03-28?20:14??Classification\.svn\text-ba
?????文件????????390??2011-03-19?20:58??Classification\.svn\text-ba
?????文件??????14160??2011-03-19?20:59??Classification\.svn\text-ba
?????文件?????155648??2011-03-19?20:58??Classification\.svn\text-ba
?????文件??????32775??2011-03-19?20:58??Classification\.svn\text-ba
?????文件?????104915??2011-03-19?20:58??Classification\.svn\text-ba
?????文件??????28000??2011-03-19?20:58??Classification\.svn\text-ba
?????文件?????519159??2011-03-19?20:58??Classification\.svn\text-ba
?????文件??????49782??2011-03-19?20:58??Classification\.svn\text-ba
?????文件?????481534??2011-03-19?20:59??Classification\.svn\text-ba
?????文件????2678651??2011-03-20?23:38??Classification\.svn\text-ba
?????文件?????581999??2011-03-30?10:41??Classification\ans7.txt
?????文件???????5901??2011-03-30?10:32??Classification\ans8.txt
?????文件???????1285??2011-03-28?20:14??Classification\bin\.svn\all-wcprops
............此處省略1358個文件信息
評論
共有 條評論