資源簡介
代碼對10個txt文件進行分詞、去除停用詞,並提取每個詞的TF-IDF特徵值後輸出
代碼片段和文件信息
//input為HashMap>
//output為ArrayList>??為各行的tfidf值
import?java.util.ArrayList;
import?java.util.HashMap;
/**
 * Counts, for every key found in the per-line tf maps, how many lines contain that key,
 * and counts how many lines were visited in total.
 *
 * NOTE(review): this snippet was damaged during extraction. Spaces appear as '?', generic
 * type arguments are gone, the separators inside the put(...) calls appear to be lost, and
 * both index-loop headers are cut off mid-condition. The comments below describe only what
 * is still visible; anything else is marked as an assumption.
 */
public?class?FeatureArray?{
????private?HashMap?strNumOfLines?=?new?HashMap<>();???// for each word, the number of lines (posts) it appears in
????//private?ArrayList>>?featureLinesList?=?new?ArrayList>>();
    // Incremented once per inner element visited by the constructor.
????private?int?totalLines?=?0;
    /**
     * Walks the nested input list and fills strNumOfLines and totalLines.
     *
     * @param txtLinesInput nested list; each inner list is fetched with get(i) and each of
     *        its elements is asked for getTf(). The element type is not visible here.
     *        TODO confirm against the caller.
     */
????public?FeatureArray(ArrayList>?txtLinesInput)?{
???????//?ArrayList?ArrayListTmp?=?new?ArrayList();
????????//HashMap?HashMapTmp?=?new?HashMap();??//HashMapTmp holds the tf of one line
        // NOTE(review): the loop condition and increment are missing from the next line;
        // presumably i runs over every index of txtLinesInput. TODO confirm.
????????for?(int?i?=?0;i?????????????ArrayList?ArrayListTmp?=?new?ArrayList();
????????????ArrayListTmp?=?txtLinesInput.get(i);?????????// ArrayListTmp holds all the lines of one txt file
            // NOTE(review): this header is truncated as well; presumably j runs over every
            // index of ArrayListTmp. TODO confirm.
????????????for?(int?j?=?0;?j?????????????????HashMap?HashMapTmp?=?new?HashMap();??// HashMapTmp holds the tf of one line
????????????????totalLines++;
????????????????HashMapTmp?=?ArrayListTmp.get(j).getTf();
                // Each distinct key of this line's tf map adds one to that key's line count.
????????????????for?(String?s?:?HashMapTmp.keySet())?{
????????????????????if?(strNumOfLines.containsKey(s))
????????????????????????strNumOfLines.put(s?strNumOfLines.get(s)?+?1);
????????????????????else
????????????????????????strNumOfLines.put(s?1);
????????????????}
????????????????//HashMapTmp.clear();
????????????}
????????????//ArrayListTmp.clear();
????????}
????}
        /** Returns the internal word-to-line-count map itself, not a copy. */
????????public?HashMap?getStrNumOfLines()?{
????????????return?strNumOfLines;
????????}
        /** Returns the number of lines counted by the constructor. */
????????public?int?getTotalLines(){
????????????return?totalLines;
????????}
}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
    ..A..H.     10244  2015-10-04 16:38  TextProcessing\.DS_Store
    ..A..H.        14  2015-09-28 09:47  TextProcessing\.idea\.name
     文件        782  2015-09-28 11:53  TextProcessing\.idea\compiler.xml
     文件         76  2015-09-28 09:47  TextProcessing\.idea\copyright\profiles_settings.xml
     文件        159  2015-09-28 11:52  TextProcessing\.idea\encodings.xml
     文件        271  2015-09-28 09:52  TextProcessing\.idea\libraries\IKAnalyzer2012_u6.xml
     文件        247  2015-09-29 21:27  TextProcessing\.idea\libraries\IKAnalyzer2012_u61.xml
     文件        236  2015-09-28 10:36  TextProcessing\.idea\libraries\lucene_core_3_6_0.xml
     文件       2738  2015-09-29 12:16  TextProcessing\.idea\misc.xml
     文件        268  2015-09-28 09:47  TextProcessing\.idea\modules.xml
     文件       8792  2015-09-28 12:28  TextProcessing\.idea\uiDesigner.xml
     文件        164  2015-09-28 09:47  TextProcessing\.idea\vcs.xml
     文件      46869  2015-10-07 18:45  TextProcessing\.idea\workspace.xml
    ..A..H.      6148  2015-10-04 16:20  TextProcessing\lily\.DS_Store
    ..A..H.      4096  2015-10-07 16:37  TextProcessing\lily\._.DS_Store
    ..A..H.      4096  2015-10-07 16:37  TextProcessing\lily\._Basketball.txt
    ..A..H.      5148  2015-10-07 16:37  TextProcessing\lily\._WarAndPeace.txt
     文件     141368  2015-10-04 16:20  TextProcessing\lily\Basketball.txt
     文件     349550  2014-09-17 18:36  TextProcessing\lily\D_Computer.txt
     文件      85270  2014-09-17 18:39  TextProcessing\lily\FleaMarket.txt
     文件     147299  2014-09-17 18:40  TextProcessing\lily\Girls.txt
     文件     321323  2014-09-17 18:45  TextProcessing\lily\JobExpress.txt
     文件      80899  2014-09-17 18:35  TextProcessing\lily\Mobile.txt
     文件     112748  2014-09-17 18:36  TextProcessing\lily\Stock.txt
     文件     112149  2014-09-17 18:37  TextProcessing\lily\V_Suggestions.txt
     文件     120245  2015-10-04 16:19  TextProcessing\lily\WarAndPeace.txt
     文件     109592  2014-09-17 18:29  TextProcessing\lily\WorldFootball.txt
    ..A..H.      6148  2015-10-04 16:38  TextProcessing\out\.DS_Store
    ..A..H.      4096  2015-10-07 16:37  TextProcessing\out\._.DS_Store
    ..A..H.      6148  2015-10-04 16:38  TextProcessing\out\production\.DS_Store
............此處省略40個文件信息
- 上一篇:RFPA模擬軟件
- 下一篇:MyEclipse10.7.1破解+漢化 補丁+教程
評論
共有 條評論