91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 2KB
    文件類型: .zip
    金幣: 2
    下載: 0 次
    發布日期: 2021-05-21
  • 語言: Java
  • 標籤: Java、FMM、中文分詞

資源簡介

Java語言編寫的優秀的中文前向最大分詞程序。程序簡單容易理解,對於理解中文分詞思想有很大的幫助。

資源截圖

代碼片段和文件信息

import?java.io.*;
import?java.util.*;

public?class?fenci?{

private?String?separator?=?“/“;
private?static?fenci?segmenter?=?null;
private?static?TreeMap?cnWords;
private?TreeSet?cForeign?cNumbers;

/**
 * Private constructor: builds the two character sets and the lexicon map.
 *
 * Loads cNumbers and cForeign through loadset(), then reads data/lexicon.txt
 * line by line into the static cnWords map, printing progress to System.out.
 * An IOException while reading the lexicon is reported on System.out and
 * swallowed, so the instance is still constructed with whatever was loaded.
 *
 * NOTE(review): the text of this snippet looks mangled by extraction
 * (whitespace shown as '?', argument commas missing, typographic quotes);
 * the comments below describe the apparent intent - confirm against the
 * real fenci.java.
 */
private?fenci(){
cForeign?=?new?TreeSet();
cNumbers?=?new?TreeSet();

// Fill the per-instance character sets from their data files.
loadset(cNumbers?“data/snumbers_u8.txt“);
loadset(cForeign?“data/sforeign_u8.txt“);

System.out.print(“Loading?Lexicon“);
// cnWords is a static field, so every construction replaces the shared map.
cnWords?=?new?TreeMap();

String?newword?=?null;

try?{
InputStream?worddata?=?null;
worddata?=?new?FileInputStream(new?File(“data/lexicon.txt“));

// No charset is passed here (loadset() passes "UTF-8"), so the lexicon is
// decoded with the platform default encoding.
BufferedReader?in?=?new?BufferedReader(new?InputStreamReader(worddata));

// Counts every line read, including lines that are not stored.
int?i?=?0;

while?((newword?=?in.readLine())?!=?null){
// Progress indicator: one dot per 10000 lines read.
if?((++i)%10000?==?0){
System.out.print(‘.‘);
}

// Lines containing '#' are not stored.
// NOTE(review): the rest of this condition (a comparison on newword.length()
// and the opening brace) is missing from the text; the threshold cannot be
// determined from here - restore it from the original file.
if?((newword.indexOf(“#“)?==?-1)?&&?(newword.length()? cnWords.put(newword.intern()?true);

// For a 3-character entry, also register its 2-character prefix (value false)
// unless that key is already present.
if?(newword.length()?==?3){
if?(cnWords.containsKey(newword.substring(0?2).intern())?==?false){
cnWords.put(newword.substring(0?2).intern()?false);
}
}

// NOTE(review): for a 4-character entry the check is on the 2-character prefix
// but the key inserted is the 3-character prefix. As written, the 2-character
// prefix is never added here, and an existing 3-character entry stored as true
// can be overwritten with false. Looks like a bug - confirm intended keys.
if?(newword.length()?==?4){
if?(cnWords.containsKey(newword.substring(0?2).intern())?==?false){
cnWords.put(newword.substring(0?3).intern()?false);
}
}
}
}

// Only reached when reading succeeded; the reader is not closed if an
// IOException is thrown above.
in.close();
System.out.println();
System.out.println(“Loading?Lexicon?OK“);
// i is the number of lines read, not the number of keys in cnWords.
System.out.println(“load?words?number?is“?+?i);
}?catch?(IOException?e){
System.out.println(“Loading?Lexicon?failuer“);

e.printStackTrace();
}
}

public?synchronized?static?void?reset(){
fenci.segmenter?=?null;
}

public?synchronized?static?fenci?getSegmenter(){
if?(fenci.segmenter?==?null){
fenci.segmenter?=?new?fenci();
}

return?fenci.segmenter;
}

private?void?loadset(TreeSet?targetset?String?sourcefile){
String?dataline;

try{
InputStream?fr?=?new?FileInputStream(new?File(sourcefile));
BufferedReader?in?=?new?BufferedReader(new?InputStreamReader(fr?“UTF-8“));

while?((dataline?=?in.readLine())?!=?null){
if?((dataline.indexOf(“#“)?>?-1)?||?(dataline.length()?==?0)){
continue;
}

targetset.add(dataline.intern());
}

in.close();
}?catch?(Exception?e){
System.err.println(“Exception?loading?data?file“?+?sourcefile?+?“?“?+?e);

e.printStackTrace();
}
}

public?boolean?isNumber(String?testword){
boolean?result?=?true;

for?(int?i=0;?i if?(!cNumbers.contains(testword.substring(i?i+1).intern())){
result?=?false;

break;
}
}

return?result;
}

public?boolean?isAllForeign(String?testword){
boolean?result?=?true;

for?(int?i=0;?i if?(!cForeign.contains(testword.substring(i?i+1).intern())){
result?=?false;

break;

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        5854  2011-04-28 22:01  fenci.java

評論

共有 條評論