91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 662KB
    文件類型: .rar
    金幣: 2
    下載: 0 次
    發布日期: 2021-06-06
  • 語言: Java
    標簽: 中文 詞性標注 java

資源簡介

用Java語言實現的中文詞性標注程序,已在Eclipse上編譯通過,可以運行。歡迎下載并提出意見。

資源截圖

代碼片段和文件信息

import?java.util.*;
import?java.io.BufferedReader;
import?java.io.FileReader;
import?java.io.FileWriter;
import?java.io.IOException;
import?java.math.*;

public?class?Viterbi
{
public?static?void?main(String[]?args)
{

//----------------------------------------------------------------------------------
//統計出訓練樣本中詞性種類及其頻率
??String?content=““;
??BufferedReader?reader=null;
??try{??//讀取199801train.txt文本中的內容,并保存在content的字符流中
????reader=new?BufferedReader(new?FileReader(“c:/199801train.txt“));
????String?line;
????while((line=reader.readLine())!=null)?content+=line;
???}
??catch(IOException?e)
??{
???e.printStackTrace();
??}
??finally
??{
???if(reader!=null)
???????{
???? ???try{reader.close();}??? ???
???? ???catch(IOException?e){}
???????}
??}
??
??String[]?text;??//text[]用于存儲訓練樣本中的詞語
??text=content.split(“(/[a-z]*\\s{0})|(][a-z]*\\s{1})“);?//去除詞性標注
??//for(String?wd:text)
????//System.out.println(wd);
???
????
??String[]?temp;??//temp[]數組用于存儲單個詞的詞性標注符號
????temp=content.split(“[0-9|-]*/|\\s{1}[^a-z]*|][a-z]“);?//僅保留詞性標注符號。
????String[]?temp1;
????temp1=new?String[temp.length-1];//去除temp[0]為空的情況
????for(int?i=0;i??????temp1[i]=temp[i+1];
????//for(String?wd:temp1)
??????//System.out.print(wd+“??“);
??????
????String[]?temp2;??//temp2[]數組用于存儲每兩個詞的詞性標注符號
????temp2=new?String[temp1.length-1];
????for(int?i=0;i??????temp2[i]=temp1[i]+‘‘+temp1[i+1];????
????//for(String?wd:temp2)
??????//System.out.println(wd);
??????
????String[]?word_pos;
????word_pos=new?String[text.length];
????for(int?i=0;i??????word_pos[i]=text[i]+‘‘+temp1[i];
????//for(String?wd:word_pos)
??????//System.out.println(wd);
?????????
??????
????Hashtable?hash1=new?Hashtable();??//創建hash1,存儲單個詞的詞性及其頻率
????for(String?wd:temp1)
????{
???? if(hash1.containsKey(wd))
???? ??hash1.put(wdhash1.get(wd).hashCode()+1);
???? else
???? ??hash1.put(wd1);???? ?????? ??
????}
????int?sp=hash1.size();??//統計詞性個數
??????//System.out.println(hash1);
????
????Hashtable?hash2=new?Hashtable();??//創建hash2,存儲每兩個詞的詞性及其頻率
????for(String?wd:temp2)
????{
???? if(hash2.containsKey(wd))
???? ??hash2.put(wdhash2.get(wd).hashCode()+1);
???? else
???? ??hash2.put(wd1);
????}
????//System.out.println(hash2);????
???
???Hashtable?hash3=new?Hashtable();??//創建hash3存儲詞語、詞性和詞頻
???for(String?wd:word_pos)
???{
??? if(hash3.containsKey(wd))
??? ??hash3.put(wdhash3.get(wd).hashCode()+1);
??? else
??? ??hash3.put(wd1);
???}
???//System.out.println(hash3);
???
????String[]?table_pos;??//table_pos[]用于存儲所有不同的詞性符號
????table_pos=new?String[sp];
????Enumeration?key=hash1.keys();????
????for(int?i=0;i????{???????
??????String?str=(String)key.nextElement();
??????table_pos[i]=str;
????}
????//for(String?wd:table_pos)
???????//System.out.println(wd);
??????????
???
???//-------------------------------------------------

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件      22374  2009-10-27 16:57  Wordpos\199801test.txt

     文件    2437276  2009-10-27 16:57  Wordpos\199801train.txt

     文件      22320  2009-12-18 16:59  Wordpos\result.txt

     文件       8894  2009-12-18 16:55  Wordpos\Viterbi.java

     文件      16953  2010-01-06 13:46  Wordpos\程序說明.docx

     目錄          0  2010-01-06 13:46  Wordpos

----------- ---------  ---------- -----  ----

              2507817                    6


評論

共有 條評論