資源簡介
該程序是一個 Java 的中文漢字字頻統計程序,從諸如 txt 文件中讀取語料,然後生成一個前100個高頻漢字的字頻統計結果的 txt 文件,並且還會生成前100、200、600、2000等漢字的字頻和的另一個 txt 文件。該程序是借用 TreeMap 來實現數據的儲存的,很方便。輸入語料文件名時是要輸入全名(完整路徑)的,例如:E:\1.txt
代碼片段和文件信息
import?java.util.*;
import?java.io.*;
import?java.math.*;
public?class?Tongji?implements?Comparatorject>
{
private?static?TreeMap?treeMap?=?new?TreeMap();
public?static?int?totalNum?=?0;
public?static?int?totalKind?=?0;
public?static?float?sum1?sum20?sum100?sum600;
public?static?float?sum2000?sum3000?sum6000;
public?static?float?shang;
public?int?compare(object?o1?object?o2)?
{
//?TODO?Auto-generated?method?stub
return?-((Map.Entry)o1).getValue()+((Map.Entry)o2).getValue();
}
public?static?void?main(String[]?args)throws?Exception
{
System.out.println(“請輸入文件名稱:“);
Scanner?sc??=?new?Scanner(System.in);
String?fileName?=?sc.nextLine();
BufferedReader?reader?=?new?BufferedReader(new?FileReader(fileName));
String?line;
while((line?=?reader.readLine())!=null)?
{
for(int?i?=?0;i {
????????????????????char?c?=?line.charAt(i);
if((c>=0x4e00)&&(c<=0x9fbb))
{
totalNum?++;
String?cStr?=?String.valueOf(c);
if(treeMap.containsKey(cStr))
{
treeMap.put(cStr?treeMap.get(cStr)+1);
}
else
{
treeMap.put(cStr?1);
totalKind?++;
}
}
}
}
object?[]stats?=?treeMap.entrySet().toArray(); //用Arrays類的靜態方法為treeMap按字頻排序
Arrays.sort(statsnew?Tongji());
Listject>?list?=?Arrays.asList(stats); //轉成List以便排序
FileWriter?writer1?=?new?FileWriter(fileName+“—前100漢字統計結果.txt“); //輸出前100個漢字
writer1.write(“漢字總數\t=?“+totalNum+“\r\n“);
writer1.write(“漢字種數\t=?“+totalKind+“\r\n“);
writer1.write(“*************************\r\n“);
writer1.write(“前100漢字高頻字的頻率統計結果\r\n“);
writer1.write(“序號???????漢字?=?個數?????????字頻\r\n“);
int?lastFreq?=?-1;
- 上一篇:一個模擬Windows的畫圖程序 java版
- 下一篇:動態sin和cos函數圖像
評論
共有 條評論