-
大小: 18.08MB 文件類型: .rar 金幣: 1 下載: 0 次 發布日期: 2023-08-02
- 語言: 其他
- 標簽: 中文地址 地址分詞 地址匹配 Levenshtein
資源簡介
處理中文地址的分詞和匹配:采用混合分詞算法進行中文地址分詞,並在分詞基礎上采用 Double Levenshtein 算法計算中文地址相似度,以進行地址匹配。
代碼片段和文件信息
package experiment;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HBaseConfiguration;

import com.AddressSegment.data.dao.impl.AddressQueryImpl;
import com.AddressSegment.logic.AddressSplitImpl;
import com.AddressSegment.logic.UndefinedWordRecognize;
import com.AddressSegment.metadata.model.CharDictionary;
import com.AddressSegment.metadata.model.WordDictionary;
import com.AddressSegment.tool.dao.impl.DictionaryFileOperationDAOImpl;
import com.AddressSegment.util.Config;
public?class?CountAddress?{
public?static?Configuration?config?=?null;
public?static?FileSystem?fs?=?null;
public?static?DictionaryFileOperationDAOImpl?DF?=?null;
public?static?WordDictionary?wordDict?=?null;
public?static?CharDictionary?charDict?=?null;
public?static?int?rowkey?=?0;
//?public?static?HTablePool?pool?=?new?HTablePool(config?1000);
static?{
config?=?HbaseConfiguration.create();
wordDict?=?new?WordDictionary();
charDict?=?new?CharDictionary();
Configuration?conf?=?new?Configuration();
try?{
fs?=?FileSystem.get(URI.create(“hdfs://192.168.31.172:9000“)?conf);
}?catch?(IOException?e1)?{
e1.printStackTrace();
}
try?{
DF?=?new?DictionaryFileOperationDAOImpl(Config.getDefaultDictionaryHDFSURL()
Config.getCharDictionaryHDFSURL()?fs);
}?catch?(URISyntaxException?e)?{
e.printStackTrace();
}?catch?(IOException?e)?{
e.printStackTrace();
}
DF.putFileToDict(wordDict?charDict);
}
public?static?void?ComputeAddressCount(String?fileInputPath?String?fileOutputPath)?throws?IOException?URISyntaxException{
BufferedReader?br?=?new?BufferedReader(new?InputStreamReader(new?FileInputStream(fileInputPath)“GBK“));
String?line?=?““;
FileOutputStream?out=new?FileOutputStream(fileOutputPath);
PrintStream?p=new?PrintStream(out);
while?((line?=?br.readLine())?!=?null)?{
Integer?count?=?0;
AddressSplitImpl?asi?=?new?AddressSplitImpl();
ArrayList?strArray?=?asi.Split(line?fs);
UndefinedWordRecognize?uwr?=?new?UndefinedWordRecognize();
ArrayList?wordArray1?=?uwr.getUndefinedWord(strArray);
AddressQueryImpl?aqi?=?new?AddressQueryImpl();
count?=?aqi.queryAddressCount(wordArray1);
System.out.println(line);
System.out.println(count);
p.println(line+“\t“+count);
}
p.close();
br.close();
}
public?static?void?main(String[]?args)?throws?IOException?URISyntaxException?{
ComputeAddressCount(“C:/Users/HYFrank/Desktop/Noname1.txt“?“C:/Users/HYFrank/Desktop/countAddress.txt“);
}
}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       5930  2016-08-09 22:51  src\com\AddressSegment\data\dao\ba
     文件        411  2016-08-01 11:36  src\com\AddressSegment\data\dao\declare\AddressQuery.java
     文件        238  2016-01-31 15:22  src\com\AddressSegment\data\dao\declare\SegmentInsert.java
     文件       5207  2016-08-09 22:24  src\com\AddressSegment\data\dao\impl\AddressQueryImpl.java
     文件       1480  2016-02-25 14:56  src\com\AddressSegment\data\dao\impl\SegmentInsertImpl.java
     文件        212  2016-01-29 23:28  src\com\AddressSegment\data\dao\ModelRowMapper.java
     文件        814  2016-02-25 23:13  src\com\AddressSegment\logic\AddressEncodingService.java
     文件       1020  2016-04-05 00:21  src\com\AddressSegment\logic\AddressSplitImpl.java
     文件       3924  2016-08-30 00:37  src\com\AddressSegment\logic\AlgorithmDaoImpl.java
     文件       3620  2016-02-26 00:36  src\com\AddressSegment\logic\GaodeEncodingServiceInvoker.java
     文件        283  2016-02-25 17:00  src\com\AddressSegment\logic\service\AddressEncoding.java
     文件        621  2016-04-05 00:20  src\com\AddressSegment\logic\service\AddressSplit.java
     文件       1201  2016-04-12 21:53  src\com\AddressSegment\logic\service\AddressTageMaking.java
     文件        219  2016-01-25 21:49  src\com\AddressSegment\logic\service\AlgorithmInterface.java
     文件       3418  2016-03-05 14:01  src\com\AddressSegment\logic\service\HttpRequestTemplate.java
     文件        258  2016-07-24 23:35  src\com\AddressSegment\logic\service\IHttpResponseHandler.java
     文件        271  2016-07-24 23:35  src\com\AddressSegment\logic\service\UndefinedWordRecognizeInterface.java
     文件       4771  2016-07-22 09:49  src\com\AddressSegment\logic\UndefinedWordRecognize.java
     文件       5195  2016-04-12 23:04  src\com\AddressSegment\main\AddressRegexTage.java
     文件       4395  2016-04-09 01:07  src\com\AddressSegment\main\AddressSegment.java
     文件       5349  2016-07-07 21:17  src\com\AddressSegment\main\AddressSegmentTage.java
     文件       6496  2016-07-20 14:06  src\com\AddressSegment\main\AddressSegmentToHba
     文件       5976  2016-07-24 23:26  src\com\AddressSegment\main\AddressSegmentToHDFS.java
     文件       6047  2016-07-27 00:45  src\com\AddressSegment\main\AddressSegmentToHDFSToHba
     文件       2661  2016-04-07 23:20  src\com\AddressSegment\main\WordCount.java
     文件        719  2016-01-24 00:56  src\com\AddressSegment\me
     文件       1985  2016-01-23 16:59  src\com\AddressSegment\me
     文件        836  2016-01-23 16:59  src\com\AddressSegment\me
     文件        407  2016-02-25 16:59  src\com\AddressSegment\me
     文件       1179  2016-01-30 22:38  src\com\AddressSegment\me
............此處省略226個文件信息
- 上一篇:愛心銀行-漂亮的公益活動網站程序
- 下一篇:普朗特《流體力學概論》
評論
共有 條評論