資源簡介
本工具為復旦大學計算機學院機器人研究實驗室開發的基於深度學習的中文自然語言處理工具FudanDNN-NLP2.0,該工具可用於中文分詞、自定義詞彙、文本規範化、命名實體識別、詞性標註、語義分析,用戶可以根據需要重新訓練或者精調模型。深度學習方法的優點在於不需要預先根據任務進行特徵選擇(特徵工程),系統所需參數較少(節省內存開銷),並且解碼速度(實際使用)遠遠快於其它相似性能的系統。
代碼片段和文件信息
package?cn.edu.fudan.corpus;
import?java.io.BufferedReader;
import?java.io.FileInputStream;
import?java.io.FileOutputStream;
import?java.io.IOException;
import?java.io.InputStreamReader;
import?java.io.objectOutputStream;
import?java.util.HashMap;
import?java.util.Properties;
import?java.util.Random;
public?class?LookupTableGeneratorStart?{
/**
?*?@param?args
?*/
public?static?void?main(String[]?args)?{
//?TODO?Auto-generated?method?stub
Properties?prop?=?new?Properties();
try?{
prop.load(new?FileInputStream(
“conf/LookupTabelGenerator.properties“));
}?catch?(IOException?e)?{
e.printStackTrace();
}
String?inputFile?=?prop.getProperty(“embeddingTextFile“);
String?outputFile?=?prop.getProperty(“embeddingFile“);
HashMap?lookuptable?=?new?HashMap();
int?dimension?=?Integer.parseInt(prop.getProperty(“dimension“));
String?tokenFile?=?prop.getProperty(“tokenFile“);
double[]?feature?=?null;
double?divisor?=?0.0d;
boolean?isDebug?=?false;
try?{
FileInputStream?fis?=?new?FileInputStream(inputFile);
InputStreamReader?isr?=?new?InputStreamReader(fis?“UTF-8“);
BufferedReader?br?=?new?BufferedReader(isr);
String?line?=?null;
String[]?tokens?=?null;
int?num?=?0;
while?((line?=?br.readLine())?!=?null)?{
line?=?line.trim();
if?(!line.equals(““))?{
num++;
tokens?=?line.split(“\\s+“);
if?(tokens.length?!=?(dimension?+?1))?{
System.out.println(“Check?the?embeddings?at?the?line?“
+?num?+?“?(the?dimensionality?is?“
+?(tokens.length?-?1)?+?“):?“?+?line);
System.exit(0);
}?else?{
feature?=?new?double[dimension];
for?(int?i?=?1;?i? feature[i?-?1]?=?Double.parseDouble(tokens[i]);
}
//?Normalize
divisor?=?0.0d;
for?(int?d?=?0;?d? divisor?+=?Math.pow(feature[d]?2);
}
divisor?=?Math.sqrt(divisor);
for?(int?d?=?0;?d? feature[d]?=?feature[d]?/?divisor;
}
lookuptable.put(tokens[0]?feature);
}
}
}
br.close();
isr.close();
fis.close();
}?catch?(Exception?e)?{
e.printStackTrace();
}
//?Supplement?the?special?tokens
try?{
FileInputStream?fis?=?new?FileInputStream(tokenFile);
InputStreamReader?isr?=?new?InputStreamReader(fis?“UTF-8“);
BufferedReader?br?=?new?BufferedReader(isr);
String?line?=?null;
Random?randomgen?=?new?Random();
while?((line?=?br.readLine())?!=?null)?{
line?=?line.trim();
if?(!lookuptable.containsKey(line))?{
System.out.println(“Message:?the?tokens?“?+?line?+?“?is?missing?in?the?embeddings.“);
feature?=?new?double[dimension];
for?(int?i?=?0;?i? feature[i]?=?(randomgen.nextDouble()?-?0.5d)?*?2?/?dimension;
}
//?Normalize
divisor?=?0.
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-03-04 01:14  FudanDNN-NLPv2.0\
     文件         377  2016-03-03 18:46  FudanDNN-NLPv2.0\.classpath
     文件        8196  2016-03-04 01:14  FudanDNN-NLPv2.0\.DS_Store
     目錄           0  2016-03-04 10:53  __MACOSX\
     目錄           0  2016-03-04 10:53  __MACOSX\FudanDNN-NLPv2.0\
     文件         120  2016-03-04 01:14  __MACOSX\FudanDNN-NLPv2.0\._.DS_Store
     文件         375  2016-03-03 17:06  FudanDNN-NLPv2.0\.project
     目錄           0  2016-03-03 17:06  FudanDNN-NLPv2.0\.settings\
     文件         587  2016-03-03 17:06  FudanDNN-NLPv2.0\.settings\org.eclipse.jdt.core.prefs
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\
     文件        5463  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\LookupTableGeneratorStart.class
     文件        1392  2016-03-03 18:58  FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\SemanticCorpusPrepareStart.class
     文件        1192  2016-03-03 19:00  FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\TokenExtractorStart.class
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\crf\
     文件         819  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\crf\ConditionalRandomFieldLargeScaleStart.class
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\
     文件        3367  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\WindowConvolutionNetworkDecoderStart.class
     文件        4962  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\WindowConvolutionNetworkStart.class
     目錄           0  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\
     文件        3358  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\CRFSemanticAnalyzerStart.class
     文件        3064  2016-03-03 19:07  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\LSTMSemanticAnalyzerStart.class
     文件        1924  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\NamedIdentityRecognizerStart.class
     文件        1916  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\PosTaggerStart.class
     文件        1892  2016-03-03 19:08  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\PrepreocessStart.class
     文件        1861  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\WordSegmentorStart.class
     目錄           0  2016-03-03 18:51  FudanDNN-NLPv2.0\bin\cn\edu\fudan\rnn\
     文件        3364  2016-03-03 18:46  FudanDNN-NLPv2.0\bin\cn\edu\fudan\rnn\LSTMDecoderStart.class
............此處省略100個文件信息
- 上一篇:網上資源管理系統ssm-源碼
- 下一篇:spring 共享單車管理系統
評論
共有 條評論