-
大小: 1.91MB 文件類型: .rar 金幣: 2 下載: 0 次 發布日期: 2023-10-26
- 語言: Java
- 標簽: 機器學習 中文分詞 詞性標注 crf hmm
資源簡介
采用機器學習的方法進行自然語言處理,對中文進行分詞和詞性標注。分詞采用crf模型,詞性標注用hmm模型,解碼算法為Viterbi算法。本系統使用java語言編寫。

代碼片段和文件信息
#include?
#include?“crfpp.h“
//?c++?-O3?example.cpp?-lcrfpp
int?main(int?argc?char?**argv)?{
??//?-v?3:?access?deep?information?like?alphabetaprob
??//?-nN:?enable?nbest?output.?N?should?be?>=?2
??CRFPP::Tagger?*tagger?=
????CRFPP::createTagger(“-m?model?-v?3?-n2“);
??if?(!tagger)?{
????std::cerr?<????return?-1;
??}
??//?clear?internal?context
??tagger->clear();
??//?add?context
??tagger->add(“Confidence?NN“);
??tagger->add(“in?IN“);
??tagger->add(“the?DT“);
??tagger->add(“pound?NN“);
??tagger->add(“is?VBZ“);
??tagger->add(“widely?RB“);
??tagger->add(“expected?VBN“);
??tagger->add(“to?TO“);
??tagger->add(“take?VB“);
??tagger->add(“another?DT“);
??tagger->add(“sharp?JJ“);
??tagger->add(“dive?NN“);
??tagger->add(“if?IN“);
??tagger->add(“trade?NN“);
??tagger->add(“figures?NNS“);
??tagger->add(“for?IN“);
??tagger->add(“September?NNP“);
??std::cout?<“column?size:?“?<xsize()?<??std::cout?<“token?size:?“?<size()?<??std::cout?<“tag?size:?“?<ysize()?<
??std::cout?<“tagset?information:“?<??for?(size_t?i?=?0;?i?ysize();?++i)?{
????std::cout?<“tag?“?<yname(i)?<??}
??//?parse?and?change?internal?stated?as?‘parsed‘
??if?(!?tagger->parse())?return?-1;
??std::cout?<“conditional?prob=“?<prob()
????????????<“?log(Z)=“?<Z()?<
??for?(size_t?i?=?0;?i?size();?++i)?{
????for?(size_t?j?=?0;?j?xsize();?++j)?{
??????std::cout?<x(i?j)?<‘\t‘;
????}
????std::cout?<y2(i)?<‘\t‘;
????std::cout?<
????std::cout?<“Details“;
????for?(size_t?j?=?0;?j?ysize();?++j)?{
??????std::cout?<‘\t‘?<yname(j)?<“/prob=“?<prob(ij)
????????????????<“/alpha=“?<alpha(i?j)
????????????????<“/beta=“?<beta(i?j);
????}
????std::cout?<??}
??//?when?-n20?is?specified?you?can?access?nbest?outputs
??std::cout?<“nbest?outputs:“?<??for?(size_t?n?=?0;?n?10;?++n)?{
????if?(!?tagger->next())?break;
????std::cout?<“nbest?n=“?<prob()?<????//?you?can?access?any?information?using?tagger->y()...
??}
??std::cout?<“Done“?<}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        301  2010-11-12 12:48  nlu_project\.classpath
     文件        387  2010-11-12 12:48  nlu_project\.project
     文件        629  2010-11-12 12:48  nlu_project\.settings\org.eclipse.jdt.core.prefs
     文件       4637  2010-12-14 16:52  nlu_project\bin\convertor\InputConvertor.class
     文件       3810  2010-12-14 16:52  nlu_project\bin\convertor\OutputConvertor.class
     文件       2731  2010-12-14 16:52  nlu_project\bin\convertor\TestFileConvertor.class
     文件       2230  2010-12-14 16:52  nlu_project\bin\execute\execute.class
     文件       3467  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\cli\Evaluate$EvalHandler.class
     文件       4122  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\cli\Evaluate.class
     文件       4743  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\cli\Tag.class
     文件       4013  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\cli\Train$TrainHandler.class
     文件       5347  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\cli\Train.class
     文件       3989  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\BrownCorpusReader.class
     文件       1885  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\CorpusReader.class
     文件       1346  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\CorpusReaderException$CorpusReadError.class
     文件        928  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\CorpusReaderException.class
     文件        308  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\CorpusSentenceHandler.class
     文件        623  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\corpus\TaggedWord.class
     文件       1343  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\BiGram.class
     文件       1897  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\Model$NGrams.class
     文件       6306  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\Model.class
     文件       1517  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\TriGram.class
     文件        954  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\UniGram.class
     文件       1500  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\data\util\ProbEntryComparator.class
     文件        201  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\languagemodel\LanguageModel.class
     文件       4622  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\languagemodel\LinearInterpolationLM.class
     文件       1880  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\tagger\HMMTagger$Sequence.class
     文件        857  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\tagger\HMMTagger$TagMatrixEntry.class
     文件       5748  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\tagger\HMMTagger.class
     文件       3820  2010-12-14 16:52  nlu_project\bin\org\langkit\tagger\wordhandler\KnownWordHandler.class
............此處省略139個文件信息
評論
共有 條評論