-
大小: 9.68MB 文件類型: .rar 金幣: 2 下載: 0 次 發布日期: 2023-11-06
- 語言: 其他
- 標簽:
資源簡介
中國科學院計算技術研究所在多年研究工作積累的基礎上,研制出了漢語詞法分析系統ICTCLAS(Institute of Computing Technology, Chinese Lexical Analysis
System),主要功能包括中文分詞;詞性標注;命名實體識別;新詞識別;同時支持用戶詞典。我們先后精心打造五年,內核升級7次,目前已經升級到了ICTCLAS2009。
用戶詞典接口擴展
用戶可以動態增加、刪除用戶詞典中的詞,調節分詞的效果,提高了用戶詞典使用的靈活性。
分詞粒度可調
可以控制分詞結果的粒度。共享版本提供兩種分詞粒度,標準粒度和粗粒度,滿足不同用戶的需求。
詞性標注功能加強
多種標注集的選擇,系統可供選擇的標注集有:計算所一級標注集,計算所二級標注集,北大一級標注集,北大二級標注集。
關鍵詞提取
自動抽取出能很好地代表文檔主題的若干個詞或短語。關鍵詞抽取技術廣泛應用于信息檢索、文本分類/聚類、信息過濾、文檔摘要等各種智能文本信息處理領域,具有很好的應用價值。
指紋提取
根據文章的內容、結構、詞語間的關系,分析出能夠表示該文章的語義指紋,使用數字序列表示。

代碼片段和文件信息
//?win_cDemo.cpp?:?定義控制臺(tái)應(yīng)用程序的入口點(diǎn)。
//
#include?“stdafx.h“
#include?“win_cDemo.h“
#include?
#include?
using?namespace?std;
#ifdef?_DEBUG
#define?new?DEBUG_NEW
#endif
#include?“ICTCLAS30.h“
//?唯一的應(yīng)用程序?qū)ο?br/>
CWinApp?theApp;
using?namespace?std;
int?_tmain(int?argc?TCHAR*?argv[]?TCHAR*?envp[])
{
_CrtSetDbgFlag?(?_CRTDBG_ALLOC_MEM_DF?|?_CRTDBG_LEAK_CHECK_DF?);//|?_CRTDBG_CHECK_ALWAYS_DF?|?_CRTDBG_CHECK_CRT_DF?);
_CrtDumpMemoryLeaks();
?_CrtSetBreakAlloc(129);
_crtBreakAlloc?=?77;
int?nRetCode?=?0;
//?初始化?MFC?并在失敗時(shí)顯示錯(cuò)誤
if?(!AfxWinInit(::GetModuleHandle(NULL)?NULL?::GetCommandLine()?0))
{
//?TODO:?更改錯(cuò)誤代碼以符合您的需要
_tprintf(_T(“致命錯(cuò)誤:?MFC?初始化失敗\n“));
nRetCode?=?1;
}
else
{
//?TODO:?在此處為應(yīng)用程序的行為編寫(xiě)代碼。
printf(“hello?world!\n“);
//init?ICTCLAS
if(!ICTCLAS_Init())
{
printf(“ICTCLAS?INIT?FAILED!\n“);
system(“pause“);
return?-1;
}
printf(“ICTCLAS?init?success!\n“);
////未加詞典
char?sString[1000]?=?“少兒少兒節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目節(jié)目“;
? int?nCount?=?ICTCLAS_GetParagraphProcessAWordCount(sString);
result_t?*result?=(result_t*)malloc(sizeof(result_t)*nCount);
ICTCLAS_ParagraphProcessAW(nCountresult);//獲取結(jié)果存到客戶的內(nèi)存中
for?(int?i=0;?i {
char?buf[100];
memset(buf?0?100);
int?index?=?result[i].start;
memcpy(buf(void?*)(sString+index)?result[i].length);
printf(“%s\t“?buf);
printf(“%s\t“?result[i].sPOS);
switch(result[i].word_type)
{
case?0:
printf(“核心詞典\n“);
break;
case?1:
printf(“用戶詞典\n“);
break;
case?2:
printf(“領(lǐng)域詞典\n“);
break;
default:break;
}
}
printf(“\n--------------------------------------\n“);
result_t?*resultKey?=?(result_t*)malloc(sizeof(result_t)*nCount);
int?nCountKey;
ICTCLAS_KeyWord(resultKey?nCountKey);
for?(int?i=0;?i {
char?buf[100];
memset(buf?0?100);
int?index?=?resultKey[i].start;
memcpy(buf(void?*)(sString+index)?resultKey[i].length);
printf(“%s\t%d\n“?buf?resultKey[i].freq);
}
free(resultKey);
free(result);
unsigned?long?lFinger?=?ICTCLAS_FingerPrint();
const?char?*?sResult;
//printf(“Before?User-defined?dictionary?used:\n“);
//sResult?=?ICTCLAS_ParagraphProcess(sString1);
//printf(“%s\n“sResult);
//ICTCLAS_ImportUserDict(“userdict.txt“);
//printf(“import?user?words!\n“);
//while?(true)
//for?(int?k=0;?k<3;k++)
{
string?s;
cout<<“insert?the?user?word:“;
getline(cin?s);
cout<
ICTCLAS_AddUserWord(s.c_str());
sResult?=?ICTCLAS_ParagraphProcess(sString0);
sResult?=?ICTCLAS_ParagraphProcess(sString1);
printf(“%s\n“sResult);
ICTCLAS_SaveTheUsrDic();
}
////imp?userdict
////ICTCLAS_ImportUserDict(“userdict.txt“);
////printf(“import?%d?user?words!\n“iWordCount);
////加詞典后
//printf(“After?User-defined?dictionary?
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件     262144  2009-02-15 16:12  windows_C_32\api\ICTCLAS30.dll
     文件      11718  2009-02-15 14:52  windows_C_32\api\ICTCLAS30.h
     文件       6470  2009-02-15 16:12  windows_C_32\api\ICTCLAS30.lib
     目錄          0  2009-02-19 13:24  windows_C_32\api
     文件    4297124  2009-02-19 12:11  windows_C_32\bin\Setup.exe
     目錄          0  2009-02-19 14:04  windows_C_32\bin
     文件     302232  2009-02-19 09:55  windows_C_32\doc\ICTCLAS2009接口文檔.doc
     文件      61952  2008-08-23 18:22  windows_C_32\doc\ICTPOS3.0漢語詞性標記集.doc
     目錄          0  2009-02-19 14:04  windows_C_32\doc
     文件       4876  2009-02-18 15:26  windows_C_32\readme.txt
     文件       4876  2009-02-18 15:26  windows_C_32\sample\windows_cDemo_32_sample\readme.txt
     文件       4883  2009-02-18 15:18  windows_C_32\sample\windows_cDemo_32_sample\readme.txt.bak
     文件       3183  2008-06-11 15:53  windows_C_32\sample\windows_cDemo_32_sample\UpgradeLog.xm
     文件        716  2009-02-03 16:34  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Configure.xm
     文件    3520144  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\BiWord.big
     文件      65540  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\charset.type
     文件    1696620  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\CoreDict.pdat
     文件    1786424  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\CoreDict.pos
     文件     478168  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\CoreDict.unig
     文件     262236  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\FieldDict.pdat
     文件         72  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\FieldDict.pos
     文件    1978128  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\GranDict.pdat
     文件    1778776  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\GranDict.pos
     文件      37253  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\ICTCLAS30.ctx
     文件        288  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\ICTCLAS_First.map
     文件        406  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\ICTPOS.map
     文件       2213  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\nr.ctx
     文件       3008  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\nr.fsa
     文件    1757200  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\nr.role
     文件        307  2009-01-16 13:48  windows_C_32\sample\windows_cDemo_32_sample\win_cDemo\Data\PKU.map
............此處省略36個文件信息
評論
共有 條評論