資源簡介
正向最大匹配中文分詞c++源程序

代碼片段和文件信息
// NOTE(review): the original header names were lost when this listing was
// extracted; the list below covers what the visible code uses -- confirm
// against the original maxinum-matching.cpp.
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// One entry of the table that Init() builds and Search() reads: it describes
// where a bucket of words is stored inside gbindex.txt.
typedef struct indexl
{
    // Offset Search() seeks to before reading the bucket. Init() stores
    // (offset of the bucket's first word - 1) here, and -1 for a bucket that
    // was never started.
    long int lbeg;
    // Offset just past the bucket's last word and its trailing separator;
    // Search() reads (lend - lbeg) bytes starting at lbeg.
    long int lend;
} Id;
// Ordering predicate used by Sort(): true when `lhs` is strictly longer than
// `rhs` (length measured in bytes, as std::string::length() reports it).
static bool SortLongerFirst(const std::string& lhs, const std::string& rhs)
{
    return lhs.length() > rhs.length();
}

// Reorders `vstr` in place so that longer strings come before shorter ones.
//
// The index built from this order is scanned front to back, so the longest
// candidate word is tried first (forward maximum matching).
//
// Strings of equal length keep their incoming relative order. The previous
// hand-written exchange sort left the order of equal-length strings
// unspecified, so no caller can have depended on it.
void Sort(std::vector<std::string>& vstr)
{
    std::stable_sort(vstr.begin(), vstr.end(), SortLongerFirst);
}
void?Init(Id?*?&id)//生成索引文檔
{
ifstream?in_src(“gbciku.txt“);
ofstream?out_des(“gbindex.txt“);
vector?vstr;
string?line;
id?=?new?Id[6768];
for(int?x?=?0;?x?6768?;?x++)
id[x].lbeg?=?-1;
int?ivnum?=?0;
int?ipos?=?0;?
int?ima?=?0;
while(getline(in_srcline))
{
//int?len?=?line.length();
int?blankpos?=?line.find(“?“);
line.resize(blankpos);
//cout<<(int)line[0];
//int?temp_x?=?line[0];
//int?temp_y?=?(unsigned?char)line[0]?-?0xB0;
//int?temp_x1?=?((unsigned?char)line[0]?-?0xB0)*94;
//int?temp_y1?=?line[1];
//int?temp_y3=(unsigned?char)line[1];
//int?temp_y4=(unsigned?char)line[1]?-?0xA1;
//int?temp_y2=((unsigned?char)line[0]?-?0xB0)*94+(unsigned?char)line[1]?-?0xA1;
int?icurma?=?((unsigned?char)line[0]?-?0xB0)*94+?(unsigned?char)line[1]?-?0xA1;?
if(icurma?>?6768||icurma?0)
{
cout?< }
if(icurma?==?ima)
{
line?=?line.substr(2?line.length()?-?2);
//vstr[ivnum]?=?line;
vstr.insert(vstr.end()?line);//首字匹配
}
else//?icur_ma?!=?ima
{
Sort(vstr);//vstr從大到小排列
for(vector::iterator?i?=?vstr.begin();?i? {
out_des?<(*i)?<“?“;//將索引文件注入容器中
ipos?+=?(*i).length()?+?1;//每一個i都指向了空格后的字符串
}
id[ima].lend?=?ipos;//ipos的值就確定了結構體的。。。
//ivnum?=?0;
vstr.clear();//清除容器中所有元素i
ima?=icurma?;
//cout< id[ima].lbeg?=?ipos?-?1;
line?=?line.substr(2?line.length()?-?2);
vstr.insert(vstr.end()?line);//循環,首字匹配
}
}
Sort(vstr);
for(vector::iterator?i?=?vstr.begin();?i? {
out_des?<(*i)?<“?“;
ipos?+=?(*i).length()?+?1;
}
id[ima].lend?=?ipos;
//cout?< //
}
int?Search(const?char?*str_in?Id?*id?int?cur_ma)//查找索引文件
{
//cout?< ifstream?in(“gbindex.txt“);
string?line_temp?=?str_in;//輸入串
// cout?< char*?szstr?=?new?char[id[cur_ma].lend?-?id[cur_ma].lbeg?+?1];
//cout?<“?readfile?“;
in.seekg(id[cur_ma].lbeg?ios::beg);
in.read(szstr?id[cur_ma].lend?-?id[cur_ma].lbeg);
// cout?<“id[cur_ma].lend“?< // cout?<?“id[cur_ma].lbeg“?< szstr[id[cur_ma].lend?-?id[cur_ma].lbeg]?=?0;
//cout?< char?word[40];
vector?vstr;
char*?szstr_temp?=?szstr;
// cout?<
//cout?<
//string?szstr_temp?=?szstr;
while(strlen(szstr_temp)?>?0)
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
    .CA....    135822  2007-12-22 01:37  proc\aa
    .CA....     10616  2009-05-31 08:37  proc\Debug\BuildLog.htm
    .CA....    628736  2009-05-31 08:37  proc\Debug\maxinum-matching.exe
    .CA....       406  2009-05-31 08:37  proc\Debug\maxinum-matching.exe.em
    .CA....       472  2009-05-31 08:37  proc\Debug\maxinum-matching.exe.em
    .CA....       381  2009-05-31 08:37  proc\Debug\maxinum-matching.exe.intermediate.manifest
    .CA....   1470592  2009-05-31 08:37  proc\Debug\maxinum-matching.ilk
    .CA....    594752  2009-05-31 08:37  proc\Debug\maxinum-matching.obj
    .CA....   2993152  2009-05-31 08:37  proc\Debug\maxinum-matching.pdb
    .CA....        65  2009-05-31 08:37  proc\Debug\mt.dep
    .CA....    183296  2009-05-31 08:37  proc\Debug\vc90.idb
    .CA....    200704  2009-05-31 08:37  proc\Debug\vc90.pdb
    .CA....   2847520  2007-12-01 19:18  proc\gbciku.TXT
    .CA....    241740  2009-05-31 08:37  proc\gbindex.txt
    .CA....       227  2009-05-31 08:22  proc\input.txt
    .CA....      7135  2009-05-31 08:34  proc\maxinum-matching.cpp
    .CA....      6961  2007-12-19 18:04  proc\maxinum-matching.cpp.bak
    .CA....      3523  2007-12-22 13:35  proc\maxinum-matching.dsp
    .CA....       540  2007-12-22 20:17  proc\maxinum-matching.dsw
    .CA....    188416  2007-12-02 18:59  proc\maxinum-matching.exe
    .CA....   2501632  2009-05-31 08:38  proc\maxinum-matching.ncb
    .CA....     48640  2007-12-22 20:17  proc\maxinum-matching.opt
    .CA....      3346  2007-12-22 14:05  proc\maxinum-matching.plg
    .CA....       897  2009-05-31 08:02  proc\maxinum-matching.sln
    .CA..H.     11264  2009-05-31 08:38  proc\maxinum-matching.suo
    .CA....      4996  2009-05-31 08:02  proc\maxinum-matching.vcproj
    .CA....      1409  2009-05-31 08:38  proc\maxinum-matching.vcproj.SUN-PC.SUN.user
    .CA....      1892  2007-12-09 16:56  proc\rmm.cpp
    .CA....       204  2007-12-07 22:17  proc\Test.cpp
    .C.D...         0  2009-05-31 08:37  proc\Debug
............此處省略4個文件信息
- 上一篇:vc6.0下mfc控件加載jpg格式圖片
- 下一篇:C語言寫的推箱子游戲
評論
共有 條評論