91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 78KB
    文件類型: .rar
    金幣: 2
    下載: 1 次
    發布日期: 2021-07-29
  • 語言: C/C++
  • 標簽: C++  網絡爬蟲

資源簡介

一個基于C++的網絡爬蟲程序,實現了最基本的功能,適合入門級的源碼分析

資源截圖

代碼片段和文件信息

/* HTMLParser.cpp
 * Author SL.
 * Date:  2005/03/01.
 * Abstract:  HTML parser
 */

#include?“StdAfx.h“
#include?“HTMLParser.h“
#define?__ENABLE_POCO_LIB__
#ifdef???__ENABLE_POCO_LIB__
#include??
#include??

#endif?

#define?A_TEXT_LEN?10

static?StateDescr????????automaton[MaxState];

void?convertToLowerCase?(char?*string)
{
int?i;

if?(!string)
return;

for?(i?=?0;?string[i]?!=?EOS;?i++)
{
string[i]?=?tolower?(string[i]);
}
}

int?strncasecmp(?char?*string1?char?*string2)
{
convertToLowerCase(string1);
convertToLowerCase(string2);
return?strcmp(string1string2);
}

char?isGBFullChar(char*?str)
{
unsigned?int?iChar;

iChar?=?GBToInt(str[0]?str[1]);

if?(Between(iChar?GB_FULL_START?GB_FULL_END))
return?(char)(iChar?-?GB_FULL_base);

switch?(iChar)?
{
case?0xA1A1:
return?‘?‘;
default:
return?0;
}
}
/*
 * dealspecialchar
 * Removes every occurrence of `ch` from the NUL-terminated string `src`,
 * compacting the remaining bytes in place.
 * A NULL or empty `src` is left untouched (nothing is written).
 */
void dealspecialchar(char* src, char ch)
{
    char *data, *dest;

    if (src == NULL)
        return;
    /* O(1) emptiness check; no need to scan the whole string with strlen(). */
    if (*src == '\0')
        return;

    /* `dest` scans the input; `data` marks where the next kept byte goes. */
    data = dest = src;
    while (*dest)
    {
        if (*dest == ch)
        {
            dest += 1;
        }
        else
        {
            *data++ = *dest++;
        }
    }
    *data = '\0';
}

void?changeAngle(char*?src)
{
char?ch*data*dest;
data=dest=src;
while?(*dest)?
{
if?((ch?=?isGBFullChar(dest))!=0)
{
dest?+=?2;
*data++?=?ch;
}
else?if?((*dest?&?0x80)?==?0)?
{
*data++?=?*dest++;
}?
else?
{
*data++?=?*dest++;
if?(*dest)
{
*data++?=?*dest++;
}
}
}
*data?=?‘\0‘;

}

/* True for the bytes compressSpace() strips: space, tab, CR, LF and '\1'. */
static bool compressSpaceIsBlank(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\1';
}

/*
 * compressSpace
 * Returns a copy of `str` with leading and trailing blank bytes (space, tab,
 * CR, LF, '\1') removed. Interior blanks are kept.
 *
 * Side effect: trailing blanks in the caller's buffer are overwritten with
 * '\0'. The inputs NULL, "\r\n", "\n\r" and "\1\1" yield an empty string
 * without the buffer being touched.
 */
string compressSpace(char *str)
{
    string temp;

    if (str == NULL)
        return temp;

    if (strcmp(str, "\r\n") == 0 || strcmp(str, "\n\r") == 0 || strcmp(str, "\1\1") == 0)
        return temp;

    /* Cut trailing blanks off the buffer itself. */
    size_t len = strlen(str);
    while (len > 0 && compressSpaceIsBlank(str[len - 1]))
        str[--len] = '\0';

    /* Skip leading blanks; the terminator is not a blank, so this stops. */
    char *p = str;
    while (compressSpaceIsBlank(*p))
        p++;

    temp = p;
    return temp;
}

static?ElemMapping????XHTMLElemMappingTable[]?={
//?This?table?MUST?be?in?alphabetical?order?and?in?lower?case?
{“a“?SPACE?HTML_EL_Anchor?L_BasicValue?true}?
{“abbr“?SPACE?HTML_EL_ABBR?L_BasicValue?true}?
{“acronym“?SPACE?HTML_EL_ACRONYM?L_BasicValue?true}
{“address“?SPACE?HTML_EL_Address?L_BasicValue?false}?
{“applet“?SPACE?HTML_EL_applet?L_TransitionalValue?true}
{“area“?‘E‘?HTML_EL_AREA?L_StrictValue?false}?
{“b“?SPACE?HTML_EL_Bold_text?L_StrictValue?true}
{“base“?‘E‘?HTML_EL_base?L_BasicValue?false}
{“basefont“?‘E‘?HTML_EL_baseFont?L_TransitionalValue?false}
{“bdo“?SPACE?HTML_EL_BDO?L_StrictValue?true}
{“big“?SPACE?HTML_EL_Big_text?L_StrictValue?true}
{“blockquote“?SPACE?HTML_EL_Block_Quote?L_BasicValue?false}
{“body“?SPACE?HTML_EL_BODY?L_BasicValue?fa

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件          0  2008-05-29 22:50  spider_new\spider_new\spider\error.txt

     文件      59753  2008-05-16 13:47  spider_new\spider_new\spider\HTMLParser.cpp

     文件      15354  2008-04-25 16:57  spider_new\spider_new\spider\HTMLParser.h

     文件       9088  2008-05-16 13:47  spider_new\spider_new\spider\HTMLTree.cpp

     文件       2838  2008-05-16 13:47  spider_new\spider_new\spider\HTMLTree.h

     文件       1965  2007-09-14 14:10  spider_new\spider_new\spider\hz.h

     文件          0  2008-05-19 14:46  spider_new\spider_new\spider\MVC116.tmp

     文件          0  2008-05-16 15:28  spider_new\spider_new\spider\MVC15E.tmp

     文件          0  2008-05-18 16:11  spider_new\spider_new\spider\MVCD0.tmp

     文件        394  2008-05-16 19:58  spider_new\spider_new\spider\New Text Document.txt

     文件       3579  2008-05-16 12:51  spider_new\spider_new\spider\ReadMe.txt

     文件       1078  2008-05-16 12:51  spider_new\spider_new\spider\res\spider.ico

     文件        398  2008-05-16 12:51  spider_new\spider_new\spider\res\spider.rc2

     文件        914  2008-05-19 14:36  spider_new\spider_new\spider\resource.h

     文件      36168  2008-05-29 22:48  spider_new\spider_new\spider\spider.aps

     文件       1984  2008-05-30 23:15  spider_new\spider_new\spider\spider.clw

     文件       2063  2008-05-16 12:51  spider_new\spider_new\spider\spider.cpp

     文件       4591  2008-05-16 15:18  spider_new\spider_new\spider\spider.dsp

     文件        518  2008-05-16 12:51  spider_new\spider_new\spider\spider.dsw

     文件       1324  2008-05-16 12:51  spider_new\spider_new\spider\spider.h

     文件     197632  2008-05-30 23:15  spider_new\spider_new\spider\spider.ncb

     文件      57856  2008-05-30 23:15  spider_new\spider_new\spider\spider.opt

     文件       2121  2008-05-30 23:15  spider_new\spider_new\spider\spider.plg

     文件       5525  2008-05-29 22:48  spider_new\spider_new\spider\spider.rc

     文件      12409  2008-05-30 23:13  spider_new\spider_new\spider\spiderDlg.cpp

     文件       2073  2008-05-19 15:02  spider_new\spider_new\spider\spiderDlg.h

     文件        208  2008-05-16 12:51  spider_new\spider_new\spider\StdAfx.cpp

     文件       1168  2008-05-19 14:45  spider_new\spider_new\spider\StdAfx.h

     文件       1671  2008-05-18 18:05  spider_new\spider_new\spider\Url.cpp

     文件        880  2008-05-18 14:43  spider_new\spider_new\spider\Url.h

............此處省略9個文件信息

評論

共有 條評論