資源簡介
通過curl工具寫的一個C語言版網頁爬蟲工具,主要在vim裡方便使用!
代碼片段和文件信息
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* NOTE(review): a fourth #include stood here; its header name was lost when
 * the snippet was extracted (possibly <curl/curl.h>) -- restore from upstream. */
/*
 * Node of a doubly linked list of URLs.
 *
 * url  - heap-allocated string; released by img_url_free().
 * pre  - previous node in the list, or NULL.
 * next - following node in the list, or NULL.
 */
typedef struct _img_url_lnk {
    char *url;
    struct _img_url_lnk *pre;
    struct _img_url_lnk *next;
} img_url_lnk;
/*
 * Table of short fixed-size strings ("html", "htm").
 * NOTE(review): the code that consumes this table is not visible here;
 * presumably these are file-name suffixes used to filter URLs -- confirm.
 */
static const char g_filter[][32] = {
    "html",
    "htm",
};
static?img_url_lnk?*g_first_img_url?=?NULL;
static?img_url_lnk?*g_last_img_url?=?NULL;
static?img_url_lnk*?img_url_malloc(void)
{
????img_url_lnk?*ret?=?(img_url_lnk?*)malloc(sizeof(img_url_lnk));
????if?(!ret)
????????return?NULL;
????ret->url?=?NULL;
????ret->pre?=?NULL;
????ret->next?=?NULL;
}
static?void?img_url_free(img_url_lnk?*img)
{
????if?(!img)
????????return;
????if?(img->url)
????????free(img->url);
????img->pre?=?NULL;
????img->next?=?NULL;
}
static?int?img_url_push(img_url_lnk?*img)
{
????int?ret?=?-1;
????if?(!img)
????????return?ret;
????if?(!g_first_img_url)?{
????????g_first_img_url?=?img;
????????g_last_img_url?=?g_first_img_url;
????}?else?{
????????img->pre?=?g_last_img_url;
????????g_last_img_url->next?=?img;
????????g_last_img_url?=?img;
????}
????return?0;
}
static?img_url_lnk*?img_url_pop(void)
{
????img_url_lnk?*tmp?=?g_last_img_url;
????if?(!tmp)
????????return?tmp;
????if?(g_last_img_url?==?g_first_img_url)?{
????????g_first_img_url?=?g_last_img_url?=?NULL;
????}?else?{
????????g_last_img_url?=?g_last_img_url->pre;
????}
????return?tmp;
}
/*
 * Copy url into html_name, replacing every '/' with '_', and print the
 * result to stdout.
 *
 * html_name must have room for strlen(url) + 1 bytes; the result is always
 * NUL-terminated.
 *
 * Returns the number of characters written (excluding the terminator), or
 * 0 when url or html_name is NULL.
 */
static int update_url_name(const char *url, char *html_name)
{
    if (!url || !html_name) {
        return 0;
    }

    const int url_len = (int)strlen(url);

    for (int j = 0; j < url_len; j++) {
        html_name[j] = (url[j] == '/') ? '_' : url[j];
    }
    html_name[url_len] = '\0';  /* required before printing with %s */

    printf("[%s] name = %s\r\n", __func__, html_name);
    return url_len;
}
/*
 * Derive a short file name from the tail of url, store it in html_name and
 * print it to stdout.
 *
 * The URL is scanned backwards from its last character:
 *   - at '/' or '?', the name is the text after that character;
 *   - at a complete "%XX" escape, the name is the text after the escape;
 *   - otherwise, once more than 16 characters have been passed, the name is
 *     the last 16 characters.
 * A delimiter that leaves an empty remainder (e.g. a trailing '/') does not
 * stop the scan; it continues towards the start of the URL. If nothing
 * matches, the whole URL is used. Any '/' inside the chosen name is written
 * as '_'.
 *
 * NOTE(review): after a delimiter with an empty remainder, url_len is 0, so
 * the 16-character cap can no longer trigger for the rest of the scan. This
 * matches the original arithmetic -- confirm it is intended.
 *
 * html_name must have room for the name plus a terminating NUL (at most
 * strlen(url) + 1 bytes); the result is always NUL-terminated.
 *
 * Returns the length of the generated name, or 0 when url or html_name is
 * NULL.
 */
static int gen_html_name(const char *url, char *html_name)
{
    const char *find = NULL;

    if (!url || !html_name) {
        return 0;
    }

    const int total_len = (int)strlen(url);
    int url_len = total_len;

    for (int i = total_len - 1; i >= 0; i--) {
        /* NOTE(review): '?' (query separator) is assumed for the second
         * character tested here -- confirm against the upstream source. */
        if (url[i] == '/' || url[i] == '?') {
            find = url + i + 1;
            url_len = total_len - (i + 1);
            if (url_len > 0) {
                break;
            }
        } else if (url[i] == '%' && i + 3 <= total_len) {
            /* Only skip "%XX" when both hex digits exist; otherwise
             * url + i + 3 would point past the terminating NUL. */
            find = url + i + 3;
            url_len = total_len - (i + 3);
            if (url_len > 0) {
                break;
            }
        } else if (url_len - i > 16) {
            find = url + i + 1;
            url_len = total_len - (i + 1);
            break;
        }
    }

    if (!find) {
        find = url;
    }

    for (int j = 0; j < url_len; j++) {
        html_name[j] = (find[j] == '/') ? '_' : find[j];
    }
    html_name[url_len] = '\0';  /* required before printing with %s */

    printf("[%s] name = %s\r\n", __func__, html_name);
    return url_len;
}
static?int?get_html_inner_urls_ex(char?*buf)
{
????int?ret?=?-1;
????int?buf_len?=?0;
????int?push_flag?=?0;
????char?url[512]?=?{?‘\0‘?};
????img_url_lnk?*tmp?=?NULL;
????int?i?=?0?j?=?0;
????if?(!buf)
????????ret
- 上一篇:Vc++流媒體播放器源碼
- 下一篇:GDI+SDK參考文檔
評論
共有 條評論