資源簡介
使用 C# 與 XPath 語法的簡單爬蟲,具有導出、下載 Excel 功能

代碼片段和文件信息
using?NPOI.HSSF.UserModel;
using?NPOI.SS.UserModel;
using?System;
using?System.Collections.Generic;
using?System.Data;
using?System.IO;
using?System.Linq;
using?System.Net;
using?System.Text;
using?System.Web;
using?System.Web.UI;
using?System.Web.UI.WebControls;
public?partial?class?NewList?:?System.Web.UI.Page
{
????protected?void?Page_Load(object?sender?EventArgs?e)
????{
????}
????protected?void?Button1_Click(object?sender?EventArgs?e)
????{
????????List>?result?=?new?List>();
????????var?url?=?“http://www.cricchina.com/research/NewsList?cId=4“;
????????using?(var?ct?=?new?WebClient())
????????{
????????????ct.Encoding?=?Encoding.UTF8;
????????????var?resultHtml?=?ct.DownloadString(url);
????????????//定義請求頭部
????????????ct.Headers.Add(“User-Agent“?“Mozilla/5.0?(Windows?NT?10.0;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/55.0.2883.87?Safari/537.36“);
????????????ct.Headers.Add(“Accept“?“text/htmlapplication/xhtml+xmlapplication/xml;q=0.9image/webp*/*;q=0.8“);
????????????ct.Headers.Add(“Accept-Language“?“zh-CNzh;q=0.8“);
????????????//ct.Headers.Add(“Cookie“?analyzeCookie(resultHtml));?//解析cookie用于二次訪問獲取數據列表
????????????resultHtml?=?ct.DownloadString(url);//獲取數據列表
????????????//把html字符串轉換成htmlDocument對象,方便解析數據,
????????????var?htmlDocument?=?new?HtmlAgilityPack.HtmlDocument();
????????????htmlDocument.LoadHtml(resultHtml);
????????????//以XPath語法來解析html?http://www.w3school.com.cn/xpath/
????????????var?html?=?htmlDocument.DocumentNode;
????????????var?list?=?html.SelectNodes(“//dl[@class=‘ev_p_dl‘]“);//找新聞數據數據
????????????foreach?(var?item?in?list)
????????????{
????????????????var?dic?=?new?Dictionary();
????????????????dic.Add(“href“?item.ChildNodes[1].ChildNodes[2].Attributes[“href“].Value);
????????????????dic.Add(“title“?item.ChildNodes[1].ChildNodes[2].InnerHtml);
????????????????dic.Add(“date“?item.ChildNodes[3].InnerHtml);
????????????????result.Add(dic);
????????????}
????????????if?(result.Count?>?0)
????????????{
????????????????RenderToBrowser(RenderToExcel(toData(result))?System.DateTime.Now.Ticks?+?“.xls“);
????????????}
????????}
????}
????public?DataTable?toData(List>?list)
????{
????????DataTable?dte;
????????dte?=?new?DataTable(“mid“);
????????DataColumn?x1?=?new?DataColumn(“標題“?typeof(string));
????????DataColumn?x2?=?new?DataColumn(“地址“?typeof(string));
????????DataColumn?x3?=?new?DataColumn(“時間“?typeof(string));
????????dte.Columns.Add(x1);
????????dte.Columns.Add(x2);
????????dte.Columns.Add(x3);
????????foreach?(var?item?in?list)
????????{
????????????DataRow?dr?=?dte.NewRow();
????????????dr[“標題“]?=?item[“title“];
????????????dr[“地址“]?=?“http://www.cricchina.com/“+item[“href“];
????????????dr[“時間“]?=?item[“date“];
????????????dte.Rows.Add(dr);
????????}
????????//添加數據到DataTable
????????return?dte;
????}
????public??
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-09-18 17:55  CRIC\
     目錄           0  2017-09-18 17:49  CRIC\App_Data\
     目錄           0  2017-09-18 17:49  CRIC\App_Data\PublishProfiles\
     文件         836  2017-09-18 17:55  CRIC\App_Data\PublishProfiles\CRIC.pubxm
     文件         484  2017-09-18 16:09  CRIC\NewList.aspx
     文件        4955  2017-09-18 17:52  CRIC\NewList.aspx.cs
     文件        1306  2017-09-18 15:53  CRIC\Web.Debug.config
     文件         387  2017-09-18 15:53  CRIC\Web.config
     目錄           0  2017-09-18 17:00  CRIC\bin\
     文件      134656  2017-03-09 12:07  CRIC\bin\HtmlAgilityPack.dll
     文件      298496  2017-03-09 12:07  CRIC\bin\HtmlAgilityPack.pdb
     文件     1624064  2017-01-19 13:05  CRIC\bin\NPOI.dll
     文件        2527  2017-09-18 17:40  CRIC\website.publishproj
- 上一篇:C# 有向圖 鄰接矩陣 路徑查詢
- 下一篇:阿基米德平面螺旋天線
評論
共有 條評論