91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 662KB
    文件類型: .zip
    金幣: 2
    下載: 0 次
    發布日期: 2021-06-14
  • 語言: C#
  • 標簽: 爬蟲,C#

資源簡介

使用 C# 與 XPath 語法的簡單爬蟲,具有導出、下載 Excel 功能

資源截圖

代碼片段和文件信息

using?NPOI.HSSF.UserModel;
using?NPOI.SS.UserModel;
using?System;
using?System.Collections.Generic;
using?System.Data;
using?System.IO;
using?System.Linq;
using?System.Net;
using?System.Text;
using?System.Web;
using?System.Web.UI;
using?System.Web.UI.WebControls;

public?partial?class?NewList?:?System.Web.UI.Page
{
????protected?void?Page_Load(object?sender?EventArgs?e)
????{

????}
????protected?void?Button1_Click(object?sender?EventArgs?e)
????{

????????List>?result?=?new?List>();
????????var?url?=?“http://www.cricchina.com/research/NewsList?cId=4“;
????????using?(var?ct?=?new?WebClient())
????????{
????????????ct.Encoding?=?Encoding.UTF8;
????????????var?resultHtml?=?ct.DownloadString(url);
????????????//定義請求頭部
????????????ct.Headers.Add(“User-Agent“?“Mozilla/5.0?(Windows?NT?10.0;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/55.0.2883.87?Safari/537.36“);
????????????ct.Headers.Add(“Accept“?“text/htmlapplication/xhtml+xmlapplication/xml;q=0.9image/webp*/*;q=0.8“);
????????????ct.Headers.Add(“Accept-Language“?“zh-CNzh;q=0.8“);
????????????//ct.Headers.Add(“Cookie“?analyzeCookie(resultHtml));?//解析cookie用于二次訪問獲取數據列表
????????????resultHtml?=?ct.DownloadString(url);//獲取數據列表

????????????//把html字符串轉換成htmlDocument對象,方便解析數據,
????????????var?htmlDocument?=?new?HtmlAgilityPack.HtmlDocument();
????????????htmlDocument.LoadHtml(resultHtml);
????????????//以XPath語法來解析html?http://www.w3school.com.cn/xpath/
????????????var?html?=?htmlDocument.DocumentNode;
????????????var?list?=?html.SelectNodes(“//dl[@class=‘ev_p_dl‘]“);//找新聞數據數據
????????????foreach?(var?item?in?list)
????????????{
????????????????var?dic?=?new?Dictionary();
????????????????dic.Add(“href“?item.ChildNodes[1].ChildNodes[2].Attributes[“href“].Value);
????????????????dic.Add(“title“?item.ChildNodes[1].ChildNodes[2].InnerHtml);
????????????????dic.Add(“date“?item.ChildNodes[3].InnerHtml);
????????????????result.Add(dic);
????????????}
????????????if?(result.Count?>?0)
????????????{
????????????????RenderToBrowser(RenderToExcel(toData(result))?System.DateTime.Now.Ticks?+?“.xls“);
????????????}
????????}
????}
????public?DataTable?toData(List>?list)
????{
????????DataTable?dte;
????????dte?=?new?DataTable(“mid“);
????????DataColumn?x1?=?new?DataColumn(“標題“?typeof(string));
????????DataColumn?x2?=?new?DataColumn(“地址“?typeof(string));
????????DataColumn?x3?=?new?DataColumn(“時間“?typeof(string));
????????dte.Columns.Add(x1);
????????dte.Columns.Add(x2);
????????dte.Columns.Add(x3);
????????foreach?(var?item?in?list)
????????{
????????????DataRow?dr?=?dte.NewRow();
????????????dr[“標題“]?=?item[“title“];
????????????dr[“地址“]?=?“http://www.cricchina.com/“+item[“href“];
????????????dr[“時間“]?=?item[“date“];
????????????dte.Rows.Add(dr);
????????}
????????//添加數據到DataTable
????????return?dte;

????}

????public??

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-09-18 17:55  CRIC\
     目錄           0  2017-09-18 17:49  CRIC\App_Data\
     目錄           0  2017-09-18 17:49  CRIC\App_Data\PublishProfiles\
     文件         836  2017-09-18 17:55  CRIC\App_Data\PublishProfiles\CRIC.pubxml
     文件         484  2017-09-18 16:09  CRIC\NewList.aspx
     文件        4955  2017-09-18 17:52  CRIC\NewList.aspx.cs
     文件        1306  2017-09-18 15:53  CRIC\Web.Debug.config
     文件         387  2017-09-18 15:53  CRIC\Web.config
     目錄           0  2017-09-18 17:00  CRIC\bin\
     文件      134656  2017-03-09 12:07  CRIC\bin\HtmlAgilityPack.dll
     文件      298496  2017-03-09 12:07  CRIC\bin\HtmlAgilityPack.pdb
     文件     1624064  2017-01-19 13:05  CRIC\bin\NPOI.dll
     文件        2527  2017-09-18 17:40  CRIC\website.publishproj

評論

共有 條評論

相關資源