資源簡介
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
using System.Net;
namespace PacksModels
{
public class HtmlAgilityPackHelper
{
/// <summary>
/// Downloads the page at <paramref name="url"/>, calling <see cref="QueryHtml"/> again and
/// again until it returns content. There is no retry limit.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">Passed through to <see cref="QueryHtml"/> unchanged.</param>
/// <returns>
/// The text returned by <see cref="QueryHtml"/>, or <c>null</c> when
/// <paramref name="url"/> is null or blank.
/// </returns>
public static string getHtml(string url, string charSet)
{
    // QueryHtml answers null for a blank URL immediately (no download, no sleep),
    // so retrying it would spin forever. Reject it before entering the loop.
    if (url == null || url.Trim() == "")
    {
        return null;
    }

    string html = QueryHtml(url, charSet);
    // Both null and the "isExp" sentinel mean the attempt failed; try again.
    while (html == null || html == "isExp")
    {
        html = QueryHtml(url, charSet);
    }
    return html;
}
/// <summary>
/// Downloads the raw bytes at <paramref name="url"/> and decodes them as UTF-8, falling
/// back to GBK when the UTF-8 decoding is reported as garbled by <c>isLuan</c>.
/// </summary>
/// <param name="url">Address to download.</param>
/// <param name="charSet">Not used; the encoding is always chosen from the downloaded bytes.</param>
/// <returns>
/// The decoded text, or <c>null</c> when <paramref name="url"/> is null/blank or the
/// download raised a <see cref="WebException"/>. In the failure case the error has been
/// logged and the calling thread has already been paused for one minute.
/// </returns>
public static string QueryHtml(string url, string charSet)
{
    // Validate first so no client is created for a URL that cannot be fetched.
    if (url == null || url.Trim() == "")
    {
        return null;
    }

    Byte[] pageData = null;
    WebException failure = null;
    XWebClient wc = new XWebClient();
    try
    {
        wc.Credentials = CredentialCache.DefaultCredentials;
        wc.Headers["User-Agent"] = "blah";
        pageData = wc.DownloadData(url);
    }
    catch (WebException ex)
    {
        failure = ex;
    }
    finally
    {
        // Release the client on every path, not only after a failure.
        wc.Dispose();
    }

    if (failure != null)
    {
        // Classify by status code; matching on the exception text only works for one
        // UI language and one exact wording of the framework message.
        if (failure.Status == WebExceptionStatus.NameResolutionFailure)
        {
            Helpers.WriteLog("未能解析此遠程名稱,請檢查網絡,正在重試下載此資源...:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        }
        else if (failure.Status == WebExceptionStatus.Timeout)
        {
            Helpers.WriteLog("操作超時,請檢查資源請求頻率,正在重試下載此資源...:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        }
        else
        {
            Helpers.WriteLog("發送請求期間異常,請檢查網絡:" + DateTime.Now.ToString() + ":" + failure.ToString(), "Log\\error.log");
        }

        Helpers.WriteLog("釋放資源等1分鐘重試:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        System.Threading.Thread.Sleep(60000); // 60 seconds, matching the "1 minute" log line
        Helpers.WriteLog("開始重試:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");

        // No bytes were received, so there is nothing to decode. Callers such as
        // getHtml treat null as "try again".
        return null;
    }

    if (pageData == null)
    {
        return null;
    }

    // Prefer UTF-8; only when that decoding is garbled reinterpret the bytes as GBK.
    string utf8Text = Encoding.UTF8.GetString(pageData);
    if (!isLuan(utf8Text))
    {
        return utf8Text;
    }
    return Encoding.GetEncoding("gbk").GetString(pageData);
}
/// <summary>
/// Reports whether the UTF-8 encoding of <paramref name="txt"/> contains the byte
/// sequence EF BF BD (239 191 189), i.e. the Unicode replacement character U+FFFD.
/// </summary>
/// <param name="txt">Text to inspect; null or empty is treated as not garbled.</param>
/// <returns><c>true</c> when the sequence is found anywhere in the text; otherwise <c>false</c>.</returns>
private static bool isLuan(string txt)
{
    if (string.IsNullOrEmpty(txt))
    {
        return false;
    }

    var bytes = Encoding.UTF8.GetBytes(txt);
    // "i + 2 < Length" lets the window reach the last three bytes, so a replacement
    // character at the very end of the text is detected as well.
    for (var i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Alternative garble check: ASCII-encodes <paramref name="txt"/> and looks for a byte
/// value above 255.
/// </summary>
/// <param name="txt">Text to inspect.</param>
/// <returns><c>true</c> when such a byte is found; otherwise <c>false</c>.</returns>
private static bool isLuan1(string txt)
{
    var bytes = Encoding.ASCII.GetBytes(txt);
    for (var i = 0; i < bytes.Length; i++)
    {
        // NOTE(review): a byte can never exceed 255, so this test never succeeds and the
        // method returns false for every non-null input. The intended rule is unclear -
        // confirm before relying on this check.
        if (bytes[i] > 255)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Reports whether any single character of <paramref name="words"/> encodes to exactly
/// two bytes in gb2312, which this class uses as its test for "contains Chinese".
/// </summary>
/// <param name="words">Text to inspect; null or empty yields <c>false</c>.</param>
/// <returns><c>true</c> as soon as one two-byte character is found; otherwise <c>false</c>.</returns>
private static bool WordsIScn(string words)
{
    if (string.IsNullOrEmpty(words))
    {
        return false;
    }

    // Look the encoding up once instead of once per character.
    Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
    char[] single = new char[1];
    for (int i = 0; i < words.Length; i++)
    {
        single[0] = words[i];
        if (gb2312.GetByteCount(single) == 2)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, parses it and returns the nodes selected
/// by <paramref name="xpath"/>.
/// </summary>
/// <param name="url">Address of the page; surrounding whitespace is trimmed.</param>
/// <param name="xpath">XPath expression evaluated against the document root.</param>
/// <returns>
/// The result of <c>SelectNodes</c>, or <c>null</c> when the URL is null/blank or any
/// exception was caught (the exception is written to the error log).
/// </returns>
public static HtmlNodeCollection GetHtmlNodes(string url, string xpath)
{
    // A blank URL can never be downloaded, and getHtml keeps retrying until it gets
    // content, so stop here instead of handing it a request that cannot succeed.
    if (url == null || url.Trim() == "")
    {
        Helpers.WriteLog("獲取節點集異常:" + "url is null or blank" + ":" + url, "Log\\error.log");
        return null;
    }

    HtmlNodeCollection navNodes = null;
    try
    {
        // Fetch the page source.
        string htmlStr = getHtml(url.Trim(), "");
        // Parse it into an HtmlAgilityPack document.
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(htmlStr);
        // Select the requested node set.
        navNodes = doc.DocumentNode.SelectNodes(xpath);
    }
    catch (Exception ex)
    {
        Helpers.WriteLog("獲取節點集異常:" + ex.ToString() + ":" + url, "Log\\error.log");
    }
    return navNodes;
}
/// <summary>
/// Returns the single node of <paramref name="doc"/> selected by <paramref name="xpath"/>.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="xpath">XPath expression evaluated against the document root.</param>
/// <returns>
/// The result of <c>SelectSingleNode</c>, or <c>null</c> when any exception was caught
/// (including a null <paramref name="doc"/>); the exception is written to the error log.
/// </returns>
public static HtmlNode GetNode(HtmlDocument doc, string xpath)
{
    HtmlNode navNode = null;
    try
    {
        navNode = doc.DocumentNode.SelectSingleNode(xpath);
    }
    catch (Exception ex)
    {
        Helpers.WriteLog("獲取單節點異常:" + ex.ToString(), "Log\\error.log");
    }
    return navNode;
}
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
using System.Net;
namespace PacksModels
{
public class HtmlAgilityPackHelper
{
/// <summary>
/// Downloads the page at <paramref name="url"/>, calling <see cref="QueryHtml"/> again and
/// again until it returns content. There is no retry limit.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">Passed through to <see cref="QueryHtml"/> unchanged.</param>
/// <returns>
/// The text returned by <see cref="QueryHtml"/>, or <c>null</c> when
/// <paramref name="url"/> is null or blank.
/// </returns>
public static string getHtml(string url, string charSet)
{
    // QueryHtml answers null for a blank URL immediately (no download, no sleep),
    // so retrying it would spin forever. Reject it before entering the loop.
    if (url == null || url.Trim() == "")
    {
        return null;
    }

    string html = QueryHtml(url, charSet);
    // Both null and the "isExp" sentinel mean the attempt failed; try again.
    while (html == null || html == "isExp")
    {
        html = QueryHtml(url, charSet);
    }
    return html;
}
/// <summary>
/// Downloads the raw bytes at <paramref name="url"/> and decodes them as UTF-8, falling
/// back to GBK when the UTF-8 decoding is reported as garbled by <c>isLuan</c>.
/// </summary>
/// <param name="url">Address to download.</param>
/// <param name="charSet">Not used; the encoding is always chosen from the downloaded bytes.</param>
/// <returns>
/// The decoded text, or <c>null</c> when <paramref name="url"/> is null/blank or the
/// download raised a <see cref="WebException"/>. In the failure case the error has been
/// logged and the calling thread has already been paused for one minute.
/// </returns>
public static string QueryHtml(string url, string charSet)
{
    // Validate first so no client is created for a URL that cannot be fetched.
    if (url == null || url.Trim() == "")
    {
        return null;
    }

    Byte[] pageData = null;
    WebException failure = null;
    XWebClient wc = new XWebClient();
    try
    {
        wc.Credentials = CredentialCache.DefaultCredentials;
        wc.Headers["User-Agent"] = "blah";
        pageData = wc.DownloadData(url);
    }
    catch (WebException ex)
    {
        failure = ex;
    }
    finally
    {
        // Release the client on every path, not only after a failure.
        wc.Dispose();
    }

    if (failure != null)
    {
        // Classify by status code; matching on the exception text only works for one
        // UI language and one exact wording of the framework message.
        if (failure.Status == WebExceptionStatus.NameResolutionFailure)
        {
            Helpers.WriteLog("未能解析此遠程名稱,請檢查網絡,正在重試下載此資源...:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        }
        else if (failure.Status == WebExceptionStatus.Timeout)
        {
            Helpers.WriteLog("操作超時,請檢查資源請求頻率,正在重試下載此資源...:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        }
        else
        {
            Helpers.WriteLog("發送請求期間異常,請檢查網絡:" + DateTime.Now.ToString() + ":" + failure.ToString(), "Log\\error.log");
        }

        Helpers.WriteLog("釋放資源等1分鐘重試:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");
        System.Threading.Thread.Sleep(60000); // 60 seconds, matching the "1 minute" log line
        Helpers.WriteLog("開始重試:" + DateTime.Now.ToString() + ":" + url, "Log\\error.log");

        // No bytes were received, so there is nothing to decode. Callers such as
        // getHtml treat null as "try again".
        return null;
    }

    if (pageData == null)
    {
        return null;
    }

    // Prefer UTF-8; only when that decoding is garbled reinterpret the bytes as GBK.
    string utf8Text = Encoding.UTF8.GetString(pageData);
    if (!isLuan(utf8Text))
    {
        return utf8Text;
    }
    return Encoding.GetEncoding("gbk").GetString(pageData);
}
/// <summary>
/// Reports whether the UTF-8 encoding of <paramref name="txt"/> contains the byte
/// sequence EF BF BD (239 191 189), i.e. the Unicode replacement character U+FFFD.
/// </summary>
/// <param name="txt">Text to inspect; null or empty is treated as not garbled.</param>
/// <returns><c>true</c> when the sequence is found anywhere in the text; otherwise <c>false</c>.</returns>
private static bool isLuan(string txt)
{
    if (string.IsNullOrEmpty(txt))
    {
        return false;
    }

    var bytes = Encoding.UTF8.GetBytes(txt);
    // "i + 2 < Length" lets the window reach the last three bytes, so a replacement
    // character at the very end of the text is detected as well.
    for (var i = 0; i + 2 < bytes.Length; i++)
    {
        if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Alternative garble check: ASCII-encodes <paramref name="txt"/> and looks for a byte
/// value above 255.
/// </summary>
/// <param name="txt">Text to inspect.</param>
/// <returns><c>true</c> when such a byte is found; otherwise <c>false</c>.</returns>
private static bool isLuan1(string txt)
{
    var bytes = Encoding.ASCII.GetBytes(txt);
    for (var i = 0; i < bytes.Length; i++)
    {
        // NOTE(review): a byte can never exceed 255, so this test never succeeds and the
        // method returns false for every non-null input. The intended rule is unclear -
        // confirm before relying on this check.
        if (bytes[i] > 255)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Reports whether any single character of <paramref name="words"/> encodes to exactly
/// two bytes in gb2312, which this class uses as its test for "contains Chinese".
/// </summary>
/// <param name="words">Text to inspect; null or empty yields <c>false</c>.</param>
/// <returns><c>true</c> as soon as one two-byte character is found; otherwise <c>false</c>.</returns>
private static bool WordsIScn(string words)
{
    if (string.IsNullOrEmpty(words))
    {
        return false;
    }

    // Look the encoding up once instead of once per character.
    Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
    char[] single = new char[1];
    for (int i = 0; i < words.Length; i++)
    {
        single[0] = words[i];
        if (gb2312.GetByteCount(single) == 2)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, parses it and returns the nodes selected
/// by <paramref name="xpath"/>.
/// </summary>
/// <param name="url">Address of the page; surrounding whitespace is trimmed.</param>
/// <param name="xpath">XPath expression evaluated against the document root.</param>
/// <returns>
/// The result of <c>SelectNodes</c>, or <c>null</c> when the URL is null/blank or any
/// exception was caught (the exception is written to the error log).
/// </returns>
public static HtmlNodeCollection GetHtmlNodes(string url, string xpath)
{
    // A blank URL can never be downloaded, and getHtml keeps retrying until it gets
    // content, so stop here instead of handing it a request that cannot succeed.
    if (url == null || url.Trim() == "")
    {
        Helpers.WriteLog("獲取節點集異常:" + "url is null or blank" + ":" + url, "Log\\error.log");
        return null;
    }

    HtmlNodeCollection navNodes = null;
    try
    {
        // Fetch the page source.
        string htmlStr = getHtml(url.Trim(), "");
        // Parse it into an HtmlAgilityPack document.
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(htmlStr);
        // Select the requested node set.
        navNodes = doc.DocumentNode.SelectNodes(xpath);
    }
    catch (Exception ex)
    {
        Helpers.WriteLog("獲取節點集異常:" + ex.ToString() + ":" + url, "Log\\error.log");
    }
    return navNodes;
}
/// <summary>
/// Returns the single node of <paramref name="doc"/> selected by <paramref name="xpath"/>.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="xpath">XPath expression evaluated against the document root.</param>
/// <returns>
/// The result of <c>SelectSingleNode</c>, or <c>null</c> when any exception was caught
/// (including a null <paramref name="doc"/>); the exception is written to the error log.
/// </returns>
public static HtmlNode GetNode(HtmlDocument doc, string xpath)
{
    HtmlNode navNode = null;
    try
    {
        navNode = doc.DocumentNode.SelectSingleNode(xpath);
    }
    catch (Exception ex)
    {
        Helpers.WriteLog("獲取單節點異常:" + ex.ToString(), "Log\\error.log");
    }
    return navNode;
}
}
}
代碼片段和文件信息
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
using System.Net;
namespace PacksModels
{
    public class HtmlAgilityPackHelper
    {
        public static string getHtml(string url, string charSet)
        {
            string html = QueryHtml(url, charSet);
            while (html == "isExp"||html==null)
            {
                html = QueryHtml(url, charSet);
            }
            return html;
        }
        //獲取網頁源碼
        public static string QueryHtml(string url, string charSet)
        {
            bool isExp = false;
            Byte[] pageData = null;
            XWebClient wc = new XWebClient();
            try
            {
               
評論
共有 條評論
相關資源
- asp.net 網頁靜態化組件(shipingx-Stati
- c# 同時將圖片和文字復制到剪貼版 (
- HTML壓縮
- Word文件轉Html文檔目錄
- html5 繪圖以及 動畫
- bbsharp bbcode 轉html 寫的很簡潔
- MVC通過過濾器 實現輸出前對html修改(
- MVC輸出前對html修改
- 帶Html編輯器CSkin.dll版本
- NHtmlFilter1.0過濾Html危險腳本 防止XSS攻
- Html文件上傳控件(整理前臺使用版)
- html5簡單進度條效(progressbar)
- QQ聊天記錄Mht轉Html格式(附工具源碼
- WinformHTMLEditor winform 富文本編輯器
- 純HTML彈出必填信息(popHint)
- C#讀取HTML文件并插入到數據庫
- C#實現WebSocket源碼c#寫的服務端html寫的
- Web網頁控制攝像頭
- HtmlAgilityPack 1.11.2最新版本
- Winista.Htmlparser.Net 源碼 +Demo
- c# winform html編輯器
- Winform中顯示HTML富文本編輯器
- c#用webkit內核支持html5
- ASP.NET實現網頁快照/網頁截圖(將ht
- HTML5 WebSocket 構建實時 Web 應用
- html5 實時推送消息到客戶端(SSE/Eve
- C# Word檔轉Html檔範例
- C# 剪貼板功能 同時黏貼圖片和文字等
- 修改 webbrowser 支持IE8/IE9/IE10/HTML5 (
- 手表電商首頁、注冊頁html模板