資源簡介
C#網頁爬蟲，抓取國家行政區劃的省、市、區/縣三級數據；如需更多層級，稍作改動即可實現。數據來源為國家統計局《統計用區劃和城鄉劃分代碼》2018年數據，截至2018年10月31日。
代碼片段和文件信息
using?System;
using?System.ComponentModel.DataAnnotations;
using?System.ComponentModel.DataAnnotations.Schema;
using?System.Net;
using?System.Text;
using?System.IO;
using?HtmlAgilityPack;
namespace?Ioc.Web.Common
{
????public?class?Address
????{
????????public?Guid?Id?{?get;?set;?}
????????public?Guid?ParentId?{?get;?set;?}
????????public?string?AreaCode?{?get;?set;?}
????????public?string?AreaName?{?get;?set;?}
????????public?int?AreaLevel?{?get;?set;?}
????????public?int?DisplayOrder?{?get;?set;?}
????????public?int?Deleted?{?get;?set;?}
????}
????public?class?GetAddress
????{
????????private?const?string?urlStr?=?“http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/“;
????????///?
????????///?下載頁面
????????///?
????????///?
????????///?
????????///?
????????private?static?string?DownloadHtml(string?url?Encoding?encod)
????????{
????????????string?html?=?string.Empty;
????????????try
????????????{
????????????????//設置請求參數
????????????????HttpWebRequest?request?=?HttpWebRequest.Create(url)?as?HttpWebRequest;
????????????????request.Timeout?=?10?*?1000;//10s超時
????????????????request.ContentType?=?“text/html;charset=utf-8“;
????????????????request.UserAgent?=?“Mozilla/5.0?(Windows?NT?6.1;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/50.0.2661.102?Safari/537.36“;
????????????????//獲取結果
????????????????using?(HttpWebResponse?resp?=?request.GetResponse()?as?HttpWebResponse)
????????????????{
????????????????????if?(resp.StatusCode?==?HttpStatusCode.OK)
????????????????????{
????????????????????????try
????????????????????????{
????????????????????????????StreamReader?sr?=?new?StreamReader(resp.GetResponseStream()?encod);
????????????????????????????html?=?sr.ReadToEnd();
????????????????????????????sr.Close();
????????????????????????}
????????????????????????catch
????????????????????????{?}
????????????????????}
????????????????}
????????????}
????????????catch
????????????{
????????????}
????????????return?html;
????????}
????????///?
????????///?讀取頁面的地址列表
????????///?
????????///?
????????///?
????????///?
????????///?
????????private?static?HtmlNodeCollection?GetAddressList(string?url?string?classStr?string?level?=?““)
????????{
????????????string?HtmlStr?=?DownloadHtml(url?Encoding.Default);
????????????HtmlAgilityPack.HtmlDocument?doc?=?new?HtmlAgilityPack.HtmlDocument();
????????????doc.LoadHtml(HtmlStr);
????????????string?liPath?=?“//tr[@class=‘“?+?classStr?+?“‘]/td“?+?level;
????????????HtmlNodeCollection?goodsNodeCollection?=?doc.DocumentNode.SelectNodes(liPath);
????????????return?goodsNodeCollection;
????????}
????????public?static?void?GraspAddress()
????????{
????????????var?provinceUrl?=?urlStr?+?“index.html“;
???????????
- 上一篇:C#多元線性回歸算法
- 下一篇:C#新中新DKQ-116D二次開發代碼
評論
共有 條評論