資源簡介
包含省、市、區、街道 4 級行政區劃數據及對應的 SQL 數據表,另附一個用 C# 編寫的程序,可從國家統計局網站抓取最新數據,並可擴展至 5 級。

代碼片段和文件信息
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
/// <summary>
///     Helper class for web page scraping.
/// </summary>
public?static?class?Collectionhelper
{
????///?
????///?????取得字符里的Dom元素?不包含元素屬性
????///?
????///?
????///?
????///?
????public?static?List?GetDomElem(string?source?string?domElem)
????{
????????var?matchList?=?new?List();
????????string?regStr?=?string.Format(“<{0}[^>]*?>[\\s\\S]+?<\\/{0}>“?domElem);
????????try
????????{
????????????var?regex?=?new?Regex(regStr?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????MatchCollection?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????matchList.Add(match.Value);
????????????}
????????}
????????catch?(Exception?ex)
????????{
????????????matchList.Add(ex.Message);
????????}
????????return?matchList;
????}
????///?
????///?????取得字符里的Dom元素?包含元素屬性?如:class=“aa“
????///?
????///?
????///?
????///?
????///?
????public?static?List?GetDomElemByAttr(string?source?string?tagName?string?tagValue)
????{
????????var?matchList?=?new?List();
????????string?regStr?=
????????????string.Format(
????????????????@“<(?[\w]+)[^>]*\s{0}[\s]*?=[\s]*?(?[““‘]?){1}(?(Quote)\k)[““‘]?[^>]*>((?<\k[^>]*>)|\k>(?<-Nested>)|[\s\S]*?)*\k>“
????????????????tagName.ToLower()?tagValue);
????????try
????????{
????????????var?regex?=?new?Regex(regStr?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????var?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????matchList.Add(match.Value);
????????????}
????????}
????????catch?(Exception?ex)
????????{
????????????matchList.Add(ex.Message);
????????}
????????return?matchList;
????}
????///?
????///?????取得字符里的A元素鍵值對??[name,url]
????///?
????///?
????///?
????public?static?Dictionary?GetDomElem_A(string?source)
????{
????????var?matchList?=?new?Dictionary();
????????const?string?pattern?=?“]*??href=[\“‘](?[^\“‘]*?)[\“‘][^>]*?>(?[\\w\\W]*?) “;
????????try
????????{
????????????var?regex?=?new?Regex(pattern?RegexOptions.Compiled?|?RegexOptions.IgnoreCase);
????????????MatchCollection?matches?=?regex.Matches(source);
????????????foreach?(Match?match?in?matches)
????????????{
????????????????string?key?=?RemoveHtml(match.Value);
????????????????if?(!matchList.ContainsKey(key))
????????????????{
????????????????????matchList.Add(key?GetUrlArray(matc
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       9891  2016-06-29 17:44  省市區街道\Collection
     文件        460  2018-11-06 16:50  省市區街道\data\Default.aspx
     文件      10026  2018-11-08 14:54  省市區街道\data\Default.aspx.cs
     文件        535  2018-11-08 10:16  省市區街道\Whir_Cmn_Area.cs
     文件   32440950  2018-11-08 14:57  省市區街道\截止2017年10月31日.sql
     目錄          0  2018-11-08 15:05  省市區街道\data
     目錄          0  2018-11-08 15:05  省市區街道
----------- ---------  ---------- -----  ----
             32461862                    7
評論
共有 條評論