資源簡介
C# 網頁抓取分析 表格圖形生成 數據挖掘

代碼片段和文件信息
using?System;
using?System.Collections.Generic;
using?System.ComponentModel;
using?System.Data;
using?System.Drawing;
using?System.Text;
using?System.Windows.Forms;
using?System.Text.Regularexpressions;
using?System.Collections.Specialized;
namespace?WebCount
{
????public?partial?class?mainForm?:?Form
????{
????????int[]?ipage;//記錄已經(jīng)處理的頁(yè)面
????????NameValueCollection?nvc;?//解析Url用
????????public?mainForm()
????????{
????????????ipage?=?new?int[10];
????????????//webBrowser1?=?new?System.Windows.Forms.WebBrowser();
????????????//this.webBrowser1.Location?=?new?System.Drawing.Point(0?40);
????????????//this.webBrowser1.Name?=?“webBrowserShow“;
????????????//Controls.Add(this.webBrowser1);
????????????InitializeComponent();
????????????//this.webBrowser1.Size?=?new?Size(this.Size.Width?200);
????????}
????????private?void?button1_Click(object?sender?EventArgs?e)
????????{
????????????webBrowser1.Navigate(tbUrl.Text“_self“);
????????}
????????private?void?mainForm_SizeChanged(object?sender?EventArgs?e)
????????{
????????????//this.webBrowser1.Size?=?new?Size(this.Size.Width?200);
????????}
????????private?void?mainForm_Load(object?sender?EventArgs?e)
????????{
????????}
????????private?void?webBrowser1_DocumentCompleted(object?sender?WebBrowserDocumentCompletedEventArgs?e)
????????{
????????????if?(webBrowser1.Document.Url?!=?e.Url?&&?e.Url.AbsoluteUri?!=?“about:blank“)?return;
????????????string?strUri?=?webBrowser1.Url.AbsoluteUri;
????????????string?baseUri;
????????????ParseUrl(strUriout?baseUriout?nvc);
????????????if?(nvc.Count!=0&&int.Parse(nvc.Get(“pageNO“))?>?10)?//十頁(yè)以后不再記錄
????????????????return;
????????????ParsePage();?//解析當(dāng)前頁(yè)面,向下一頁(yè)面跳轉(zhuǎn)
????????}
????????private?void?ParsePage()
????????{
????????????System.Windows.Forms.HtmlDocument?document?=?this.webBrowser1.Document;
????????????HtmlElement?btn?=?null;
????????????int?i=0;
????????????string?bbstitle;
????????????string?bbsurl;
????????????string?bbsauthor;
????????????int?bbsview;
????????????int?bbsreply;
????????????string?bbsuptime;
????????????foreach?(HtmlElement?htmelt?in?document.GetElementsByTagName(“table“))
????????????{
????????????????if?(i?>?14)
????????????????{
????????????????????HtmlElement?htmtr?=?htmelt.GetElementsByTagName(“tr“)[0];
????????????????????if?(htmtr.GetAttribute(“bgcolor“)?==?“#6699cc“)?
????????????????????????????break;
????????????????????int?j?=?0;
????????????????????foreach?(HtmlElement?htmtd?in?htmelt.GetElementsByTagName(“td“))
????????????????????{
????????????????????????switch?(j)
????????????????????????{?
????????????????????????????case?0:
????????????????????????????????break;
????????????????????????????case?1:
????????????????????????????????HtmlElement?htmtltle?=?htmtd.GetElementsByTagName(“a“)[0];
????????????????????????????????bbstitle?=?htmtltle.InnerText;
????????????????????????????????bbsurl?=?htmtltle.GetAttribute(“href“);
???????
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        914  2008-06-18 09:12  數據抓取程序\WebCount.sln
    ..A..H.     38400  2010-12-23 01:26  數據抓取程序\WebCount.suo
     文件        913  2008-06-12 21:57  數據抓取程序\Backup\WebCount.sln
     文件      17408  2008-06-13 21:22  數據抓取程序\Backup\WebCount.suo
     文件        427  2008-06-13 21:20  數據抓取程序\Backup\WebCount\app.config
     文件     147456  2008-06-14 00:23  數據抓取程序\Backup\WebCount\bbstopic.mdb
     文件       5469  2008-06-13 21:19  數據抓取程序\Backup\WebCount\Form1.cs
     文件       4984  2008-06-13 21:19  數據抓取程序\Backup\WebCount\Form1.Designer.cs
     文件       5814  2008-06-13 21:19  數據抓取程序\Backup\WebCount\Form1.resx
     文件        470  2008-06-12 21:58  數據抓取程序\Backup\WebCount\Program.cs
     文件       3404  2008-06-13 21:22  數據抓取程序\Backup\WebCount\WebCount.csproj
     文件       1188  2008-06-12 21:57  數據抓取程序\Backup\WebCount\Properties\AssemblyInfo.cs
     文件       2870  2008-06-12 21:57  數據抓取程序\Backup\WebCount\Properties\Resources.Designer.cs
     文件       5612  2008-06-12 21:57  數據抓取程序\Backup\WebCount\Properties\Resources.resx
     文件       1742  2008-06-13 21:20  數據抓取程序\Backup\WebCount\Properties\Settings.Designer.cs
     文件       1041  2008-06-13 21:20  數據抓取程序\Backup\WebCount\Properties\Settings.settings
     文件        427  2008-06-13 21:20  數據抓取程序\WebCount\app.config
     文件    1974272  2008-09-08 16:33  數據抓取程序\WebCount\bbstopic.mdb
     文件        508  2008-06-16 21:31  數據抓取程序\WebCount\CMD5.cs
     文件      13356  2008-07-02 09:00  數據抓取程序\WebCount\DataAccess.cs
     文件      12138  2008-07-02 08:56  數據抓取程序\WebCount\Form1.cs
     文件      23007  2008-06-24 09:40  數據抓取程序\WebCount\Form1.Designer.cs
     文件      45085  2008-06-24 09:40  數據抓取程序\WebCount\Form1.resx
     文件       5656  2008-06-30 15:15  數據抓取程序\WebCount\FormChart.cs
     文件       6201  2008-06-30 15:13  數據抓取程序\WebCount\FormChart.Designer.cs
     文件       7295  2008-06-30 15:13  數據抓取程序\WebCount\FormChart.resx
     文件       1612  2008-06-18 15:20  數據抓取程序\WebCount\Inf_bbstopic.cs
     文件       2114  2008-06-20 11:31  數據抓取程序\WebCount\Inf_Chart.cs
     文件        470  2008-06-12 21:58  數據抓取程序\WebCount\Program.cs
     文件       5835  2008-06-24 21:43  數據抓取程序\WebCount\WebCount.csproj
............此處省略72個文件信息
評論
共有 條評論