91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 4KB
    文件類型: .java
    金幣: 1
    下載: 0 次
    發布日期: 2021-06-06
  • 語言: Java
  • 標簽: 爬蟲

資源簡(jiǎn)介

網絡爬蟲,輕松獲取網絡資源!網絡爬蟲為搜索引擎從萬維網下載網頁。一般分為傳統爬蟲和聚焦爬蟲。

資源截圖

代碼片段和文件信息

import?java.io.IOException;
import?java.io.InputStream;
import?java.io.InputStreamReader;
import?java.io.Reader;
import?java.net.MalformedURLException;
import?java.net.URL;
import?java.net.URLConnection;
import?java.util.ArrayList;
import?java.util.Date;
import?java.util.List;

import?javax.swing.text.MutableAttributeSet;
import?javax.swing.text.html.HTML;
import?javax.swing.text.html.HTMLEditorKit;

public?class?Crawler?{

private?List?urlWaiting?=?new?ArrayList(); //A?list?of?URLs?that?are?waiting?to?be?processed
private?List?urlProcessed?=?new?ArrayList(); //A?list?of?URLs?that?were?processed
private?List?urlError?=?new?ArrayList(); //A?list?of?URLs?that?resulted?in?an?error

private?int?numFindUrl?=?0; //find?the?number?of?url

/**
 * Creates a crawler with empty waiting, processed and error lists.
 */
public Crawler() {}


/**
?*?start?crawling
?*/
public?void?begin()?{

while?(!urlWaiting.isEmpty())?{
processURL(urlWaiting.remove(0));
}

log(“finish?crawling“);
log(“the?number?of?urls?that?were?found:“?+?numFindUrl);
log(“the?number?of?urls?that?were?processed:“?+?urlProcessed.size());
log(“the?number?of?urls?that?resulted?in?an?error:“?+?urlError.size());
}

/**
?*?Called?internally?to?process?a?URL
?*?
?*?@param?strUrl
?*????????????The?URL?to?be?processed.
?*/
public?void?processURL(String?strUrl)?{
URL?url?=?null;
try?{
url?=?new?URL(strUrl);
log(“Processing:?“?+?url);
//?get?the?URL‘s?contents
URLConnection?connection?=?url.openConnection();
connection.setRequestProperty(“User-Agent“?“Test?Crawler?for?Course?NIR“);

if?((connection.getContentType()?!=?null)
&&?!connection.getContentType().toLowerCase()
.startsWith(“text/“))?{
log(“Not?processing?because?content?type?is:?“
+?connection.getContentType());
return;
}

//?read?the?URL
InputStream?is?=?connection.getInputStream();
Reader?r?=?new?InputStreamReader(is);
//?parse?the?URL
HTMLEditorKit.Parser?parse?=?new?HTMLParse().getParser();
parse.parse(r?new?Parser(url)?true);
}?catch?(IOException?e)?{
urlError

評論

共有 條評論