資源簡介
Jsoup網絡爬蟲

代碼片段和文件信息
package com.github.webcrawder;

import java.io.IOException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
public?class?CrawderDemo?{
public?static?void?main(String[]?args)?throws?ClientProtocolException?IOException?{
//?創建httpClient客戶端
HttpClient?hClient?=?new?DefaultHttpClient();
//?創建http發送請求對象,Httpget
HttpGet?hget?=?new?HttpGet(“http://www.itcast.cn“);
//?發送請求
HttpResponse?response?=?hClient.execute(hget);
//?獲取網頁內容
String?content?=?EntityUtils.toString(response.getEntity()?“utf-8“);
//?使用Jsoup解析網頁內容
Document?document?=?Jsoup.parse(content);
//?使用元素選擇器選擇網頁的內容
Elements?elements?=?document.select(“ul.nav_li?a“);
System.out.println(elements.text());
System.out.println(elements);
}
}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        859  2017-03-15 17:08  WebCrawler\.classpath
     文件        386  2017-03-15 17:04  WebCrawler\.project
     文件        670  2017-03-15 17:05  WebCrawler\.settings\org.eclipse.jdt.core.prefs
     文件       1887  2017-03-15 19:16  WebCrawler\bin\com\github\webcrawder\CrawderDemo.class
     文件       2679  2017-03-15 18:04  WebCrawler\bin\com\github\webcrawder\HttpClientCrawder.class
     文件       2718  2017-03-15 18:27  WebCrawler\bin\com\github\webcrawder\HttpClientJsoup.class
     文件       1786  2017-03-15 17:31  WebCrawler\bin\com\github\webcrawder\JsoupCrawder.class
     文件       1963  2017-03-15 19:06  WebCrawler\bin\com\github\webcrawder\Jsouptest.class
     文件       1891  2017-03-15 17:47  WebCrawler\bin\com\github\webcrawder\MyHttpClient.class
     文件       1707  2017-03-15 17:32  WebCrawler\bin\com\github\webcrawder\MyJsoup.class
     文件     345035  2017-03-15 17:07  WebCrawler\lib\apache-mime4j-0.6.jar
     文件      58160  2017-03-15 17:07  WebCrawler\lib\commons-codec-1.4.jar
     文件      60841  2017-03-15 17:07  WebCrawler\lib\commons-logging-1.1.1.jar
     文件     291039  2017-03-15 17:07  WebCrawler\lib\httpclient-4.0.1.jar
     文件     172888  2017-03-15 17:07  WebCrawler\lib\httpcore-4.0.1.jar
     文件      25443  2017-03-15 17:07  WebCrawler\lib\httpmime-4.0.1.jar
     文件     119888  2017-03-15 17:07  WebCrawler\lib\json.jar
     文件     293672  2017-03-15 17:07  WebCrawler\lib\jsoup-1.7.2.jar
     文件     489884  2017-03-15 17:07  WebCrawler\lib\log4j-1.2.17.jar
     文件     724225  2017-03-15 17:07  WebCrawler\lib\mysql-connector-java-5.1.10-bin.jar
     文件       1175  2017-03-15 19:16  WebCrawler\src\com\github\webcrawder\CrawderDemo.java
     文件       2213  2017-03-15 18:04  WebCrawler\src\com\github\webcrawder\HttpClientCrawder.java
     文件       2214  2017-03-15 18:27  WebCrawler\src\com\github\webcrawder\HttpClientJsoup.java
     文件       1595  2017-03-15 17:31  WebCrawler\src\com\github\webcrawder\JsoupCrawder.java
     文件       1282  2017-03-15 19:06  WebCrawler\src\com\github\webcrawder\Jsouptest.java
     文件       1546  2017-03-15 17:47  WebCrawler\src\com\github\webcrawder\MyHttpClient.java
     文件        824  2017-03-15 17:32  WebCrawler\src\com\github\webcrawder\MyJsoup.java
     目錄          0  2017-03-15 19:09  WebCrawler\bin\com\github\webcrawder
     目錄          0  2017-03-15 19:09  WebCrawler\src\com\github\webcrawder
     目錄          0  2017-03-15 17:09  WebCrawler\bin\com\github
............此處省略11個文件信息
評論
共有 條評論