-
大小: 372KB 文件類型: .zip 金幣: 2 下載: 0 次 發布日期: 2021-06-02
- 語言: Java
- 標簽: java 網絡爬蟲
資源簡(jiǎn)介
通過java代碼實現一個簡單的網絡爬蟲效果,爬取智聯招聘網站上的職位名,公司名,工作地點,工作薪資等。

代碼片段和文件信息
package?com.wpc.util;
import?java.io.BufferedReader;
import?java.io.IOException;
import?java.io.InputStreamReader;
import?java.net.URL;
import?java.net.URLConnection;
import?org.jsoup.Jsoup;
import?org.jsoup.nodes.Document;
import?org.jsoup.nodes.Element;
import?org.jsoup.select.Elements;
/**
?*?用java代碼實(shí)現(xiàn)一個(gè)簡(jiǎn)單的網(wǎng)絡(luò)爬蟲
?*?
?*?@author?wpc?
?*?@創(chuàng)建時(shí)間:2018-01-11
?*/
public?class?DownloadHtml?{
/**
?*?
?*?@param?url??要抓取的網(wǎng)頁(yè)地址
?*?@param?encoding???要抓取網(wǎng)頁(yè)編碼
?*?@return
?*/
public?static?String?getHtmlResourceByUrl(String?url?String?encoding)?{
URL?urlObj?=?null;
URLConnection?uc?=?null;
InputStreamReader?isr?=?null;
BufferedReader?reader?=?null;
StringBuffer?buffer?=?new?StringBuffer();
//?建立網(wǎng)絡(luò)連接
try?{
urlObj?=?new?URL(url);
//?打開網(wǎng)絡(luò)連接
uc?=?urlObj.openConnection();
//?建立文件輸入流
isr?=?new?InputStreamReader(uc.getInputStream()?encoding);
//?建立緩存導(dǎo)入?將網(wǎng)頁(yè)源代碼下載下來(lái)
reader?=?new?BufferedReader(isr);
//?臨時(shí)
String?temp?=?null;
while?((temp?=?reader.readLine())?!=?null)?{//?一次讀一行?只要不為空就說(shuō)明沒(méi)讀完繼續(xù)讀
//?System.out.println(temp+“\n“);
buffer.append(temp?+?“\n“);
}
}?catch?(Exception?e)?{
e.printStackTrace();
}?finally?{
//?關(guān)流
if?(isr?!=?null)?{
try?{
isr.close();
}?catch?(IOException?e)?{
e.printStackTrace();
}
}
}
return?buffer.toString();
}
/**
?*?
?*?@param?url????要抓取的網(wǎng)頁(yè)地址
?*?@param?encoding???要抓取網(wǎng)頁(yè)編碼
?*/
public?static?void?getJobInfo(String?url?String?encoding)?{
//?拿到網(wǎng)頁(yè)源代碼
String?html?=?getHtmlResourceByUrl(url?encoding);
//?這里需要jar包???jsoupd.jar包
//?我們要抓取的智聯(lián)招聘的職位,按放12點(diǎn)擊最左上角的按鈕
Document?document?=?Jsoup.parse(html);
//?獲取newlist_list_context_table里面的內(nèi)容?id
//?id是唯一的,通過(guò)id那到容器(指定了容器)
Element?element?=?document.getElementById(“dw_tlc_mk“);
//?獲取結(jié)果?根據(jù)class進(jìn)一步確定目標(biāo)的內(nèi)容位置
Elements?elements?=?document.getElementsByClass(“el“);
//?如果要從某個(gè)學(xué)校的每個(gè)教室拿某個(gè)東西,那么Element?element?=
//?document.getElementById(“dw_tlc_mk“);表示先通過(guò)dw_tlc_mk找到學(xué)校(dw_tlc_mk相當(dāng)于學(xué)校名字是唯一的)
//?那么Elements?elements?=
//?document.getElementsByClass(“el“);表示找到每一間教室,el則表示教室(不止一間),之后的循環(huán)就是從每一件教室那你要的東西。
for?(Element?el?:?elements)?{
//?職位名
String?jobtitle?=?el.getElementsByClass(“t1“).text();
//?System.out.println(jobtitle);
//?公司名
String?companyName?=?el.getElementsByClass(“t2“).text();
//?System.out.println(companyName);
//?工作地點(diǎn)
String?jobAddress?=?el.getElementsByClass(“t3“).text();
//?System.out.println(jobAddress);
//?工資
String?wages?=?el.getElementsByClass(“t4“).text();
//?System.out.println(wages);
//?直接在控制臺(tái)打印
System.out.println(“職位名:“?+?jobtitle?+?“\t公司名:“?+?companyName?+?“\t工作地點(diǎn):“?+?jobAddress?+?“\t工作:“?+?wages);
}
}
public?static?void?main(String[]?args)?{
//網(wǎng)頁(yè)地址
String?url?=?“https://search.51job.com/list/190200000000000000999Java%2B%25E6%259E%25B6%25E6%259E%258421.html?lang=c&stype=1&postcha
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-02-15 22:10  java簡單網絡爬蟲\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\
     文件         842  2019-01-10 21:11  java簡單網絡爬蟲\Search\.classpath
     文件         906  2019-01-10 21:11  java簡單網絡爬蟲\Search\.project
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\.settings\
     文件         567  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\.jsdtscope
     文件          83  2019-01-12 19:35  java簡單網絡爬蟲\Search\.settings\org.eclipse.core.resources.prefs
     文件         364  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\org.eclipse.jdt.core.prefs
     文件         470  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\org.eclipse.wst.common.component
     文件         345  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\org.eclipse.wst.common.project.facet.core.xm
     文件          49  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\org.eclipse.wst.jsdt.ui.superType.container
     文件           6  2019-01-10 21:11  java簡單網絡爬蟲\Search\.settings\org.eclipse.wst.jsdt.ui.superType.name
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\WebContent\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\WebContent\me
     文件          39  2019-01-10 21:11  java簡單網絡爬蟲\Search\WebContent\me
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\WebContent\WEB-INF\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\WebContent\WEB-INF\lib\
     文件      395748  2019-01-10 21:39  java簡單網絡爬蟲\Search\WebContent\WEB-INF\lib\jsoup-1.11.3.jar
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\build\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\build\classes\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\build\classes\com\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\build\classes\com\wpc\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\build\classes\com\wpc\util\
     文件        4639  2019-02-15 22:01  java簡單網絡爬蟲\Search\build\classes\com\wpc\util\DateDownUtil.class
     文件        3961  2019-02-15 22:01  java簡單網絡爬蟲\Search\build\classes\com\wpc\util\DownloadHtml.class
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\src\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\src\com\
     目錄           0  2019-02-15 22:07  java簡單網絡爬蟲\Search\src\com\wpc\
     目錄           0  2019-02-15 22:08  java簡單網絡爬蟲\Search\src\com\wpc\util\
     文件        3901  2019-02-15 22:08  java簡單網絡爬蟲\Search\src\com\wpc\util\DownloadHtml.java
     文件          35  2019-02-15 22:10  java簡單網絡爬蟲\readme.txt
............此處省略0個文件信息
- 上一篇:聯(lián)通面試題
- 下一篇:基于JAVA的kmeans算法
評(píng)論
共有 條評論