資源簡介
很簡易的 Java 爬蟲,可以爬取攜程的航班數據,並使用 jsoup 進行解析。

代碼片段和文件信息
package?com.Interface;
import?java.io.IOException;
import?java.io.UnsupportedEncodingException;
import?java.net.URLEncoder;
import?java.text.ParseException;
import?java.text.SimpleDateFormat;
import?java.util.Calendar;
import?java.util.Date;
import?java.util.HashMap;
import?java.util.Iterator;
import?java.util.Map;
import?org.apache.http.HttpEntity;
import?org.apache.http.client.ClientProtocolException;
import?org.apache.http.client.methods.CloseableHttpResponse;
import?org.apache.http.client.methods.HttpGet;
import?org.apache.http.impl.client.CloseableHttpClient;
import?org.apache.http.impl.client.HttpClientBuilder;
import?org.apache.http.util.EntityUtils;
import?org.jsoup.Jsoup;
import?org.jsoup.nodes.Document;
import?org.jsoup.nodes.Element;
import?org.jsoup.select.Elements;
import?net.sf.json.JSONArray;
import?net.sf.json.JSONobject;
public?class?CrawlerHttpClient?{
//?創建一個客戶端?類似打開一個瀏覽器
private?static?CloseableHttpClient?httpClient?=?HttpClientBuilder.create().build();
//?創建一個get方法,類似在瀏覽器中輸入一個地址,path則為URL的值
static?HttpGet?httpGet;
static?CloseableHttpResponse?response;
static?HttpEntity?httpEntity;
private?static?Map?map;
static?{
map?=?new?HashMap();
map.put(“KY“?“昆明航空“);
}
????
public?static?void?getAirLineDetails(String?flight)?{
String?time?=?getDateString(-1?“yyyyMMdd“);//?new?Date()為獲取當前系統時間
String?path?=?“https://flights.ctrip.com/actualtime/fno--“?+?flight?+?“-“?+?time?+?“.html“;
//?創建get請求
httpGet?=?new?HttpGet(path);
httpGet.setHeader(“User-Agent“
“Mozilla/5.0?(Windows?NT?10.0;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/74.0.3729.131?Safari/537.36“);
String?airLineName?=?null;
String?airLineNo?=?null;
String?airLineDate?=?null;
String?airLineDay?=?““;
String?timetakeOff?=?““;
String?planTakeOff?=?““;
String?timeArrive?=?““;
String?planArrive?=?““;
String?airLineStartCity?=?““;
String?airLineEndCity?=?““;
try?{
//?獲取響應模型
?response?=?httpClient.execute(httpGet);
//?獲取響應實體
?????httpEntity?=?response.getEntity();
if?(httpEntity?!=?null)?{
//?System.out.println(“響應內容長度為:“?+?httpEntity.getContentLength());
//?System.out.println(“響應內容為:“?+?EntityUtils.toString(httpEntity));
Document?doc?=?Jsoup.parse(EntityUtils.toString(httpEntity));
airLineName?=?doc.select(“div.detail-t“).select(“span.ml5“).first().text();
//?System.out.println(“航空公司:“?+?airLineName);
airLineNo?=?doc.select(“div.detail-t“).select(“strong.ml5“).text();
//?System.out.println(“航班號:“?+?airLineNo);
airLineDate?=?doc.select(“div.detail-t“).select(“span.ml10“).first().text();
//?System.out.println(“航班日期:“?+?airLineDate);
airLineDay?=?doc.select(“div.detail-t“).select(“span.ml5“).last().text();
//?System.out.println(“周幾:“?+?airLineDay);
Elements?airLineDetails?=?doc.select(“div.detail-m“);
//System.out.println(doc.select(“div.detai
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件      13360  2019-07-18 11:59  air\.classpath
     文件        585  2019-07-18 11:52  air\.project
     文件        718  2019-07-18 11:52  air\.settings\org.eclipse.jdt.core.prefs
     文件        206  2019-07-18 11:58  air\.settings\org.eclipse.wst.common.project.facet.core.xml
     文件       9658  2019-07-18 14:20  air\bin\com\Interface\CrawlerHttpClient.class
     文件      10804  2019-07-18 14:20  air\src\com\Interface\CrawlerHttpClient.java
     目錄          0  2019-07-19 11:22  air\bin\com\Interface
     目錄          0  2019-07-19 11:22  air\src\com\Interface
     目錄          0  2019-07-19 11:22  air\bin\com
     目錄          0  2019-07-19 11:22  air\src\com
     目錄          0  2019-07-19 11:22  air\.settings
     目錄          0  2019-07-19 11:22  air\bin
     目錄          0  2019-07-19 11:22  air\src
     目錄          0  2019-07-19 11:22  air
----------- ---------  ---------- -----  ----
                35331                    14
評論
共有 條評論