資源簡介
自己寫的京東、蘇寧商品 Java 爬蟲，使用 jsoup 與 HttpClient 實現。

代碼片段和文件信息
package com.yxg.crawler;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.yxg.crawler.pojo.Item;
/**
 * 
 * 爬蟲程序入口
 */
public class jdMain {
public?static?final?String?URL?=?“http://list.jd.com/list.html?cat=9987653655&page={page}&trans=1&JL=6_0_0&ms=5#J_main“;
private?static?final?objectMapper?MAPPER?=?new?objectMapper();
public?static?void?main(String[]?args)?throws?Exception?{
start();
}
public?static?void?start()?throws?Exception?{
//?給入口url
Integer?totalPage?=?getTotalPage();
for?(int?i?=?1;?i? System.out.println(“當前執行文件“?+?i?+?“/“?+?totalPage);
String?url?=?StringUtils.replace(URL?“{page}“?““?+?i);
doStart(url);
break;
}
}
public?static?void?doStart(String?url)?throws?Exception?{
String?content?=?doGet(url);
//?變成Document
Document?document?=?Jsoup.parse(content);
Elements?ems?=?document.select(“#plist?li.gl-item“);
//?List- ?items?=?new?ArrayList
- ();
Map?items?=?new?HashMap();
for?(Element?em?:?ems)?{
//?獲取id
String?id?=?em.select(“.gl-i-wrap“).attr(“data-sku“);
//?獲取名稱
String?name?=?em.select(“.p-name?a?em“).text();
//?獲取圖片
String?imgage?=?em.select(“.gl-i-wrap?>.p-img?>?a?>img“).attr(“src“)
.replace(“//“?““);
//?構造商品
Item?item?=?new?Item();
item.setId(Long.valueOf(id));
item.settitle(name);
item.setImage(imgage);
items.put(item.getId()?item);
}
//?添加id?要以這個格式?J_3466744
List?strIds?=?new?ArrayList();
for?(Long?id?:?items.keySet())?{
strIds.add(“J_“?+?id);
}
//?獲取商品的價格StringUtils.join([1?2?3]?‘;‘)?=?“1;2;3“
String?priceUrl?=?“http://p.3.cn/prices/mgets?type=1&area=19_1607_3155_0&skuIds=“
+?StringUtils.join(strIds?““);
String?priceDate?=?doGet(priceUrl);
//?解析json
ArrayNode?arrayNode?=?(ArrayNode)?MAPPER.readTree(priceDate);
for?(JsonNode?jsonNode?:?arrayNode)?{
Long?id?=?Long.valueOf(StringUtils.substringAfter(
jsonNode.get(“id“).asText()?“_“));
//?利用map?將price?回填?高明
Long?price?=?jsonNode.get(“p“).asLong();
items.get(id).setPrice(price);
}
//?打印商品
for?(Item?item?:?items.values())?{
System.out.println(item.toString());
}
}
/**
 * 獲取總頁數
 * 
 * @return
 * @throws Exception
 */
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        703  2016-09-06 09:44  yxg-jd-crawler\.classpath
     文件        566  2016-09-05 17:31  yxg-jd-crawler\.project
     文件        664  2016-09-06 09:44  yxg-jd-crawler\.settings\org.eclipse.jdt.core.prefs
     文件         90  2016-09-05 17:31  yxg-jd-crawler\.settings\org.eclipse.m2e.core.prefs
     文件       2874  2016-09-06 16:49  yxg-jd-crawler\pom.xml
     文件       4374  2016-09-07 15:25  yxg-jd-crawler\src\main\java\com\yxg\crawler\jdMain.java
     文件       1948  2016-09-07 15:17  yxg-jd-crawler\src\main\java\com\yxg\crawler\pojo\Item.java
     文件       4781  2016-09-07 15:15  yxg-jd-crawler\src\main\java\com\yxg\crawler\suniMain.java
     文件       6698  2016-09-07 15:25  yxg-jd-crawler\target\classes\com\yxg\crawler\jdMain.class
     文件       2656  2016-09-07 15:17  yxg-jd-crawler\target\classes\com\yxg\crawler\pojo\Item.class
     文件       6747  2016-09-07 15:15  yxg-jd-crawler\target\classes\com\yxg\crawler\suniMain.class
     文件        111  2016-09-07 09:44  yxg-jd-crawler\target\classes\me
     文件        276  2016-09-07 15:25  yxg-jd-crawler\target\classes\me
     文件       2874  2016-09-07 15:25  yxg-jd-crawler\target\classes\me
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\src\main\java\com\yxg\crawler\pojo
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\src\main\java\com\yxg\crawler
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\com\yxg\crawler\pojo
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\me
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\src\main\java\com\yxg
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\com\yxg\crawler
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\me
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\src\main\java\com
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\com\yxg
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\me
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\src\main\java
     目錄          0  2016-09-05 17:31  yxg-jd-crawler\src\main\resources
     目錄          0  2016-09-05 17:31  yxg-jd-crawler\src\test\java
     目錄          0  2016-09-05 17:31  yxg-jd-crawler\src\test\resources
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\com
     目錄          0  2016-09-07 15:31  yxg-jd-crawler\target\classes\me
............此處省略11個文件信息
評論
共有 條評論