91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 17KB
    文件類型: .rar
    金幣: 2
    下載: 0 次
    發布日期: 2021-06-10
  • 語言: Java
  • 標簽: 京東 爬蟲 蘇寧

資源簡介

自己寫的京東、蘇寧商品 Java 爬蟲，使用 jsoup 與 HttpClient 實現。

資源截圖

代碼片段和文件信息

package?com.yxg.crawler;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.yxg.crawler.pojo.Item;

/**
 * 爬蟲程序入口
 */
public?class?jdMain?{

public?static?final?String?URL?=?“http://list.jd.com/list.html?cat=9987653655&page={page}&trans=1&JL=6_0_0&ms=5#J_main“;
private?static?final?objectMapper?MAPPER?=?new?objectMapper();

public?static?void?main(String[]?args)?throws?Exception?{
start();

}

public?static?void?start()?throws?Exception?{
//?給入口url
Integer?totalPage?=?getTotalPage();
for?(int?i?=?1;?i? System.out.println(“當前執行文件“?+?i?+?“/“?+?totalPage);
String?url?=?StringUtils.replace(URL?“{page}“?““?+?i);
doStart(url);
break;
}

}

public?static?void?doStart(String?url)?throws?Exception?{

String?content?=?doGet(url);
//?變成Document
Document?document?=?Jsoup.parse(content);
Elements?ems?=?document.select(“#plist?li.gl-item“);
//?List?items?=?new?ArrayList();
Map?items?=?new?HashMap();
for?(Element?em?:?ems)?{
//?獲取id
String?id?=?em.select(“.gl-i-wrap“).attr(“data-sku“);
//?獲取名稱
String?name?=?em.select(“.p-name?a?em“).text();
//?獲取圖片
String?imgage?=?em.select(“.gl-i-wrap?>.p-img?>?a?>img“).attr(“src“)
.replace(“//“?““);

//?構造商品
Item?item?=?new?Item();
item.setId(Long.valueOf(id));
item.settitle(name);
item.setImage(imgage);
items.put(item.getId()?item);
}

//?添加id?要以這個格式?J_3466744
List?strIds?=?new?ArrayList();
for?(Long?id?:?items.keySet())?{
strIds.add(“J_“?+?id);
}

//?獲取商品的價格StringUtils.join([1?2?3]?‘;‘)?=?“1;2;3“
String?priceUrl?=?“http://p.3.cn/prices/mgets?type=1&area=19_1607_3155_0&skuIds=“
+?StringUtils.join(strIds?““);
String?priceDate?=?doGet(priceUrl);
//?解析json
ArrayNode?arrayNode?=?(ArrayNode)?MAPPER.readTree(priceDate);
for?(JsonNode?jsonNode?:?arrayNode)?{
Long?id?=?Long.valueOf(StringUtils.substringAfter(
jsonNode.get(“id“).asText()?“_“));
//?利用map?將price?回填?高明
Long?price?=?jsonNode.get(“p“).asLong();
items.get(id).setPrice(price);
}

//?打印商品
for?(Item?item?:?items.values())?{
System.out.println(item.toString());
}
}

/**
 * 獲取總頁數
 *
 * @return
 * @throws Exception
 */

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----

?????文件????????703??2016-09-06?09:44??yxg-jd-crawler\.classpath

?????文件????????566??2016-09-05?17:31??yxg-jd-crawler\.project

?????文件????????664??2016-09-06?09:44??yxg-jd-crawler\.settings\org.eclipse.jdt.core.prefs

?????文件?????????90??2016-09-05?17:31??yxg-jd-crawler\.settings\org.eclipse.m2e.core.prefs

?????文件???????2874??2016-09-06?16:49??yxg-jd-crawler\pom.xml

?????文件???????4374??2016-09-07?15:25??yxg-jd-crawler\src\main\java\com\yxg\crawler\jdMain.java

?????文件???????1948??2016-09-07?15:17??yxg-jd-crawler\src\main\java\com\yxg\crawler\pojo\Item.java

?????文件???????4781??2016-09-07?15:15??yxg-jd-crawler\src\main\java\com\yxg\crawler\suniMain.java

?????文件???????6698??2016-09-07?15:25??yxg-jd-crawler\target\classes\com\yxg\crawler\jdMain.class

?????文件???????2656??2016-09-07?15:17??yxg-jd-crawler\target\classes\com\yxg\crawler\pojo\Item.class

?????文件???????6747??2016-09-07?15:15??yxg-jd-crawler\target\classes\com\yxg\crawler\suniMain.class

?????文件????????111??2016-09-07?09:44??yxg-jd-crawler\target\classes\meta-INF\MANIFEST.MF

?????文件????????276??2016-09-07?15:25??yxg-jd-crawler\target\classes\meta-INF\maven\com.yxg.jd.crawler\yxg-jd-crawler\pom.properties

?????文件???????2874??2016-09-07?15:25??yxg-jd-crawler\target\classes\meta-INF\maven\com.yxg.jd.crawler\yxg-jd-crawler\pom.xml

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\src\main\java\com\yxg\crawler\pojo

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\src\main\java\com\yxg\crawler

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\com\yxg\crawler\pojo

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\meta-INF\maven\com.yxg.jd.crawler\yxg-jd-crawler

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\src\main\java\com\yxg

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\com\yxg\crawler

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\meta-INF\maven\com.yxg.jd.crawler

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\src\main\java\com

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\com\yxg

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\meta-INF\maven

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\src\main\java

?????目錄??????????0??2016-09-05?17:31??yxg-jd-crawler\src\main\resources

?????目錄??????????0??2016-09-05?17:31??yxg-jd-crawler\src\test\java

?????目錄??????????0??2016-09-05?17:31??yxg-jd-crawler\src\test\resources

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\com

?????目錄??????????0??2016-09-07?15:31??yxg-jd-crawler\target\classes\meta-INF

............此處省略11個文件信息

評論

共有 條評論