91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 29.21MB
    文件類型: .rar
    金幣: 1
    下載: 0 次
    發布日期: 2023-07-08
  • 語言: Java
  • 標簽: Jsoup+Java

資源簡介

經過測試,可以抓取一個完整的網站,包括網站的圖片、css、js等。同時根據網站目錄,在本地生成相同目錄。使用Jsoup+Java。下載之後,可以直接運行。

資源截圖

代碼片段和文件信息

package?cn.yuping.haha;

import?cn.yuping.haha.util.FileUtils;
import?org.jsoup.Jsoup;
import?org.jsoup.nodes.Document;
import?org.jsoup.nodes.Element;
import?org.jsoup.select.Elements;
import?javax.servlet.ServletException;
import?javax.servlet.annotation.WebServlet;
import?javax.servlet.http.HttpServlet;
import?javax.servlet.http.HttpServletRequest;
import?javax.servlet.http.HttpServletResponse;
import?javax.servlet.http.HttpSession;
import?java.io.File;
import?java.io.IOException;
import?java.util.*;

@WebServlet(name?=?“crawlWebServlet“?urlPatterns?=?“/crawlWeb“)
public?class?CrawlWebSite?extends?HttpServlet?{
?
private?final?static?String?charSet?=?“utf-8“;
private??static?String?rootDir;
private??static?String?rootUrl; //“http://www.17sucai.com/preview/216556/2016-02-25/%E6%A9%99%E8%89%B2%E5%95%86%E5%9C%BAwap/“

private?final?static?int?timeOut?=?30000;

/**網(wǎng)站上相對(duì)地址與絕對(duì)地址的映射*/
private?static?Map?absRelativeUrlMap?=?new?HashMap();
/**網(wǎng)站上的url與最終本地使用的url映射*/
private?static?Map?urlmapMap?=?new?HashMap();
/**網(wǎng)站上的cssjs*/
private?static?Map?cssjsmapMap?=?new?HashMap();
private?static?List?allFiles?=?new?ArrayList();
private?static?Set?imgList?=?new?HashSet();
/**過(guò)濾掉不爬取的內(nèi)容格式*/
// public?static?final?String?filterExtArray?[]??=?{“rar““zip““bmp““dib““gif““jfif““jpe““jpeg““jpg““png““tif““tiff““ico““pdf““doc““docx““xls““xlsx“};
public?static?final?String?filterExtArray?[]??=?{“rar““zip““bmp““dib““jfif““jpe““jpeg““tif““tiff““ico““pdf““doc““docx““xls““xlsx“};

@Override
public?void?doPost(HttpServletRequest?request?HttpServletResponse?response)
throws?ServletException?IOException?{
?System.out.println(“start....“);

?request.setCharacterEncoding(“UTF-8“);
?rootUrl?=?request.getParameter(“rootUrl“);
?rootDir?=?request.getParameter(“rootDir“);

HttpSession?session?=?request.getSession();

if(rootUrl?!=?null?&&?!rootUrl.equals(““)?&&?rootDir?!=?null?&&?!rootDir.equals(““)){
if(rootUrl.contains(“.html“)?||?rootUrl.contains(“.jsp“)?||?rootUrl.contains(“.htm“)){
session.setAttribute(“msg““網(wǎng)站抓取失敗網(wǎng)址輸入有誤,不能有‘.html、.jsp’等結(jié)尾“);
response.sendRedirect(“index.jsp“);
return;
}

if(!rootUrl.endsWith(“/“)){
rootUrl?=?rootUrl.concat(“/“);
}

//獲取所有urls
getSubUrls(rootUrlrootUrl);

//保存文件
for(String?absUrl?:?absRelativeUrlMap.keySet()){

String?content;
try?{
content?=?readContent(absUrl);
}?catch?(IOException?e)?{
System.err.println(“url3=“+absUrl+“?頁(yè)面無(wú)效!“);
continue;
}
if(!absUrl.startsWith(rootUrl)){
continue;
}
String?filePath?=?absUrl.substring(rootUrl.length());
filePath?=?FileUtils.parseFilePath(filePath);

//urlmapMap.put(absRelativeUrlMap.get(absUrl)?filePath);//脫機(jī)運(yùn)行和在服務(wù)器運(yùn)行有所不同。。。
urlmapMap.put(absRela

 ?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----

?????文件?????????36??2017-12-13?10:09??haha\.git\COMMIT_EDITMSG

?????文件????????321??2017-12-05?10:07??haha\.git\config

?????文件?????????73??2017-12-05?10:07??haha\.git\description

?????文件?????????89??2017-12-13?10:16??haha\.git\FETCH_HEAD

?????文件?????????23??2017-12-05?10:07??haha\.git\HEAD

?????文件????????478??2017-12-05?10:07??haha\.git\hooks\applypatch-msg.sample

?????文件????????896??2017-12-05?10:07??haha\.git\hooks\commit-msg.sample

?????文件????????189??2017-12-05?10:07??haha\.git\hooks\post-update.sample

?????文件????????424??2017-12-05?10:07??haha\.git\hooks\pre-applypatch.sample

?????文件???????1642??2017-12-05?10:07??haha\.git\hooks\pre-commit.sample

?????文件???????1348??2017-12-05?10:07??haha\.git\hooks\pre-push.sample

?????文件???????4951??2017-12-05?10:07??haha\.git\hooks\pre-rebase.sample

?????文件???????1239??2017-12-05?10:07??haha\.git\hooks\prepare-commit-msg.sample

?????文件???????3610??2017-12-05?10:07??haha\.git\hooks\update.sample

?????文件???????1888??2017-12-26?10:25??haha\.git\index

?????文件????????240??2017-12-05?10:07??haha\.git\info\exclude

?????文件???????1013??2017-12-13?10:09??haha\.git\logs\HEAD

?????文件???????1013??2017-12-13?10:09??haha\.git\logs\refs\heads\master

?????文件????????176??2017-12-05?10:07??haha\.git\logs\refs\remotes\origin\HEAD

?????文件????????784??2017-12-13?10:09??haha\.git\logs\refs\remotes\origin\master

?????文件????????119??2017-12-05?10:07??haha\.git\objects\00\56a658c14ce9b6665b9d92864145109fdbe623

?????文件???????2149??2017-12-20?09:44??haha\.git\objects\00\86e5780143a92e4910d9d3e567f7013035ec74

?????文件?????????65??2017-12-13?10:09??haha\.git\objects\01\93c2673203020e3726e397a6870107f9e38aa9

?????文件????????521??2017-12-05?10:07??haha\.git\objects\04\fa92d24d70315d66797bf126b61ba2731b0411

?????文件????????266??2017-12-13?09:26??haha\.git\objects\06\d8b0fef21e1f6a7ab4210217b0428f33aecc84

?????文件????????987??2017-12-13?09:26??haha\.git\objects\08\56bbc012e6df8b5c49c9642f0b02e3e75c358f

?????文件??????10484??2017-12-13?09:26??haha\.git\objects\0d\545b98739ec69d5b04fb244df18bd1ec762f62

?????文件????????182??2017-12-13?09:27??haha\.git\objects\0e\14a2a0223fa5ec422ade6d9a7804769dc06939

?????文件???????1478??2017-12-13?09:26??haha\.git\objects\14\451bd2293d2f0774b40d48de0e666f69a37621

?????文件??????15794??2017-12-13?09:26??haha\.git\objects\14\48095eb6c3cc24a04d8390c7c68856dcd5bdf1

............此處省略402個文件信息

評論

共有 條評論

相關資源