91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

WebMagic(Java)爬蟲實現,實現數據爬取,並導出到excel文件

資源截圖

代碼片段和文件信息

package com.chao.crawler;
import us.codecraft.webmagic.Spider;

public?class?Client?{
public?static?void?main(String[]?args)?{
String?domain?=?“http://www.babysittersnow.com“;
//總共355頁數(shù)據(jù)每頁22個子頁面,為了防止數(shù)據(jù)丟失每讀取一頁數(shù)據(jù),對數(shù)據(jù)進行保存一次
for(int?i=1;i<356;i++){
String?startUrl?=?“http://www.babysittersnow.com.au/babysitters/search?display=grid&order=lastlogin&page=“+1;
Spider.create(new?PageProcesser(domain?startUrl)).pipeline(new?URLPipeline()).thread(5).run();
PoiHelper.getHelper().saveExcel();
}
//String?startUrl1?=?“http://www.babysittersnow.com.au/babysitters/search“;

}

}

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-03-16 15:24  crawler3\
     文件        2282  2016-03-16 15:23  crawler3\.classpath
     文件         384  2016-03-16 15:23  crawler3\.project
     目錄           0  2016-03-16 15:23  crawler3\.settings\
     文件         598  2016-03-16 15:23  crawler3\.settings\org.eclipse.jdt.core.prefs
     目錄           0  2016-03-16 15:23  crawler3\bin\
     目錄           0  2016-03-16 15:23  crawler3\bin\com\
     目錄           0  2016-03-16 15:23  crawler3\bin\com\chao\
     目錄           0  2016-03-16 15:23  crawler3\bin\com\chao\crawler\
     文件        1275  2016-03-16 15:23  crawler3\bin\com\chao\crawler\Client.class
     文件        2368  2016-03-16 15:23  crawler3\bin\com\chao\crawler\DownloadHelper.class
     文件        2805  2016-03-16 15:23  crawler3\bin\com\chao\crawler\PageProcesser.class
     文件        3278  2016-03-16 15:23  crawler3\bin\com\chao\crawler\PageProcesser1.class
     文件        3155  2016-03-16 15:23  crawler3\bin\com\chao\crawler\PageProcesserProduct.class
     文件        1355  2016-03-16 15:23  crawler3\bin\com\chao\crawler\PathHelper.class
     文件        3255  2016-03-16 15:23  crawler3\bin\com\chao\crawler\PoiHelper.class
     文件        1554  2016-03-16 15:23  crawler3\bin\com\chao\crawler\Product.class
     文件        1229  2016-03-16 15:23  crawler3\bin\com\chao\crawler\ProductPipeline.class
     文件        1951  2016-03-16 15:23  crawler3\bin\com\chao\crawler\URLHelper.class
     文件        1266  2016-03-16 15:23  crawler3\bin\com\chao\crawler\URLPipeline.class
     目錄           0  2016-03-16 15:23  crawler3\bin\com\chao\test\
     文件        2142  2016-03-16 15:23  crawler3\bin\com\chao\test\GithubRepoPageProcessor.class
     目錄           0  2016-03-16 15:23  crawler3\bin\com\chao\util\
     文件        1058  2016-03-16 15:23  crawler3\bin\com\chao\util\ListUtil.class
     文件        2522  2016-03-16 15:23  crawler3\bin\com\chao\util\pageProcesserTest1.class
     目錄           0  2016-03-16 15:23  crawler3\lib\
     文件      576338  2016-03-16 15:23  crawler3\lib\assertj-core-1.5.0.jar
     文件      232771  2016-03-16 15:23  crawler3\lib\commons-codec-1.6.jar
     文件      575389  2016-03-16 15:23  crawler3\lib\commons-collections-3.2.1.jar
     文件       87776  2016-03-16 15:23  crawler3\lib\commons-io-1.3.2.jar
     文件      284220  2016-03-16 15:23  crawler3\lib\commons-lang-2.6.jar
............此處省略44個文件信息

評論

共有 條評論