資源簡介
一個圖片爬蟲和一個音樂爬蟲,均可完美運行;如有不懂,可以看我的博客。

代碼片段和文件信息
package crawlers;

import cn.wanghaomiao.seimi.annotation.Crawler;
import cn.wanghaomiao.seimi.def.BaseSeimiCrawler;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import org.seimicrawler.xpath.JXDocument;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Date;
import java.util.List;

@Crawler(name?=?“basic“)
public?class?Basic?extends?baseSeimiCrawler?{
????@Override
????public?String[]?startUrls()?{
????????return?new?String[]{“https://www.csdn.net/“};
????}
????@Override
????public?void?start(Response?response)?{
????????JXDocument?doc?=?response.document();
????????try?{
????????????List?urls?=?doc.sel(“//a/@href“);
????????????logger.info(“{}“?urls.size());
????????????for?(object?s:urls){
????????????????push(new?Request(s.toString()Basic::gettitle));
????????????}
????????}?catch?(Exception?e)?{
????????????e.printStackTrace();
????????}
????}
????public?void?gettitle(Response?response){
????????JXDocument?doc?=?response.document();
????????List?urls2?=doc.sel(“//img/@src“);
????????Download(urls2);
????????try?{
????????????logger.info(“url:{}?{}“?response.getUrl()?doc.sel(“//img/@src“));
????????????//do?something
????????}?catch?(Exception?e)?{
????????????e.printStackTrace();
????????}
????}
????//下載圖片
????private?void?Download(List?listImgSrc)?{
????????try?{
????????????//開始時間
????????????Date?begindate?=?new?Date();
????????????for?(String?url?:?listImgSrc)?{
????????????????//開始時間
????????????????Date?begindate2?=?new?Date();
????????????????String?imageName?=?url.substring(url.lastIndexOf(“/“)?+?1?url.length());
????????????????URL?uri?=?new?URL(url);
????????????????InputStream?in?=?uri.openStream();
????????????????FileOutputStream?fo?=?new?FileOutputStream(new?File(“F:/image/“+imageName));//文件輸出流
????????????????byte[]?buf?=?new?byte[1024];
????????????????int?length?=?0;
????????????????System.out.println(“開始下載:“?+?url);
????????????????while?((length?=?in.read(buf?0?buf.length))?!=?-1)?{
????????????????????fo.write(buf?0?length);
????????????????}
????????????????//關(guān)閉流
????????????????in.close();
????????????????fo.close();
????????????????System.out.println(imageName?+?“下載完成“);
????????????????//結(jié)束時間
????????????????Date?overdate2?=?new?Date();
????????????????double?time?=?overdate2.getTime()?-?begindate2.getTime();
????????????????System.out.println(“耗時:“?+?time?/?1000?+?“s“);
????????????}
????????????Date?overdate?=?new?Date();
????????????double?time?=?overdate.getTime()?-?begindate.getTime();
????????????System.out.println(“總耗時:“?+?time?/?1000?+?“s“);
????????}?catch?(Exception?e)?{
????????????System.out.println(“下載失敗“);
????????}
????}
}
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-01-10 11:56  pachong1\
     目錄           0  2019-01-10 11:56  pachong1\.idea\
     文件         541  2019-01-10 09:26  pachong1\.idea\compiler.xml
     文件         138  2019-01-09 10:12  pachong1\.idea\encodings.xml
     文件         526  2019-01-09 10:12  pachong1\.idea\misc.xml
     文件       18720  2019-01-10 11:56  pachong1\.idea\workspace.xml
     文件          81  2019-01-09 10:12  pachong1\pachong1.iml
     文件        1149  2019-01-10 10:27  pachong1\pom.xml
     目錄           0  2019-01-10 11:56  pachong1\src\
     目錄           0  2019-01-10 11:56  pachong1\src\main\
     目錄           0  2019-01-10 11:56  pachong1\src\main\java\
     目錄           0  2019-01-10 11:56  pachong1\src\main\java\crawlers\
     文件        2831  2019-01-10 10:12  pachong1\src\main\java\crawlers\Basic.java
     文件        2711  2019-01-10 10:26  pachong1\src\main\java\crawlers\FileDownload.java
     文件        1839  2019-01-10 10:26  pachong1\src\main\java\crawlers\HtmlManage.java
     文件        3333  2019-01-10 10:26  pachong1\src\main\java\crawlers\HttpGetConnect.java
     文件        2972  2019-01-10 10:36  pachong1\src\main\java\crawlers\SpiderKugou.java
     文件         223  2019-01-09 10:15  pachong1\src\main\java\crawlers\test.java
     目錄           0  2019-01-09 10:12  pachong1\src\main\resources\
     目錄           0  2019-01-10 11:56  pachong1\src\test\
     目錄           0  2019-01-09 10:12  pachong1\src\test\java\
     目錄           0  2019-01-10 11:56  pachong1\target\
     目錄           0  2019-01-10 11:56  pachong1\target\classes\
     目錄           0  2019-01-10 11:56  pachong1\target\classes\crawlers\
     文件        5055  2019-01-10 10:36  pachong1\target\classes\crawlers\Basic.class
     文件        3825  2019-01-10 10:36  pachong1\target\classes\crawlers\FileDownload.class
     文件        2887  2019-01-10 10:36  pachong1\target\classes\crawlers\HtmlManage.class
     文件        4629  2019-01-10 10:36  pachong1\target\classes\crawlers\HttpGetConnect.class
     文件        4326  2019-01-10 10:36  pachong1\target\classes\crawlers\SpiderKugou.class
     文件         545  2019-01-10 10:36  pachong1\target\classes\crawlers\test.class
     目錄           0  2019-01-10 11:56  pachong1\target\generated-sources\
............此處省略1個文件信息
- 上一篇:Kaldi單音素GMM學習筆記
- 下一篇:加密日記本
評論
共有 條評論