資源簡介
Gather Platform 數據抓取平臺是一套基於 WebMagic 內核、具有 Web 任務配置和任務管理界面的數據採集與搜索平臺。
代碼片段和文件信息
package?com.gs.spider.controller;
import?com.gs.spider.model.async.State;
import?com.gs.spider.model.async.Task;
import?com.gs.spider.model.utils.ResultBundle;
import?com.gs.spider.model.utils.ResultListBundle;
import?com.gs.spider.service.AsyncGatherService;
import?org.apache.logging.log4j.LogManager;
import?org.apache.logging.log4j.Logger;
import?org.springframework.web.bind.annotation.RequestMapping;
import?org.springframework.web.bind.annotation.RequestMethod;
import?org.springframework.web.bind.annotation.RequestParam;
import?org.springframework.web.bind.annotation.ResponseBody;
import?java.io.IOException;
/**
 * AsyncGatherbaseController
 * Controller for the asynchronous gatherer.
 *
 * @author Gao Shen
 * @version 16/2/23
 */
public?class?AsyncGatherbaseController?extends?baseController?{
????private?AsyncGatherService?asyncGatherService;
????private?Logger?LOG?=?LogManager.getLogger(AsyncGatherbaseController.class);
????public?AsyncGatherbaseController(AsyncGatherService?asyncGatherService)?{
????????this.asyncGatherService?=?asyncGatherService;
????}
????/**
?????*?列出所有任務
?????*
?????*?@return?0表示正在進行?1表示已經完成
?????*?@throws?IOException
?????*/
????@RequestMapping(value?=?“listTasks“?method?=?RequestMethod.GET?produces?=?“application/json“)
????@ResponseBody
????public?ResultListBundle?listTasks(@RequestParam(value?=?“containsExtraInfo“?required?=?false?defaultValue?=?“false“)?boolean?containsExtraInfo)?throws?IOException?{
????????return?asyncGatherService.getTaskList(containsExtraInfo);
????}
????/**
?????*?根據id獲取task
?????*
?????*?@param?taskId
?????*?@return
?????*?@throws?IOException
?????*/
????@RequestMapping(value?=?“getTaskById“?method?=?RequestMethod.GET?produces?=?“application/json“)
????@ResponseBody
????public?ResultBundle?getTaskById(String?taskId?@RequestParam(value?=?“containsExtraInfo“?required?=?false?defaultValue?=?“true“)?boolean?containsExtraInfo)?throws?IOException?{
????????return?asyncGatherService.getTaskById(taskId?containsExtraInfo);
????}
????/**
?????*?獲取異步抓取長連接服務器端口號
?????*
?????*?@return
?????*/
????@RequestMapping(value?=?“getLongConnectionPort“?method?=?RequestMethod.GET?produces?=?“application/json“)
????@ResponseBody
????public?ResultBundle?getLongConnectionPort()?throws?IOException?{
????????return?asyncGatherService.getLongConnectionPort();
????}
????/**
?????*?獲取當前task已經抓取的文章數
?????*
?????*?@param?taskId
?????*?@return
?????*?@throws?IOException
?????*/
????@RequestMapping(value?=?“getTaskCount“?method?=?RequestMethod.GET?produces?=?“application/json“)
????@ResponseBody
????public?ResultBundle?getTaskCount(String?taskId)?throws?IOException?{
????????return?asyncGatherService.getTaskCount(taskId);
????}
????/**
?????*?根據taskId刪除任務
?????*
?????*?@param?taskId?任務ID
?????*?@return?成功返回OK!
?????*/
????@RequestMapping(value?=?“deleteTaskById“?method?=?RequestMethod.GET?produces?=?“application/json“)
????@ResponseBody
????public?ResultBundle?deleteTaskById(String?taskId
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2017-05-16 11:12  gsh199449-spider-a5f9f85\
     文件          24  2017-05-16 11:12  gsh199449-spider-a5f9f85\.gitignore
     文件          34  2017-05-16 11:12  gsh199449-spider-a5f9f85\.travis.yml
     文件       35141  2017-05-16 11:12  gsh199449-spider-a5f9f85\LICENSE
     文件        1674  2017-05-16 11:12  gsh199449-spider-a5f9f85\README.md
     目錄           0  2017-05-16 11:12  gsh199449-spider-a5f9f85\ajaxDownloader\
     文件        8662  2017-05-16 11:12  gsh199449-spider-a5f9f85\ajaxDownloader\phantomjs_fetcher.js
     目錄           0  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\
     文件       12203  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\README-en.md
     目錄           0  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\
     文件      192097  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\domainList.png
     文件       30399  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\dynamic.png
     文件      209349  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\editSpiderInfo.png
     文件       57069  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\home.png
     文件       80330  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\need.png
     文件      273752  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\search.png
     文件    15576616  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\show.gif
     文件       64154  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\showRelatedInfo.png
     文件      226173  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\showWebpageById.png
     文件      184284  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\spiderList.png
     文件       99702  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\spiderinfo.png
     文件      348633  2017-05-16 11:12  gsh199449-spider-a5f9f85\doc\imgs\testSpiderinfo.png
     目錄           0  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\
     文件         921  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\README.md
     文件        1239  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\news.163.com.json
     文件        1142  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\news.qq.com.json
     文件        1181  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\news.qq.com_time_autodetect.json
     文件        1270  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\news.sohu.com.json
     文件        8404  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\phantomjs_fetcher.js
     文件        1205  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\www.chinanews.com.json
     文件        1273  2017-05-16 11:12  gsh199449-spider-a5f9f85\examples\www.oschina.net.json
............此處省略130個文件信息
評論
共有 條評論