資源簡介
58同城-趕集-智聯招聘-51job企業數據采集程序源碼,采集后的數據可篩選,可生成Excel,可把圖片形式的電話號碼采集下來

代碼片段和文件信息
import?java.io.File;
import?java.io.FileOutputStream;
import?java.io.IOException;
import?java.io.InputStream;
import?java.net.HttpURLConnection;
import?java.net.URL;
import?java.text.ParseException;
import?java.text.SimpleDateFormat;
import?java.util.ArrayList;
import?java.util.Calendar;
import?java.util.Date;
import?java.util.linkedHashSet;
import?java.util.List;
import?java.util.Map;
import?java.util.regex.Matcher;
import?java.util.regex.Pattern;
import?org.apache.http.NameValuePair;
import?org.apache.http.message.BasicNameValuePair;
import?org.jsoup.Jsoup;
import?org.jsoup.nodes.Document;
import?org.jsoup.nodes.Element;
import?org.jsoup.select.Elements;
import?com.alibaba.fastjson.JSON;
import?com.alibaba.fastjson.TypeReference;
import?com.jfinal.util.PathUtil;
import?collect.core.CoreConstant;
import?collect.httpclient.CoachCollect;
import?collect.httpclient.HttpClientCode;
import?collect.model.CollectData;
import?collect.plugin.GanjiPlugin;
import?collect.plugin.Job51Plugin;
import?collect.plugin.ZhaopinPlugin;
public?class?Test?{
// private String fetchImageViaHttp(URL imgUrl) throws IOException {
// String sURL = imgUrl.toString();
// String imgFile = imgUrl.getPath();
// HttpURLConnection cnx = (HttpURLConnection)imgUrl.openConnection();
// String uri = null;
// try{
// cnx.setAllowUserInteraction(false);
// cnx.setDoOutput(true);
// cnx.addRequestProperty("Cache-Control", "no-cache");
//
// if(request.getHeader("user-agent")!= null){
// cnx.addRequestProperty("User-Agent", request.getHeader("user-agent"));
// }else{
// cnx.addRequestProperty("User-Agent", user_agent);
// }
// cnx.addRequestProperty("Referer", sURL.substring(0, sURL.indexOf('/', sURL.indexOf('.'))+1));
// cnx.connect();
// if(cnx.getResponseCode() != HttpURLConnection.HTTP_OK)
// return null;
// InputStream imgData = cnx.getInputStream();
//
// String ext = FilenameUtils.getExtension(imgFile).toLowerCase();
// if(!"jpg".equalsIgnoreCase(ext)
// &&!"png".equalsIgnoreCase(ext)
// &&!"gif".equalsIgnoreCase(ext)
// &&!"bmp".equalsIgnoreCase(ext))
// ext = "jpg";
// uri = img_path+Tools.rand()+'.'+ ext;
// File fileDest = new File(application.getRealPath(uri));
// if(!fileDest.getParentFile().exists())
// fileDest.getParentFile().mkdirs();
// FileOutputStream fos = new FileOutputStream(fileDest);
// try{
// IOUtils.copy(imgData, fos);
// }finally{
// IOUtils.closeQuietly(imgData);
// IOUtils.closeQuietly(fos);
// }
// }finally{
// cnx.disconnect();
// }
// return uri;
// }
public?static?String?dtime(long?s){
try?{
SimpleDateFormat?fTime?=?new?SimpleDateFormat(“yyyy-MM-dd?HH:mm:ss“);
return?fTime.format(s);
}?catch?(Exception?e)?{
//?TODO:?handle?exception
e.printStackTrace();
return?null;
}
}
private?static?String?detectWebRootPath()?{
try?{
String?p
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件????????727??2012-11-22?11:50??reptile\.classpath
?????文件???????1040??2012-10-15?10:30??reptile\.project
?????文件????????500??2012-10-15?10:30??reptile\.settings\.jsdtscope
?????文件????????364??2012-10-15?10:30??reptile\.settings\org.eclipse.jdt.core.prefs
?????文件????????472??2012-10-15?10:30??reptile\.settings\org.eclipse.wst.common.component
?????文件????????305??2012-11-22?10:54??reptile\.settings\org.eclipse.wst.common.project.facet.core.xm
?????文件?????????49??2012-10-15?10:30??reptile\.settings\org.eclipse.wst.jsdt.ui.superType.container
?????文件??????????6??2012-10-15?10:30??reptile\.settings\org.eclipse.wst.jsdt.ui.superType.name
?????文件???????4647??2013-07-16?21:22??reptile\build\classes\collect\config\AppConfig.class
?????文件????????315??2013-07-16?21:22??reptile\build\classes\collect\controller\ba
?????文件???????1204??2013-07-16?21:22??reptile\build\classes\collect\controller\DefaultController.class
?????文件???????8398??2013-07-16?21:22??reptile\build\classes\collect\controller\MainController.class
?????文件????????581??2013-07-16?21:22??reptile\build\classes\collect\core\CoreConstant.class
?????文件???????3699??2013-07-16?21:22??reptile\build\classes\collect\core\FreemarkGenerate.class
?????文件????????161??2013-07-16?21:22??reptile\build\classes\collect\core\IReptilePlugin.class
?????文件???????9112??2013-07-16?21:22??reptile\build\classes\collect\httpclient\CoachCollect.class
?????文件???????7988??2013-07-16?21:22??reptile\build\classes\collect\httpclient\HttpClientCode.class
?????文件???????1533??2013-07-16?21:22??reptile\build\classes\collect\httpclient\HttpSingleton.class
?????文件???????1257??2013-07-16?21:22??reptile\build\classes\collect\httpclient\Verification.class
?????文件???????1319??2013-07-16?21:22??reptile\build\classes\collect\model\Coach.class
?????文件???????1350??2013-07-16?21:22??reptile\build\classes\collect\model\CoachConfig.class
?????文件????????481??2013-07-16?21:22??reptile\build\classes\collect\model\CoachImg.class
?????文件????????485??2013-07-16?21:22??reptile\build\classes\collect\model\CoachNorm.class
?????文件????????493??2013-07-16?21:22??reptile\build\classes\collect\model\CollectData.class
?????文件???????1337??2013-07-16?21:22??reptile\build\classes\collect\model\Sql.class
?????文件???????5521??2013-07-16?21:22??reptile\build\classes\collect\plugin\GanjiPlugin.class
?????文件???????5071??2013-07-16?21:22??reptile\build\classes\collect\plugin\HaozePlugin.class
?????文件???????5453??2013-07-16?21:22??reptile\build\classes\collect\plugin\Job51Plugin.class
?????文件???????1294??2013-07-16?21:22??reptile\build\classes\collect\timer\CollectTimer$1.class
?????文件???????1298??2013-07-16?21:22??reptile\build\classes\collect\timer\CollectTimer$2.class
............此處省略541個文件信息
評論
共有 條評論