資源簡介
從大眾點評網首頁 http://www.dianping.com/citylist 開始爬取數據,用戶在輸出界面輸入想要搜索的地域名以及商鋪名以後,可以獲得爬取到的結果,實現了簡單的檢索功能,達到了大眾點評網數據爬取的目的

代碼片段和文件信息
package?DaZhongjiansuo;
import?java.io.IOException;
import?java.io.UnsupportedEncodingException;
import?java.net.URLEncoder;
import?java.util.ArrayList;
import?java.util.List;
import?java.util.Scanner;
import?org.apache.commons.httpclient.HttpClient;
import?org.apache.commons.httpclient.HttpException;
import?org.apache.commons.httpclient.HttpStatus;
import?org.apache.commons.httpclient.methods.GetMethod;
import?org.jsoup.Jsoup;
import?org.jsoup.nodes.Document;
import?org.jsoup.nodes.Element;
import?org.jsoup.select.Elements;
import?org.junit.Test;
public?class?DZDemo{
//獲得httpclient
private?HttpClient?httpclient=new?HttpClient();
//定義一個url,url表示最終所搜的域名
private?String?url;
//定義i為1,i表示頁碼編號,作用是循環獲得所有頁碼
private?int?i=1;
//Scanner?s?=?new?Scanner(System.in);
//定義str,str表示輸入的地名
private?String?str?=?null;
//定義keywordstr,keywordstr表示輸入的要搜索的字詞
private?String?keywordstr?=?null;
//定義shop,shop表示你搜索到的商鋪信息(包括商鋪名稱、星級、點評數)
private?String?shop=null;
//定義shoplist,為了存放搜索到的所有商鋪信息
List?shoplist=new?ArrayList();
/**
 * Creates a searcher from the two values entered on the page.
 *
 * @param str        area (place) name to look up
 * @param keywordstr keyword to search for
 */
public DZDemo(String str, String keywordstr) {
    this.str = str;
    this.keywordstr = keywordstr;
}
public?String?getStr()?{
return?str;
}
public?void?setStr(String?str)?{
this.str?=?str;
}
public?String?getKeywordstr()?{
return?keywordstr;
}
public?void?setKeywordstr(String?keywordstr)?{
this.keywordstr?=?keywordstr;
}
//獲得區域Url
public?String?getAreaUrl(){
//定義區域URL
String?areaurl=null;
GetMethod?getmethod=new?GetMethod(“http://www.dianping.com/citylist“);
getmethod.setRequestHeader(“Accept“?“text/html“);
getmethod.setRequestHeader(“Accept-Language“?“zh-CNzh;q=0.8en;q=0.6“);
getmethod.setRequestHeader(“User-Agent“?“Mozilla/5.0?(Windows?NT?6.1;?WOW64)?AppleWebKit/537.36?(KHTML?like?Gecko)?Chrome/47.0.2526.106?Safari/537.36“);
try?{
//String?st=“愛知“;
//System.out.println(“愛知“==(st));
//獲得方法執行返回碼,返回碼為200表示方法執行成功
int?statucode=httpclient.executeMethod(getmethod);
//System.out.println(statucode);
if(statucode==HttpStatus.SC_OK){
//獲得請求返回的string類型的響應
String?html=getmethod.getResponseBodyAsString();
//jsoup將string類型的響應解析稱document類型
Document?doc=getDocument(html);
//獲得所有區域所在的塊
Elements?elements=doc.select(“[class=glossary-list?gl-py]“).select(“li“).select(“a“);
for(Element?element:elements){
//獲得區域名稱
String?AreaStr=element.text();
//System.out.println(AreaStr);
//獲得你所輸入的區域對應的URL
if(str.equals(AreaStr)){
areaurl=“http://www.dianping.com“+element.attr(“href“);
}
}
}
}?catch?(Exception?e)?{
e.printStackTrace();
}?finally{
//釋放鏈接
getmethod.releaseConnection();
}
//System.out.println(areaurl);
return?areaurl;
}
//獲得搜索店名URL
public?String?getSearchUrl(){
//獲得你所搜索的區域URL
String?searchUrl=getAreaUrl();
//Scanner?s?=?new?Scanne
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-02-18 15:49  DZDP\
     文件        1093  2016-02-02 20:11  DZDP\.classpath
     文件        1329  2016-02-02 11:25  DZDP\.project
     目錄           0  2016-02-18 15:49  DZDP\.settings\
     文件         522  2016-02-01 23:32  DZDP\.settings\.jsdtscope
     文件         364  2016-02-01 23:32  DZDP\.settings\org.eclipse.jdt.core.prefs
     文件         471  2016-02-01 23:32  DZDP\.settings\org.eclipse.wst.common.component
     文件         414  2016-02-01 23:32  DZDP\.settings\org.eclipse.wst.common.project.facet.core.xm
     文件          49  2016-02-01 23:32  DZDP\.settings\org.eclipse.wst.jsdt.ui.superType.container
     文件           6  2016-02-01 23:32  DZDP\.settings\org.eclipse.wst.jsdt.ui.superType.name
     目錄           0  2016-02-18 15:49  DZDP\src\
     目錄           0  2016-02-18 15:49  DZDP\src\DaZhongjiansuo\
     文件        9427  2016-02-03 18:50  DZDP\src\DaZhongjiansuo\DZDemo.java
     文件        1999  2016-02-04 11:54  DZDP\src\DaZhongjiansuo\DZServlet.java
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\
     文件        1206  2016-02-03 20:44  DZDP\WebRoot\index.jsp
     文件        2098  2016-02-02 16:03  DZDP\WebRoot\index_01.jsp
     文件         932  2016-02-02 13:26  DZDP\WebRoot\JavaBean_03.jsp
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\me
     文件          39  2016-02-01 23:32  DZDP\WebRoot\me
     文件         881  2016-02-02 11:30  DZDP\WebRoot\MyJsp01.jsp
     文件         828  2016-02-02 16:04  DZDP\WebRoot\Suc.jsp
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\classes\
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\classes\com\
     文件         865  2016-02-02 20:11  DZDP\WebRoot\WEB-INF\classes\com\UserBean.class
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\classes\DaZhongjiansuo\
     文件        7655  2016-02-03 18:50  DZDP\WebRoot\WEB-INF\classes\DaZhongjiansuo\DZDemo.class
     文件        3120  2016-02-04 11:54  DZDP\WebRoot\WEB-INF\classes\DaZhongjiansuo\DZServlet.class
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\classes\demo\
     目錄           0  2016-02-18 15:49  DZDP\WebRoot\WEB-INF\classes\demo\serv\
............此處省略24個文件信息
評論
共有 條評論