資源簡介
一個用 Java 開發的簡單網絡爬蟲,可以實現對指定站點新聞內容的獲取。程序很簡單,大家一起學習。
代碼片段和文件信息
package com.sohu;
import com.sohu.bean.NewsBean;
import com.sohu.db.ConnectionManager;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.HeadingTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import java.sql.PreparedStatement;
import java.sql.SQLException;
/**
 * 用於對搜狐網站上的新聞進行抓取
 * @author guanminglin
 */
public?class?SohuNews?{
????p
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       2448  2009-05-20 11:39  build\web\WEB-INF\classes\com\sohu\db\ConnectionManager.class
     文件        779  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\crawler\Crawler$1.class
     文件       2003  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\crawler\Crawler.class
     文件        969  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\servlet\GetNewsServlet$1.class
     文件       2304  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\servlet\GetNewsServlet.class
     文件       1679  2009-05-20 11:39  build\web\WEB-INF\classes\com\sohu\crawler\li
     文件        203  2009-05-20 11:39  build\web\WEB-INF\classes\com\sohu\crawler\li
     文件        819  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\crawler\li
     文件        796  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\crawler\li
     文件       3485  2009-05-20 19:51  build\web\WEB-INF\classes\com\sohu\crawler\li
     文件       1330  2009-05-20 12:18  build\web\WEB-INF\classes\com\sohu\bean\NewsBean.class
     文件        453  2009-05-20 11:39  build\web\WEB-INF\classes\com\sohu\crawler\NewsToDB.class
     文件       1300  2009-05-20 11:39  build\web\WEB-INF\classes\com\sohu\crawler\Queue.class
     文件        885  2009-05-20 19:59  build\web\WEB-INF\classes\com\sohu\SohuNews$1.class
     文件       7990  2009-05-20 19:59  build\web\WEB-INF\classes\com\sohu\SohuNews.class
     文件      46725  2007-03-16 17:16  src\lib\commons-codec-1.3.jar
     文件     305001  2009-05-17 15:16  src\lib\commons-httpclient-3.1.jar
     文件      38015  2007-03-16 17:16  src\lib\commons-logging-1.0.4.jar
     文件      70029  2009-05-20 11:28  build\web\WEB-INF\lib\htmllexer.jar
     文件      70029  2009-05-17 15:16  src\lib\htmllexer.jar
     文件     288106  2009-05-20 11:28  build\web\WEB-INF\lib\htmlparser.jar
     文件     288106  2009-05-17 15:16  src\lib\htmlparser.jar
    .......    703265  2009-05-19 09:17  build\web\WEB-INF\lib\mysql-connector-java-5.1.6-bin.jar
     文件       1855  2009-05-18 14:55  src\java\com\sohu\db\ConnectionManager.java
     文件       1957  2009-05-20 11:05  src\java\com\sohu\crawler\Crawler.java
     文件       2980  2009-05-20 11:06  src\java\com\sohu\servlet\GetNewsServlet.java
     文件       1209  2009-05-18 14:55  src\java\com\sohu\crawler\li
     文件        231  2009-05-18 14:55  src\java\com\sohu\crawler\li
     文件       3784  2009-05-20 09:49  src\java\com\sohu\crawler\li
     文件       1568  2009-05-18 14:55  src\java\com\sohu\bean\NewsBean.java
............此處省略59個文件信息
評論
共有 條評論