91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

用Lucene框架實現的搜索引擎,供大家參考。

資源截圖

代碼片段和文件信息

/**
 * @(#)Crawler.java
 * @author
 * @version 1.00 2007/11/21
 */

import java.io.*;
import java.net.URL;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal one-level crawler.
 *
 * <p>It reads a start URL from standard input, downloads that page, collects
 * the {@code http...} addresses found inside its anchor tags, downloads each
 * of those pages and stores them as numbered text files
 * ({@code doct/<n>.txt}). The running document number is kept in
 * {@code count.txt} so that successive runs continue the numbering.
 *
 * <p>Errors are reported with {@code printStackTrace()} and otherwise
 * tolerated: a page that cannot be downloaded is skipped.
 */
public class Crawler {

    /**
     * Matches one opening anchor tag; group 1 is the whole tag.
     * NOTE(review): the pattern literal was damaged in the reviewed copy of
     * this source (only "(]*>)" survived); "(<a[^>]*>)" is the most plausible
     * reconstruction - confirm against the original file.
     */
    private static final Pattern ANCHOR_TAG = Pattern.compile("(<a[^>]*>)");

    /** File that persists the document counter between runs. */
    private static final String COUNT_FILE = "count.txt";

    /** Directory that receives the downloaded documents. */
    private static final String OUTPUT_DIR = "doct";

    private String strUrl = null;    // URL string of the start page
    private URL url = null;          // URL object of the page requested last
    private static int count;        // document counter (next file number)
    private Vector<String> v = null; // URL addresses extracted from the start page

    /**
     * Downloads the document behind the given URL.
     *
     * <p>The bytes are decoded with the platform default charset, and every
     * line is terminated with {@code "\n"} in the result.
     *
     * @param strUrl address of the document; may be {@code null} or malformed
     * @return the document text, or {@code null} when the URL is invalid or
     *         the download fails (the stack trace is printed)
     */
    private String getHtmlDoc(String strUrl) {
        InputStream inputStream = null;
        try {
            url = new URL(strUrl);
            inputStream = url.openStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
            return text.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            // Always release the connection, including on a failed read.
            closeQuietly(inputStream);
        }
    }

    /**
     * Downloads the start page and appends every {@code http...} address
     * found inside its anchor tags to {@link #v}.
     *
     * <p>Does nothing when the start page cannot be downloaded.
     */
    private void extractURL() {
        String html = getHtmlDoc(strUrl);
        if (html == null) {
            return; // download failed; getHtmlDoc already reported it
        }
        Matcher m = ANCHOR_TAG.matcher(html);
        while (m.find()) {
            String tag = m.group(1);
            int start = tag.indexOf("http");
            if (start == -1) {
                continue; // not an absolute http(s) link
            }
            // The URL ends at the first quote, whitespace or '>' so that
            // trailing attributes (target=..., class=...) are not swallowed
            // and no character of the URL itself is cut off.
            int end = start;
            while (end < tag.length() && !isUrlTerminator(tag.charAt(end))) {
                end++;
            }
            v.add(tag.substring(start, end));
        }
    }

    /** Tells whether {@code c} ends an attribute value inside a tag. */
    private static boolean isUrlTerminator(char c) {
        return c == '"' || c == '\'' || c == '>' || Character.isWhitespace(c);
    }

    /**
     * Extracts the addresses from the start page and saves the document
     * behind each of them as {@code doct/<count>.txt}. Each file holds the
     * address on the first line followed by the downloaded text.
     *
     * <p>Documents that cannot be downloaded are skipped and do not consume
     * a number. The counter is written back to {@code count.txt} in every
     * case, including after a failure.
     */
    public void saveDocument() {
        try {
            extractURL();
            File outputDir = new File(OUTPUT_DIR);
            if (!v.isEmpty()) {
                outputDir.mkdirs(); // a missing directory would fail every write
            }
            for (String link : v) {
                String html = getHtmlDoc(link);
                if (html == null) {
                    continue; // do not store the literal text "null"
                }
                BufferedWriter bw = null;
                try {
                    bw = new BufferedWriter(
                            new FileWriter(new File(outputDir, count + ".txt")));
                    bw.write(link + "\n" + html);
                    bw.flush(); // surface write errors before the quiet close
                    System.out.println("SAVE:" + link);
                } finally {
                    closeQuietly(bw);
                }
                count++;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            storeCount();
        }
    }

    /** Writes the current counter to {@code count.txt}. */
    private static void storeCount() {
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(COUNT_FILE));
            out.write(String.valueOf(count));
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(out);
        }
    }

    /**
     * Loads the counter from {@code count.txt}. The counter keeps its
     * current value when the file is missing, empty or not a number.
     */
    private static void loadCount() {
        File countFile = new File(COUNT_FILE);
        if (!countFile.exists()) {
            return; // first run: numbering starts at the current value
        }
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(countFile));
            String line = in.readLine();
            if (line != null) {
                count = Integer.parseInt(line.trim());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (NumberFormatException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
    }

    /** Closes {@code c} if it is non-null, ignoring any close failure. */
    private static void closeQuietly(Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close
        }
    }

    /**
     * Prompts for the start URL on standard input and restores the document
     * counter from {@code count.txt}.
     */
    public Crawler() {
        // Created first so that a failure below cannot leave it null.
        v = new Vector<String>();
        try {
            // Deliberately not closed: closing it would close System.in.
            BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
            System.out.println("Enter your url string:");
            strUrl = br.readLine();
        } catch (IOException e) {
            e.printStackTrace();
        }
        loadCount();
    }

    /**
     * Program entry point: crawls the page entered on standard input.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Crawler c = new Crawler();
        c.saveDocument();
    }
}

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----

     文件       3379  2007-12-25 21:27  MyLucene\Crawler.class

     文件       2994  2007-12-01 21:12  MyLucene\Crawler.java.bak

     文件    1871202  2008-03-12 08:55  MyLucene\Lucene 2.1.0 jar.rar

     文件     112640  2008-03-12 22:38  MyLucene\Lucene寫搜引擎.ppt

     文件       1572  2007-11-23 20:58  MyLucene\MyLucene.jcp

     文件        860  2007-11-23 20:58  MyLucene\MyLucene.jcu

     文件        293  2007-11-23 20:58  MyLucene\MyLucene.jcw

     文件        994  2007-11-22 11:56  MyLucene\readme.txt

    ....SH.      5120  2008-01-21 12:51  MyLucene\Thumbs.db

     文件       3676  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\BUILD.txt

     文件      18757  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\build.xml

     文件      69593  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\CHANGES.txt

     文件     898886  2007-07-09 13:30  MyLucene\Lucene 2.1.0 jar\je-analysis-1[1].4.0-5251(1).jar

     文件      11358  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\LICENSE.txt

     文件     463578  2007-02-14 11:49  MyLucene\Lucene 2.1.0 jar\lucene-core-2.1.0.jar

     文件      48244  2007-02-14 11:49  MyLucene\Lucene 2.1.0 jar\lucene-demos-2.1.0.jar

     文件     473676  2007-02-14 11:49  MyLucene\Lucene 2.1.0 jar\luceneweb.war

     文件        163  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\NOTICE.txt

     文件       1157  2007-02-14 11:46  MyLucene\Lucene 2.1.0 jar\README.txt

     文件          1  2007-12-25 21:27  MyLucene\JSP\count.txt

     文件       3379  2007-12-25 21:26  MyLucene\JSP\Crawler.classd

     文件       2995  2007-12-25 21:25  MyLucene\JSP\Crawler.java

     文件        198  2007-11-21 23:29  MyLucene\JSP\DocIndexer.class

     文件       1963  2007-11-22 11:21  MyLucene\JSP\DoQuery.class

     文件       1868  2007-11-22 11:56  MyLucene\JSP\DoQuery.java

     文件      58723  2007-12-25 20:14  MyLucene\JSP\jinrong.jpg

     文件       3186  1999-04-20 12:02  MyLucene\JSP\query.html

     文件       2542  2007-12-25 20:52  MyLucene\JSP\query.html.bak

     文件       4003  1999-04-20 12:02  MyLucene\JSP\query.jsp

     文件        150  2007-11-23 15:53  MyLucene\JSP\src_mylucene.txt

............此處省略169個文件信息

評論

共有 條評論