資源簡介
lucene文檔檢索系統,java源碼,可執行程序,支持pdf,doc,xls,ppt,html,txt。

代碼片段和文件信息
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import?java.io.File;
import?java.io.FileReader;
import?org.apache.lucene.document.DateTools;
import?org.apache.lucene.document.Document;
import?org.apache.lucene.document.Field;
import?org.htmlparser.*;
import?org.htmlparser.visitors.*;
import?org.htmlparser.util.*;
import?org.apache.pdfbox.*;
import?org.apache.pdfbox.pdmodel.*;
import?java.io.*;
import?org.apache.pdfbox.util.*;
import?org.apache.pdfbox.searchengine.lucene.*;
import?org.apache.poi.hwpf.extractor.*;
import?org.apache.poi.hslf.usermodel.*;
import?org.apache.poi.hslf.*;
import?org.apache.poi.hslf.model.*;
import?org.apache.poi.hssf.usermodel.*;
import?java.util.*;
import?org.apache.poi.ss.usermodel.*;
/** A utility for making Lucene Documents from a File. */
public?class?FileDocument?{
    /** Makes a document for a File.
      <p>
      The document has three fields:
      <ul>
      <li><code>path</code>--containing the pathname of the file, as a stored,
      untokenized field;
      <li><code>modified</code>--containing the last modified date of the file as
      a field as created by <a
      href="lucene.document.DateTools.html">DateTools</a>; and
      <li><code>contents</code>--containing the full contents of the file, as a
      Reader field;
      </ul>
     */
????public?static?Document?Document(File?f)?throws?java.io.
????????????FileNotFoundException?{
        // make a new empty document
????????Document?doc?=?new?Document();
????????String[]?encoding?=?{“UTF-8“?“GBK“?“GB2312“?“UTF-8“?“ISO8859_1“};
        // Add the path of the file as a field named "path".  Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into words.
????????doc.add(new?Field(“path“?f.getPath()?Field.Store.YES
??????????????????????????Field.Index.NOT_ANALYZED));
        // Add the last modified date of the file a field named "modified".  Use
        // a field that is indexed (i.e. searchable), but don't tokenize the field
        // into words.
????????doc.add(new?Field(“modified“
??????????????????????????DateTools.timeToString(f.lastModified()
?????????????????????????????????????????????????DateTool
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件    9272335  2009-12-21 23:28  檢索課程設計上交內容\可執行程序\InfRetrW2.2.exe
     文件       9743  2009-12-13 09:41  檢索課程設計上交內容\源代碼\FileDocument.java
     文件       2359  2009-12-13 08:35  檢索課程設計上交內容\源代碼\IndexFiles.java
     文件       4064  2009-12-13 11:27  檢索課程設計上交內容\源代碼\LuceneProc.java
     文件      17678  2009-12-13 11:03  檢索課程設計上交內容\源代碼\Mainfr
     文件       1070  2009-12-30 21:23  檢索課程設計上交內容\源代碼\MyTable.java
     文件      52736  2009-12-13 11:27  檢索課程設計上交內容\設計與說明文檔\使用說明文檔.doc
     文件      35328  2009-12-24 10:15  檢索課程設計上交內容\設計與說明文檔\程序設計說明書.doc
     文件      67584  2009-10-20 10:06  檢索課程設計上交內容\設計與說明文檔\課程設計要求.doc
     目錄          0  2009-12-24 10:11  檢索課程設計上交內容\可執行程序
     目錄          0  2009-12-13 11:35  檢索課程設計上交內容\源代碼
     目錄          0  2009-12-24 10:15  檢索課程設計上交內容\設計與說明文檔
     目錄          0  2009-12-24 10:15  檢索課程設計上交內容
----------- ---------  ---------- -----  ----
              9462897                    13
評論
共有 條評論