資源簡介
可以直接提取word里的圖片信息,代碼有注釋,一看就明白
代碼片段和文件信息
import?java.io.FileInputStream;
import?java.io.FileNotFoundException;
import?java.io.FileOutputStream;
import?java.io.IOException;
import?java.util.ArrayList;
import?java.util.List;
import?org.apache.poi.hwpf.HWPFDocument;
import?org.apache.poi.hwpf.model.*;
import?org.apache.poi.hwpf.usermodel.CharacterRun;
import?org.apache.poi.hwpf.usermodel.Paragraph;
import?org.apache.poi.hwpf.usermodel.Picture;
import?org.apache.poi.hwpf.usermodel.Range;
/**
 * MsWordExtractor extracts the text and the pictures contained in a Microsoft Word document.
 * Note that extracted pictures can be placed under a path specified by the user.
 *
 * @author Zhou Xiaolong
 * @email  shaolongchou@126.com
 */
public?class?MsWordExtractor?{
private?HWPFDocument?doc?=?null;
private?Range?range?=?null;
private?List?pictsList?=?null;
//?用來標記是否存在圖片
boolean?hasPic?=?false;
/**
 * Creates an extractor for the given Microsoft Word document.
 *
 * <p>The file is opened only when {@code msDocName} ends with {@code ".doc"}; for any other
 * name nothing is loaded and the document and range fields are not assigned. I/O failures,
 * including a missing file, are not propagated to the caller: the stack trace is printed and
 * the document and range fields are left unassigned.
 *
 * @param msDocName name (path) of the Microsoft Word document to read
 */
public MsWordExtractor(String msDocName) {
    if (msDocName.endsWith(".doc")) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(msDocName);
            doc = new HWPFDocument(in);
            range = doc.getRange();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, whether or not loading succeeded.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
/**
 * Default constructor, kept private.
 *
 * <p>It loads nothing, so every field keeps its default value.
 */
private MsWordExtractor() {
    super();
}
/**
?*?從word文檔中獲取所有文字
?*?@return
?*/
public?String?getAllText()?{
int?numP?=?range.numParagraphs();
StringBuffer?ret?=?new?StringBuffer();
for?(int?i?=?0;?i? //從每一段落中獲取文字
Paragraph?p?=?range.getParagraph(i);
ret.append(p.text());
}
return?ret.toString();
}
/**
?*?從word里面提取圖片
?*?@return
?*/
public?boolean?extractPictures()?{
pictsList?=?new?ArrayList();
//?得到文檔的數據流
byte[]?dataStream?=?doc.getDataStream();
int?numChar?=?range.numCharacterRuns();
PicturesTable?pTable?=?new?PicturesTable(dataStream);
for?(int?j?=?0;?j?
評論
共有 條評論