資源簡介
通過爬得的網頁來獲取平行網頁,Java語言開發的,開源
代碼片段和文件信息
package?com.googlecode.pupsniffer;
import?java.io.ByteArrayInputStream;
import?java.io.IOException;
import?java.io.InputStream;
import?java.io.UnsupportedEncodingException;
import?java.net.MalformedURLException;
import?java.net.URL;
import?java.nio.charset.Charset;
import?java.util.Iterator;
import?java.util.Set;
import?java.util.SortedMap;
import?info.monitorenter.cpdetector.io.*;
/**
?*?A?simple?encoding?detector?based?on?cpdetector.sf.net
?*?@author?Xuchen?Yao
?*?@since?2010-03-30
?*/
public?class?EncodingDetector?{
protected?CodepageDetectorProxy?detector;
/**
 * Builds the detector chain on the shared {@link CodepageDetectorProxy}.
 *
 * <p>Detectors are registered in the order below; the order is kept exactly
 * as in the original code because the proxy is expected to consult them in
 * sequence (see cpdetector documentation to confirm).
 */
public EncodingDetector() {
    // getInstance() returns a singleton, so this proxy is shared.
    CodepageDetectorProxy proxy = CodepageDetectorProxy.getInstance();

    // Cheap check first: quick when dealing with unicode codepages.
    proxy.add(new ByteOrderMarkDetector());

    // Tries to find the meta charset attribute in HTML pages.
    // NOTE(review): the previous comment said "be verbose about parsing"
    // while passing false -- presumably the flag turns verbose output off;
    // confirm against the ParsingDetector constructor documentation.
    proxy.add(new ParsingDetector(false));

    // Exclusion and frequency based detection, used when the detectors
    // above are unsuccessful. Another singleton.
    proxy.add(JChardetFacade.getInstance());

    // Fallback detector; see its javadoc.
    proxy.add(ASCIIDetector.getInstance());

    this.detector = proxy;
}
/**
?*?Detect?the?encoding?of?a?URL
?*?@param?url?the?URL?address
?*?@return?the?encoding?in?upper?case
?*?@throws?IOException
?*?@throws?MalformedURLException
?*/
public?String?detect(String?url)?throws?MalformedURLException?IOException?{
//?Work?with?the?configured?proxy:
Charset?charset?=?null;
charset?=?detector.detectCodepage(new?URL(url));
if(charset?==?null){
return?null;
}
else{
//?Open?the?document?in?the?given?code?page:
//java.io.Reader?reader?=?new?java.io.InputStreamReader(new?java.io.FileInputStream(document)charset);
//?Read?from?it?do?sth.?whatever?you?desire.?The?character?are?now?-?hopefully?-?correct..
return?charset.name().toUpperCase();
}
}
public?String?detectFromRaw(String?raw?String?encoding)?throws?IOException?{
//?Work?with?the?configured?proxy:
Charset?charset?=?null;
InputStream?is;
byte[]?bs;
//?convert?String?to?inputstream
try?{
if?(encoding?==?null)
bs?=?raw.getBytes();
else
bs?=?raw.getBytes(encoding);
????????????is?=?new?ByteArrayInputStream(bs);
???? charset?=?detector.detectCodepage(is?bs.length);
???? if(charset?==?null){
???? return?null;
???? }?else{
???? //?Open?the?document?in?the?given?code?page:
???? //java.io.Reader?reader?=?new?java.io.InputStreamReader(new?java.io.FileInputStream(document)charset);
???? //?Read?from?it?do?sth.?whatever?you?desire.?The?character?are?now?-?hopefully?-?correct..
???? return?charset.name();
???? }
????????}?catch?(UnsupportedEncodingException?e)?{
????????????e.printStackTrace();
????????}
????????return?null;
}
/**
?*?List?the?supported?encoding?on?your?system.?For?debugging?and?coding.
?*/
public?static?void?supporte
- 上一篇:android中TextView高亮并可以點擊
- 下一篇:JSmooth中文版+教程
評論
共有 條評論