91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 3.87MB
    文件類型: .bz2
    金幣: 1
    下載: 0 次
    發布日期: 2023-09-12
  • 語言: Java
  • 標簽: 爬蟲

資源簡介

通過爬得的網頁來獲取平行網頁,Java語言開發的,開源

資源截圖

代碼片段和文件信息


package?com.googlecode.pupsniffer;

import?java.io.ByteArrayInputStream;
import?java.io.IOException;
import?java.io.InputStream;
import?java.io.UnsupportedEncodingException;
import?java.net.MalformedURLException;
import?java.net.URL;
import?java.nio.charset.Charset;
import?java.util.Iterator;
import?java.util.Set;
import?java.util.SortedMap;

import?info.monitorenter.cpdetector.io.*;

/**
 * A simple encoding detector based on cpdetector.sf.net
 * @author Xuchen Yao
 * @since 2010-03-30
 */
public?class?EncodingDetector?{
protected?CodepageDetectorProxy?detector;

/**
 * Creates a detector and registers the cpdetector strategies on the shared
 * {@link CodepageDetectorProxy}, in the order in which they are added below.
 *
 * <p>NOTE(review): {@code CodepageDetectorProxy.getInstance()} is a singleton, so
 * every construction of this class calls {@code add(...)} on the same proxy again.
 * Confirm that the proxy tolerates repeated registration.
 */
public EncodingDetector() {
    detector = CodepageDetectorProxy.getInstance(); // A singleton.

    // Quick path when the content is in a Unicode codepage.
    detector.add(new ByteOrderMarkDetector());

    // Tries to detect the meta charset attribute in HTML pages.
    // NOTE(review): the previous comment said "be verbose about parsing", but the
    // argument is false; presumably this switches verbose output off - confirm
    // against the cpdetector documentation.
    detector.add(new ParsingDetector(false));

    // Exclusion and frequency detection, used if the detectors above are unsuccessful.
    detector.add(JChardetFacade.getInstance()); // Another singleton.

    // Fallback; see the cpdetector javadoc.
    detector.add(ASCIIDetector.getInstance());
}

/**
?*?Detect?the?encoding?of?a?URL
?*?@param?url?the?URL?address
?*?@return?the?encoding?in?upper?case
?*?@throws?IOException
?*?@throws?MalformedURLException
?*/
public?String?detect(String?url)?throws?MalformedURLException?IOException?{

//?Work?with?the?configured?proxy:
Charset?charset?=?null;

charset?=?detector.detectCodepage(new?URL(url));
if(charset?==?null){
return?null;
}
else{
//?Open?the?document?in?the?given?code?page:
//java.io.Reader?reader?=?new?java.io.InputStreamReader(new?java.io.FileInputStream(document)charset);
//?Read?from?it?do?sth.?whatever?you?desire.?The?character?are?now?-?hopefully?-?correct..
return?charset.name().toUpperCase();
}
}

public?String?detectFromRaw(String?raw?String?encoding)?throws?IOException?{

//?Work?with?the?configured?proxy:
Charset?charset?=?null;
InputStream?is;
byte[]?bs;

//?convert?String?to?inputstream
try?{
if?(encoding?==?null)
bs?=?raw.getBytes();
else
bs?=?raw.getBytes(encoding);
????????????is?=?new?ByteArrayInputStream(bs);

???? charset?=?detector.detectCodepage(is?bs.length);
???? if(charset?==?null){
???? return?null;
???? }?else{
???? //?Open?the?document?in?the?given?code?page:
???? //java.io.Reader?reader?=?new?java.io.InputStreamReader(new?java.io.FileInputStream(document)charset);
???? //?Read?from?it?do?sth.?whatever?you?desire.?The?character?are?now?-?hopefully?-?correct..
???? return?charset.name();
???? }
????????}?catch?(UnsupportedEncodingException?e)?{
????????????e.printStackTrace();
????????}
????????return?null;

}

/**
?*?List?the?supported?encoding?on?your?system.?For?debugging?and?coding.
?*/
public?static?void?supporte

評論

共有 條評論