91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 12KB
    文件類型: .java
    金幣: 1
    下載: 0 次
    發布日期: 2022-05-29
  • 語言: Java
  • 標簽: html轉txt??

資源簡介

只需要把html讀出來,放到方法里面,就能得到html的文本,很好的方法,我找了好久,現在發上來

資源截圖

代碼片段和文件信息

/*
 * File: WebFormatter.java
 * Created on 2005-6-24
 * Author: Liao Xuefeng asklxf@163.com
 * Copyright (C) 2005 Liao Xuefeng.
 */
import?java.util.*;
import?java.text.SimpleDateFormat;

/**
 * Performs formatting for web display.
 *
 * @author Xuefeng
 */
public?class?WebFormatter?{

????public?static?String?html2text(String?html)?{
????????StringBuffer?sb?=?new?StringBuffer(html.length());
????????char[]?data?=?html.toCharArray();
????????int?start?=?0;
????????boolean?previousIsPre?=?false;
????????Token?token?=?null;
????????for(;;)?{
????????????token?=?parse(data?start?previousIsPre);
????????????if(token==null)
????????????????break;
????????????previousIsPre?=?token.isPreTag();
????????????sb?=?sb.append(token.getText());
????????????start?+=?token.getLength();
????????}
????????return?sb.toString();
????}

????private?static?Token?parse(char[]?data?int?start?boolean?previousIsPre)?{
????????if(start>=data.length)
????????????return?null;
????????//?try?to?read?next?char:
????????char?c?=?data[start];
????????if(c==‘<‘)?{
????????????//?this?is?a?tag?or?comment?or?script:
????????????int?end_index?=?indexOf(data?start+1?‘>‘);
????????????if(end_index==(-1))?{
????????????????//?the?left?is?all?text!
????????????????return?new?Token(Token.TOKEN_TEXT?data?start?data.length?previousIsPre);
????????????}
????????????String?s?=?new?String(data?start?end_index-start+1);
????????????//?now?we?got?s=“<...>“:
????????????if(s.startsWith(““);
????????????????if(end_comment_index==(-1))?{
????????????????????//?illegal?end?but?treat?as?comment:
????????????????????return?new?Token(Token.TOKEN_COMMENT?data?start?data.length?previousIsPre);
????????????????}
????????????????else
????????????????????return?new?Token(Token.TOKEN_COMMENT?data?start?end_comment_index+3?previousIsPre);
????????????}
????????????String?s_lowerCase?=?s.toLowerCase();
????????????if(s_lowerCase.startsWith(“ript“))?{?//?this?is?a?script:
????????????????int?end_script_index?=?indexOf(data?start+1?“
ript>“);
????????????????if(end_script_index==(-1))
????????????????????//?illegal?end?but?treat?as?script:
????????????????????return?new?Token(Token.TOKEN_script?data?start?data.length?previousIsPre);
????????????????else
????????????????????return?new?Token(Token.TOKEN_script?data?start?end_script_index+9?previousIsPre);
????????????}
????????????else?{?//?this?is?a?tag:
????????????????return?new?Token(Token.TOKEN_TAG?data?start?start+s.length()?previousIsPre);
????????????}
????????}
????????//?this?is?a?text:
????????int?next_tag_index?=?indexOf(data?start+1?‘<‘);
????????if(next_tag_index==(-1))
????????????return?new?Token(Token.TOKEN_TEXT?data?start?data.length?previousIsPre);
????????return?new?Token(Token.TOKEN_TEXT?data?start?next_tag_index?previousIsP

評論

共有 條評論

相關資源