91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

【SVM】文本多分類源碼,加了很多注釋,按照 README 里面的說明就能運行,數據集也在里面。使用時把數據集直接放到 E 盤根目錄下(程序里面把路徑寫死了),或者放到其他位置,然后在 TrainProcess.java 里面修改對應的路徑也可以。

資源截圖

代碼片段和文件信息

import?urllib2
import?urllib
import?re
import?chardet
import?sys

class?HTML_Tool:
????BgnCharToNoneRex?=?re.compile(“(\t|\n|?||)“)
????EndCharToNoneRex?=?re.compile(“<.*?>“)
????BgnPartRex?=?re.compile(““)
????CharToNewLineRex?=?re.compile(“(
|

||
|
)“)
????CharToNextTabRex?=?re.compile(““)
????replaceTab?=?[(“<““<“)(“>““>“)(“&““&“)(“&““\““)(“nbsp;““?“)]
????
????def?Replace_Char(selfx):
????????x?=?self.BgnCharToNoneRex.sub(““x)
????????x?=?self.BgnPartRex.sub(“\n????“x)
????????x?=?self.CharToNewLineRex.sub(“\n“x)
????????x?=?self.CharToNextTabRex.sub(“\t“x)
????????x?=?self.EndCharToNoneRex.sub(““x)

????????for?t?in?self.replaceTab:
????????????x?=?x.replace(t[0]t[1])
????????return?x

class?crawler:
????def?__init__(self):
????????self.page?=?11
????????self.myTool?=?HTML_Tool()
????????self.urllist?=?[]

????????self.index?=?1


????def?downloadpage(selfurl):?
????????myResponse??=?urllib2.urlopen(url)
????????myPage?=?myResponse.read()
????????typeEncode?=?sys.getfilesystemencoding()
????????infoencode?=?chardet.detect(myPage).get(‘encoding‘‘utf-8‘)
????????html?=?myPage.decode(infoencode‘ignore‘).encode(typeEncode)
????????links?=?re.findall(‘????????for?link?in?links:
????????????link?=‘http://studa.net‘?+?link
????????????self.download(link)
????????self.index?=+?1
????????url?=?“http://www.studa.net/dilidizhi/index0“?+?str(self.index)+“.html“
????????self.getIndexPage(url)

????def?download(selfurl):
????????print?url
????????url2?=?url.replace(“.html““-2.html“)
????????myResponse1??=?urllib2.urlopen(url)
????????myPage1?=?myResponse1.read()
????????myResponse2??=?urllib2.urlopen(url2)
????????myPage2?=?myResponse2.read()
????????typeEncode?=?sys.getfilesystemencoding()
????????infoencode?=?chardet.detect(myPage1).get(‘encoding‘‘utf-8‘)
????????html1?=?myPage1.decode(infoencode‘ignore‘).encode(typeEncode)
????????html2?=?myPage2.decode(infoencode‘ignore‘).encode(typeEncode)
????????myItems1?=?re.findall(‘(.*?)
‘html1re.S)
????????myItems2?=?re.findall(‘(.*?)
‘html2re.S)????????
????????file_object1?=?open(str(self.page)+‘.txt‘?‘w+‘)
????????file_object1.write(self.myTool.Replace_Char(myItems1[0]))
????????file_object1.close()
????????self.page?+=?1
????????file_object2?=?open(str(self.page)+‘.txt‘?‘w+‘)
????????file_object2.write(self.myTool.Replace_Char(myItems2[0]))
????????file_object2.close()
????????self.page?+=?1

????def?getIndexPage(self?url):
????????print?url
????????if?self.page?==?200:
????????????exit()
????????self.downloadpage(url)






crawler().getIndexPage(“http://www.studa.net/dilidizhi/index.html“)

?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2016-03-10?17:51??article\
?????目錄???????????0??2016-03-10?17:50??article\交通運輸\
?????文件?????????355??2013-12-21?14:35??article\交通運輸\41.TXT
?????文件?????????437??2013-12-21?14:35??article\交通運輸\410.TXT
?????文件?????????491??2013-12-21?14:35??article\交通運輸\4100.TXT
?????文件????????1484??2013-12-21?14:35??article\交通運輸\4101.TXT
?????文件????????1072??2013-12-21?14:35??article\交通運輸\4102.TXT
?????文件?????????629??2013-12-21?14:35??article\交通運輸\4103.TXT
?????文件?????????415??2013-12-21?14:35??article\交通運輸\4104.TXT
?????文件?????????599??2013-12-21?14:35??article\交通運輸\4105.TXT
?????文件?????????405??2013-12-21?14:35??article\交通運輸\4106.TXT
?????文件?????????493??2013-12-21?14:35??article\交通運輸\4107.TXT
?????文件????????2031??2013-12-21?14:35??article\交通運輸\4108.TXT
?????文件?????????705??2013-12-21?14:35??article\交通運輸\4109.TXT
?????文件????????4758??2013-12-21?14:35??article\交通運輸\411.TXT
?????文件?????????707??2013-12-21?14:35??article\交通運輸\4110.TXT
?????文件?????????921??2013-12-21?14:35??article\交通運輸\4111.TXT
?????文件????????2847??2013-12-21?14:35??article\交通運輸\4112.TXT
?????文件?????????440??2013-12-21?14:35??article\交通運輸\4113.TXT
?????文件????????2105??2013-12-21?14:35??article\交通運輸\4114.TXT
?????文件?????????831??2013-12-21?14:35??article\交通運輸\4115.TXT
?????文件?????????589??2013-12-21?14:35??article\交通運輸\4116.TXT
?????文件????????1331??2013-12-21?14:35??article\交通運輸\4117.txt
?????文件????????1532??2013-12-21?14:35??article\交通運輸\4118.txt
?????文件?????????457??2013-12-21?14:35??article\交通運輸\4119.txt
?????文件????????1223??2013-12-21?14:35??article\交通運輸\412.TXT
?????文件????????1796??2013-12-21?14:35??article\交通運輸\4120.txt
?????文件?????????549??2013-12-21?14:35??article\交通運輸\4121.txt
?????文件?????????584??2013-12-21?14:35??article\交通運輸\4122.txt
?????文件?????????518??2013-12-21?14:35??article\交通運輸\4123.txt
?????文件????????1088??2013-12-21?14:35??article\交通運輸\4124.txt
............此處省略3223個文件信息

評論

共有 條評論