91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 22.36MB
    文件類型: .zip
    金幣: 1
    下載: 0 次
    發布日期: 2023-07-03
  • 語言: 其他
  • 標簽: 垃圾郵件 spam ham

資源簡介

該數據集由一系列郵件組成,適用于測試垃圾郵件過濾系統,請勿用作商業目的。

資源截圖

代碼片段和文件信息

#!/usr/bin/python
# FileName: Subsampling.py
# Version 1.0 by Tao Ban 2010.5.26
# This script extracts the contents (i.e. the subject and the first payload part)
# from each .eml file and stores them in a new file with the same name in the dst dir.

import email.parser
import os
import shutil
import stat
import sys

def ExtractSubPayload(filename):
    """Return the subject and first payload part of an .eml file as one string.

    Args:
        filename: Path of the .eml message to parse.

    Returns:
        The ``Subject`` header converted with ``str()`` (so a message without
        a subject contributes the text ``"None"``), immediately followed by
        the payload. For a multipart message only the first part is used,
        rendered with ``str()``.

    Raises:
        SystemExit: with status 1, after printing an error, when *filename*
            does not exist.
    """
    if not os.path.exists(filename):
        print("ERROR: input file does not exist: %s" % filename)
        # os has no exit() function; sys.exit() is the call that terminates
        # the interpreter with a status code.
        sys.exit(1)

    # Close the handle deterministically instead of leaking one per message.
    with open(filename) as fp:
        msg = email.message_from_file(fp)

    payload = msg.get_payload()
    if isinstance(payload, list):
        # Multipart message: only the first part is used. An empty part list
        # contributes nothing rather than raising IndexError.
        payload = payload[0] if payload else ''
    if not isinstance(payload, str):
        payload = str(payload)

    return str(msg.get('subject')) + payload

def ExtractBodyFromDir(srcdir, dstdir):
    """Extract subject and body text from every file under *srcdir*.

    Each file in *srcdir* is passed to ``ExtractSubPayload`` and the returned
    text is written to a file of the same name in *dstdir*. Subdirectories
    are processed recursively into a subdirectory of the same name.

    Args:
        srcdir: Directory holding the source .eml files.
        dstdir: Directory receiving the extracted text; created if missing.
    """
    if not os.path.exists(dstdir):
        os.makedirs(dstdir)

    for name in os.listdir(srcdir):
        srcpath = os.path.join(srcdir, name)
        dstpath = os.path.join(dstdir, name)
        if stat.S_ISDIR(os.stat(srcpath).st_mode):
            # Subfolder: recurse, mirroring the folder name under dstdir.
            ExtractBodyFromDir(srcpath, dstpath)
        else:
            body = ExtractSubPayload(srcpath)
            # The with-block closes the output even if write() raises.
            with open(dstpath, 'w') as dstfile:
                dstfile.write(body)


###################################################################
# Main script starts here.
# Prompts for the source and destination directories on stdin, then runs
# the extraction over the whole source tree.

# raw_input() only exists on Python 2; on Python 3 input() does the same job.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# srcdir is the directory where the .eml files are stored
print('Input source directory: ')
srcdir = _read_line()
if not os.path.exists(srcdir):
    print('The source directory %s does not exist exit...' % (srcdir))
    # Exit with a non-zero status so callers can tell the run failed.
    sys.exit(1)

# dstdir is the directory where the extracted contents are stored
print('Input destination directory: ')
dstdir = _read_line()
if not os.path.exists(dstdir):
    print('The destination directory is newly created.')
    os.makedirs(dstdir)

###################################################################
ExtractBodyFromDir(srcdir, dstdir)


 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2010-05-28 16:46  CSDMC2010_SPAM\
     目錄           0  2010-05-28 16:46  CSDMC2010_SPAM\CSDMC2010_SPAM\
     文件        2177  2010-05-27 09:28  CSDMC2010_SPAM\CSDMC2010_SPAM\ExtractContent.py
     文件        3411  2010-05-27 09:29  CSDMC2010_SPAM\CSDMC2010_SPAM\readme.txt
     文件       77886  2010-05-27 06:27  CSDMC2010_SPAM\CSDMC2010_SPAM\SPAMTrain.label
     目錄           0  2010-05-28 16:47  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\
     文件        6215  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00000.eml
     文件        6484  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00001.eml
     文件        7705  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00002.eml
     文件        6260  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00003.eml
     文件       33094  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00004.eml
     文件       49320  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00005.eml
     文件        3163  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00006.eml
     文件        2519  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00007.eml
     文件       30295  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00008.eml
     文件        2514  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00009.eml
     文件       13698  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00010.eml
     文件        5639  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00011.eml
     文件        1098  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00012.eml
     文件        5555  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00013.eml
     文件        6049  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00014.eml
     文件        4667  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00015.eml
     文件        3945  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00016.eml
     文件        7610  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00017.eml
     文件        3487  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00018.eml
     文件        5110  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00019.eml
     文件        5037  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00020.eml
     文件        6634  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00021.eml
     文件        6406  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00022.eml
     文件        2297  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00023.eml
     文件        3867  2010-05-27 06:01  CSDMC2010_SPAM\CSDMC2010_SPAM\TESTING\TEST_00024.eml
............此處省略8595個文件信息

評論

共有 條評論