資源簡介

代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 30 21:39:21 2017
@author: Q
"""
import operator
import re

import feedparser
import numpy as np
def loadDataSet():
    """Return a small hand-labelled toy corpus for the naive-Bayes classifier.

    Returns:
        tuple: ``(postingList, classVec)`` where ``postingList`` is a list of
        six tokenised posts and ``classVec`` holds the label of each post
        (1 = abusive, 0 = not abusive).
    """
    postingList = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    classVec = [0, 1, 0, 1, 0, 1]    # 1 is abusive, 0 not
    return postingList, classVec
def createVocabList(data):
    """Build the vocabulary: every distinct token that occurs in ``data``.

    Args:
        data: iterable of documents, each itself an iterable of hashable
            tokens.

    Returns:
        list: the unique tokens. On Python 3.7+ they are in order of first
        appearance, so the vocabulary (and every feature vector indexed by
        it) is reproducible between runs instead of following set ordering.
    """
    # dict keys de-duplicate like a set but remember insertion order.
    return list(dict.fromkeys(token for doc in data for token in doc))
????
def setofWords2Vec(vocabList, data):
    """Convert a tokenised document into a count vector over ``vocabList``.

    Args:
        vocabList: list of known (hashable) words; a word's position in this
            list is its position in the returned vector.
        data: iterable of tokens making up one document.

    Returns:
        list[int]: one entry per vocabulary word holding how many times that
        word occurs in ``data``. Tokens not in ``vocabList`` are ignored.
    """
    # Build the word -> index map once rather than scanning the list with
    # `in` and `.index()` for every token. setdefault keeps the first
    # position of a duplicated word, matching list.index().
    position = {}
    for idx, word in enumerate(vocabList):
        position.setdefault(word, idx)

    returnList = [0] * len(vocabList)
    for vocab in data:
        idx = position.get(vocab)
        if idx is not None:
            returnList[idx] += 1
    return returnList
????
def trainNB0(trainMatrix, trainCategory):
    """Train the naive-Bayes model and return its log-probability tables.

    Args:
        trainMatrix: sequence of equal-length word-count vectors, one per
            training document.
        trainCategory: sequence of labels, one per document; the value 1
            marks class 1 and any other value is treated as class 0.

    Returns:
        tuple: ``(p0Vect, p1Vect, pAbusive)`` where ``p0Vect`` / ``p1Vect``
        are arrays of log P(word | class) for class 0 / class 1 and
        ``pAbusive`` is ``sum(trainCategory) / len(trainCategory)``.

    Raises:
        ValueError: if no training documents are supplied.
    """
    if len(trainCategory) == 0 or len(trainMatrix) == 0:
        raise ValueError("trainNB0 requires at least one training document")

    # float() keeps this a true division under Python 2 as well.
    pAbusive = sum(trainCategory) / float(len(trainCategory))

    labels = np.asarray(trainCategory)
    # Only rows that have a label take part, as in an index loop over labels.
    matrix = np.asarray(trainMatrix, dtype=float)[:len(labels)]
    isClass1 = labels == 1

    class1Rows = matrix[isClass1]
    class0Rows = matrix[~isClass1]

    # Counts start at 1 and denominators at 2 so that a word never seen in
    # a class does not produce log(0).
    p1num = np.ones(matrix.shape[1]) + class1Rows.sum(axis=0)
    p0num = np.ones(matrix.shape[1]) + class0Rows.sum(axis=0)
    p1Denom = 2 + class1Rows.sum()
    p0Denom = 2 + class0Rows.sum()

    # Logs are returned so the classifier can add instead of multiply.
    p1Vect = np.log(p1num / p1Denom)
    p0Vect = np.log(p0num / p0Denom)
    return p0Vect, p1Vect, pAbusive
????
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """Classify one document vector with the trained naive-Bayes model.

    Args:
        vec2Classify: word-count vector of the document (list or array).
        p0Vec: per-word log-probabilities for class 0.
        p1Vec: per-word log-probabilities for class 1.
        pClass1: prior probability of class 1.

    Returns:
        int: 1 if the class-1 log score is strictly greater than the class-0
        log score, otherwise 0 (so a tie yields 0).
    """
    counts = np.asarray(vec2Classify)
    # Each score is the sum of count * log P(word | class) plus the log prior.
    p0 = np.dot(counts, np.asarray(p0Vec)) + np.log(1 - pClass1)
    p1 = np.dot(counts, np.asarray(p1Vec)) + np.log(pClass1)
    return 1 if p1 > p0 else 0
def textParse(bigString):
    """Split raw text into lower-cased tokens.

    Args:
        bigString: the text to tokenise.

    Returns:
        list[str]: the pieces of ``bigString`` between runs of non-word
        characters, lower-cased, keeping only pieces longer than two
        characters (which also drops the empty strings ``re.split`` yields
        at the edges).
    """
    tokens = re.split(r'\W+', bigString)
    return [token.lower() for token in tokens if len(token) > 2]
def?spamTest():
????docList?=?[]
????classList?=?[]
????for?i?in?range(126):
????????with?open(‘spam/%d.txt‘%i)?as?f:
????????????doc?=?f.read()
????????docList.append(doc)
????????classList.append(1)
????????with?open(‘ham/%d.txt‘%i)?as?f:
????????????doc?=?f.read()
????????docList.append(doc)
????????classList.append(0)
????vocalList?=?createVocabList(docList)
????trainList?=?list(range(50))
????testList?=?[]
????for?i?in?range(13):
????????num?=?int(np.random.uniform(0len(docList))-10)
????????testList.append(trainList[num])
????????del(trainList[num])
????docMatrix?=?[]
????docClass?=?[]
????for?i?in?trainList:
????????subVec?=?setofWords2Vec(vocalListdocList[i])
????????docMatrix.append(subVec)
????????docClass.append(classList[i])
????p0vp1vpAb?=?trainNB0(docMatrixdocClass
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件          6  2017-12-03 20:35  beyes\.git\COMMIT_EDITMSG
     文件        297  2017-12-03 20:36  beyes\.git\config
     文件         73  2017-12-03 20:35  beyes\.git\desc
     文件         23  2017-12-03 20:35  beyes\.git\HEAD
     文件        478  2017-12-03 20:35  beyes\.git\hooks\applypatch-msg.sample
     文件        896  2017-12-03 20:35  beyes\.git\hooks\commit-msg.sample
     文件        189  2017-12-03 20:35  beyes\.git\hooks\post-update.sample
     文件        424  2017-12-03 20:35  beyes\.git\hooks\pre-applypatch.sample
     文件       1642  2017-12-03 20:35  beyes\.git\hooks\pre-commit.sample
     文件       1348  2017-12-03 20:35  beyes\.git\hooks\pre-push.sample
     文件       4898  2017-12-03 20:35  beyes\.git\hooks\pre-reba
     文件        544  2017-12-03 20:35  beyes\.git\hooks\pre-receive.sample
     文件       1239  2017-12-03 20:35  beyes\.git\hooks\prepare-commit-msg.sample
     文件       3610  2017-12-03 20:35  beyes\.git\hooks\update.sample
     文件       4125  2017-12-03 20:35  beyes\.git\index
     文件        240  2017-12-03 20:35  beyes\.git\info\exclude
     文件        152  2017-12-03 20:35  beyes\.git\logs\HEAD
     文件        152  2017-12-03 20:35  beyes\.git\logs\refs\heads\master
     文件        143  2017-12-03 20:36  beyes\.git\logs\refs\remotes\origin\master
     文件        275  2017-12-03 20:35  beyes\.git\ob
     文件        133  2017-12-03 20:35  beyes\.git\ob
     文件        227  2017-12-03 20:35  beyes\.git\ob
     文件        484  2017-12-03 20:35  beyes\.git\ob
     文件        146  2017-12-03 20:35  beyes\.git\ob
     文件         89  2017-12-03 20:35  beyes\.git\ob
     文件        110  2017-12-03 20:35  beyes\.git\ob
     文件        199  2017-12-03 20:35  beyes\.git\ob
     文件        200  2017-12-03 20:35  beyes\.git\ob
     文件        188  2017-12-03 20:35  beyes\.git\ob
     文件        200  2017-12-03 20:35  beyes\.git\ob
............此處省略149個文件信息
評論
共有 條評論