資源簡介
樸素貝葉斯的 Python 版本,內容很詳細,直接可用,可以從代碼更容易地理解樸素貝葉斯背後的原理。
代碼片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 27 16:46:26 2018
@author: caomao
"""
from numpy import *
import numpy as np
from math import *
def loadDataSet():
    """Return a small hard-coded corpus of tokenized posts and their labels.

    Returns:
        A ``(postingList, classVec)`` tuple. ``postingList`` is a list of six
        documents, each a list of word strings. ``classVec`` holds one label
        per document: 1 means abusive, 0 means not abusive.
    """
    postingList = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    classVec = [0, 1, 0, 1, 0, 1]  # 1 is abusive, 0 is not
    return postingList, classVec
???
# Collapse repeated words across all documents into one vocabulary.
def createVocabList(dataSet):
    """Build the list of distinct words that occur in ``dataSet``.

    Args:
        dataSet: An iterable of documents, each an iterable of hashable words.

    Returns:
        A list containing every distinct word exactly once. The order comes
        from iterating a ``set`` and is therefore not guaranteed.
    """
    vocabSet = set()
    for document in dataSet:
        # In-place union avoids building a new set for every document.
        vocabSet.update(document)
    return list(vocabSet)
def setOfWord2Vec(vocabList, inputSet):
    """Encode a document as a presence/absence vector over ``vocabList``.

    Args:
        vocabList: List of vocabulary words; position ``i`` of the result
            corresponds to ``vocabList[i]``.
        inputSet: Iterable of words making up the document.

    Returns:
        A list of ``len(vocabList)`` integers where an entry is 1 if the
        corresponding vocabulary word occurs in ``inputSet`` and 0 otherwise.
        Words missing from ``vocabList`` are reported on stdout and skipped.
    """
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        try:
            # One scan via index() instead of `in` followed by index().
            position = vocabList.index(word)
        except ValueError:
            # Parenthesized single-argument form works on Python 2 and 3.
            print("the word: %s is not in my vocab" % word)
        else:
            returnVec[position] = 1
    return returnVec
????
def bagOfWords2VecMN(vocabList, inputSet):
    """Encode a document as a word-count vector over ``vocabList``.

    Args:
        vocabList: List of vocabulary words; position ``i`` of the result
            corresponds to ``vocabList[i]``.
        inputSet: Iterable of words making up the document; repeats count.

    Returns:
        A list of ``len(vocabList)`` integers where each entry is the number
        of times the corresponding vocabulary word occurs in ``inputSet``.
        Words missing from ``vocabList`` are silently ignored.
    """
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        try:
            # One scan via index() instead of `in` followed by index().
            position = vocabList.index(word)
        except ValueError:
            continue  # out-of-vocabulary word: nothing to count
        returnVec[position] += 1
    return returnVec
評論
共有 條評論