資源簡介
手寫體數字識別原始數據(0~9 數字,每個樣本為 32*32 的字符點陣),附帶貝葉斯代碼實現手寫體識別和大致出錯率計算,可用於 Python 學習實踐。
github免費下載:https://github.com/HeCCXX/CSDNDownloading/raw/main/%E6%89%8B%E5%86%99%E4%BD%93%E6%95%B0%E5%AD%97%E8%AF%86%E5%88%AB%E6%95%B0%E6%8D%AE%E5%92%8C%E8%B4%9D%E5%8F%B6%E6%96%AF%E4%BB%A3%E7%A0%81%E5%AE%9E%E7%8E%B0.zip

代碼片段和文件信息
import os

import numpy as npy
# Bayes' theorem: P(B|A) = P(A|B) * P(B) / P(A)
class Bayes:
    """Naive Bayes classifier over discrete feature vectors.

    Training stores the prior of every label together with the raw training
    vectors grouped by label. Classification estimates each per-feature
    likelihood by counting how many stored vectors of a label carry the same
    value as the sample at that feature position.
    """

    def __init__(self):
        # Number of features per training vector; -1 means "not trained yet".
        self.length = -1
        # label -> fraction of the training samples carrying that label.
        self.labelrate = dict()
        # label -> list of the training vectors carrying that label.
        self.vectorrate = dict()

    def fit(self, dataset: list, labels: list):
        """Train the classifier and return ``self``.

        Args:
            dataset: Feature vectors, one per training sample.
            labels: Label of each sample, parallel to ``dataset``.

        Raises:
            ValueError: If ``dataset`` and ``labels`` differ in length, or if
                ``dataset`` is empty.
        """
        if len(dataset) != len(labels):
            raise ValueError("輸入測試數組和類別數組長度不一致")
        if len(dataset) == 0:
            raise ValueError("訓練數據不能為空")
        # Start from a clean state so that calling fit() a second time does
        # not mix the new samples with those of the previous run.
        self.labelrate = dict()
        self.vectorrate = dict()
        self.length = len(dataset[0])
        for vector, label in zip(dataset, labels):
            self.vectorrate.setdefault(label, []).append(vector)
        # Each prior is the share of samples in the label's group.
        total = len(labels)
        for label, vectors in self.vectorrate.items():
            self.labelrate[label] = len(vectors) / total
        print("訓練結束")
        return self

    def btest(self, testdata, labelset):
        """Return the label from ``labelset`` that best explains ``testdata``.

        Scores are accumulated as log-probabilities: multiplying one
        probability per feature underflows to 0.0 for long vectors, which
        would make every label tie. A feature value never seen for a label
        still rules that label out (its score becomes ``-inf``). When several
        labels share the best score, the one listed first in ``labelset``
        wins.

        Args:
            testdata: Feature vector with as many entries as the training
                vectors.
            labelset: Candidate labels; each must have been seen by ``fit``.

        Raises:
            ValueError: If the classifier is untrained, ``testdata`` has a
                different length than the training vectors, or ``labelset``
                is empty.
            KeyError: If a label in ``labelset`` was not present in training.
        """
        if self.length == -1:
            raise ValueError("未開始訓練,先訓練")
        sample = npy.asarray(testdata)
        if len(sample) != self.length:
            raise ValueError("測試數據特征值的長度與訓練數據不一致")
        scores = dict()
        for label in labelset:
            prior = self.labelrate[label]
            vectors = npy.asarray(self.vectorrate[label])
            # Per feature: number of training vectors of this label whose
            # value equals the sample's value at the same position.
            matches = (vectors == sample).sum(axis=0)
            # log(0) is the intended -inf for unseen values; silence the
            # divide-by-zero warning numpy would emit for it.
            with npy.errstate(divide="ignore"):
                likelihood = npy.log(matches / len(vectors)).sum()
            scores[label] = npy.log(prior) + likelihood
        # max() keeps the first of several equal maxima, i.e. labelset order.
        return max(scores, key=scores.get)
def datatoarray(fname, size=32):
    """Load a square grid of digit characters as one flat list of ints.

    Reads the first ``size`` lines of the file and converts the first
    ``size`` characters of each line to ``int``, row by row.

    Args:
        fname: Path of the text file to read.
        size: Number of rows and of columns to read; defaults to 32.

    Returns:
        A list of ``size * size`` integers in row-major order.

    Raises:
        IndexError: If a line holds fewer than ``size`` characters (this
            includes a file with fewer than ``size`` lines).
        ValueError: If a character is not a digit.
    """
    arr = []
    # The context manager closes the file even when parsing fails.
    with open(fname) as fh:
        for _ in range(size):
            thisline = fh.readline()
            arr.extend(int(thisline[col]) for col in range(size))
    return arr
def seplabel(fname):
    """Extract the integer label from a sample file name.

    The label is the part of the name before the first ``.`` and, within
    that, before the first ``_`` (``"8_90.txt"`` -> ``8``). Any directory
    prefix is dropped first, so ``"./testdata/8_90.txt"`` works as well.

    Raises:
        ValueError: If that leading part is not an integer.
    """
    # Only the final path component carries the label; without this step a
    # "./" prefix would make the split on "." return an empty string.
    basename = os.path.basename(fname)
    filestr = basename.split(".")[0]
    return int(filestr.split("_")[0])
def traindata(dirname="./traindata"):
    """Load every training sample found in a directory.

    Each file name is passed to ``seplabel`` for its label and each file is
    read with ``datatoarray``.

    Args:
        dirname: Directory holding the sample files; defaults to
            ``./traindata``.

    Returns:
        A ``(trainarr, labels)`` pair: ``trainarr`` is a numpy array with one
        1024-value row per file, and ``labels`` is the list of labels in the
        same order.
    """
    # os.listdir() returns names in arbitrary order; sorting makes the row
    # order reproducible between runs.
    trainfile = sorted(os.listdir(dirname))
    num = len(trainfile)
    trainarr = npy.zeros((num, 1024))
    labels = []
    for row, thisfname in enumerate(trainfile):
        labels.append(seplabel(thisfname))
        trainarr[row] = datatoarray(os.path.join(dirname, thisfname))
    return trainarr, labels
# Train the classifier on every sample under ./traindata.
bys = Bayes()
train_data, labels = traindata()
# fit() is annotated to take lists, so turn the array into a list of rows.
train_data = list(train_data)
bys.fit(train_data, labels)

# Sample consumed by the single-digit example below.
thisdata = datatoarray("./testdata/8_90.txt")
labelsall = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Recognise a single handwritten digit:
# test = bys.btest(thisdata, labelsall)
# print(test)

# Recognise every file under ./testdata and count the misclassifications.
testfile = os.listdir("./testdata")
num = len(testfile)
x = 0
for thisfilename in testfile:
    thislabel = seplabel(thisfilename)
    thisdataarr = datatoarray("./testdata/" + thisfilename)
    label = bys.btest(thisdataarr, labelsall)
    print("測試數字是:" + str(thislabel) + "識別出來的數字是:" + str(label))
    if label != thislabel:
        x += 1
        print("識別出錯")
print(x)
# An empty test directory has no error rate; skip it rather than divide by 0.
if num:
    print("出錯率:" + str(x / num))
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件        3161  2018-06-06 21:45  bayes_code.py
     目錄           0  2018-05-29 16:15  testdata\
     文件        1088  2010-10-07 06:35  testdata\0_0.txt
     文件        1088  2010-10-07 06:35  testdata\0_1.txt
     文件        1088  2010-10-07 06:35  testdata\0_10.txt
     文件        1088  2010-10-07 06:35  testdata\0_11.txt
     文件        1088  2010-10-07 06:35  testdata\0_12.txt
     文件        1088  2010-10-07 06:35  testdata\0_13.txt
     文件        1088  2010-10-07 06:35  testdata\0_14.txt
     文件        1088  2010-10-07 06:35  testdata\0_15.txt
     文件        1088  2010-10-07 06:35  testdata\0_16.txt
     文件        1088  2010-10-07 06:35  testdata\0_17.txt
     文件        1088  2010-10-07 06:35  testdata\0_18.txt
     文件        1088  2010-10-07 06:35  testdata\0_19.txt
     文件        1088  2010-10-07 06:35  testdata\0_2.txt
     文件        1088  2010-10-07 06:35  testdata\0_20.txt
     文件        1088  2010-10-07 06:35  testdata\0_21.txt
     文件        1088  2010-10-07 06:35  testdata\0_22.txt
     文件        1088  2010-10-07 06:35  testdata\0_23.txt
     文件        1088  2010-10-07 06:35  testdata\0_24.txt
     文件        1088  2010-10-07 06:35  testdata\0_25.txt
     文件        1088  2010-10-07 06:35  testdata\0_26.txt
     文件        1088  2010-10-07 06:35  testdata\0_27.txt
     文件        1088  2010-10-07 06:35  testdata\0_28.txt
     文件        1088  2010-10-07 06:35  testdata\0_29.txt
     文件        1088  2010-10-07 06:35  testdata\0_3.txt
     文件        1088  2010-10-07 06:35  testdata\0_30.txt
     文件        1088  2010-10-07 06:35  testdata\0_31.txt
     文件        1088  2010-10-07 06:35  testdata\0_32.txt
     文件        1088  2010-10-07 06:35  testdata\0_33.txt
     文件        1088  2010-10-07 06:35  testdata\0_34.txt
............此處省略2852個文件信息
評論
共有 條評論