資源簡介
本實驗利用 PCA 算法對人臉數據集進行特徵提取,選擇少量特徵便能得到理想的結果;實驗驗證採用歐式距離。

代碼片段和文件信息
#?-*-?coding:?utf-8?-*-
#include?“ml.hpp“
import?numpy?as?np
import?os?glob?random?cv2
#定義?特征值大小排序方法;
def percentage2n(eigVals, percentage1):
    """Return how many of the largest eigenvalues reach a share of the total.

    The eigenvalues are sorted in descending order and accumulated; the
    result is the smallest count ``n`` whose running sum is at least
    ``sum(eigVals) * percentage1``.

    Args:
        eigVals: 1-D sequence/array of eigenvalues.
        percentage1: fraction of the total eigenvalue sum to cover
            (e.g. ``0.5`` for 50%).

    Returns:
        int: the number of leading eigenvalues needed.  If the threshold is
        never reached (``percentage1`` above 1, or rounding in the running
        sum) every eigenvalue is counted; an empty input yields ``0``.
    """
    descending = np.sort(eigVals)[::-1]
    if descending.size == 0:
        return 0

    threshold = descending.sum() * percentage1
    reached = np.nonzero(np.cumsum(descending) >= threshold)[0]
    if reached.size == 0:
        # The original fell off the end here and returned None, which broke
        # the caller's index arithmetic; keep all components instead.
        return int(descending.size)
    # ``reached[0]`` is a zero-based position, the result is a count.
    return int(reached[0]) + 1
#定義PCA?方法
def pca(data, p):
    """Project ``data`` onto its leading principal components.

    The number of components is chosen by ``percentage2n`` so that the kept
    eigenvalues cover the fraction ``p`` of the total eigenvalue sum.

    Instead of decomposing the (features x features) covariance matrix, the
    smaller (samples x samples) matrix ``Z Z^T`` is decomposed and its
    eigenvectors are mapped back through ``Z^T``; the mapped vectors are
    eigenvectors of ``Z^T Z`` and are then scaled to unit length.

    Args:
        data: 2-D array-like, one sample per row.
        p: fraction of the eigenvalue sum to retain, passed to
            ``percentage2n``.

    Returns:
        tuple: ``(projected, data_mean, V1)`` where ``projected`` is an
        ``ndarray`` of shape (rows, n), ``data_mean`` is a 1 x cols
        ``np.matrix`` of column means and ``V1`` is a cols x n ``np.matrix``
        whose columns are the unit-length projection vectors.  The matrix
        types are kept so callers that use ``*`` as a matrix product on
        these values keep working.
    """
    data = np.atleast_2d(np.asarray(data, dtype=np.float32))
    data_mean = data.mean(axis=0, keepdims=True)
    Z = data - data_mean  # centred samples; broadcasting replaces np.tile

    # Z Z^T is symmetric, so eigh is the appropriate solver: it always
    # returns real eigenvalues/eigenvectors, unlike the general eig.
    D, V = np.linalg.eigh(np.dot(Z, Z.T))

    n = percentage2n(D, p)
    print(u'向量n的數量為:')
    print(n)

    # Indices of the n largest eigenvalues, largest first.
    n_eigValIndice = np.argsort(D)[::-1][:n]

    # Map the small-matrix eigenvectors back to feature space, then
    # normalise every column to unit Euclidean length.
    V1 = np.dot(Z.T, V[:, n_eigValIndice])
    V1 /= np.linalg.norm(V1, axis=0)

    return np.dot(Z, V1), np.asmatrix(data_mean), np.asmatrix(V1)
#數據讀取
def loadImageSet(folder=u'att_faces', sampleCount=5):
    """Load the face images and split each subject into train/test samples.

    Sub-folders ``s1`` .. ``s40`` of ``folder`` are scanned for ``*.pgm``
    files.  Every image is read as grayscale and flattened to a 1-D vector.
    For each sub-folder ``sampleCount`` images are drawn at random for
    training and the remaining ones are used for testing.  The label of a
    sample is the zero-based index of its sub-folder.

    Args:
        folder: directory containing the ``s<N>`` sub-folders.
        sampleCount: number of training images drawn per sub-folder.

    Returns:
        tuple: ``(trainData, yTrain, testData, yTest)`` as ``np.ndarray``.

    Raises:
        ValueError: if a sub-folder holds fewer than ``sampleCount`` images.
        IOError: if an image file cannot be decoded.
    """
    trainData = []
    testData = []
    yTrain = []
    yTest = []
    for k in range(40):
        folder2 = os.path.join(folder, 's%d' % (k + 1))
        # glob order is unspecified; sort so a seeded run is reproducible.
        paths = sorted(glob.glob(os.path.join(folder2, '*.pgm')))

        data = []
        for path in paths:
            # Decode from an in-memory buffer instead of cv2.imread so that
            # non-ASCII paths work without the Python 2 only 'gbk' encoding.
            img = cv2.imdecode(np.fromfile(path, dtype=np.uint8),
                               cv2.IMREAD_GRAYSCALE)
            if img is None:
                raise IOError('cannot decode image: %s' % path)
            data.append(img.ravel())

        count = len(data)
        if sampleCount > count:
            raise ValueError('%s holds %d images, need at least %d'
                             % (folder2, count, sampleCount))

        # A set gives constant-time membership tests for the split below.
        sample = set(random.sample(range(count), sampleCount))
        trainData.extend(vec for i, vec in enumerate(data) if i in sample)
        testData.extend(vec for i, vec in enumerate(data) if i not in sample)
        yTest.extend([k] * (count - sampleCount))
        yTrain.extend([k] * sampleCount)

    # Single formatted argument keeps the output of the original
    # two-value print statement.
    print('%s %s' % (yTest, yTrain))
    return (np.array(trainData), np.array(yTrain),
            np.array(testData), np.array(yTest))
#定義主函數
def?main():
????xTrain_?yTrain?xTest_?yTest?=?loadImageSet()?????#獲取數據集,利用loadImageSet()
????num_train?num_test?=?xTrain_.shape[0]?xTest_.shape[0]?????????????????#獲取數據集維度
????print?xTrain_.shapexTest_.shape????????????????????#?結果為(200,10304)?,一共有200條輸入數據,每一條有10304個輸入值(而一張圖片相當于具備?112行*92?列數據)
????xTrain?data_mean?V?=?pca(xTrain_?0.5)???????????????????????????????????#,將訓練數據集輸入到PCA方法當中,得到新生成的數據集xT
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2018-05-31?15:43??pca算法實驗\
?????目錄???????????0??2018-05-31?15:40??pca算法實驗\att_faces\
?????文件????????1579??1995-02-23?18:13??pca算法實驗\att_faces\README
?????目錄???????????0??2018-05-31?15:40??pca算法實驗\att_faces\s1\
?????文件???????10318??1994-04-18?14:17??pca算法實驗\att_faces\s1\1.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s1\10.pgm
?????文件???????10318??1994-04-18?14:17??pca算法實驗\att_faces\s1\2.pgm
?????文件???????10318??1994-04-18?14:17??pca算法實驗\att_faces\s1\3.pgm
?????文件???????10318??1994-04-18?14:17??pca算法實驗\att_faces\s1\4.pgm
?????文件???????10318??1994-04-18?14:17??pca算法實驗\att_faces\s1\5.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s1\6.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s1\7.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s1\8.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s1\9.pgm
?????目錄???????????0??2018-05-31?15:40??pca算法實驗\att_faces\s10\
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s10\1.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s10\10.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s10\2.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s10\3.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s10\4.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s10\5.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s10\6.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s10\7.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s10\8.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s10\9.pgm
?????目錄???????????0??2018-05-31?15:40??pca算法實驗\att_faces\s11\
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s11\1.pgm
?????文件???????10318??1994-04-18?14:07??pca算法實驗\att_faces\s11\10.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s11\2.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s11\3.pgm
?????文件???????10318??1994-04-18?14:18??pca算法實驗\att_faces\s11\4.pgm
............此處省略414個文件信息
評論
共有 條評論