資源簡介
本資源為《機器學習實戰》第二章 kNN 所需的相關代碼和數據集,有需要的可以下載。

代碼片段和文件信息
# -*- coding: cp936 -*-
# 1:約會網站
import operator
from os import listdir

from numpy import *
#創造數據集
def createDataSet():
    """Build the four-point toy data set used to exercise the kNN classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array of sample
        points and ``labels`` holds one class label per row of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# 第一個kNN分類器:inX-測試數據,dataSet-樣本數據,labels-標簽,k-鄰近的k個樣本
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean and computed against every row of ``dataSet``.

    Args:
        inX: Feature vector to classify, one value per column of ``dataSet``.
        dataSet: 2-D array-like of known samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes among the ``k`` closest rows.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Broadcasting subtracts inX from every row, so no tiled copy is needed.
    diffMat = asarray(dataSet, dtype=float) - asarray(inX, dtype=float)
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() replaces the Python 2-only iteritems(). The sort is stable, so
    # labels with equal counts keep the order in which they were first tallied.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
# 將文本記錄轉換為NumPy矩陣的解析程序
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Each non-blank line supplies its first three tab-separated fields as
    features and its last field as an integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: ``returnMat`` is an
        ``N x 3`` float array and ``classLabelVector`` is a list of ``N``
        ints, where ``N`` is the number of non-blank lines.

    Raises:
        ValueError: If a feature or label field cannot be converted to a
            number.
    """
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even when a line fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            listFromLine = line.split('\t')
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(int(listFromLine[-1]))

    returnMat = zeros((len(classLabelVector), 3))
    if featureRows:
        returnMat[:, :] = featureRows
    return returnMat, classLabelVector
#歸一化特征值
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.
    A column whose values are all equal has a zero range; its normalized
    values are 0 rather than NaN.

    Args:
        dataSet: 2-D array-like of samples, one row per sample.

    Returns:
        A ``(normDataSet, ranges, minVals)`` tuple: the normalized array, the
        per-column ``max - min`` and the per-column minimum.
    """
    dataSet = asarray(dataSet, dtype=float)
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide by 1 where the range is 0 so constant columns map to 0, not NaN.
    # The returned ``ranges`` keeps the true (possibly zero) values.
    safeRanges = where(ranges == 0, 1.0, ranges)
    # Broadcasting applies the per-column min/range to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals
#測試代碼
def datingClassTest():
    """Measure the kNN classifier's error rate on the dating data set.

    Loads ``datingTestSet2.txt``, normalizes it, and classifies the first
    10% of the rows against the remaining rows with ``k = 3``. Each
    prediction is printed next to the true label, followed by the overall
    error rate.
    """
    hoRatio = 0.10  # fraction of the rows held out as test data
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        # Rows [0, numTestVecs) are test samples; the rest are the known data.
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m],
                                     3)
        print('the classifier came back with: %d the real answer is: %d'
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f " % (errorCount / float(numTestVecs)))
????
#輸入某人的信息,便得出對對方喜歡程度的預測值
def?classifyPerson():
????resultList?=?[‘not?at?all‘?‘in?small?doses‘?‘in?large?doses‘]
????percentTats?=?float(raw_input(“percentage?of?time?spent?playing?video?games?“))
????ffMiles?=?float(raw_input(“frequent?flier?miles?earned?per?year?“))
????iceCream?=?float(raw_input(“liters?of?ice?cream?consumed?per?year?“))
????datingDataMat?datingLabels?=?file2matrix(‘datingTestSet2.txt‘)
????normMat?ranges?minVals?=?autoNorm(datingDataMat)
????inArr?=?array([ffMiles
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????文件???????4930??2014-07-18?16:26??kNN.py
?????文件???????1088??2010-10-07?21:35??testDigits\0_0.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_1.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_10.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_11.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_12.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_13.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_14.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_15.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_16.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_17.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_18.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_19.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_2.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_20.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_21.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_22.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_23.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_24.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_25.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_26.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_27.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_28.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_29.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_3.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_30.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_31.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_32.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_33.txt
?????文件???????1088??2010-10-07?21:35??testDigits\0_34.txt
............此處省略2857個文件信息
評論
共有 條評論