資源簡介
1、掌握數據預處理的方法,對數據進行預處理;
2、掌握基本K-MEANS算法的使用;

代碼片段和文件信息
// K-MEANS.cpp : 定義控制臺應用程序的入口點。
//
#include "stdafx.h"
// NOTE(review): the six header names that followed stdafx.h were lost when
// this listing was extracted. The set below covers every standard-library
// name the visible code uses (sqrt/abs, rand, time, ofstream, cout, vector);
// confirm against the original K-MEANS.cpp.
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <vector>
// Number of clusters to build.
#define k 5
using namespace std;
// One data record: a sequence of numeric attribute values.
// NOTE(review): the element type was lost in extraction; double is inferred
// from the floating-point arithmetic the file applies to Tuple elements.
typedef vector<double> Tuple;
// Number of records in the data set.
int dataNum;
// Number of attribute dimensions per record.
int dimNum;
// Output file; the visible reporting code writes the same text here as it
// writes to the console.
ofstream fileout("out.txt");
//計算兩個元組間的歐幾里距離??
double?getDistXY(const?Tuple&?t1?const?Tuple&?t2)
{
double?sum?=?0;
for?(int?i?=?1;?i?<=?dimNum;?++i)
{
sum?+=?(t1[i]?-?t2[i])?*?(t1[i]?-?t2[i]);
}
return?sqrt(sum);
}
//根據質心,決定當前元組屬于哪個簇??
int?clusterOfTuple(Tuple?means[]?const?Tuple&?tuple)
{
double?dist?=?getDistXY(means[0]?tuple);
double?tmp;
int?label?=?0;//標示屬于哪一個簇??
for?(int?i?=?1;?i tmp?=?getDistXY(means[i]?tuple);
if?(tmp }
return?label;
}
//獲得給定簇集的平方誤差??
double?getVar(vector?clusters[]?Tuple?means[])
{
double?var?=?0;
for?(int?i?=?0;?i? {
vector?t?=?clusters[i];
for?(int?j?=?0;?j {
var?+=?getDistXY(t[j]?means[i]);
}
}
//cout<<“sum:“< return?var;
}
// Computes the centroid (per-attribute mean) of one cluster.
//
// cluster: the records assigned to the cluster; each must hold at least
//          dimNum + 1 elements.
// Returns a Tuple of dimNum + 1 elements. Indices 1..dimNum hold the
// attribute means; index 0 is never written and stays 0.
// An empty cluster yields an all-zero Tuple. Without that guard the division
// below would compute 0.0 / 0, which is NaN, and every distance measured
// against a NaN centroid is NaN as well.
Tuple getMeans(const std::vector<Tuple>& cluster)
{
    Tuple t(dimNum + 1, 0.0);
    if (cluster.empty())
    {
        return t;
    }

    const int num = static_cast<int>(cluster.size());
    // Accumulate the per-attribute sums first...
    for (int i = 0; i < num; ++i)
    {
        for (int j = 1; j <= dimNum; ++j)
        {
            t[j] += cluster[i][j];
        }
    }
    // ...then turn each sum into a mean.
    for (int j = 1; j <= dimNum; ++j)
    {
        t[j] /= num;
    }
    return t;
}
// Writes the contents of every cluster to both the console (cout) and the
// output file (fileout).
//
// clusters: array of clusters, indexed by the loop variable `lable`.
//
// NOTE(review): this block was damaged when the listing was extracted. The
// `?` characters stand in for spaces, and everything between a `<` and the
// start of the next statement is missing, including the loop bounds and the
// string literals that followed "第". The lost text cannot be recovered from
// this file, so the code is left untouched; restore it from the original
// K-MEANS.cpp.
void?print(const?vector?clusters[])
{
// Outer loop: one pass per cluster (upper bound lost).
for?(int?lable?=?0;?lable {
// Cluster heading beginning with "第", sent to both streams, followed by a
// local copy of the current cluster.
cout?<“第“?< fileout?<“第“?< vector?t?=?clusters[lable];
// Middle loop: one pass per record of the cluster (upper bound lost).
for?(int?i?=?0;?i {
// Per-record prefix sent to both streams, then the inner loop over element
// indices 0..dimNum inclusive.
cout?< fileout?< for?(int?j?=?0;?j?<=?dimNum;?++j)
{
// One element per iteration, sent to both streams (expression lost).
cout?< fileout?< }
// Close the record with ")" and a newline on both streams.
cout?<“)\n“;
fileout?<“)\n“;
}
}
}
void?KMeans(vector&?tuples)
{
vector?clusters[k];//k個簇??
Tuple?means[k];//k個中心點??
int?i?=?0;
//一開始隨機選取k條記錄的值作為k個簇的質心(均值)??
//srand((unsigned?int)time(NULL));
for?(i?=?0;?i {
int?iToSelect?=?rand()?%?tuples.size();
if?(means[iToSelect].size()?==?0)
{
for?(int?j?=?0;?j?<=?dimNum;?++j)
{
means[i].push_back(tuples[iToSelect][j]);
}
++i;
}
}
int?lable?=?0;
//根據默認的質心給簇賦值??
for?(i?=?0;?i?!=?tuples.size();?++i){
lable?=?clusterOfTuple(means?tuples[i]);
clusters[lable].push_back(tuples[i]);
}
double?oldVar?=?-1;
double?newVar?=?getVar(clusters?means);
cout?<“初始的的整體誤差平方和為:“?< fileout?<“初始的的整體誤差平方和為:“?< int?t?=?0;
while?(abs(newVar?-?oldVar)?>=?0.5)?//當新舊函數值相差不到1即準則函數值不發生明顯變化時,算法終止??
{
cout?<“第?“?<++t?<“?次迭代開始:“?< fileout?<“第?“?<++t?<“?次迭代開始:“?< for?(i?=?0;?i? {
means[i]?=?getMeans(cluste
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件    3584142  2017-05-23 10:21  K-MEANS\K-MEANS\idf.txt
     文件     202230  2013-05-22 13:30  K-MEANS\K-MEANS\input.txt
     文件      12300  2017-05-16 14:23  K-MEANS\K-MEANS\iris.txt
     文件       4691  2017-05-23 10:26  K-MEANS\K-MEANS\K-MEANS.cpp
     文件       4532  2017-05-16 14:11  K-MEANS\K-MEANS\K-MEANS.vcxproj
     文件       1314  2017-05-16 14:11  K-MEANS\K-MEANS\K-MEANS.vcxproj.filters
     文件       1414  2017-05-16 15:07  K-MEANS\K-MEANS\kmeans.txt
     文件    1416464  2017-05-23 10:28  K-MEANS\K-MEANS\out.txt
     文件       1510  2017-05-16 14:11  K-MEANS\K-MEANS\ReadMe.txt
     文件        213  2017-05-16 14:11  K-MEANS\K-MEANS\stdafx.cpp
     文件        234  2017-05-16 14:11  K-MEANS\K-MEANS\stdafx.h
     文件        236  2017-05-16 14:11  K-MEANS\K-MEANS\targetver.h
     文件     858543  2017-05-16 14:19  K-MEANS\K-MEANS\train.data
     文件     858545  2009-04-24 16:10  K-MEANS\K-MEANS\train.data.bak
     文件        967  2017-05-16 14:11  K-MEANS\K-MEANS.sln
     目錄          0  2017-08-03 22:50  K-MEANS\K-MEANS
     目錄          0  2017-08-03 22:50  K-MEANS
----------- ---------  ---------- -----  ----
              6947335                    17
- 上一篇:單播通信實驗
- 下一篇:一鍵生成帶有所有文件名的Excel表格
評論
共有 條評論