資源簡介
包含了數據挖掘十大算法中的:Apriori、C4.5、CART、EM、K-means、KNN、PageRank。語言為Python

代碼片段和文件信息
'''
@version 0.1
@date 2016-03-21
@reference
'''
from numpy import *
def loadDataSet():
    """Return a small hard-coded transaction database for demonstration.

    Returns:
        A new list of four transactions, each a list of integer item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def createC1(dataSet):
    """Build the candidate 1-itemsets from a transaction database.

    Args:
        dataSet: An iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-item frozensets, one per distinct item, ordered by
        item. Frozensets are used so the candidates can serve as dict keys.
    """
    # Collect distinct items in a set instead of scanning a list for each
    # item, which was quadratic in the number of distinct items.
    items = set()
    for transaction in dataSet:
        items.update(transaction)
    # Return a real list (not a lazy map object) because callers iterate
    # over the candidates once per transaction.
    return [frozenset([item]) for item in sorted(items)]
def scanD(D, Ck, minSupport):
    """Count candidate itemsets in the transactions and filter by support.

    Args:
        D: An iterable of transactions (each an iterable of items).
        Ck: An iterable of candidate itemsets (frozensets).
        minSupport: Minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` lists the candidates
        whose support is at least ``minSupport``. ``supportData`` maps every
        candidate that occurred in at least one transaction to its support,
        including candidates below the threshold.
    """
    # Materialize both inputs: D is measured with len() and Ck is re-scanned
    # for every transaction, so one-shot iterators would otherwise fail.
    D = list(D)
    Ck = list(Ck)

    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                # dict.get replaces the Python 2-only dict.has_key check.
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            # Prepending keeps the original result ordering.
            retList.insert(0, key)
        supportData[key] = support
    return retList, supportData
def aprioriGen(Lk, k):
    """Create candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are joined when their first k-2 items, taken in sorted
    order, are equal.

    Args:
        Lk: A list of frozensets, each of size k-1.
        k: Size of the itemsets to generate.

    Returns:
        A list with the union of every joinable pair from ``Lk``.
    """
    # Sort BEFORE slicing: set iteration order is arbitrary, so slicing the
    # unsorted elements (as the original did) could compare the wrong
    # prefixes and silently drop valid candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])  # set union
    return retList
def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets in a transaction database.

    Args:
        dataSet: An iterable of transactions, each an iterable of items.
        minSupport: Minimum fraction of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size i+1; the final entry is the empty list that ended
        the search. ``supportData`` maps each counted itemset to its support.
    """
    # dataSet is traversed twice (candidates, then transaction sets), so
    # materialize it in case the caller passed a one-shot iterator.
    dataSet = list(dataSet)
    C1 = createC1(dataSet)
    # A real list is required: the transactions are measured with len() and
    # re-scanned on every level, which a lazy map object cannot support.
    D = [set(transaction) for transaction in dataSet]
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # Grow itemsets one item at a time until a level has no frequent sets.
    while L[k - 2]:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)  # scan DB to get Lk
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData
# Demo entry point: mine the sample transactions and show the frequent itemsets.
if __name__ == '__main__':
    dataSet = loadDataSet()
    L, suppData = apriori(dataSet)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(L)
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\
     文件         284  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\Top10_Algorithms_in_DataMining.iml
     文件         159  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\encodings.xml
     文件         735  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\misc.xml
     文件         312  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\modules.xml
     文件        1824  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\.idea\workspace.xml
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\Apriori\
     文件        1793  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\Apriori\Apriori.py
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\C4.5\
     文件        4501  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\C4.5\C4.5.py
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\CART\
     文件        4056  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\CART\Cart.py
     文件        3646  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\CART\testSet
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\EM\
     文件         916  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\EM\em.py
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\K-means\
     文件        2313  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\K-means\Kmeans.py
     文件        1519  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\K-means\testSet
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\KNN\
     文件         949  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\KNN\KNN.py
     目錄           0  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\PageRank\
     文件        1276  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\PageRank\pagerank.py
     文件         325  2016-05-01 03:46  Top10_Algorithms_in_DataMining-master\README.md
- 上一篇:XModem -發送端源代碼Python語言實現
- 下一篇:OpenOPC指南
評論
共有 條評論