資源簡介
用 Python 實現的 Apriori 算法和測試用的數據。可以直接將壓縮包裡面的 apriori.py 文件放到 Python 安裝目錄的 Lib 文件夾中,然後通過 import apriori 進行調用。

代碼片段和文件信息
# -*- coding: utf-8 -*-
from __future__ import print_function
import pandas as pd
# Join step of Apriori: build the size-k candidates C_k from the
# (k-1)-itemsets L_{k-1}.
def connect_string(x, ms):
    """Join itemsets that agree on all but their last item.

    Args:
        x: Iterable of itemsets, each encoded as its item names joined by
            ``ms``. All itemsets are assumed to hold the same number of
            items; the size of the first one is used for every comparison.
        ms: Separator string placed between item names.

    Returns:
        A list of candidate itemsets, each a list of item names one item
        longer than the inputs. Returns an empty list when ``x`` is empty.
    """
    # Sort the items of every itemset so that equal prefixes compare equal.
    itemsets = [sorted(item.split(ms)) for item in x]
    if not itemsets:
        return []
    last = len(itemsets[0]) - 1
    candidates = []
    for pos, left in enumerate(itemsets):
        # Pairing an itemset with itself can never differ in the last item,
        # so only later itemsets need to be visited.
        for right in itemsets[pos + 1:]:
            if left[:last] == right[:last] and left[last] != right[last]:
                candidates.append(
                    left[:last] + sorted([right[last], left[last]])
                )
    return candidates
# Search for association rules level by level.
def find_rule(d, support, confidence, ms=u'--'):
    """Find association rules whose support and confidence exceed thresholds.

    Args:
        d: pandas DataFrame with one column per item. Presumably one row per
            transaction holding 0/1 indicators, since an itemset's presence
            is computed as the row-wise product of its columns -- confirm
            against the caller.
        support: Support threshold; itemsets are kept only when strictly
            above it.
        confidence: Confidence threshold; rules are kept only when strictly
            above it.
        ms: Separator used to join item names into itemset and rule labels.
            Labels are split on it again, so item names presumably must not
            contain it.

    Returns:
        A DataFrame indexed by rule label, with columns ``support`` and
        ``confidence``, sorted by confidence and then support in descending
        order. In a label the last item is the consequent, e.g. ``A--B--C``
        reads as A + B --> C. The frame is also printed.
    """
    # Support of every single item.
    support_series = 1.0 * d.sum() / len(d)
    # First filter: items above the support threshold.
    column = list(support_series[support_series > support].index)
    rules = {}  # rule label -> (support, confidence), in discovery order
    k = 0
    while len(column) > 1:
        k += 1
        print(u'\n正在進行第%s次搜索。。。' % k)
        column = connect_string(column, ms)
        print(u'數目:%s...' % len(column))
        if not column:
            # No candidates could be joined, so there is nothing to evaluate.
            break
        # Building the joined indicator columns is the most time- and
        # memory-consuming step; consider parallelising it for large data.
        d_2 = pd.DataFrame(
            {ms.join(i): d[i].prod(axis=1, numeric_only=True) for i in column}
        )
        # Support of every joined candidate, then the next support filter.
        support_series_2 = 1.0 * d_2.sum() / len(d)
        column = list(support_series_2[support_series_2 > support].index)
        # Series.append was removed in pandas 2.0; concat is the replacement.
        support_series = pd.concat([support_series, support_series_2])
        # Try every item as the consequent: for {A, B, C} that is
        # A + B --> C, B + C --> A and A + C --> B.
        for itemset in column:
            items = itemset.split(ms)
            for j in range(len(items)):
                antecedent = items[:j] + items[j + 1:]
                rule = ms.join(antecedent + items[j:j + 1])
                rule_confidence = (
                    support_series[itemset]
                    / support_series[ms.join(antecedent)]
                )
                if rule_confidence > confidence:
                    rules[rule] = (support_series[itemset], rule_confidence)
    # Build the frame once instead of writing cells by chained assignment,
    # which does not update the frame under pandas copy-on-write.
    result = pd.DataFrame(
        list(rules.values()),
        index=list(rules.keys()),
        columns=['support', 'confidence'],
        dtype=float,
    )
    result = result.sort_values(['confidence', 'support'], ascending=False)
    print(u'\n結果為:')
    print(result)
    return result

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     文件       2917  2018-12-08 11:13  Apriori算法+數據\apriori.py
     文件       8837  2018-12-08 10:29  Apriori算法+數據\lesson_buy.xlsx
     目錄          0  2018-12-08 11:17  Apriori算法+數據
----------- ---------  ---------- -----  ----
                11754                    3
- 上一篇:opc python 讀取代碼
- 下一篇:教務管理系統
評論
共有 條評論