資源簡介
實驗描述:
對指定數據集進行關聯規則挖掘,選擇適當的挖掘算法,編寫程序實現,提交程序和結果報告。
數據集:retail.txt。根據數據集中的數據,利用合適的挖掘算法得到頻繁項集,並計算置信度,求出滿足最小置信度的所有關聯規則。
retail.txt中每個數字表示一種商品的ID,一個{}內的表示一次交易
實驗環境和編程語言:
本實驗使用的編程語言為:Java
編程環境為:Intellij idea
實現頻繁項集的挖掘算法為Apriori算法
用于挖掘的樣本個數為:1000個(retail.txt的前1000條數據)
樣本示例:
{ 38,39,47,48}
表示一個顧客購買了ID為38、39、47、48的四種商品。

代碼片段和文件信息
/**
 * Created by 李勇志 on 2016/12/6.
 * 2014301500370
 *
 * 本工程包含兩個數據文件
 * fulldata為老師給的原始數據文件,由於數據量過大程序跑不出來結果,沒有選用進行測試
 * top1000data是從fulldata中摘取的前1000條數據,本程序運行的結果是基於這前1000條數據進行的頻繁項集挖掘和關聯度分析
 *
 */
import?java.io.BufferedReader;
import?java.io.File;
import?java.io.FileInputStream;
import?java.io.InputStreamReader;
import?java.util.*;
/**
 * Apriori算法實現 最大模式挖掘,涉及到支持度,但沒有置信度計算
 *
 * AssociationRulesMining()函數實現置信度計算和關聯規則挖掘
 */
public?class?AprioriMyself?{
????public?static??int?times=0;// iteration count
????private?static??double?MIN_SUPPROT?=?0.02;// minimum support ratio; default 0.02, overwritten from user input in main
????private?static???double?MIN_CONFIDENCE=0.6;// minimum confidence; default 0.6, overwritten from user input in main
????private?static?boolean?endTag?=?false;// loop state / iteration flag
????static?List>?record?=?new?ArrayList>();// the data set, assigned from getRecord(...) in main
????static??List>?frequentItemset=new?ArrayList<>();// stores all frequent itemsets
????static?List?map?=?new?ArrayList();// holds frequent itemsets with their support (presumably support counts; the original comment wording is ambiguous - confirm)
????public?static?void?main(String?args[]){
????????System.out.println(“請輸入最小支持度(如0.05)和最小置信度(如0.6)“);
????????Scanner?in=new?Scanner(System.in);
????????MIN_SUPPROT=in.nextDouble();
????????MIN_CONFIDENCE=in.nextDouble();
????????/*************讀取數據集**************/
????????record?=?getRecord(“top1000data“);
????????//控制臺輸出記錄
????????System.out.println(“讀取數據集record成功===================================“);
????????ShowData(record);
????????Apriori();//調用Apriori算法獲得頻繁項集
????????System.out.println(“頻繁模式挖掘完畢。\n\n\n\n\n進行關聯度挖掘,最小支持度百分比為:“+MIN_SUPPROT+“??最小置信度為:“+MIN_CONFIDENCE);
?????????AssociationRulesMining();//挖掘關聯規則
????}
????/**********************************************
?????*?****************讀取數據********************/
????public?static?List>?getRecord(String?url)?{
????????List>?record?=?new?ArrayList>();
????????try?{
????????????String?encoding?=?“UTF-8“;?//?字符編碼(可解決中文亂碼問題?)
????????????File?file?=?new?File(url);
????????????if?(file.isFile()?&&?file.exists())?{
????????????????InputStreamReader?read?=?new?InputStreamReader(
????????????????????????new?FileInputStream(file)?encoding);
????????????????BufferedReader?bufferedReader?=?new?BufferedReader(read);
????????????????String?lineTXT?=?null;
????????????????while?((lineTXT?=?bufferedReader.readLine())?!=?null)?{//讀一行文件
????????????????????String[]?lineString?=?lineTXT.split(““);
????????????????????List?lineList?=?new?ArrayList();
????????????????????for?(int?i?=?0;?i?????????????????????????lineList.add(lineString[i]);
????????????????????}
????????????????????record.add(lineList);
????????????????}
????????????????read.close();
????????????}?else?{
????????????????System.out.println(“找不到指定的文件!“);
????????????}
????????}?catch?(Exception?e)?{
????????????System.out.println(“讀取文件內容操作出錯“);
????????????e.printStackTrace();
????????}
????????return?record;
????}
????public?static?void?Apriori()???????????/**實現apriori算法**/
????{
????????//***
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-12-10 12:11  Apriori\
     目錄           0  2016-12-10 12:11  Apriori\.idea\
     文件         686  2016-12-06 15:17  Apriori\.idea\compiler.xml
     目錄           0  2016-12-10 12:11  Apriori\.idea\copyright\
     文件          76  2016-12-06 15:17  Apriori\.idea\copyright\profiles_settings.xml
     文件        3410  2016-12-06 15:30  Apriori\.idea\misc.xml
     文件         254  2016-12-06 15:17  Apriori\.idea\modules.xml
     文件       42581  2016-12-07 13:22  Apriori\.idea\workspace.xml
     文件         423  2016-12-06 15:17  Apriori\Apriori.iml
     文件     4167490  2016-12-06 16:08  Apriori\fulldata
     目錄           0  2016-12-07 01:13  Apriori\out\
     目錄           0  2016-12-07 01:13  Apriori\out\production\
     目錄           0  2016-12-10 12:11  Apriori\out\production\Apriori\
     文件       10692  2016-12-07 11:23  Apriori\out\production\Apriori\AprioriMyself.class
     文件        1099  2016-12-07 11:23  Apriori\out\production\Apriori\Mymap.class
     文件         246  2016-12-06 15:40  Apriori\out\production\Apriori\SetNode.class
     文件        2220  2016-12-06 15:44  Apriori\out\production\Apriori\Test.class
     目錄           0  2016-12-10 12:11  Apriori\src\
     文件       16289  2016-12-07 12:03  Apriori\src\AprioriMyself.java
     文件       37166  2016-12-06 15:42  Apriori\top1000data
評論
共有 條評論