資源簡介
異常檢測(高斯分布模型)+訓練、驗證、測試數據

代碼片段和文件信息
import random

import matplotlib.pyplot as plt
import numpy as np
def TXTtoNumpy(TXTfilename, lableState=False, Print=False, delim='\t'):
    '''
    Load a delimited text file of numeric samples into a NumPy array.

    Each non-blank line is one example; fields are separated by ``delim``.

    :param TXTfilename: path of the text file to read
    :param lableState: True when the last field of every line is an integer label
    :param Print: True to print the number of examples and features
    :param delim: field separator used to split each line (default: tab)
    :return: float array of shape (n_examples, n_features); when lableState
             is True, the tuple (array, labels) where labels is a list of int
    :raises IndexError: if a line has fewer fields than the first line
    :raises ValueError: if a field cannot be converted to a number
    '''
    # The context manager closes the file even when parsing fails below.
    with open(TXTfilename) as TXTfr:
        # Blank lines carry no example and would break the float() conversion.
        stringArr = [line.strip().split(delim) for line in TXTfr if line.strip()]

    n_examples = len(stringArr)
    # The first line defines the column count; an empty file has no columns.
    n_columns = len(stringArr[0]) if stringArr else 0

    if lableState:
        # The label is the last column, so it is excluded from the features.
        n_features = max(n_columns - 1, 0)
        labels = [int(line[n_features]) for line in stringArr]
    else:
        n_features = n_columns

    if Print:
        print("n_examples: ", n_examples)
        print("n_features: ", n_features)

    # Fill column by column so that a short row raises IndexError instead of
    # being silently broadcast across the row.
    floatList = np.zeros((n_examples, n_features))
    for i in range(n_features):
        floatList[:, i] = [float(line[i]) for line in stringArr]

    if lableState:
        return floatList, labels
    return floatList
def GaussianParamEstimation(npArr, GaussianType='Normal'):
    '''
    Estimate the parameters of a Gaussian model from training samples.

    :param npArr: samples, shape=(n_examples, n_features)
    :param GaussianType: 'Normal' for independent per-feature Gaussians,
                         'Multi' for one multivariate Gaussian
    :return: (mean, std) for 'Normal', where both have shape (n_features,);
             (mean, sigma) for 'Multi', where sigma is the covariance matrix
             of shape (n_features, n_features)
    :raises ValueError: if GaussianType is neither 'Normal' nor 'Multi'

    Note: np.std is called with its default normalisation (divide by N)
    while np.cov uses its default (divide by N - 1).
    '''
    mean = np.average(npArr, axis=0)
    if GaussianType == 'Normal':
        std = np.std(npArr, axis=0)
        return mean, std
    if GaussianType == 'Multi':
        # np.cov centres the data itself, so no manual mean subtraction is
        # needed. atleast_2d keeps the (n_features, n_features) shape when
        # there is a single feature (np.cov would return a 0-d array).
        sigma = np.atleast_2d(np.cov(npArr, rowvar=False))
        return mean, sigma
    # Fail loudly instead of implicitly returning None for a bad type.
    raise ValueError(
        "GaussianType must be 'Normal' or 'Multi', got %r" % (GaussianType,))
def NormalGaussion(X, mean, std):
    '''
    Evaluate the product of independent per-feature Gaussian densities.

    :param X: samples, shape=(n_examples, n_features); a single sample of
              shape (n_features,) is also accepted
    :param mean: per-feature means, shape=(n_features,) or (1, n_features)
    :param std: per-feature standard deviations, same shape as mean
    :return: array of shape (n_examples,) holding, for each row of X, the
             product over features of N(x_i; mean_i, std_i ** 2)
    '''
    # Promote a single 1-D sample to one row so axis=1 is always the features.
    X = np.atleast_2d(np.asarray(X, dtype=float))
    # Flatten so both (n_features,) and (1, n_features) parameters broadcast
    # against the columns of X.
    mean = np.ravel(mean)
    std = np.ravel(std)

    coefficient = 1.0 / (np.sqrt(2 * np.pi) * std)
    exponent = np.exp(-np.square(X - mean) / (2 * np.square(std)))
    # Features are treated as independent, so the joint density is the
    # product of the per-feature densities.
    return np.prod(coefficient * exponent, axis=1)
def MultiGaussion(X, mean, sigma):
    '''
    Evaluate the multivariate Gaussian density N(X; mean, sigma).

    :param X: sample, shape=(1, n_features)
    :param mean: mean vector, shape=(n_features,) or (1, n_features)
    :param sigma: covariance matrix, shape=(n_features, n_features)
    :return: density as an array of shape (1, 1) for a (1, n_features) X.
             If X holds m rows the result is the (m, m) matrix product, whose
             diagonal contains the per-row densities.
    :raises numpy.linalg.LinAlgError: if sigma is singular
    '''
    sigma = np.atleast_2d(np.asarray(sigma, dtype=float))
    n_features = sigma.shape[0]
    diff = np.asarray(X, dtype=float) - np.asarray(mean, dtype=float)

    # Normalisation constant 1 / ((2*pi)^(k/2) * |sigma|^(1/2)): the exponent
    # must be half the number of features, not pi / 2.
    temp1 = 1.0 / (np.power(2 * np.pi, n_features / 2.0)
                   * np.sqrt(np.linalg.det(sigma)))
    # Quadratic form (X - mean) * sigma^-1 * (X - mean)^T.
    temp2 = np.dot(diff, np.linalg.inv(sigma))
    # -0.5 rather than (-1/2), which is integer division under Python 2.
    temp3 = np.exp(-0.5 * np.dot(temp2, diff.T))
    return temp1 * temp3
def?AnomalyDetection(npArr?labels?iterations?lamda_step=0.001?lamda=0.001):
????‘‘‘
????:param?npArr:?shape=(n_examples?n_features)
????:param?labels:??shape=(n_examples?1)
????:param?iterations:
????:param?lamda_step:
????:param?lamda:
????:return:
????‘‘‘
????n_examples?=?npArr.shape[0]
????n_features?=?npArr.shape[1]
????#?將labels的列表類型轉為numpy類型
????labels?=?np.array(labels).reshape(n_examples?1)
????#?找出標記為非0(異常樣本)的索引
????anomalyIndex?=?[]
????for?i?in?range(0?n_examples):
????????if(labels[i:i+1:]?!=?0):
????????????anomalyIndex.append(i)
????#?根據異常樣本索引得
?屬性????????????大小?????日期????時間???名稱
-----------?---------??----------?-----??----
?????目錄???????????0??2019-04-26?09:08??AnomalyDetection\
?????文件????????4527??2019-04-26?08:59??AnomalyDetection\testSet3.txt
?????文件????????7509??2019-04-26?09:08??AnomalyDetection\Anomaly_detection.py
評論
共有 條評論