Resource Overview
A logistic regression classifier implemented in pure Python on top of open-source libraries, trained with batch gradient descent; users can swap in other gradient descent schemes themselves (see the SGD sketch after the training function below).
Code Snippet and File Information
# coding=utf-8
import math
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import metrics
# Standardize/normalize the data
def Normalization(data):
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    return min_max_scaler.fit_transform(data)
# Load and preprocess the data
def DealData(DataPath):
    df = pd.read_csv(DataPath, header=0)
    df.columns = ['f1', 'f2', 'label']
    X = df[['f1', 'f2']]
    X = np.array(X)
    X = Normalization(X)
    Y = df['label'].map(lambda x: float(x.rstrip(';')))  # strip trailing ';' from labels
    Y = np.array(Y)
    return X, Y
# Sigmoid function
def Sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))
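# Note: math.exp(-z) overflows for large negative z. A numerically stable
# alternative (a hypothetical helper sketched here; not part of the
# original file) branches on the sign of z:
def Sigmoid_Stable(z):
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    ez = math.exp(z)  # z < 0 here, so exp(z) cannot overflow
    return ez / (1.0 + ez)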
# Compute the hypothesis h(x) = Sigmoid(theta . x)
def Hypothesis(theta, xi):
    z = 0.0
    for i in range(len(theta)):
        z += xi[i] * theta[i]
    return Sigmoid(z)
# Gradient of the cost with respect to theta[j], averaged over the whole
# training set (batch gradient descent); returns the update step alpha * gradient
def Cost_Function_Derivative(X, Y, theta, j, alpha):
    sum_errors = 0.0
    m = len(Y)
    for i in range(m):
        xi = X[i]
        xij = xi[j]
        hi = Hypothesis(theta, xi)
        sum_errors += (hi - Y[i]) * xij
    constant = float(alpha) / float(m)
    res = constant * sum_errors
    return res
# Gradient descent: update every component of theta
def Gradient_Descent(X, Y, theta, alpha):
    new_theta = []
    for j in range(len(theta)):
        CFDerivative = Cost_Function_Derivative(X, Y, theta, j, alpha)  # update step for the j-th weight
        new_theta_value = theta[j] - CFDerivative
        new_theta.append(new_theta_value)
    return new_theta
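# For reference, the same full-batch update can be written in vectorized
# NumPy form (a hypothetical helper sketching the idea; not part of the
# original file). It assumes X is an (m, n) array, Y a length-m array,
# and theta a length-n sequence:
def Gradient_Descent_Vectorized(X, Y, theta, alpha):
    theta = np.asarray(theta, dtype=float)
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))  # sigmoid over all samples at once
    grad = X.T.dot(h - Y) / len(Y)           # mean gradient over the batch
    return list(theta - alpha * grad)        # same return type as Gradient_Descent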
# Loss function: cross-entropy loss of the model,
# J = -(1/m) * sum_i [ y_i*log(h_i) + (1-y_i)*log(1-h_i) ]
def Cost_Function(X, Y, theta):
    sum_errors = 0.0
    m = len(Y)
    for i in range(m):
        xi = X[i]
        hi = Hypothesis(theta, xi)
        if Y[i] == 1:
            error = Y[i] * math.log(hi)
        elif Y[i] == 0:
            error = (1 - Y[i]) * math.log(1 - hi)
        sum_errors += error
    J = (-1.0 / m) * sum_errors
    return J
# Main body of logistic regression
def Logittic_Regression(X, Y, alpha, theta, num_iters):
    for x in range(num_iters):  # iterate num_iters times
        new_theta = Gradient_Descent(X, Y, theta, alpha)
        theta = new_theta
        # Print the loss every 100 iterations; this is optional, and computing
        # it once at the end would also suffice
        if x % 100 == 0:
            res = Cost_Function(X, Y, theta)
            print('cost is', res)
    return theta
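# As the introduction notes, the batch update above can be swapped for
# another gradient descent scheme. A minimal stochastic gradient descent
# variant (a hypothetical sketch; not part of the original file) updates
# theta after every sample instead of once per full pass:
def Logistic_Regression_SGD(X, Y, alpha, theta, num_iters):
    theta = list(theta)
    for it in range(num_iters):
        for i in range(len(Y)):
            hi = Hypothesis(theta, X[i])
            theta = [theta[j] - alpha * (hi - Y[i]) * X[i][j]
                     for j in range(len(theta))]
    return theta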
# Save the model (one theta value per line)
def Save_Model(model, modelPath):
    with open(modelPath, 'w') as save:  # 'with' ensures the file is closed
        for i in model:
            save.write(str(i))
            save.write('\n')
    return
# Predict with a trained model; returns a list with each sample's
# probability of belonging to class 1
def Logittic_Regression_Predict(model, X):
    predictOut = []
    for i in range(len(X)):
        probability = Hypothesis(model, X[i])
        predictOut.append(probability)
    return predictOut
# Compute AUC from the predictions and the ground-truth labels
def Logittic_Regression_Auc(Predict, TrueValue):
    fpr, tpr, thresholds = metrics.roc_curve(TrueValue, Predict, pos_label=1)
    return metrics.auc(fpr, tpr)
if __name__ == '__main__':
    train_X, train_Y = DealData('train.csv')  # load and format the training data
    test_X, test_Y = DealData('test.csv')     # load and format the test data
    # Initialize parameters
    initial_theta = [0.0 for i in range(train_X.shape[1])]  # one weight per feature
    alpha = 0.1       # learning rate
    iterations = 10   # number of iterations
    model_lr = Logittic_Regression(train_X, train_Y, alpha, initial_theta, iterations)
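    # The original listing is cut off at the line above. Based on the
    # functions defined in this file, the remaining steps would plausibly
    # be the following (a sketch, not the original file's code; the model
    # path 'model_lr.txt' is a hypothetical name):
    predict = Logittic_Regression_Predict(model_lr, test_X)
    auc = Logittic_Regression_Auc(predict, test_Y)
    print('AUC is', auc)
    Save_Model(model_lr, 'model_lr.txt')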