
  • Size: 13KB
    File type: .py
    Coins: 1
    Downloads: 1
    Published: 2022-09-07
  • Language: Python
  • Tags: python, machine learning

Resource Description

A code skeleton for the machine learning algorithms XGBoost, LightGBM and CatBoost, covering basic data analysis, regression, binary classification and multi-class classification.
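
The snippet preview below only shows the LogisticRegression and LightGBM parts of the file. As a minimal, hedged sketch (not taken from the file itself), a corresponding XGBoost binary-classification setup could look like the following, assuming already prepared X_train / y_train / X_test / y_test arrays:

# Illustrative sketch only, not from the downloaded file
import xgboost as xgb

# X_train, y_train, X_test, y_test are assumed to be prepared beforehand
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)
xgb_params = {"objective": "binary:logistic", "eval_metric": "auc",
              "max_depth": 6, "eta": 0.1}
bst = xgb.train(xgb_params, dtrain, num_boost_round=1000,
                evals=[(dvalid, "valid")], early_stopping_rounds=50)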

Resource Screenshot

Code Snippet and File Information

import pandas as pd
import numpy as np
import scipy as sp

# Read a CSV file; f is the file path/name
def read_csv_file(f, logging=False):
    print("================= Reading file ===================")
    data = pd.read_csv(f)
    if logging:
        print(data.head(5))
        print(data.columns.values)
        print(data.describe())
        print(data.info())
    return data
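
A minimal usage sketch (the file name "train.csv" below is a hypothetical placeholder, not part of the original file):

# Hypothetical usage example: the path is a placeholder
df = read_csv_file("train.csv", logging=True)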




# Generic LogisticRegression framework
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# 1. Load the data
df_train = pd.DataFrame()
df_test = pd.DataFrame()
y_train = df_train['label'].values

# 2. Set up the scaler
ss = StandardScaler()

# 3. Feature processing / re-encoding
# 3.1 Categorical variables: one-hot encode, fitting on the training data only
enc = OneHotEncoder()
feats = ["creativeID", "adID", "campaignID"]
for i, feat in enumerate(feats):
    x_train = enc.fit_transform(df_train[feat].values.reshape(-1, 1))
    x_test = enc.transform(df_test[feat].values.reshape(-1, 1))
    if i == 0:
        X_train, X_test = x_train, x_test
    else:
        X_train, X_test = sparse.hstack((X_train, x_train)), sparse.hstack((X_test, x_test))

# 3.2 Numeric variables
# StandardScaler expects a 2-D array; the multi-column selection here is already
# shaped (-1, len(feats)), a single column would need reshape(-1, 1)
feats = ["price", "age"]
x_train = ss.fit_transform(df_train[feats].values)
x_test = ss.transform(df_test[feats].values)
X_train, X_test = sparse.hstack((X_train, x_train)), sparse.hstack((X_test, x_test))

# Model training
lr = LogisticRegression()
lr.fit(X_train, y_train)
proba_test = lr.predict_proba(X_test)[:, 1]
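
A quick way to sanity-check the model, sketched here under the assumption that only the training labels are available (the names X_tr, X_val, y_tr, y_val and lr_val are illustrative, not part of the original file):

# Illustrative sketch, not from the original file: AUC on a held-out validation split.
# X_train from sparse.hstack is in COO format, so convert to CSR before splitting.
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

X_tr, X_val, y_tr, y_val = train_test_split(X_train.tocsr(), y_train,
                                            test_size=0.3, random_state=1)
lr_val = LogisticRegression()
lr_val.fit(X_tr, y_tr)
print("validation AUC:", roc_auc_score(y_val, lr_val.predict_proba(X_val)[:, 1]))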




# LightGBM binary classification
import lightgbm as lgb
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

print("Loading Data ... ")

# Load the data
train_x, train_y, test_x = load_data()

# Split the training data into a training set and a validation set with
# sklearn's train_test_split; adjust test_size to the ratio you need
X, val_X, y, val_y = train_test_split(
    train_x,
    train_y,
    test_size=0.05,
    random_state=1,
    stratify=train_y  # keep the class distribution of y consistent with the original data
)

X_train = X
y_train = y
X_test = val_X
y_test = val_y

# Create the LightGBM datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# specify your configurations as a dict
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'binary_logloss', 'auc'},
    'num_leaves': 5,
    'max_depth': 6,
    'min_data_in_leaf': 450,
    'learning_rate': 0.1,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.95,
    'bagging_freq': 5,
    'lambda_l1': 1,
    'lambda_l2': 0.001,  # L2 regularization weight; larger values regularize more strongly
    'min_gain_to_split': 0.2,
    'verbose': 5,
    'is_unbalance': True
}

# train
print('Start training...')
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10000,
                valid_sets=lgb_eval,
                early_stopping_rounds=100)  # value truncated in the original snippet; 100 is a placeholder
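
The snippet ends inside the lgb.train call; a hedged sketch of how the trained model might then be used, relying only on roc_auc_score and pickle which are already imported above (the file name "lgb_model.pkl" is an illustrative placeholder):

# Illustrative continuation, not part of the original snippet:
# score the validation split and persist the booster
val_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
print("validation AUC:", roc_auc_score(y_test, val_pred))

with open("lgb_model.pkl", "wb") as fout:  # placeholder file name
    pickle.dump(gbm, fout)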
