資源簡介
親測可直接安裝使用的LightGBM包,希望能幫助不想安裝VS的小伙伴們安裝這個Python包。
代碼片段和文件信息
# coding: utf-8
# pylint: disable = invalid-name, C0111
import lightgbm as lgb
import numpy as np
import pandas as pd
# load or create your dataset
# The example files are tab-separated with no header row. Column 0 is used
# as the label below and every remaining column as a feature.
print('Load data...')
df_train = pd.read_csv('../binary_classification/binary.train', header=None, sep='\t')
df_test = pd.read_csv('../binary_classification/binary.test', header=None, sep='\t')
# The weight files are read without a header and only their first column is kept.
W_train = pd.read_csv('../binary_classification/binary.train.weight', header=None)[0]
W_test = pd.read_csv('../binary_classification/binary.test.weight', header=None)[0]

# Split label (column 0) from the feature matrix and convert to NumPy arrays.
y_train = df_train[0].values
y_test = df_test[0].values
X_train = df_train.drop(0, axis=1).values
X_test = df_test.drop(0, axis=1).values

# X_train is 2-D: (number of training rows, number of features).
num_train, num_feature = X_train.shape
# create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False
# (both datasets below are reused across several lgb.train calls).
lgb_train = lgb.Dataset(X_train, y_train,
                        weight=W_train, free_raw_data=False)
# The evaluation set is tied to the training set through `reference`.
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
                       weight=W_test, free_raw_data=False)
# specify your configurations as a dict
# These settings are passed unchanged to every lgb.train call in this script.
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}
# generate a feature name
# One synthetic name per feature column: feature_0, feature_1, ...
feature_name = ['feature_' + str(col) for col in range(num_feature)]

print('Start training...')
# feature_name and categorical_feature
# Train the first 10 rounds, evaluating on the training data itself and
# declaring column index 21 as categorical.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                valid_sets=lgb_train,  # eval training data
                feature_name=feature_name,
                categorical_feature=[21])

# check feature name
print('Finish first 10 rounds...')
print('7th feature name is:', repr(lgb_train.feature_name[6]))

# save model to file
# The saved file is what the next training stage loads through init_model.
gbm.save_model('model.txt')
# continue training
# init_model accepts:
# 1. model file name
# 2. Booster()
# Here the model file written by save_model('model.txt') is used, and
# evaluation switches to the held-out set.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model='model.txt',
                valid_sets=lgb_eval)

print('Finish 10 - 20 rounds with model file...')
# decay learning rates
# learning_rates accepts:
# 1. list/tuple with length = num_boost_round
# 2. function(curr_iter)
# Continue from the in-memory Booster, shrinking the rate by 1% per round.
# NOTE(review): newer LightGBM releases appear to have dropped the
# `learning_rates` argument in favour of the `reset_parameter` callback --
# confirm against the installed version before upgrading.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                learning_rates=lambda curr_iter: 0.05 * (0.99 ** curr_iter),
                valid_sets=lgb_eval)

print('Finish 20 - 30 rounds with decay learning rates...')
# change other parameters during training
# The callback is given one bagging_fraction value per boosting round:
# 0.7 for the first five rounds, then 0.6 for the remaining five.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
print(‘Finish?30?-?40?rounds?with?changing?bagging_frac
評論
共有 條評論