資源簡介
已經封裝好因子分解機(Factorization Machine,FM)算法,包含簡單的模型調參,主要用於二分類、回歸問題。
代碼片段和文件信息
# -*- coding: utf-8 -*-
from?sklearn.ensemble?import?GradientBoostingClassifier
'''
Author: pany
Create: 2017-12-14
Update: 2017-12-18
Description: factorization machine library
'''
from?pyfm?import?pylibfm
from?sklearn.feature_extraction?import?DictVectorizer
import?numpy?as?np
import?pandas?as?pd
from?sklearn.cross_validation?import?train_test_split
from?sklearn.metrics?import?roc_auc_scoremean_squared_errorlog_lossaccuracy_score
# train = [
# {"user": "1", "item": "5", "age": 19},
# {"user": "2", "item": "43", "age": 33},
# {"user": "3", "item": "20", "age": 55},
# {"user": "4", "item": "10", "age": 20},
# ]
# print train
# v = DictVectorizer()
# X = v.fit_transform(train)
#
# print(X.toarray())
#
# y = np.array([1, 0, 0, 1])
#
# fm = pylibfm.FM()
# fm.fit(X, y)
# fm.predict(v.transform({"user": "1", "item": "10", "age": 24}))
# print fm.predict(v.transform({"user": "1", "item": "10", "age": 24}))
class?FM:
????def?__init__(self?X_train?Y_train?task):?#?data-type?is?dataframe
????????self.task?=?task??#?task:?classificationregression
????????self.X_train?=?X_train
????????self.Y_train?=?Y_train
????????self.feat_num?=?len(X_train.columns)
????def?__str__(self):
????????return?‘task:?%s?feat_num:?%s‘?%(self.task?self.feat_num)
????def?get_model(self):
????????fm?=?pylibfm.FM()
????????num_factors?=?0
????????if?self.feat_num?100:
????????????num_factors?=?self.feat_num/5
????????elif?self.feat_num?in?range(1011001):
????????????num_factors?=?self.feat_num/10
????????else:
????????????num_factors?=?100
????????if?self.task?==?‘classification‘:
???
評論
共有 條評論