資源簡介
本資源是基於 Python 環境實現的一個 LDA 算法推薦模型,裡面包括配置文件、文件設置代碼、數據集和 Python 腳本,內容很全面。

代碼片段和文件信息
# -*- coding: utf-8 -*-
import logging
import logging.config
try:
    import ConfigParser  # Python 2 module name
except ImportError:
    # Python 3 renamed the module; alias it so the code below is unchanged.
    import configparser as ConfigParser
import numpy as np
import random
import codecs
import os
from collections import OrderedDict

# Current working directory; the relative paths read from setting.conf are
# joined onto it below.
path = os.getcwd()
# Load the logging configuration file.
logging.config.fileConfig("logging.conf")
# Create the logger object (root logger).
logger = logging.getLogger()
# loggerInfo = logging.getLogger("TimeInfoLogger")
# Consolelogger = logging.getLogger("ConsoleLogger")
# Load the settings file.
conf = ConfigParser.ConfigParser()
conf.read("setting.conf")
# File paths, each taken from the [filepath] section of setting.conf.
trainfile = os.path.join(path, os.path.normpath(conf.get("filepath", "trainfile")))
wordidmapfile = os.path.join(path, os.path.normpath(conf.get("filepath", "wordidmapfile")))
thetafile = os.path.join(path, os.path.normpath(conf.get("filepath", "thetafile")))
phifile = os.path.join(path, os.path.normpath(conf.get("filepath", "phifile")))
paramfile = os.path.join(path, os.path.normpath(conf.get("filepath", "paramfile")))
topNfile = os.path.join(path, os.path.normpath(conf.get("filepath", "topNfile")))
tassginfile = os.path.join(path, os.path.normpath(conf.get("filepath", "tassginfile")))
# Initial model parameters, taken from the [model_args] section of setting.conf.
K = int(conf.get("model_args", "K"))
alpha = float(conf.get("model_args", "alpha"))
beta = float(conf.get("model_args", "beta"))
iter_times = int(conf.get("model_args", "iter_times"))
top_words_num = int(conf.get("model_args", "top_words_num"))
class Document(object):
    """Container for one document: its list of words and its length."""

    def __init__(self):
        # Words of the document; starts empty and is filled by the caller.
        self.words = []
        # Length of the document; starts at 0.
        self.length = 0
class DataPreProcessing(object):
    """Holds the preprocessed corpus: documents, counts and the word-to-id map."""

    def __init__(self):
        # Counters start at zero and are expected to be set by the caller.
        self.docs_count = 0
        self.words_count = 0
        # Collected documents.
        self.docs = []
        # Word -> id mapping; OrderedDict keeps insertion order, so the cache
        # file is written in the order the words were added.
        self.word2id = OrderedDict()

    def cachewordidmap(self):
        """Write the word-to-id map to ``wordidmapfile`` as UTF-8 text.

        One ``word<TAB>id`` line is written per entry, in mapping order.
        ``wordidmapfile`` is the module-level path read from the settings file.
        """
        with codecs.open(wordidmapfile, 'w', 'utf-8') as f:
            # ``word_id`` rather than ``id`` so the builtin is not shadowed.
            for word, word_id in self.word2id.items():
                f.write(word + "\t" + str(word_id) + "\n")
class LDAModel(object):
    """LDA model state: hyperparameters, file paths and count matrices."""

    def __init__(self, dpre):
        """Set up the model from a preprocessed corpus.

        ``dpre`` is the preprocessing result; this method reads its
        ``words_count`` and ``docs_count`` attributes to size the matrices.
        """
        self.dpre = dpre  # preprocessed data
        #
        # Model parameters:
        # number of clusters K, number of iterations iter_times, number of
        # feature words per class top_words_num, hyperparameters alpha, beta
        #
        self.K = K
        self.beta = beta
        self.alpha = alpha
        self.iter_times = iter_times
        self.top_words_num = top_words_num
        #
        # File variables:
        # word-segmented training file            trainfile
        # word-to-id mapping file                 wordidmapfile
        # document-topic distribution file        thetafile
        # word-topic distribution file            phifile
        # top-N words per topic file              topNfile
        # final assignment result file            tassginfile
        # parameters chosen for training file     paramfile
        #
        self.wordidmapfile = wordidmapfile
        self.trainfile = trainfile
        self.thetafile = thetafile
        self.phifile = phifile
        self.topNfile = topNfile
        self.tassginfile = tassginfile
        self.paramfile = paramfile
        # p      probability vector (double), temporary storage for sampling
        # nw     distribution of each word over the topics
        # nwsum  total number of words for each topic
        # nd     number of words per topic within each doc
        # ndsum  total number of words in each doc
        self.p = np.zeros(self.K)
        self.nw = np.zeros((self.dpre.words_count, self.K), dtype="int")
        self.nwsum = np.zeros(self.K, dtype="int")
        self.nd = np.zeros((self.dpre.docs_count, self.K), dtype="int")
        # NOTE(review): the source listing is cut off here; the comment above
        # mentions ndsum, but its initialisation is not part of the excerpt.
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2019-01-31 18:53  python_lda_learning\
     文件        2274  2016-08-01 14:50  python_lda_learning\README.md
     目錄           0  2019-01-29 12:36  python_lda_learning\data\
     目錄           0  2019-01-29 12:36  python_lda_learning\data\tmp\
     文件         104  2016-08-01 14:50  python_lda_learning\data\tmp\model_parameter.dat
     文件       10703  2016-08-01 14:50  python_lda_learning\data\tmp\model_phi.dat
     文件        1734  2016-08-01 14:50  python_lda_learning\data\tmp\model_tassign.dat
     文件         537  2016-08-01 14:50  python_lda_learning\data\tmp\model_theta.dat
     文件        1561  2016-08-01 14:50  python_lda_learning\data\tmp\model_twords.dat
     文件        2428  2016-08-01 14:50  python_lda_learning\data\tmp\wordidmap.dat
     文件        2530  2016-08-01 14:50  python_lda_learning\data\train.dat
     文件        9501  2019-01-30 11:18  python_lda_learning\lda.py
     目錄           0  2019-01-29 12:36  python_lda_learning\log\
     文件       10015  2016-08-01 14:50  python_lda_learning\log\info.log
     文件           0  2016-08-01 14:50  python_lda_learning\log\info.log.2015-08-06
     文件        1136  2016-08-01 14:50  python_lda_learning\logging.conf
     文件         385  2016-08-01 14:50  python_lda_learning\setting.conf
- 上一篇:隨機森林的代碼實現和相應的數據集 python代碼
- 下一篇:問卷星爬蟲帶驗證碼
評論
共有 條評論