-
大小: 3.44MB  文件類型: .zip  金幣: 2  下載: 1 次  發布日期: 2023-09-10
- 語言: Python
- 標簽: python, LDA
資源簡介
利用python對文本進行LDA主題生成模型,里面有使用方法說明,可以自己設置參數等。

代碼片段和文件信息
# -*- coding: utf-8 -*-
import logging
import logging.config
try:
    import ConfigParser  # Python 2 module name
except ImportError:
    # Python 3 renamed the module; keep the old name bound so the rest of
    # the file can keep using ``ConfigParser.ConfigParser()``.
    import configparser as ConfigParser
import numpy as np
import random
import codecs
import os
from collections import OrderedDict

# Current working directory; every configured file path below is joined
# onto it.
path = os.getcwd()
# Load the logging configuration file.
logging.config.fileConfig("logging.conf")
# Create the logger object.
logger = logging.getLogger()
# loggerInfo = logging.getLogger("TimeInfoLogger")
# Consolelogger = logging.getLogger("ConsoleLogger")
# Load the settings file.
conf = ConfigParser.ConfigParser()
conf.read("setting.conf")
# File paths, read from the [filepath] section of setting.conf.
#   trainfile      - the word-segmented training text
#   wordidmapfile  - word -> id mapping
#   thetafile      - document-topic distribution
#   phifile        - word-topic distribution
#   paramfile      - parameters chosen for the training run
#   topNfile       - top-N words of each topic
#   tassginfile    - final topic assignment result
trainfile = os.path.join(path, os.path.normpath(conf.get("filepath", "trainfile")))
wordidmapfile = os.path.join(path, os.path.normpath(conf.get("filepath", "wordidmapfile")))
thetafile = os.path.join(path, os.path.normpath(conf.get("filepath", "thetafile")))
phifile = os.path.join(path, os.path.normpath(conf.get("filepath", "phifile")))
paramfile = os.path.join(path, os.path.normpath(conf.get("filepath", "paramfile")))
topNfile = os.path.join(path, os.path.normpath(conf.get("filepath", "topNfile")))
tassginfile = os.path.join(path, os.path.normpath(conf.get("filepath", "tassginfile")))
# Initial model parameters, read from the [model_args] section:
# number of clusters K, hyper-parameters alpha and beta, number of
# iterations, and number of feature words kept per cluster.
K = int(conf.get("model_args", "K"))
alpha = float(conf.get("model_args", "alpha"))
beta = float(conf.get("model_args", "beta"))
iter_times = int(conf.get("model_args", "iter_times"))
top_words_num = int(conf.get("model_args", "top_words_num"))
class Document(object):
    """A single document of the corpus.

    A new instance starts empty: ``words`` is an empty list and ``length``
    is 0. Both are filled in by whoever builds the document.
    """

    def __init__(self):
        # Words of the document (presumably stored as word ids -- confirm
        # against the loading code, which is not part of this excerpt).
        self.words = []
        # Length of the document; starts at 0.
        self.length = 0
class DataPreProcessing(object):
    """Holds the preprocessed corpus and its word -> id vocabulary."""

    def __init__(self):
        # Used by LDAModel as the row count of its per-document counts.
        self.docs_count = 0
        # Used by LDAModel as the row count of its per-word counts.
        self.words_count = 0
        # Documents of the corpus.
        self.docs = []
        # word -> id mapping; ordered so the cache file keeps insertion order.
        self.word2id = OrderedDict()

    def cachewordidmap(self, filepath=None):
        """Write the word -> id mapping to disk, one ``word<TAB>id`` per line.

        Args:
            filepath: Destination file. When omitted, the module-level
                ``wordidmapfile`` (taken from setting.conf) is used.

        The file is written as UTF-8 and overwritten if it already exists.
        """
        if filepath is None:
            filepath = wordidmapfile
        with codecs.open(filepath, 'w', 'utf-8') as f:
            for word, word_id in self.word2id.items():
                # Plain concatenation instead of str.format keeps non-ASCII
                # words intact on Python 2.
                f.write(word + "\t" + str(word_id) + "\n")
class?LDAModel(object):
????
????def?__init__(selfdpre):
????????self.dpre?=?dpre?#獲取預(yù)處理參數(shù)
????????#
????????#模型參數(shù)
????????#聚類個數(shù)K,迭代次數(shù)iter_times每個類特征詞個數(shù)top_words_num超參數(shù)α(alpha)?β(beta)
????????#
????????self.K?=?K
????????self.beta?=?beta
????????self.alpha?=?alpha
????????self.iter_times?=?iter_times
????????self.top_words_num?=?top_words_num?
????????#
????????#文件變量
????????#分好詞的文件trainfile
????????#詞對應(yīng)id文件wordidmapfile
????????#文章-主題分布文件thetafile
????????#詞-主題分布文件phifile
????????#每個主題topN詞文件topNfile
????????#最后分派結(jié)果文件tassginfile
????????#模型訓(xùn)練選擇的參數(shù)文件paramfile
????????#
????????self.wordidmapfile?=?wordidmapfile
????????self.trainfile?=?trainfile
????????self.thetafile?=?thetafile
????????self.phifile?=?phifile
????????self.topNfile?=?topNfile
????????self.tassginfile?=?tassginfile
????????self.paramfile?=?paramfile
????????#?p概率向量?double類型,存儲采樣的臨時變量
????????#?nw詞word在主題topic上的分布
????????#?nwsum每各topic的詞的總數(shù)
????????#?nd每個doc中各個topic的詞的總數(shù)
????????#?ndsum每各doc中詞的總數(shù)
????????self.p?=?np.zeros(self.K)????????
????????self.nw?=?np.zeros((self.dpre.words_countself.K)dtype=“int“)???????
????????self.nwsum?=?np.zeros(self.Kdtype=“int“)????
????????self.nd?=?np.zeros((self.dpre.docs_countself.K)dtype=“int“)???????
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2016-03-30 00:50  python-LDA-master\
     目錄           0  2016-06-25 13:39  python-LDA-master\data\
     文件        6148  2016-06-22 21:55  python-LDA-master\data\.DS_Store
     目錄           0  2016-07-20 09:41  __MACOSX\
     目錄           0  2016-07-20 09:41  __MACOSX\python-LDA-master\
     目錄           0  2016-07-20 09:41  __MACOSX\python-LDA-master\data\
     文件         120  2016-06-22 21:55  __MACOSX\python-LDA-master\data\._.DS_Store
     目錄           0  2016-03-30 00:50  python-LDA-master\data\tmp\
     文件         105  2016-06-25 21:41  python-LDA-master\data\tmp\model_parameter.dat
     目錄           0  2016-07-20 09:41  __MACOSX\python-LDA-master\data\tmp\
     文件         177  2016-06-25 21:41  __MACOSX\python-LDA-master\data\tmp\._model_parameter.dat
     文件     7203259  2016-06-25 21:41  python-LDA-master\data\tmp\model_phi.dat
     文件         177  2016-06-25 21:41  __MACOSX\python-LDA-master\data\tmp\._model_phi.dat
     文件     3207452  2016-06-25 21:42  python-LDA-master\data\tmp\model_tassign.dat
     文件         177  2016-06-25 21:42  __MACOSX\python-LDA-master\data\tmp\._model_tassign.dat
     文件     9792271  2016-06-25 21:41  python-LDA-master\data\tmp\model_theta.dat
     文件         177  2016-06-25 21:41  __MACOSX\python-LDA-master\data\tmp\._model_theta.dat
     文件       12237  2016-06-25 21:42  python-LDA-master\data\tmp\model_twords.dat
     文件         177  2016-06-25 21:42  __MACOSX\python-LDA-master\data\tmp\._model_twords.dat
     文件      532215  2016-06-25 21:25  python-LDA-master\data\tmp\wordidmap.dat
     文件         177  2016-06-25 21:25  __MACOSX\python-LDA-master\data\tmp\._wordidmap.dat
     文件         177  2016-03-30 00:50  __MACOSX\python-LDA-master\data\._tmp
     文件     3394059  2016-06-25 21:24  python-LDA-master\data\train.dat
     文件         177  2016-06-25 13:39  __MACOSX\python-LDA-master\._data
     文件        9501  2016-03-30 00:50  python-LDA-master\lda.py
     文件         177  2016-03-30 00:50  __MACOSX\python-LDA-master\._lda.py
     目錄           0  2016-06-25 00:36  python-LDA-master\log\
     文件        7356  2016-06-25 21:42  python-LDA-master\log\info.log
     文件           0  2016-03-30 00:50  python-LDA-master\log\info.log.2015-08-06
     目錄           0  2016-07-20 09:41  __MACOSX\python-LDA-master\log\
     文件         177  2016-03-30 00:50  __MACOSX\python-LDA-master\log\._info.log.2015-08-06
............此處省略11個文件信息
評論
共有 條評論