-
大小: 780KB 文件類型: .zip 金幣: 2 下載: 0 次 發布日期: 2021-05-13
- 語言: Python
- 標簽: q-learning
資源簡介
一個用 Python 語言來實現的 Q-learning 實例,供學習參考。

代碼片段和文件信息
#!/usr/bin/env python2
import random
import sys
class Agent:
    """Agent that chooses and executes actions on a Markov decision process.

    The agent holds a reference to an MDP object and its own current state.
    From the MDP it reads:

    * ``A_s`` -- mapping from a state to the sequence of actions available
      in that state;
    * ``S`` -- sequence of states, sampled uniformly for the initial state
      when ``P`` is None;
    * ``P`` -- either None or an iterable of ``(state, value)`` pairs that is
      scanned in order against a uniform random draw (presumably ``value``
      is a cumulative probability -- confirm against the MDP loader);
    * ``executeAction(a, s)`` -- returns a ``(next_state, reward)`` pair.
    """

    def __init__(self, MDP):
        """Store the MDP; the current state stays None until it is set."""
        self.MDP = MDP
        self.state = None

    def executeAction(self, a):
        """Execute action ``a`` from the current state on the MDP.

        Returns the ``(next_state, reward)`` pair reported by the MDP.  The
        agent's own ``state`` attribute is not modified here.
        """
        s2, r = self.MDP.executeAction(a, self.state)
        return s2, r

    def selectRandomAction(self):
        """Return a uniformly random action available in the current state."""
        return random.choice(self.MDP.A_s[self.state])

    def selectBestActionFromQTable(self, s, Q, delta=1e-10):
        """Return a greedy action for state ``s`` according to table ``Q``.

        ``Q[s][a]`` is the value of action ``a`` in state ``s``.  Every action
        whose value lies within ``delta`` of the maximum counts as a tie, and
        ties are broken uniformly at random.  Returns the placeholder
        ``'---'`` when there is no action to choose from.
        """
        # FIXME: use the V table
        # NOTE(review): the action set is taken from the agent's current
        # state, not from ``s`` -- callers are expected to pass
        # s == self.state; confirm before changing.
        actions = self.MDP.A_s[self.state]
        if not actions:
            return '---'

        # Take the maximum over the real values.  Seeding it with 0.0 would
        # make every action unselectable when all Q-values are negative.
        maxValue = max(Q[s][a] for a in actions)

        # Collect all the actions whose value equals the maximum.
        A = [a for a in actions if abs(Q[s][a] - maxValue) <= delta]

        # Pick a random action among the best ones.
        if A:
            return random.choice(A)
        return '---'

    def selectBestActionFromProbPolicy(self, s, Pi):
        """Sample an action for state ``s`` from the stochastic policy ``Pi``.

        ``Pi[s]`` maps each action to its probability; actions whose
        probability is not positive are ignored.  Raises ``ValueError`` when
        no action has a positive probability.
        """
        # TODO: avoid rebuilding the cumulative sum on every call.  It could
        # be done once when the policy is read, storing ordered
        # (action, cumulative sum) pairs so that only the draw and the scan
        # remain here.  PROBLEM: the policy may change.  Alternative: change
        # how the policy is loaded so that Pi[s] already arrives as
        # [(action, cumsum)].
        candidates = [(a, p) for a, p in Pi[s].items() if p > 0.0]
        if not candidates:
            raise ValueError(
                'no action with positive probability for state %r' % (s,))

        # Draw a number in the interval [0, 1).
        x = random.random()
        acum = 0.0
        for a, p in candidates:
            acum = acum + p
            if x <= acum:
                return a

        # Rounding can leave the total slightly below 1.0 (or the policy may
        # not be normalised); fall back to the last candidate instead of
        # failing on an unbound variable.
        return candidates[-1][0]

    def selectBestAction(self, s, source=None, Q=None, Pi=None):
        """Select an action for state ``s`` from the requested ``source``.

        ``source`` is ``'Q-Table'`` (uses ``Q``) or ``'Probabilistic Policy'``
        (uses ``Pi``).  Any other value terminates the program with exit
        status 1 after writing an error message to stderr.
        """
        if source == 'Q-Table':
            return self.selectBestActionFromQTable(s, Q)
        if source == 'Probabilistic Policy':
            return self.selectBestActionFromProbPolicy(s, Pi)
        # %-formatting also copes with source=None, where string
        # concatenation would raise TypeError before the exit was reached.
        sys.exit('ERROR: wrong source (%s)' % (source,))

    def setInitialState(self):
        """Set the current state, uniformly or from the MDP's ``P`` table."""
        if self.MDP.P is None:
            self.state = random.choice(self.MDP.S)
        else:
            self.state = self.setInitialStateByProb()

    def setInitialStateByProb(self):
        """Sample an initial state from the ``(state, value)`` pairs in P.

        Returns the state of the first pair whose value is at least the
        random draw, or the state of the last pair when the draw exceeds all
        of them.  Raises ``ValueError`` when ``P`` has no entries.
        """
        x = random.random()
        s = None
        seen = False
        for p in self.MDP.P:
            # Remember the latest state so that it is the fallback when the
            # draw is larger than every value in the table.
            s = p[0]
            seen = True
            if x <= p[1]:
                break
        if not seen:
            raise ValueError('the initial state distribution is empty')
        return s
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2012-09-20 23:06  Q-Learning-in-Python-master\
     文件         118  2012-09-20 23:06  Q-Learning-in-Python-master\.gitignore
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\README
     目錄           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\
     文件        2812  2012-09-20 23:06  Q-Learning-in-Python-master\src\Agent.py
     文件        4312  2012-09-20 23:06  Q-Learning-in-Python-master\src\MDP.py
     文件         929  2012-09-20 23:06  Q-Learning-in-Python-master\src\PRQL-interval.sh
     文件       11108  2012-09-20 23:06  Q-Learning-in-Python-master\src\PRQLearning.py
     文件         866  2012-09-20 23:06  Q-Learning-in-Python-master\src\QL-interval.sh
     文件        4539  2012-09-20 23:06  Q-Learning-in-Python-master\src\QLearning.py
     文件        2157  2012-09-20 23:06  Q-Learning-in-Python-master\src\QabLearning.py
     文件        6588  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-PRQL.py
     文件        1072  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-PRQL.sh
     文件        4277  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-QL.py
     文件         893  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL-QL.sh
     文件        1072  2012-09-20 23:06  Q-Learning-in-Python-master\src\RL.sh
     文件        1138  2012-09-20 23:06  Q-Learning-in-Python-master\src\meanError.py
     文件         302  2012-09-20 23:06  Q-Learning-in-Python-master\src\prepareFolders.py
     目錄           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\
     目錄           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\
     目錄           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\
     文件        9985  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\OOoLib.py
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\OOo\__init__.py
     文件           0  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\Danny\__init__.py
     文件        1695  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\apagaCelulasEmBranco.py
     文件          77  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\changeExtension.sh
     文件         220  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\compactaArquivos.sh
     文件         221  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\compactaArquivosSecundarios.sh
     文件          92  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\delBackupFiles.sh
     文件         188  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\delOutputFiles.sh
     文件         414  2012-09-20 23:06  Q-Learning-in-Python-master\src\tools\descompacta.sh
............此處省略63個文件信息
評論
共有 條評論