91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

資源簡介

根據《Reinforcement Learning: An Introduction》裡的策略梯度方法，控制 OpenAI Gym 庫裡的倒立擺（CartPole）。

資源截圖

代碼片段和文件信息

"""
Translated from https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp
Algorithm described at https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node89.html
Some minor adjustments to constants were made to make the program work on environments
besides Mountain Car.
"""

import random
import math
import numpy as np
import gym
import sys

# Fixed seed for NumPy's global random generator.
np.random.seed(0)

# Environment shared by every function in this module, and the directory
# name handed to the monitor in main().
env = gym.make('CartPole-v0')
outdir = 'CartPole'

initial_epsilon = 0.1  # probability of choosing a random action (changed from original value of 0.0)
alpha = 0.5  # learning rate
lambda_ = 0.9  # trace decay rate
gamma = 1.0  # discount rate
# NOTE(review): the comment below says the value was changed, yet it names the
# same number (3000) as the original -- confirm which value was intended.
N = 3000  # memory for storing parameters (changed from original value of 3000)

M = env.action_space.n  # number of discrete actions; sizes the action-value array
NUM_TILINGS = 10  # number of tile indices kept per action
NUM_TILES = 8  # scale applied to each normalised state variable before tiling

def main():
    """Train for 500 episodes on the module-level ``env`` while monitoring.

    Starts the environment monitor writing to ``outdir``, allocates a zeroed
    parameter vector of length ``N`` that is shared across all episodes, and
    runs ``episode`` 500 times.  After each episode it prints the episode
    index followed by whatever ``episode`` returned, then multiplies the
    exploration rate by 0.999.

    The monitor is closed even if an episode raises, so a failed run does not
    leave the recording open.
    """
    env.monitor.start(outdir)

    try:
        epsilon = initial_epsilon
        theta = np.zeros(N)  # parameters (memory)

        # range() and the formatted print() behave the same on Python 2 and 3
        # and emit the same "index result" line as the original print statement.
        for episode_num in range(500):
            result = episode(epsilon, theta, env.spec.timestep_limit)
            print('{} {}'.format(episode_num, result))
            epsilon = epsilon * 0.999  # added epsilon decay
    finally:
        env.monitor.close()

def?episode(epsilon?theta?max_steps):
????Q?=?np.zeros(M)?#?action?values
????e?=?np.zeros(N)?#?eligibility?traces
????F?=?np.zeros((M?NUM_TILINGS)?dtype=np.int32)?#?features?for?each?action

????def?load_F(observation):
????????state_vars?=?[]
????????for?i?var?in?enumerate(observation):
????????????range_?=?(env.observation_space.high[i]?-?env.observation_space.low[i])
????????????#?in?CartPole?there?is?no?range?on?the?velocities?so?default?to?1
????????????if?range_?==?float(‘inf‘):
????????????????range_?=?1
????????????state_vars.append(var?/?range_?*?NUM_TILES)

????????for?a?in?xrange(M):
????????????F[a]?=?get_tiles(NUM_TILINGS?state_vars?N?a)

????def?load_Q():
????????for?a?in?xrange(M):
????????????Q[a]?=?0
????????????for?j?in?xrange(NUM_TILINGS):
????????????????Q[a]?+=?theta[F[aj]]

????observation?=?env.reset()
????load_F(observation)
????load_Q()
????action?=?np.argmax(Q)?#?numpy?argmax?chooses?first?in?a?tie?not?random?like?original?implementation
????if?np.random.random()?????????action?=?env.action_space.sample()

????step?=?0
????while?True:
????????step?

評論

共有 條評論