-
大小: 1.52MB  文件類型: .zip  金幣: 2  下載: 0 次  發布日期: 2023-08-30
- 語言: Python
- 標簽:
資源簡介
我為了你我設計這個挑戰:在這60天里深入學習“深度強化學習”。

代碼片段和文件信息
import?gym
import?random
from?collections?import?namedtuple
import?collections
import?numpy?as?np
import?matplotlib.pyplot?as?plt
def select_eps_greedy_action(table, obs, n_actions):
    """
    Select the action using an ε-greedy policy.

    With probability ``epsilon`` (module-level variable, read at call time) a
    uniformly random action is returned; otherwise the greedy action for
    ``obs`` according to ``table`` is returned.

    Args:
        table: mapping from ``(state, action)`` to the current Q-value estimate.
        obs: the current state/observation.
        n_actions: number of available actions; random actions are drawn
            from ``0 .. n_actions - 1``.

    Returns:
        The index of the selected action.
    """
    # Explore: random.randint is inclusive on both ends, hence the "- 1".
    if random.random() < epsilon:
        return random.randint(0, n_actions - 1)

    # Exploit: take the action with the highest Q-value for this state.
    _, action = best_action_value(table, obs)
    return action
def select_greedy_action(table, obs, n_actions):
    """
    Select the action using a greedy policy (take the best action according to the table).

    Args:
        table: mapping from ``(state, action)`` to the current Q-value estimate.
        obs: the current state/observation.
        n_actions: accepted for signature parity with
            ``select_eps_greedy_action``; not used by this function.

    Returns:
        The action index reported as best by ``best_action_value``.
    """
    _, action = best_action_value(table, obs)
    return action
def best_action_value(table, state, num_actions=None):
    """
    Explore the table and take the best action, i.e. the one that maximizes Q(s, a).

    Args:
        table: mapping from ``(state, action)`` to the current Q-value estimate.
        state: the state whose actions are compared.
        num_actions: number of actions to scan (``0 .. num_actions - 1``).
            When omitted, the module-level ``n_actions`` is used, which keeps
            existing two-argument callers working.

    Returns:
        A ``(max_value, best_action)`` tuple. On ties the lowest action index
        wins.
    """
    if num_actions is None:
        # Backward-compatible default: fall back to the global action count.
        num_actions = n_actions

    # Seed the search with action 0's real value rather than a literal 0, so
    # that states whose Q-values are all negative still yield the true maximum.
    best_action = 0
    max_value = table[(state, 0)]
    for action in range(1, num_actions):
        value = table[(state, action)]
        if value > max_value:
            best_action = action
            max_value = value

    return max_value, best_action
def Q_learning(table, obs0, obs1, reward, action):
    """
    Q-learning. Update Q(obs0, action) according to Q(obs1, *) and the reward just obtained.

    Reads the module-level ``GAMMA`` (discount factor) and ``LEARNING_RATE``.
    ``table`` is updated in place; nothing is returned.

    Args:
        table: mapping from ``(state, action)`` to the current Q-value estimate.
        obs0: state in which ``action`` was taken.
        obs1: state reached after taking ``action``.
        reward: reward received for the transition.
        action: the action taken in ``obs0``.
    """
    # Take the best value reachable from the state obs1
    best_value, _ = best_action_value(table, obs1)

    # Calculate Q-target value
    Q_target = reward + GAMMA * best_value

    # Calculate the Q-error between the target and the previous value
    Q_error = Q_target - table[(obs0, action)]

    # Update Q(obs0, action)
    table[(obs0, action)] += LEARNING_RATE * Q_error
def test_game(env, table, n_actions):
    """
    Test the new table by playing TEST_EPISODES games with the greedy policy.

    Args:
        env: environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple ``(next_obs, reward, done, info)``.
        table: mapping from ``(state, action)`` to the current Q-value estimate.
        n_actions: number of available actions, forwarded to
            ``select_greedy_action``.

    Returns:
        The mean total reward per game over ``TEST_EPISODES`` games.
    """
    reward_games = []
    for _ in range(TEST_EPISODES):
        obs = env.reset()
        rewards = 0
        while True:
            # Act greedily
            next_obs, reward, done, _ = env.step(select_greedy_action(table, obs, n_actions))
            obs = next_obs
            rewards += reward

            if done:
                reward_games.append(rewards)
                break

    return np.mean(reward_games)
# Some hyperparameters..
GAMMA = 0.95

# NB: the decay rate regulates the exploration-exploitation trade-off:
#     epsilon starts at 1 and is multiplied by EPS_DECAY_RATE so that it
#     decays towards 0
epsilon = 1.0
EPS_DECAY_RATE = 0.9993

LEARNING_RATE = 0.8

# .. and constants
TEST_EPISODES = 100
MAX_GAMES = 15001

# Create the environment
# env = gym.make('Taxi-v2')
env = gym.make("FrozenLake-v0")
obs = env.reset()

obs_length = env.observation_space.n
n_actions = env.action_space.n

reward_count = 0
games_count = 0

# Create and initialize the table with 0.0 (missing (state, action) keys read as 0.0)
table = collections.defaultdict(float)

test_rewards_list = []
while?games_count?
#?Select?the?action?following?an?ε-greedy?policy
action?=?select_eps_greedy_action(table?obs?n_actions)
next_obs?reward?done?_?=?env.step(action)
#?Update?the?Q-table
Q_learning(table?obs?next_obs?reward?action)
reward_count?+=?reward
obs?=?next_obs
if?done:
epsilon?*=?EPS_DECAY_RATE
#?Test?the?new?ta
 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\
     文件        1203  2018-09-15 09:48  60_Days_RL_Challenge-master\.gitignore
     文件        1075  2018-09-15 09:48  60_Days_RL_Challenge-master\LICENSE
     文件        7868  2018-09-15 09:48  60_Days_RL_Challenge-master\README.md
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\
     文件       47757  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.ipynb
     文件        3465  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\
     文件       60162  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\Q_function.png
     文件       88665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\frozenlake_v0.png
     文件       19080  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\short_diag.jpg
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\
     文件        5205  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\README.md
     文件        4258  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\agent.py
     文件        5665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\atari_wrappers.py
     文件        1860  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\buffers.py
     文件        3522  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\central_control.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\
     文件      106449  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\DQN_variations.png
     文件       21564  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\Dueling_img.png
     文件        8790  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\double_Qlearning_formula.png
     文件       14527  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\multistep_formula.png
     文件        8952  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\noisenet_formula.png
     文件      469317  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\pong_gif.gif
     文件        2184  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\main.py
     文件        4661  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\neural_net.py
     文件         395  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\utils.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\images\
     文件      716068  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo5.png
     文件       61360  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo6.png
評論
共有 條評論