91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 1.52MB
    文件類型: .zip
    金幣: 2
    下載: 0 次
    發布日期: 2023-08-30
  • 語言: Python
  • 標簽:

資源簡介

我為了你我設計這個挑戰:在這60天里深入學習“深度強化學習”。

資源截圖

代碼片段和文件信息

import?gym
import?random
from?collections?import?namedtuple
import?collections
import?numpy?as?np
import?matplotlib.pyplot?as?plt


def select_eps_greedy_action(table, obs, n_actions):
    '''
    Select the action using an ε-greedy policy (add a randomness ε to the choice of the action).

    table: mapping keyed by (state, action) tuples holding the Q-values.
    obs: the current state.
    n_actions: number of available actions; random actions are drawn from 0..n_actions-1.

    Returns a random action when the exploration test fires, otherwise the
    action reported by best_action_value for this state.

    NOTE(review): the comparison in the exploration test was lost when this
    snippet was extracted; `< epsilon` (the module-level exploration rate) is
    a reconstruction -- confirm against the original file.
    '''
    # The greedy lookup runs first, exactly as in the original statement order.
    _, action = best_action_value(table, obs)

    if random.random() < epsilon:
        # random.randint is inclusive on both ends, hence n_actions - 1.
        return random.randint(0, n_actions - 1)
    return action


def select_greedy_action(table, obs, n_actions):
    '''
    Select the action using a greedy policy (take the best action according to the table).

    table: mapping keyed by (state, action) tuples holding the Q-values.
    obs: the current state.
    n_actions: accepted for signature parity with select_eps_greedy_action;
        it is not used in this function.

    Returns the action reported by best_action_value for this state.
    '''
    _, action = best_action_value(table, obs)
    return action


def best_action_value(table, state, num_actions=None):
    '''
    Exploring the table, take the best action that maximizes Q(s, a).

    table: mapping keyed by (state, action) tuples holding the Q-values.
    state: the state whose actions are compared.
    num_actions: how many actions (0..num_actions-1) to scan. When omitted,
        the module-level n_actions is used, as the original code did.

    Returns a (max_value, best_action) tuple. Ties keep the lowest action
    index. With no actions to scan the result is (0, 0).
    '''
    if num_actions is None:
        num_actions = n_actions
    if num_actions <= 0:
        return 0, 0

    # Seed the maximum with the first action's value instead of a literal 0,
    # so states whose Q-values are all negative still yield their true maximum.
    best_action = 0
    max_value = table[(state, 0)]
    for action in range(1, num_actions):
        value = table[(state, action)]
        if value > max_value:
            best_action = action
            max_value = value

    return max_value, best_action


def Q_learning(table, obs0, obs1, reward, action):
    '''
    Q-learning. Update Q(obs0, action) according to Q(obs1, *) and the reward just obtained.

    table: mapping keyed by (state, action) tuples holding the Q-values; updated in place.
    obs0: the state the action was taken from.
    obs1: the state reached after the action.
    reward: the reward received for the transition.
    action: the action that was taken in obs0.

    Uses the module-level GAMMA and LEARNING_RATE. Returns None.
    '''
    # Take the best value reachable from the state obs1
    best_value, _ = best_action_value(table, obs1)

    # Calculate the Q-target value
    Q_target = reward + GAMMA * best_value

    # Calculate the Q-error between the target and the previous value
    Q_error = Q_target - table[(obs0, action)]

    # Update Q(obs0, action)
    table[(obs0, action)] += LEARNING_RATE * Q_error


def test_game(env, table, n_actions):
    '''
    Test the table by playing TEST_EPISODES games with the greedy policy.

    env: environment object; reset() must return the first observation and
        step(action) a 4-tuple (next_obs, reward, done, info).
    table: mapping keyed by (state, action) tuples holding the Q-values.
    n_actions: number of available actions, forwarded to select_greedy_action.

    Returns the mean of the per-game total rewards.
    '''
    reward_games = []
    for _ in range(TEST_EPISODES):
        obs = env.reset()
        rewards = 0
        while True:
            # Act greedily
            next_obs, reward, done, _ = env.step(select_greedy_action(table, obs, n_actions))
            obs = next_obs
            rewards += reward

            if done:
                reward_games.append(rewards)
                break

    return np.mean(reward_games)


# Some hyperparameters..
# Factor applied to the best next-state value in the Q-target.
GAMMA = 0.95

# NB: the decay rate allows regulating the Exploration - Exploitation trade-off:
#     start with an EPSILON of 1 and decay it towards 0
epsilon = 1.0
EPS_DECAY_RATE = 0.9993

# Step size applied to the Q-error in each table update.
LEARNING_RATE = 0.8

# .. and constants
TEST_EPISODES = 100
MAX_GAMES = 15001

# Create the environment
# env = gym.make('Taxi-v2')
env = gym.make("FrozenLake-v0")
obs = env.reset()

obs_length = env.observation_space.n
n_actions = env.action_space.n

reward_count = 0
games_count = 0

# Create and initialize the table with 0.0
# (a defaultdict(float) returns 0.0 for any (state, action) key not yet stored)
table = collections.defaultdict(float)

test_rewards_list = []

while?games_count?
#?Select?the?action?following?an?ε-greedy?policy
action?=?select_eps_greedy_action(table?obs?n_actions)
next_obs?reward?done?_?=?env.step(action)

#?Update?the?Q-table
Q_learning(table?obs?next_obs?reward?action)

reward_count?+=?reward
obs?=?next_obs

if?done:
epsilon?*=?EPS_DECAY_RATE

#?Test?the?new?ta

 屬性            大小     日期    時間   名稱
----------- ---------  ---------- -----  ----
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\
     文件        1203  2018-09-15 09:48  60_Days_RL_Challenge-master\.gitignore
     文件        1075  2018-09-15 09:48  60_Days_RL_Challenge-master\LICENSE
     文件        7868  2018-09-15 09:48  60_Days_RL_Challenge-master\README.md
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\
     文件       47757  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.ipynb
     文件        3465  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\frozenlake_Qlearning.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\
     文件       60162  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\Q_function.png
     文件       88665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\frozenlake_v0.png
     文件       19080  2018-09-15 09:48  60_Days_RL_Challenge-master\Week2\img\short_diag.jpg
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\
     文件        5205  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\README.md
     文件        4258  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\agent.py
     文件        5665  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\atari_wrappers.py
     文件        1860  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\buffers.py
     文件        3522  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\central_control.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\
     文件      106449  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\DQN_variations.png
     文件       21564  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\Dueling_img.png
     文件        8790  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\double_Qlearning_formula.png
     文件       14527  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\multistep_formula.png
     文件        8952  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\noisenet_formula.png
     文件      469317  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\imgs\pong_gif.gif
     文件        2184  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\main.py
     文件        4661  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\neural_net.py
     文件         395  2018-09-15 09:48  60_Days_RL_Challenge-master\Week3\utils.py
     目錄           0  2018-09-15 09:48  60_Days_RL_Challenge-master\images\
     文件      716068  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo5.png
     文件       61360  2018-09-15 09:48  60_Days_RL_Challenge-master\images\logo6.png

評論

共有 條評論

相關資源