91av视频/亚洲h视频/操亚洲美女/外国一级黄色毛片 - 国产三级三级三级三级

  • 大小: 6KB
    文件類型: .py
    金幣: 1
    下載: 1 次
    發布日期: 2021-06-07
  • 語言: Python
  • 標簽:

資源簡介

基于TensorFlow實現的PPO算法,依賴庫:tensorflow-1.4及以上,gym

資源截圖

代碼片段和文件信息

import?tensorflow?as?tf
import?numpy?as?np
import?gym
import?copy

class?PPO:
????def?__init__(self?n_features?n_actions):
????????self.n_actions?=?n_actions
????????self.n_features?=?n_features
????????self.learning_rate?=?0.0015
????????self.sess?=?tf.Session()
????????self.observe?=?tf.placeholder(tf.float32?[None?self.n_features])
????????self.v?self.act_prob?self.params?=?self._build_net(‘pi‘?train=True)
????????_?self.act_prob_old?self.params_old?=?self._build_net(‘old_pi‘?train=False)
????????self._get_loss()
????????self.sess.run(tf.global_variables_initializer())

????def?_build_net(self?name?train):
????????with?tf.variable_scope(name):
????????????initer?=?tf.initializers.truncated_normal(0.0?0.1)
????????????hidden?=?tf.layers.dense(self.observe?20?tf.nn.tanh?trainable=train)
????????????hidden?=?tf.layers.dense(hidden?20?tf.nn.tanh?trainable=train)
????????????v?=?tf.layers.dense(hidden?1?activation=None?trainable=train)

????????????hidden1?=?tf.layers.dense(self.observe?20?tf.nn.tanh?trainable=train)
????????????hidden1?=?tf.layers.dense(hidden1?20?tf.nn.tanh?trainable=train)
????????????hidden1?=?tf.layers.dense(hidden1?self.n_actions?tf.nn.tanh?trainable=train)
????????????act_prob?=?tf.layers.dense(hidden1?self.n_actions?tf.nn.softmax?trainable=train)

????????params?=?tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES?scope=name)
????????return?v?act_prob?params

????def?_get_loss(self):
????????self.adv?=?tf.placeholder(tf.float32?[None])
????????self.v_next?=?tf.placeholder(tf.float32?[None])
????????self.action?=?tf.placeholder(tf.int32?[None])
????????self.reward?=?tf.placeholder(tf.float32?[None])

????????td_error?=?self.reward?+?0.95*self.v_next?-?self.v
????????v_loss?=?tf.reduce_mean(tf.square(td_error))

????????act_encode?=?tf.one_hot(self.action?self.n_actions)

????????prob?=?tf.reduce_sum(self.act_prob*act_encode?axis=1)
????????prob_old?=?tf.reduce_sum(self.act_prob_old*act_encode?axis=1)

????????ratio?=?tf.exp(tf.log(tf.clip_by_value(prob?1e-10?1.0))?-?tf.log(tf.clip_by_value(prob_old?1e-10?1.0)))
????????clip_ratio?=?tf.clip_by_value(ratio?1.0-0.2?1.0+0.2)
????????clip_loss?=?tf.reduce_mean(tf.minimum(ratio*self.adv?clip_ratio*self.adv))

????????entroy_loss?=?-tf.reduce_mean(tf.reduce_sum(self.act_prob*tf.log(tf.clip_by_value(self.act_prob?1e-10?1.0))?axis=1))

????????self.total_loss?=?clip_loss?-?v_loss?+?0.01*entroy_loss
????????learning_rate?=?tf.train.exponential_decay(0.0015?0?200?0.95)
????????self.train_op?=?tf.train.AdamOptimizer(learning_rate).minimize(-self.total_loss)
????????self.old_pi_update?=?[tf.assign(t?e)?for?t?e?in?zip(self.params_old?self.params)]

????def?learn(self?observe?v_pred?adv?reward?act):
????????loss?_?=?self.sess.run([self.total_loss?self.train_op]
??????????????????????feed_dict={self.observe:?observe?self.v_next:?v_pred
?????

評論

共有 條評論