
Add option to not condition sigma on obs

/develop/nopreviousactions
Ervin Teng, 4 years ago
Current commit
be9d772e
2 changed files with 21 additions and 9 deletions
  1. ml-agents/mlagents/trainers/common/nn_policy.py (29 changed lines)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (1 changed line)
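
The new flag toggles how the Gaussian policy's standard deviation is produced: learned from the observation (a layer on the policy's hidden state) or held as one state-independent trainable vector. Below is a minimal NumPy sketch of the two parameterizations; shapes, bounds, and variable names are illustrative, not the ml-agents code.

    import numpy as np

    rng = np.random.default_rng(0)
    hidden_policy = rng.normal(size=(4, 128))  # batch of policy hidden states
    act_size = 2                               # continuous action dimensions

    # condition_sigma_on_obs=True: log_sigma is a learned function of the
    # observation (here, a small linear layer), so exploration noise can
    # vary per state.
    w = rng.normal(scale=0.01, size=(128, act_size))
    log_sigma_conditioned = hidden_policy @ w  # shape (4, 2), one row per state

    # condition_sigma_on_obs=False: log_sigma is one trainable vector shared
    # by all states (zero-initialized, i.e. sigma starts at 1).
    log_sigma_shared = np.zeros(act_size)      # shape (2,), state-independent

    # Either variant is clipped and exponentiated to get sigma (bounds assumed).
    LOG_STD_MIN, LOG_STD_MAX = -20.0, 2.0
    sigma = np.exp(np.clip(log_sigma_shared, LOG_STD_MIN, LOG_STD_MAX))
    print(log_sigma_conditioned.shape, sigma)  # (4, 2) [1. 1.]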

ml-agents/mlagents/trainers/common/nn_policy.py (29 changed lines)


         load: bool,
         tanh_squash: bool = False,
         resample: bool = False,
+        condition_sigma_on_obs: bool = True,
         create_tf_graph: bool = True,
     ):
         """

         )
         self.tanh_squash = tanh_squash
         self.resample = resample
+        self.condition_sigma_on_obs = condition_sigma_on_obs
         self.trainable_variables: List[tf.Variable] = []
         # Non-exposed parameters; these aren't exposed because they don't have a

                     self.vis_encode_type,
                     self.tanh_squash,
                     self.resample,
+                    self.condition_sigma_on_obs,
                 )
             else:
                 self._create_dc_actor(

         vis_encode_type: EncoderType,
         tanh_squash: bool = False,
         resample: bool = False,
+        condition_sigma_on_obs: bool = True,
     ) -> None:
         """
         Creates Continuous control actor-critic model.

                 reuse=tf.AUTO_REUSE,
             )
-            # Policy-dependent log_sigma_sq
-            log_sigma = tf.layers.dense(
-                hidden_policy,
-                self.act_size[0],
-                activation=None,
-                name="log_std",
-                kernel_initializer=LearningModel.scaled_init(0.01),
-            )
+            # Policy-dependent log_sigma
+            if condition_sigma_on_obs:
+                log_sigma = tf.layers.dense(
+                    hidden_policy,
+                    self.act_size[0],
+                    activation=None,
+                    name="log_std",
+                    kernel_initializer=LearningModel.scaled_init(0.01),
+                )
+            else:
+                log_sigma = tf.get_variable(
+                    "log_sigma_squared",
+                    [self.act_size[0]],
+                    dtype=tf.float32,
+                    initializer=tf.zeros_initializer(),
+                )
             log_sigma = tf.clip_by_value(log_sigma, self.log_std_min, self.log_std_max)
             sigma = tf.exp(log_sigma)
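
In both branches, log_sigma is clipped to [self.log_std_min, self.log_std_max] and exponentiated; with condition_sigma_on_obs=False the resulting sigma is a single vector that broadcasts across the batch. A NumPy sketch (hypothetical helper, not the ml-agents distribution code) of how mu and sigma then parameterize the sampled actions:

    import numpy as np

    def gaussian_sample_and_log_prob(mu, sigma, rng):
        """Sample from N(mu, sigma^2) per dimension; return actions and log-probs."""
        eps = rng.normal(size=mu.shape)
        actions = mu + sigma * eps
        log_prob = (
            -0.5 * ((actions - mu) / sigma) ** 2
            - np.log(sigma)
            - 0.5 * np.log(2.0 * np.pi)
        )
        return actions, log_prob.sum(axis=-1)

    rng = np.random.default_rng(0)
    mu = rng.normal(size=(4, 2))  # state-dependent mean from the mu head
    sigma = np.exp(np.zeros(2))   # state-independent sigma broadcasts over the batch
    actions, log_prob = gaussian_sample_and_log_prob(mu, sigma, rng)
    print(actions.shape, log_prob.shape)  # (4, 2) (4,)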

ml-agents/mlagents/trainers/ppo/trainer.py (1 changed line)


             self.trainer_parameters,
             self.is_training,
             self.load,
+            condition_sigma_on_obs=False,  # Faster training for PPO
             create_tf_graph=False,  # We will create the TF graph in the Optimizer
         )
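
The in-line comment gives the rationale: per the author, a state-independent sigma trains faster for PPO. One concrete property consistent with that, sketched below in NumPy (standard diagonal-Gaussian entropy formula, not ml-agents code), is that the policy entropy then depends only on the shared log_sigma vector, so an entropy bonus acts on that one set of parameters directly rather than flowing through the observation encoder.

    import numpy as np

    def diag_gaussian_entropy(log_sigma):
        """Entropy of N(mu, diag(sigma^2)); independent of mu and of the state."""
        return np.sum(log_sigma + 0.5 * np.log(2.0 * np.pi * np.e))

    log_sigma = np.zeros(2)  # the shared variable from the else-branch above
    print(diag_gaussian_entropy(log_sigma))  # about 2.8379 nats for 2 action dims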
