Compare commits

...
This merge request has changes that conflict with the target branch.
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
/ml-agents/mlagents/trainers/torch/distributions.py
/ml-agents/mlagents/trainers/torch/components/bc/module.py

2 commits

Author         SHA1      Message                                                     Commit date
GitHub         3c1e98ca  Update ml-agents/mlagents/trainers/torch/distributions.py  4 years ago
vincentpierre  bcec7303  merging master                                              4 years ago
4 files changed, including 16 insertions and 14 deletions
  1. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  2. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (10 changes)
  3. ml-agents/mlagents/trainers/torch/components/bc/module.py (1 change)
  4. ml-agents/mlagents/trainers/torch/distributions.py (18 changes)

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)


            behavior_spec,
            self.trainer_settings,
            condition_sigma_on_obs=False,  # Faster training for PPO
            tanh_squash=True,
            separate_critic=True,  # Match network architecture with TF
        )
        return policy
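
For reference, a minimal sketch of what the tanh_squash flag passed above changes, written in plain PyTorch rather than the ml-agents TorchPolicy API: with squashing, raw Gaussian samples are passed through tanh so continuous actions always land in (-1, 1). The sample_action helper and the sizes below are illustrative only.

import torch

def sample_action(mu: torch.Tensor, log_sigma: torch.Tensor, tanh_squash: bool) -> torch.Tensor:
    # Draw an unbounded Gaussian sample, then optionally squash it with tanh.
    dist = torch.distributions.Normal(mu, torch.exp(log_sigma))
    raw = dist.sample()
    return torch.tanh(raw) if tanh_squash else raw

mu = torch.zeros(1, 2)
log_sigma = torch.zeros(1, 2)
print(sample_action(mu, log_sigma, tanh_squash=True))   # values in (-1, 1)
print(sample_action(mu, log_sigma, tanh_squash=False))  # unbounded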

ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (10 changes)


        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
        max_steps=15000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)


@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_sac(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.5)
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,

    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
    )
    check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2500
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

@pytest.mark.parametrize("num_visual", [1, 2])
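
These tests derive per-test trainer configs from a base config with attr.evolve, overriding only a few fields such as hyperparameters and max_steps. A hedged, self-contained sketch of that pattern follows; TrainerConfig and Hyperparameters are hypothetical stand-ins, not the real ml-agents settings classes.

import attr

@attr.s(auto_attribs=True, frozen=True)
class Hyperparameters:
    learning_rate: float = 3.0e-4
    buffer_size: int = 10240

@attr.s(auto_attribs=True, frozen=True)
class TrainerConfig:
    hyperparameters: Hyperparameters = Hyperparameters()
    max_steps: int = 10000

BASE_CONFIG = TrainerConfig()

# attr.evolve returns a copy with the named fields replaced, leaving the
# frozen base config untouched.
new_hyperparams = attr.evolve(BASE_CONFIG.hyperparameters, buffer_size=2048)
config = attr.evolve(BASE_CONFIG, hyperparameters=new_hyperparams, max_steps=2500)
print(config)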

ml-agents/mlagents/trainers/torch/components/bc/module.py (1 change)


        memories = []
        if self.policy.use_recurrent:
            memories = torch.zeros(1, self.n_sequences, self.policy.m_size)
        selected_actions, log_probs, _, _ = self.policy.sample_actions(
            tensor_obs,
            masks=act_masks,
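
A quick sketch of the memory placeholder built in this hunk: when the policy is recurrent, behavioral cloning feeds a zero initial memory shaped (1, n_sequences, m_size); otherwise an empty list is passed. The concrete sizes below are made up for illustration.

import torch

use_recurrent = True
n_sequences = 4   # hypothetical number of sequences in the BC batch
m_size = 128      # hypothetical memory size of the policy

memories = []
if use_recurrent:
    memories = torch.zeros(1, n_sequences, m_size)

print(memories.shape if use_recurrent else memories)  # torch.Size([1, 4, 128])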

ml-agents/mlagents/trainers/torch/distributions.py (18 changes)


        return squashed

    def _inverse_tanh(self, value):
        capped_value = torch.clamp(value, -1 + EPSILON, 1 - EPSILON)
        return 0.5 * torch.log((1 + capped_value) / (1 - capped_value) + EPSILON)
        return 0.5 * torch.log((1 + value) / (1 - value) + EPSILON)
        value = torch.clamp(value, -1 + EPSILON, 1 - EPSILON)
        return super().log_prob(unsquashed) - self.transform.log_abs_det_jacobian(
        result = super().log_prob(unsquashed) - self.transform.log_abs_det_jacobian(
        return torch.clamp(result, -20, 20)


class CategoricalDistInstance(DiscreteDistInstance):
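
The two numerical-stability tricks that appear in this hunk are: clamp the input of the inverse tanh slightly inside (-1, 1), since atanh diverges at the boundaries, and clamp the corrected log-probability of a tanh-squashed Gaussian sample so extreme values cannot poison the loss. A hedged sketch in plain PyTorch (not the ml-agents dist classes); the EPSILON value is an assumption, ml-agents defines its own constant.

import torch

EPSILON = 1e-7  # assumed value

def inverse_tanh(value: torch.Tensor) -> torch.Tensor:
    # atanh blows up at +/-1, so cap the input slightly inside (-1, 1).
    capped = torch.clamp(value, -1 + EPSILON, 1 - EPSILON)
    return 0.5 * torch.log((1 + capped) / (1 - capped) + EPSILON)

def squashed_log_prob(dist: torch.distributions.Normal, squashed: torch.Tensor) -> torch.Tensor:
    unsquashed = inverse_tanh(squashed)
    # Change-of-variables correction for a = tanh(u): log p(a) = log p(u) - log(1 - a^2).
    correction = torch.log(1 - squashed.pow(2) + EPSILON)
    result = dist.log_prob(unsquashed) - correction
    # Clamp so extreme samples cannot propagate infinities into the update.
    return torch.clamp(result, -20, 20)

dist = torch.distributions.Normal(torch.zeros(3), torch.ones(3))
action = torch.tanh(dist.sample())
print(squashed_log_prob(dist, action))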

            bias_init=Initialization.Zero,
        )
        self.tanh_squash = tanh_squash
        if conditional_sigma:
        if self.conditional_sigma:
            self.log_sigma = linear_layer(
                hidden_size,
                num_outputs,

            )
                torch.zeros(1, num_outputs, requires_grad=True)
                torch.ones(1, num_outputs, requires_grad=True)
            torch.nn.init.constant_(self.log_sigma.data, -1.1)
            # Note: we initialize the output of log_sigma around log(1/3)

    def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
        mu = self.mu(inputs)
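
Two log-sigma variants appear in this hunk: a state-conditioned linear head when conditional_sigma is on, and a single state-independent learned parameter otherwise, initialized at -1.1, which is approximately log(1/3), so the initial standard deviation is about 0.33. A hedged sketch follows; hidden_size and num_outputs are made-up values and nn.Linear stands in for the repo's linear_layer helper.

import torch
from torch import nn

hidden_size, num_outputs = 64, 2
conditional_sigma = False

if conditional_sigma:
    # Sigma depends on the observation: one extra output head.
    log_sigma = nn.Linear(hidden_size, num_outputs)
else:
    # Sigma is state-independent: a single trainable vector.
    log_sigma = nn.Parameter(torch.zeros(1, num_outputs, requires_grad=True))
    nn.init.constant_(log_sigma.data, -1.1)
    print(torch.exp(log_sigma))  # ~0.33 per action dimension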

        # Expand so that entropy matches batch size. Note that we're using
        # mu*0 here to get the batch size implicitly since Barracuda 1.2.1
        # throws error on runtime broadcasting due to unknown reason. We
        # use this to replace torch.expand() becuase it is not supported in
        # use this to replace torch.expand() because it is not supported in
        log_sigma = mu * 0 + self.log_sigma
        log_sigma = mu * 0 + torch.clamp(self.log_sigma, -20, 2)
        if self.tanh_squash:
            return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
        else:
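
To illustrate the broadcast-and-clamp line above: adding mu * 0 tiles the (1, num_outputs) log-sigma parameter to the batch size without torch.expand(), and clamping to [-20, 2] bounds the resulting standard deviation to roughly [2e-9, 7.4]. A hedged, standalone sketch; batch_size, num_outputs, and the stand-in tensors are made up.

import torch

batch_size, num_outputs = 5, 2
mu = torch.randn(batch_size, num_outputs)             # stand-in network output
log_sigma_param = torch.full((1, num_outputs), -1.1)  # stand-in learned parameter

# mu * 0 contributes nothing numerically but forces broadcasting to batch size.
log_sigma = mu * 0 + torch.clamp(log_sigma_param, -20, 2)
print(log_sigma.shape)          # torch.Size([5, 2]) -- matches the batch
print(torch.exp(log_sigma)[0])  # sigma ~= 0.33 per action dimension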
