比较提交

...

2 次代码提交

作者 SHA1 备注 提交日期
vincentpierre 5d384292 forgot one 4 年前
vincentpierre 983982ee Removing misleading learning rate 4 年前
共有 4 个文件被更改，包括 6 次插入和 16 次删除
  1. 12
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  2. 4
      ml-agents/mlagents/trainers/poca/optimizer_torch.py
  3. 2
      ml-agents/mlagents/trainers/torch/components/bc/module.py
  4. 4
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py

12
ml-agents/mlagents/trainers/sac/optimizer_torch.py


1e-10,
self.trainer_settings.max_steps,
)
self.policy_optimizer = torch.optim.Adam(
policy_params, lr=hyperparameters.learning_rate
)
self.value_optimizer = torch.optim.Adam(
value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
)
self.policy_optimizer = torch.optim.Adam(policy_params)
self.value_optimizer = torch.optim.Adam(value_params)
self.entropy_optimizer = torch.optim.Adam(self._log_ent_coef.parameters())
self._move_to_device(default_device())
@property

4
ml-agents/mlagents/trainers/poca/optimizer_torch.py


self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate
)
self.optimizer = torch.optim.Adam(params)
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


learning_rate_schedule, self.current_lr, 1e-10, self._anneal_steps
)
params = self.policy.actor.parameters()
self.optimizer = torch.optim.Adam(params, lr=self.current_lr)
self.optimizer = torch.optim.Adam(params)
_, self.demonstration_buffer = demo_to_buffer(
settings.demo_path, policy.sequence_length, policy.behavior_spec
)

4
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate
)
self.optimizer = torch.optim.Adam(params)
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

正在加载...
取消
保存