            1e-10,
            self.trainer_settings.max_steps,
        )
        self.policy_optimizer = torch.optim.Adam(
            policy_params, lr=hyperparameters.learning_rate
        )
        self.value_optimizer = torch.optim.Adam(
            value_params, lr=hyperparameters.learning_rate
        )
        self.entropy_optimizer = torch.optim.Adam(
            self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
        )
        self._move_to_device(default_device())
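
        # A minimal sketch of how these three optimizers might be driven during an
        # update step, assuming the decayed learning rate configured above is re-read
        # each step and that ModelUtils.update_learning_rate,
        # self.policy.get_current_step(), and the individual losses (policy_loss, etc.)
        # are available elsewhere in this class:
        #
        #     decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
        #     ModelUtils.update_learning_rate(self.policy_optimizer, decay_lr)
        #     self.policy_optimizer.zero_grad()
        #     policy_loss.backward()
        #     self.policy_optimizer.step()
        #
        # with the same update_learning_rate/zero_grad/backward/step pattern repeated
        # for self.value_optimizer and self.entropy_optimizer with their own losses.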

    @property