|
|
|
|
|
|
self.trainer_settings.max_steps, |
|
|
|
) |
|
|
|
self.policy_optimizer = torch.optim.Adam( |
|
|
|
policy_params, lr=hyperparameters.learning_rate |
|
|
|
policy_params, lr=hyperparameters.learning_rate, weight_decay=1e-6 |
|
|
|
value_params, lr=hyperparameters.learning_rate |
|
|
|
value_params, lr=hyperparameters.learning_rate, weight_decay=1e-6 |
|
|
|
) |
|
|
|
self.entropy_optimizer = torch.optim.Adam( |
|
|
|
self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate |
|
|
|