             self.trainer_settings.max_steps,
         )
         self.policy_optimizer = torch.optim.Adam(
-            policy_params, lr=hyperparameters.learning_rate
+            policy_params,
+            lr=hyperparameters.learning_rate,
+            weight_decay=hyperparameters.weight_decay,
         )
         self.value_optimizer = torch.optim.Adam(
-            value_params, lr=hyperparameters.learning_rate
+            value_params,
+            lr=hyperparameters.learning_rate,
+            weight_decay=hyperparameters.weight_decay,
         )
         self.entropy_optimizer = torch.optim.Adam(
             self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
         )
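
For reference, torch.optim.Adam treats weight_decay as an L2-style penalty applied to every parameter in its group at each step, so the change above regularizes the policy and value parameters while leaving the entropy coefficient unregularized. Below is a minimal, self-contained sketch of the same construction; the networks, hyperparameter values, and helper names are illustrative stand-ins, not the trainer's actual fields.

import torch
import torch.nn as nn

# Stand-in hyperparameter values; in the trainer these come from the settings object.
learning_rate = 3.0e-4
weight_decay = 1.0e-5

# Toy networks standing in for the actual policy and value networks.
policy_net = nn.Linear(8, 2)
value_net = nn.Linear(8, 1)
# Trainable log entropy coefficient, analogous to self._log_ent_coef.
log_ent_coef = nn.Parameter(torch.zeros(1))

policy_params = list(policy_net.parameters())
value_params = list(value_net.parameters())

# Same pattern as above: weight decay on policy/value parameters, none on the
# entropy coefficient.
policy_optimizer = torch.optim.Adam(
    policy_params, lr=learning_rate, weight_decay=weight_decay
)
value_optimizer = torch.optim.Adam(
    value_params, lr=learning_rate, weight_decay=weight_decay
)
entropy_optimizer = torch.optim.Adam([log_ent_coef], lr=learning_rate)

# If a decayed learning rate is computed elsewhere (e.g. over max_steps), the
# usual PyTorch way to apply it is to overwrite "lr" in each param group.
def apply_learning_rate(optimizer: torch.optim.Optimizer, lr: float) -> None:
    for group in optimizer.param_groups:
        group["lr"] = lr

# One illustrative update of the policy parameters.
obs = torch.randn(16, 8)
loss = policy_net(obs).pow(2).mean()
policy_optimizer.zero_grad()
loss.backward()
policy_optimizer.step()

Calling apply_learning_rate(policy_optimizer, decayed_lr) before a step would make the decayed rate take effect on that and subsequent updates.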