|
|
|
|
|
|
def _is_ready_update(self) -> bool: |
|
|
|
""" |
|
|
|
Returns whether or not the trainer has enough elements to run update model |
|
|
|
:return: A boolean corresponding to whether or not update_model() can be run |
|
|
|
:return: A boolean corresponding to whether or not _update_policy() can be run |
|
|
|
""" |
|
|
|
return ( |
|
|
|
self.update_buffer.num_experiences >= self.trainer_parameters["batch_size"] |
|
|
|
|
|
|
@timed
def _update_policy(self) -> None:
    """
    Update the SAC policy, then update the reward signals.

    This method only delegates: it calls update_sac_policy() followed by
    update_reward_signals(), in that order, and returns nothing.

    NOTE(review): any gating on train_interval, reward_signal_train_interval
    or the steps_per_update ratio is not visible in this method; presumably
    it is handled inside the two helpers -- confirm against their
    implementations.
    """
    self.update_sac_policy()
    self.update_reward_signals()
|
|
|