|
|
|
|
|
|
behavior_spec: BehaviorSpec, |
|
|
|
create_graph: bool = False, |
|
|
|
) -> TFPolicy: |
|
|
|
""" |
|
|
|
Creates a policy with a Tensorflow backend and SAC hyperparameters |
|
|
|
:param parsed_behavior_id: |
|
|
|
:param behavior_spec: specifications for policy construction |
|
|
|
:param create_graph: whether to create the tensorflow graph on construction |
|
|
|
:return: policy |
|
|
|
""" |
|
|
|
policy = TFPolicy( |
|
|
|
self.seed, |
|
|
|
behavior_spec, |
|
|
|
|
|
|
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec |
|
|
|
) -> TorchPolicy: |
|
|
|
""" |
|
|
|
Creates a SAC policy for the trainer's list of policies. |
|
|
|
Creates a policy with a PyTorch backend and SAC hyperparameters |
|
|
|
:param parsed_behavior_id: |
|
|
|
:param behavior_spec: specifications for policy construction |
|
|
|
:return: policy |
|
|
|