|
|
|
|
|
|
brain_name = parsed_behavior_id.brain_name |
|
|
|
self.trainer.export_model(brain_name) |
|
|
|
|
|
|
|
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy: |
|
|
|
def create_policy( |
|
|
|
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters |
|
|
|
) -> TFPolicy: |
|
|
|
""" |
|
|
|
return self.trainer.create_policy(brain_parameters) |
|
|
|
|
|
|
|
def add_policy( |
|
|
|
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters |
|
|
|
) -> None: |
|
|
|
""" |
|
|
|
Adds policy to trainer. The first policy encountered sets the wrapped |
|
|
|
The first policy encountered sets the wrapped |
|
|
|
:param name_behavior_id: Behavior ID that the policy should belong to. |
|
|
|
:param policy: Policy to associate with name_behavior_id. |
|
|
|
name_behavior_id = parsed_behavior_id.behavior_id |
|
|
|
team_id = parsed_behavior_id.team_id |
|
|
|
self.controller.subscribe_team_id(team_id, self) |
|
|
|
policy = self.create_policy(brain_parameters) |
|
|
|
policy = self.trainer.create_policy(parsed_behavior_id, brain_parameters) |
|
|
|
self.policies[name_behavior_id] = policy |
|
|
|
|
|
|
|
self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id |
|
|
|
# for saving/swapping snapshots |
|
|
|
team_id = parsed_behavior_id.team_id |
|
|
|
self.controller.subscribe_team_id(team_id, self) |
|
|
|
# creates an internal trainer policy. This always contains the current learning policy |
|
|
|
# parameterization and is the object the wrapped trainer uses to compute gradients. |
|
|
|
self.trainer.add_policy(parsed_behavior_id, brain_parameters) |
|
|
|
internal_trainer_policy = self.trainer.get_policy( |
|
|
|
parsed_behavior_id.brain_name |
|
|
|
internal_trainer_policy = self.trainer.create_policy( |
|
|
|
parsed_behavior_id, brain_parameters |
|
|
|
|
|
|
|
# initialize ghost level policy to have the same weights |
|
|
|
|
|
|
|
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy) |
|
|
|
return policy |
|
|
|
|
|
|
|
def add_policy(
    self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
) -> None:
    """
    Adds policy to GhostTrainer.

    The policy is stored in ``self.policies`` under
    ``parsed_behavior_id.behavior_id``, and the parsed identifier itself is
    stored in ``self._name_to_parsed_behavior_id`` under that same key.

    :param parsed_behavior_id: Behavior ID that the policy should belong to.
    :param policy: Policy to associate with ``parsed_behavior_id.behavior_id``.
    """
    name_behavior_id = parsed_behavior_id.behavior_id
    # Record the parsed identifier first so it can be looked up by the same
    # key the policy is registered under.
    self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id
    self.policies[name_behavior_id] = policy
|
|
|
|
|
|
|
def get_policy(self, name_behavior_id: str) -> TFPolicy: |
|
|
|
""" |
|
|
|