def save_model(self, name_behavior_id: str) -> None:
    """
    Forwarding call to the wrapped trainer's save_model.
    Loads the latest snapshot weights, saves the model, then reloads
    the current policy weights before resuming training.
    """
    # resolve the brain name from the fully qualified behavior id
    brain_name = BehaviorIdentifiers.from_name_behavior_id(
        name_behavior_id
    ).brain_name
    policy = self.trainer.get_policy(brain_name)
    reload_weights = policy.get_weights()
    # save current snapshot to policy
    policy.load_weights(self.current_policy_snapshot[brain_name])
    self.trainer.save_model(brain_name)
    # reload the in-progress weights before resuming training
    policy.load_weights(reload_weights)
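
# Illustration (assumed shapes, not part of the trainer): current_policy_snapshot
# is assumed to map brain names to the flat weight lists that
# TFPolicy.get_weights() returns, so get_weights()/load_weights() can
# round-trip a policy's parameters:
#
#     weights = policy.get_weights()                  # List[np.ndarray]
#     self.current_policy_snapshot["Striker"] = weights
#     policy.load_weights(weights)                    # exact inverse
#
# ("Striker" is a hypothetical brain name used only for illustration.)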

def export_model(self, name_behavior_id: str) -> None:
    """
    Forwarding call to the wrapped trainer's export_model.
    First loads the latest snapshot.
    """
    brain_name = BehaviorIdentifiers.from_name_behavior_id(
        name_behavior_id
    ).brain_name
    policy = self.trainer.get_policy(brain_name)
    policy.load_weights(self.current_policy_snapshot[brain_name])
    self.trainer.export_model(brain_name)
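
# Unlike save_model, export_model does not restore the previous weights
# afterwards; the assumption is that export happens once training is finished,
# so the ghost policy's in-progress parameterization is no longer needed.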

def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
    """
    Creates a policy with the wrapped trainer's create_policy function.
    """
    return self.trainer.create_policy(brain_parameters)
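
# Delegating policy construction to the wrapped trainer (e.g. a PPO or SAC
# trainer) keeps ghost policies architecturally identical to the learning
# policy, which is what makes the weight swapping above well-defined.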

def add_policy(
    self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> None:
    """
    Adds policy to trainer. The first policy encountered sets the wrapped
    trainer team. This is to ensure that all agents from the same multi-agent
    team are grouped. All policies associated with this team are added to the
    wrapped trainer to be trained.
    :param parsed_behavior_id: Behavior identifiers that the policy should belong to.
    :param brain_parameters: specifications for policy construction
    """
    name_behavior_id = parsed_behavior_id.behavior_id
    team_id = parsed_behavior_id.team_id
    self.controller.subscribe_team_id(team_id, self)

    # the ghost-level policy that interacts with the environment
    policy = self.create_policy(brain_parameters)
    policy.create_tf_graph()
    policy.init_load_weights()

    # First policy or a new agent on the same team encountered
    if self.wrapped_trainer_team is None or team_id == self.wrapped_trainer_team:
        # creates an internal trainer policy. This always contains the current learning policy
        # parameterization and is the object the wrapped trainer uses to compute gradients.
        self.trainer.add_policy(parsed_behavior_id, brain_parameters)
        internal_trainer_policy = self.trainer.get_policy(
            parsed_behavior_id.brain_name
        )
        internal_trainer_policy.init_load_weights()
        self.current_policy_snapshot[
            parsed_behavior_id.brain_name
        ] = internal_trainer_policy.get_weights()

        # initialize ghost level policy to have the same weights
        policy.load_weights(internal_trainer_policy.get_weights())

        self._learning_team = self.controller.get_learning_team
        self.wrapped_trainer_team = team_id
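
# Example of the identifiers used above (hypothetical values): a behavior named
# "Striker?team=0" parses into a BehaviorIdentifiers with behavior_id
# "Striker?team=0", brain_name "Striker", and team_id 0. A second behavior
# "Striker?team=1" shares the brain name but subscribes team 1 to the
# controller, letting the GhostController alternate which team is learning.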