浏览代码

wrapped trainer has internal policy ghost

/internal-policy-ghost
Andrew Cohen 5 年前
当前提交
3de78baa
共有 5 个文件被更改，包括 31 次插入和 32 次删除
  1. 35
      ml-agents/mlagents/trainers/ghost/trainer.py
  2. 10
      ml-agents/mlagents/trainers/ppo/trainer.py
  3. 10
      ml-agents/mlagents/trainers/sac/trainer.py
  4. 2
      ml-agents/mlagents/trainers/trainer/trainer.py
  5. 6
      ml-agents/mlagents/trainers/trainer_controller.py

35
ml-agents/mlagents/trainers/ghost/trainer.py


def save_model(self, name_behavior_id: str) -> None:
"""
Forwarding call to the wrapped trainer's save_model.
Loads the latest policy weights, saves it, then reloads
the current policy weights before resuming training.
policy = self.trainer.get_policy(brain_name)
reload_weights = policy.get_weights()
# save current snapshot to policy
policy.load_weights(self.current_policy_snapshot[brain_name])
self.trainer.save_model(name_behavior_id)
# reload
policy.load_weights(reload_weights)
self.trainer.save_model(brain_name)
First loads the latest snapshot.
policy = self.trainer.get_policy(brain_name)
policy.load_weights(self.current_policy_snapshot[brain_name])
self.trainer.export_model(brain_name)
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:

return self.trainer.create_policy(brain_parameters)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> None:
"""
Adds policy to trainer. The first policy encountered sets the wrapped

name_behavior_id = parsed_behavior_id.behavior_id
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)
policy = self.create_policy(brain_parameters)
policy.init_load_weights()
policy.create_tf_graph()
policy.create_tf_graph()
policy.init_load_weights()
# creates an internal trainer policy. This always contains the current learning policy
# parameterization and is the object the wrapped trainer uses to compute gradients.
self.trainer.add_policy(parsed_behavior_id, brain_parameters)
internal_trainer_policy = self.trainer.get_policy(
parsed_behavior_id.brain_name
)
internal_trainer_policy.init_load_weights()
internal_trainer_policy.create_tf_graph()
] = policy.get_weights()
] = internal_trainer_policy.get_weights()
# initialize ghost level policy to have the same weights
policy.load(internal_trainer_policy.get_weights())
self.trainer.add_policy(parsed_behavior_id, policy)
self._learning_team = self.controller.get_learning_team
self.wrapped_trainer_team = team_id

10
ml-agents/mlagents/trainers/ppo/trainer.py


self._check_param_keys()
self.load = load
self.seed = seed
self.policy: NNPolicy = None # type: ignore
self.policy: TFPolicy = None # type: ignore
def _check_param_keys(self):
super()._check_param_keys()

return policy
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> None:
"""
Adds policy to trainer.

self.__class__.__name__
)
)
if not isinstance(policy, NNPolicy):
raise RuntimeError("Non-NNPolicy passed to PPOTrainer.add_policy()")
self.policy = policy
self.policy = self.create_policy(brain_parameters)
self.step = policy.get_current_step()
self.step = self.policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:

10
ml-agents/mlagents/trainers/sac/trainer.py


self._check_param_keys()
self.load = load
self.seed = seed
self.policy: NNPolicy = None # type: ignore
self.policy: TFPolicy = None # type: ignore
self.optimizer: SACOptimizer = None # type: ignore
self.step = 0

self._stats_reporter.add_stat(stat, np.mean(stat_list))
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> None:
"""
Adds policy to trainer.

self.__class__.__name__
)
)
if not isinstance(policy, NNPolicy):
raise RuntimeError("Non-SACPolicy passed to SACTrainer.add_policy()")
self.policy = policy
self.policy = self.create_policy(brain_parameters)
self.step = policy.get_current_step()
self.step = self.policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:

2
ml-agents/mlagents/trainers/trainer/trainer.py


@abc.abstractmethod
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> None:
"""
Adds policy to trainer.

6
ml-agents/mlagents/trainers/trainer_controller.py


trainer = self.trainer_factory.generate(brain_name)
self.trainers[brain_name] = trainer
policy = trainer.create_policy(env_manager.external_brains[name_behavior_id])
trainer.add_policy(parsed_behavior_id, policy)
trainer.add_policy(
parsed_behavior_id, env_manager.external_brains[name_behavior_id]
)
policy = trainer.get_policy(name_behavior_id)
agent_manager = AgentManager(
policy,

正在加载...
取消
保存