浏览代码

alternative to internal-policy fix

/develop/add-fire
Andrew Cohen 4 年前
当前提交
3013774b
共有 8 个文件被更改,包括 52 次插入43 次删除
  1. 57
      ml-agents/mlagents/trainers/ghost/trainer.py
  2. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  3. 4
      ml-agents/mlagents/trainers/sac/trainer.py
  4. 12
      ml-agents/mlagents/trainers/tests/test_ghost.py
  5. 4
      ml-agents/mlagents/trainers/tests/test_ppo.py
  6. 6
      ml-agents/mlagents/trainers/tests/test_sac.py
  7. 4
      ml-agents/mlagents/trainers/trainer/trainer.py
  8. 4
      ml-agents/mlagents/trainers/trainer_controller.py

57
ml-agents/mlagents/trainers/ghost/trainer.py


"""
parsed_behavior_id = self._name_to_parsed_behavior_id[name_behavior_id]
brain_name = parsed_behavior_id.brain_name
policy = self.trainer.get_policy(brain_name)
reload_weights = policy.get_weights()
policy.load_weights(self.current_policy_snapshot[brain_name])
self.trainer.save_model(name_behavior_id)
# reload
policy.load_weights(reload_weights)
self.trainer.save_model(brain_name)
def export_model(self, name_behavior_id: str) -> None:
"""

parsed_behavior_id = self._name_to_parsed_behavior_id[name_behavior_id]
brain_name = parsed_behavior_id.brain_name
policy = self.trainer.get_policy(brain_name)
policy.load_weights(self.current_policy_snapshot[brain_name])
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
"""
return self.trainer.create_policy(brain_parameters)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
) -> None:
"""
Adds policy to trainer. The first policy encountered sets the wrapped
The first policy encountered sets the wrapped
:param name_behavior_id: Behavior ID that the policy should belong to.
:param policy: Policy to associate with name_behavior_id.
name_behavior_id = parsed_behavior_id.behavior_id
policy = self.trainer.create_policy(parsed_behavior_id, brain_parameters)
policy.create_tf_graph()
policy.init_load_weights()
self.policies[name_behavior_id] = policy
policy.create_tf_graph()
self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id
# for saving/swapping snapshots
policy.init_load_weights()
internal_trainer_policy = self.trainer.create_policy(
parsed_behavior_id, brain_parameters
)
internal_trainer_policy.create_tf_graph()
internal_trainer_policy.init_load_weights()
] = policy.get_weights()
] = internal_trainer_policy.get_weights()
policy.load_weights(internal_trainer_policy.get_weights())
self.trainer.add_policy(parsed_behavior_id, policy)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
return policy
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
) -> None:
"""
Adds policy to GhostTrainer.
:param parsed_behavior_id: Behavior ID that the policy should belong to.
:param policy: Policy to associate with name_behavior_id.
"""
name_behavior_id = parsed_behavior_id.behavior_id
self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id
self.policies[name_behavior_id] = policy
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

4
ml-agents/mlagents/trainers/ppo/trainer.py


self._stats_reporter.add_stat(stat, val)
self._clear_update_buffer()
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
"""
Creates a PPO policy to trainers list of policies.
:param brain_parameters: specifications for policy construction

4
ml-agents/mlagents/trainers/sac/trainer.py


self.update_sac_policy()
self.update_reward_signals()
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
policy = NNPolicy(
self.seed,
brain_parameters,

12
ml-agents/mlagents/trainers/tests/test_ghost.py


trainer_params = dummy_config
trainer = PPOTrainer(mock_brain.brain_name, 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy(mock_brain)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
to_load_policy = trainer.create_policy(mock_brain)
to_load_policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()

)
# first policy encountered becomes policy trained by wrapped PPO
policy = trainer.create_policy(brain_params_team0)
policy = trainer.create_policy(parsed_behavior_id0, brain_params_team0)
policy = trainer.create_policy(brain_params_team1)
policy = trainer.create_policy(parsed_behavior_id1, brain_params_team1)
trainer.add_policy(parsed_behavior_id1, policy)
trajectory_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
trainer.subscribe_trajectory_queue(trajectory_queue1)

# First policy encountered becomes policy trained by wrapped PPO
# This queue should remain empty after swap snapshot
policy = trainer.create_policy(brain_params_team0)
policy = trainer.create_policy(parsed_behavior_id0, brain_params_team0)
policy = trainer.create_policy(brain_params_team1)
policy = trainer.create_policy(parsed_behavior_id1, brain_params_team1)
trainer.add_policy(parsed_behavior_id1, policy)
policy_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
trainer.publish_policy_queue(policy_queue1)

4
ml-agents/mlagents/trainers/tests/test_ppo.py


trainer_params["reward_signals"]["curiosity"]["encoding_size"] = 128
trainer = PPOTrainer(mock_brain.brain_name, 0, trainer_params, True, False, 0, "0")
policy = trainer.create_policy(mock_brain)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
trainer.add_policy(mock_brain.brain_name, policy)
# Test update with sequence length smaller than batch size
buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_brain)

dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = trainer.create_policy(brain_params)
policy = trainer.create_policy(brain_params.brain_name, brain_params)
trainer.add_policy(brain_params.brain_name, policy)
trajectory_queue = AgentManagerQueue("testbrain")
trainer.subscribe_trajectory_queue(trajectory_queue)

6
ml-agents/mlagents/trainers/tests/test_sac.py


trainer_params["model_path"] = str(tmpdir)
trainer_params["save_replay_buffer"] = True
trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0)
policy = trainer.create_policy(mock_brain)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
trainer.add_policy(mock_brain.brain_name, policy)
trainer.update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, policy.brain)

# Wipe Trainer and try to load
trainer2 = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, True, 0, 0)
policy = trainer2.create_policy(mock_brain)
policy = trainer2.create_policy(mock_brain.brain_name, mock_brain)
trainer2.add_policy(mock_brain.brain_name, policy)
assert trainer2.update_buffer.num_experiences == buffer_len

dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = trainer.create_policy(brain_params)
policy = trainer.create_policy(brain_params.brain_name, brain_params)
trainer.add_policy(brain_params.brain_name, policy)
trajectory_queue = AgentManagerQueue("testbrain")

4
ml-agents/mlagents/trainers/trainer/trainer.py


pass
@abc.abstractmethod
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
"""
Creates policy
"""

4
ml-agents/mlagents/trainers/trainer_controller.py


trainer = self.trainer_factory.generate(brain_name)
self.trainers[brain_name] = trainer
policy = trainer.create_policy(env_manager.external_brains[name_behavior_id])
policy = trainer.create_policy(
parsed_behavior_id, env_manager.external_brains[name_behavior_id]
)
trainer.add_policy(parsed_behavior_id, policy)
agent_manager = AgentManager(

正在加载...
取消
保存