浏览代码

Fix bug in add_rewards_output and add test (#2442)

/develop-gpu-test
GitHub 6 年前
当前提交
43696d60
共有 2 个文件被更改,包括 33 次插入和 2 次删除
  1. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 31
      ml-agents/mlagents/trainers/tests/test_ppo.py

4
ml-agents/mlagents/trainers/ppo/trainer.py


for name, reward_result in rewards_out.reward_signals.items():
# 0 because we use the scaled reward to train the agent
self.training_buffer[agent_id]["{}_rewards".format(name)].append(
reward_result.scaled_reward[agent_idx]
reward_result.scaled_reward[agent_next_idx]
values[name][agent_next_idx][0]
values[name][agent_idx][0]
)
def is_ready_update(self):

31
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.rl_trainer import AllRewardsOutput
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.envs import UnityEnvironment, BrainParameters
from mlagents.envs.mock_communicator import MockCommunicator

trainer.increment_step(5)
policy_mock.increment_step.assert_called_with(5)
assert trainer.step == 10
def test_add_rewards_output(dummy_config):
    """Check which indices add_rewards_outputs uses when filling the buffer.

    The reward stored in the training buffer is expected to come from
    ``agent_next_idx`` and the value estimate from ``agent_idx``.

    :param dummy_config: trainer configuration fixture; its summary and
        model paths are overwritten here before the trainer is built.
    """
    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    dummy_config["summary_path"] = "./summaries/test_trainer_summary"
    dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # The two agents get different rewards on purpose: with identical values
    # the reward assertion below would pass no matter which index was read.
    rewardsout = AllRewardsOutput(
        reward_signals={
            "extrinsic": RewardSignalResult(
                scaled_reward=np.array([-1.0, 1.0]),
                unscaled_reward=np.array([-1.0, 1.0]),
            )
        },
        environment=np.array([1.0, 1.0]),
    )
    # Only one row of value estimates, so reading it with next_idx instead of
    # idx raises IndexError and fails the test.
    values = {"extrinsic": np.array([[2.0]])}
    agent_id = "123"
    idx = 0
    # Rewards must be taken from next_idx; reading them from idx would store
    # -1.0 and trip the reward assertion.
    next_idx = 1

    trainer.add_rewards_outputs(
        rewardsout,
        values=values,
        agent_id=agent_id,
        agent_idx=idx,
        agent_next_idx=next_idx,
    )

    agent_buffer = trainer.training_buffer[agent_id]
    assert agent_buffer["extrinsic_value_estimates"][0] == 2.0
    assert agent_buffer["extrinsic_rewards"][0] == 1.0
if __name__ == "__main__":
正在加载...
取消
保存