ml-agents/ml-agents/mlagents/trainers/tests/test_rl_trainer.py


								import yaml

								from unittest import mock

								import pytest

								import mlagents.trainers.tests.mock_brain as mb

								from mlagents.trainers.trainer.rl_trainer import RLTrainer

								from mlagents.trainers.tests.test_buffer import construct_fake_buffer

								from mlagents.trainers.agent_processor import AgentManagerQueue


								def dummy_config():

								    return yaml.safe_load(

								        """

								        output_path: "test/"

								        summary_freq: 1000

								        max_steps: 100

								        reward_signals:

								          extrinsic:

								            strength: 1.0

								            gamma: 0.99

								        """

								    )


								def create_mock_brain():

								    mock_brain = mb.create_mock_brainparams(

								        vector_action_space_type="continuous",

								        vector_action_space_size=[2],

								        vector_observation_space_size=8,

								        number_visual_observations=1,

								    )

								    return mock_brain


								# Add concrete implementations of abstract methods

								class FakeTrainer(RLTrainer):

								    def set_is_policy_updating(self, is_updating):

								        self.update_policy = is_updating


								    def get_policy(self, name_behavior_id):

								        return mock.Mock()


								    def _is_ready_update(self):

								        return True


								    def _update_policy(self):

								        return self.update_policy


								    def add_policy(self):

								        pass


								    def create_policy(self):

								        return mock.Mock()


								    def _process_trajectory(self, trajectory):

								        super()._process_trajectory(trajectory)


								def create_rl_trainer():

								    mock_brainparams = create_mock_brain()

								    trainer = FakeTrainer(mock_brainparams, dummy_config(), True, 0)

								    trainer.set_is_policy_updating(True)

								    return trainer


								def test_rl_trainer():

								    trainer = create_rl_trainer()

								    agent_id = "0"

								    trainer.collected_rewards["extrinsic"] = {agent_id: 3}

								    # Test end episode

								    trainer.end_episode()

								    for rewards in trainer.collected_rewards.values():

								        for agent_id in rewards:

								            assert rewards[agent_id] == 0


								def test_clear_update_buffer():

								    trainer = create_rl_trainer()

								    trainer.update_buffer = construct_fake_buffer(0)

								    trainer._clear_update_buffer()

								    for _, arr in trainer.update_buffer.items():

								        assert len(arr) == 0


								@mock.patch("mlagents.trainers.trainer.rl_trainer.RLTrainer._clear_update_buffer")

								def test_advance(mocked_clear_update_buffer):

								    trainer = create_rl_trainer()

								    trajectory_queue = AgentManagerQueue("testbrain")

								    policy_queue = AgentManagerQueue("testbrain")

								    trainer.subscribe_trajectory_queue(trajectory_queue)

								    trainer.publish_policy_queue(policy_queue)

								    time_horizon = 10

								    trajectory = mb.make_fake_trajectory(

								        length=time_horizon,

								        max_step_complete=True,

								        vec_obs_size=1,

								        num_vis_obs=0,

								        action_space=[2],

								    )

								    trajectory_queue.put(trajectory)


								    trainer.advance()

								    policy_queue.get_nowait()

								    # Check that get_step is correct

								    assert trainer.get_step == time_horizon

								    # Check that we can turn off the trainer and that the buffer is cleared

								    for _ in range(0, 5):

								        trajectory_queue.put(trajectory)

								        trainer.advance()

								        # Check that there is stuff in the policy queue

								        policy_queue.get_nowait()


								    # Check that if the policy doesn't update, we don't push it to the queue

								    trainer.set_is_policy_updating(False)

								    for _ in range(0, 10):

								        trajectory_queue.put(trajectory)

								        trainer.advance()

								        # Check that there nothing  in the policy queue

								        with pytest.raises(AgentManagerQueue.Empty):

								            policy_queue.get_nowait()


								    # Check that the buffer has been cleared

								    assert not trainer.should_still_train

								    assert mocked_clear_update_buffer.call_count > 0