from unittest import mock
import pytest

import attr

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import (  # noqa: F401  (reward-signal fixtures; exact module path and names assumed)
    curiosity_dummy_config,
    gail_dummy_config,
)
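
# Minimal stand-ins for the trainer config and the dummy_config fixture referenced by
# the tests below; the original file defines these with explicit hyperparameters
# (values assumed here -- default TrainerSettings are enough for the mocked tests).
PPO_CONFIG = TrainerSettings()


@pytest.fixture
def dummy_config():
    return TrainerSettings()
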
@mock.patch.object(RLTrainer, "create_saver")
@mock.patch("mlagents.trainers.ppo.trainer.PPOOptimizer")
def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
    trainer_params = PPO_CONFIG
    # Stub out the optimizer so no TF graph is built during the test.
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    ppo_optimizer.return_value = mock_optimizer

    trainer = PPOTrainer("test_brain", 0, trainer_params, True, False, 0, "0")
    policy_mock = mock.Mock(spec=TFPolicy)
    policy_mock.get_current_step.return_value = 0
    step_count = 5
    policy_mock.increment_step = mock.Mock(return_value=step_count)
    behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
    trainer.add_policy(behavior_id, policy_mock)

    # Incrementing the trainer step should be forwarded to the policy.
    trainer._increment_step(5, trainer.brain_name)
    policy_mock.increment_step.assert_called_with(5)
    assert trainer.step == step_count

def test_process_trajectory(dummy_config):
    # Trajectory construction and processing elided; assumed to feed a fake trajectory
    # through the trainer so that extrinsic-reward stats are reported (test name assumed).
    ...
    assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0

@mock.patch.object(RLTrainer, "create_saver")
@mock.patch("mlagents.trainers.ppo.trainer.PPOOptimizer")
def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
    mock_optimizer = mock.Mock()
    mock_optimizer.reward_signals = {}
    ppo_optimizer.return_value = mock_optimizer

    trainer = PPOTrainer("test_policy", 0, dummy_config, True, False, 0, "0")
    # The mocked policy reports 2000 steps already taken; add_policy should pick this up.
    policy = mock.Mock(spec=TFPolicy)
    policy.get_current_step.return_value = 2000

    behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
    trainer.add_policy(behavior_id, policy)
    assert trainer.get_policy("test_policy") == policy

    # Make sure the summary steps were loaded properly
    assert trainer.get_step == 2000