
fix tests

/develop/add-fire/ckpt-2
Ruo-Ping Dong, 4 years ago
Current commit: e06812aa
11 changed files with 50 additions and 129 deletions
  1. ml-agents/mlagents/trainers/ghost/trainer.py (5 changes)
  2. ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)
  4. ml-agents/mlagents/trainers/sac/trainer.py (9 changes)
  5. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (27 changes)
  6. ml-agents/mlagents/trainers/tests/test_nn_policy.py (59 changes)
  7. ml-agents/mlagents/trainers/tests/test_ppo.py (10 changes)
  8. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (29 changes)
  9. ml-agents/mlagents/trainers/tests/test_sac.py (2 changes)
  10. ml-agents/mlagents/trainers/tests/test_tf_policy.py (20 changes)
  11. ml-agents/mlagents/trainers/trainer/trainer.py (5 changes)

ml-agents/mlagents/trainers/ghost/trainer.py (5 changes)


         return policy

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        create_saver: bool = True,
     ) -> None:
         """
         Adds policy to GhostTrainer.
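
The same signature change recurs in the PPO, SAC, and base Trainer classes below. As a rough, self-contained sketch of the pattern (hypothetical SimpleTrainer, not the ml-agents class): a keyword argument defaulting to True keeps existing call sites creating a saver, while unit tests can opt out.

class SimpleTrainer:
    def __init__(self):
        self.saver = None
        self.policy = None

    def add_policy(self, parsed_behavior_id, policy, create_saver=True):
        self.policy = policy
        if self.saver is None and create_saver:
            self.saver = f"saver-for-{parsed_behavior_id}"  # stand-in for a real saver object

trainer = SimpleTrainer()
trainer.add_policy("BrainA", policy="policy-object")  # default: a saver is created
assert trainer.saver is not None

test_trainer = SimpleTrainer()
test_trainer.add_policy("BrainA", policy="mock-policy", create_saver=False)  # tests opt out
assert test_trainer.saver is None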

ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)


 from mlagents.trainers.settings import TrainerSettings, TestingConfiguration
 from mlagents.trainers.trajectory import SplitObservations
-from mlagents.trainers.torch.networks import SharedActorCritic, SeparateActorCritic
+from mlagents.trainers.torch.networks import (
+    SharedActorCritic,
+    SeparateActorCritic,
+    GlobalSteps,
+)
 from mlagents.trainers.torch.utils import ModelUtils

 EPSILON = 1e-7  # Small value to avoid divide by zero
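
The newly imported GlobalSteps is the torch policy's step counter; its real definition lives in mlagents.trainers.torch.networks. Purely as an illustrative guess at the idea (a step count kept as module state so it round-trips through checkpoints), a minimal version could look like this; it is not the ml-agents implementation:

import torch
from torch import nn

class GlobalStepsSketch(nn.Module):  # hypothetical, for illustration only
    def __init__(self):
        super().__init__()
        # A registered buffer is part of state_dict(), so the count survives save/load.
        self.register_buffer("steps", torch.zeros(1, dtype=torch.int64))

    def increment(self, n):
        self.steps += n

    @property
    def current_step(self):
        return int(self.steps.item())

counter = GlobalStepsSketch()
counter.increment(10)
assert counter.current_step == 10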

ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)


         return policy

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        create_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.

         for _reward_signal in self.optimizer.reward_signals.keys():
             self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
-        if self.saver is None:
+        if self.saver is None and create_saver:
             self.saver = self.create_saver(
                 self.framework,
                 policy,
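
A side note on the collected_rewards context lines above: defaultdict(lambda: 0) lets reward totals accumulate per agent without checking whether the key already exists. A small standalone illustration, with hypothetical agent ids:

from collections import defaultdict

collected_rewards = {"extrinsic": defaultdict(lambda: 0)}
collected_rewards["extrinsic"]["agent-1"] += 1.5   # key created on first use
collected_rewards["extrinsic"]["agent-1"] += 0.5
assert collected_rewards["extrinsic"]["agent-1"] == 2.0
assert collected_rewards["extrinsic"]["agent-2"] == 0  # unseen ids default to 0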

ml-agents/mlagents/trainers/sac/trainer.py (9 changes)


         :param artifact_path: The directory within which to store artifacts from this trainer.
         """
         super().__init__(
-            brain_name, trainer_settings, training, artifact_path, reward_buff_cap
+            brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
         )
         self.load = load

             self._stats_reporter.add_stat(stat, np.mean(stat_list))

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        create_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.

         for _reward_signal in self.optimizer.reward_signals.keys():
             self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
-        if self.saver is None:
+        if self.saver is None and create_saver:
             self.saver = self.create_saver(
                 self.framework,
                 policy,

ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (27 changes)


import os
import tempfile
import pytest
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.settings import TrainerSettings
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings


def test_barracuda_converter():

    # cleanup
    os.remove(tmpfile)


@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
    tf.reset_default_graph()
    dummy_config = TrainerSettings()
    policy = create_policy_mock(
        dummy_config,
        use_rnn=rnn,
        model_path=os.path.join(tmpdir, "test"),
        use_discrete=discrete,
        use_visual=visual,
    )
    settings = SerializationSettings(policy.model_path, "MockBrain")
    checkpoint_path = f"{tmpdir}/MockBrain-1"
    policy.checkpoint(checkpoint_path, settings)
    # These checks taken from test_barracuda_converter
    assert os.path.isfile(checkpoint_path + ".nn")
    assert os.path.getsize(checkpoint_path + ".nn") > 100
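
For readers unfamiliar with the stacked parametrize decorators on test_policy_conversion: each decorator multiplies the case count, so the three together produce 2 x 2 x 2 = 8 runs, and the ids lists give each run a readable name. A minimal standalone illustration:

import pytest

@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_all_combinations(rnn, visual, discrete):
    # Runs 8 times, e.g. as test_all_combinations[rnn-visual-discrete].
    assert isinstance(rnn, bool) and isinstance(visual, bool) and isinstance(discrete, bool)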

ml-agents/mlagents/trainers/tests/test_nn_policy.py (59 changes)


import pytest
import os
import unittest
import tempfile
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tf.models import ModelUtils, Tensor3DShape

from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers import __version__

VECTOR_ACTION_SPACE = 2

    use_rnn: bool = False,
    use_discrete: bool = True,
    use_visual: bool = False,
    model_path: str = "",
    load: bool = False,
    seed: int = 0,
) -> TFPolicy:
    mock_spec = mb.setup_test_behavior_specs(

    )
    policy = TFPolicy(seed, mock_spec, trainer_settings)
    return policy


def test_load_save(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params, model_path=path1)
    policy.initialize_or_load()
    policy._set_step(2000)
    mock_brain_name = "MockBrain"
    checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
    serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
    policy.checkpoint(checkpoint_path, serialization_settings)
    assert len(os.listdir(tmp_path)) > 0
    # Try load from this path
    policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
    policy2.initialize_or_load()
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000
    # Try initialize from path 1
    trainer_params.output_path = path2
    trainer_params.init_path = path1
    policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
    policy3.initialize_or_load()
    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0


class ModelVersionTest(unittest.TestCase):
    def test_version_compare(self):
        # Test write_stats
        with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
            path1 = tempfile.mkdtemp()
            trainer_params = TrainerSettings()
            policy = create_policy_mock(trainer_params, model_path=path1)
            policy.initialize_or_load()
            policy._check_model_version(
                "0.0.0"
            )  # This is not the right version for sure
            # Assert that 1 warning has been thrown with incorrect version
            assert len(cm.output) == 1
            policy._check_model_version(__version__)  # This should be the right version
            # Assert that no additional warnings have been thrown wth correct ver
            assert len(cm.output) == 1


def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
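
ModelVersionTest above relies on unittest's assertLogs context manager. A self-contained illustration of how it captures records and why the test can count warnings through cm.output:

import logging
import unittest

class AssertLogsExample(unittest.TestCase):
    def test_single_warning_captured(self):
        logger = logging.getLogger("mlagents.trainers")
        with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
            logger.warning("model was trained with a different version")
            logger.info("below the WARNING threshold, so not captured")
        self.assertEqual(len(cm.output), 1)  # cm.output holds the formatted records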

ml-agents/mlagents/trainers/tests/test_ppo.py (10 changes)


     )


-@mock.patch("mlagents.trainers.ppo.trainer.PPOOptimizer")
-def test_trainer_increment_step(ppo_optimizer):
+@mock.patch("mlagents.trainers.ppo.trainer.TFPPOOptimizer")
+def test_trainer_increment_step(ppo_optimizer, dummy_config):
     trainer_params = PPO_CONFIG
     mock_optimizer = mock.Mock()
     mock_optimizer.reward_signals = {}

     )
     policy_mock.increment_step = mock.Mock(return_value=step_count)
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy_mock)
+    trainer.add_policy(behavior_id, policy_mock, create_saver=False)
     trainer._increment_step(5, trainer.brain_name)
     policy_mock.increment_step.assert_called_with(5)

     assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0


-@mock.patch("mlagents.trainers.ppo.trainer.PPOOptimizer")
+@mock.patch("mlagents.trainers.ppo.trainer.TFPPOOptimizer")
 def test_add_get_policy(ppo_optimizer, dummy_config):
     mock_optimizer = mock.Mock()
     mock_optimizer.reward_signals = {}

     policy.get_current_step.return_value = 2000
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy)
+    trainer.add_policy(behavior_id, policy, create_saver=False)
     assert trainer.get_policy("test_policy") == policy
     # Make sure the summary steps were loaded properly
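
The decorator change above only retargets which optimizer class gets patched. As a reminder of the mechanics, with hypothetical names rather than the ml-agents modules: mock.patch replaces the named attribute for the duration of the test and hands the replacement in as an extra argument, which is why the tests can preset reward_signals on it.

from unittest import mock

class Optimizer:
    """Stand-in for the class the module under test would instantiate."""

@mock.patch(f"{__name__}.Optimizer")
def check_patching(optimizer_mock):
    optimizer_mock.reward_signals = {}  # preconfigure the mock, as the PPO tests do
    assert Optimizer is optimizer_mock  # the module-level name now points at the mock

check_patching()  # after the call returns, Optimizer is restored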

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (29 changes)


from unittest import mock
import os
import pytest
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.checkpoint_manager import NNCheckpoint

    def _update_policy(self):
        return self.update_policy

    def add_policy(self, mock_behavior_id, mock_policy):
    def add_policy(self, mock_behavior_id, mock_policy, create_saver=True):
        def checkpoint_path(brain_name, step):
            return os.path.join(self.saver.model_path, f"{brain_name}-{step}")

        mock_saver = mock.Mock()
        mock_saver.model_path = self.artifact_path
        mock_saver.save_checkpoint.side_effect = checkpoint_path
        self.saver = mock_saver

    def create_policy(self):
        return mock.Mock()

    def create_torch_policy(self, parsed_behavior_id, behavior_spec):
        return mock.Mock()

    def create_tf_policy(self, parsed_behavior_id, behavior_spec):
        return mock.Mock()


def create_rl_trainer():
    trainer = FakeTrainer(

        False,
        "mock_model_path",
        0,
    )
    trainer.set_is_policy_updating(True)


def test_advance(mocked_clear_update_buffer, mocked_save_model):
    trainer = create_rl_trainer()
    mock_policy = mock.Mock()
    mock_policy.model_path = "mock_model_path"
    trainer.add_policy("TestBrain", mock_policy)
    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")


def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
    trainer = create_rl_trainer()
    mock_policy = mock.Mock()
    mock_policy.model_path = "mock_model_path"
    trainer.add_policy("TestBrain", mock_policy)
    trajectory_queue = AgentManagerQueue("testbrain")
    policy_queue = AgentManagerQueue("testbrain")

    checkpoint_range = range(
        checkpoint_interval, num_trajectories * time_horizon, checkpoint_interval
    )
    calls = [
        mock.call(f"{mock_policy.model_path}/{trainer.brain_name}-{step}", mock.ANY)
        for step in checkpoint_range
    ]
    mock_policy.checkpoint.assert_has_calls(calls, any_order=True)
    calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]
    trainer.saver.save_checkpoint.assert_has_calls(calls, any_order=True)
    add_checkpoint_calls = [
        mock.call(

            f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
            f"{trainer.saver.model_path}/{trainer.brain_name}-{step}.nn",
            None,
            mock.ANY,
        ),
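
The FakeTrainer changes above wire up a mock saver whose save_checkpoint returns a computed path. Two unittest.mock features make that work, shown here in isolation with hypothetical names: a callable side_effect derives the mock's return value from its call arguments, and assert_has_calls verifies the expected calls afterwards.

from unittest import mock

mock_saver = mock.Mock()
mock_saver.model_path = "mock_model_path"
# side_effect as a callable: the return value is computed from the arguments.
mock_saver.save_checkpoint.side_effect = (
    lambda brain_name, step: f"{mock_saver.model_path}/{brain_name}-{step}"
)

paths = [mock_saver.save_checkpoint("TestBrain", step) for step in (500, 1000)]
assert paths == ["mock_model_path/TestBrain-500", "mock_model_path/TestBrain-1000"]

# assert_has_calls checks the recorded calls, here without caring about order.
mock_saver.save_checkpoint.assert_has_calls(
    [mock.call("TestBrain", 500), mock.call("TestBrain", 1000)], any_order=True
)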

ml-agents/mlagents/trainers/tests/test_sac.py (2 changes)


     policy = mock.Mock(spec=TFPolicy)
     policy.get_current_step.return_value = 2000
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy)
+    trainer.add_policy(behavior_id, policy, create_saver=False)
     assert trainer.get_policy(behavior_id.behavior_id) == policy
     # Make sure the summary steps were loaded properly

ml-agents/mlagents/trainers/tests/test_tf_policy.py (20 changes)


from mlagents.model_serialization import SerializationSettings
from unittest import mock
from mlagents.trainers.settings import TrainerSettings
import numpy as np

    # Test dev versions
    result = TFPolicy._convert_version_string("200.300.100.dev0")
    assert result == (200, 300, 100)


@mock.patch("mlagents.trainers.policy.tf_policy.export_policy_model")
@mock.patch("time.time", mock.MagicMock(return_value=12345))
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
    mock_brain = basic_mock_brain()
    test_seed = 4  # moving up in the world
    policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
    n_steps = 5
    policy.get_current_step = MagicMock(return_value=n_steps)
    policy.saver = MagicMock()
    serialization_settings = SerializationSettings("output", mock_brain.brain_name)
    checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
    policy.checkpoint(checkpoint_path, serialization_settings)
    policy.saver.save.assert_called_once_with(policy.sess, f"{checkpoint_path}.ckpt")
    export_policy_model_mock.assert_called_once_with(
        checkpoint_path, serialization_settings, policy.graph, policy.sess
    )
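
One detail of the decorators above worth noting: passing a replacement object as mock.patch's second argument (the new parameter) swaps it in without injecting another argument into the test function, which is how the test pins time.time to 12345. In isolation:

import time
from unittest import mock

@mock.patch("time.time", mock.MagicMock(return_value=12345))
def current_timestamp():
    return time.time()

assert current_timestamp() == 12345  # no extra mock argument is injected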

ml-agents/mlagents/trainers/trainer/trainer.py (5 changes)


     @abc.abstractmethod
     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        create_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.
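
Because add_policy is declared with abc.abstractmethod on the base Trainer, every concrete trainer has to provide the method with the updated signature. A minimal illustration of that contract, using hypothetical classes rather than the ml-agents hierarchy:

import abc

class BaseTrainer(abc.ABC):
    @abc.abstractmethod
    def add_policy(self, parsed_behavior_id, policy, create_saver=True):
        ...

class CompleteTrainer(BaseTrainer):
    def add_policy(self, parsed_behavior_id, policy, create_saver=True):
        self.policy = policy

class IncompleteTrainer(BaseTrainer):
    pass

CompleteTrainer().add_policy("BrainA", policy=object())
try:
    IncompleteTrainer()  # abstract method add_policy not implemented
except TypeError:
    pass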
