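"""Round-trip test: reward-provider modules (GAIL, Curiosity, RND) attached to
a PPO or SAC optimizer are saved and reloaded through TorchModelSaver."""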
import pytest
import os
import numpy as np

from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
from mlagents.trainers.settings import (
    TrainerSettings,
    RewardSignalType,
    CuriositySettings,
    GAILSettings,
    RNDSettings,
    PPOSettings,
    SACSettings,
)

# create_policy_mock is assumed to live alongside the other torch policy
# tests, as in the upstream ml-agents test suite.
from mlagents.trainers.tests.torch.test_policy import create_policy_mock
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)

DEMO_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/test.demo"  # demo file name assumed; the original expression was truncated here
)


@pytest.mark.parametrize(
    "optimizer",
    [(TorchPPOOptimizer, PPOSettings), (TorchSACOptimizer, SACSettings)],
    ids=["ppo", "sac"],
)
def test_reward_provider_save(tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer
    trainer_settings = TrainerSettings()
    trainer_settings.hyperparameters = HyperparametersClass()
    # Enable all three optional reward signals so each provider's modules are
    # exercised by the checkpoint round trip below.
    trainer_settings.reward_signals = {
        RewardSignalType.CURIOSITY: CuriositySettings(),
        RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH),
        RewardSignalType.RND: RNDSettings(),
    }
    policy = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer = OptimizerClass(policy, trainer_settings)

    # Save a checkpoint, then load it back into a fresh policy/optimizer pair.
    path1 = os.path.join(tmp_path, "runid1")
    model_saver = TorchModelSaver(trainer_settings, path1)
    model_saver.register(policy)
    model_saver.register(optimizer)
    model_saver.initialize_or_load()
    policy.set_step(2000)
    model_saver.save_checkpoint("MockBrain", 2000)

    policy2 = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer2 = OptimizerClass(policy2, trainer_settings)
    model_saver2 = TorchModelSaver(trainer_settings, path1, load=True)
    model_saver2.register(policy2)
    model_saver2.register(optimizer2)
    model_saver2.initialize_or_load()

    # Both optimizers should expose the same reward-provider modules...
    module_dict_1 = optimizer.get_modules()
    module_dict_2 = optimizer2.get_modules()
    assert "Module:GAIL" in module_dict_1
    assert "Module:GAIL" in module_dict_2
    assert "Module:Curiosity" in module_dict_1
    assert "Module:Curiosity" in module_dict_2
    assert "Module:RND-pred" in module_dict_1
    assert "Module:RND-pred" in module_dict_2
    assert "Module:RND-target" in module_dict_1
    assert "Module:RND-target" in module_dict_2

    # ...and after loading, the module weights must match exactly.
    for name, module1 in module_dict_1.items():
        assert name in module_dict_2
        module2 = module_dict_2[name]
        if hasattr(module1, "parameters"):
            for param1, param2 in zip(module1.parameters(), module2.parameters()):
                assert param1.data.ne(param2.data).sum() == 0

    # Evaluate each reward provider on the same batch; a correctly restored
    # provider must produce identical rewards to the original.
    data = create_agent_buffer(policy.behavior_spec, 1)
    for reward_name in optimizer.reward_signals.keys():
        rp_1 = optimizer.reward_signals[reward_name]
        rp_2 = optimizer2.reward_signals[reward_name]
        assert np.array_equal(rp_1.evaluate(data), rp_2.evaluate(data))