
Torch : Saving/Loading of the reward providers (#4405)

* Saving the reward providers

* adding tests

* Moved the tests around

* Update ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py

* Update ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py

* Update ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py

Co-authored-by: Ruo-Ping (Rachel) Dong <ruoping.dong@unity3d.com>

* Update ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py

Co-authored-by: Ruo-Ping (Rachel) Dong <ruoping.dong@unity3d.com>

Co-authored-by: Ruo-Ping (Rachel) Dong <ruoping.dong@unity3d.com>
Branch: MLA-1734-demo-provider
GitHub committed 4 years ago
Current commit: 328353bc
8 files changed, 92 insertions(+), 2 deletions(-)
  1. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (5 changes)
  2. ml-agents/mlagents/trainers/sac/optimizer_torch.py (5 changes)
  3. ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py (9 changes)
  4. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (3 changes)
  5. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (3 changes)
  6. ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (69 changes)
  7. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (renamed, 0 changes)

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (5 changes)

         return update_stats

     def get_modules(self):
-        return {"Optimizer": self.optimizer}
+        modules = {"Optimizer": self.optimizer}
+        for reward_provider in self.reward_signals.values():
+            modules.update(reward_provider.get_modules())
+        return modules
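For context, the dictionary returned by get_modules() is what the saver walks when writing or restoring a checkpoint: each string key maps to an object exposing state_dict()/load_state_dict(). A minimal sketch of that pattern (not the actual TorchSaver implementation; the function names here are hypothetical):

import torch

def save_modules(modules, checkpoint_path):
    # Hypothetical sketch: serialize each registered module's weights under
    # its string key, e.g. "Optimizer", "Module:Curiosity", "Module:GAIL".
    # Both torch.nn.Module and torch.optim.Optimizer expose state_dict().
    state = {name: module.state_dict() for name, module in modules.items()}
    torch.save(state, checkpoint_path)

def load_modules(modules, checkpoint_path):
    # Restore weights for every key present in the checkpoint; modules whose
    # key is missing are simply left at their initialized values.
    state = torch.load(checkpoint_path)
    for name, module in modules.items():
        if name in state:
            module.load_state_dict(state[name])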

ml-agents/mlagents/trainers/sac/optimizer_torch.py (5 changes)

         return {}

     def get_modules(self):
-        return {
+        modules = {
             "Optimizer:value_network": self.value_network,
             "Optimizer:target_network": self.target_network,
             "Optimizer:policy_optimizer": self.policy_optimizer,
         }
+        for reward_provider in self.reward_signals.values():
+            modules.update(reward_provider.get_modules())
+        return modules

ml-agents/mlagents/trainers/torch/components/reward_providers/base_reward_provider.py (9 changes)

 import numpy as np
+import torch
 from abc import ABC, abstractmethod
 from typing import Dict

         raise NotImplementedError(
             "The reward provider's update method has not been implemented "
         )
+
+    def get_modules(self) -> Dict[str, torch.nn.Module]:
+        """
+        Returns a dictionary of string identifiers to the torch.nn.Modules used by
+        the reward providers. This method is used for loading and saving the weights
+        of the reward providers.
+        """
+        return {}
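The base class returns an empty dictionary, so providers with no trainable weights (e.g. the extrinsic reward) contribute nothing to the checkpoint; providers that do own weights override the hook, as the curiosity and GAIL diffs below show. A hedged sketch of a hypothetical custom provider following the same "Module:<name>" key convention (not part of this PR):

import torch
from typing import Dict

class MyRewardProvider:
    # Hypothetical sketch; a real implementation would extend
    # BaseRewardProvider and implement its abstract methods.
    def __init__(self):
        self.name = "MyReward"
        self._network = torch.nn.Linear(4, 1)  # placeholder trainable module

    def get_modules(self) -> Dict[str, torch.nn.Module]:
        # Expose the trainable network so the saver checkpoints it alongside
        # the optimizer, keyed the same way as the Curiosity/GAIL providers.
        return {f"Module:{self.name}": self._network}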

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (3 changes)

             "Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
         }
+
+    def get_modules(self):
+        return {f"Module:{self.name}": self._network}

 class CuriosityNetwork(torch.nn.Module):
     EPSILON = 1e-10

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (3 changes)

         self.optimizer.step()
         return stats_dict
+
+    def get_modules(self):
+        return {f"Module:{self.name}": self._discriminator_network}

 class DiscriminatorNetwork(torch.nn.Module):
     gradient_penalty_weight = 10.0

ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (69 changes)

import pytest
import os

from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.settings import (
    TrainerSettings,
    RewardSignalType,
    CuriositySettings,
    GAILSettings,
    PPOSettings,
    SACSettings,
)
from mlagents.trainers.tests.torch.test_policy import create_policy_mock

DEMO_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/test.demo"
)

@pytest.mark.parametrize(
    "optimizer",
    [(TorchPPOOptimizer, PPOSettings), (TorchSACOptimizer, SACSettings)],
    ids=["ppo", "sac"],
)
def test_reward_provider_save(tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer
    trainer_settings = TrainerSettings()
    trainer_settings.hyperparameters = HyperparametersClass()
    trainer_settings.reward_signals = {
        RewardSignalType.CURIOSITY: CuriositySettings(),
        RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH),
    }
    policy = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer = OptimizerClass(policy, trainer_settings)

    # save at path 1
    path1 = os.path.join(tmp_path, "runid1")
    saver = TorchSaver(trainer_settings, path1)
    saver.register(policy)
    saver.register(optimizer)
    saver.initialize_or_load()
    policy.set_step(2000)
    saver.save_checkpoint("MockBrain", 2000)

    # create a new optimizer and policy
    optimizer2 = OptimizerClass(policy, trainer_settings)
    policy2 = create_policy_mock(trainer_settings, use_discrete=False)

    # load weights
    saver2 = TorchSaver(trainer_settings, path1, load=True)
    saver2.register(policy2)
    saver2.register(optimizer2)
    saver2.initialize_or_load()  # This is to load the optimizers

    # assert the models have the same weights
    module_dict_1 = optimizer.get_modules()
    module_dict_2 = optimizer2.get_modules()
    assert "Module:GAIL" in module_dict_1
    assert "Module:GAIL" in module_dict_2
    for name, module1 in module_dict_1.items():
        assert name in module_dict_2
        module2 = module_dict_2[name]
        if hasattr(module1, "parameters"):
            for param1, param2 in zip(module1.parameters(), module2.parameters()):
                assert param1.data.ne(param2.data).sum() == 0
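One note on the final check: param1.data.ne(param2.data).sum() == 0 counts element-wise mismatches, so a zero count means the two parameter tensors are identical. A tiny standalone illustration (the values here are made up):

import torch

a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([1.0, 2.0, 3.0])

# ne() marks element-wise mismatches; summing them gives the mismatch count.
assert a.ne(b).sum() == 0
# torch.equal is an equivalent, more direct way to assert exact equality.
assert torch.equal(a, b)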

ml-agents/mlagents/trainers/tests/torch/test_saver.py → ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (renamed, 0 changes)
