
Load individual elements if state dict load fails (#5213)

Co-authored-by: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Co-authored-by: Ervin T. <ervin@unity3d.com>
/check-for-ModelOverriders
GitHub · 3 years ago
Commit 9dfe6c7f
5 files changed, 147 insertions(+), 3 deletions(-)
  1. com.unity.ml-agents/CHANGELOG.md (4 changes)
  2. docs/Training-ML-Agents.md (7 changes)
  3. ml-agents/mlagents/trainers/model_saver/torch_model_saver.py (29 changes)
  4. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (46 changes)
  5. ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (64 changes)

com.unity.ml-agents/CHANGELOG.md (4 changes)

different sizes using the same model. For a summary of the interface changes, please see the Migration Guide. (#5189)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `--resume` flag now supports resuming experiments with additional reward providers or
loading partial models if the network architecture has changed. See
[here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md#loading-an-existing-model)
for more details. (#5213)
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)

docs/Training-ML-Agents.md (7 changes)

want to run inference in Unity, you should use the
[Unity Inference Engine](Getting-Started.md#running-a-pre-trained-model).
Additionally, if the network architecture changes, you may still load an existing model,
but ML-Agents will only load the parts of the model it can and initialize the rest from scratch. For instance,
if you add a new reward signal, the existing model will load but the new reward signal
will be initialized from scratch. If you have a model with a visual encoder (CNN) but
change the `hidden_units`, the CNN will be loaded but the body of the network will be
initialized from scratch.
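
As a minimal sketch of what "loaded where possible, initialized elsewhere" means at the PyTorch level (plain torch.nn modules; the layer names and sizes here are made up, not the actual ML-Agents network):

import torch

# Old run: CNN encoder plus a body sized by hidden_units=12.
old = torch.nn.ModuleDict(
    {"encoder": torch.nn.Conv2d(3, 16, kernel_size=3), "body": torch.nn.Linear(16, 12)}
)
# New run: same encoder, but hidden_units changed to 10.
new = torch.nn.ModuleDict(
    {"encoder": torch.nn.Conv2d(3, 16, kernel_size=3), "body": torch.nn.Linear(16, 10)}
)
try:
    new.load_state_dict(old.state_dict(), strict=False)
except RuntimeError:
    pass  # the body's shapes no longer match, so its tensors are skipped
# The encoder kept its checkpointed weights; the body stays freshly initialized.
assert torch.equal(new["encoder"].weight, old["encoder"].weight)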
Alternatively, you might want to start a new training run but _initialize_ it
using an already-trained model. You may want to do this, for instance, if your
environment changed and you want a new model, but the old behavior is still

ml-agents/mlagents/trainers/model_saver/torch_model_saver.py (29 changes)

policy = cast(TorchPolicy, policy)
for name, mod in modules.items():
-    mod.load_state_dict(saved_state_dict[name])
+    try:
+        if isinstance(mod, torch.nn.Module):
+            missing_keys, unexpected_keys = mod.load_state_dict(
+                saved_state_dict[name], strict=False
+            )
+            if missing_keys:
+                logger.warning(
+                    f"Did not find these keys {missing_keys} in checkpoint. Initializing."
+                )
+            if unexpected_keys:
+                logger.warning(
+                    f"Did not expect these keys {unexpected_keys} in checkpoint. Ignoring."
+                )
+        else:
+            # If module is not an nn.Module, try to load as one piece
+            mod.load_state_dict(saved_state_dict[name])
+    # KeyError is raised if the module was not present in the last run but is being
+    # accessed in the saved_state_dict.
+    # ValueError is raised by the optimizer's load_state_dict if the parameters
+    # have changed. Note, the optimizer uses a completely different load_state_dict
+    # function because it is not an nn.Module.
+    # RuntimeError is raised by PyTorch if there is a size mismatch between modules
+    # of the same name. This will still partially assign values to those layers that
+    # have not changed shape.
+    except (KeyError, ValueError, RuntimeError) as err:
+        logger.warning(f"Failed to load for module {name}. Initializing")
+        logger.debug(f"Module loading error : {err}")
if reset_global_steps:
    policy.set_step(0)
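
For reference, a small sketch of the strict=False contract relied on above, using plain PyTorch modules (the module names are illustrative only, not the ml-agents registry keys):

import torch

old = torch.nn.ModuleDict({"policy": torch.nn.Linear(4, 2)})
new = torch.nn.ModuleDict(
    {"policy": torch.nn.Linear(4, 2), "gail": torch.nn.Linear(4, 1)}
)
result = new.load_state_dict(old.state_dict(), strict=False)
# Keys for the new "gail" module are absent from the checkpoint, so they are
# reported as missing rather than raising, and the module keeps its fresh init.
assert sorted(result.missing_keys) == ["gail.bias", "gail.weight"]
assert result.unexpected_keys == []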

ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (46 changes)

from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
from mlagents.trainers.settings import (
    TrainerSettings,
    NetworkSettings,
    EncoderType,
    PPOSettings,
    SACSettings,
    POCASettings,
)

    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0


@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
def test_load_policy_different_hidden_units(tmp_path, vis_encode_type):
    path1 = os.path.join(tmp_path, "runid1")
    trainer_params = TrainerSettings()
    trainer_params.network_settings = NetworkSettings(
        hidden_units=12, vis_encode_type=EncoderType(vis_encode_type)
    )
    policy = create_policy_mock(trainer_params, use_visual=True)
    conv_params = [mod for mod in policy.actor.parameters() if len(mod.shape) > 2]
    model_saver = TorchModelSaver(trainer_params, path1)
    model_saver.register(policy)
    model_saver.initialize_or_load(policy)
    policy.set_step(2000)
    mock_brain_name = "MockBrain"
    model_saver.save_checkpoint(mock_brain_name, 2000)
    # Try load from this path
    trainer_params2 = TrainerSettings()
    trainer_params2.network_settings = NetworkSettings(
        hidden_units=10, vis_encode_type=EncoderType(vis_encode_type)
    )
    model_saver2 = TorchModelSaver(trainer_params2, path1, load=True)
    policy2 = create_policy_mock(trainer_params2, use_visual=True)
    conv_params2 = [mod for mod in policy2.actor.parameters() if len(mod.shape) > 2]
    # asserts convolutions have different parameters before load
    for conv1, conv2 in zip(conv_params, conv_params2):
        assert not torch.equal(conv1, conv2)
    # asserts layers still have different dimensions
    for mod1, mod2 in zip(policy.actor.parameters(), policy2.actor.parameters()):
        if mod1.shape[0] == 12:
            assert mod2.shape[0] == 10
    model_saver2.register(policy2)
    model_saver2.initialize_or_load(policy2)
    # asserts convolutions have same parameters after load
    for conv1, conv2 in zip(conv_params, conv_params2):
        assert torch.equal(conv1, conv2)
    # asserts layers still have different dimensions
    for mod1, mod2 in zip(policy.actor.parameters(), policy2.actor.parameters()):
        if mod1.shape[0] == 12:
            assert mod2.shape[0] == 10


@pytest.mark.parametrize(
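
A hedged note on the `len(mod.shape) > 2` filter in the test above: convolution kernels are 4-D (out_channels, in_channels, kH, kW) while linear weights are 2-D and biases 1-D, so the filter selects exactly the convolutional weights, whose shapes do not depend on `hidden_units`:

import torch

conv = torch.nn.Conv2d(3, 16, kernel_size=3)
dense = torch.nn.Linear(16, 12)
assert len(conv.weight.shape) == 4   # shape survives a hidden_units change
assert len(dense.weight.shape) == 2  # resized, so reinitialized on load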

ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (64 changes)

import numpy as np
from mlagents_envs.logging_util import WARNING
from mlagents.trainers.poca.optimizer_torch import TorchPOCAOptimizer
from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
from mlagents.trainers.settings import (
    TrainerSettings,

    RNDSettings,
    PPOSettings,
    SACSettings,
    POCASettings,
)
from mlagents.trainers.tests.torch.test_policy import create_policy_mock
from mlagents.trainers.tests.torch.test_reward_providers.utils import (

DEMO_PATH = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
    + "/test.demo"
)
@pytest.mark.parametrize(
    "optimizer",
-    [(TorchPPOOptimizer, PPOSettings), (TorchSACOptimizer, SACSettings)],
-    ids=["ppo", "sac"],
+    [
+        (TorchPPOOptimizer, PPOSettings),
+        (TorchSACOptimizer, SACSettings),
+        (TorchPOCAOptimizer, POCASettings),
+    ],
+    ids=["ppo", "sac", "poca"],
)
def test_reward_provider_save(tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer

    rp_1 = optimizer.reward_signals[reward_name]
    rp_2 = optimizer2.reward_signals[reward_name]
    assert np.array_equal(rp_1.evaluate(data), rp_2.evaluate(data))


@pytest.mark.parametrize(
    "optimizer",
    [
        (TorchPPOOptimizer, PPOSettings),
        (TorchSACOptimizer, SACSettings),
        (TorchPOCAOptimizer, POCASettings),
    ],
    ids=["ppo", "sac", "poca"],
)
def test_load_different_reward_provider(caplog, tmp_path, optimizer):
    OptimizerClass, HyperparametersClass = optimizer
    trainer_settings = TrainerSettings()
    trainer_settings.hyperparameters = HyperparametersClass()
    trainer_settings.reward_signals = {
        RewardSignalType.CURIOSITY: CuriositySettings(),
        RewardSignalType.RND: RNDSettings(),
    }
    policy = create_policy_mock(trainer_settings, use_discrete=False)
    optimizer = OptimizerClass(policy, trainer_settings)
    # save at path 1
    path1 = os.path.join(tmp_path, "runid1")
    model_saver = TorchModelSaver(trainer_settings, path1)
    model_saver.register(policy)
    model_saver.register(optimizer)
    model_saver.initialize_or_load()
    assert len(optimizer.critic.value_heads.stream_names) == 2
    policy.set_step(2000)
    model_saver.save_checkpoint("MockBrain", 2000)
    trainer_settings2 = TrainerSettings()
    trainer_settings2.hyperparameters = HyperparametersClass()
    trainer_settings2.reward_signals = {
        RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH)
    }
    # create a new optimizer and policy
    policy2 = create_policy_mock(trainer_settings2, use_discrete=False)
    optimizer2 = OptimizerClass(policy2, trainer_settings2)
    # load weights
    model_saver2 = TorchModelSaver(trainer_settings2, path1, load=True)
    model_saver2.register(policy2)
    model_saver2.register(optimizer2)
    assert len(optimizer2.critic.value_heads.stream_names) == 1
    model_saver2.initialize_or_load()  # This is to load the optimizers
    messages = [rec.message for rec in caplog.records if rec.levelno == WARNING]
    assert len(messages) > 0
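
The warnings asserted above come from the except branch in torch_model_saver.py; for optimizers in particular it is typically the ValueError path, since torch.optim optimizers raise ValueError from load_state_dict when the saved parameter groups no longer line up with the current ones. A minimal sketch with plain PyTorch (the parameter shapes are arbitrary):

import torch

params_old = [torch.nn.Parameter(torch.zeros(3)), torch.nn.Parameter(torch.zeros(2))]
opt_old = torch.optim.Adam(params_old)

params_new = [torch.nn.Parameter(torch.zeros(3))]  # e.g. a reward module was removed
opt_new = torch.optim.Adam(params_new)

try:
    opt_new.load_state_dict(opt_old.state_dict())
except ValueError as err:
    print(f"optimizer state mismatch: {err}")  # caught and logged by the saver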