浏览代码

[versioning] Save ML-Agents version in checkpoints and check on load (#4035)

/MLA-1734-demo-provider
GitHub 5 年前
当前提交
335cff3e
共有 6 个文件被更改,包括 110 次插入9 次删除
  1. 2
      com.unity.ml-agents/CHANGELOG.md
  2. 10
      ml-agents/mlagents/model_serialization.py
  3. 62
      ml-agents/mlagents/trainers/policy/tf_policy.py
  4. 21
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  5. 8
      ml-agents/mlagents/trainers/tests/test_policy.py
  6. 16
      ml-agents/mlagents/trainers/tests/test_simple_rl.py

2
com.unity.ml-agents/CHANGELOG.md


directory. (#3829)
- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
- When trying to load/resume from a checkpoint created with an earlier verison of ML-Agents,
a warning will be thrown. (#4035)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

10
ml-agents/mlagents/model_serialization.py


)
MODEL_CONSTANTS = frozenset(
["action_output_shape", "is_continuous_control", "memory_size", "version_number"]
[
"action_output_shape",
"is_continuous_control",
"memory_size",
"version_number",
"trainer_major_version",
"trainer_minor_version",
"trainer_patch_version",
]
)
VISUAL_OBSERVATION_PREFIX = "visual_observation_"

62
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
from distutils.version import LooseVersion
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException

from mlagents.trainers.models import ModelUtils
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers import __version__
# This is the version number of the inputs and outputs of the model, and
# determines compatibility with inference in Barracuda.
MODEL_FORMAT_VERSION = 2
class UnityPolicyException(UnityException):

:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
"""
self._version_number_ = 2
self.m_size = 0
self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings

"""
pass
@staticmethod
def _convert_version_string(version_string: str) -> Tuple[int, ...]:
"""
Converts the version string into a Tuple of ints (major_ver, minor_ver, patch_ver).
:param version_string: The semantic-versioned version string (X.Y.Z).
:return: A Tuple containing (major_ver, minor_ver, patch_ver).
"""
ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throw a warning if not so.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def _initialize_graph(self):
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)

model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(

self.prev_action: Optional[tf.Tensor] = None
self.memory_in: Optional[tf.Tensor] = None
self.memory_out: Optional[tf.Tensor] = None
self.version_tensors: Optional[Tuple[tf.Tensor, tf.Tensor, tf.Tensor]] = None
def create_input_placeholders(self):
with self.graph.as_default():

trainable=False,
dtype=tf.int32,
)
int_version = TFPolicy._convert_version_string(__version__)
major_ver_t = tf.Variable(
int_version[0],
name="trainer_major_version",
trainable=False,
dtype=tf.int32,
)
minor_ver_t = tf.Variable(
int_version[1],
name="trainer_minor_version",
trainable=False,
dtype=tf.int32,
)
patch_ver_t = tf.Variable(
int_version[2],
name="trainer_patch_version",
trainable=False,
dtype=tf.int32,
)
self.version_tensors = (major_ver_t, minor_ver_t, patch_ver_t)
self._version_number_,
MODEL_FORMAT_VERSION,
name="version_number",
trainable=False,
dtype=tf.int32,

21
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
import unittest
import tempfile
import numpy as np
from mlagents.tf_utils import tf

from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers import __version__
VECTOR_ACTION_SPACE = [2]

_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings(output_path=path1)
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown wth correct ver
assert len(cm.output) == 1
def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:

8
ml-agents/mlagents/trainers/tests/test_policy.py


policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
)
assert result == expected
def test_convert_version_string():
result = TFPolicy._convert_version_string("200.300.100")
assert result == (200, 300, 100)
# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)

16
ml-agents/mlagents/trainers/tests/test_simple_rl.py


@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
config = attr.evolve(PPO_CONFIG)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config})

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
step_size = 0.2 if use_discrete else 1.0
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=32),
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
batch_size=64,
batch_size=128,
learning_rate=1e-3,
buffer_init_steps=500,
steps_per_update=2,

正在加载...
取消
保存