Merge pull request #3264 from Unity-Technologies/hotfix-0.13.1

Merge hotfix 0.13.1 into master
4 years ago · d52fb483
--- a/gym-unity/gym_unity/init.py
+++ b/gym-unity/gym_unity/init.py
-__version__ = "0.13.0"
+__version__ = "0.13.1"
--- a/ml-agents-envs/mlagents_envs/init.py
+++ b/ml-agents-envs/mlagents_envs/init.py
-__version__ = "0.13.0"
+__version__ = "0.13.1"
--- a/ml-agents/mlagents/tf_utils/init.py
+++ b/ml-agents/mlagents/tf_utils/init.py
 from mlagents.tf_utils.tf import tf as tf  # noqa
 from mlagents.tf_utils.tf import set_warnings_enabled  # noqa
+from mlagents.tf_utils.tf import generate_session_config  # noqa
--- a/ml-agents/mlagents/tf_utils/tf.py
+++ b/ml-agents/mlagents/tf_utils/tf.py

 def set_warnings_enabled(is_enabled: bool) -> None:
    """
-    Enable or disable tensorflow warnings (notabley, this disables deprecation warnings.
+    Enable or disable tensorflow warnings (notably, this disables deprecation warnings).
+
+
+def generate_session_config() -> tf.ConfigProto:
+    """
+    Generate a ConfigProto to use for ML-Agents that doesn't consume all of the GPU memory
+    and allows for soft placement in the case of multi-GPU.
+    """
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    # For multi-GPU training, set allow_soft_placement to True so that an
+    # operation is automatically placed on an alternative device; this
+    # prevents exceptions when the requested device doesn't support the
+    # operation or does not exist.
+    config.allow_soft_placement = True
+    return config
--- a/ml-agents/mlagents/trainers/init.py
+++ b/ml-agents/mlagents/trainers/init.py
-__version__ = "0.13.0"
+__version__ = "0.13.1"
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
        if not isinstance(policy, PPOPolicy):
            raise RuntimeError("Non-PPOPolicy passed to PPOTrainer.add_policy()")
        self.policy = policy
+        self.step = policy.get_current_step()

    def get_policy(self, name_behavior_id: str) -> TFPolicy:
        """
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
        if not isinstance(policy, SACPolicy):
            raise RuntimeError("Non-SACPolicy passed to SACTrainer.add_policy()")
        self.policy = policy
+        self.step = policy.get_current_step()

    def get_policy(self, name_behavior_id: str) -> TFPolicy:
        """
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
        brain_params.brain_name, 0, trainer_params, True, False, 0, "0", False
    )
    policy_mock = mock.Mock(spec=PPOPolicy)
+    policy_mock.get_current_step.return_value = 0
    step_count = (
        5
    )  # 10 hacked because this function is no longer called through trainer
--- a/ml-agents/mlagents/trainers/tf_policy.py
+++ b/ml-agents/mlagents/trainers/tf_policy.py

 import numpy as np
 from mlagents.tf_utils import tf
+from mlagents import tf_utils

 from mlagents_envs.exception import UnityException
 from mlagents.trainers.policy import Policy
        self.model_path = trainer_parameters["model_path"]
        self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
        self.graph = tf.Graph()
-        config = tf.ConfigProto()
-        config.gpu_options.allow_growth = True
-        # For multi-GPU training, set allow_soft_placement to True to allow
-        # placing the operation into an alternative device automatically
-        # to prevent from exceptions if the device doesn't suppport the operation
-        # or the device does not exist
-        config.allow_soft_placement = True
-        self.sess = tf.Session(config=config, graph=self.graph)
+        self.sess = tf.Session(
+            config=tf_utils.generate_session_config(), graph=self.graph
+        )
        self.saver = None
        if self.use_recurrent:
            self.m_size = trainer_parameters["memory_size"]
--- a/ml-agents/mlagents/trainers/trainer.py
+++ b/ml-agents/mlagents/trainers/trainer.py
 import abc

 from mlagents.tf_utils import tf
+from mlagents import tf_utils

 from collections import deque

        :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
        """
        try:
-            with tf.Session() as sess:
+            with tf.Session(config=tf_utils.generate_session_config()) as sess:
                s_op = tf.summary.text(
                    key,
                    tf.convert_to_tensor(