浏览代码

Merge pull request #3264 from Unity-Technologies/hotfix-0.13.1

Merge hotfix 0.13.1 into master
/asymm-envs
GitHub 5 年前
当前提交
d52fb483
共有 10 个文件被更改,包括 29 次插入13 次删除
  1. 2
      gym-unity/gym_unity/__init__.py
  2. 2
      ml-agents-envs/mlagents_envs/__init__.py
  3. 1
      ml-agents/mlagents/tf_utils/__init__.py
  4. 17
      ml-agents/mlagents/tf_utils/tf.py
  5. 2
      ml-agents/mlagents/trainers/__init__.py
  6. 1
      ml-agents/mlagents/trainers/ppo/trainer.py
  7. 1
      ml-agents/mlagents/trainers/sac/trainer.py
  8. 1
      ml-agents/mlagents/trainers/tests/test_ppo.py
  9. 12
      ml-agents/mlagents/trainers/tf_policy.py
  10. 3
      ml-agents/mlagents/trainers/trainer.py

2
gym-unity/gym_unity/__init__.py


__version__ = "0.13.0"
__version__ = "0.13.1"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.13.0"
__version__ = "0.13.1"

1
ml-agents/mlagents/tf_utils/__init__.py


from mlagents.tf_utils.tf import tf as tf # noqa
from mlagents.tf_utils.tf import set_warnings_enabled # noqa
from mlagents.tf_utils.tf import generate_session_config # noqa

17
ml-agents/mlagents/tf_utils/tf.py


def set_warnings_enabled(is_enabled: bool) -> None:
"""
Enable or disable tensorflow warnings (notabley, this disables deprecation warnings.
Enable or disable tensorflow warnings (notably, this disables deprecation warnings).
def generate_session_config() -> tf.ConfigProto:
    """
    Build the TensorFlow session configuration used for ML-Agents sessions.

    The returned ConfigProto has GPU memory growth enabled, so that a session
    does not consume all of the GPU memory, and has soft device placement
    enabled for the multi-GPU case.

    :return: A new ConfigProto with both options set.
    """
    # Soft placement lets an operation be placed on an alternative device
    # automatically during multi-GPU training, which prevents exceptions when
    # the requested device doesn't support the operation or does not exist.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    # Allow GPU memory usage to grow rather than taking all of it.
    session_config.gpu_options.allow_growth = True
    return session_config

2
ml-agents/mlagents/trainers/__init__.py


__version__ = "0.13.0"
__version__ = "0.13.1"

1
ml-agents/mlagents/trainers/ppo/trainer.py


if not isinstance(policy, PPOPolicy):
raise RuntimeError("Non-PPOPolicy passed to PPOTrainer.add_policy()")
self.policy = policy
self.step = policy.get_current_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

1
ml-agents/mlagents/trainers/sac/trainer.py


if not isinstance(policy, SACPolicy):
raise RuntimeError("Non-SACPolicy passed to SACTrainer.add_policy()")
self.policy = policy
self.step = policy.get_current_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

1
ml-agents/mlagents/trainers/tests/test_ppo.py


brain_params.brain_name, 0, trainer_params, True, False, 0, "0", False
)
policy_mock = mock.Mock(spec=PPOPolicy)
policy_mock.get_current_step.return_value = 0
step_count = (
5
) # 10 hacked because this function is no longer called through trainer

12
ml-agents/mlagents/trainers/tf_policy.py


import numpy as np
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy

self.model_path = trainer_parameters["model_path"]
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent from exceptions if the device doesn't support the operation
# or the device does not exist
config.allow_soft_placement = True
self.sess = tf.Session(config=config, graph=self.graph)
self.sess = tf.Session(
config=tf_utils.generate_session_config(), graph=self.graph
)
self.saver = None
if self.use_recurrent:
self.m_size = trainer_parameters["memory_size"]

3
ml-agents/mlagents/trainers/trainer.py


import abc
from mlagents.tf_utils import tf
from mlagents import tf_utils
from collections import deque

:param input_dict: A dictionary that will be displayed in a table on Tensorboard.
"""
try:
with tf.Session() as sess:
with tf.Session(config=tf_utils.generate_session_config()) as sess:
s_op = tf.summary.text(
key,
tf.convert_to_tensor(

正在加载...
取消
保存