
Fix flake8 import warnings (#2584)

We have been ignoring unused imports and star imports via flake8. These are
both bad practice and grow over time without automated checking.  This
commit attempts to fix all existing import errors and add back the corresponding
flake8 checks.
/develop-gpu-test
GitHub · 5 years ago
Current commit: 67d754c5
70 files changed: 668 insertions(+), 774 deletions(-)
Changed files (lines changed in parentheses):
  1. gym-unity/gym_unity/__init__.py (1)
  2. gym-unity/gym_unity/envs/__init__.py (372)
  3. gym-unity/gym_unity/tests/test_gym.py (6)
  4. ml-agents-envs/mlagents/envs/__init__.py (5)
  5. ml-agents-envs/mlagents/envs/base_unity_environment.py (2)
  6. ml-agents-envs/mlagents/envs/communicator.py (3)
  7. ml-agents-envs/mlagents/envs/communicator_objects/__init__.py (22)
  8. ml-agents-envs/mlagents/envs/env_manager.py (4)
  9. ml-agents-envs/mlagents/envs/environment.py (20)
  10. ml-agents-envs/mlagents/envs/mock_communicator.py (14)
  11. ml-agents-envs/mlagents/envs/policy.py (4)
  12. ml-agents-envs/mlagents/envs/rpc_communicator.py (6)
  13. ml-agents-envs/mlagents/envs/sampler_class.py (4)
  14. ml-agents-envs/mlagents/envs/simple_env_manager.py (3)
  15. ml-agents-envs/mlagents/envs/socket_communicator.py (4)
  16. ml-agents-envs/mlagents/envs/subprocess_env_manager.py (7)
  17. ml-agents-envs/mlagents/envs/tests/test_envs.py (27)
  18. ml-agents-envs/mlagents/envs/tests/test_rpc_communicator.py (4)
  19. ml-agents-envs/mlagents/envs/tests/test_sampler_class.py (1)
  20. ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py (3)
  21. ml-agents/mlagents/trainers/__init__.py (20)
  22. ml-agents/mlagents/trainers/bc/__init__.py (4)
  23. ml-agents/mlagents/trainers/bc/online_trainer.py (4)
  24. ml-agents/mlagents/trainers/bc/trainer.py (5)
  25. ml-agents/mlagents/trainers/buffer.py (3)
  26. ml-agents/mlagents/trainers/components/bc/__init__.py (1)
  27. ml-agents/mlagents/trainers/components/bc/model.py (1)
  28. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (110)
  29. ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py (1)
  30. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (1)
  31. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py (1)
  32. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (1)
  33. ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py (1)
  34. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (1)
  35. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (2)
  36. ml-agents/mlagents/trainers/demo_loader.py (6)
  37. ml-agents/mlagents/trainers/learn.py (4)
  38. ml-agents/mlagents/trainers/ppo/__init__.py (3)
  39. ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py (2)
  40. ml-agents/mlagents/trainers/ppo/policy.py (2)
  41. ml-agents/mlagents/trainers/ppo/trainer.py (7)
  42. ml-agents/mlagents/trainers/rl_trainer.py (12)
  43. ml-agents/mlagents/trainers/sac/__init__.py (3)
  44. ml-agents/mlagents/trainers/sac/policy.py (8)
  45. ml-agents/mlagents/trainers/sac/trainer.py (11)
  46. ml-agents/mlagents/trainers/tests/mock_brain.py (1)
  47. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (2)
  48. ml-agents/mlagents/trainers/tests/test_bc.py (22)
  49. ml-agents/mlagents/trainers/tests/test_bcmodule.py (10)
  50. ml-agents/mlagents/trainers/tests/test_curriculum.py (3)
  51. ml-agents/mlagents/trainers/tests/test_demo_loader.py (2)
  52. ml-agents/mlagents/trainers/tests/test_learn.py (5)
  53. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (26)
  54. ml-agents/mlagents/trainers/tests/test_multigpu.py (6)
  55. ml-agents/mlagents/trainers/tests/test_policy.py (5)
  56. ml-agents/mlagents/trainers/tests/test_ppo.py (35)
  57. ml-agents/mlagents/trainers/tests/test_reward_signals.py (27)
  58. ml-agents/mlagents/trainers/tests/test_sac.py (45)
  59. ml-agents/mlagents/trainers/tests/test_simple_rl.py (4)
  60. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2)
  61. ml-agents/mlagents/trainers/tests/test_trainer_metrics.py (2)
  62. ml-agents/mlagents/trainers/tests/test_trainer_util.py (8)
  63. ml-agents/mlagents/trainers/tf_policy.py (7)
  64. ml-agents/mlagents/trainers/trainer.py (10)
  65. ml-agents/mlagents/trainers/trainer_controller.py (4)
  66. ml-agents/mlagents/trainers/trainer_util.py (4)
  67. setup.cfg (4)
  68. utils/validate_meta_files.py (1)
  69. gym-unity/gym_unity/envs/unity_env.py (371)
  70. ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py (110)
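
Most of these files change in the same way: names that used to arrive through star imports or package-level re-exports are now imported explicitly from the module that defines them. A minimal before/after sketch using names from the diffs below; F401 (unused import) and F403 (star import) are presumably the flake8 checks being re-enabled via setup.cfg, though that hunk is not shown here:

    # Before: names re-exported through a package __init__ or pulled in with a
    # star import (the kind of thing F401/F403 flag once the ignores are removed).
    from mlagents.envs import AllBrainInfo, BrainParameters
    from .environment import *

    # After: each name is imported from the module that defines it.
    from mlagents.envs.brain import AllBrainInfo, BrainParameters
    from mlagents.envs.environment import UnityEnvironment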

1
gym-unity/gym_unity/__init__.py


from gym.envs.registration import register

372
gym-unity/gym_unity/envs/__init__.py


from gym_unity.envs.unity_env import UnityEnv, UnityGymException
import logging
import itertools
import gym
import numpy as np
from mlagents.envs.environment import UnityEnvironment
from gym import error, spaces
class UnityGymException(error.Error):
"""
Any error related to the gym wrapper of ml-agents.
"""
pass
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gym_unity")
class UnityEnv(gym.Env):
"""
Provides Gym wrapper for Unity Learning Environments.
Multi-agent environments use lists for object types, as done here:
https://github.com/openai/multiagent-particle-envs
"""
def __init__(
self,
environment_filename: str,
worker_id: int = 0,
use_visual: bool = False,
uint8_visual: bool = False,
multiagent: bool = False,
flatten_branched: bool = False,
no_graphics: bool = False,
allow_multiple_visual_obs: bool = False,
):
"""
Environment initialization
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
:param worker_id: Worker number for environment.
:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
"""
self._env = UnityEnvironment(
environment_filename, worker_id, no_graphics=no_graphics
)
self.name = self._env.academy_name
self.visual_obs = None
self._current_state = None
self._n_agents = None
self._multiagent = multiagent
self._flattener = None
self.game_over = (
False
) # Hidden flag used by Atari environments to determine if the game is over
self._allow_multiple_visual_obs = allow_multiple_visual_obs
# Check brain configuration
if len(self._env.brains) != 1:
raise UnityGymException(
"There can only be one brain in a UnityEnvironment "
"if it is wrapped in a gym."
)
if len(self._env.external_brain_names) <= 0:
raise UnityGymException(
"There are not any external brain in the UnityEnvironment"
)
self.brain_name = self._env.external_brain_names[0]
brain = self._env.brains[self.brain_name]
if use_visual and brain.number_visual_observations == 0:
raise UnityGymException(
"`use_visual` was set to True, however there are no"
" visual observations as part of this environment."
)
self.use_visual = brain.number_visual_observations >= 1 and use_visual
if not use_visual and uint8_visual:
logger.warning(
"`uint8_visual was set to true, but visual observations are not in use. "
"This setting will not have any effect."
)
else:
self.uint8_visual = uint8_visual
if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
logger.warning(
"The environment contains more than one visual observation. "
"You must define allow_multiple_visual_obs=True to received them all. "
"Otherwise, please note that only the first will be provided in the observation."
)
if brain.num_stacked_vector_observations != 1:
raise UnityGymException(
"There can only be one stacked vector observation in a UnityEnvironment "
"if it is wrapped in a gym."
)
# Check for number of agents in scene.
initial_info = self._env.reset()[self.brain_name]
self._check_agents(len(initial_info.agents))
# Set observation and action spaces
if brain.vector_action_space_type == "discrete":
if len(brain.vector_action_space_size) == 1:
self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
else:
if flatten_branched:
self._flattener = ActionFlattener(brain.vector_action_space_size)
self._action_space = self._flattener.action_space
else:
self._action_space = spaces.MultiDiscrete(
brain.vector_action_space_size
)
else:
if flatten_branched:
logger.warning(
"The environment has a non-discrete action space. It will "
"not be flattened."
)
high = np.array([1] * brain.vector_action_space_size[0])
self._action_space = spaces.Box(-high, high, dtype=np.float32)
high = np.array([np.inf] * brain.vector_observation_space_size)
self.action_meanings = brain.vector_action_descriptions
if self.use_visual:
if brain.camera_resolutions[0]["blackAndWhite"]:
depth = 1
else:
depth = 3
self._observation_space = spaces.Box(
0,
1,
dtype=np.float32,
shape=(
brain.camera_resolutions[0]["height"],
brain.camera_resolutions[0]["width"],
depth,
),
)
else:
self._observation_space = spaces.Box(-high, high, dtype=np.float32)
def reset(self):
"""Resets the state of the environment and returns an initial observation.
In the case of multi-agent environments, this is a list.
Returns: observation (object/list): the initial observation of the
space.
"""
info = self._env.reset()[self.brain_name]
n_agents = len(info.agents)
self._check_agents(n_agents)
self.game_over = False
if not self._multiagent:
obs, reward, done, info = self._single_step(info)
else:
obs, reward, done, info = self._multi_step(info)
return obs
def step(self, action):
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, done, info).
In the case of multi-agent environments, these are lists.
Args:
action (object/list): an action provided by the environment
Returns:
observation (object/list): agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
done (boolean/list): whether the episode has ended.
info (dict): contains auxiliary diagnostic information, including BrainInfo.
"""
# Use random actions for all other agents in environment.
if self._multiagent:
if not isinstance(action, list):
raise UnityGymException(
"The environment was expecting `action` to be a list."
)
if len(action) != self._n_agents:
raise UnityGymException(
"The environment was expecting a list of {} actions.".format(
self._n_agents
)
)
else:
if self._flattener is not None:
# Action space is discrete and flattened - we expect a list of scalars
action = [self._flattener.lookup_action(_act) for _act in action]
action = np.array(action)
else:
if self._flattener is not None:
# Translate action into list
action = self._flattener.lookup_action(action)
info = self._env.step(action)[self.brain_name]
n_agents = len(info.agents)
self._check_agents(n_agents)
self._current_state = info
if not self._multiagent:
obs, reward, done, info = self._single_step(info)
self.game_over = done
else:
obs, reward, done, info = self._multi_step(info)
self.game_over = all(done)
return obs, reward, done, info
def _single_step(self, info):
if self.use_visual:
visual_obs = info.visual_observations
if self._allow_multiple_visual_obs:
visual_obs_list = []
for obs in visual_obs:
visual_obs_list.append(self._preprocess_single(obs[0]))
self.visual_obs = visual_obs_list
else:
self.visual_obs = self._preprocess_single(visual_obs[0][0])
default_observation = self.visual_obs
else:
default_observation = info.vector_observations[0, :]
return (
default_observation,
info.rewards[0],
info.local_done[0],
{"text_observation": info.text_observations[0], "brain_info": info},
)
def _preprocess_single(self, single_visual_obs):
if self.uint8_visual:
return (255.0 * single_visual_obs).astype(np.uint8)
else:
return single_visual_obs
def _multi_step(self, info):
if self.use_visual:
self.visual_obs = self._preprocess_multi(info.visual_observations)
default_observation = self.visual_obs
else:
default_observation = info.vector_observations
return (
list(default_observation),
info.rewards,
info.local_done,
{"text_observation": info.text_observations, "brain_info": info},
)
def _preprocess_multi(self, multiple_visual_obs):
if self.uint8_visual:
return [
(255.0 * _visual_obs).astype(np.uint8)
for _visual_obs in multiple_visual_obs
]
else:
return multiple_visual_obs
def render(self, mode="rgb_array"):
return self.visual_obs
def close(self):
"""Override _close in your subclass to perform any necessary cleanup.
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
self._env.close()
def get_action_meanings(self):
return self.action_meanings
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Currently not implemented.
"""
logger.warn("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents):
if not self._multiagent and n_agents > 1:
raise UnityGymException(
"The environment was launched as a single-agent environment, however"
"there is more than one agent in the scene."
)
elif self._multiagent and n_agents <= 1:
raise UnityGymException(
"The environment was launched as a mutli-agent environment, however"
"there is only one agent in the scene."
)
if self._n_agents is None:
self._n_agents = n_agents
logger.info("{} agents within environment.".format(n_agents))
elif self._n_agents != n_agents:
raise UnityGymException(
"The number of agents in the environment has changed since "
"initialization. This is not supported."
)
@property
def metadata(self):
return {"render.modes": ["rgb_array"]}
@property
def reward_range(self):
return -float("inf"), float("inf")
@property
def spec(self):
return None
@property
def action_space(self):
return self._action_space
@property
def observation_space(self):
return self._observation_space
@property
def number_agents(self):
return self._n_agents
class ActionFlattener:
"""
Flattens branched discrete action spaces into single-branch discrete action spaces.
"""
def __init__(self, branched_action_space):
"""
Initialize the flattener.
:param branched_action_space: A List containing the sizes of each branch of the action
space, e.g. [2,3,3] for three branches with size 2, 3, and 3 respectively.
"""
self._action_shape = branched_action_space
self.action_lookup = self._create_lookup(self._action_shape)
self.action_space = spaces.Discrete(len(self.action_lookup))
@classmethod
def _create_lookup(self, branched_action_space):
"""
Creates a Dict that maps discrete actions (scalars) to branched actions (lists).
Each key in the Dict maps to one unique set of branched actions, and each value
contains the List of branched actions.
"""
possible_vals = [range(_num) for _num in branched_action_space]
all_actions = [list(_action) for _action in itertools.product(*possible_vals)]
# Dict should be faster than List for large action spaces
action_lookup = {
_scalar: _action for (_scalar, _action) in enumerate(all_actions)
}
return action_lookup
def lookup_action(self, action):
"""
Convert a scalar discrete action into a unique set of branched actions.
:param: action: A scalar value representing one of the discrete actions.
:return: The List containing the branched actions.
"""
return self.action_lookup[action]
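
The diff above moves the contents of gym-unity/gym_unity/envs/unity_env.py into gym_unity/envs/__init__.py, so UnityEnv and ActionFlattener are now imported from gym_unity.envs directly. A short usage sketch based only on the class definitions shown above; the build path "path/to/UnityBuild" is a placeholder, not something from this commit:

    from gym_unity.envs import UnityEnv, ActionFlattener

    # ActionFlattener turns a branched discrete space, e.g. two branches of
    # sizes 2 and 3, into a single Discrete(6) space. Scalar actions map back
    # to branched actions in itertools.product order.
    flattener = ActionFlattener([2, 3])
    print(flattener.action_space)      # Discrete(6)
    print(flattener.lookup_action(4))  # [1, 1]

    # Wrapping a Unity build (placeholder path):
    env = UnityEnv("path/to/UnityBuild", worker_id=0, use_visual=False, flatten_branched=True)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    env.close()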

6
gym-unity/gym_unity/tests/test_gym.py


from gym_unity.envs import UnityEnv, UnityGymException
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper(mock_env):
mock_brain = create_mock_brainparams()
mock_braininfo = create_mock_vector_braininfo()

assert isinstance(info, dict)
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_multi_agent(mock_env):
mock_brain = create_mock_brainparams()
mock_braininfo = create_mock_vector_braininfo(num_agents=2)

assert isinstance(info, dict)
@mock.patch("gym_unity.envs.unity_env.UnityEnvironment")
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_branched_flatten(mock_env):
mock_brain = create_mock_brainparams(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]

5
ml-agents-envs/mlagents/envs/__init__.py


from .brain import AllBrainInfo, BrainInfo, BrainParameters
from .action_info import ActionInfo, ActionInfoOutputs
from .policy import Policy
from .environment import *
from .exception import *

2
ml-agents-envs/mlagents/envs/base_unity_environment.py


from abc import ABC, abstractmethod
from typing import Dict
from mlagents.envs import AllBrainInfo, BrainParameters
from mlagents.envs.brain import AllBrainInfo, BrainParameters
class BaseUnityEnvironment(ABC):

3
ml-agents-envs/mlagents/envs/communicator.py


import logging
from typing import Optional
from .communicator_objects import UnityOutput, UnityInput
from mlagents.envs.communicator_objects.unity_output_pb2 import UnityOutput
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInput
logger = logging.getLogger("mlagents.envs")

22
ml-agents-envs/mlagents/envs/communicator_objects/__init__.py


from .agent_action_proto_pb2 import *
from .agent_info_proto_pb2 import *
from .brain_parameters_proto_pb2 import *
from .command_proto_pb2 import *
from .custom_action_pb2 import *
from .custom_observation_pb2 import *
from .custom_reset_parameters_pb2 import *
from .demonstration_meta_proto_pb2 import *
from .engine_configuration_proto_pb2 import *
from .environment_parameters_proto_pb2 import *
from .header_pb2 import *
from .resolution_proto_pb2 import *
from .space_type_proto_pb2 import *
from .unity_input_pb2 import *
from .unity_message_pb2 import *
from .unity_output_pb2 import *
from .unity_rl_initialization_input_pb2 import *
from .unity_rl_initialization_output_pb2 import *
from .unity_rl_input_pb2 import *
from .unity_rl_output_pb2 import *
from .unity_to_external_pb2 import *
from .unity_to_external_pb2_grpc import *

4
ml-agents-envs/mlagents/envs/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple, Optional
from mlagents.envs import AllBrainInfo, BrainParameters, Policy, ActionInfo
from mlagents.envs.brain import AllBrainInfo, BrainParameters
from mlagents.envs.policy import Policy
from mlagents.envs.action_info import ActionInfo
class EnvironmentStep(NamedTuple):

20
ml-agents-envs/mlagents/envs/environment.py


import numpy as np
import os
import subprocess
from typing import *
from typing import Dict, List, Optional
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.timers import timed, hierarchical_timer

UnityTimeOutException,
)
from .communicator_objects import (
UnityRLInput,
UnityRLOutput,
AgentActionProto,
from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInput
from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutput
from mlagents.envs.communicator_objects.agent_action_proto_pb2 import AgentActionProto
from mlagents.envs.communicator_objects.environment_parameters_proto_pb2 import (
)
from mlagents.envs.communicator_objects.unity_rl_initialization_input_pb2 import (
)
from mlagents.envs.communicator_objects.unity_rl_initialization_output_pb2 import (
UnityInput,
UnityOutput,
CustomResetParameters,
CustomAction,
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInput
from mlagents.envs.communicator_objects.custom_action_pb2 import CustomAction
from .rpc_communicator import RpcCommunicator
from sys import platform

14
ml-agents-envs/mlagents/envs/mock_communicator.py


from .communicator import Communicator
from .communicator_objects import (
UnityOutput,
UnityInput,
ResolutionProto,
from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutput
from mlagents.envs.communicator_objects.brain_parameters_proto_pb2 import (
)
from mlagents.envs.communicator_objects.unity_rl_initialization_output_pb2 import (
AgentInfoProto,
UnityRLOutput,
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInput
from mlagents.envs.communicator_objects.unity_output_pb2 import UnityOutput
from mlagents.envs.communicator_objects.resolution_proto_pb2 import ResolutionProto
from mlagents.envs.communicator_objects.agent_info_proto_pb2 import AgentInfoProto
class MockCommunicator(Communicator):

4
ml-agents-envs/mlagents/envs/policy.py


from abc import ABC, abstractmethod
from mlagents.envs import BrainInfo
from mlagents.envs import ActionInfo
from mlagents.envs.brain import BrainInfo
from mlagents.envs.action_info import ActionInfo
class Policy(ABC):

6
ml-agents-envs/mlagents/envs/rpc_communicator.py


from concurrent.futures import ThreadPoolExecutor
from .communicator import Communicator
from .communicator_objects import (
from mlagents.envs.communicator_objects.unity_to_external_pb2_grpc import (
from .communicator_objects import UnityMessage, UnityInput, UnityOutput
from mlagents.envs.communicator_objects.unity_message_pb2 import UnityMessage
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInput
from mlagents.envs.communicator_objects.unity_output_pb2 import UnityOutput
from .exception import UnityTimeOutException, UnityWorkerInUseException
logger = logging.getLogger("mlagents.envs")

4
ml-agents-envs/mlagents/envs/sampler_class.py


import numpy as np
from typing import *
from functools import *
from collections import OrderedDict
from typing import Union, Optional, Type, List, Dict, Any
from abc import ABC, abstractmethod
from .exception import SamplerException

3
ml-agents-envs/mlagents/envs/simple_env_manager.py


from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.env_manager import EnvManager, EnvironmentStep
from mlagents.envs.timers import timed
from mlagents.envs import ActionInfo, BrainParameters
from mlagents.envs.action_info import ActionInfo
from mlagents.envs.brain import BrainParameters
class SimpleEnvManager(EnvManager):

4
ml-agents-envs/mlagents/envs/socket_communicator.py


from typing import Optional
from .communicator import Communicator
from .communicator_objects import UnityMessage, UnityOutput, UnityInput
from mlagents.envs.communicator_objects.unity_message_pb2 import UnityMessage
from mlagents.envs.communicator_objects.unity_output_pb2 import UnityOutput
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInput
from .exception import UnityTimeOutException

7
ml-agents-envs/mlagents/envs/subprocess_env_manager.py


from typing import *
from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set
from mlagents.envs import UnityEnvironment
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.exception import UnityCommunicationException
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection

reset_timers,
get_timer_root,
)
from mlagents.envs import AllBrainInfo, BrainParameters, ActionInfo
from mlagents.envs.brain import AllBrainInfo, BrainParameters
from mlagents.envs.action_info import ActionInfo
class EnvironmentCommand(NamedTuple):

27
ml-agents-envs/mlagents/envs/tests/test_envs.py


import numpy as np
from mlagents.envs import (
UnityEnvironment,
UnityEnvironmentException,
UnityActionException,
BrainInfo,
)
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.exception import UnityEnvironmentException, UnityActionException
from mlagents.envs.brain import BrainInfo
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_initialization(mock_communicator, mock_launcher):
mock_communicator.return_value = MockCommunicator(
discrete_action=False, visual_inputs=0

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_reset(mock_communicator, mock_launcher):
mock_communicator.return_value = MockCommunicator(
discrete_action=False, visual_inputs=0

)
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_step(mock_communicator, mock_launcher):
mock_communicator.return_value = MockCommunicator(
discrete_action=False, visual_inputs=0

assert brain_info["RealFakeBrain"].local_done[2]
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_close(mock_communicator, mock_launcher):
comm = MockCommunicator(discrete_action=False, visual_inputs=0)
mock_communicator.return_value = comm
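
Because UnityEnvironment is presumably no longer re-exported from the mlagents.envs package __init__, these tests now patch the class on the module that defines it; the old package-level patch targets would no longer resolve. A sketch of the mechanical change, with decorator targets copied from the diff above and the test body elided:

    import unittest.mock as mock

    # Old target (re-export removed by this commit):
    #   @mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
    # New target: the attribute on the module where the class is defined.
    @mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
    @mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
    def test_initialization(mock_communicator, mock_launcher):
        # The innermost decorator (get_communicator) maps to the first argument.
        ...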

4
ml-agents-envs/mlagents/envs/tests/test_rpc_communicator.py


import pytest
from mlagents.envs import RpcCommunicator
from mlagents.envs import UnityWorkerInUseException
from mlagents.envs.rpc_communicator import RpcCommunicator
from mlagents.envs.exception import UnityWorkerInUseException
def test_rpc_communicator_checks_port_on_create():

1
ml-agents-envs/mlagents/envs/tests/test_sampler_class.py


from math import isclose
import pytest
from mlagents.envs.sampler_class import SamplerManager

3
ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py


import unittest.mock as mock
from unittest.mock import Mock, MagicMock
import unittest
import cloudpickle
EnvironmentCommand,
worker,
StepResponse,
)
from mlagents.envs.base_unity_environment import BaseUnityEnvironment

20
ml-agents/mlagents/trainers/__init__.py


from .buffer import *
from .curriculum import *
from .meta_curriculum import *
from .models import *
from .trainer_metrics import *
from .trainer import *
from .tf_policy import *
from .trainer_controller import *
from .bc.models import *
from .bc.offline_trainer import *
from .bc.online_trainer import *
from .bc.policy import *
from .ppo.models import *
from .ppo.trainer import *
from .ppo.policy import *
from .sac.models import *
from .sac.trainer import *
from .sac.policy import *
from .exception import *
from .demo_loader import *

4
ml-agents/mlagents/trainers/bc/__init__.py


from .models import *
from .online_trainer import *
from .offline_trainer import *
from .policy import *

4
ml-agents/mlagents/trainers/bc/online_trainer.py


import logging
import numpy as np
from mlagents.envs import AllBrainInfo
from mlagents.trainers import ActionInfoOutputs
from mlagents.envs.brain import AllBrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.trainers.bc.trainer import BCTrainer
logger = logging.getLogger("mlagents.trainers")

5
ml-agents/mlagents/trainers/bc/trainer.py


import logging
import numpy as np
import tensorflow as tf
from mlagents.envs import AllBrainInfo
from mlagents.trainers import ActionInfoOutputs
from mlagents.envs.brain import AllBrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.trainers.bc.policy import BCPolicy
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.trainer import Trainer

3
ml-agents/mlagents/trainers/buffer.py


import random
from collections import defaultdict
import numpy as np
import h5py

1
ml-agents/mlagents/trainers/components/bc/__init__.py


from .module import BCModule

1
ml-agents/mlagents/trainers/components/bc/model.py


import tensorflow as tf
import numpy as np
from mlagents.trainers.models import LearningModel

110
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


from .reward_signal import *
import logging
from typing import Any, Dict, List
from collections import namedtuple
import numpy as np
import abc
import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel
logger = logging.getLogger("mlagents.trainers")
RewardSignalResult = namedtuple(
"RewardSignalResult", ["scaled_reward", "unscaled_reward"]
)
class RewardSignal(abc.ABC):
def __init__(
self,
policy: TFPolicy,
policy_model: LearningModel,
strength: float,
gamma: float,
):
"""
Initializes a reward signal. At minimum, you must pass in the policy it is being applied to,
the reward strength, and the gamma (discount factor).
:param policy: The Policy object (e.g. PPOPolicy) that this Reward Signal will apply to.
:param strength: The strength of the reward. The reward's raw value will be multiplied by this value.
:param gamma: The time discounting factor used for this reward.
:return: A RewardSignal object.
"""
class_name = self.__class__.__name__
short_name = class_name.replace("RewardSignal", "")
self.stat_name = f"Policy/{short_name} Reward"
self.value_name = f"Policy/{short_name} Value Estimate"
# Terminate discounted reward computation at Done. Can disable to mitigate positive bias in rewards with
# no natural end, e.g. GAIL or Curiosity
self.use_terminal_states = True
self.update_dict: Dict[str, tf.Tensor] = {}
self.gamma = gamma
self.policy = policy
self.policy_model = policy_model
self.strength = strength
self.stats_name_to_update_name: Dict[str, str] = {}
def evaluate(
self, current_info: BrainInfo, next_info: BrainInfo
) -> RewardSignalResult:
"""
Evaluates the reward for the agents present in current_info given the next_info
:param current_info: The current BrainInfo.
:param next_info: The BrainInfo from the next timestep.
:return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
"""
return RewardSignalResult(
self.strength * np.zeros(len(current_info.agents)),
np.zeros(len(current_info.agents)),
)
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
"""
Evaluates the reward for the data present in the Dict mini_batch. Note the distinction from
evaluate(), which takes in two BrainInfos. This reflects the different data formats (i.e. from the Buffer
vs. before being placed into the Buffer). Use this when evaluating a reward function drawn straight from a
Buffer.
:param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
when drawing from the update buffer.
:return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
"""
mini_batch_len = len(next(iter(mini_batch.values())))
return RewardSignalResult(
self.strength * np.zeros(mini_batch_len), np.zeros(mini_batch_len)
)
def prepare_update(
self,
policy_model: LearningModel,
mini_batch: Dict[str, np.ndarray],
num_sequences: int,
) -> Dict[tf.Tensor, Any]:
"""
If the reward signal has an internal model (e.g. GAIL or Curiosity), get the feed_dict
needed to update that model.
:param update_buffer: An AgentBuffer that contains the live data from which to update.
:param n_sequences: The number of sequences in the training buffer.
:return: A dict that corresponds to the feed_dict needed for the update.
"""
return {}
@classmethod
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Check the config dict, and throw an error if there are missing hyperparameters.
"""
param_keys = param_keys or []
for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for {1}.".format(
k, cls.__name__
)
)
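
The RewardSignal base class above defines the interface that the extrinsic, curiosity, and GAIL signals in the following files implement. As an illustration of that interface only (ConstantRewardSignal is hypothetical and not part of this commit), a minimal subclass might look like:

    import numpy as np

    from mlagents.envs.brain import BrainInfo
    from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult


    class ConstantRewardSignal(RewardSignal):
        """Hypothetical signal: every agent gets an unscaled reward of 1.0 each step."""

        def evaluate(
            self, current_info: BrainInfo, next_info: BrainInfo
        ) -> RewardSignalResult:
            unscaled = np.ones(len(current_info.agents))
            # RewardSignalResult is (scaled_reward, unscaled_reward); strength scales the raw value.
            return RewardSignalResult(self.strength * unscaled, unscaled)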

1
ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py


from .signal import CuriosityRewardSignal

1
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel
from mlagents.trainers.tf_policy import TFPolicy

1
ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py


from .signal import ExtrinsicRewardSignal

1
ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py


import numpy as np
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel

1
ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py


from .signal import GAILRewardSignal

1
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel

2
ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py


from typing import Any, Dict, Type
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.components.reward_signals.reward_signal import RewardSignal
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.reward_signals.extrinsic.signal import (
ExtrinsicRewardSignal,
)

6
ml-agents/mlagents/trainers/demo_loader.py


from typing import List, Tuple
from mlagents.trainers.buffer import Buffer
from mlagents.envs.brain import BrainParameters, BrainInfo
from mlagents.envs.communicator_objects import (
AgentInfoProto,
from mlagents.envs.communicator_objects.agent_info_proto_pb2 import AgentInfoProto
from mlagents.envs.communicator_objects.brain_parameters_proto_pb2 import (
)
from mlagents.envs.communicator_objects.demonstration_meta_proto_pb2 import (
DemonstrationMetaProto,
)
from google.protobuf.internal.decoder import _DecodeVarint32 # type: ignore

4
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
from mlagents.trainers import MetaCurriculumError, MetaCurriculum
from mlagents.trainers.meta_curriculum import MetaCurriculumError, MetaCurriculum
from mlagents.envs import UnityEnvironment
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import UnityEnvironmentException, SamplerException
from mlagents.envs.base_unity_environment import BaseUnityEnvironment

3
ml-agents/mlagents/trainers/ppo/__init__.py


from .models import *
from .trainer import *
from .policy import *

2
ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py


import logging
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib

from mlagents.trainers.components.reward_signals.reward_signal_factory import (
create_reward_signal,
)
from mlagents.trainers.components.bc.module import BCModule
# Variable scope under which created variables will be placed
TOWER_SCOPE_NAME = "tower"

2
ml-agents/mlagents/trainers/ppo/policy.py


import tensorflow as tf
from mlagents.envs.timers import timed
from mlagents.trainers import BrainInfo, ActionInfo
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.tf_policy import TFPolicy

7
ml-agents/mlagents/trainers/ppo/trainer.py


import logging
from collections import defaultdict
from typing import List, Any, Dict
from typing import Dict
from mlagents.envs import AllBrainInfo, BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.envs.brain import AllBrainInfo
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.envs.action_info import ActionInfoOutputs
logger = logging.getLogger("mlagents.trainers")

12
ml-agents/mlagents/trainers/rl_trainer.py


# # Unity ML-Agents Toolkit
import logging
from typing import Dict, List, Deque, Any, Optional, NamedTuple
import os
import tensorflow as tf
from typing import Dict, List, Any, NamedTuple
from collections import deque, defaultdict
from mlagents.envs import UnityException, AllBrainInfo, ActionInfoOutputs, BrainInfo
from mlagents.envs.brain import AllBrainInfo, BrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.trainers.tf_policy import Policy
from mlagents.trainers.components.reward_signals.reward_signal import RewardSignalResult
from mlagents.envs import BrainParameters
from mlagents.trainers.components.reward_signals import RewardSignalResult
LOGGER = logging.getLogger("mlagents.trainers")

3
ml-agents/mlagents/trainers/sac/__init__.py


from .models import *
from .trainer import *
from .policy import *

8
ml-agents/mlagents/trainers/sac/policy.py


import logging
from typing import Dict, List, Any
from typing import Dict, Any
from mlagents.trainers import BrainInfo, ActionInfo, BrainParameters
from mlagents.envs.brain import BrainInfo, BrainParameters
from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.sac.models import SACModel
from mlagents.trainers.tf_policy import TFPolicy

from mlagents.trainers.components.reward_signals.reward_signal import RewardSignal
from mlagents.trainers.components.bc import BCModule
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.bc.module import BCModule
logger = logging.getLogger("mlagents.trainers")

11
ml-agents/mlagents/trainers/sac/trainer.py


# and implemented in https://github.com/hill-a/stable-baselines
import logging
from collections import deque, defaultdict
from typing import List, Any, Dict
from collections import defaultdict
from typing import List, Dict
import tensorflow as tf
from mlagents.envs import AllBrainInfo, BrainInfo
from mlagents.envs.brain import AllBrainInfo
from mlagents.envs.timers import timed, hierarchical_timer
from mlagents.envs.timers import timed
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.components.reward_signals import RewardSignalResult
LOGGER = logging.getLogger("mlagents.trainers")

1
ml-agents/mlagents/trainers/tests/mock_brain.py


import unittest.mock as mock
import pytest
import numpy as np
from mlagents.trainers.buffer import Buffer

2
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import unittest.mock as mock
import pytest
import os
import tempfile

22
ml-agents/mlagents/trainers/tests/test_bc.py


import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.bc.policy import BCPolicy
from mlagents.trainers.bc.offline_trainer import BCTrainer
from mlagents.envs import UnityEnvironment
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator

assert trainer.cumulative_rewards[agent_id] == 0
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_bc_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
tf.reset_default_graph()
mock_communicator.return_value = MockCommunicator(

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_cc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_dc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_visual_cc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

10
ml-agents/mlagents/trainers/tests/test_bcmodule.py


# Test default values
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bcmodule_defaults(mock_env):
# See if default values match
mock_brain = mb.create_mock_3dball_brain()

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bcmodule_update(mock_env, trainer_config):
mock_brain = mb.create_mock_3dball_brain()
env, policy = create_policy_with_bc_mock(

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bcmodule_rnn_update(mock_env, trainer_config):
mock_brain = mb.create_mock_3dball_brain()
env, policy = create_policy_with_bc_mock(

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bcmodule_dc_visual_update(mock_env, trainer_config):
mock_brain = mb.create_mock_banana_brain()
env, policy = create_policy_with_bc_mock(

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bcmodule_rnn_dc_update(mock_env, trainer_config):
mock_brain = mb.create_mock_banana_brain()
env, policy = create_policy_with_bc_mock(

3
ml-agents/mlagents/trainers/tests/test_curriculum.py


import pytest
import json
from mlagents.trainers import Curriculum
from mlagents.trainers.curriculum import Curriculum
dummy_curriculum_json_str = """

2
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import unittest.mock as mock
import pytest
import os
from mlagents.trainers.demo_loader import load_demonstration, make_demo_buffer

5
ml-agents/mlagents/trainers/tests/test_learn.py


import pytest
from unittest.mock import *
from mlagents.trainers import learn, TrainerController
from unittest.mock import MagicMock, patch
from mlagents.trainers import learn
from mlagents.trainers.trainer_controller import TrainerController
@pytest.fixture

26
ml-agents/mlagents/trainers/tests/test_meta_curriculum.py


import pytest
from unittest.mock import patch, call, Mock
from unittest.mock import patch, call
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError

return {"Brain1": 7, "Brain2": 8}
@patch("mlagents.trainers.Curriculum.get_config", return_value={})
@patch("mlagents.trainers.Curriculum.__init__", return_value=None)
@patch("mlagents.trainers.curriculum.Curriculum.get_config", return_value={})
@patch("mlagents.trainers.curriculum.Curriculum.__init__", return_value=None)
@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"])
def test_init_meta_curriculum_happy_path(
listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters

MetaCurriculum("test/", default_reset_parameters)
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_set_lesson_nums(curriculum_a, curriculum_b):
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}

assert curriculum_b.lesson_num == 3
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_increment_lessons(curriculum_a, curriculum_b, measure_vals):
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}

curriculum_b.increment_lesson.assert_called_with(0.3)
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_increment_lessons_with_reward_buff_sizes(
curriculum_a, curriculum_b, measure_vals, reward_buff_sizes
):

curriculum_b.increment_lesson.assert_not_called()
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_set_all_curriculums_to_lesson_num(curriculum_a, curriculum_b):
meta_curriculum = MetaCurriculumTest(
{"Brain1": curriculum_a, "Brain2": curriculum_b}

assert curriculum_b.lesson_num == 2
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
@patch("mlagents.trainers.curriculum.Curriculum")
def test_get_config(
curriculum_a, curriculum_b, default_reset_parameters, more_reset_parameters
):

6
ml-agents/mlagents/trainers/tests/test_multigpu.py


import unittest.mock as mock
import pytest
import numpy as np
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy, get_devices
from mlagents.envs import UnityEnvironment, BrainParameters
from mlagents.envs.mock_communicator import MockCommunicator
from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy
from mlagents.trainers.tests.mock_brain import create_mock_brainparams

5
ml-agents/mlagents/trainers/tests/test_policy.py


from mlagents.trainers.tf_policy import *
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.envs.brain import BrainInfo
from mlagents.envs.action_info import ActionInfo
import numpy as np
def basic_mock_brain():

35
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.rl_trainer import AllRewardsOutput
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.envs import UnityEnvironment, BrainParameters
from mlagents.envs.brain import BrainParameters
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator

)
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
tf.reset_default_graph()
mock_communicator.return_value = MockCommunicator(

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
tf.reset_default_graph()
mock_communicator.return_value = MockCommunicator(

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_cc_vector(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_dc_visual(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_dc_vector_rnn(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

27
ml-agents/mlagents/trainers/tests/test_reward_signals.py


import unittest.mock as mock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
import tensorflow as tf
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.ppo.trainer import discount_rewards
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.demo_loader import make_demo_buffer
from mlagents.envs import UnityEnvironment
from mlagents.envs.mock_communicator import MockCommunicator
def ppo_dummy_config():

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_gail_cc(mock_env, trainer_config, gail_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, gail_dummy_config, False, False, False

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_gail_dc_visual(mock_env, trainer_config, gail_dummy_config):
gail_dummy_config["gail"]["demo_path"] = (
os.path.dirname(os.path.abspath(__file__)) + "/testdcvis.demo"

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_gail_rnn(mock_env, trainer_config, gail_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, gail_dummy_config, True, False, False

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_curiosity_cc(mock_env, trainer_config, curiosity_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, curiosity_dummy_config, False, False, False

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_curiosity_dc(mock_env, trainer_config, curiosity_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, curiosity_dummy_config, False, True, False

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_curiosity_visual(mock_env, trainer_config, curiosity_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, curiosity_dummy_config, False, False, True

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_curiosity_rnn(mock_env, trainer_config, curiosity_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, curiosity_dummy_config, True, False, False

@pytest.mark.parametrize(
"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_extrinsic(mock_env, trainer_config, curiosity_dummy_config):
env, policy = create_policy_mock(
mock_env, trainer_config, curiosity_dummy_config, False, False, False

45
ml-agents/mlagents/trainers/tests/test_sac.py


import unittest.mock as mock
import pytest
import tempfile
import math
import numpy as np
import tensorflow as tf

from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.tests.test_simple_rl import Simple1DEnvironment, SimpleEnvManager
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.envs import UnityEnvironment
from mlagents.envs.environment import UnityEnvironment
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects import AgentInfoProto
from mlagents.envs.sampler_class import SamplerManager
from mlagents.trainers.tests import mock_brain as mb

return env, policy
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_sac_cc_policy(mock_env, dummy_config):
# Test evaluate
tf.reset_default_graph()

env.close()
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_sac_update_reward_signals(mock_env, dummy_config):
# Test evaluate
tf.reset_default_graph()

env.close()
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_sac_dc_policy(mock_env, dummy_config):
# Test evaluate
tf.reset_default_graph()

env.close()
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_sac_visual_policy(mock_env, dummy_config):
# Test evaluate
tf.reset_default_graph()

assert type(run_out) is dict
@mock.patch("mlagents.envs.UnityEnvironment")
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_sac_rnn_policy(mock_env, dummy_config):
# Test evaluate
tf.reset_default_graph()

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_cc_vector(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_cc_visual(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_dc_visual(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_dc_vector(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_dc_vector_rnn(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

env.close()
@mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
@mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents.envs.environment.UnityEnvironment.get_communicator")
def test_sac_model_cc_vector_rnn(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:

4
ml-agents/mlagents/trainers/tests/test_simple_rl.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects import AgentInfoProto
from mlagents.envs.brain import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects.agent_info_proto_pb2 import AgentInfoProto
from mlagents.envs.simple_env_manager import SimpleEnvManager
from mlagents.envs.sampler_class import SamplerManager

2
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import *
from unittest.mock import MagicMock, Mock, patch
import yaml
import pytest

2
ml-agents/mlagents/trainers/tests/test_trainer_metrics.py


import unittest.mock as mock
from mlagents.trainers import TrainerMetrics
from mlagents.trainers.trainer_metrics import TrainerMetrics
class TestTrainerMetrics:

8
ml-agents/mlagents/trainers/tests/test_trainer_util.py


)
@patch("mlagents.envs.BrainParameters")
@patch("mlagents.envs.brain.BrainParameters")
def test_initialize_trainer_parameters_override_defaults(BrainParametersMock):
summaries_dir = "test_dir"
run_id = "testrun"

assert isinstance(trainers["testbrain"], OfflineBCTrainer)
@patch("mlagents.envs.BrainParameters")
@patch("mlagents.envs.brain.BrainParameters")
def test_initialize_online_bc_trainer(BrainParametersMock):
summaries_dir = "test_dir"
run_id = "testrun"

assert isinstance(trainers["testbrain"], OnlineBCTrainer)
@patch("mlagents.envs.BrainParameters")
@patch("mlagents.envs.brain.BrainParameters")
def test_initialize_ppo_trainer(BrainParametersMock):
brain_params_mock = BrainParametersMock()
external_brains = {"testbrain": BrainParametersMock()}

assert isinstance(trainers["testbrain"], PPOTrainer)
@patch("mlagents.envs.BrainParameters")
@patch("mlagents.envs.brain.BrainParameters")
def test_initialize_invalid_trainer_raises_exception(BrainParametersMock):
summaries_dir = "test_dir"
run_id = "testrun"

7
ml-agents/mlagents/trainers/tf_policy.py


import numpy as np
import tensorflow as tf
from mlagents.trainers import UnityException
from mlagents.envs import Policy, ActionInfo
from mlagents.envs.exception import UnityException
from mlagents.envs.policy import Policy
from mlagents.envs.action_info import ActionInfo
from mlagents.envs import BrainInfo
from mlagents.envs.brain import BrainInfo
logger = logging.getLogger("mlagents.trainers")

10
ml-agents/mlagents/trainers/trainer.py


import numpy as np
from collections import deque, defaultdict
from mlagents.envs import UnityException, AllBrainInfo, ActionInfoOutputs, BrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.envs.exception import UnityException
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.tf_policy import Policy
from mlagents.envs import BrainParameters
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.envs.brain import BrainParameters, AllBrainInfo
LOGGER = logging.getLogger("mlagents.trainers")

)
self.summary_writer = tf.summary.FileWriter(self.summary_path)
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy: Policy = None
self.policy: TFPolicy = None
self.step: int = 0
def check_param_keys(self):

4
ml-agents/mlagents/trainers/trainer_controller.py


import os
import json
import logging
from typing import *
from typing import Dict, List, Optional
import numpy as np
import tensorflow as tf

)
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
from mlagents.trainers import Trainer, TrainerMetrics
from mlagents.trainers.trainer import Trainer, TrainerMetrics
from mlagents.trainers.meta_curriculum import MetaCurriculum

4
ml-agents/mlagents/trainers/trainer_util.py


from typing import Any, Dict
from mlagents.trainers import MetaCurriculum
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers import Trainer
from mlagents.trainers.trainer import Trainer
from mlagents.envs.brain import BrainParameters
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.sac.trainer import SACTrainer

4
setup.cfg


# Black tends to introduce things flake8 doesn't like, such as "line break before binary operator"
# or whitespace before ':'. Rather than fight with black, just ignore these for now.
W503, E203,
# "may be undefined, or defined from star imports" and related warnings
# We should stop doing these, but for now, leave them in.
F405, F403, F401,
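Removing these codes from the ignore list means flake8 now flags the patterns the comment describes. For reference, a minimal illustrative file (not from the repo) and the kind of violation each code reports; the messages paraphrase typical flake8 output and may vary slightly by version.

# lint_demo.py -- illustrative only
import os                # F401: 'os' imported but unused
from math import *       # F403: 'from math import *' used; unable to detect undefined names

print(sqrt(4))           # F405: 'sqrt' may be undefined, or defined from star imports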

1
utils/validate_meta_files.py


import json
import os

371
gym-unity/gym_unity/envs/unity_env.py


import logging
import itertools
import gym
import numpy as np
from mlagents.envs import UnityEnvironment
from gym import error, spaces
class UnityGymException(error.Error):
"""
Any error related to the gym wrapper of ml-agents.
"""
pass
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gym_unity")
class UnityEnv(gym.Env):
"""
Provides Gym wrapper for Unity Learning Environments.
Multi-agent environments use lists for object types, as done here:
https://github.com/openai/multiagent-particle-envs
"""
def __init__(
self,
environment_filename: str,
worker_id: int = 0,
use_visual: bool = False,
uint8_visual: bool = False,
multiagent: bool = False,
flatten_branched: bool = False,
no_graphics: bool = False,
allow_multiple_visual_obs: bool = False,
):
"""
Environment initialization
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
:param worker_id: Worker number for environment.
:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
"""
self._env = UnityEnvironment(
environment_filename, worker_id, no_graphics=no_graphics
)
self.name = self._env.academy_name
self.visual_obs = None
self._current_state = None
self._n_agents = None
self._multiagent = multiagent
self._flattener = None
self.game_over = (
False
) # Hidden flag used by Atari environments to determine if the game is over
self._allow_multiple_visual_obs = allow_multiple_visual_obs
# Check brain configuration
if len(self._env.brains) != 1:
raise UnityGymException(
"There can only be one brain in a UnityEnvironment "
"if it is wrapped in a gym."
)
if len(self._env.external_brain_names) <= 0:
raise UnityGymException(
"There are not any external brain in the UnityEnvironment"
)
self.brain_name = self._env.external_brain_names[0]
brain = self._env.brains[self.brain_name]
if use_visual and brain.number_visual_observations == 0:
raise UnityGymException(
"`use_visual` was set to True, however there are no"
" visual observations as part of this environment."
)
self.use_visual = brain.number_visual_observations >= 1 and use_visual
if not use_visual and uint8_visual:
logger.warning(
"`uint8_visual was set to true, but visual observations are not in use. "
"This setting will not have any effect."
)
else:
self.uint8_visual = uint8_visual
if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
logger.warning(
"The environment contains more than one visual observation. "
"You must define allow_multiple_visual_obs=True to received them all. "
"Otherwise, please note that only the first will be provided in the observation."
)
if brain.num_stacked_vector_observations != 1:
raise UnityGymException(
"There can only be one stacked vector observation in a UnityEnvironment "
"if it is wrapped in a gym."
)
# Check for number of agents in scene.
initial_info = self._env.reset()[self.brain_name]
self._check_agents(len(initial_info.agents))
# Set observation and action spaces
if brain.vector_action_space_type == "discrete":
if len(brain.vector_action_space_size) == 1:
self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
else:
if flatten_branched:
self._flattener = ActionFlattener(brain.vector_action_space_size)
self._action_space = self._flattener.action_space
else:
self._action_space = spaces.MultiDiscrete(
brain.vector_action_space_size
)
else:
if flatten_branched:
logger.warning(
"The environment has a non-discrete action space. It will "
"not be flattened."
)
high = np.array([1] * brain.vector_action_space_size[0])
self._action_space = spaces.Box(-high, high, dtype=np.float32)
high = np.array([np.inf] * brain.vector_observation_space_size)
self.action_meanings = brain.vector_action_descriptions
if self.use_visual:
if brain.camera_resolutions[0]["blackAndWhite"]:
depth = 1
else:
depth = 3
self._observation_space = spaces.Box(
0,
1,
dtype=np.float32,
shape=(
brain.camera_resolutions[0]["height"],
brain.camera_resolutions[0]["width"],
depth,
),
)
else:
self._observation_space = spaces.Box(-high, high, dtype=np.float32)
def reset(self):
"""Resets the state of the environment and returns an initial observation.
In the case of multi-agent environments, this is a list.
Returns: observation (object/list): the initial observation of the
space.
"""
info = self._env.reset()[self.brain_name]
n_agents = len(info.agents)
self._check_agents(n_agents)
self.game_over = False
if not self._multiagent:
obs, reward, done, info = self._single_step(info)
else:
obs, reward, done, info = self._multi_step(info)
return obs
def step(self, action):
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, done, info).
In the case of multi-agent environments, these are lists.
Args:
action (object/list): an action provided by the environment
Returns:
observation (object/list): agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
done (boolean/list): whether the episode has ended.
info (dict): contains auxiliary diagnostic information, including BrainInfo.
"""
# Use random actions for all other agents in environment.
if self._multiagent:
if not isinstance(action, list):
raise UnityGymException(
"The environment was expecting `action` to be a list."
)
if len(action) != self._n_agents:
raise UnityGymException(
"The environment was expecting a list of {} actions.".format(
self._n_agents
)
)
else:
if self._flattener is not None:
# Action space is discrete and flattened - we expect a list of scalars
action = [self._flattener.lookup_action(_act) for _act in action]
action = np.array(action)
else:
if self._flattener is not None:
# Translate action into list
action = self._flattener.lookup_action(action)
info = self._env.step(action)[self.brain_name]
n_agents = len(info.agents)
self._check_agents(n_agents)
self._current_state = info
if not self._multiagent:
obs, reward, done, info = self._single_step(info)
self.game_over = done
else:
obs, reward, done, info = self._multi_step(info)
self.game_over = all(done)
return obs, reward, done, info
def _single_step(self, info):
if self.use_visual:
visual_obs = info.visual_observations
if self._allow_multiple_visual_obs:
visual_obs_list = []
for obs in visual_obs:
visual_obs_list.append(self._preprocess_single(obs[0]))
self.visual_obs = visual_obs_list
else:
self.visual_obs = self._preprocess_single(visual_obs[0][0])
default_observation = self.visual_obs
else:
default_observation = info.vector_observations[0, :]
return (
default_observation,
info.rewards[0],
info.local_done[0],
{"text_observation": info.text_observations[0], "brain_info": info},
)
def _preprocess_single(self, single_visual_obs):
if self.uint8_visual:
return (255.0 * single_visual_obs).astype(np.uint8)
else:
return single_visual_obs
def _multi_step(self, info):
if self.use_visual:
self.visual_obs = self._preprocess_multi(info.visual_observations)
default_observation = self.visual_obs
else:
default_observation = info.vector_observations
return (
list(default_observation),
info.rewards,
info.local_done,
{"text_observation": info.text_observations, "brain_info": info},
)
def _preprocess_multi(self, multiple_visual_obs):
if self.uint8_visual:
return [
(255.0 * _visual_obs).astype(np.uint8)
for _visual_obs in multiple_visual_obs
]
else:
return multiple_visual_obs
def render(self, mode="rgb_array"):
return self.visual_obs
def close(self):
"""Override _close in your subclass to perform any necessary cleanup.
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
self._env.close()
def get_action_meanings(self):
return self.action_meanings
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Currently not implemented.
"""
logger.warning("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents):
if not self._multiagent and n_agents > 1:
raise UnityGymException(
"The environment was launched as a single-agent environment, however"
"there is more than one agent in the scene."
)
elif self._multiagent and n_agents <= 1:
raise UnityGymException(
"The environment was launched as a mutli-agent environment, however"
"there is only one agent in the scene."
)
if self._n_agents is None:
self._n_agents = n_agents
logger.info("{} agents within environment.".format(n_agents))
elif self._n_agents != n_agents:
raise UnityGymException(
"The number of agents in the environment has changed since "
"initialization. This is not supported."
)
@property
def metadata(self):
return {"render.modes": ["rgb_array"]}
@property
def reward_range(self):
return -float("inf"), float("inf")
@property
def spec(self):
return None
@property
def action_space(self):
return self._action_space
@property
def observation_space(self):
return self._observation_space
@property
def number_agents(self):
return self._n_agents
class ActionFlattener:
"""
Flattens branched discrete action spaces into single-branch discrete action spaces.
"""
def __init__(self, branched_action_space):
"""
Initialize the flattener.
:param branched_action_space: A List containing the sizes of each branch of the action
space, e.g. [2,3,3] for three branches with size 2, 3, and 3 respectively.
"""
self._action_shape = branched_action_space
self.action_lookup = self._create_lookup(self._action_shape)
self.action_space = spaces.Discrete(len(self.action_lookup))
@classmethod
def _create_lookup(cls, branched_action_space):
"""
Creates a Dict that maps discrete actions (scalars) to branched actions (lists).
Each key in the Dict maps to one unique set of branched actions, and each value
contains the List of branched actions.
"""
possible_vals = [range(_num) for _num in branched_action_space]
all_actions = [list(_action) for _action in itertools.product(*possible_vals)]
# Dict should be faster than List for large action spaces
action_lookup = {
_scalar: _action for (_scalar, _action) in enumerate(all_actions)
}
return action_lookup
def lookup_action(self, action):
"""
Convert a scalar discrete action into a unique set of branched actions.
:param: action: A scalar value representing one of the discrete actions.
:return: The List containing the branched actions.
"""
return self.action_lookup[action]
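A quick sanity check of the flattening above (a standalone sketch that mirrors _create_lookup's use of itertools.product; it does not import the gym wrapper):

import itertools

branched = [2, 3, 3]  # three branches of sizes 2, 3, and 3
all_actions = [list(a) for a in itertools.product(*[range(n) for n in branched])]
lookup = dict(enumerate(all_actions))

assert len(lookup) == 2 * 3 * 3   # Discrete(18) stands in for MultiDiscrete([2, 3, 3])
assert lookup[0] == [0, 0, 0]
assert lookup[4] == [0, 1, 1]     # each scalar decodes to one unique branched action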

110
ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py


import logging
from typing import Any, Dict, List
from collections import namedtuple
import numpy as np
import abc
import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel
from mlagents.trainers.buffer import Buffer
logger = logging.getLogger("mlagents.trainers")
RewardSignalResult = namedtuple(
"RewardSignalResult", ["scaled_reward", "unscaled_reward"]
)
class RewardSignal(abc.ABC):
def __init__(
self,
policy: TFPolicy,
policy_model: LearningModel,
strength: float,
gamma: float,
):
"""
Initializes a reward signal. At minimum, you must pass in the policy it is being applied to,
the reward strength, and the gamma (discount factor).
:param policy: The Policy object (e.g. PPOPolicy) that this Reward Signal will apply to.
:param strength: The strength of the reward. The reward's raw value will be multiplied by this value.
:param gamma: The time discounting factor used for this reward.
:return: A RewardSignal object.
"""
class_name = self.__class__.__name__
short_name = class_name.replace("RewardSignal", "")
self.stat_name = f"Policy/{short_name} Reward"
self.value_name = f"Policy/{short_name} Value Estimate"
# Terminate discounted reward computation at Done. Can disable to mitigate positive bias in rewards with
# no natural end, e.g. GAIL or Curiosity
self.use_terminal_states = True
self.update_dict: Dict[str, tf.Tensor] = {}
self.gamma = gamma
self.policy = policy
self.policy_model = policy_model
self.strength = strength
self.stats_name_to_update_name: Dict[str, str] = {}
def evaluate(
self, current_info: BrainInfo, next_info: BrainInfo
) -> RewardSignalResult:
"""
Evaluates the reward for the agents present in current_info given the next_info
:param current_info: The current BrainInfo.
:param next_info: The BrainInfo from the next timestep.
:return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
"""
return RewardSignalResult(
self.strength * np.zeros(len(current_info.agents)),
np.zeros(len(current_info.agents)),
)
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
"""
Evaluates the reward for the data present in the Dict mini_batch. Note the distinction from
evaluate(), which takes in two BrainInfos. This reflects the different data formats (i.e. from the Buffer
vs. before being placed into the Buffer). Use this when evaluating a reward function drawn straight from a
Buffer.
:param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
when drawing from the update buffer.
:return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
"""
mini_batch_len = len(next(iter(mini_batch.values())))
return RewardSignalResult(
self.strength * np.zeros(mini_batch_len), np.zeros(mini_batch_len)
)
def prepare_update(
self,
policy_model: LearningModel,
mini_batch: Dict[str, np.ndarray],
num_sequences: int,
) -> Dict[tf.Tensor, Any]:
"""
If the reward signal has an internal model (e.g. GAIL or Curiosity), get the feed_dict
needed to update its internal model.
:param policy_model: The model of the policy being trained.
:param mini_batch: A Dict of numpy arrays (the format used by our Buffer) containing the live data from which to update.
:param num_sequences: The number of sequences in the training buffer.
:return: A dict that corresponds to the feed_dict needed for the update.
"""
return {}
@classmethod
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Check the config dict, and throw an error if there are missing hyperparameters.
"""
param_keys = param_keys or []
for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for {1}.".format(
k, cls.__name__
)
)
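A minimal sketch of a concrete signal built on the base class above, assuming the packages in this diff are installed; the class name and constant reward are illustrative and not part of the codebase.

import numpy as np
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals.reward_signal import (
    RewardSignal,
    RewardSignalResult,
)


class ConstantRewardSignal(RewardSignal):
    """Illustrative signal that gives every agent a fixed unscaled reward of 1.0."""

    def evaluate(
        self, current_info: BrainInfo, next_info: BrainInfo
    ) -> RewardSignalResult:
        # One reward entry per agent present in current_info, scaled by strength.
        unscaled = np.ones(len(current_info.agents))
        return RewardSignalResult(self.strength * unscaled, unscaled)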