Fix flake8 import warnings (#2584)
We have been ignoring unused imports and star imports via flake8. These are both bad practice and grow over time without automated checking. This commit attempts to fix all existing import errors and add back the corresponding flake8 checks.
develop-gpu-test
GitHub
5 years ago
Current commit
67d754c5
70 files changed, with 668 insertions and 774 deletions
Changed files (lines changed per file):

1    gym-unity/gym_unity/__init__.py
372  gym-unity/gym_unity/envs/__init__.py
6    gym-unity/gym_unity/tests/test_gym.py
5    ml-agents-envs/mlagents/envs/__init__.py
2    ml-agents-envs/mlagents/envs/base_unity_environment.py
3    ml-agents-envs/mlagents/envs/communicator.py
22   ml-agents-envs/mlagents/envs/communicator_objects/__init__.py
4    ml-agents-envs/mlagents/envs/env_manager.py
20   ml-agents-envs/mlagents/envs/environment.py
14   ml-agents-envs/mlagents/envs/mock_communicator.py
4    ml-agents-envs/mlagents/envs/policy.py
6    ml-agents-envs/mlagents/envs/rpc_communicator.py
4    ml-agents-envs/mlagents/envs/sampler_class.py
3    ml-agents-envs/mlagents/envs/simple_env_manager.py
4    ml-agents-envs/mlagents/envs/socket_communicator.py
7    ml-agents-envs/mlagents/envs/subprocess_env_manager.py
27   ml-agents-envs/mlagents/envs/tests/test_envs.py
4    ml-agents-envs/mlagents/envs/tests/test_rpc_communicator.py
1    ml-agents-envs/mlagents/envs/tests/test_sampler_class.py
3    ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py
20   ml-agents/mlagents/trainers/__init__.py
4    ml-agents/mlagents/trainers/bc/__init__.py
4    ml-agents/mlagents/trainers/bc/online_trainer.py
5    ml-agents/mlagents/trainers/bc/trainer.py
3    ml-agents/mlagents/trainers/buffer.py
1    ml-agents/mlagents/trainers/components/bc/__init__.py
1    ml-agents/mlagents/trainers/components/bc/model.py
110  ml-agents/mlagents/trainers/components/reward_signals/__init__.py
1    ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py
1    ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
1    ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py
1    ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py
1    ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py
1    ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
2    ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
6    ml-agents/mlagents/trainers/demo_loader.py
4    ml-agents/mlagents/trainers/learn.py
3    ml-agents/mlagents/trainers/ppo/__init__.py
2    ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
2    ml-agents/mlagents/trainers/ppo/policy.py
7    ml-agents/mlagents/trainers/ppo/trainer.py
12   ml-agents/mlagents/trainers/rl_trainer.py
3    ml-agents/mlagents/trainers/sac/__init__.py
8    ml-agents/mlagents/trainers/sac/policy.py
11   ml-agents/mlagents/trainers/sac/trainer.py
1    ml-agents/mlagents/trainers/tests/mock_brain.py
2    ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
22   ml-agents/mlagents/trainers/tests/test_bc.py
10   ml-agents/mlagents/trainers/tests/test_bcmodule.py
3    ml-agents/mlagents/trainers/tests/test_curriculum.py
2    ml-agents/mlagents/trainers/tests/test_demo_loader.py
5    ml-agents/mlagents/trainers/tests/test_learn.py
26   ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
6    ml-agents/mlagents/trainers/tests/test_multigpu.py
5    ml-agents/mlagents/trainers/tests/test_policy.py
35   ml-agents/mlagents/trainers/tests/test_ppo.py
27   ml-agents/mlagents/trainers/tests/test_reward_signals.py
45   ml-agents/mlagents/trainers/tests/test_sac.py
4    ml-agents/mlagents/trainers/tests/test_simple_rl.py
2    ml-agents/mlagents/trainers/tests/test_trainer_controller.py
2    ml-agents/mlagents/trainers/tests/test_trainer_metrics.py
8    ml-agents/mlagents/trainers/tests/test_trainer_util.py
7    ml-agents/mlagents/trainers/tf_policy.py
10   ml-agents/mlagents/trainers/trainer.py
4    ml-agents/mlagents/trainers/trainer_controller.py
4    ml-agents/mlagents/trainers/trainer_util.py
4    setup.cfg
1    utils/validate_meta_files.py
371  gym-unity/gym_unity/envs/unity_env.py
110  ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py
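The checks being restored correspond to flake8's standard import codes: F401 (module imported but unused) and F403/F405 (star imports and the untraceable names they introduce). The setup.cfg change is listed among the files above but its diff is not rendered on this page, so the snippet below is only a hedged sketch of what removing those codes from an ignore list might look like; the surrounding option values are assumptions, not the repository's real configuration.

[flake8]
# Hypothetical before/after: previously the import codes were silenced.
# ignore = E203, W503, F401, F403
# After this commit, only codes unrelated to imports would remain ignored.
ignore = E203, W503

Running flake8 over the packages (for example with --select=F401,F403 to narrow the output to import problems) would then surface exactly the unused and star imports this commit cleans up.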
gym-unity/gym_unity/__init__.py:

from gym.envs.registration import register
gym-unity/gym_unity/envs/__init__.py (the old one-line re-export is replaced by the full wrapper module, moved in from gym_unity/envs/unity_env.py):

from gym_unity.envs.unity_env import UnityEnv, UnityGymException

import logging
import itertools
import gym
import numpy as np
from mlagents.envs.environment import UnityEnvironment
from gym import error, spaces


class UnityGymException(error.Error):
    """
    Any error related to the gym wrapper of ml-agents.
    """

    pass


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gym_unity")


class UnityEnv(gym.Env):
    """
    Provides Gym wrapper for Unity Learning Environments.
    Multi-agent environments use lists for object types, as done here:
    https://github.com/openai/multiagent-particle-envs
    """

    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(
            environment_filename, worker_id, no_graphics=no_graphics
        )
        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        self.game_over = (
            False
        )  # Hidden flag used by Atari environments to determine if the game is over
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym."
            )
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There are not any external brain in the UnityEnvironment"
            )

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment."
            )
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual` was set to true, but visual observations are not in use. "
                "This setting will not have any effect."
            )
        else:
            self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to receive them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym."
            )

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size
                    )

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened."
                )
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            if brain.camera_resolutions[0]["blackAndWhite"]:
                depth = 1
            else:
                depth = 3
            self._observation_space = spaces.Box(
                0,
                1,
                dtype=np.float32,
                shape=(
                    brain.camera_resolutions[0]["height"],
                    brain.camera_resolutions[0]["width"],
                    depth,
                ),
            )
        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)

    def reset(self):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns: observation (object/list): the initial observation of the
        space.
        """
        info = self._env.reset()[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self.game_over = False

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
        else:
            obs, reward, done, info = self._multi_step(info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        """

        # Use random actions for all other agents in environment.
        if self._multiagent:
            if not isinstance(action, list):
                raise UnityGymException(
                    "The environment was expecting `action` to be a list."
                )
            if len(action) != self._n_agents:
                raise UnityGymException(
                    "The environment was expecting a list of {} actions.".format(
                        self._n_agents
                    )
                )
            else:
                if self._flattener is not None:
                    # Action space is discrete and flattened - we expect a list of scalars
                    action = [self._flattener.lookup_action(_act) for _act in action]
                action = np.array(action)
        else:
            if self._flattener is not None:
                # Translate action into list
                action = self._flattener.lookup_action(action)

        info = self._env.step(action)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self._current_state = info

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
            self.game_over = done
        else:
            obs, reward, done, info = self._multi_step(info)
            self.game_over = all(done)
        return obs, reward, done, info

    def _single_step(self, info):
        if self.use_visual:
            visual_obs = info.visual_observations

            if self._allow_multiple_visual_obs:
                visual_obs_list = []
                for obs in visual_obs:
                    visual_obs_list.append(self._preprocess_single(obs[0]))
                self.visual_obs = visual_obs_list
            else:
                self.visual_obs = self._preprocess_single(visual_obs[0][0])

            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations[0, :]

        return (
            default_observation,
            info.rewards[0],
            info.local_done[0],
            {"text_observation": info.text_observations[0], "brain_info": info},
        )

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def _multi_step(self, info):
        if self.use_visual:
            self.visual_obs = self._preprocess_multi(info.visual_observations)
            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations
        return (
            list(default_observation),
            info.rewards,
            info.local_done,
            {"text_observation": info.text_observations, "brain_info": info},
        )

    def _preprocess_multi(self, multiple_visual_obs):
        if self.uint8_visual:
            return [
                (255.0 * _visual_obs).astype(np.uint8)
                for _visual_obs in multiple_visual_obs
            ]
        else:
            return multiple_visual_obs

    def render(self, mode="rgb_array"):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        self._env.close()

    def get_action_meanings(self):
        return self.action_meanings

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Currently not implemented.
        """
        logger.warn("Could not seed environment %s", self.name)
        return

    def _check_agents(self, n_agents):
        if not self._multiagent and n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, however "
                "there is more than one agent in the scene."
            )
        elif self._multiagent and n_agents <= 1:
            raise UnityGymException(
                "The environment was launched as a multi-agent environment, however "
                "there is only one agent in the scene."
            )
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException(
                "The number of agents in the environment has changed since "
                "initialization. This is not supported."
            )

    @property
    def metadata(self):
        return {"render.modes": ["rgb_array"]}

    @property
    def reward_range(self):
        return -float("inf"), float("inf")

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents


class ActionFlattener:
    """
    Flattens branched discrete action spaces into single-branch discrete action spaces.
    """

    def __init__(self, branched_action_space):
        """
        Initialize the flattener.
        :param branched_action_space: A List containing the sizes of each branch of the action
            space, e.g. [2,3,3] for three branches with size 2, 3, and 3 respectively.
        """
        self._action_shape = branched_action_space
        self.action_lookup = self._create_lookup(self._action_shape)
        self.action_space = spaces.Discrete(len(self.action_lookup))

    @classmethod
    def _create_lookup(self, branched_action_space):
        """
        Creates a Dict that maps discrete actions (scalars) to branched actions (lists).
        Each key in the Dict maps to one unique set of branched actions, and each value
        contains the List of branched actions.
        """
        possible_vals = [range(_num) for _num in branched_action_space]
        all_actions = [list(_action) for _action in itertools.product(*possible_vals)]
        # Dict should be faster than List for large action spaces
        action_lookup = {
            _scalar: _action for (_scalar, _action) in enumerate(all_actions)
        }
        return action_lookup

    def lookup_action(self, action):
        """
        Convert a scalar discrete action into a unique set of branched actions.
        :param: action: A scalar value representing one of the discrete actions.
        :return: The List containing the branched actions.
        """
        return self.action_lookup[action]
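For orientation, here is a small usage sketch of the wrapper defined above. It is illustrative only and not part of the commit: the build path "path/to/UnityBuild" is a placeholder, and it assumes a single-agent environment whose branched discrete action space is flattened.

from gym_unity.envs import UnityEnv

# Wrap a Unity build. flatten_branched=True converts a branched (MultiDiscrete)
# action space into a single Discrete space via the ActionFlattener shown above.
env = UnityEnv("path/to/UnityBuild", worker_id=0, use_visual=False, flatten_branched=True)

obs = env.reset()
for _ in range(10):
    action = env.action_space.sample()           # scalar index into the flattened space
    obs, reward, done, info = env.step(action)   # info["brain_info"] carries the raw BrainInfo
    if done:
        obs = env.reset()
env.close()

# The flattener itself is easy to inspect: with branch sizes [2, 3],
# ActionFlattener([2, 3]).lookup_action(4) returns [1, 1].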
ml-agents-envs/mlagents/envs/__init__.py:

from .brain import AllBrainInfo, BrainInfo, BrainParameters
from .action_info import ActionInfo, ActionInfoOutputs
from .policy import Policy
from .environment import *
from .exception import *
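The explicit re-exports above are exactly the kind of line flake8's F401 flags once the check is re-enabled, because nothing inside the __init__.py itself uses the imported names. How this commit reconciles that is not visible on this page; a common pattern (shown here as a hypothetical illustration, not as the project's actual solution) is to mark intentional re-exports with noqa comments, or to list them in __all__, which pyflakes counts as a use:

# Hypothetical handling of intentional re-exports under an enforced F401.
from .brain import AllBrainInfo, BrainInfo, BrainParameters  # noqa: F401
from .action_info import ActionInfo, ActionInfoOutputs  # noqa: F401

# Alternatively, spell out the public surface so both flake8 and readers
# can see the re-export is deliberate.
__all__ = [
    "AllBrainInfo",
    "BrainInfo",
    "BrainParameters",
    "ActionInfo",
    "ActionInfoOutputs",
]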
ml-agents-envs/mlagents/envs/communicator_objects/__init__.py:

from .agent_action_proto_pb2 import *
from .agent_info_proto_pb2 import *
from .brain_parameters_proto_pb2 import *
from .command_proto_pb2 import *
from .custom_action_pb2 import *
from .custom_observation_pb2 import *
from .custom_reset_parameters_pb2 import *
from .demonstration_meta_proto_pb2 import *
from .engine_configuration_proto_pb2 import *
from .environment_parameters_proto_pb2 import *
from .header_pb2 import *
from .resolution_proto_pb2 import *
from .space_type_proto_pb2 import *
from .unity_input_pb2 import *
from .unity_message_pb2 import *
from .unity_output_pb2 import *
from .unity_rl_initialization_input_pb2 import *
from .unity_rl_initialization_output_pb2 import *
from .unity_rl_input_pb2 import *
from .unity_rl_output_pb2 import *
from .unity_to_external_pb2 import *
from .unity_to_external_pb2_grpc import *
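Each of these star imports hides which generated names actually get used, which is why flake8 cannot check them: F403 fires at the import and F405 at every use of a name it cannot trace. A hypothetical rewrite of one entry is sketched below; the class name AgentInfoProto is an assumption about the generated module's contents, not text taken from the commit.

# Before: flake8 reports F403 here and F405 wherever the imported names appear.
#   from .agent_info_proto_pb2 import *

# After: the import is explicit, so any name that goes unused would trip F401.
from .agent_info_proto_pb2 import AgentInfoProto  # assumed generated class


def make_empty_agent_info() -> AgentInfoProto:
    # Using the name directly shows flake8 that the explicit import is needed.
    return AgentInfoProto()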
ml-agents/mlagents/trainers/__init__.py:

from .buffer import *
from .curriculum import *
from .meta_curriculum import *
from .models import *
from .trainer_metrics import *
from .trainer import *
from .tf_policy import *
from .trainer_controller import *
from .bc.models import *
from .bc.offline_trainer import *
from .bc.online_trainer import *
from .bc.policy import *
from .ppo.models import *
from .ppo.trainer import *
from .ppo.policy import *
from .sac.models import *
from .sac.trainer import *
from .sac.policy import *
from .exception import *
from .demo_loader import *
ml-agents/mlagents/trainers/bc/__init__.py:

from .models import *
from .online_trainer import *
from .offline_trainer import *
from .policy import *
ml-agents/mlagents/trainers/components/bc/__init__.py:

from .module import BCModule
ml-agents/mlagents/trainers/components/reward_signals/__init__.py (the old one-line re-export is replaced by the RewardSignal implementation, moved in from reward_signal.py):

from .reward_signal import *

import logging
from typing import Any, Dict, List
from collections import namedtuple
import numpy as np
import abc

import tensorflow as tf

from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel

logger = logging.getLogger("mlagents.trainers")

RewardSignalResult = namedtuple(
    "RewardSignalResult", ["scaled_reward", "unscaled_reward"]
)


class RewardSignal(abc.ABC):
    def __init__(
        self,
        policy: TFPolicy,
        policy_model: LearningModel,
        strength: float,
        gamma: float,
    ):
        """
        Initializes a reward signal. At minimum, you must pass in the policy it is being applied to,
        the reward strength, and the gamma (discount factor).
        :param policy: The Policy object (e.g. PPOPolicy) that this Reward Signal will apply to.
        :param strength: The strength of the reward. The reward's raw value will be multiplied by this value.
        :param gamma: The time discounting factor used for this reward.
        :return: A RewardSignal object.
        """
        class_name = self.__class__.__name__
        short_name = class_name.replace("RewardSignal", "")
        self.stat_name = f"Policy/{short_name} Reward"
        self.value_name = f"Policy/{short_name} Value Estimate"
        # Terminate discounted reward computation at Done. Can disable to mitigate positive bias in rewards with
        # no natural end, e.g. GAIL or Curiosity
        self.use_terminal_states = True
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.gamma = gamma
        self.policy = policy
        self.policy_model = policy_model
        self.strength = strength
        self.stats_name_to_update_name: Dict[str, str] = {}

    def evaluate(
        self, current_info: BrainInfo, next_info: BrainInfo
    ) -> RewardSignalResult:
        """
        Evaluates the reward for the agents present in current_info given the next_info
        :param current_info: The current BrainInfo.
        :param next_info: The BrainInfo from the next timestep.
        :return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
        """
        return RewardSignalResult(
            self.strength * np.zeros(len(current_info.agents)),
            np.zeros(len(current_info.agents)),
        )

    def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
        """
        Evaluates the reward for the data present in the Dict mini_batch. Note the distinction from
        evaluate(), which takes in two BrainInfos. This reflects the different data formats (i.e. from
        the Buffer vs. before being placed into the Buffer). Use this when evaluating a reward function
        drawn straight from a Buffer.
        :param mini_batch: A Dict of numpy arrays (the format used by our Buffer)
            when drawing from the update buffer.
        :return: a RewardSignalResult of (scaled intrinsic reward, unscaled intrinsic reward) provided by the generator
        """
        mini_batch_len = len(next(iter(mini_batch.values())))
        return RewardSignalResult(
            self.strength * np.zeros(mini_batch_len), np.zeros(mini_batch_len)
        )

    def prepare_update(
        self,
        policy_model: LearningModel,
        mini_batch: Dict[str, np.ndarray],
        num_sequences: int,
    ) -> Dict[tf.Tensor, Any]:
        """
        If the reward signal has an internal model (e.g. GAIL or Curiosity), get the feed_dict
        needed to update the buffer.
        :param mini_batch: A mini batch of data from which to update.
        :param num_sequences: The number of sequences in the training buffer.
        :return: A dict that corresponds to the feed_dict needed for the update.
        """
        return {}

    @classmethod
    def check_config(
        cls, config_dict: Dict[str, Any], param_keys: List[str] = None
    ) -> None:
        """
        Check the config dict, and throw an error if there are missing hyperparameters.
        """
        param_keys = param_keys or []
        for k in param_keys:
            if k not in config_dict:
                raise UnityTrainerException(
                    "The hyper-parameter {0} could not be found for {1}.".format(
                        k, cls.__name__
                    )
                )
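To make the relocated base class concrete, here is a minimal, hypothetical subclass built only from the interface shown above; it is a sketch, not code from the commit. It pays every agent a constant bonus and scales it by the configured strength, mirroring the shapes returned by the default evaluate() and evaluate_batch().

import numpy as np

from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult


class ConstantRewardSignal(RewardSignal):
    """Toy reward signal: hands every agent a fixed bonus each step."""

    def evaluate(self, current_info, next_info) -> RewardSignalResult:
        unscaled = np.ones(len(current_info.agents))  # raw bonus of 1.0 per agent
        return RewardSignalResult(self.strength * unscaled, unscaled)

    def evaluate_batch(self, mini_batch) -> RewardSignalResult:
        batch_len = len(next(iter(mini_batch.values())))
        unscaled = np.ones(batch_len)
        return RewardSignalResult(self.strength * unscaled, unscaled)

Because of the name-stripping convention in __init__, such a class would report its statistics under "Policy/Constant Reward" and "Policy/Constant Value Estimate".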
ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py:

from .signal import CuriosityRewardSignal

ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py:

from .signal import ExtrinsicRewardSignal

ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py:

from .signal import GAILRewardSignal

ml-agents/mlagents/trainers/ppo/__init__.py:

from .models import *
from .trainer import *
from .policy import *

ml-agents/mlagents/trainers/sac/__init__.py:

from .models import *
from .trainer import *
from .policy import *
gym-unity/gym_unity/envs/unity_env.py (deleted):

The removed file is identical to the new gym_unity/envs/__init__.py shown above, except that it imported the environment with "from mlagents.envs import UnityEnvironment" instead of "from mlagents.envs.environment import UnityEnvironment". Its full 371-line body is not repeated here.
ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py (deleted):

The removed file is identical to the RewardSignal implementation now living in components/reward_signals/__init__.py above, except for one extra line, "from mlagents.trainers.buffer import Buffer", which the new module drops. Its full 110-line body is not repeated here.