Making Gym a wrapper (#3812)

* Making Gym a wrapper
* Readding no graphics to the run gym test
* typo
* Modifying the changelog and the migrating doc
* Applying pre-commit
* [skip ci] Update gym-unity/gym_unity/tests/test_gym.py
  Co-Authored-By: Chris Elion <chris.elion@unity3d.com>
* Adding a note that the BaseEnv will close when the wrapper closes
* FoRgOt To rUn PrE-ComMiT
Co-authored-by: Chris Elion <chris.elion@unity3d.com>
5 年前 · 78f4da76
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
  asymmetric example environment Strikers Vs. Goalie has been added.
 - CameraSensorComponent.m_Grayscale and RenderTextureSensorComponent.m_Grayscale
  were changed from `public` to `private` (#3808).
+- The `UnityEnv` class from the `gym-unity` package was renamed to
+  `UnityToGymWrapper` and no longer creates the `UnityEnvironment`.
+  Instead, the `UnityEnvironment` must be passed as input to the
+  constructor of `UnityToGymWrapper` (#3812).

 ### Minor Changes

--- a/docs/Migrating.md
+++ b/docs/Migrating.md
  parameter, instead of returning the array. This was done to prevent a common
  source of error where users would return arrays of the wrong size.
 - `num_updates` and `train_interval` for SAC have been replaced with `steps_per_update`.
-
+- The `UnityEnv` class from the `gym-unity` package was renamed to
+  `UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
+  the `UnityEnvironment` must be passed as input to the
+  constructor of `UnityToGymWrapper`.

 ### Steps to Migrate

  `actionsOut` instead of returning an array.
 - Set `steps_per_update` to be around equal to the number of agents in your environment,
  times `num_updates` and divided by `train_interval`.
+- Replace `UnityEnv` with `UnityToGymWrapper` in your code. The constructor
+  no longer takes a file name as input but a fully constructed
+  `UnityEnvironment` instead.

 ## Migrating from 0.14 to 0.15

--- a/gym-unity/README.md
+++ b/gym-unity/README.md
 from the root of the project repository use:

 ```python
-from gym_unity.envs import UnityEnv
+from gym_unity.envs import UnityToGymWrapper
-env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual)
+env = UnityToGymWrapper(unity_environment, use_visual, uint8_visual)
-*  `environment_filename` refers to the path to the Unity environment.
-
-*  `worker_id` refers to the port to use for communication with the environment.
-   Defaults to `0`.
+*  `unity_environment` refers to the Unity environment to be wrapped.

 *  `use_visual` refers to whether to use visual observations (True) or vector
   observations (False) as the default observation provided by the `reset` and
 from baselines import deepq
 from baselines import logger

-from gym_unity.envs import UnityEnv
+from mlagents_envs.environment import UnityEnvironment
+from gym_unity.envs import UnityToGymWrapper
-    env = UnityEnv("./envs/GridWorld", 0, use_visual=True, uint8_visual=True)
+    unity_env = UnityEnvironment("./envs/GridWorld")
+    env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
    logger.configure('./logs') # Change to log in a different directory
    act = deepq.learn(
        env,

 Other algorithms in the Baselines repository can be run using scripts similar to
 the examples from the baselines package. In most cases, the primary changes needed
-to use a Unity environment are to import `UnityEnv`, and to replace the environment
-creation code, typically `gym.make()`, with a call to `UnityEnv(env_path)`
-passing the environment binary path.
+to use a Unity environment are to import `UnityToGymWrapper`, and to replace the
+environment creation code, typically `gym.make()`, with a call to
+`UnityToGymWrapper(unity_environment)` passing the environment as input.

 A typical rule of thumb is that for vision-based environments, modification
 should be done to Atari training scripts, and for vector observation
 such a method using the PPO2 baseline:

 ```python
-from gym_unity.envs import UnityEnv
+from mlagents_envs.environment import UnityEnvironment
+from gym_unity.envs import UnityToGymWrapper
 from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
 from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 from baselines.bench import Monitor
    """
    def make_env(rank, use_visual=True): # pylint: disable=C0111
        def _thunk():
-            env = UnityEnv(env_directory, rank, use_visual=use_visual, uint8_visual=True)
+            unity_env = UnityEnvironment(env_directory, worker_id=rank)
+            env = UnityToGymWrapper(unity_env, use_visual=use_visual, uint8_visual=True)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return env
        return _thunk
 instantiated, just as in the Baselines example. At the top of the file, insert

 ```python
-from gym_unity.envs import UnityEnv
+from mlagents_envs.environment import UnityEnvironment
+from gym_unity.envs import UnityToGymWrapper
 ```

 to import the Gym Wrapper. Navigate to the `create_atari_environment` method
 ```python
    game_version = 'v0' if sticky_actions else 'v4'
    full_game_name = '{}NoFrameskip-{}'.format(game_name, game_version)
-    env = UnityEnv('./envs/GridWorld', 0, use_visual=True, uint8_visual=True)
+    unity_env = UnityEnvironment('./envs/GridWorld')
+    env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
    return env
 ```

 with discrete action spaces, and specifically the Discrete Gym space. For environments
 that use branched discrete action spaces (e.g.
 [VisualBanana](../docs/Learning-Environment-Examples.md)), you can enable the
-`flatten_branched` parameter in `UnityEnv`, which treats each combination of branched
+`flatten_branched` parameter in `UnityToGymWrapper`, which treats each combination of branched
 actions as separate actions.

 Furthermore, when building your environments, ensure that your Agent is using visual
--- a/gym-unity/gym_unity/envs/__init__.py
+++ b/gym-unity/gym_unity/envs/__init__.py
 import gym
 from gym import error, spaces

-from mlagents_envs.environment import UnityEnvironment
+from mlagents_envs.base_env import BaseEnv
 from mlagents_envs.base_env import DecisionSteps, TerminalSteps
 from mlagents_envs import logging_util

 GymStepResult = Tuple[np.ndarray, float, bool, Dict]


-class UnityEnv(gym.Env):
+class UnityToGymWrapper(gym.Env):
    """
    Provides Gym wrapper for Unity Learning Environments.
    """
-        environment_filename: str,
-        worker_id: int = 0,
+        unity_env: BaseEnv,
-        no_graphics: bool = False,
-        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
-        :param worker_id: Worker number for environment.
+        :param unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed when the UnityToGymWrapper closes.
-        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
-        base_port = UnityEnvironment.BASE_ENVIRONMENT_PORT
-        if environment_filename is None:
-            base_port = UnityEnvironment.DEFAULT_EDITOR_PORT
-
-        self._env = UnityEnvironment(
-            environment_filename,
-            worker_id,
-            base_port=base_port,
-            no_graphics=no_graphics,
-        )
+        self._env = unity_env

        # Take a single step so that the brain information will be sent over
        if not self._env.get_behavior_names():
--- a/gym-unity/gym_unity/tests/test_gym.py
+++ b/gym-unity/gym_unity/tests/test_gym.py
 import numpy as np

 from gym import spaces
-from gym_unity.envs import UnityEnv
+from gym_unity.envs import UnityToGymWrapper
 from mlagents_envs.base_env import (
    BehaviorSpec,
    ActionType,


-@mock.patch("gym_unity.envs.UnityEnvironment")
-def test_gym_wrapper(mock_env):
+def test_gym_wrapper():
+    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec()
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec)
    setup_mock_unityenvironment(
-    env = UnityEnv(" ", use_visual=False)
-    assert isinstance(env, UnityEnv)
+    env = UnityToGymWrapper(mock_env, use_visual=False)
+    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    assert isinstance(info, dict)


-@mock.patch("gym_unity.envs.UnityEnvironment")
-def test_branched_flatten(mock_env):
+def test_branched_flatten():
+    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
        mock_env, mock_spec, mock_decision_step, mock_terminal_step
    )

-    env = UnityEnv(" ", use_visual=False, flatten_branched=True)
+    env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
-    env = UnityEnv(" ", use_visual=False, flatten_branched=False)
+    env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=False)
-@mock.patch("gym_unity.envs.UnityEnvironment")
-def test_gym_wrapper_visual(mock_env, use_uint8):
+def test_gym_wrapper_visual(use_uint8):
+    mock_env = mock.MagicMock()
    mock_spec = create_mock_group_spec(number_visual_observations=1)
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1
    )

-    env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
-    assert isinstance(env, UnityEnv)
+    env = UnityToGymWrapper(mock_env, use_visual=True, uint8_visual=use_uint8)
+    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    :Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
    :Mock mock_termination: A TerminalSteps object that will be returned at each step and reset.
    """
-    mock_env.return_value.get_behavior_names.return_value = ["MockBrain"]
-    mock_env.return_value.get_behavior_spec.return_value = mock_spec
-    mock_env.return_value.get_steps.return_value = (mock_decision, mock_termination)
+    mock_env.get_behavior_names.return_value = ["MockBrain"]
+    mock_env.get_behavior_spec.return_value = mock_spec
+    mock_env.get_steps.return_value = (mock_decision, mock_termination)
--- a/ml-agents/tests/yamato/scripts/run_gym.py
+++ b/ml-agents/tests/yamato/scripts/run_gym.py
 import argparse

-from gym_unity.envs import UnityEnv
+from mlagents_envs.environment import UnityEnvironment
+from gym_unity.envs import UnityToGymWrapper


 def test_run_environment(env_name):
    """
-    env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
+    u_env = UnityEnvironment(env_name, worker_id=1, no_graphics=True)
+    env = UnityToGymWrapper(u_env, use_visual=False)

    try:
        # Examine environment parameters
    """

    try:
-        env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
+        env1 = UnityToGymWrapper(
+            UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
+        )
-        env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
-        env2 = UnityEnv(env_name, worker_id=2, use_visual=False, no_graphics=True)
+        env1 = UnityToGymWrapper(
+            UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
+        )
+        env2 = UnityToGymWrapper(
+            UnityEnvironment(env_name, worker_id=2, no_graphics=True), use_visual=False
+        )
        env2.reset()
    finally:
        env1.close()