
Merge branch 'master' into asymm-envs

/asymm-envs
Andrew Cohen, 4 years ago
Commit 0ec2a890
13 files changed, 246 insertions and 76 deletions
  1. com.unity.ml-agents/CHANGELOG.md (4 changes)
  2. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (17 changes)
  3. docs/Migrating.md (8 changes)
  4. gym-unity/README.md (35 changes)
  5. gym-unity/gym_unity/envs/__init__.py (23 changes)
  6. gym-unity/gym_unity/tests/test_gym.py (32 changes)
  7. ml-agents/mlagents/trainers/components/bc/module.py (4 changes)
  8. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (11 changes)
  9. ml-agents/mlagents/trainers/demo_loader.py (59 changes)
  10. ml-agents/mlagents/trainers/tests/test_demo_loader.py (64 changes)
  11. ml-agents/mlagents/trainers/tests/test_reward_signals.py (45 changes)
  12. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2 changes)
  13. ml-agents/tests/yamato/scripts/run_gym.py (18 changes)

com.unity.ml-agents/CHANGELOG.md (4 changes)


- An asymmetric example environment, Strikers Vs. Goalie, has been added.
- CameraSensorComponent.m_Grayscale and RenderTextureSensorComponent.m_Grayscale
were changed from `public` to `private` (#3808).
- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`.
Instead, the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.
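A minimal sketch of the new construction pattern (the `GridWorld` build path and settings are illustrative only):

```python
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

# The UnityEnvironment is now created separately and handed to the wrapper,
# which closes it when the wrapper itself is closed.
unity_env = UnityEnvironment("./envs/GridWorld", worker_id=0)
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
```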
### Minor Changes

com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (17 changes)


## Known limitations
### Training
Training is limited to the Unity Editor and Standalone builds on Windows, MacOS, and Linux. Your environment will default to inference mode if training is not supported or is not currently running.
### Inference
Inference is executed via the [Unity Inference Engine](https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html).
**CPU**
- All platforms supported.

**GPU**
- All platforms supported except:
  - WebGL and GLES 3/2 on Android / iPhone

**NOTE:** Mobile platform support includes:
- Vulkan for Android
- Metal for iOS.
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations

docs/Migrating.md (8 changes)


parameter, instead of returning the array. This was done to prevent a common
source of error where users would return arrays of the wrong size.
- `num_updates` and `train_interval` for SAC have been replaced with `steps_per_update`.
- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.
### Steps to Migrate

`actionsOut` instead of returning an array.
- Set `steps_per_update` to be roughly equal to the number of agents in your environment,
  times `num_updates`, divided by `train_interval` (for example, 20 agents with
  `num_updates: 1` and `train_interval: 2` gives `steps_per_update: 10`).
- Replace `UnityEnv` with `UnityToGymWrapper` in your code. The constructor
no longer takes a file name as input but a fully constructed
`UnityEnvironment` instead.
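A minimal before/after sketch of that last step (the `3DBall` binary path is illustrative only):

```python
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

# Before (0.15 and earlier): the gym wrapper created the UnityEnvironment itself.
# env = UnityEnv("./envs/3DBall", worker_id=0, use_visual=False)

# After: construct the UnityEnvironment yourself and pass it to the wrapper.
unity_env = UnityEnvironment("./envs/3DBall", worker_id=0)
env = UnityToGymWrapper(unity_env, use_visual=False)
```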
## Migrating from 0.14 to 0.15

gym-unity/README.md (35 changes)


from the root of the project repository use:
```python
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper
env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual)
env = UnityToGymWrapper(unity_environment, worker_id, use_visual, uint8_visual)
* `environment_filename` refers to the path to the Unity environment.
* `worker_id` refers to the port to use for communication with the environment.
Defaults to `0`.
* `unity_environment` refers to the Unity environment to be wrapped.
* `use_visual` refers to whether to use visual observations (True) or vector
observations (False) as the default observation provided by the `reset` and

from baselines import deepq
from baselines import logger
from gym_unity.envs import UnityEnv
from mlagents_envs import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
env = UnityEnv("./envs/GridWorld", 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env, 0, use_visual=True, uint8_visual=True)
logger.configure('./logs') # Change to log in a different directory
act = deepq.learn(
env,

Other algorithms in the Baselines repository can be run using scripts similar to
the examples from the baselines package. In most cases, the primary changes needed
to use a Unity environment are to import `UnityEnv`, and to replace the environment
creation code, typically `gym.make()`, with a call to `UnityEnv(env_path)`
passing the environment binary path.
to use a Unity environment are to import `UnityToGymWrapper`, and to replace the
environment creation code, typically `gym.make()`, with a call to
`UnityToGymWrapper(unity_environment)` passing the environment as input.
A typical rule of thumb is that for vision-based environments, modification
should be done to Atari training scripts, and for vector observation

such a method using the PPO2 baseline:
```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.bench import Monitor

"""
def make_env(rank, use_visual=True): # pylint: disable=C0111
def _thunk():
env = UnityEnv(env_directory, rank, use_visual=use_visual, uint8_visual=True)
unity_env = UnityEnvironment(env_directory)
env = UnityToGymWrapper(unity_env, rank, use_visual=use_visual, uint8_visual=True)
env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
return env
return _thunk

instantiated, just as in the Baselines example. At the top of the file, insert
```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
```
to import the Gym Wrapper. Navigate to the `create_atari_environment` method

```python
game_version = 'v0' if sticky_actions else 'v4'
full_game_name = '{}NoFrameskip-{}'.format(game_name, game_version)
env = UnityEnv('./envs/GridWorld', 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment('./envs/GridWorld')
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
return env
```

with discrete action spaces, and specifically the Discrete Gym space. For environments
that use branched discrete action spaces (e.g.
[VisualBanana](../docs/Learning-Environment-Examples.md)), you can enable the
`flatten_branched` parameter in `UnityEnv`, which treats each combination of branched
`flatten_branched` parameter in `UnityToGymWrapper`, which treats each combination of branched
actions as separate actions.
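For instance, the wrapper's tests exercise a branched space with branch sizes `[2, 2, 3]`, which flattens into a single `Discrete(12)` space (a sketch; `unity_env` is assumed to be an already constructed `UnityEnvironment` exposing that action space):

```python
from gym import spaces
from gym_unity.envs import UnityToGymWrapper

env = UnityToGymWrapper(unity_env, use_visual=False, flatten_branched=True)
assert isinstance(env.action_space, spaces.Discrete)
assert env.action_space.n == 12                      # 2 * 2 * 3 combinations
assert env._flattener.lookup_action(0) == [0, 0, 0]  # flat index -> branched action
```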
Furthermore, when building your environments, ensure that your Agent is using visual

gym-unity/gym_unity/envs/__init__.py (23 changes)


import gym
from gym import error, spaces
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

GymStepResult = Tuple[np.ndarray, float, bool, Dict]
class UnityEnv(gym.Env):
class UnityToGymWrapper(gym.Env):
"""
Provides Gym wrapper for Unity Learning Environments.
"""

environment_filename: str,
worker_id: int = 0,
unity_env: BaseEnv,
no_graphics: bool = False,
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
:param worker_id: Worker number for environment.
:param unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed when the UnityToGymWrapper closes.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
base_port = UnityEnvironment.BASE_ENVIRONMENT_PORT
if environment_filename is None:
base_port = UnityEnvironment.DEFAULT_EDITOR_PORT
self._env = UnityEnvironment(
environment_filename,
worker_id,
base_port=base_port,
no_graphics=no_graphics,
)
self._env = unity_env
# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():

gym-unity/gym_unity/tests/test_gym.py (32 changes)


import numpy as np
from gym import spaces
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,

@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper(mock_env):
def test_gym_wrapper():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec()
mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec)
setup_mock_unityenvironment(

env = UnityEnv(" ", use_visual=False)
assert isinstance(env, UnityEnv)
env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2

assert isinstance(info, dict)
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_branched_flatten(mock_env):
def test_branched_flatten():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)

mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityEnv(" ", use_visual=False, flatten_branched=True)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=True)
assert isinstance(env.action_space, spaces.Discrete)
assert env.action_space.n == 12
assert env._flattener.lookup_action(0) == [0, 0, 0]

env = UnityEnv(" ", use_visual=False, flatten_branched=False)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=False)
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper_visual(mock_env, use_uint8):
def test_gym_wrapper_visual(use_uint8):
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(number_visual_observations=1)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(
mock_spec, number_visual_observations=1

)
env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityEnv)
env = UnityToGymWrapper(mock_env, use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2

:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
"""
mock_env.return_value.get_behavior_names.return_value = ["MockBrain"]
mock_env.return_value.get_behavior_spec.return_value = mock_spec
mock_env.return_value.get_steps.return_value = (mock_decision, mock_termination)
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.get_steps.return_value = (mock_decision, mock_termination)

ml-agents/mlagents/trainers/components/bc/module.py (4 changes)


self.policy = policy
self.current_lr = policy_learning_rate * strength
self.model = BCModel(policy, self.current_lr, steps)
_, self.demonstration_buffer = demo_to_buffer(demo_path, policy.sequence_length)
_, self.demonstration_buffer = demo_to_buffer(
demo_path, policy.sequence_length, policy.brain
)
self.batch_size = batch_size if batch_size else default_batch_size
self.num_epoch = num_epoch if num_epoch else default_num_epoch

ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (11 changes)


self.model = GAILModel(
policy, 128, learning_rate, encoding_size, use_actions, use_vail
)
_, self.demonstration_buffer = demo_to_buffer(demo_path, policy.sequence_length)
_, self.demonstration_buffer = demo_to_buffer(
demo_path, policy.sequence_length, policy.brain
)
self.has_updated = False
self.update_dict: Dict[str, tf.Tensor] = {
"gail_loss": self.model.loss,

self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
Prepare inputs for update. .
:param mini_batch_demo: A mini batch of expert trajectories
:param mini_batch_policy: A mini batch of trajectories sampled from the current policy
Prepare inputs for update.
:param policy: The policy learning from GAIL signal
:param mini_batch: A mini batch from trajectories sampled from the current policy
:param num_sequences: Number of samples in batch
:return: Feed_dict for update process.
"""
# Get batch from demo buffer. Even if demo buffer is smaller, we sample with replacement

ml-agents/mlagents/trainers/demo_loader.py (59 changes)


@timed
def demo_to_buffer(
file_path: str, sequence_length: int
file_path: str, sequence_length: int, expected_brain_params: BrainParameters = None
) -> Tuple[BrainParameters, AgentBuffer]:
"""
Loads demonstration file and uses it to fill training buffer.

behavior_spec, info_action_pair, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(info_action_pair, behavior_spec, sequence_length)
brain_params = behavior_spec_to_brain_parameters("DemoBrain", behavior_spec)
if expected_brain_params:
# check action dimensions in demonstration match
if (
brain_params.vector_action_space_size
!= expected_brain_params.vector_action_space_size
):
raise RuntimeError(
"The action dimensions {} in demonstration do not match the policy's {}.".format(
brain_params.vector_action_space_size,
expected_brain_params.vector_action_space_size,
)
)
# check the action types in demonstration match
if (
brain_params.vector_action_space_type
!= expected_brain_params.vector_action_space_type
):
raise RuntimeError(
"The action type of {} in demonstration do not match the policy's {}.".format(
brain_params.vector_action_space_type,
expected_brain_params.vector_action_space_type,
)
)
# check number of vector observations in demonstration match
if (
brain_params.vector_observation_space_size
!= expected_brain_params.vector_observation_space_size
):
raise RuntimeError(
"The vector observation dimensions of {} in demonstration do not match the policy's {}.".format(
brain_params.vector_observation_space_size,
expected_brain_params.vector_observation_space_size,
)
)
# check number of visual observations/resolutions in demonstration match
if (
brain_params.number_visual_observations
!= expected_brain_params.number_visual_observations
):
raise RuntimeError(
"Number of visual observations {} in demonstrations do not match the policy's {}.".format(
brain_params.number_visual_observations,
expected_brain_params.number_visual_observations,
)
)
for i, (resolution, expected_resolution) in enumerate(
zip(
brain_params.camera_resolutions,
expected_brain_params.camera_resolutions,
)
):
if resolution != expected_resolution:
raise RuntimeError(
"The resolution of visual observation {} in demonstrations do not match the policy's.".format(
i
)
)
return brain_params, demo_buffer
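A hedged usage sketch of the extended signature: callers such as the BC module and the GAIL reward signal now pass the policy's `BrainParameters`, so a demonstration whose spaces do not match fails fast with a `RuntimeError` (the demo path and brain settings below are illustrative):

```python
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.demo_loader import demo_to_buffer

expected = BrainParameters(
    brain_name="MyBehavior",
    vector_observation_space_size=8,
    camera_resolutions=[],
    vector_action_space_size=[2],
    vector_action_descriptions=[],
    vector_action_space_type=1,  # same space type as the BRAIN_PARAMS fixture in test_demo_loader.py
)
# Raises RuntimeError if the demo's action or observation spaces differ from `expected`.
_, demo_buffer = demo_to_buffer("./demos/Expert.demo", 1, expected)
```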

ml-agents/mlagents/trainers/tests/test_demo_loader.py (64 changes)


DemonstrationMetaProto,
)
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.demo_loader import (
load_demonstration,
demo_to_buffer,

BRAIN_PARAMS = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
)
def test_load_demo():
path_prefix = os.path.dirname(os.path.abspath(__file__))

assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BRAIN_PARAMS)
assert len(demo_buffer["actions"]) == total_expected - 1

assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BRAIN_PARAMS)
def test_demo_mismatch():
path_prefix = os.path.dirname(os.path.abspath(__file__))
# observation mismatch
with pytest.raises(RuntimeError):
brain_params_obs = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=9,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_obs)
# action mismatch
with pytest.raises(RuntimeError):
brain_params_act = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[3],
vector_action_descriptions=[],
vector_action_space_type=1,
)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_act)
# action type mismatch
with pytest.raises(RuntimeError):
brain_params_type = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=0,
)
_, demo_buffer = demo_to_buffer(
path_prefix + "/test.demo", 1, brain_params_type
)
# vis obs mismatch
with pytest.raises(RuntimeError):
brain_params_vis = BrainParameters(
brain_name="test_brain",
vector_observation_space_size=8,
camera_resolutions=[[30, 40]],
vector_action_space_size=[2],
vector_action_descriptions=[],
vector_action_space_type=1,
)
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, brain_params_vis)
def test_edge_cases():

ml-agents/mlagents/trainers/tests/test_reward_signals.py (45 changes)


from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer
CONTINUOUS_PATH = os.path.dirname(os.path.abspath(__file__)) + "/test.demo"
DISCRETE_PATH = os.path.dirname(os.path.abspath(__file__)) + "/testdcvis.demo"
def ppo_dummy_config():
return yaml.safe_load(

use_recurrent: false
memory_size: 8
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
extrinsic:
strength: 1.0
gamma: 0.99
"""
)

tau: 0.005
use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:
extrinsic:
strength: 1.0

"gamma": 0.9,
"encoding_size": 128,
"use_vail": True,
"demo_path": os.path.dirname(os.path.abspath(__file__)) + "/test.demo",
"demo_path": CONTINUOUS_PATH,
}
}

vector_obs_space=VECTOR_OBS_SPACE,
discrete_action_space=DISCRETE_ACTION_SPACE,
)
trainer_parameters = trainer_config
model_path = "testpath"
trainer_parameters["model_path"] = model_path

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_cc(trainer_config, gail_dummy_config):
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": CONTINUOUS_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
optimizer = create_optimizer_mock(
trainer_config, gail_dummy_config, False, False, False
)

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_dc_visual(trainer_config, gail_dummy_config):
gail_dummy_config["gail"]["demo_path"] = (
os.path.dirname(os.path.abspath(__file__)) + "/testdcvis.demo"
gail_dummy_config["gail"]["demo_path"] = DISCRETE_PATH
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": DISCRETE_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
optimizer = create_optimizer_mock(
trainer_config, gail_dummy_config, False, True, True

"trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_rnn(trainer_config, gail_dummy_config):
trainer_config.update(
{
"behavioral_cloning": {
"demo_path": CONTINUOUS_PATH,
"strength": 1.0,
"steps": 10000000,
}
}
)
policy = create_optimizer_mock(
trainer_config, gail_dummy_config, True, False, False
)

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2 changes)


agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
brain_param_proto = BrainParametersProto(
vector_action_size=[1],
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,

ml-agents/tests/yamato/scripts/run_gym.py (18 changes)


import argparse
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
def test_run_environment(env_name):

"""
env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
u_env = UnityEnvironment(env_name, worker_id=1, no_graphics=True)
env = UnityToGymWrapper(u_env, use_visual=False)
try:
# Examine environment parameters

"""
try:
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env2 = UnityEnv(env_name, worker_id=2, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env2 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=2, no_graphics=True), use_visual=False
)
env2.reset()
finally:
env1.close()
