Replaced get_behavior_names and get_behavior_spec with behavior_specs property (#3946)

* Replaced get_behavior_names and get_behavior_spec with behavior_specs property
* Fixing the test
* [ci]
* addressing some comments
* use typing.Mapping (#3948)
* Update ml-agents-envs/mlagents_envs/base_env.py

Co-authored-by: Chris Elion <chris.elion@unity3d.com>

* Adding the documentation

Co-authored-by: Chris Elion <chris.elion@unity3d.com>
5 年前 · c6ed3789
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
 - `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
+- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
 ### Minor Changes
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 - Trainer configuration, curriculum configuration, and parameter randomization
  configuration have all been moved to a single YAML file. (#3791)
 - `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
+- On the UnityEnvironment API, the `get_behavior_names()` and `get_behavior_spec()` methods were replaced by the `behavior_specs` property, which contains a mapping from behavior names to behavior specs.

 ### Steps to Migrate
 - Before upgrading, copy your `Behavior Name` sections from `trainer_config.yaml` into
  the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
 - If you are using `UnityEnvironment` directly, replace `max_step` with `interrupted`
 in the `TerminalStep` and `TerminalSteps` objects.
+ - Replace usage of `get_behavior_names()` and `get_behavior_spec()` in UnityEnvironment with `behavior_specs`.

 ## Migrating from 0.15 to Release 1

--- a/docs/Python-API.md
+++ b/docs/Python-API.md
 env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
 # Start interacting with the environment.
 env.reset()
-behavior_names = env.get_behavior_names()
+behavior_names = env.behavior_specs.keys()
 ...
 ```
 **NOTE:** Please read [Interacting with a Unity Environment](#interacting-with-a-unity-environment)
  act.
 - **Close : `env.close()`** Sends a shutdown signal to the environment and
  terminates the communication.
- **Get Behavior Names : `env.get_behavior_names()`** Returns a list of
-  `BehaviorName`. Note that the number of groups can change over time in the
-  simulation if new Agent behaviors are created in the simulation.
- **Get Behavior Spec : `env.get_behavior_spec(behavior_name: str)`** Returns
-  the `BehaviorSpec` corresponding to the behavior_name given as input. A
-  `BehaviorSpec` contains information such as the observation shapes, the action
-  type (multi-discrete or continuous) and the action shape. Note that the
-  `BehaviorSpec` for a specific group is fixed throughout the simulation.
+- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
+  `BehaviorName` to `BehaviorSpec` objects (read only).
+  A `BehaviorSpec` contains information such as the observation shapes, the
+  action type (multi-discrete or continuous) and the action shape. Note that
+  the `BehaviorSpec` for a specific behavior is fixed throughout the simulation.
+  The number of entries in the Mapping can change over time in the simulation
+  if new Agent behaviors are created in the simulation.
 - **Get Steps : `env.get_steps(behavior_name: str)`** Returns a tuple
  `DecisionSteps, TerminalSteps` corresponding to the behavior_name given as
  input. The `DecisionSteps` contains information about the state of the agents
--- a/gym-unity/gym_unity/envs/init.py
+++ b/gym-unity/gym_unity/envs/init.py
        self._env = unity_env

        # Take a single step so that the brain information will be sent over
-        if not self._env.get_behavior_names():
+        if not self._env.behavior_specs:
            self._env.step()

        self.visual_obs = None
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
-        if len(self._env.get_behavior_names()) != 1:
+        if len(self._env.behavior_specs) != 1:
-        self.name = self._env.get_behavior_names()[0]
-        self.group_spec = self._env.get_behavior_spec(self.name)
+        self.name = list(self._env.behavior_specs.keys())[0]
+        self.group_spec = self._env.behavior_specs[self.name]

        if use_visual and self._get_n_vis_obs() == 0:
            raise UnityGymException(
--- a/gym-unity/gym_unity/tests/test_gym.py
+++ b/gym-unity/gym_unity/tests/test_gym.py
    ActionType,
    DecisionSteps,
    TerminalSteps,
+    BehaviorMapping,
 )


    setup_mock_unityenvironment(
        mock_env, mock_spec, mock_decision_step, mock_terminal_step
    )
-
    env = UnityToGymWrapper(mock_env, use_visual=False)
    assert isinstance(env, UnityToGymWrapper)
    assert isinstance(env.reset(), np.ndarray)
    :Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
    :Mock mock_termination: A TerminalSteps object that will be returned at each step and reset.
    """
-    mock_env.get_behavior_names.return_value = ["MockBrain"]
-    mock_env.get_behavior_spec.return_value = mock_spec
+    mock_env.behavior_specs = BehaviorMapping({"MockBrain": mock_spec})
    mock_env.get_steps.return_value = (mock_decision, mock_termination)
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py

 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import List, NamedTuple, Tuple, Optional, Union, Dict, Iterator, Any
+from typing import (
+    List,
+    NamedTuple,
+    Tuple,
+    Optional,
+    Union,
+    Dict,
+    Iterator,
+    Any,
+    Mapping as MappingType,
+)
 import numpy as np
 from enum import Enum

            return np.zeros((n_agents, self.action_size), dtype=np.float32)


+class BehaviorMapping(Mapping):
+    def __init__(self, specs: Dict[BehaviorName, BehaviorSpec]):
+        self._dict = specs
+
+    def __len__(self) -> int:
+        return len(self._dict)
+
+    def __getitem__(self, behavior: BehaviorName) -> BehaviorSpec:
+        return self._dict[behavior]
+
+    def __iter__(self) -> Iterator[Any]:
+        yield from self._dict
+
+
 class BaseEnv(ABC):
    @abstractmethod
    def step(self) -> None:
        """
        pass

+    @property
-    def get_behavior_names(self) -> List[BehaviorName]:
+    def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
-        Returns the list of the behavior names present in the environment.
+        Returns a Mapping from behavior names to behavior specs.
-        This list can grow with time as new policies are instantiated.
-        :return: the list of agent BehaviorName.
+        Note that new keys can be added to this mapping as new policies are instantiated.
-        pass

    @abstractmethod
    def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
         episode terminated last step.
        """
        pass
-
-    @abstractmethod
-    def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
-        """
-        Get the BehaviorSpec corresponding to the behavior name
-        :param behavior_name: The name of the behavior the agents are part of
-        :return: A BehaviorSpec corresponding to that behavior
-        """
-        pass
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
 import numpy as np
 import os
 import subprocess
-from typing import Dict, List, Optional, Any, Tuple
+from typing import Dict, List, Optional, Any, Tuple, Mapping as MappingType

 import mlagents_envs

    BehaviorSpec,
    BehaviorName,
    AgentId,
+    BehaviorMapping,
 )
 from mlagents_envs.timers import timed, hierarchical_timer
 from mlagents_envs.exception import (
        self._update_state(rl_output)
        self._env_actions.clear()

-    def get_behavior_names(self):
-        return list(self._env_specs.keys())
+    @property
+    def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
+        return BehaviorMapping(self._env_specs)

    def _assert_behavior_exists(self, behavior_name: str) -> None:
        if behavior_name not in self._env_specs:
    ) -> Tuple[DecisionSteps, TerminalSteps]:
        self._assert_behavior_exists(behavior_name)
        return self._env_state[behavior_name]
-
-    def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
-        self._assert_behavior_exists(behavior_name)
-        return self._env_specs[behavior_name]

    def close(self):
        """
--- a/ml-agents-envs/mlagents_envs/tests/test_envs.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_envs.py
        discrete_action=False, visual_inputs=0
    )
    env = UnityEnvironment(" ")
-    assert env.get_behavior_names() == ["RealFakeBrain"]
+    assert list(env.behavior_specs.keys()) == ["RealFakeBrain"]
    env.close()


        discrete_action=False, visual_inputs=0
    )
    env = UnityEnvironment(" ")
-    spec = env.get_behavior_spec("RealFakeBrain")
+    spec = env.behavior_specs["RealFakeBrain"]
    env.reset()
    decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
    env.close()
        discrete_action=False, visual_inputs=0
    )
    env = UnityEnvironment(" ")
-    spec = env.get_behavior_spec("RealFakeBrain")
+    spec = env.behavior_specs["RealFakeBrain"]
    env.step()
    decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
    n_agents = len(decision_steps)
--- a/ml-agents/mlagents/trainers/simple_env_manager.py
+++ b/ml-agents/mlagents/trainers/simple_env_manager.py
    @property
    def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
        result = {}
-        for brain_name in self.env.get_behavior_names():
-            result[brain_name] = behavior_spec_to_brain_parameters(
-                brain_name, self.env.get_behavior_spec(brain_name)
+        for behavior_name, behavior_spec in self.env.behavior_specs.items():
+            result[behavior_name] = behavior_spec_to_brain_parameters(
+                behavior_name, behavior_spec
            )
        return result


    def _generate_all_results(self) -> AllStepResult:
        all_step_result: AllStepResult = {}
-        for brain_name in self.env.get_behavior_names():
+        for brain_name in self.env.behavior_specs:
            all_step_result[brain_name] = self.env.get_steps(brain_name)
        return all_step_result
--- a/ml-agents/mlagents/trainers/subprocess_env_manager.py
+++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
-        for brain_name in env.get_behavior_names():
+        for brain_name in env.behavior_specs:
-        for brain_name in env.get_behavior_names():
-            result[brain_name] = behavior_spec_to_brain_parameters(
-                brain_name, env.get_behavior_spec(brain_name)
+        for behavior_name, behavior_specs in env.behavior_specs.items():
+            result[behavior_name] = behavior_spec_to_brain_parameters(
+                behavior_name, behavior_specs
            )
        return result

--- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py
+++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py
    DecisionSteps,
    TerminalSteps,
    ActionType,
+    BehaviorMapping,
 )
 from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
 from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
            obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
        return obs

-    def get_behavior_names(self):
-        return self.names
-
-    def get_behavior_spec(self, behavior_name):
-        return self.behavior_spec
+    @property
+    def behavior_specs(self):
+        behavior_dict = {}
+        for n in self.names:
+            behavior_dict[n] = self.behavior_spec
+        return BehaviorMapping(behavior_dict)

    def set_action_for_agent(self, behavior_name, agent_id, action):
        pass
--- a/ml-agents/tests/yamato/scripts/run_llapi.py
+++ b/ml-agents/tests/yamato/scripts/run_llapi.py
        env.reset()

        # Set the default brain to work with
-        group_name = env.get_behavior_names()[0]
-        group_spec = env.get_behavior_spec(group_name)
+        group_name = list(env.behavior_specs.keys())[0]
+        group_spec = env.behavior_specs[group_name]

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(time_scale=3.0)