Comparing commits

...
This merge request has changes that conflict with the target branch.
/protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
/gym-unity/gym_unity/tests/test_gym.py
/Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs
/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
/Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab
/Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
/Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
/com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
/com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
/com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs
/com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
/com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
/com.unity.ml-agents/Runtime/Sensors/ISensor.cs
/com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
/ml-agents-envs/mlagents_envs/rpc_utils.py
/ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
/ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
/ml-agents-envs/mlagents_envs/base_env.py
/ml-agents-envs/mlagents_envs/tests/test_steps.py
/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
/ml-agents/mlagents/trainers/policy/torch_policy.py
/ml-agents/mlagents/trainers/policy/policy.py
/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/sac/trainer.py
/ml-agents/mlagents/trainers/tests/mock_brain.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
/ml-agents/mlagents/trainers/tests/simple_test_envs.py
/ml-agents/mlagents/trainers/torch/distributions.py
/ml-agents/mlagents/trainers/torch/model_serialization.py
/ml-agents/mlagents/trainers/torch/networks.py
/ml-agents/mlagents/trainers/trajectory.py
/com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs
/com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs
/com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
/ml-agents/mlagents/trainers/tests/tensorflow/test_models.py
/ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs

11 commits

Author  SHA1  Message  Commit date
GitHub  a8aabd7d  Merge pull request #4750 from Unity-Technologies/gc-onehot  4 years ago
Arthur Juliani  e8849803  Simpler solution  4 years ago
Arthur Juliani  e6a973cd  Add OneHot util to goal sensor  4 years ago
GitHub  22658a40  use sensor types to differentiate obs (#4749)  4 years ago
GitHub  ba21e419  Merge pull request #4737 from Unity-Technologies/goal-gridworld-sensor  4 years ago
Arthur Juliani  2be6af80  Fix black  4 years ago
Arthur Juliani  4060202d  Use GoalSensor in GridWorld  4 years ago
GitHub  cc6b4564  Multi Directional Walker and Initial Hypernetwork (#4740)  4 years ago
Arthur Juliani  0d2f8887  Merge remote-tracking branch 'origin/master' into goal-conditioning  4 years ago
GitHub  ded1f79b  Merge pull request #4732 from Unity-Technologies/goal-sensors  4 years ago
GitHub  76faf383  Merge pull request #4733 from Unity-Technologies/gc-food-goals  4 years ago
63 files changed, with 3,895 insertions and 182 deletions
  1. 7  protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
  2. 4  gym-unity/gym_unity/tests/test_gym.py
  3. 6  com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs
  4. 6  com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
  5. 6  com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  6. 8  ml-agents/mlagents/trainers/tests/mock_brain.py
  7. 7  ml-agents/mlagents/trainers/tests/tensorflow/test_models.py
  8. 4  ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
  9. 14  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  10. 32  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  11. 32  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  12. 45  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  13. 8  ml-agents/mlagents/trainers/tests/simple_test_envs.py
  14. 11  ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  15. 8  ml-agents/mlagents/trainers/policy/policy.py
  16. 13  ml-agents/mlagents/trainers/policy/torch_policy.py
  17. 3  ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  18. 4  ml-agents/mlagents/trainers/ppo/trainer.py
  19. 6  ml-agents/mlagents/trainers/sac/trainer.py
  20. 89  ml-agents/mlagents/trainers/torch/decoders.py
  21. 119  ml-agents/mlagents/trainers/torch/distributions.py
  22. 34  ml-agents/mlagents/trainers/torch/action_model.py
  23. 10  ml-agents/mlagents/trainers/torch/model_serialization.py
  24. 57  ml-agents/mlagents/trainers/torch/networks.py
  25. 54  ml-agents/mlagents/trainers/trajectory.py
  26. 9  com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
  27. 29  com.unity.ml-agents/Runtime/Sensors/ISensor.cs
  28. 6  com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
  29. 6  com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs
  30. 9  com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
  31. 6  com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  32. 6  com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
  33. 1  com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  34. 50  com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
  35. 6  com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
  36. 5  com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs
  37. 5  com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
  38. 6  com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs
  39. 5  com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  40. 5  com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
  41. 19  ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2.py
  42. 81  ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py
  43. 51  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
  44. 25  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
  45. 26  ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  46. 11  ml-agents-envs/mlagents_envs/tests/test_steps.py
  47. 4  ml-agents-envs/mlagents_envs/rpc_utils.py
  48. 10  ml-agents-envs/mlagents_envs/base_env.py
  49. 6  Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs
  50. 33  Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab
  51. 45  Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
  52. 37  Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  53. 6  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
  54. 26  config/ppo/MultiDirWalker.yaml
  55. 507  Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab
  56. 7  Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta
  57. 1001  Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity
  58. 9  Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta
  59. 353  Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs
  60. 11  Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta
  61. 1001  Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx
  62. 11  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs.meta
  63. 56  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs

7
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


PNG = 1;
}
enum SensorTypeProto {
OBSERVATION = 0;
GOAL = 1;
REWARD = 2;
}
message ObservationProto {
message FloatData {
repeated float data = 1;

FloatData float_data = 4;
}
repeated int32 compressed_channel_mapping = 5;
SensorTypeProto sensor_type = 6;
}
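For orientation, the Python side mirrors this enum as SensorType in ml-agents-envs/mlagents_envs/base_env.py (that file's hunk is not shown in this excerpt). A minimal sketch of the likely shape of that enum, inferred from the proto above and from the SensorType.OBSERVATION / SensorType.PARAMETERIZATION members used later in this diff; the base class and exact member values are assumptions:

    from enum import Enum

    class SensorType(Enum):
        # Assumed to track SensorTypeProto above; OBSERVATION and PARAMETERIZATION
        # appear verbatim elsewhere in this diff, REWARD is inferred from the proto.
        OBSERVATION = 0
        PARAMETERIZATION = 1  # the proto calls this GOAL
        REWARD = 2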

4
gym-unity/gym_unity/tests/test_gym.py


from mlagents_envs.base_env import (
BehaviorSpec,
ActionSpec,
SensorType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return BehaviorSpec(obs_shapes, action_spec)
sensor_types = [SensorType.OBSERVATION for _ in range(len(obs_shapes))]
return BehaviorSpec(obs_shapes, sensor_types, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):
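In other words, BehaviorSpec now takes a list of SensorType values aligned one-to-one with observation_shapes. A minimal construction sketch in the style of the tests above (the shapes and sizes are illustrative):

    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

    obs_shapes = [(8,), (84, 84, 3)]                           # one vector, one visual observation
    sensor_types = [SensorType.OBSERVATION] * len(obs_shapes)  # one sensor type per observation
    action_spec = ActionSpec.create_continuous(2)
    behavior_spec = BehaviorSpec(obs_shapes, sensor_types, action_spec)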

6
com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
if (m_Board.Rows != m_Rows || m_Board.Columns != m_Columns || m_Board.NumCellTypes != m_NumCellTypes)

6
com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))

6
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public void Update()
{
if (m_Settings.UseModelSpace)

8
ml-agents/mlagents/trainers/tests/mock_brain.py


TerminalSteps,
BehaviorSpec,
ActionSpec,
SensorType,
ActionTuple,
)

obs_list = []
for _shape in observation_shapes:
obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
sensor_types = [SensorType.OBSERVATION for i in range(len(obs_list))]
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(observation_shapes, action_spec)
behavior_spec = BehaviorSpec(observation_shapes, sensor_types, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)],
[SensorType.OBSERVATION],
action_spec,
)
return behavior_spec

7
ml-agents/mlagents/trainers/tests/tensorflow/test_models.py


from mlagents.trainers.tf.models import ModelUtils
from mlagents.tf_utils import tf
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
obs_shapes = [(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector)
sensor_types = [SensorType.OBSERVATION for _ in range(len(obs_shapes))]
[(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
ActionSpec.create_discrete((1,)),
obs_shapes, sensor_types, ActionSpec.create_discrete((1,))
)
return behavior_spec

4
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


from unittest.mock import MagicMock
from mlagents.trainers.settings import TrainerSettings
import numpy as np
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, SensorType
dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
dummy_groupspec = BehaviorSpec([(1,)], [SensorType.OBSERVATION], dummy_actionspec)
return dummy_groupspec

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(8,), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(8,), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


RNDRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
[(10,), (64, 66, 3), (84, 86, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
[(10,), (64, 66, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

45
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
[(10,), (64, 66, 3), (84, 86, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
[(10,), (64, 66, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
"behavior_spec",
[BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

8
ml-agents/mlagents/trainers/tests/simple_test_envs.py


ActionTuple,
BaseEnv,
BehaviorSpec,
SensorType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

self.num_vector = num_vector
self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
sensor_types = [
SensorType.OBSERVATION for _ in range(len(self._make_obs_spec()))
]
continuous_action_size, discrete_action_size = action_sizes
discrete_tuple = tuple(2 for _ in range(discrete_action_size))
action_spec = ActionSpec(continuous_action_size, discrete_tuple)

self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.behavior_spec = BehaviorSpec(
self._make_obs_spec(), sensor_types, action_spec
)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

11
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
goals = [ModelUtils.list_to_tensor(batch["goals"])]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(

memory = torch.zeros([1, 1, self.policy.m_size])
vec_vis_obs = SplitObservations.from_observations(next_obs)
vec_vis_obs = SplitObservations.from_observations(
next_obs, self.policy.behavior_spec
)
next_vec_obs = [
ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
]

]
# goals dont change but otherwise broken
next_goals = [torch.as_tensor(vec_vis_obs.goals)]
vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
vector_obs, visual_obs, goals, memory, sequence_length=batch.num_experiences
next_vec_obs, next_vis_obs, next_memory, sequence_length=1
next_vec_obs, next_vis_obs, next_goals, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
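The optimizer now pulls a "goals" column out of the AgentBuffer and threads it through to the critic alongside the vector and visual observations. A small standalone sketch of that column handling, using AgentBuffer and ModelUtils.list_to_tensor as in the hunk (the import path for ModelUtils is the one used elsewhere in the trainers and is an assumption here):

    import numpy as np
    from mlagents.trainers.buffer import AgentBuffer
    from mlagents.trainers.torch.utils import ModelUtils

    buffer = AgentBuffer()
    buffer["goals"].append(np.zeros(1, dtype=np.float32))  # goal index for step 0
    buffer["goals"].append(np.ones(1, dtype=np.float32))   # goal index for step 1

    # Mirrors the new code path above: the goal column becomes a tensor that is
    # handed to the critic next to vector_obs and visual_obs.
    goals = [ModelUtils.list_to_tensor(buffer["goals"])]
    print(goals[0].shape)  # torch.Size([2, 1])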

8
ml-agents/mlagents/trainers/policy/policy.py


from typing import Dict, List, Optional
import numpy as np
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps, SensorType
from mlagents_envs.exception import UnityException
from mlagents.trainers.action_info import ActionInfo

else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
shape[0]
for shape, obs_type in zip(
behavior_spec.observation_shapes, behavior_spec.sensor_types
)
if len(shape) == 1 and obs_type == SensorType.OBSERVATION
)
self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
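Concretely, only 1-D observations whose sensor type is OBSERVATION now count toward vec_obs_size, so a goal vector no longer inflates it. A tiny worked example with illustrative shapes (PARAMETERIZATION is the goal sensor type used elsewhere in this diff):

    from mlagents_envs.base_env import SensorType

    observation_shapes = [(6,), (2,), (84, 84, 3)]
    sensor_types = [
        SensorType.OBSERVATION,       # 6-element vector observation
        SensorType.PARAMETERIZATION,  # 2-element goal vector
        SensorType.OBSERVATION,       # visual observation
    ]
    vec_obs_size = sum(
        shape[0]
        for shape, obs_type in zip(observation_shapes, sensor_types)
        if len(shape) == 1 and obs_type == SensorType.OBSERVATION
    )
    print(vec_obs_size)  # 6 -- the goal vector is excluded, the visual shape is ignored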

13
ml-agents/mlagents/trainers/policy/torch_policy.py


def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
vec_vis_obs = SplitObservations.from_observations(
decision_requests.obs, self.behavior_spec
)
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])

self,
vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
"""
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
vec_obs, vis_obs, masks, memories, seq_len
vec_obs, vis_obs, goals, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)

vis_obs: torch.Tensor,
goals: torch.Tensor,
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

vec_obs, vis_obs, actions, masks, memories, seq_len
vec_obs, vis_obs, goals, actions, masks, memories, seq_len
)
return log_probs, entropies, value_heads

vis_obs = [
torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
]
goals = [torch.as_tensor(vec_vis_obs.goals)]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)

action, log_probs, entropy, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
vec_obs, vis_obs, goals, masks=masks, memories=memories
)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple

3
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
goals = [ModelUtils.list_to_tensor(batch["goals"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)

log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
goals,
masks=act_masks,
actions=actions,
memories=memories,

+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()

4
ml-agents/mlagents/trainers/ppo/trainer.py


super()._process_trajectory(trajectory)
agent_id = trajectory.agent_id # All the agents should have the same ID
agent_buffer_trajectory = trajectory.to_agentbuffer()
agent_buffer_trajectory = trajectory.to_agentbuffer(self.policy.behavior_spec)
# Update the normalization
if self.is_training:
self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])

self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
condition_sigma_on_obs=True, # Faster training for PPO
separate_critic=True, # Match network architecture with TF
)
return policy

6
ml-agents/mlagents/trainers/sac/trainer.py


last_step = trajectory.steps[-1]
agent_id = trajectory.agent_id # All the agents should have the same ID
agent_buffer_trajectory = trajectory.to_agentbuffer()
agent_buffer_trajectory = trajectory.to_agentbuffer(self.policy.behavior_spec)
# Update the normalization
if self.is_training:

# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.
if last_step.interrupted:
vec_vis_obs = SplitObservations.from_observations(last_step.obs)
vec_vis_obs = SplitObservations.from_observations(
last_step.obs, self.policy.behavior_spec
)
for i, obs in enumerate(vec_vis_obs.visual_observations):
agent_buffer_trajectory["next_visual_obs%d" % i][-1] = obs
if vec_vis_obs.vector_observations.size > 1:

89
ml-agents/mlagents/trainers/torch/decoders.py


from typing import List, Dict
from mlagents.torch_utils import torch, nn
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.layers import (
linear_layer,
LinearEncoder,
Initialization,
Swish,
)
from collections import defaultdict
class ValueHeads(nn.Module):

for stream_name, head in self.value_heads.items():
value_outputs[stream_name] = head(hidden).squeeze(-1)
return value_outputs
class ValueHeadsHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
num_goals,
stream_names: List[str],
input_size: int,
output_size: int = 1,
):
super().__init__()
self.stream_names = stream_names
self._num_goals = num_goals
self.input_size = input_size
self.output_size = output_size
self.streams_size = len(stream_names)
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
flat_output = linear_layer(
layer_size,
input_size * output_size * self.streams_size
+ self.output_size * self.streams_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(
self, hidden: torch.Tensor, goal: torch.Tensor
) -> Dict[str, torch.Tensor]:
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# (b, i * o * streams + o * streams)
flat_output_weights = self.hypernet(goal_onehot)
b = hidden.size(0)
output_weights, output_bias = torch.split(
flat_output_weights,
self.streams_size * self.input_size * self.output_size,
dim=-1,
)
output_weights = torch.reshape(
output_weights, (self.streams_size, b, self.input_size, self.output_size)
)
output_bias = torch.reshape(
output_bias, (self.streams_size, b, self.output_size)
)
output_bias = output_bias.unsqueeze(dim=2)
value_outputs = {}
for stream_name, out_w, out_b in zip(
self.stream_names, output_weights, output_bias
):
inp_out_w = torch.bmm(hidden.unsqueeze(dim=1), out_w)
inp_out_w_out_b = inp_out_w + out_b
value_outputs[stream_name] = inp_out_w_out_b.squeeze()
return value_outputs
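ValueHeadsHyperNetwork replaces the plain ValueHeads with value heads whose final-layer weights and biases are generated by a small hypernetwork from a one-hot goal encoding. A rough usage sketch; the stream name and sizes are illustrative, and the goal is assumed to be passed as a list holding a (batch, 1) tensor of integer goal indices, matching the dummy_goals added in model_serialization later in this diff:

    import torch
    from mlagents.trainers.torch.decoders import ValueHeadsHyperNetwork

    value_heads = ValueHeadsHyperNetwork(
        num_layers=1,
        layer_size=256,
        num_goals=2,
        stream_names=["extrinsic"],  # illustrative reward stream name
        input_size=128,              # size of the network body's encoding
    )
    hidden = torch.zeros(4, 128)     # batch of 4 encodings
    goal = [torch.zeros(4, 1)]       # integer goal index per agent (all goal 0 here)
    values = value_heads(hidden, goal)
    print(values["extrinsic"].shape)  # torch.Size([4]) -- one value estimate per agent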

119
ml-agents/mlagents/trainers/torch/distributions.py


from mlagents.torch_utils import torch, nn
import numpy as np
import math
from mlagents.trainers.torch.layers import linear_layer, Initialization
from mlagents.trainers.torch.layers import (
linear_layer,
Initialization,
LinearEncoder,
Swish,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero

# torch.cat here instead of torch.expand() becuase it is not supported in the
# verified version of Barracuda (1.0.2).
log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
return GaussianDistInstance(mu, torch.exp(log_sigma))
class GaussianHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
hidden_size,
num_outputs,
conditional_sigma,
tanh_squash,
num_goals,
):
super().__init__()
self._num_goals = num_goals
self.hidden_size = hidden_size
self.tanh_squash = tanh_squash
self.conditional_sigma = conditional_sigma
self.num_outputs = num_outputs
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
if conditional_sigma:
flat_output = linear_layer(
layer_size,
2 * (hidden_size * num_outputs + num_outputs),
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = None
else:
flat_output = linear_layer(
layer_size,
hidden_size * num_outputs + num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = linear_layer(
num_goals,
num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(self, inputs: torch.Tensor, goal: torch.Tensor):
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# cond (b, 2 * H * O + O
# not cond (b, H * O + O
flat_output_weights = self.hypernet(goal_onehot)
b = inputs.size(0)
inputs = inputs.unsqueeze(dim=1)
if self.conditional_sigma:
mu_w_log_sigma_w, mu_b, log_sigma_b = torch.split(
flat_output_weights,
[
2 * self.hidden_size * self.num_outputs,
self.num_outputs,
self.num_outputs,
],
dim=-1,
)
mu_w_log_sigma_w = torch.reshape(
mu_w_log_sigma_w, (b, 2 * self.hidden_size, self.num_outputs)
)
mu_w, log_sigma_w = torch.split(mu_w_log_sigma_w, self.hidden_size, dim=1)
log_sigma = torch.bmm(inputs, log_sigma_w)
log_sigma = log_sigma + log_sigma_b
log_sigma = log_sigma.squeeze()
log_sigma = torch.clamp(log_sigma, min=-20, max=2)
else:
mu_w, mu_b = torch.split(
flat_output_weights, self.hidden_size * self.num_outputs, dim=-1
)
mu_w = torch.reshape(mu_w, (b, self.hidden_size, self.num_outputs))
log_sigma = self._log_sigma_w(goal_onehot)
log_sigma = torch.squeeze(log_sigma)
mu = torch.bmm(inputs, mu_w)
mu = mu + mu_b
mu = mu.squeeze()
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
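GaussianHyperNetwork plays the same role for the continuous policy head: the mu (and optionally sigma) weights are generated from the one-hot goal. The hunk is truncated at the final else branch, which is assumed here to return a GaussianDistInstance as the existing GaussianDistribution does; the goal format is the same assumption as for the value-head hypernetwork above:

    import torch
    from mlagents.trainers.torch.distributions import GaussianHyperNetwork

    policy_head = GaussianHyperNetwork(
        num_layers=1,
        layer_size=256,
        hidden_size=128,        # size of the encoding from the network body
        num_outputs=2,          # continuous action size
        conditional_sigma=False,
        tanh_squash=False,
        num_goals=2,
    )
    encoding = torch.zeros(4, 128)
    goal = [torch.zeros(4, 1)]          # integer goal index per agent
    dist = policy_head(encoding, goal)  # goal-conditioned Gaussian over actions
    actions = dist.sample()             # shape (4, 2), assuming the truncated return above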

34
ml-agents/mlagents/trainers/torch/action_model.py


DistInstance,
DiscreteDistInstance,
GaussianDistribution,
GaussianHyperNetwork,
MultiCategoricalDistribution,
)
from mlagents.trainers.torch.agent_action import AgentAction

self._discrete_distribution = None
if self.action_spec.continuous_size > 0:
self._continuous_distribution = GaussianDistribution(
self.encoding_size,
self.action_spec.continuous_size,
self._continuous_distribution = GaussianHyperNetwork(
num_layers=1,
layer_size=256,
hidden_size=self.encoding_size,
num_outputs=self.action_spec.continuous_size,
num_goals=2,
)
if self.action_spec.discrete_size > 0:

discrete_action.append(discrete_dist.sample())
return AgentAction(continuous_action, discrete_action)
def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> DistInstances:
def _get_dists(
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> DistInstances:
"""
Creates a DistInstances tuple using the continuous and discrete distributions
:params inputs: The encoding from the network body

discrete_dist: Optional[List[DiscreteDistInstance]] = None
# This checks None because mypy complains otherwise
if self._continuous_distribution is not None:
continuous_dist = self._continuous_distribution(inputs)
continuous_dist = self._continuous_distribution(inputs, goal)
if self._discrete_distribution is not None:
discrete_dist = self._discrete_distribution(inputs, masks)
return DistInstances(continuous_dist, discrete_dist)

return action_log_probs, entropies
def evaluate(
self, inputs: torch.Tensor, masks: torch.Tensor, actions: AgentAction
self,
inputs: torch.Tensor,
masks: torch.Tensor,
actions: AgentAction,
goal: torch.Tensor,
) -> Tuple[ActionLogProbs, torch.Tensor]:
"""
Given actions and encoding from the network body, gets the distributions and

:params actions: The AgentAction
:return: An ActionLogProbs tuple and a torch tensor of the distribution entropies.
"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
def get_action_out(
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> torch.Tensor:
"""
Gets the tensors corresponding to the output of the policy network to be used for
inference. Called by the Actor's forward call.

"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
continuous_out, discrete_out, action_out_deprecated = None, None, None
if self.action_spec.continuous_size > 0 and dists.continuous is not None:
continuous_out = dists.continuous.exported_model_output()

return continuous_out, discrete_out, action_out_deprecated
def forward(
self, inputs: torch.Tensor, masks: torch.Tensor
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor]:
"""
The forward method of this module. Outputs the action, log probs,

:return: Given the input, an AgentAction of the actions generated by the policy and the corresponding
ActionLogProbs and entropies.
"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
actions = self._sample_action(dists)
log_probs, entropies = self._get_probs_and_entropy(actions, dists)
# Use the sum of entropy across actions, not the mean

10
ml-agents/mlagents/trainers/torch/model_serialization.py


for shape in self.policy.behavior_spec.observation_shapes
if len(shape) == 3
]
dummy_goals = [torch.zeros(batch_dim + [1])]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
)

self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
self.dummy_input = (
dummy_vec_obs,
dummy_vis_obs,
dummy_goals,
dummy_masks,
dummy_memories,
)
+ ["goals"]
+ [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
+ ["action_masks", "memories"]
)
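Net effect on the exported ONNX graph: a new "goals" input sits between the vector and visual observation inputs. Roughly, with the surrounding input names taken from the existing exporter and therefore assumptions here:

    vis_obs_size = 2  # illustrative
    input_names = (
        ["vector_observation"]
        + ["goals"]
        + [f"visual_observation_{i}" for i in range(vis_obs_size)]
        + ["action_masks", "memories"]
    )
    # ['vector_observation', 'goals', 'visual_observation_0',
    #  'visual_observation_1', 'action_masks', 'memories']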

57
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
from mlagents.trainers.torch.decoders import ValueHeads, ValueHeadsHyperNetwork
from mlagents.trainers.torch.layers import LSTM, LinearEncoder
from mlagents.trainers.torch.model_serialization import exporting_to_onnx

self.vector_processors,
encoder_input_size,
) = ModelUtils.create_input_processors(
observation_shapes,
observation_shapes[1:],
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

encoding_size = network_settings.memory.memory_size // 2
else:
encoding_size = network_settings.hidden_units
self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
self.value_heads = ValueHeadsHyperNetwork(
num_layers=1,
layer_size=256,
num_goals=2,
stream_names=stream_names,
input_size=encoding_size,
output_size=outputs_per_stream,
)
@property
def memory_size(self) -> int:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

)
output = self.value_heads(encoding)
output = self.value_heads(encoding, goals)
return output, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[Union[int, torch.Tensor], ...]:

cont_action_out,
disc_action_out,
action_out_deprecated,
) = self.action_model.get_action_out(encoding, masks)
) = self.action_model.get_action_out(encoding, masks, goals)
export_out = [
self.version_number,
torch.Tensor([self.network_body.memory_size]),

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
log_probs, entropies = self.action_model.evaluate(
encoding, masks, actions, goals
)
value_outputs = self.value_heads(encoding)
return log_probs, entropies, value_outputs

vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
action, log_probs, entropies = self.action_model(encoding, masks, goals)
value_outputs = self.value_heads(encoding)
return action, log_probs, entropies, value_outputs, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
value_outputs, critic_mem_out = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
if actor_mem is not None:
# Make memories with the actor mem unchanged

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

else:
critic_mem = None
actor_mem = None
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
log_probs, entropies = self.action_model.evaluate(
encoding, masks, actions, goals
)
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
return log_probs, entropies, value_outputs

vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

encoding, actor_mem_outs = self.network_body(
vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
action, log_probs, entropies = self.action_model(encoding, masks, goals)
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
if self.use_lstm:
mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)

54
ml-agents/mlagents/trainers/trajectory.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, SensorType
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

class SplitObservations(NamedTuple):
vector_observations: np.ndarray
visual_observations: List[np.ndarray]
goals: np.ndarray
def from_observations(obs: List[np.ndarray]) -> "SplitObservations":
def from_observations(obs: List[np.ndarray], behavior_spec) -> "SplitObservations":
"""
Divides a List of numpy arrays into a SplitObservations NamedTuple.
This allows you to access the vector and visual observations directly,

"""
vis_obs_list: List[np.ndarray] = []
vec_obs_list: List[np.ndarray] = []
goal_list: List[np.ndarray] = []
for observation in obs:
# Obs could be batched or single
if len(observation.shape) == 1 or len(observation.shape) == 2:
vec_obs_list.append(observation)
if len(observation.shape) == 3 or len(observation.shape) == 4:
vis_obs_list.append(observation)
last_obs = observation
for observation, sensor_type in zip(obs, behavior_spec.sensor_types):
if sensor_type == SensorType.PARAMETERIZATION:
goal_list.append(observation)
elif sensor_type == SensorType.OBSERVATION:
# Obs could be batched or single
if len(observation.shape) == 1 or len(observation.shape) == 2:
vec_obs_list.append(observation)
if len(observation.shape) == 3 or len(observation.shape) == 4:
vis_obs_list.append(observation)
last_obs = observation
if last_obs is not None:
is_batched = len(last_obs.shape) == 2 or len(last_obs.shape) == 4
if is_batched:

else np.zeros((last_obs.shape[0], 0), dtype=np.float32)
)
goals = (
np.concatenate(goal_list, axis=1)
if len(goal_list) > 0
else np.zeros((last_obs.shape[0], 0), dtype=np.float32)
)
else:
vec_obs = (
np.concatenate(vec_obs_list, axis=0)

goals = (
np.concatenate(goal_list, axis=0)
if len(goal_list) > 0
else np.array([], dtype=np.float32)
)
vector_observations=vec_obs, visual_observations=vis_obs_list
vector_observations=vec_obs, visual_observations=vis_obs_list, goals=goals
)

agent_id: str
behavior_id: str
def to_agentbuffer(self) -> AgentBuffer:
def to_agentbuffer(self, behavior_spec: BehaviorSpec) -> AgentBuffer:
"""
Converts a Trajectory to an AgentBuffer
:param trajectory: A Trajectory

"""
agent_buffer_trajectory = AgentBuffer()
vec_vis_obs = SplitObservations.from_observations(self.steps[0].obs)
vec_vis_obs = SplitObservations.from_observations(
self.steps[0].obs, behavior_spec
)
self.steps[step + 1].obs
self.steps[step + 1].obs, behavior_spec
next_vec_vis_obs = SplitObservations.from_observations(self.next_obs)
next_vec_vis_obs = SplitObservations.from_observations(
self.next_obs, behavior_spec
)
for i, _ in enumerate(vec_vis_obs.visual_observations):
agent_buffer_trajectory["visual_obs%d" % i].append(

agent_buffer_trajectory["vector_obs"].append(
vec_vis_obs.vector_observations
)
agent_buffer_trajectory["goals"].append(vec_vis_obs.goals)
# this shouldnt be necessary in an optimized implementation since the goal does not change
agent_buffer_trajectory["next_goals"].append(next_vec_vis_obs.goals)
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)
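A condensed sketch of calling the reworked SplitObservations, assuming a behavior_spec that marks the second observation as a goal (PARAMETERIZATION); the shapes are illustrative:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType
    from mlagents.trainers.trajectory import SplitObservations

    behavior_spec = BehaviorSpec(
        [(6,), (2,)],
        [SensorType.OBSERVATION, SensorType.PARAMETERIZATION],
        ActionSpec.create_continuous(2),
    )
    obs = [
        np.zeros((4, 6), dtype=np.float32),  # batched vector observation
        np.zeros((4, 2), dtype=np.float32),  # batched goal vector
    ]
    split = SplitObservations.from_observations(obs, behavior_spec)
    print(split.vector_observations.shape, split.goals.shape)  # (4, 6) (4, 2)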

9
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


}
/// <summary>
/// Camera sensors are always Observations.
/// </summary>
/// <returns>Sensor type of observation.</returns>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Generates a compressed image. This can be valuable in speeding-up training.
/// </summary>
/// <returns>Compressed image.</returns>

29
com.unity.ml-agents/Runtime/Sensors/ISensor.cs


}
/// <summary>
/// The semantic meaning of the sensor.
/// </summary>
public enum SensorType
{
/// <summary>
/// Sensor represents an agent's observation.
/// </summary>
Observation,
/// <summary>
/// Sensor represents an agent's task/goal parameterization.
/// </summary>
Goal,
/// <summary>
/// Sensor represents one or more reward signals.
/// </summary>
Reward
}
/// <summary>
/// Sensor interface for generating observations.
/// </summary>
public interface ISensor

/// </summary>
/// <returns>The name of the sensor.</returns>
string GetName();
/// <summary>
/// Get the semantic meaning of the sensor, i.e. whether it is an observation or other type
/// of data to be sent to the Agent.
/// </summary>
/// <returns>The type of the sensor.</returns>
SensorType GetSensorType();
}

6
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public string GetName()
{
return m_Name;

6
com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs


}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public void Update() { }
/// <inheritdoc/>

9
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


}
/// <summary>
/// RenderTexture sensors are always Observations.
/// </summary>
/// <returns>Sensor type of observation.</returns>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Converts a RenderTexture to a 2D texture.
/// </summary>
/// <returns>The 2D texture.</returns>

6
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


return m_WrappedSensor.GetCompressionType();
}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Create Empty PNG for initializing the buffer for stacking.
/// </summary>

6
com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;

1
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
observationProto.SensorType = (SensorTypeProto)sensor.GetSensorType();
observationProto.Shape.AddRange(shape);
return observationProto;
}
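The regenerated Python bindings (observation_pb2.py / .pyi in the file list) expose the same field. A minimal sketch of setting it from Python; the integer 1 corresponds to GOAL per the SensorTypeProto definition earlier in this diff:

    from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto

    obs_proto = ObservationProto()
    obs_proto.shape.extend([3])   # a 3-element vector observation
    obs_proto.sensor_type = 1     # GOAL, per SensorTypeProto above
    print(obs_proto.sensor_type)  # 1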

50
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyLZAgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSOgoLc2Vuc29yX3R5cGUYBiABKA4yJS5jb21tdW5pY2F0b3Jfb2JqZWN0",
"cy5TZW5zb3JUeXBlUHJvdG8aGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJC",
"EgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxII",
"CgROT05FEAASBwoDUE5HEAEqOAoPU2Vuc29yVHlwZVByb3RvEg8KC09CU0VS",
"VkFUSU9OEAASCAoER09BTBABEgoKBlJFV0FSRBACQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), typeof(global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "SensorType" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

internal enum CompressionTypeProto {
[pbr::OriginalName("NONE")] None = 0,
[pbr::OriginalName("PNG")] Png = 1,
}
internal enum SensorTypeProto {
[pbr::OriginalName("OBSERVATION")] Observation = 0,
[pbr::OriginalName("GOAL")] Goal = 1,
[pbr::OriginalName("REWARD")] Reward = 2,
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
sensorType_ = other.sensorType_;
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "sensor_type" field.</summary>
public const int SensorTypeFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto sensorType_ = 0;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto SensorType {
get { return sensorType_; }
set {
sensorType_ = value;
}
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if (SensorType != other.SensorType) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
if (SensorType != 0) hash ^= SensorType.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
if (SensorType != 0) {
output.WriteRawTag(48);
output.WriteEnum((int) SensorType);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
if (SensorType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) SensorType);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
if (other.SensorType != 0) {
SensorType = other.SensorType;
}
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 48: {
sensorType_ = (global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto) input.ReadEnum();
break;
}
}

6
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


return new byte[] { 13, 37 };
}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
public void Update() { }
public void Reset() { }

5
com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs


m_Shape = new[] { Height, Width, 1 };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public string GetName()
{
return m_Name;

5
com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs


return m_Shape;
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public byte[] GetCompressedObservation()
{
return null;

6
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


return Shape;
}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
public int Write(ObservationWriter writer)
{
for (var h = 0; h < Shape[0]; h++)

5
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


return new byte[] { 0 };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public SensorCompressionType GetCompressionType()
{
return compressionType;

5
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


return new[] { m_Height, m_Width, m_Channels };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public int Write(ObservationWriter writer)
{
for (int i = 0; i < m_Width * m_Height * m_Channels; i++)

19
ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2.py


# -*- coding: utf-8 -*-
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
"""Generated protocol buffer code."""
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()

name='mlagents_envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n:mlagents_envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents_envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_options=b'\252\002\"Unity.MLAgents.CommunicatorObjects',
create_key=_descriptor._internal_create_key,
serialized_pb=b'\n:mlagents_envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents_envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3'
,
dependencies=[mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])

DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\"Unity.MLAgents.CommunicatorObjects'))
DESCRIPTOR._options = None
_UNITYTOEXTERNALPROTO = _descriptor.ServiceDescriptor(
name='UnityToExternalProto',

options=None,
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_start=140,
serialized_end=258,
methods=[

containing_service=None,
input_type=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO,
output_type=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO,
options=None,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
])
_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNALPROTO)

81 changes: ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py


# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
from mlagents_envs.communicator_objects import unity_message_pb2 as mlagents__envs_dot_communicator__objects_dot_unity__message__pb2

# missing associated documentation comment in .proto file
pass
"""Missing associated documentation comment in .proto file."""
def __init__(self, channel):
"""Constructor.
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Exchange = channel.unary_unary(
'/communicator_objects.UnityToExternalProto/Exchange',
request_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
response_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
)
Args:
channel: A grpc.Channel.
"""
self.Exchange = channel.unary_unary(
'/communicator_objects.UnityToExternalProto/Exchange',
request_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
response_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
)
# missing associated documentation comment in .proto file
pass
"""Missing associated documentation comment in .proto file."""
def Exchange(self, request, context):
"""Sends the academy parameters
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Exchange(self, request, context):
"""Sends the academy parameters
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
rpc_method_handlers = {
'Exchange': grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
response_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'communicator_objects.UnityToExternalProto', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
rpc_method_handlers = {
'Exchange': grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
response_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'communicator_objects.UnityToExternalProto', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class UnityToExternalProto(object):
"""Missing associated documentation comment in .proto file."""
@staticmethod
def Exchange(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/communicator_objects.UnityToExternalProto/Exchange',
mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

51 changes: ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xd9\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12:\n\x0bsensor_type\x18\x06 \x01(\x0e\x32%.communicator_objects.SensorTypeProto\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01*8\n\x0fSensorTypeProto\x12\x0f\n\x0bOBSERVATION\x10\x00\x12\x08\n\x04GOAL\x10\x01\x12\n\n\x06REWARD\x10\x02\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=366,
serialized_end=407,
serialized_start=426,
serialized_end=467,
_SENSORTYPEPROTO = _descriptor.EnumDescriptor(
name='SensorTypeProto',
full_name='communicator_objects.SensorTypeProto',
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name='OBSERVATION', index=0, number=0,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='GOAL', index=1, number=1,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='REWARD', index=2, number=2,
options=None,
type=None),
],
containing_type=None,
options=None,
serialized_start=469,
serialized_end=525,
)
_sym_db.RegisterEnumDescriptor(_SENSORTYPEPROTO)
SensorTypeProto = enum_type_wrapper.EnumTypeWrapper(_SENSORTYPEPROTO)
OBSERVATION = 0
GOAL = 1
REWARD = 2

extension_ranges=[],
oneofs=[
],
serialized_start=319,
serialized_end=344,
serialized_start=379,
serialized_end=404,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='sensor_type', full_name='communicator_objects.ObservationProto.sensor_type', index=5,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=364,
serialized_end=424,
_OBSERVATIONPROTO.fields_by_name['sensor_type'].enum_type = _SENSORTYPEPROTO
_OBSERVATIONPROTO.oneofs_by_name['observation_data'].fields.append(
_OBSERVATIONPROTO.fields_by_name['compressed_data'])
_OBSERVATIONPROTO.fields_by_name['compressed_data'].containing_oneof = _OBSERVATIONPROTO.oneofs_by_name['observation_data']

DESCRIPTOR.message_types_by_name['ObservationProto'] = _OBSERVATIONPROTO
DESCRIPTOR.enum_types_by_name['CompressionTypeProto'] = _COMPRESSIONTYPEPROTO
DESCRIPTOR.enum_types_by_name['SensorTypeProto'] = _SENSORTYPEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
ObservationProto = _reflection.GeneratedProtocolMessageType('ObservationProto', (_message.Message,), dict(

25 changes: ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


NONE = typing___cast('CompressionTypeProto', 0)
PNG = typing___cast('CompressionTypeProto', 1)
class SensorTypeProto(builtin___int):
DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ...
@classmethod
def Name(cls, number: builtin___int) -> builtin___str: ...
@classmethod
def Value(cls, name: builtin___str) -> 'SensorTypeProto': ...
@classmethod
def keys(cls) -> typing___List[builtin___str]: ...
@classmethod
def values(cls) -> typing___List['SensorTypeProto']: ...
@classmethod
def items(cls) -> typing___List[typing___Tuple[builtin___str, 'SensorTypeProto']]: ...
OBSERVATION = typing___cast('SensorTypeProto', 0)
GOAL = typing___cast('SensorTypeProto', 1)
REWARD = typing___cast('SensorTypeProto', 2)
OBSERVATION = typing___cast('SensorTypeProto', 0)
GOAL = typing___cast('SensorTypeProto', 1)
REWARD = typing___cast('SensorTypeProto', 2)
class ObservationProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
class FloatData(google___protobuf___message___Message):

compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
sensor_type = ... # type: SensorTypeProto
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
sensor_type : typing___Optional[SensorTypeProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"sensor_type",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"sensor_type",b"sensor_type",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

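Taken together, the observation_pb2 diffs above add a new SensorTypeProto enum (OBSERVATION = 0, GOAL = 1, REWARD = 2) and a sensor_type field (field number 6) on ObservationProto. A minimal Python sketch of the new field, based only on the generated stubs shown above (the surrounding shape and data values are illustrative, not from the diff):

from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    SensorTypeProto,
)

# Build an observation proto and tag it as a goal observation.
obs = ObservationProto()
obs.shape.extend([2])                            # a 2-element goal vector
obs.float_data.data.extend([1.0, 0.0])           # one-hot goal; selects the observation_data oneof
obs.sensor_type = SensorTypeProto.Value("GOAL")  # new field 6; defaults to OBSERVATION (0)

print(SensorTypeProto.Name(obs.sensor_type))     # prints "GOAL"
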
26 changes: ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.base_env import (
BehaviorSpec,
ActionSpec,
SensorType,
DecisionSteps,
TerminalSteps,
)

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(
shapes, sensor_type, ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_discrete((10,)))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(
shapes, sensor_type, ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(10))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

11 changes: ml-agents-envs/mlagents_envs/tests/test_steps.py


TerminalSteps,
ActionSpec,
BehaviorSpec,
SensorType,
)

def test_empty_decision_steps():
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_shapes=[(3, 2), (5,)],
sensor_types=sensor_type,
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_shapes=[(3, 2), (5,)],
sensor_types=sensor_type,
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

4 changes: ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
BehaviorSpec,
SensorType,
DecisionSteps,
TerminalSteps,
)

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
sensor_type = [SensorType(obs.sensor_type) for obs in agent_info.observations]
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)
return BehaviorSpec(observation_shape, sensor_type, action_spec)
class OffsetBytesIO:

10 changes: ml-agents-envs/mlagents_envs/base_env.py


from abc import ABC, abstractmethod
from collections.abc import Mapping
from enum import Enum
from typing import (
List,
NamedTuple,

)
class SensorType(Enum):
OBSERVATION = 0
GOAL = 1
REWARD = 2
class _ActionTupleBase(ABC):
"""
An object whose fields correspond to action data of continuous and discrete

- observation_shapes is a List of Tuples of int: each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- sensor_types is a List of SensorType, each entry giving the type of the
corresponding sensor (e.g. observation, goal).
sensor_types: List[SensorType]
action_spec: ActionSpec

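On the mlagents_envs side, BehaviorSpec now takes a list of SensorType entries alongside the observation shapes, as the updated tests above show. A minimal sketch, assuming the mlagents_envs package from this branch (the shapes and action size are illustrative):

from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

shapes = [(3,), (2,)]                                    # one vector observation, one goal vector
sensor_types = [SensorType.OBSERVATION, SensorType.GOAL]
spec = BehaviorSpec(
    observation_shapes=shapes,
    sensor_types=sensor_types,
    action_spec=ActionSpec.create_continuous(3),
)
assert spec.sensor_types[1] == SensorType.GOAL
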
6 changes: Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public byte[] GetCompressedObservation()
{
var compressed = m_Texture.EncodeToPNG();

33 changes: Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab


- component: {fileID: 114650561397225712}
- component: {fileID: 114889700908650620}
- component: {fileID: 7980686505185502968}
- component: {fileID: 8359584214300847863}
m_Layer: 8
m_Name: Agent
m_TagString: agent

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: a812f1ce7763a4a0c912717f3594fe20, type: 3}
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 0}
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114650561397225712
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
gridGoal: 0
--- !u!114 &114889700908650620
MonoBehaviour:
m_ObjectHideFlags: 0

m_Width: 84
m_Height: 64
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7980686505185502968
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!114 &8359584214300847863
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1488387672112076}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 2
--- !u!1 &1625008366184734
GameObject:
m_ObjectHideFlags: 0

45 changes: Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


- component: {fileID: 125487790}
- component: {fileID: 125487787}
- component: {fileID: 125487791}
- component: {fileID: 125487792}
m_Layer: 8
m_Name: RenderTextureAgent
m_TagString: agent

agentParameters:
maxStep: 100
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
gridGoal: 0
--- !u!65 &125487788
BoxCollider:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: a812f1ce7763a4a0c912717f3594fe20, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &125487791
MonoBehaviour:
m_ObjectHideFlags: 0

m_RenderTexture: {fileID: 8400000, guid: 114608d5384404f89bff4b6f88432958, type: 2}
m_SensorName: RenderTextureSensor
m_Grayscale: 0
m_ObservationStacks: 1
--- !u!114 &125487792
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 125487785}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 2
--- !u!1 &260425459
GameObject:
m_ObjectHideFlags: 0

type: 3}
propertyPath: compression
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114889700908650620, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_Compression
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114935253044749092, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114935253044749092, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 5c2bd19e4bbda4991b74387ca5d28156, type: 3}

37 changes: Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using UnityEngine;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;

"masking turned on may not behave optimally when action masking is turned off.")]
public bool maskActions = true;
GoalSensorComponent goalSensor;
public GridGoal gridGoal;
const int k_NoAction = 0; // do nothing!
const int k_Up = 1;
const int k_Down = 2;

public enum GridGoal
{
Plus,
Cross,
}
}
public override void CollectObservations(VectorSensor sensor)
{
Array values = Enum.GetValues(typeof(GridGoal));
int goalNum = (int)gridGoal;
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddOneHotGoal(goalNum, values.Length);
}
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

}
}
private void ProvideReward(GridGoal hitObject)
{
if (gridGoal == hitObject)
{
SetReward(1f);
}
else
{
SetReward(-1f);
}
}
// to be implemented by the developer
public override void OnActionReceived(ActionBuffers actionBuffers)

if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
SetReward(1f);
ProvideReward(GridGoal.Plus);
SetReward(-1f);
ProvideReward(GridGoal.Cross);
EndEpisode();
}
}

public override void OnEpisodeBegin()
{
area.AreaReset();
Array values = Enum.GetValues(typeof(GridGoal));
gridGoal = (GridGoal)values.GetValue(UnityEngine.Random.Range(0, values.Length));
}
public void FixedUpdate()

6 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;

26 changes: config/ppo/MultiDirWalker.yaml


behaviors:
MultiDirWalker:
trainer_type: ppo
hyperparameters:
batch_size: 2048
buffer_size: 20480
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: true
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:
extrinsic:
gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

507 changes: Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!114 &4469182458895145650
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c52bddbfaf39944a6bb673a9dfcfe4b6, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
m_TargetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 0
target: {fileID: 4058446934158437408}
hips: {fileID: 1077752704392483292}
chest: {fileID: 7818481575961221087}
spine: {fileID: 7818481575902529953}
head: {fileID: 7818481576732930258}
thighL: {fileID: 7818481576528932657}
shinL: {fileID: 7818481576468061548}
footL: {fileID: 7818481575932963445}
thighR: {fileID: 7818481577110242841}
shinR: {fileID: 7818481577111017236}
footR: {fileID: 7818481576882516798}
armL: {fileID: 7818481576458883964}
forearmL: {fileID: 7818481576500842159}
handL: {fileID: 7818481576440584931}
armR: {fileID: 7818481575774466714}
forearmR: {fileID: 7818481576563420652}
handR: {fileID: 7818481575132336870}
goals: 2
--- !u!114 &1800586501491974962
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 1
--- !u!1001 &186987432828422960
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: walkDirectionMethod
value: 0
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: target
value:
objectReference: {fileID: 4058446934158437408}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: randomizeWalkSpeedEachEpisode
value: 1
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Enabled
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.x
value: -500
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: 2.57
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.z
value: -250
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.x
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.y
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.z
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.w
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_RootOrder
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateManually
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateViaScript
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updatedByAgent
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Name
value: MultiDirRagDoll
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_IsActive
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BehaviorName
value: MultiDirWalker
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 5022602860645237092, guid: c5c81d94c2dfe4c2b9f7440f533957fa,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 243
objectReference: {fileID: 0}
- target: {fileID: 6359877977706987617, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: -2.517
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354074184678, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.5119995
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235354652902044, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3829999
objectReference: {fileID: 0}
- target: {fileID: 7933235354845945066, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3050002
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353272702555, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353655703554, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353711811619, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354882597209, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
m_RemovedComponents:
- {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f, type: 3}
m_SourcePrefab: {fileID: 100100000, guid: 765582efd9dda46ed98564603316353f, type: 3}
--- !u!1 &1077752704035527923 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704035527914 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704392483292 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871264836332, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576528932657 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353228551169, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576468061548 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353272702556, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575932963445 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354882597189, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577110242841 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353713167657, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577111017236 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353711811620, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576882516798 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353655703566, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575902529953 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354652902033, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575961221087 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354845945071, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576458883964 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353030744140, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576500842159 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353240438175, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576440584931 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353041637843, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575774466714 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235355057813930, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576563420652 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353195701980, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575132336870 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354616748502, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576732930258 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354074184674, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &942701540323662238
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 1077752704035527914}
m_Modifications:
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.y
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.z
value: 1800
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_RootOrder
value: 3
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3840539935788495952, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_Name
value: StaticTarget
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e, type: 3}
--- !u!4 &4058446934158437408 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
m_PrefabInstance: {fileID: 942701540323662238}
m_PrefabAsset: {fileID: 0}

7 changes: Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta


fileFormatVersion: 2
guid: d32d9be22fe544fd38de3cf5db023465
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001 changes: Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity (file diff too large to display)

9 changes: Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta


fileFormatVersion: 2
guid: 0c5ba64aa7c084a63b21f8e2b900fc29
timeCreated: 1520420566
licenseType: Free
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

353 changes: Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using BodyPart = Unity.MLAgentsExamples.BodyPart;
using Random = UnityEngine.Random;
public class MultiDirWalkerAgent : Agent
{
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
private Vector3 m_startingPos; //the starting position of the target
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;
public Transform thighL;
public Transform shinL;
public Transform footL;
public Transform thighR;
public Transform shinR;
public Transform footR;
public Transform armL;
public Transform forearmL;
public Transform handL;
public Transform armR;
public Transform forearmR;
public Transform handR;
int m_Goal;
float dir;
public int goals;
float[] m_GoalOneHot;
//This will be used as a stabilized model space reference point for observations
//Because ragdolls can move erratically during training, using a stabilized reference transform improves learning
OrientationCubeController m_OrientationCube;
GoalSensorComponent goalSensor;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
JointDriveController m_JdController;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{
m_startingPos = target.position;
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
m_GoalOneHot = new float[goals];
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);
m_JdController.SetupBodyPart(chest);
m_JdController.SetupBodyPart(spine);
m_JdController.SetupBodyPart(head);
m_JdController.SetupBodyPart(thighL);
m_JdController.SetupBodyPart(shinL);
m_JdController.SetupBodyPart(footL);
m_JdController.SetupBodyPart(thighR);
m_JdController.SetupBodyPart(shinR);
m_JdController.SetupBodyPart(footR);
m_JdController.SetupBodyPart(armL);
m_JdController.SetupBodyPart(forearmL);
m_JdController.SetupBodyPart(handL);
m_JdController.SetupBodyPart(armR);
m_JdController.SetupBodyPart(forearmR);
m_JdController.SetupBodyPart(handR);
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void OnEpisodeBegin()
{
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
//Reset all of the body parts
foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
{
bodyPart.Reset(bodyPart);
}
//Random start rotation to help generalize
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}
/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
{
//GROUND CHECK
sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground
//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
//Get position relative to hips in the context of our orientation cube's space
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{
sensor.AddObservation(bp.rb.transform.localRotation);
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
}
}
/// <summary>
/// Loop over body parts to add them to observation.
/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
var cubeForward = m_OrientationCube.transform.forward;
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{
CollectObservationBodyPart(bodyPart, sensor);
}
//sensor.AddObservation(m_GoalOneHot);
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddGoal(m_Goal);
}
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
var continuousActions = actionBuffers.ContinuousActions;
bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
//update joint strength settings
bpDict[chest].SetJointStrength(continuousActions[++i]);
bpDict[spine].SetJointStrength(continuousActions[++i]);
bpDict[head].SetJointStrength(continuousActions[++i]);
bpDict[thighL].SetJointStrength(continuousActions[++i]);
bpDict[shinL].SetJointStrength(continuousActions[++i]);
bpDict[footL].SetJointStrength(continuousActions[++i]);
bpDict[thighR].SetJointStrength(continuousActions[++i]);
bpDict[shinR].SetJointStrength(continuousActions[++i]);
bpDict[footR].SetJointStrength(continuousActions[++i]);
bpDict[armL].SetJointStrength(continuousActions[++i]);
bpDict[forearmL].SetJointStrength(continuousActions[++i]);
bpDict[armR].SetJointStrength(continuousActions[++i]);
bpDict[forearmR].SetJointStrength(continuousActions[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
}
void FixedUpdate()
{
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// Set reward for this step according to mixture of the following elements.
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
{
throw new ArgumentException(
"NaN in moveTowardsTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
);
}
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, dir * head.forward) + 1) * .5F;
//Check for NaNs
if (float.IsNaN(lookAtTargetReward))
{
throw new ArgumentException(
"NaN in lookAtTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" head.forward: {head.forward}"
);
}
Debug.Log(lookAtTargetReward);
Debug.Log(matchSpeedReward);
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
{
numOfRB++;
velSum += item.rb.velocity;
}
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
/// Agent touched the target
/// </summary>
public void TouchedTarget()
{
AddReward(1f);
}
public void SetTorsoMass()
{
m_JdController.bodyPartsDict[chest].rb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
m_JdController.bodyPartsDict[spine].rb.mass = m_ResetParams.GetWithDefault("spine_mass", 8);
m_JdController.bodyPartsDict[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
}
public void SetResetParameters()
{
SetTorsoMass();
}
}

11 changes: Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta


fileFormatVersion: 2
guid: c52bddbfaf39944a6bb673a9dfcfe4b6
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

1001 changes: Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx (file diff too large to display)

11 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs.meta


fileFormatVersion: 2
guid: 163dac4bcbb2f4d8499db2cdcb22a89e
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

56 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs


using Unity.MLAgents.Sensors;
public class GoalSensorComponent : SensorComponent
{
public int observationSize;
public GoalSensor goalSensor;
/// <summary>
/// Creates a GoalSensor.
/// </summary>
/// <returns></returns>
public override ISensor CreateSensor()
{
goalSensor = new GoalSensor(observationSize);
return goalSensor;
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
return new[] { observationSize };
}
public void AddGoal(float goal)
{
if (goalSensor != null)
{
goalSensor.AddObservation(goal);
}
}
public void AddOneHotGoal(int goal, int range)
{
if (goalSensor != null)
{
goalSensor.AddOneHotObservation(goal, range);
}
}
}
public class GoalSensor : VectorSensor
{
// Forward the name to the VectorSensor base constructor so the default
// "GoalSensor_size{N}" name is actually applied instead of being discarded.
public GoalSensor(int observationSize, string name = null)
: base(observationSize, name ?? $"GoalSensor_size{observationSize}")
{
}
public override SensorType GetSensorType()
{
return SensorType.Goal;
}
}