Comparing commits

...
This merge request has changes that conflict with the target branch.
/protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
/gym-unity/gym_unity/tests/test_gym.py
/Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs
/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
/Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab
/Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
/Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
/com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
/com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
/com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs
/com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
/com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
/com.unity.ml-agents/Runtime/Sensors/ISensor.cs
/com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
/ml-agents-envs/mlagents_envs/rpc_utils.py
/ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
/ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
/ml-agents-envs/mlagents_envs/base_env.py
/ml-agents-envs/mlagents_envs/tests/test_steps.py
/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
/ml-agents/mlagents/trainers/policy/torch_policy.py
/ml-agents/mlagents/trainers/policy/policy.py
/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/sac/trainer.py
/ml-agents/mlagents/trainers/tests/mock_brain.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
/ml-agents/mlagents/trainers/tests/simple_test_envs.py
/ml-agents/mlagents/trainers/torch/distributions.py
/ml-agents/mlagents/trainers/torch/model_serialization.py
/ml-agents/mlagents/trainers/torch/networks.py
/ml-agents/mlagents/trainers/trajectory.py
/com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs
/com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs
/com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
/ml-agents/mlagents/trainers/tests/tensorflow/test_models.py
/ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs

11 commits

Author  SHA1  Message  Commit date
GitHub  a8aabd7d  Merge pull request #4750 from Unity-Technologies/gc-onehot  4 years ago
Arthur Juliani  e8849803  Simpler solution  4 years ago
Arthur Juliani  e6a973cd  Add OneHot util to goal sensor  4 years ago
GitHub  22658a40  use sensor types to differentiate obs (#4749)  4 years ago
GitHub  ba21e419  Merge pull request #4737 from Unity-Technologies/goal-gridworld-sensor  4 years ago
Arthur Juliani  2be6af80  Fix black  4 years ago
Arthur Juliani  4060202d  Use GoalSensor in GridWorld  4 years ago
GitHub  cc6b4564  Multi Directional Walker and Initial Hypernetwork (#4740)  4 years ago
Arthur Juliani  0d2f8887  Merge remote-tracking branch 'origin/master' into goal-conditioning  4 years ago
GitHub  ded1f79b  Merge pull request #4732 from Unity-Technologies/goal-sensors  4 years ago
GitHub  76faf383  Merge pull request #4733 from Unity-Technologies/gc-food-goals  4 years ago
63 files changed, with 3,895 insertions and 182 deletions
  1. 7  protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
  2. 4  gym-unity/gym_unity/tests/test_gym.py
  3. 6  com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs
  4. 6  com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
  5. 6  com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  6. 8  ml-agents/mlagents/trainers/tests/mock_brain.py
  7. 7  ml-agents/mlagents/trainers/tests/tensorflow/test_models.py
  8. 4  ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
  9. 14  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  10. 32  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  11. 32  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  12. 45  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  13. 8  ml-agents/mlagents/trainers/tests/simple_test_envs.py
  14. 11  ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  15. 8  ml-agents/mlagents/trainers/policy/policy.py
  16. 13  ml-agents/mlagents/trainers/policy/torch_policy.py
  17. 3  ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  18. 4  ml-agents/mlagents/trainers/ppo/trainer.py
  19. 6  ml-agents/mlagents/trainers/sac/trainer.py
  20. 89  ml-agents/mlagents/trainers/torch/decoders.py
  21. 119  ml-agents/mlagents/trainers/torch/distributions.py
  22. 34  ml-agents/mlagents/trainers/torch/action_model.py
  23. 10  ml-agents/mlagents/trainers/torch/model_serialization.py
  24. 57  ml-agents/mlagents/trainers/torch/networks.py
  25. 54  ml-agents/mlagents/trainers/trajectory.py
  26. 9  com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
  27. 29  com.unity.ml-agents/Runtime/Sensors/ISensor.cs
  28. 6  com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
  29. 6  com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs
  30. 9  com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
  31. 6  com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  32. 6  com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
  33. 1  com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  34. 50  com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
  35. 6  com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
  36. 5  com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs
  37. 5  com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
  38. 6  com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs
  39. 5  com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  40. 5  com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
  41. 19  ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2.py
  42. 81  ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py
  43. 51  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
  44. 25  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
  45. 26  ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  46. 11  ml-agents-envs/mlagents_envs/tests/test_steps.py
  47. 4  ml-agents-envs/mlagents_envs/rpc_utils.py
  48. 10  ml-agents-envs/mlagents_envs/base_env.py
  49. 6  Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs
  50. 33  Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab
  51. 45  Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
  52. 37  Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  53. 6  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
  54. 26  config/ppo/MultiDirWalker.yaml
  55. 507  Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab
  56. 7  Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta
  57. 1001  Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity
  58. 9  Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta
  59. 353  Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs
  60. 11  Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta
  61. 1001  Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx
  62. 11  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs.meta
  63. 56  Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs

7
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


PNG = 1;
}
enum SensorTypeProto {
OBSERVATION = 0;
GOAL = 1;
REWARD = 2;
}
message ObservationProto {
message FloatData {
repeated float data = 1;

FloatData float_data = 4;
}
repeated int32 compressed_channel_mapping = 5;
SensorTypeProto sensor_type = 6;
}
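For orientation, the Python side mirrors this enum as SensorType in ml-agents-envs/mlagents_envs/base_env.py (that file's hunk is not shown in this excerpt). A minimal sketch of the likely shape of that enum, inferred from the proto above and from the SensorType.OBSERVATION / SensorType.PARAMETERIZATION members used later in this diff; the base class and exact member values are assumptions:

    from enum import Enum

    class SensorType(Enum):
        # Assumed to track SensorTypeProto above; OBSERVATION and PARAMETERIZATION
        # appear verbatim elsewhere in this diff, REWARD is inferred from the proto.
        OBSERVATION = 0
        PARAMETERIZATION = 1  # the proto calls this GOAL
        REWARD = 2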

4
gym-unity/gym_unity/tests/test_gym.py


from mlagents_envs.base_env import (
BehaviorSpec,
ActionSpec,
SensorType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return BehaviorSpec(obs_shapes, action_spec)
sensor_types = [SensorType.OBSERVATION for _ in range(len(obs_shapes))]
return BehaviorSpec(obs_shapes, sensor_types, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):
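In other words, BehaviorSpec now takes a list of SensorType values aligned one-to-one with observation_shapes. A minimal construction sketch in the style of the tests above (the shapes and sizes are illustrative):

    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

    obs_shapes = [(8,), (84, 84, 3)]                           # one vector, one visual observation
    sensor_types = [SensorType.OBSERVATION] * len(obs_shapes)  # one sensor type per observation
    action_spec = ActionSpec.create_continuous(2)
    behavior_spec = BehaviorSpec(obs_shapes, sensor_types, action_spec)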

6
com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
if (m_Board.Rows != m_Rows || m_Board.Columns != m_Columns || m_Board.NumCellTypes != m_NumCellTypes)

6
com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))

6
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public void Update()
{
if (m_Settings.UseModelSpace)

8
ml-agents/mlagents/trainers/tests/mock_brain.py


TerminalSteps,
BehaviorSpec,
ActionSpec,
SensorType,
ActionTuple,
)

obs_list = []
for _shape in observation_shapes:
obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
sensor_types = [SensorType.OBSERVATION for i in range(len(obs_list))]
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(observation_shapes, action_spec)
behavior_spec = BehaviorSpec(observation_shapes, sensor_types, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)],
[SensorType.OBSERVATION],
action_spec,
)
return behavior_spec

7
ml-agents/mlagents/trainers/tests/tensorflow/test_models.py


from mlagents.trainers.tf.models import ModelUtils
from mlagents.tf_utils import tf
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
obs_shapes = [(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector)
sensor_types = [SensorType.OBSERVATION for _ in range(len(obs_shapes))]
[(84, 84, 3)] * int(num_visual) + [(vector_size,)] * int(num_vector),
ActionSpec.create_discrete((1,)),
obs_shapes, sensor_types, ActionSpec.create_discrete((1,))
)
return behavior_spec

4
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


from unittest.mock import MagicMock
from mlagents.trainers.settings import TrainerSettings
import numpy as np
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, SensorType
dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
dummy_groupspec = BehaviorSpec([(1,)], [SensorType.OBSERVATION], dummy_actionspec)
return dummy_groupspec

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec([(8,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(8,), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(8,), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(50,)], [SensorType.OBSERVATION], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

32
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


RNDRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
[(10,), (64, 66, 3), (84, 86, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
[(10,), (64, 66, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

45
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, SensorType
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
[(10,), (64, 66, 3), (84, 86, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
[(10,), (64, 66, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_TWODISCRETE,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
"behavior_spec",
[BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
[(10,), (64, 66, 3), (24, 26, 1)],
[SensorType.OBSERVATION, SensorType.OBSERVATION, SensorType.OBSERVATION],
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], [SensorType.OBSERVATION], ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

8
ml-agents/mlagents/trainers/tests/simple_test_envs.py


ActionTuple,
BaseEnv,
BehaviorSpec,
SensorType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

self.num_vector = num_vector
self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
sensor_types = [
SensorType.OBSERVATION for _ in range(len(self._make_obs_spec()))
]
continuous_action_size, discrete_action_size = action_sizes
discrete_tuple = tuple(2 for _ in range(discrete_action_size))
action_spec = ActionSpec(continuous_action_size, discrete_tuple)

self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.behavior_spec = BehaviorSpec(
self._make_obs_spec(), sensor_types, action_spec
)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

11
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
goals = [ModelUtils.list_to_tensor(batch["goals"])]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(

memory = torch.zeros([1, 1, self.policy.m_size])
vec_vis_obs = SplitObservations.from_observations(next_obs)
vec_vis_obs = SplitObservations.from_observations(
next_obs, self.policy.behavior_spec
)
next_vec_obs = [
ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
]

]
# goals dont change but otherwise broken
next_goals = [torch.as_tensor(vec_vis_obs.goals)]
vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
vector_obs, visual_obs, goals, memory, sequence_length=batch.num_experiences
next_vec_obs, next_vis_obs, next_memory, sequence_length=1
next_vec_obs, next_vis_obs, next_goals, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
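The optimizer now pulls a "goals" column out of the AgentBuffer and threads it through to the critic alongside the vector and visual observations. A small standalone sketch of that column handling, using AgentBuffer and ModelUtils.list_to_tensor as in the hunk (the import path for ModelUtils is the one used elsewhere in the trainers and is an assumption here):

    import numpy as np
    from mlagents.trainers.buffer import AgentBuffer
    from mlagents.trainers.torch.utils import ModelUtils

    buffer = AgentBuffer()
    buffer["goals"].append(np.zeros(1, dtype=np.float32))  # goal index for step 0
    buffer["goals"].append(np.ones(1, dtype=np.float32))   # goal index for step 1

    # Mirrors the new code path above: the goal column becomes a tensor that is
    # handed to the critic next to vector_obs and visual_obs.
    goals = [ModelUtils.list_to_tensor(buffer["goals"])]
    print(goals[0].shape)  # torch.Size([2, 1])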

8
ml-agents/mlagents/trainers/policy/policy.py


from typing import Dict, List, Optional
import numpy as np
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps, SensorType
from mlagents_envs.exception import UnityException
from mlagents.trainers.action_info import ActionInfo

else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
shape[0]
for shape, obs_type in zip(
behavior_spec.observation_shapes, behavior_spec.sensor_types
)
if len(shape) == 1 and obs_type == SensorType.OBSERVATION
)
self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
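Concretely, only 1-D observations whose sensor type is OBSERVATION now count toward vec_obs_size, so a goal vector no longer inflates it. A tiny worked example with illustrative shapes (PARAMETERIZATION is the goal sensor type used elsewhere in this diff):

    from mlagents_envs.base_env import SensorType

    observation_shapes = [(6,), (2,), (84, 84, 3)]
    sensor_types = [
        SensorType.OBSERVATION,       # 6-element vector observation
        SensorType.PARAMETERIZATION,  # 2-element goal vector
        SensorType.OBSERVATION,       # visual observation
    ]
    vec_obs_size = sum(
        shape[0]
        for shape, obs_type in zip(observation_shapes, sensor_types)
        if len(shape) == 1 and obs_type == SensorType.OBSERVATION
    )
    print(vec_obs_size)  # 6 -- the goal vector is excluded, the visual shape is ignored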

13
ml-agents/mlagents/trainers/policy/torch_policy.py


def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
vec_vis_obs = SplitObservations.from_observations(
decision_requests.obs, self.behavior_spec
)
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])

self,
vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
"""
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
vec_obs, vis_obs, masks, memories, seq_len
vec_obs, vis_obs, goals, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)

vis_obs: torch.Tensor,
goals: torch.Tensor,
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

vec_obs, vis_obs, actions, masks, memories, seq_len
vec_obs, vis_obs, goals, actions, masks, memories, seq_len
)
return log_probs, entropies, value_heads

vis_obs = [
torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
]
goals = [torch.as_tensor(vec_vis_obs.goals)]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)

action, log_probs, entropy, memories = self.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
vec_obs, vis_obs, goals, masks=masks, memories=memories
)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple

3
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
goals = [ModelUtils.list_to_tensor(batch["goals"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)

log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
goals,
masks=act_masks,
actions=actions,
memories=memories,

+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()

4
ml-agents/mlagents/trainers/ppo/trainer.py


super()._process_trajectory(trajectory)
agent_id = trajectory.agent_id # All the agents should have the same ID
agent_buffer_trajectory = trajectory.to_agentbuffer()
agent_buffer_trajectory = trajectory.to_agentbuffer(self.policy.behavior_spec)
# Update the normalization
if self.is_training:
self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])

self.seed,
behavior_spec,
self.trainer_settings,
condition_sigma_on_obs=False, # Faster training for PPO
condition_sigma_on_obs=True, # Faster training for PPO
separate_critic=True, # Match network architecture with TF
)
return policy

6
ml-agents/mlagents/trainers/sac/trainer.py


last_step = trajectory.steps[-1]
agent_id = trajectory.agent_id # All the agents should have the same ID
agent_buffer_trajectory = trajectory.to_agentbuffer()
agent_buffer_trajectory = trajectory.to_agentbuffer(self.policy.behavior_spec)
# Update the normalization
if self.is_training:

# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.
if last_step.interrupted:
vec_vis_obs = SplitObservations.from_observations(last_step.obs)
vec_vis_obs = SplitObservations.from_observations(
last_step.obs, self.policy.behavior_spec
)
for i, obs in enumerate(vec_vis_obs.visual_observations):
agent_buffer_trajectory["next_visual_obs%d" % i][-1] = obs
if vec_vis_obs.vector_observations.size > 1:

89
ml-agents/mlagents/trainers/torch/decoders.py


from typing import List, Dict
from mlagents.torch_utils import torch, nn
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.layers import (
linear_layer,
LinearEncoder,
Initialization,
Swish,
)
from collections import defaultdict
class ValueHeads(nn.Module):

for stream_name, head in self.value_heads.items():
value_outputs[stream_name] = head(hidden).squeeze(-1)
return value_outputs
class ValueHeadsHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
num_goals,
stream_names: List[str],
input_size: int,
output_size: int = 1,
):
super().__init__()
self.stream_names = stream_names
self._num_goals = num_goals
self.input_size = input_size
self.output_size = output_size
self.streams_size = len(stream_names)
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
flat_output = linear_layer(
layer_size,
input_size * output_size * self.streams_size
+ self.output_size * self.streams_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(
self, hidden: torch.Tensor, goal: torch.Tensor
) -> Dict[str, torch.Tensor]:
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# (b, i * o * streams + o * streams)
flat_output_weights = self.hypernet(goal_onehot)
b = hidden.size(0)
output_weights, output_bias = torch.split(
flat_output_weights,
self.streams_size * self.input_size * self.output_size,
dim=-1,
)
output_weights = torch.reshape(
output_weights, (self.streams_size, b, self.input_size, self.output_size)
)
output_bias = torch.reshape(
output_bias, (self.streams_size, b, self.output_size)
)
output_bias = output_bias.unsqueeze(dim=2)
value_outputs = {}
for stream_name, out_w, out_b in zip(
self.stream_names, output_weights, output_bias
):
inp_out_w = torch.bmm(hidden.unsqueeze(dim=1), out_w)
inp_out_w_out_b = inp_out_w + out_b
value_outputs[stream_name] = inp_out_w_out_b.squeeze()
return value_outputs
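ValueHeadsHyperNetwork replaces the plain ValueHeads with value heads whose final-layer weights and biases are generated by a small hypernetwork from a one-hot goal encoding. A rough usage sketch; the stream name and sizes are illustrative, and the goal is assumed to be passed as a list holding a (batch, 1) tensor of integer goal indices, matching the dummy_goals added in model_serialization later in this diff:

    import torch
    from mlagents.trainers.torch.decoders import ValueHeadsHyperNetwork

    value_heads = ValueHeadsHyperNetwork(
        num_layers=1,
        layer_size=256,
        num_goals=2,
        stream_names=["extrinsic"],  # illustrative reward stream name
        input_size=128,              # size of the network body's encoding
    )
    hidden = torch.zeros(4, 128)     # batch of 4 encodings
    goal = [torch.zeros(4, 1)]       # integer goal index per agent (all goal 0 here)
    values = value_heads(hidden, goal)
    print(values["extrinsic"].shape)  # torch.Size([4]) -- one value estimate per agent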

119
ml-agents/mlagents/trainers/torch/distributions.py


from mlagents.torch_utils import torch, nn
import numpy as np
import math
from mlagents.trainers.torch.layers import linear_layer, Initialization
from mlagents.trainers.torch.layers import (
linear_layer,
Initialization,
LinearEncoder,
Swish,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero

# torch.cat here instead of torch.expand() becuase it is not supported in the
# verified version of Barracuda (1.0.2).
log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
return GaussianDistInstance(mu, torch.exp(log_sigma))
class GaussianHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
hidden_size,
num_outputs,
conditional_sigma,
tanh_squash,
num_goals,
):
super().__init__()
self._num_goals = num_goals
self.hidden_size = hidden_size
self.tanh_squash = tanh_squash
self.conditional_sigma = conditional_sigma
self.num_outputs = num_outputs
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
if conditional_sigma:
flat_output = linear_layer(
layer_size,
2 * (hidden_size * num_outputs + num_outputs),
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = None
else:
flat_output = linear_layer(
layer_size,
hidden_size * num_outputs + num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = linear_layer(
num_goals,
num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(self, inputs: torch.Tensor, goal: torch.Tensor):
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# cond (b, 2 * H * O + O
# not cond (b, H * O + O
flat_output_weights = self.hypernet(goal_onehot)
b = inputs.size(0)
inputs = inputs.unsqueeze(dim=1)
if self.conditional_sigma:
mu_w_log_sigma_w, mu_b, log_sigma_b = torch.split(
flat_output_weights,
[
2 * self.hidden_size * self.num_outputs,
self.num_outputs,
self.num_outputs,
],
dim=-1,
)
mu_w_log_sigma_w = torch.reshape(
mu_w_log_sigma_w, (b, 2 * self.hidden_size, self.num_outputs)
)
mu_w, log_sigma_w = torch.split(mu_w_log_sigma_w, self.hidden_size, dim=1)
log_sigma = torch.bmm(inputs, log_sigma_w)
log_sigma = log_sigma + log_sigma_b
log_sigma = log_sigma.squeeze()
log_sigma = torch.clamp(log_sigma, min=-20, max=2)
else:
mu_w, mu_b = torch.split(
flat_output_weights, self.hidden_size * self.num_outputs, dim=-1
)
mu_w = torch.reshape(mu_w, (b, self.hidden_size, self.num_outputs))
log_sigma = self._log_sigma_w(goal_onehot)
log_sigma = torch.squeeze(log_sigma)
mu = torch.bmm(inputs, mu_w)
mu = mu + mu_b
mu = mu.squeeze()
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
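GaussianHyperNetwork plays the same role for the continuous policy head: the mu (and optionally sigma) weights are generated from the one-hot goal. The hunk is truncated at the final else branch, which is assumed here to return a GaussianDistInstance as the existing GaussianDistribution does; the goal format is the same assumption as for the value-head hypernetwork above:

    import torch
    from mlagents.trainers.torch.distributions import GaussianHyperNetwork

    policy_head = GaussianHyperNetwork(
        num_layers=1,
        layer_size=256,
        hidden_size=128,        # size of the encoding from the network body
        num_outputs=2,          # continuous action size
        conditional_sigma=False,
        tanh_squash=False,
        num_goals=2,
    )
    encoding = torch.zeros(4, 128)
    goal = [torch.zeros(4, 1)]          # integer goal index per agent
    dist = policy_head(encoding, goal)  # goal-conditioned Gaussian over actions
    actions = dist.sample()             # shape (4, 2), assuming the truncated return above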

34
ml-agents/mlagents/trainers/torch/action_model.py


DistInstance,
DiscreteDistInstance,
GaussianDistribution,
GaussianHyperNetwork,
MultiCategoricalDistribution,
)
from mlagents.trainers.torch.agent_action import AgentAction

self._discrete_distribution = None
if self.action_spec.continuous_size > 0:
self._continuous_distribution = GaussianDistribution(
self.encoding_size,
self.action_spec.continuous_size,
self._continuous_distribution = GaussianHyperNetwork(
num_layers=1,
layer_size=256,
hidden_size=self.encoding_size,
num_outputs=self.action_spec.continuous_size,
num_goals=2,
)
if self.action_spec.discrete_size > 0:

discrete_action.append(discrete_dist.sample())
return AgentAction(continuous_action, discrete_action)
def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> DistInstances:
def _get_dists(
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> DistInstances:
"""
Creates a DistInstances tuple using the continuous and discrete distributions
:params inputs: The encoding from the network body

discrete_dist: Optional[List[DiscreteDistInstance]] = None
# This checks None because mypy complains otherwise
if self._continuous_distribution is not None:
continuous_dist = self._continuous_distribution(inputs)
continuous_dist = self._continuous_distribution(inputs, goal)
if self._discrete_distribution is not None:
discrete_dist = self._discrete_distribution(inputs, masks)
return DistInstances(continuous_dist, discrete_dist)

return action_log_probs, entropies
def evaluate(
self, inputs: torch.Tensor, masks: torch.Tensor, actions: AgentAction
self,
inputs: torch.Tensor,
masks: torch.Tensor,
actions: AgentAction,
goal: torch.Tensor,
) -> Tuple[ActionLogProbs, torch.Tensor]:
"""
Given actions and encoding from the network body, gets the distributions and

:params actions: The AgentAction
:return: An ActionLogProbs tuple and a torch tensor of the distribution entropies.
"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
def get_action_out(
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> torch.Tensor:
"""
Gets the tensors corresponding to the output of the policy network to be used for
inference. Called by the Actor's forward call.

"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
continuous_out, discrete_out, action_out_deprecated = None, None, None
if self.action_spec.continuous_size > 0 and dists.continuous is not None:
continuous_out = dists.continuous.exported_model_output()

return continuous_out, discrete_out, action_out_deprecated
def forward(
self, inputs: torch.Tensor, masks: torch.Tensor
self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor]:
"""
The forward method of this module. Outputs the action, log probs,

:return: Given the input, an AgentAction of the actions generated by the policy and the corresponding
ActionLogProbs and entropies.
"""
dists = self._get_dists(inputs, masks)
dists = self._get_dists(inputs, masks, goal)
actions = self._sample_action(dists)
log_probs, entropies = self._get_probs_and_entropy(actions, dists)
# Use the sum of entropy across actions, not the mean

10
ml-agents/mlagents/trainers/torch/model_serialization.py


for shape in self.policy.behavior_spec.observation_shapes
if len(shape) == 3
]
dummy_goals = [torch.zeros(batch_dim + [1])]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
)

self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
self.dummy_input = (
dummy_vec_obs,
dummy_vis_obs,
dummy_goals,
dummy_masks,
dummy_memories,
)
+ ["goals"]
+ [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
+ ["action_masks", "memories"]
)
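Net effect on the exported ONNX graph: a new "goals" input sits between the vector and visual observation inputs. Roughly, with the surrounding input names taken from the existing exporter and therefore assumptions here:

    vis_obs_size = 2  # illustrative
    input_names = (
        ["vector_observation"]
        + ["goals"]
        + [f"visual_observation_{i}" for i in range(vis_obs_size)]
        + ["action_masks", "memories"]
    )
    # ['vector_observation', 'goals', 'visual_observation_0',
    #  'visual_observation_1', 'action_masks', 'memories']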

57
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
from mlagents.trainers.torch.decoders import ValueHeads, ValueHeadsHyperNetwork
from mlagents.trainers.torch.layers import LSTM, LinearEncoder
from mlagents.trainers.torch.model_serialization import exporting_to_onnx

self.vector_processors,
encoder_input_size,
) = ModelUtils.create_input_processors(
observation_shapes,
observation_shapes[1:],
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

encoding_size = network_settings.memory.memory_size // 2
else:
encoding_size = network_settings.hidden_units
self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
self.value_heads = ValueHeadsHyperNetwork(
num_layers=1,
layer_size=256,
num_goals=2,
stream_names=stream_names,
input_size=encoding_size,
output_size=outputs_per_stream,
)
@property
def memory_size(self) -> int:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

)
output = self.value_heads(encoding)
output = self.value_heads(encoding, goals)
return output, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[Union[int, torch.Tensor], ...]:

cont_action_out,
disc_action_out,
action_out_deprecated,
) = self.action_model.get_action_out(encoding, masks)
) = self.action_model.get_action_out(encoding, masks, goals)
export_out = [
self.version_number,
torch.Tensor([self.network_body.memory_size]),

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
log_probs, entropies = self.action_model.evaluate(
encoding, masks, actions, goals
)
value_outputs = self.value_heads(encoding)
return log_probs, entropies, value_outputs

vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
action, log_probs, entropies = self.action_model(encoding, masks, goals)
value_outputs = self.value_heads(encoding)
return action, log_probs, entropies, value_outputs, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
value_outputs, critic_mem_out = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
if actor_mem is not None:
# Make memories with the actor mem unchanged

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

else:
critic_mem = None
actor_mem = None
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
log_probs, entropies = self.action_model.evaluate(
encoding, masks, actions, goals
)
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
return log_probs, entropies, value_outputs

vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

encoding, actor_mem_outs = self.network_body(
vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
action, log_probs, entropies = self.action_model(encoding, masks, goals)
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
vec_inputs,
vis_inputs,
goals,
memories=critic_mem,
sequence_length=sequence_length,
)
if self.use_lstm:
mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)

54
ml-agents/mlagents/trainers/trajectory.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, SensorType
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

class SplitObservations(NamedTuple):
vector_observations: np.ndarray
visual_observations: List[np.ndarray]
goals: np.ndarray
def from_observations(obs: List[np.ndarray]) -> "SplitObservations":
def from_observations(obs: List[np.ndarray], behavior_spec) -> "SplitObservations":
"""
Divides a List of numpy arrays into a SplitObservations NamedTuple.
This allows you to access the vector and visual observations directly,

"""
vis_obs_list: List[np.ndarray] = []
vec_obs_list: List[np.ndarray] = []
goal_list: List[np.ndarray] = []
for observation in obs:
# Obs could be batched or single
if len(observation.shape) == 1 or len(observation.shape) == 2:
vec_obs_list.append(observation)
if len(observation.shape) == 3 or len(observation.shape) == 4:
vis_obs_list.append(observation)
last_obs = observation
for observation, sensor_type in zip(obs, behavior_spec.sensor_types):
if sensor_type == SensorType.PARAMETERIZATION:
goal_list.append(observation)
elif sensor_type == SensorType.OBSERVATION:
# Obs could be batched or single
if len(observation.shape) == 1 or len(observation.shape) == 2:
vec_obs_list.append(observation)
if len(observation.shape) == 3 or len(observation.shape) == 4:
vis_obs_list.append(observation)
last_obs = observation
if last_obs is not None:
is_batched = len(last_obs.shape) == 2 or len(last_obs.shape) == 4
if is_batched:

else np.zeros((last_obs.shape[0], 0), dtype=np.float32)
)
goals = (
np.concatenate(goal_list, axis=1)
if len(goal_list) > 0
else np.zeros((last_obs.shape[0], 0), dtype=np.float32)
)
else:
vec_obs = (
np.concatenate(vec_obs_list, axis=0)

goals = (
np.concatenate(goal_list, axis=0)
if len(goal_list) > 0
else np.array([], dtype=np.float32)
)
vector_observations=vec_obs, visual_observations=vis_obs_list
vector_observations=vec_obs, visual_observations=vis_obs_list, goals=goals
)

agent_id: str
behavior_id: str
def to_agentbuffer(self) -> AgentBuffer:
def to_agentbuffer(self, behavior_spec: BehaviorSpec) -> AgentBuffer:
"""
Converts a Trajectory to an AgentBuffer
:param trajectory: A Trajectory

"""
agent_buffer_trajectory = AgentBuffer()
vec_vis_obs = SplitObservations.from_observations(self.steps[0].obs)
vec_vis_obs = SplitObservations.from_observations(
self.steps[0].obs, behavior_spec
)
self.steps[step + 1].obs
self.steps[step + 1].obs, behavior_spec
next_vec_vis_obs = SplitObservations.from_observations(self.next_obs)
next_vec_vis_obs = SplitObservations.from_observations(
self.next_obs, behavior_spec
)
for i, _ in enumerate(vec_vis_obs.visual_observations):
agent_buffer_trajectory["visual_obs%d" % i].append(

agent_buffer_trajectory["vector_obs"].append(
vec_vis_obs.vector_observations
)
agent_buffer_trajectory["goals"].append(vec_vis_obs.goals)
# this shouldnt be necessary in an optimized implementation since the goal does not change
agent_buffer_trajectory["next_goals"].append(next_vec_vis_obs.goals)
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)
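A condensed sketch of calling the reworked SplitObservations, assuming a behavior_spec that marks the second observation as a goal (PARAMETERIZATION); the shapes are illustrative:

    import numpy as np
    from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType
    from mlagents.trainers.trajectory import SplitObservations

    behavior_spec = BehaviorSpec(
        [(6,), (2,)],
        [SensorType.OBSERVATION, SensorType.PARAMETERIZATION],
        ActionSpec.create_continuous(2),
    )
    obs = [
        np.zeros((4, 6), dtype=np.float32),  # batched vector observation
        np.zeros((4, 2), dtype=np.float32),  # batched goal vector
    ]
    split = SplitObservations.from_observations(obs, behavior_spec)
    print(split.vector_observations.shape, split.goals.shape)  # (4, 6) (4, 2)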

9
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


}
/// <summary>
/// Camera sensors are always Observations.
/// </summary>
/// <returns>Sensor type of observation.</returns>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Generates a compressed image. This can be valuable in speeding-up training.
/// </summary>
/// <returns>Compressed image.</returns>

29
com.unity.ml-agents/Runtime/Sensors/ISensor.cs


}
/// <summary>
/// The semantic meaning of the sensor.
/// </summary>
public enum SensorType
{
/// <summary>
/// Sensor represents an agent's observation.
/// </summary>
Observation,
/// <summary>
/// Sensor represents an agent's task/goal parameterization.
/// </summary>
Goal,
/// <summary>
/// Sensor represents one or more reward signals.
/// </summary>
Reward
}
/// <summary>
/// Sensor interface for generating observations.
/// </summary>
public interface ISensor

/// </summary>
/// <returns>The name of the sensor.</returns>
string GetName();
/// <summary>
/// Get the semantic meaning of the sensor, i.e. whether it is an observation or other type
/// of data to be sent to the Agent.
/// </summary>
/// <returns>The type of the sensor.</returns>
SensorType GetSensorType();
}

6
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public string GetName()
{
return m_Name;

6
com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs


}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public void Update() { }
/// <inheritdoc/>

9
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


}
/// <summary>
/// RenderTexture sensors are always Observations.
/// </summary>
/// <returns>Sensor type of observation.</returns>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Converts a RenderTexture to a 2D texture.
/// </summary>
/// <returns>The 2D texture.</returns>

6
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


return m_WrappedSensor.GetCompressionType();
}
/// <inheritdoc/>
public SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <summary>
/// Create Empty PNG for initializing the buffer for stacking.
/// </summary>

6
com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;

1
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
observationProto.SensorType = (SensorTypeProto)sensor.GetSensorType();
observationProto.Shape.AddRange(shape);
return observationProto;
}
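The regenerated Python bindings (observation_pb2.py / .pyi in the file list) expose the same field. A minimal sketch of setting it from Python; the integer 1 corresponds to GOAL per the SensorTypeProto definition earlier in this diff:

    from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto

    obs_proto = ObservationProto()
    obs_proto.shape.extend([3])   # a 3-element vector observation
    obs_proto.sensor_type = 1     # GOAL, per SensorTypeProto above
    print(obs_proto.sensor_type)  # 1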

50
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyLZAgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSOgoLc2Vuc29yX3R5cGUYBiABKA4yJS5jb21tdW5pY2F0b3Jfb2JqZWN0",
"cy5TZW5zb3JUeXBlUHJvdG8aGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJC",
"EgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxII",
"CgROT05FEAASBwoDUE5HEAEqOAoPU2Vuc29yVHlwZVByb3RvEg8KC09CU0VS",
"VkFUSU9OEAASCAoER09BTBABEgoKBlJFV0FSRBACQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), typeof(global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "SensorType" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

internal enum CompressionTypeProto {
[pbr::OriginalName("NONE")] None = 0,
[pbr::OriginalName("PNG")] Png = 1,
}
internal enum SensorTypeProto {
[pbr::OriginalName("OBSERVATION")] Observation = 0,
[pbr::OriginalName("GOAL")] Goal = 1,
[pbr::OriginalName("REWARD")] Reward = 2,
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
sensorType_ = other.sensorType_;
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "sensor_type" field.</summary>
public const int SensorTypeFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto sensorType_ = 0;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto SensorType {
get { return sensorType_; }
set {
sensorType_ = value;
}
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if (SensorType != other.SensorType) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
if (SensorType != 0) hash ^= SensorType.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
if (SensorType != 0) {
output.WriteRawTag(48);
output.WriteEnum((int) SensorType);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
if (SensorType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) SensorType);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
if (other.SensorType != 0) {
SensorType = other.SensorType;
}
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 48: {
sensorType_ = (global::Unity.MLAgents.CommunicatorObjects.SensorTypeProto) input.ReadEnum();
break;
}
}

6
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


return new byte[] { 13, 37 };
}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
public void Update() { }
public void Reset() { }

5
com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs


m_Shape = new[] { Height, Width, 1 };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public string GetName()
{
return m_Name;

5
com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs


return m_Shape;
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public byte[] GetCompressedObservation()
{
return null;

6
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


return Shape;
}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
public int Write(ObservationWriter writer)
{
for (var h = 0; h < Shape[0]; h++)

5
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


return new byte[] { 0 };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public SensorCompressionType GetCompressionType()
{
return compressionType;

5
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


return new[] { m_Height, m_Width, m_Channels };
}
public SensorType GetSensorType()
{
return SensorType.Observation;
}
public int Write(ObservationWriter writer)
{
for (int i = 0; i < m_Width * m_Height * m_Channels; i++)

19
ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2.py


# -*- coding: utf-8 -*-
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
"""Generated protocol buffer code."""
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()

name='mlagents_envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n:mlagents_envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents_envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_options=b'\252\002\"Unity.MLAgents.CommunicatorObjects',
create_key=_descriptor._internal_create_key,
serialized_pb=b'\n:mlagents_envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents_envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3'
,
dependencies=[mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])

DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\"Unity.MLAgents.CommunicatorObjects'))
DESCRIPTOR._options = None
_UNITYTOEXTERNALPROTO = _descriptor.ServiceDescriptor(
name='UnityToExternalProto',

options=None,
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_start=140,
serialized_end=258,
methods=[

containing_service=None,
input_type=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO,
output_type=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO,
options=None,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
])
_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNALPROTO)

81 changes: ml-agents-envs/mlagents_envs/communicator_objects/unity_to_external_pb2_grpc.py


# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
from mlagents_envs.communicator_objects import unity_message_pb2 as mlagents__envs_dot_communicator__objects_dot_unity__message__pb2

# missing associated documentation comment in .proto file
pass
"""Missing associated documentation comment in .proto file."""
def __init__(self, channel):
"""Constructor.
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Exchange = channel.unary_unary(
'/communicator_objects.UnityToExternalProto/Exchange',
request_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
response_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
)
Args:
channel: A grpc.Channel.
"""
self.Exchange = channel.unary_unary(
'/communicator_objects.UnityToExternalProto/Exchange',
request_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
response_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
)
# missing associated documentation comment in .proto file
pass
"""Missing associated documentation comment in .proto file."""
def Exchange(self, request, context):
"""Sends the academy parameters
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Exchange(self, request, context):
"""Sends the academy parameters
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
rpc_method_handlers = {
'Exchange': grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
response_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'communicator_objects.UnityToExternalProto', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
rpc_method_handlers = {
'Exchange': grpc.unary_unary_rpc_method_handler(
servicer.Exchange,
request_deserializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
response_serializer=mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'communicator_objects.UnityToExternalProto', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class UnityToExternalProto(object):
"""Missing associated documentation comment in .proto file."""
@staticmethod
def Exchange(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/communicator_objects.UnityToExternalProto/Exchange',
mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.SerializeToString,
mlagents__envs_dot_communicator__objects_dot_unity__message__pb2.UnityMessageProto.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

51 changes: ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xd9\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12:\n\x0bsensor_type\x18\x06 \x01(\x0e\x32%.communicator_objects.SensorTypeProto\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01*8\n\x0fSensorTypeProto\x12\x0f\n\x0bOBSERVATION\x10\x00\x12\x08\n\x04GOAL\x10\x01\x12\n\n\x06REWARD\x10\x02\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=366,
serialized_end=407,
serialized_start=426,
serialized_end=467,
_SENSORTYPEPROTO = _descriptor.EnumDescriptor(
name='SensorTypeProto',
full_name='communicator_objects.SensorTypeProto',
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name='OBSERVATION', index=0, number=0,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='GOAL', index=1, number=1,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='REWARD', index=2, number=2,
options=None,
type=None),
],
containing_type=None,
options=None,
serialized_start=469,
serialized_end=525,
)
_sym_db.RegisterEnumDescriptor(_SENSORTYPEPROTO)
SensorTypeProto = enum_type_wrapper.EnumTypeWrapper(_SENSORTYPEPROTO)
OBSERVATION = 0
GOAL = 1
REWARD = 2

extension_ranges=[],
oneofs=[
],
serialized_start=319,
serialized_end=344,
serialized_start=379,
serialized_end=404,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='sensor_type', full_name='communicator_objects.ObservationProto.sensor_type', index=5,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=364,
serialized_end=424,
_OBSERVATIONPROTO.fields_by_name['sensor_type'].enum_type = _SENSORTYPEPROTO
_OBSERVATIONPROTO.oneofs_by_name['observation_data'].fields.append(
_OBSERVATIONPROTO.fields_by_name['compressed_data'])
_OBSERVATIONPROTO.fields_by_name['compressed_data'].containing_oneof = _OBSERVATIONPROTO.oneofs_by_name['observation_data']

DESCRIPTOR.message_types_by_name['ObservationProto'] = _OBSERVATIONPROTO
DESCRIPTOR.enum_types_by_name['CompressionTypeProto'] = _COMPRESSIONTYPEPROTO
DESCRIPTOR.enum_types_by_name['SensorTypeProto'] = _SENSORTYPEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
ObservationProto = _reflection.GeneratedProtocolMessageType('ObservationProto', (_message.Message,), dict(

25 changes: ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


NONE = typing___cast('CompressionTypeProto', 0)
PNG = typing___cast('CompressionTypeProto', 1)
class SensorTypeProto(builtin___int):
DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ...
@classmethod
def Name(cls, number: builtin___int) -> builtin___str: ...
@classmethod
def Value(cls, name: builtin___str) -> 'SensorTypeProto': ...
@classmethod
def keys(cls) -> typing___List[builtin___str]: ...
@classmethod
def values(cls) -> typing___List['SensorTypeProto']: ...
@classmethod
def items(cls) -> typing___List[typing___Tuple[builtin___str, 'SensorTypeProto']]: ...
OBSERVATION = typing___cast('SensorTypeProto', 0)
GOAL = typing___cast('SensorTypeProto', 1)
REWARD = typing___cast('SensorTypeProto', 2)
OBSERVATION = typing___cast('SensorTypeProto', 0)
GOAL = typing___cast('SensorTypeProto', 1)
REWARD = typing___cast('SensorTypeProto', 2)
class ObservationProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
class FloatData(google___protobuf___message___Message):

compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
sensor_type = ... # type: SensorTypeProto
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
sensor_type : typing___Optional[SensorTypeProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"sensor_type",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"sensor_type",b"sensor_type",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

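Taken together, the observation_pb2 diffs above add a new SensorTypeProto enum (OBSERVATION = 0, GOAL = 1, REWARD = 2) and a sensor_type field (field number 6) on ObservationProto. A minimal Python sketch of the new field, based only on the generated stubs shown above (the surrounding shape and data values are illustrative, not from the diff):

from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    SensorTypeProto,
)

# Build an observation proto and tag it as a goal observation.
obs = ObservationProto()
obs.shape.extend([2])                            # a 2-element goal vector
obs.float_data.data.extend([1.0, 0.0])           # one-hot goal; selects the observation_data oneof
obs.sensor_type = SensorTypeProto.Value("GOAL")  # new field 6; defaults to OBSERVATION (0)

print(SensorTypeProto.Name(obs.sensor_type))     # prints "GOAL"
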
26 changes: ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.base_env import (
BehaviorSpec,
ActionSpec,
SensorType,
DecisionSteps,
TerminalSteps,
)

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(
shapes, sensor_type, ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_discrete((10,)))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(
shapes, sensor_type, ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(10))
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
behavior_spec = BehaviorSpec(shapes, sensor_type, ActionSpec.create_continuous(3))
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

11 changes: ml-agents-envs/mlagents_envs/tests/test_steps.py


TerminalSteps,
ActionSpec,
BehaviorSpec,
SensorType,
)

def test_empty_decision_steps():
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_shapes=[(3, 2), (5,)],
sensor_types=sensor_type,
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
sensor_type = [SensorType.OBSERVATION, SensorType.OBSERVATION]
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_shapes=[(3, 2), (5,)],
sensor_types=sensor_type,
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

4 changes: ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
BehaviorSpec,
SensorType,
DecisionSteps,
TerminalSteps,
)

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
sensor_type = [SensorType(obs.sensor_type) for obs in agent_info.observations]
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)
return BehaviorSpec(observation_shape, sensor_type, action_spec)
class OffsetBytesIO:

10 changes: ml-agents-envs/mlagents_envs/base_env.py


from abc import ABC, abstractmethod
from collections.abc import Mapping
from enum import Enum
from typing import (
List,
NamedTuple,

)
class SensorType(Enum):
OBSERVATION = 0
GOAL = 1
REWARD = 2
class _ActionTupleBase(ABC):
"""
An object whose fields correspond to action data of continuous and discrete

- observation_shapes is a List of Tuples of int: each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- sensor_types is a List of SensorType, each entry giving the type of the
corresponding sensor (e.g. observation, goal).
sensor_types: List[SensorType]
action_spec: ActionSpec

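On the mlagents_envs side, BehaviorSpec now takes a list of SensorType entries alongside the observation shapes, as the updated tests above show. A minimal sketch, assuming the mlagents_envs package from this branch (the shapes and action size are illustrative):

from mlagents_envs.base_env import ActionSpec, BehaviorSpec, SensorType

shapes = [(3,), (2,)]                                    # one vector observation, one goal vector
sensor_types = [SensorType.OBSERVATION, SensorType.GOAL]
spec = BehaviorSpec(
    observation_shapes=shapes,
    sensor_types=sensor_types,
    action_spec=ActionSpec.create_continuous(3),
)
assert spec.sensor_types[1] == SensorType.GOAL
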
6 changes: Project/Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureSensor.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public byte[] GetCompressedObservation()
{
var compressed = m_Texture.EncodeToPNG();

33 changes: Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab


- component: {fileID: 114650561397225712}
- component: {fileID: 114889700908650620}
- component: {fileID: 7980686505185502968}
- component: {fileID: 8359584214300847863}
m_Layer: 8
m_Name: Agent
m_TagString: agent

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: a812f1ce7763a4a0c912717f3594fe20, type: 3}
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 0}
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114650561397225712
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
gridGoal: 0
--- !u!114 &114889700908650620
MonoBehaviour:
m_ObjectHideFlags: 0

m_Width: 84
m_Height: 64
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7980686505185502968
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!114 &8359584214300847863
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1488387672112076}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 2
--- !u!1 &1625008366184734
GameObject:
m_ObjectHideFlags: 0

45 changes: Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


- component: {fileID: 125487790}
- component: {fileID: 125487787}
- component: {fileID: 125487791}
- component: {fileID: 125487792}
m_Layer: 8
m_Name: RenderTextureAgent
m_TagString: agent

agentParameters:
maxStep: 100
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
gridGoal: 0
--- !u!65 &125487788
BoxCollider:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: a812f1ce7763a4a0c912717f3594fe20, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &125487791
MonoBehaviour:
m_ObjectHideFlags: 0

m_RenderTexture: {fileID: 8400000, guid: 114608d5384404f89bff4b6f88432958, type: 2}
m_SensorName: RenderTextureSensor
m_Grayscale: 0
m_ObservationStacks: 1
--- !u!114 &125487792
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 125487785}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 2
--- !u!1 &260425459
GameObject:
m_ObjectHideFlags: 0

type: 3}
propertyPath: compression
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114889700908650620, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_Compression
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114935253044749092, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114935253044749092, guid: 5c2bd19e4bbda4991b74387ca5d28156,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 5c2bd19e4bbda4991b74387ca5d28156, type: 3}

37 changes: Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using UnityEngine;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;

"masking turned on may not behave optimally when action masking is turned off.")]
public bool maskActions = true;
GoalSensorComponent goalSensor;
public GridGoal gridGoal;
const int k_NoAction = 0; // do nothing!
const int k_Up = 1;
const int k_Down = 2;

public enum GridGoal
{
Plus,
Cross,
}
}
public override void CollectObservations(VectorSensor sensor)
{
Array values = Enum.GetValues(typeof(GridGoal));
int goalNum = (int)gridGoal;
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddOneHotGoal(goalNum, values.Length);
}
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

}
}
private void ProvideReward(GridGoal hitObject)
{
if (gridGoal == hitObject)
{
SetReward(1f);
}
else
{
SetReward(-1f);
}
}
// to be implemented by the developer
public override void OnActionReceived(ActionBuffers actionBuffers)

if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
SetReward(1f);
ProvideReward(GridGoal.Plus);
SetReward(-1f);
ProvideReward(GridGoal.Cross);
EndEpisode();
}
}

public override void OnEpisodeBegin()
{
area.AreaReset();
Array values = Enum.GetValues(typeof(GridGoal));
gridGoal = (GridGoal)values.GetValue(UnityEngine.Random.Range(0, values.Length));
}
public void FixedUpdate()

6 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs


}
/// <inheritdoc/>
public virtual SensorType GetSensorType()
{
return SensorType.Observation;
}
/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;

26 changes: config/ppo/MultiDirWalker.yaml


behaviors:
MultiDirWalker:
trainer_type: ppo
hyperparameters:
batch_size: 2048
buffer_size: 20480
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: true
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:
extrinsic:
gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

507 changes: Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!114 &4469182458895145650
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c52bddbfaf39944a6bb673a9dfcfe4b6, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
m_TargetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 0
target: {fileID: 4058446934158437408}
hips: {fileID: 1077752704392483292}
chest: {fileID: 7818481575961221087}
spine: {fileID: 7818481575902529953}
head: {fileID: 7818481576732930258}
thighL: {fileID: 7818481576528932657}
shinL: {fileID: 7818481576468061548}
footL: {fileID: 7818481575932963445}
thighR: {fileID: 7818481577110242841}
shinR: {fileID: 7818481577111017236}
footR: {fileID: 7818481576882516798}
armL: {fileID: 7818481576458883964}
forearmL: {fileID: 7818481576500842159}
handL: {fileID: 7818481576440584931}
armR: {fileID: 7818481575774466714}
forearmR: {fileID: 7818481576563420652}
handR: {fileID: 7818481575132336870}
goals: 2
--- !u!114 &1800586501491974962
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 1
--- !u!1001 &186987432828422960
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: walkDirectionMethod
value: 0
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: target
value:
objectReference: {fileID: 4058446934158437408}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: randomizeWalkSpeedEachEpisode
value: 1
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Enabled
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.x
value: -500
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: 2.57
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.z
value: -250
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.x
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.y
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.z
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.w
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_RootOrder
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateManually
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateViaScript
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updatedByAgent
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Name
value: MultiDirRagDoll
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_IsActive
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BehaviorName
value: MultiDirWalker
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 5022602860645237092, guid: c5c81d94c2dfe4c2b9f7440f533957fa,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 243
objectReference: {fileID: 0}
- target: {fileID: 6359877977706987617, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: -2.517
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354074184678, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.5119995
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235354652902044, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3829999
objectReference: {fileID: 0}
- target: {fileID: 7933235354845945066, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3050002
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353272702555, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353655703554, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353711811619, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354882597209, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
m_RemovedComponents:
- {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f, type: 3}
m_SourcePrefab: {fileID: 100100000, guid: 765582efd9dda46ed98564603316353f, type: 3}
--- !u!1 &1077752704035527923 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704035527914 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704392483292 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871264836332, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576528932657 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353228551169, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576468061548 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353272702556, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575932963445 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354882597189, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577110242841 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353713167657, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577111017236 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353711811620, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576882516798 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353655703566, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575902529953 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354652902033, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575961221087 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354845945071, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576458883964 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353030744140, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576500842159 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353240438175, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576440584931 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353041637843, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575774466714 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235355057813930, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576563420652 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353195701980, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575132336870 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354616748502, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576732930258 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354074184674, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &942701540323662238
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 1077752704035527914}
m_Modifications:
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.y
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.z
value: 1800
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_RootOrder
value: 3
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3840539935788495952, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_Name
value: StaticTarget
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e, type: 3}
--- !u!4 &4058446934158437408 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
m_PrefabInstance: {fileID: 942701540323662238}
m_PrefabAsset: {fileID: 0}

7 changes: Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta


fileFormatVersion: 2
guid: d32d9be22fe544fd38de3cf5db023465
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001 changes: Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity (file diff too large to display)

9 changes: Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta


fileFormatVersion: 2
guid: 0c5ba64aa7c084a63b21f8e2b900fc29
timeCreated: 1520420566
licenseType: Free
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

353 changes: Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using BodyPart = Unity.MLAgentsExamples.BodyPart;
using Random = UnityEngine.Random;
public class MultiDirWalkerAgent : Agent
{
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
private Vector3 m_startingPos; //the starting position of the target
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;
public Transform thighL;
public Transform shinL;
public Transform footL;
public Transform thighR;
public Transform shinR;
public Transform footR;
public Transform armL;
public Transform forearmL;
public Transform handL;
public Transform armR;
public Transform forearmR;
public Transform handR;
int m_Goal;
float dir;
public int goals;
float[] m_GoalOneHot;
//This will be used as a stabilized model space reference point for observations
//Because ragdolls can move erratically during training, using a stabilized reference transform improves learning
OrientationCubeController m_OrientationCube;
GoalSensorComponent goalSensor;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
JointDriveController m_JdController;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{
m_startingPos = target.position;
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
m_GoalOneHot = new float[goals];
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);
m_JdController.SetupBodyPart(chest);
m_JdController.SetupBodyPart(spine);
m_JdController.SetupBodyPart(head);
m_JdController.SetupBodyPart(thighL);
m_JdController.SetupBodyPart(shinL);
m_JdController.SetupBodyPart(footL);
m_JdController.SetupBodyPart(thighR);
m_JdController.SetupBodyPart(shinR);
m_JdController.SetupBodyPart(footR);
m_JdController.SetupBodyPart(armL);
m_JdController.SetupBodyPart(forearmL);
m_JdController.SetupBodyPart(handL);
m_JdController.SetupBodyPart(armR);
m_JdController.SetupBodyPart(forearmR);
m_JdController.SetupBodyPart(handR);
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void OnEpisodeBegin()
{
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
//Reset all of the body parts
foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
{
bodyPart.Reset(bodyPart);
}
//Random start rotation to help generalize
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}
/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
{
//GROUND CHECK
sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground
//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
//Get position relative to hips in the context of our orientation cube's space
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{
sensor.AddObservation(bp.rb.transform.localRotation);
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
}
}
/// <summary>
/// Loop over body parts to add them to observation.
/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
var cubeForward = m_OrientationCube.transform.forward;
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{
CollectObservationBodyPart(bodyPart, sensor);
}
//sensor.AddObservation(m_GoalOneHot);
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddGoal(m_Goal);
}
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
var continuousActions = actionBuffers.ContinuousActions;
bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
//update joint strength settings
bpDict[chest].SetJointStrength(continuousActions[++i]);
bpDict[spine].SetJointStrength(continuousActions[++i]);
bpDict[head].SetJointStrength(continuousActions[++i]);
bpDict[thighL].SetJointStrength(continuousActions[++i]);
bpDict[shinL].SetJointStrength(continuousActions[++i]);
bpDict[footL].SetJointStrength(continuousActions[++i]);
bpDict[thighR].SetJointStrength(continuousActions[++i]);
bpDict[shinR].SetJointStrength(continuousActions[++i]);
bpDict[footR].SetJointStrength(continuousActions[++i]);
bpDict[armL].SetJointStrength(continuousActions[++i]);
bpDict[forearmL].SetJointStrength(continuousActions[++i]);
bpDict[armR].SetJointStrength(continuousActions[++i]);
bpDict[forearmR].SetJointStrength(continuousActions[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
}
void FixedUpdate()
{
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// Set reward for this step according to mixture of the following elements.
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
{
throw new ArgumentException(
"NaN in moveTowardsTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
);
}
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, dir * head.forward) + 1) * .5F;
//Check for NaNs
if (float.IsNaN(lookAtTargetReward))
{
throw new ArgumentException(
"NaN in lookAtTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" head.forward: {head.forward}"
);
}
Debug.Log(lookAtTargetReward);
Debug.Log(matchSpeedReward);
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
{
numOfRB++;
velSum += item.rb.velocity;
}
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
/// Agent touched the target
/// </summary>
public void TouchedTarget()
{
AddReward(1f);
}
public void SetTorsoMass()
{
m_JdController.bodyPartsDict[chest].rb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
m_JdController.bodyPartsDict[spine].rb.mass = m_ResetParams.GetWithDefault("spine_mass", 8);
m_JdController.bodyPartsDict[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
}
public void SetResetParameters()
{
SetTorsoMass();
}
}

11 changes: Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta


fileFormatVersion: 2
guid: c52bddbfaf39944a6bb673a9dfcfe4b6
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

1001 changes: Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx (file diff too large to display)

11 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs.meta


fileFormatVersion: 2
guid: 163dac4bcbb2f4d8499db2cdcb22a89e
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

56 changes: Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs


using Unity.MLAgents.Sensors;
public class GoalSensorComponent : SensorComponent
{
public int observationSize;
public GoalSensor goalSensor;
/// <summary>
/// Creates a GoalSensor.
/// </summary>
/// <returns></returns>
public override ISensor CreateSensor()
{
goalSensor = new GoalSensor(observationSize);
return goalSensor;
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
return new[] { observationSize };
}
public void AddGoal(float goal)
{
if (goalSensor != null)
{
goalSensor.AddObservation(goal);
}
}
public void AddOneHotGoal(int goal, int range)
{
if (goalSensor != null)
{
goalSensor.AddOneHotObservation(goal, range);
}
}
}
public class GoalSensor : VectorSensor
{
// Forward the name to the VectorSensor base constructor so the default
// "GoalSensor_size{N}" name is actually applied instead of being discarded.
public GoalSensor(int observationSize, string name = null)
: base(observationSize, name ?? $"GoalSensor_size{observationSize}")
{
}
public override SensorType GetSensorType()
{
return SensorType.Goal;
}
}