
Merge pull request #4763 from Unity-Technologies/develop-att

WIP Made initial changes to enable dimension properties and added attention module
/MLA-1734-demo-provider
GitHub · 4 years ago
Current commit
458fee17
56 files changed: 956 insertions and 217 deletions
Files changed (changed lines in parentheses):
  1. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (11)
  2. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (33)
  3. docs/Python-API.md (9)
  4. gym-unity/gym_unity/envs/__init__.py (16)
  5. gym-unity/gym_unity/tests/test_gym.py (4)
  6. ml-agents-envs/mlagents_envs/base_env.py (55)
  7. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (19)
  8. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (6)
  9. ml-agents-envs/mlagents_envs/rpc_utils.py (24)
  10. ml-agents-envs/mlagents_envs/tests/test_envs.py (20)
  11. ml-agents-envs/mlagents_envs/tests/test_registry.py (2)
  12. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (31)
  13. ml-agents-envs/mlagents_envs/tests/test_steps.py (7)
  14. ml-agents/mlagents/trainers/demo_loader.py (11)
  15. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2)
  16. ml-agents/mlagents/trainers/policy/policy.py (6)
  17. ml-agents/mlagents/trainers/policy/torch_policy.py (2)
  18. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2)
  19. ml-agents/mlagents/trainers/sac/optimizer_torch.py (18)
  20. ml-agents/mlagents/trainers/tests/check_env_trains.py (3)
  21. ml-agents/mlagents/trainers/tests/dummy_config.py (11)
  22. ml-agents/mlagents/trainers/tests/mock_brain.py (32)
  23. ml-agents/mlagents/trainers/tests/simple_test_envs.py (15)
  24. ml-agents/mlagents/trainers/tests/test_agent_processor.py (16)
  25. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4)
  26. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6)
  27. ml-agents/mlagents/trainers/tests/test_trajectory.py (4)
  28. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (3)
  29. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (12)
  30. ml-agents/mlagents/trainers/tests/torch/test_networks.py (23)
  31. ml-agents/mlagents/trainers/tests/torch/test_policy.py (4)
  32. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2)
  33. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (40)
  34. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (13)
  35. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (27)
  36. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (28)
  37. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8)
  38. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (2)
  39. ml-agents/mlagents/trainers/tests/torch/test_utils.py (4)
  40. ml-agents/mlagents/trainers/torch/components/bc/module.py (2)
  41. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4)
  42. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (4)
  43. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2)
  44. ml-agents/mlagents/trainers/torch/model_serialization.py (10)
  45. ml-agents/mlagents/trainers/torch/networks.py (32)
  46. ml-agents/mlagents/trainers/torch/utils.py (10)
  47. ml-agents/tests/yamato/scripts/run_llapi.py (4)
  48. protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto (1)
  49. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (95)
  50. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs.meta (11)
  51. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (41)
  52. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs.meta (11)
  53. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (47)
  54. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs.meta (11)
  55. ml-agents/mlagents/trainers/tests/torch/test_attention.py (162)
  56. ml-agents/mlagents/trainers/torch/attention.py (191)

11
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
// Add the dimension properties, if any, to the observationProto
var dimensionPropertySensor = sensor as IDimensionPropertiesSensor;
if (dimensionPropertySensor != null)
{
    var dimensionProperties = dimensionPropertySensor.GetDimensionProperties();
    for (int i = 0; i < dimensionProperties.Length; i++)
    {
        // Serialize each DimensionProperty flag as its underlying int value
        observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
    }
}
observationProto.Shape.AddRange(shape);
return observationProto;
}

33
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyK7AgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUaGQoJRmxvYXREYXRh",
"EgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVz",
"c2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1M",
"QWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
dimensionProperties_ = other.dimensionProperties_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "dimension_properties" field.</summary>
public const int DimensionPropertiesFieldNumber = 6;
private static readonly pb::FieldCodec<int> _repeated_dimensionProperties_codec
= pb::FieldCodec.ForInt32(50);
private readonly pbc::RepeatedField<int> dimensionProperties_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DimensionProperties {
get { return dimensionProperties_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if(!dimensionProperties_.Equals(other.dimensionProperties_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= dimensionProperties_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
dimensionProperties_.WriteTo(output, _repeated_dimensionProperties_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
size += dimensionProperties_.CalculateSize(_repeated_dimensionProperties_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
dimensionProperties_.Add(other.dimensionProperties_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 50:
case 48: {
dimensionProperties_.AddEntriesFrom(input, _repeated_dimensionProperties_codec);
break;
}
}

9
docs/Python-API.md


A `BehaviorSpec` has the following fields:
-- `observation_shapes` is a List of Tuples of int : Each Tuple corresponds to an
-  observation's dimensions (without the number of agents dimension). The shape
-  tuples have the same ordering as the ordering of the DecisionSteps,
+- `sensor_specs` is a List of `SensorSpec` objects: Each `SensorSpec`
+  corresponds to an observation's properties: `shape` is a tuple of ints that
+  corresponds to the shape of the observation (without the number of agents dimension).
+  `dimension_property` is a tuple of flags containing extra information about how the
+  data should be processed in the corresponding dimension. Note that the `SensorSpec`s
+  have the same ordering as the ordering of observations in the DecisionSteps,
  DecisionStep, TerminalSteps and TerminalStep.
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.
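For example, the new fields can be inspected from Python. A minimal sketch; it assumes an environment is already built and connected, and the printed values will vary per project:

```python
from mlagents_envs.environment import UnityEnvironment

# Connect to an environment (file_name=None attaches to the Unity Editor).
env = UnityEnvironment(file_name=None)
env.reset()
behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]
for sensor_spec in spec.sensor_specs:
    # Each SensorSpec pairs a shape with per-dimension processing hints.
    print(sensor_spec.shape, sensor_spec.dimension_property)
env.close()
```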

16
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
-        for shape in self.group_spec.observation_shapes:
-            if len(shape) == 3:
+        for sen_spec in self.group_spec.sensor_specs:
+            if len(sen_spec.shape) == 3:

-        for shape in self.group_spec.observation_shapes:
-            if len(shape) == 3:
-                result.append(shape)
+        for sen_spec in self.group_spec.sensor_specs:
+            if len(sen_spec.shape) == 3:
+                result.append(sen_spec.shape)
return result
def _get_vis_obs_list(

def _get_vec_obs_size(self) -> int:
result = 0
-        for shape in self.group_spec.observation_shapes:
-            if len(shape) == 1:
-                result += shape[0]
+        for sen_spec in self.group_spec.sensor_specs:
+            if len(sen_spec.shape) == 1:
+                result += sen_spec.shape[0]
return result
def render(self, mode="rgb_array"):

4
gym-unity/gym_unity/tests/test_gym.py


TerminalSteps,
BehaviorMapping,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
-    return BehaviorSpec(obs_shapes, action_spec)
+    sen_spec = create_sensor_specs_with_shapes(obs_shapes)
+    return BehaviorSpec(sen_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

55
ml-agents-envs/mlagents_envs/base_env.py


Any,
Mapping as MappingType,
)
from enum import IntFlag
import numpy as np
from mlagents_envs.exception import UnityActionException

:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
-        for shape in spec.observation_shapes:
-            obs += [np.zeros((0,) + shape, dtype=np.float32)]
+        for sen_spec in spec.sensor_specs:
+            obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
-        for shape in spec.observation_shapes:
-            obs += [np.zeros((0,) + shape, dtype=np.float32)]
+        for sen_spec in spec.sensor_specs:
+            obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

return ActionSpec(0, discrete_branches)
class DimensionProperty(IntFlag):
    """
    No properties specified.
    """
    UNSPECIFIED = 0
    """
    No property of the observation in that dimension. The observation can be
    processed with fully connected networks.
    """
    NONE = 1
    """
    Means it is suitable to do a convolution in this dimension.
    """
    TRANSLATIONAL_EQUIVARIANCE = 2
    """
    Means that there can be a variable number of observations in this dimension.
    The observations are unordered.
    """
    VARIABLE_SIZE = 4


class SensorSpec(NamedTuple):
    """
    A NamedTuple containing information about the observation of Agents.
    - shape is a Tuple of ints: it corresponds to the shape of
      an observation's dimensions.
    - dimension_property is a Tuple of DimensionProperty flags, one flag for each
      dimension.
    """
    shape: Tuple[int, ...]
    dimension_property: Tuple[DimensionProperty, ...]
-    - observation_shapes is a List of Tuples of int : Each Tuple corresponds
-      to an observation's dimensions. The shape tuples have the same ordering as
-      the ordering of the DecisionSteps and TerminalSteps.
-    - action_spec is an ActionSpec NamedTuple
+    - sensor_specs is a List of SensorSpec NamedTuples containing
+      information about the Agent's observations such as their shapes.
+      The order of the SensorSpecs is the same as the order of the observations of an
+      agent.
+    - action_spec is an ActionSpec NamedTuple.
-    observation_shapes: List[Tuple]
+    sensor_specs: List[SensorSpec]
     action_spec: ActionSpec
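To illustrate how these flags are meant to compose, here is a small sketch; the shapes and flag assignments below are hypothetical examples, not values taken from this change:

```python
from mlagents_envs.base_env import DimensionProperty, SensorSpec

# A (20, 20, 3) visual observation: height and width are suitable for
# convolution; the channel dimension has no special structure.
visual = SensorSpec(
    shape=(20, 20, 3),
    dimension_property=(
        DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
        DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
        DimensionProperty.NONE,
    ),
)

# A buffer of up to 20 unordered entities of 6 floats each: the first
# dimension is variable-size, so attention-style processing applies.
buffer_obs = SensorSpec(
    shape=(20, 6),
    dimension_property=(DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE),
)
```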

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
-serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
+serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xbb\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
-  serialized_start=366,
-  serialized_end=407,
+  serialized_start=396,
+  serialized_end=437,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
-  serialized_start=319,
-  serialized_end=344,
+  serialized_start=349,
+  serialized_end=374,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='dimension_properties', full_name='communicator_objects.ObservationProto.dimension_properties', index=5,
number=6, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
-  serialized_end=364,
+  serialized_end=394,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
dimension_properties = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
dimension_properties : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

24
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
DimensionProperty,
BehaviorSpec,
DecisionSteps,
TerminalSteps,

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
-    # proto from comminicator < v1.3 does not set action spec, use deprecated fields instead
+    dim_props = [
+        tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
+        for obs in agent_info.observations
+    ]
+    sensor_specs = [
+        SensorSpec(obs_shape, dim_p)
+        for obs_shape, dim_p in zip(observation_shape, dim_props)
+    ]
+    # proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
-    return BehaviorSpec(observation_shape, action_spec)
+    return BehaviorSpec(sensor_specs, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
-    for obs_index, obs_shape in enumerate(behavior_spec.observation_shapes):
-        is_visual = len(obs_shape) == 3
+    for obs_index, sensor_specs in enumerate(behavior_spec.sensor_specs):
+        is_visual = len(sensor_specs.shape) == 3
-            obs_shape = cast(Tuple[int, int, int], obs_shape)
+            obs_shape = cast(Tuple[int, int, int], sensor_specs.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
-                    obs_index, obs_shape, decision_agent_info_list
+                    obs_index, sensor_specs.shape, decision_agent_info_list
-                    obs_index, obs_shape, terminal_agent_info_list
+                    obs_index, sensor_specs.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(

20
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
-    assert len(spec.observation_shapes) == len(decision_steps.obs)
-    assert len(spec.observation_shapes) == len(terminal_steps.obs)
+    assert len(spec.sensor_specs) == len(decision_steps.obs)
+    assert len(spec.sensor_specs) == len(terminal_steps.obs)
-    for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
-        assert (n_agents,) + shape == obs.shape
+    for sen_spec, obs in zip(spec.sensor_specs, decision_steps.obs):
+        assert (n_agents,) + sen_spec.shape == obs.shape
-    for shape, obs in zip(spec.observation_shapes, terminal_steps.obs):
-        assert (n_agents,) + shape == obs.shape
+    for sen_spec, obs in zip(spec.sensor_specs, terminal_steps.obs):
+        assert (n_agents,) + sen_spec.shape == obs.shape
@mock.patch("mlagents_envs.env_utils.launch_executable")

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
-    assert len(spec.observation_shapes) == len(decision_steps.obs)
-    assert len(spec.observation_shapes) == len(terminal_steps.obs)
-    for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
-        assert (n_agents,) + shape == obs.shape
+    assert len(spec.sensor_specs) == len(decision_steps.obs)
+    assert len(spec.sensor_specs) == len(terminal_steps.obs)
+    for spec, obs in zip(spec.sensor_specs, decision_steps.obs):
+        assert (n_agents,) + spec.shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

2
ml-agents-envs/mlagents_envs/tests/test_registry.py


for worker_id in range(2):
assert BASIC_ID in registry
env = registry[BASIC_ID].make(
-            base_port=6005, worker_id=worker_id, no_graphics=True
+            base_port=6002, worker_id=worker_id, no_graphics=True
)
env.reset()
env.step()

31
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def generate_list_agent_proto(

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
-    spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
+    spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
+    )
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
+    )
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((10,))
+    )
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
+    )
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(10)
+    )
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
-    assert behavior_spec.observation_shapes == [(3,), (4,)]
+    assert [spec.shape for spec in behavior_spec.sensor_specs] == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
+    )
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
-    behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
+    behavior_spec = BehaviorSpec(
+        create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
+    )
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

7
ml-agents-envs/mlagents_envs/tests/test_steps.py


ActionSpec,
BehaviorSpec,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
+        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
+        action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
-        observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
+        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
+        action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

11
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
-    if len(behavior_spec.observation_shapes) != len(
-        expected_behavior_spec.observation_shapes
-    ):
+    if len(behavior_spec.sensor_specs) != len(expected_behavior_spec.sensor_specs):

-        zip(
-            behavior_spec.observation_shapes,
-            expected_behavior_spec.observation_shapes,
-        )
+        zip(behavior_spec.sensor_specs, expected_behavior_spec.sensor_specs)

-        if demo_obs != policy_obs:
+        if demo_obs.shape != policy_obs.shape:
raise RuntimeError(
f"The shape {demo_obs} for observation {i} in demonstration \
do not match the policy's {policy_obs}."

2
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
-        n_obs = len(self.policy.behavior_spec.observation_shapes)
+        n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors

6
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
        self.vec_obs_size = sum(
-            shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
+            sen_spec.shape[0]
+            for sen_spec in behavior_spec.sensor_specs
+            if len(sen_spec.shape) == 1
        )
        self.vis_obs_size = sum(
-            1 for shape in behavior_spec.observation_shapes if len(shape) == 3
+            1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
        )
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
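In other words, vec_obs_size sums the widths of all 1-D observations while vis_obs_size counts the 3-D ones. A quick sketch with a hypothetical spec:

```python
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes

# Hypothetical spec: one 8-float vector observation and one 84x84x3 visual one.
sensor_specs = create_sensor_specs_with_shapes([(8,), (84, 84, 3)])
vec_obs_size = sum(s.shape[0] for s in sensor_specs if len(s.shape) == 1)  # -> 8
vis_obs_count = sum(1 for s in sensor_specs if len(s.shape) == 3)  # -> 1
```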

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
-            observation_shapes=self.behavior_spec.observation_shapes,
+            sensor_specs=self.behavior_spec.sensor_specs,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

2
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
-        n_obs = len(self.policy.behavior_spec.observation_shapes)
+        n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

18
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
-from mlagents_envs.base_env import ActionSpec
+from mlagents_envs.base_env import ActionSpec, SensorSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

def __init__(
self,
stream_names: List[str],
-        observation_shapes: List[Tuple[int, ...]],
+        sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
):

self.q1_network = ValueNetwork(
stream_names,
-            observation_shapes,
+            sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

-            observation_shapes,
+            sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
-                stack.enter_context(torch.no_grad())
+                stack.enter_context(torch.no_grad())  # pylint: disable=E1101
q1_out, _ = self.q1_network(
inputs,
actions=actions,

with ExitStack() as stack:
if not q2_grad:
-                stack.enter_context(torch.no_grad())
+                stack.enter_context(torch.no_grad())  # pylint: disable=E1101
q2_out, _ = self.q2_network(
inputs,
actions=actions,
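The ExitStack pattern used above condenses to the following sketch; `forward_maybe_no_grad` is a hypothetical helper, not part of the optimizer:

```python
from contextlib import ExitStack
import torch

def forward_maybe_no_grad(network, inputs, needs_grad: bool):
    # Enter torch.no_grad() only when gradients are not needed; ExitStack
    # makes the context manager conditional without duplicating the call.
    with ExitStack() as stack:
        if not needs_grad:
            stack.enter_context(torch.no_grad())
        return network(inputs)
```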

self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
-            self.policy.behavior_spec.observation_shapes,
+            self.policy.behavior_spec.sensor_specs,
policy_network_settings,
self._action_spec,
)

-            self.policy.behavior_spec.observation_shapes,
+            self.policy.behavior_spec.sensor_specs,
policy_network_settings,
)
ModelUtils.soft_update(

for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
-        n_obs = len(self.policy.behavior_spec.observation_shapes)
+        n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

3
ml-agents/mlagents/trainers/tests/check_env_trains.py


env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
training_seed=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()

-    seed = 1337
+    seed = 1337 if training_seed is None else training_seed
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)

11
ml-agents/mlagents/trainers/tests/dummy_config.py


from typing import List, Tuple
from mlagents_envs.base_env import SensorSpec, DimensionProperty
import pytest
import copy
import os

@pytest.fixture
def extrinsic_dummy_config():
return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def create_sensor_specs_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
    sen_spec: List[SensorSpec] = []
    for shape in shapes:
        dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
        spec = SensorSpec(shape, dim_prop)
        sen_spec.append(spec)
    return sen_spec
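Usage is straightforward: every dimension of every shape gets the UNSPECIFIED property. A small sketch:

```python
from mlagents_envs.base_env import DimensionProperty
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes

specs = create_sensor_specs_with_shapes([(8,), (84, 84, 3)])
assert specs[0].shape == (8,)
assert all(
    prop == DimensionProperty.UNSPECIFIED for prop in specs[1].dimension_property
)
```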

32
ml-agents/mlagents/trainers/tests/mock_brain.py


from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
SensorSpec,
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
-    observation_shapes: List[Tuple],
+    sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
-    :List observation_shapes: A List of the observation spaces in your steps
+    :List sensor_specs: A List of the observation specs in your steps
-    for _shape in observation_shapes:
-        obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
+    for sen_spec in sensor_specs:
+        obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
-    behavior_spec = BehaviorSpec(observation_shapes, action_spec)
+    behavior_spec = BehaviorSpec(sensor_specs, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
-        observation_shapes=behavior_spec.observation_shapes,
+        sensor_specs=behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
)

-    observation_shapes: List[Tuple],
+    sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
max_step_complete: bool = False,
memory_size: int = 10,

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
-        for _shape in observation_shapes:
-            obs.append(np.ones(_shape, dtype=np.float32))
+        for sen_spec in sensor_specs:
+            obs.append(np.ones(sen_spec.shape, dtype=np.float32))
reward = 1.0
done = False
action = ActionTuple(

)
steps_list.append(experience)
obs = []
-    for _shape in observation_shapes:
-        obs.append(np.ones(_shape, dtype=np.float32))
+    for sen_spec in sensor_specs:
+        obs.append(np.ones(sen_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
-        behavior_spec.observation_shapes,
+        behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
else:
action_spec = ActionSpec.create_continuous(vector_action_space)
-    behavior_spec = BehaviorSpec(
-        [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
-    )
+    observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
+    sen_spec = create_sensor_specs_with_shapes(observation_shapes)
+    behavior_spec = BehaviorSpec(sen_spec, action_spec)
return behavior_spec

15
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,

from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
-        self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
+        self.behavior_spec = BehaviorSpec(self._make_sensor_specs(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

self.action[name] = None
self.step_result[name] = None
-    def _make_obs_spec(self) -> List[Any]:
-        obs_spec: List[Any] = []
+    def _make_sensor_specs(self) -> SensorSpec:
+        obs_shape: List[Any] = []

-            obs_spec.append((self.vec_obs_size,))
+            obs_shape.append((self.vec_obs_size,))

-            obs_spec.append(self.vis_obs_size)
-        return obs_spec
+            obs_shape.append(self.vis_obs_size)
+        sen_spec = create_sensor_specs_with_shapes(obs_shape)
+        return sen_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []

16
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
-        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
+        sensor_specs=create_sensor_specs_with_shapes(
+            [(8,)] + num_vis_obs * [(84, 84, 3)]
+        ),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
-        observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
+        sensor_specs=create_sensor_specs_with_shapes(
+            [(8,)] + num_vis_obs * [(84, 84, 3)]
+        ),
action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
-        observation_shapes=[(8,)],
+        sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
-        observation_shapes=[(8,)],
+        sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
-    assert np.sum(behavior_spec.observation_shapes[0]) == 8
+    assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
-    assert np.sum(behavior_spec.observation_shapes[0]) == 8
+    assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
-        observation_shapes=[(1,)],
+        sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
-        observation_shapes=[(1,)],
+        sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
-        observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
+        sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

3
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
-        observation_shapes=[(1,)],
+        sensor_specs=create_sensor_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

12
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
)
config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
-    check_environment_trains(env, {BRAIN_NAME: config})
+    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
def test_hybrid_recurrent_ppo():

PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
-        max_steps=3000,
+        max_steps=5000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

SAC_TORCH_CONFIG.hyperparameters,
buffer_size=50000,
batch_size=256,
-        buffer_init_steps=2000,
+        buffer_init_steps=0,
-        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
+        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
-    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
+    check_environment_trains(
+        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
+    )
@pytest.mark.parametrize("num_visual", [1, 2])

23
ml-agents/mlagents/trainers/tests/torch/test_networks.py


)
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
-    networkbody = NetworkBody(obs_shapes, network_settings, encoded_act_size=2)
+    networkbody = NetworkBody(
+        create_sensor_specs_with_shapes(obs_shapes),
+        network_settings,
+        encoded_act_size=2,
+    )
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, obs_size))
sample_act = 0.1 * torch.ones((1, 2))

)
obs_shapes = [(obs_size,)]
-    networkbody = NetworkBody(obs_shapes, network_settings)
+    networkbody = NetworkBody(
+        create_sensor_specs_with_shapes(obs_shapes), network_settings
+    )
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

network_settings = NetworkSettings()
obs_shapes = [(vec_obs_size,), obs_size]
-    networkbody = NetworkBody(obs_shapes, network_settings)
+    networkbody = NetworkBody(
+        create_sensor_specs_with_shapes(obs_shapes), network_settings
+    )
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
sample_vec_obs = torch.ones((1, vec_obs_size))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
-    obs_shapes = [(obs_size,)]
+    sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
-        stream_names, obs_shapes, network_settings, outputs_per_stream=num_outputs
+        stream_names, sen_spec, network_settings, outputs_per_stream=num_outputs
)
optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
-    obs_shapes = [(obs_size,)]
+    sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
-    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
+    actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(

4
ml-agents/mlagents/trainers/tests/torch/test_policy.py


buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
agent_action = AgentAction.from_dict(buffer)
-    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_shapes))
+    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
-    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_shapes))
+    np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
-        observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
+        sensor_specs=optimizer.policy.behavior_spec.sensor_specs,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

40
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


create_agent_buffer,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+            ACTIONSPEC_CONTINUOUS,
+        ),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
+            ACTIONSPEC_TWODISCRETE,
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            ACTIONSPEC_CONTINUOUS,
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            ACTIONSPEC_CONTINUOUS,
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

13
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

27
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
DiscriminatorNetwork,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
CONTINUOUS_PATH = (

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+            ACTIONSPEC_CONTINUOUS,
+        ),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
+            ACTIONSPEC_TWODISCRETE,
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
-        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
-        BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
-        BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
+        BehaviorSpec(
+            create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            ACTIONSPEC_CONTINUOUS,
+        ),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+        BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

8
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


) -> AgentBuffer:
buffer = AgentBuffer()
    curr_obs = [
-        np.random.normal(size=shape).astype(np.float32)
-        for shape in behavior_spec.observation_shapes
+        np.random.normal(size=sen_spec.shape).astype(np.float32)
+        for sen_spec in behavior_spec.sensor_specs
    ]
    next_obs = [
-        np.random.normal(size=shape).astype(np.float32)
-        for shape in behavior_spec.observation_shapes
+        np.random.normal(size=sen_spec.shape).astype(np.float32)
+        for sen_spec in behavior_spec.sensor_specs
    ]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

2
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
-    hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
+    hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
config = attr.evolve(
PPO_TORCH_CONFIG,
reward_signals=reward_signals,

4
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_min_visual_size():

for _ in range(num_visual):
obs_shapes.append(vis_obs_shape)
h_size = 128
+    sen_spec = create_sensor_specs_with_shapes(obs_shapes)
-        obs_shapes, h_size, encoder_type, normalize
+        sen_spec, h_size, encoder_type, normalize
)
total_output = sum(embedding_sizes)
vec_enc = []

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


Helper function for update_batch.
"""
np_obs = ObsUtil.from_buffer(
-            mini_batch_demo, len(self.policy.behavior_spec.observation_shapes)
+            mini_batch_demo, len(self.policy.behavior_spec.sensor_specs)
)
# Convert to tensors
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

4
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
-        self._state_encoder = NetworkBody(
-            specs.observation_shapes, state_encoder_settings
-        )
+        self._state_encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
self._action_flattener = ActionFlattener(self._action_spec)

4
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


unencoded_size = (
self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
-        self.encoder = NetworkBody(
-            specs.observation_shapes, encoder_settings, unencoded_size
-        )
+        self.encoder = NetworkBody(specs.sensor_specs, encoder_settings, unencoded_size)
estimator_input_size = settings.encoding_size
if settings.use_vail:

2
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
-        self._encoder = NetworkBody(specs.observation_shapes, state_encoder_settings)
+        self._encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
n_obs = len(self._encoder.processors)

10
ml-agents/mlagents/trainers/torch/model_serialization.py


seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
# create input shape of NCHW
-        # (It's NHWC in self.policy.behavior_spec.observation_shapes)
+        # (It's NHWC in self.policy.behavior_spec.sensor_specs.shape)
        dummy_vis_obs = [
-            torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
-            for shape in self.policy.behavior_spec.observation_shapes
-            if len(shape) == 3
+            torch.zeros(
+                batch_dim + [sen_spec.shape[2], sen_spec.shape[0], sen_spec.shape[1]]
+            )
+            for sen_spec in self.policy.behavior_spec.sensor_specs
+            if len(sen_spec.shape) == 3
        ]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
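
The comment above is the crux of this hunk: sensor shapes are stored as (H, W, C), but the exported model expects NCHW inputs, so the dummy visual tensors are built by reordering each rank-3 shape. A small sketch of that index shuffle, assuming a rank-3 (H, W, C) sensor shape:

import torch

def dummy_visual_input(batch_dim, hwc_shape):
    # hwc_shape is (H, W, C); the dummy tensor is laid out as (N, C, H, W).
    h, w, c = hwc_shape
    return torch.zeros(batch_dim + [c, h, w])

dummy = dummy_visual_input([1], (84, 84, 3))
assert tuple(dummy.shape) == (1, 3, 84, 84)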

32
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.torch_utils import torch, nn
- from mlagents_envs.base_env import ActionSpec
+ from mlagents_envs.base_env import ActionSpec, SensorSpec
from mlagents.trainers.torch.action_model import ActionModel
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs

class NetworkBody(nn.Module):
def __init__(
self,
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
):

)
self.processors, self.embedding_sizes = ModelUtils.create_input_processors(
- observation_shapes,
+ sensor_specs,
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

def __init__(
self,
stream_names: List[str],
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
outputs_per_stream: int = 1,

nn.Module.__init__(self)
self.network_body = NetworkBody(
- observation_shapes, network_settings, encoded_act_size=encoded_act_size
+ sensor_specs, network_settings, encoded_act_size=encoded_act_size
)
if network_settings.memory is not None:
encoding_size = network_settings.memory.memory_size // 2

class SimpleActor(nn.Module, Actor):
def __init__(
self,
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
conditional_sigma: bool = False,

),
requires_grad=False,
)
- self.network_body = NetworkBody(observation_shapes, network_settings)
+ self.network_body = NetworkBody(sensor_specs, network_settings)
if network_settings.memory is not None:
self.encoding_size = network_settings.memory.memory_size // 2
else:

class SharedActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],

self.use_lstm = network_settings.memory is not None
super().__init__(
- observation_shapes,
- network_settings,
- action_spec,
- conditional_sigma,
- tanh_squash,
+ sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
)
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, self.encoding_size)

class SeparateActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],

self.use_lstm = network_settings.memory is not None
super().__init__(
- observation_shapes,
- network_settings,
- action_spec,
- conditional_sigma,
- tanh_squash,
+ sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
- self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)
+ self.critic = ValueNetwork(stream_names, sensor_specs, network_settings)
@property
def memory_size(self) -> int:
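
The changes in networks.py are uniform: every constructor that took List[Tuple[int, ...]] observation shapes now takes List[SensorSpec], and only ModelUtils.create_input_processors ever looks inside a spec. A sketch of what a caller looks like after the migration, reusing the test helper above (the NetworkSettings values are placeholders):

from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.networks import NetworkBody

# Hypothetical wiring: a body over one vector and one visual sensor.
sensor_specs = create_sensor_specs_with_shapes([(8,), (84, 84, 3)])
body = NetworkBody(sensor_specs, NetworkSettings(hidden_units=128, num_layers=2))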

10
ml-agents/mlagents/trainers/torch/utils.py


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
+ from mlagents_envs.base_env import SensorSpec
class ModelUtils:

@staticmethod
def create_input_processors(
- observation_shapes: List[Tuple[int, ...]],
+ sensor_specs: List[SensorSpec],
h_size: int,
vis_encode_type: EncoderType,
normalize: bool = False,

- :param observation_shapes: List of Tuples that represent the action dimensions.
+ :param sensor_specs: List of SensorSpec that represent the observation dimensions.
:param action_size: Number of additional un-normalized inputs to each vector encoder. Used for
conditioning network on other values (e.g. actions for a Q function)
:param h_size: Number of hidden units per layer.

"""
encoders: List[nn.Module] = []
embedding_sizes: List[int] = []
- for dimension in observation_shapes:
+ for sen_spec in sensor_specs:
- dimension, normalize, h_size, vis_encode_type
+ sen_spec.shape, normalize, h_size, vis_encode_type
)
encoders.append(encoder)
embedding_sizes.append(embedding_size)

4
ml-agents/tests/yamato/scripts/run_llapi.py


decision_steps, terminal_steps = env.get_steps(group_name)
# Examine the number of observations per Agent
print("Number of observations : ", len(group_spec.observation_shapes))
print("Number of observations : ", len(group_spec.sensor_specs))
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_shapes)
vis_obs = any(len(sen_spec.shape) == 3 for sen_spec in group_spec.sensor_specs)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent

1
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


FloatData float_data = 4;
}
repeated int32 compressed_channel_mapping = 5;
+ repeated int32 dimension_properties = 6;
}

95
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


using System;
namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor, IDimensionPropertiesSensor
{
private int m_MaxNumObs;
private int m_ObsSize;
float[] m_ObservationBuffer;
int m_CurrentNumObservables;
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;
m_ObsSize = obsSize;
m_ObservationBuffer = new float[m_ObsSize * m_MaxNumObs];
m_CurrentNumObservables = 0;
}
/// <inheritdoc/>
public int[] GetObservationShape()
{
return new int[] { m_MaxNumObs, m_ObsSize };
}
/// <inheritdoc/>
public DimensionProperty[] GetDimensionProperties()
{
return new DimensionProperty[]{
DimensionProperty.VariableSize,
DimensionProperty.None
};
}
/// <summary>
/// Appends an observation to the buffer. If the buffer is full (maximum number
/// of observation is reached) the observation will be ignored. the length of
/// the provided observation array must be equal to the observation size of
/// the buffer sensor.
/// </summary>
/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;
}
for (int i = 0; i < obs.Length; i++)
{
m_ObservationBuffer[m_CurrentNumObservables * m_ObsSize + i] = obs[i];
}
m_CurrentNumObservables++;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
for (int i = 0; i < m_ObsSize * m_MaxNumObs; i++)
{
writer[i] = m_ObservationBuffer[i];
}
return m_ObsSize * m_MaxNumObs;
}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;
}
/// <inheritdoc/>
public void Update()
{
Reset();
}
/// <inheritdoc/>
public void Reset()
{
m_CurrentNumObservables = 0;
Array.Clear(m_ObservationBuffer, 0, m_ObservationBuffer.Length);
}
public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;
}
public string GetName()
{
return "BufferSensor";
}
}
}
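
The sensor's contract is easy to restate outside C#: at most maxNumberObs entries of obsSize floats, later appends are silently dropped, unused slots stay zero, and Write emits the flattened buffer. A Python sketch of the same behavior (the class below is a translation for illustration, not the repo's API); the zero rows are what SimpleTransformer.get_masks later treats as padding:

import numpy as np

class ToyBufferSensor:
    def __init__(self, max_num_obs, obs_size):
        self.max_num_obs, self.obs_size = max_num_obs, obs_size
        self.buffer = np.zeros((max_num_obs, obs_size), dtype=np.float32)
        self.count = 0

    def append_observation(self, obs):
        # Appends past capacity are ignored, as in BufferSensor.AppendObservation.
        if self.count >= self.max_num_obs:
            return
        self.buffer[self.count] = obs
        self.count += 1

    def write(self):
        # Flattened (max_num_obs * obs_size) output; untouched rows remain zero.
        return self.buffer.reshape(-1)

sensor = ToyBufferSensor(max_num_obs=3, obs_size=2)
sensor.append_observation([1.0, 2.0])
assert sensor.write().tolist() == [1.0, 2.0, 0.0, 0.0, 0.0, 0.0]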

11
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs.meta


fileFormatVersion: 2
guid: 034f05c858e684e5498d9a548c9d1fc5
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

41
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


using UnityEngine;
namespace Unity.MLAgents.Sensors
{
/// <summary>
/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;
private BufferSensor m_Sensor;
/// <inheritdoc/>
public override ISensor CreateSensor()
{
m_Sensor = new BufferSensor(MaxNumObservables, ObservableSize);
return m_Sensor;
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
return new[] { MaxNumObservables, ObservableSize };
}
/// <summary>
/// Appends an observation to the buffer. If the buffer is full (maximum number
/// of observation is reached) the observation will be ignored. the length of
/// the provided observation array must be equal to the observation size of
/// the buffer sensor.
/// </summary>
/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
m_Sensor.AppendObservation(obs);
}
}
}

11
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs.meta


fileFormatVersion: 2
guid: dd8012d5925524537b27131fef517017
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

47
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


namespace Unity.MLAgents.Sensors
{
/// <summary>
/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
public enum DimensionProperty
{
/// <summary>
/// No properties specified.
/// </summary>
Unspecified = 0,
/// <summary>
/// No Property of the observation in that dimension. Observation can be processed with
/// fully connected networks.
/// </summary>
None = 1,
/// <summary>
/// Means it is suitable to do a convolution in this dimension.
/// </summary>
TranslationalEquivariance = 2,
/// <summary>
/// Means that there can be a variable number of observations in this dimension.
/// The observations are unordered.
/// </summary>
VariableSize = 4,
}
/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
public interface IDimensionPropertiesSensor
{
/// <summary>
/// Returns the array containing the properties of each dimension of the
/// observation. The length of the array must be equal to the rank of the
/// observation tensor.
/// </summary>
/// <returns>The array of DimensionProperty</returns>
DimensionProperty[] GetDimensionProperties();
}
}
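
These values are bit flags (hence [System.Flags] and the power-of-two values), and they are what the new repeated int32 dimension_properties field in observation.proto carries, one entry per dimension of the observation. A Python mirror of the enum, sketched to show how a receiver might decode the ints:

from enum import IntFlag

class DimensionProperty(IntFlag):
    UNSPECIFIED = 0
    NONE = 1
    TRANSLATIONAL_EQUIVARIANCE = 2
    VARIABLE_SIZE = 4

# A BufferSensor reports (VariableSize, None) for its two dimensions,
# matching GetDimensionProperties() above.
dims = [DimensionProperty(4), DimensionProperty(1)]
assert dims[0] == DimensionProperty.VARIABLE_SIZE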

11
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs.meta


fileFormatVersion: 2
guid: 297e9ec12d6de45adbcf6dea1a9de019
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

162
ml-agents/mlagents/trainers/tests/torch/test_attention.py


from mlagents.torch_utils import torch
import numpy as np
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.attention import MultiHeadAttention, SimpleTransformer
def test_multi_head_attention_initialization():
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
query = torch.ones((b, n_q, q_size))
key = torch.ones((b, n_k, k_size))
value = torch.ones((b, n_k, v_size))
output, attention = mha.forward(query, key, value)
assert output.shape == (b, n_q, o_size)
assert attention.shape == (b, n_h, n_q, n_k)
def test_multi_head_attention_masking():
epsilon = 0.0001
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
# create a key input with some keys all 0
key = torch.ones((b, n_k, k_size))
mask = torch.zeros((b, n_k))
for i in range(n_k):
if i % 3 == 0:
key[:, i, :] = 0
mask[:, i] = 1
query = torch.ones((b, n_q, q_size))
value = torch.ones((b, n_k, v_size))
_, attention = mha.forward(query, key, value, mask)
for i in range(n_k):
if i % 3 == 0:
assert torch.sum(attention[:, :, :, i] ** 2) < epsilon
else:
assert torch.sum(attention[:, :, :, i] ** 2) > epsilon
def test_multi_head_attention_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_h, n_k, n_q = 3, 10, 5, 1
embedding_size = 64
mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(50):
query = torch.rand((batch_size, n_q, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
value = key
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum((query - key) ** 2, dim=2)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
prediction, _ = mha.forward(query, key, value)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.5
def test_zero_mask_layer():
batch_size, size = 10, 30
def generate_input_helper(pattern):
_input = torch.zeros((batch_size, 0, size))
for i in range(len(pattern)):
if i % 2 == 0:
_input = torch.cat(
[_input, torch.rand((batch_size, pattern[i], size))], dim=1
)
else:
_input = torch.cat(
[_input, torch.zeros((batch_size, pattern[i], size))], dim=1
)
return _input
masking_pattern_1 = [3, 2, 3, 4]
masking_pattern_2 = [5, 7, 8, 2]
input_1 = generate_input_helper(masking_pattern_1)
input_2 = generate_input_helper(masking_pattern_2)
masks = SimpleTransformer.get_masks([input_1, input_2])
assert len(masks) == 2
masks_1 = masks[0]
masks_2 = masks[1]
assert masks_1.shape == (batch_size, sum(masking_pattern_1))
assert masks_2.shape == (batch_size, sum(masking_pattern_2))
# The masks must be 0 over the random segments (even-indexed in the pattern)
# and 1 over the all-zero segments (odd-indexed).
for masks, pattern in ((masks_1, masking_pattern_1), (masks_2, masking_pattern_2)):
    offset = 0
    for j, length in enumerate(pattern):
        expected = 0.0 if j % 2 == 0 else 1.0
        assert torch.all(masks[0, offset : offset + length] == expected)
        offset += length
def test_simple_transformer_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_k = 3, 5
embedding_size = 64
transformer = SimpleTransformer(size, [size], embedding_size)
l_layer = linear_layer(embedding_size, size)
optimizer = torch.optim.Adam(
list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(100):
center = torch.rand((batch_size, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum(
(center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
masks = SimpleTransformer.get_masks([key])
prediction = transformer.forward(center, [key], masks)
prediction = l_layer(prediction)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.3

191
ml-agents/mlagents/trainers/torch/attention.py


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
class MultiHeadAttention(torch.nn.Module):
"""
Multi Head Attention module. We do not use the regular Torch implementation since
Barracuda does not support some operators it uses.
Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, query_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
The forward method will return 2 tensors:
- The output: (batch_size, number_of_queries, output_size)
- The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
"""
NEG_INF = -1e6
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
super().__init__()
self.n_heads, self.embedding_size = num_heads, embedding_size
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
def forward(
self,
query: torch.Tensor,
key: torch.Tensor,
value: torch.Tensor,
key_mask: Optional[torch.Tensor] = None,
number_of_keys: int = -1,
number_of_queries: int = -1,
) -> Tuple[torch.Tensor, torch.Tensor]:
b = -1 # the batch size
# Avoid using .size() when possible, as Barracuda does not support it
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
query = self.fc_q(query) # (b, n_q, h*d)
key = self.fc_k(key) # (b, n_k, h*d)
value = self.fc_v(value) # (b, n_k, h*d)
query = query.reshape(b, n_q, self.n_heads, self.embedding_size)
key = key.reshape(b, n_k, self.n_heads, self.embedding_size)
value = value.reshape(b, n_k, self.n_heads, self.embedding_size)
query = query.permute([0, 2, 1, 3]) # (b, h, n_q, emb)
# The next few lines are equivalent to : key.permute([0, 2, 3, 1])
# This is a hack, ONNX will compress two permute operations and
# Barracuda will not like seeing `permute([0,2,3,1])`
key = key.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
key -= 1
key += 1
key = key.permute([0, 1, 3, 2]) # (b, h, emb, n_k)
qk = torch.matmul(query, key) # (b, h, n_q, n_k)
if key_mask is None:
qk = qk / (self.embedding_size ** 0.5)
else:
key_mask = key_mask.reshape(b, 1, 1, n_k)
qk = (1 - key_mask) * qk / (
self.embedding_size ** 0.5
) + key_mask * self.NEG_INF
att = torch.softmax(qk, dim=3) # (b, h, n_q, n_k)
value = value.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
value_attention = torch.matmul(att, value) # (b, h, n_q, emb)
value_attention = value_attention.permute([0, 2, 1, 3]) # (b, n_q, h, emb)
value_attention = value_attention.reshape(
b, n_q, self.n_heads * self.embedding_size
) # (b, n_q, h*emb)
out = self.fc_out(value_attention) # (b, n_q, output_size)
return out, att
class SimpleTransformer(torch.nn.Module):
"""
A simple architecture inspired by https://arxiv.org/pdf/1909.07528.pdf that uses
multi-head self-attention to encode information about a "Self" and a list of
relevant "Entities".
"""
EPSILON = 1e-7
def __init__(
self,
x_self_size: int,
entities_sizes: List[int],
embedding_size: int,
output_size: Optional[int] = None,
):
super().__init__()
self.self_size = x_self_size
self.entities_sizes = entities_sizes
self.entities_num_max_elements: Optional[List[int]] = None
self.ent_encoders = torch.nn.ModuleList(
[
LinearEncoder(self.self_size + ent_size, 2, embedding_size)
for ent_size in self.entities_sizes
]
)
self.attention = MultiHeadAttention(
query_size=embedding_size,
key_size=embedding_size,
value_size=embedding_size,
output_size=embedding_size,
num_heads=4,
embedding_size=embedding_size,
)
self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
if output_size is None:
output_size = embedding_size
self.x_self_residual_layer = LinearEncoder(
embedding_size + x_self_size, 1, output_size
)
def forward(
self,
x_self: torch.Tensor,
entities: List[torch.Tensor],
key_masks: List[torch.Tensor],
) -> torch.Tensor:
# Gather the maximum number of entities information
if self.entities_num_max_elements is None:
self.entities_num_max_elements = []
for ent in entities:
self.entities_num_max_elements.append(ent.shape[1])
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []
for num_entities, ent in zip(self.entities_num_max_elements, entities):
expanded_self = x_self.reshape(-1, 1, self.self_size)
# .repeat(
# 1, num_entities, 1
# )
expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
# Generate the tensor that will serve as query, key and value to self attention
qkv = torch.cat(
[ent_encoder(x) for ent_encoder, x in zip(self.ent_encoders, self_and_ent)],
dim=1,
)
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
max_num_ent = sum(self.entities_num_max_elements)
output, _ = self.attention(qkv, qkv, qkv, mask, max_num_ent, max_num_ent)
# Residual
output = self.residual_layer(output) + qkv
# Average Pooling
numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
output = numerator / denominator
# Residual between x_self and the output of the module
output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))
return output
@staticmethod
def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
"""
Takes a list of tensors and returns a list of mask tensors with 1 where the input
was all zeros (along dimension 2) and 0 otherwise. This is used in the attention
layer to mask out the padding observations.
"""
with torch.no_grad():
# Generate the masking tensor for each entity tensor (mask only where all zeros)
key_masks: List[torch.Tensor] = [
(torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
for ent in observations
]
return key_masks