
WIP Made initial changes to enable dimension properties and added attention module

/layernorm
vincentpierre, 3 years ago
Commit 8cb050ef
38 files changed, with 777 insertions and 119 deletions
  1. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (6 lines changed)
  2. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (33 lines changed)
  3. gym-unity/gym_unity/envs/__init__.py (6 lines changed)
  4. gym-unity/gym_unity/tests/test_gym.py (4 lines changed)
  5. ml-agents-envs/mlagents_envs/base_env.py (57 lines changed)
  6. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (19 lines changed)
  7. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (6 lines changed)
  8. ml-agents-envs/mlagents_envs/rpc_utils.py (11 lines changed)
  9. ml-agents-envs/mlagents_envs/tests/test_envs.py (14 lines changed)
  10. ml-agents-envs/mlagents_envs/tests/test_registry.py (2 lines changed)
  11. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (31 lines changed)
  12. ml-agents-envs/mlagents_envs/tests/test_steps.py (7 lines changed)
  13. ml-agents/mlagents/trainers/demo_loader.py (8 lines changed)
  14. ml-agents/mlagents/trainers/policy/policy.py (6 lines changed)
  15. ml-agents/mlagents/trainers/policy/torch_policy.py (2 lines changed)
  16. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 lines changed)
  17. ml-agents/mlagents/trainers/tests/mock_brain.py (14 lines changed)
  18. ml-agents/mlagents/trainers/tests/simple_test_envs.py (10 lines changed)
  19. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 lines changed)
  20. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (6 lines changed)
  21. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 lines changed)
  22. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (40 lines changed)
  23. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 lines changed)
  24. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (28 lines changed)
  25. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (28 lines changed)
  26. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (4 lines changed)
  27. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (2 lines changed)
  28. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (2 lines changed)
  29. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 lines changed)
  30. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (4 lines changed)
  31. ml-agents/mlagents/trainers/torch/model_serialization.py (4 lines changed)
  32. ml-agents/tests/yamato/scripts/run_llapi.py (4 lines changed)
  33. protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto (1 line changed)
  34. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (84 lines changed)
  35. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (27 lines changed)
  36. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (47 lines changed)
  37. ml-agents/mlagents/trainers/tests/torch/test_attention.py (162 lines changed)
  38. ml-agents/mlagents/trainers/torch/attention.py (191 lines changed)

6
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
var dimensionPropertySensor = sensor as IDimensionPropertiesSensor;
if (dimensionPropertySensor != null)
{
observationProto.DimensionProperties.AddRange(dimensionPropertySensor.GetDimensionProperties());
}
}
observationProto.Shape.AddRange(shape);
return observationProto;

33
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyK7AgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUaGQoJRmxvYXREYXRh",
"EgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVz",
"c2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1M",
"QWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
dimensionProperties_ = other.dimensionProperties_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "dimension_properties" field.</summary>
public const int DimensionPropertiesFieldNumber = 6;
private static readonly pb::FieldCodec<int> _repeated_dimensionProperties_codec
= pb::FieldCodec.ForInt32(50);
private readonly pbc::RepeatedField<int> dimensionProperties_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DimensionProperties {
get { return dimensionProperties_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if(!dimensionProperties_.Equals(other.dimensionProperties_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= dimensionProperties_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
dimensionProperties_.WriteTo(output, _repeated_dimensionProperties_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
size += dimensionProperties_.CalculateSize(_repeated_dimensionProperties_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
dimensionProperties_.Add(other.dimensionProperties_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 50:
case 48: {
dimensionProperties_.AddEntriesFrom(input, _repeated_dimensionProperties_codec);
break;
}
}

6
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
result += 1
return result

for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
result.append(shape)
return result

def _get_vec_obs_size(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 1:
result += shape[0]
return result

4
gym-unity/gym_unity/tests/test_gym.py


from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
ActionSpec,
DecisionSteps,
TerminalSteps,

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return BehaviorSpec(obs_shapes, action_spec)
obs_spec = ObservationSpec.create_simple(obs_shapes)
return BehaviorSpec(obs_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

57
ml-agents-envs/mlagents_envs/base_env.py


Any,
Mapping as MappingType,
)
from enum import IntFlag
import numpy as np
from mlagents_envs.exception import UnityActionException

:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,

return ActionSpec(0, discrete_branches)
class DimensionProperty(IntFlag):
"""
No properties specified.
"""
UNSPECIFIED = 0
"""
No property of the observation in that dimension. Observations can be processed with
fully connected networks.
"""
NONE = 1
"""
Means it is possible to do a convolution in this dimension.
"""
TRANSLATIONAL_EQUIVARIANCE = 2
"""
Means that there can be a variable number of observations in this dimension.
The observations are unordered.
"""
VARIABLE_SIZE = 3
class ObservationSpec(NamedTuple):
"""
A NamedTuple containing information about the observation of Agents under the
same behavior.
- shapes is a List of Tuples of ints: Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- dimension_properties is a List of Tuples of DimensionProperty flags. Each Tuple
corresponds to an observation's properties. The tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
"""
shapes: List[Tuple[int, ...]]
dimension_properties: List[Tuple[DimensionProperty, ...]]
@staticmethod
def create_simple(shapes: List[Tuple[int, ...]]) -> "ObservationSpec":
dim_prop: List[Tuple[DimensionProperty, ...]] = []
for shape in shapes:
dim_prop += [(DimensionProperty.UNSPECIFIED,) * len(shape)]
return ObservationSpec(shapes, dim_prop)
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- observation_spec is an ObservationSpec NamedTuple containing information about
the Agent's observations, such as their shapes.
observation_shapes: List[Tuple]
observation_spec: ObservationSpec
action_spec: ActionSpec
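
For reference, a minimal sketch (not part of the diff) of how the new ObservationSpec is meant to be built from these definitions, either through create_simple (every dimension marked UNSPECIFIED) or with explicit DimensionProperty tuples. The shapes and property choices below are illustrative only.

from mlagents_envs.base_env import (
    ActionSpec,
    BehaviorSpec,
    DimensionProperty,
    ObservationSpec,
)

# One vector observation and one buffer-style observation of up to 20 entities
# with 6 floats each (illustrative values, not from the commit).
shapes = [(10,), (20, 6)]

# create_simple marks every dimension of every observation as UNSPECIFIED.
simple_spec = ObservationSpec.create_simple(shapes)
assert simple_spec.dimension_properties[0] == (DimensionProperty.UNSPECIFIED,)

# Explicit properties: the second observation is variable-size along its first
# dimension and plain (fully connected) along its second.
spec = ObservationSpec(
    shapes=shapes,
    dimension_properties=[
        (DimensionProperty.NONE,),
        (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE),
    ],
)
behavior_spec = BehaviorSpec(spec, ActionSpec.create_continuous(2))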

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xbb\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=366,
serialized_end=407,
serialized_start=396,
serialized_end=437,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
serialized_start=319,
serialized_end=344,
serialized_start=349,
serialized_end=374,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='dimension_properties', full_name='communicator_objects.ObservationProto.dimension_properties', index=5,
number=6, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=364,
serialized_end=394,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
dimension_properties = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
dimension_properties : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

11
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
ObservationSpec,
DimensionProperty,
BehaviorSpec,
DecisionSteps,
TerminalSteps,

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
dim_props = [
tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
obs_spec = ObservationSpec(observation_shape, dim_props)
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)
return BehaviorSpec(obs_spec, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(behavior_spec.observation_shapes):
for obs_index, obs_shape in enumerate(behavior_spec.observation_spec.shapes):
is_visual = len(obs_shape) == 3
if is_visual:
obs_shape = cast(Tuple[int, int, int], obs_shape)
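
As a side note (not part of the diff), the raw ints arriving in obs.dimension_properties map back onto the DimensionProperty IntFlag exactly as behavior_spec_from_proto does above; a small illustrative check:

from mlagents_envs.base_env import DimensionProperty

raw_dims = [3, 1]  # e.g. a variable-size observation as the C# side might report it
props = tuple(DimensionProperty(d) for d in raw_dims)
assert props == (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)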

14
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
for shape, obs in zip(spec.observation_shapes, terminal_steps.obs):
for shape, obs in zip(spec.observation_spec.shapes, terminal_steps.obs):
assert (n_agents,) + shape == obs.shape

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

2
ml-agents-envs/mlagents_envs/tests/test_registry.py


for worker_id in range(2):
assert BASIC_ID in registry
env = registry[BASIC_ID].make(
base_port=6005, worker_id=worker_id, no_graphics=True
base_port=6002, worker_id=worker_id, no_graphics=True
)
env.reset()
env.step()

31
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
ActionSpec,
DecisionSteps,
TerminalSteps,

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert behavior_spec.observation_shapes == [(3,), (4,)]
assert behavior_spec.observation_spec.shapes == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

7
ml-agents-envs/mlagents_envs/tests/test_steps.py


from mlagents_envs.base_env import (
DecisionSteps,
ObservationSpec,
TerminalSteps,
ActionSpec,
BehaviorSpec,

def test_empty_decision_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

8
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.observation_shapes) != len(
expected_behavior_spec.observation_shapes
if len(behavior_spec.observation_spec.shapes) != len(
expected_behavior_spec.observation_spec.shapes
):
raise RuntimeError(
"The demonstrations do not have the same number of observations as the policy."

zip(
behavior_spec.observation_shapes,
expected_behavior_spec.observation_shapes,
behavior_spec.observation_spec.shapes,
expected_behavior_spec.observation_spec.shapes,
)
):
if demo_obs != policy_obs:

6
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
shape[0]
for shape in behavior_spec.observation_spec.shapes
if len(shape) == 1
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
1 for shape in behavior_spec.observation_spec.shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
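
To make the bookkeeping above concrete, a small illustration (not in the diff, shapes are hypothetical): vector observations are summed into vec_obs_size, rank-3 observations are counted as visual, and a rank-2 buffer-style observation currently falls through both checks.

shapes = [(8,), (84, 84, 3), (20, 6)]
vec_obs_size = sum(shape[0] for shape in shapes if len(shape) == 1)  # 8
vis_obs_count = sum(1 for shape in shapes if len(shape) == 3)        # 1
# The rank-2 (20, 6) observation is counted by neither branch here.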

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
observation_shapes=self.behavior_spec.observation_spec.shapes,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

4
ml-agents/mlagents/trainers/sac/optimizer_torch.py


self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.observation_spec.shapes,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.observation_spec.shapes,
policy_network_settings,
)
ModelUtils.soft_update(

14
ml-agents/mlagents/trainers/tests/mock_brain.py


from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
ObservationSpec,
BehaviorSpec,
ActionSpec,
ActionTuple,

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(observation_shapes, action_spec)
obs_spec = ObservationSpec.create_simple(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
observation_shapes=behavior_spec.observation_shapes,
observation_shapes=behavior_spec.observation_spec.shapes,
action_spec=behavior_spec.action_spec,
)

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.observation_shapes,
behavior_spec.observation_spec.shapes,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
else:
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
obs_spec = ObservationSpec.create_simple(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
return behavior_spec

10
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ObservationSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,

self.action[name] = None
self.step_result[name] = None
def _make_obs_spec(self) -> List[Any]:
obs_spec: List[Any] = []
def _make_obs_spec(self) -> ObservationSpec:
obs_shape: List[Any] = []
obs_spec.append((self.vec_obs_size,))
obs_shape.append((self.vec_obs_size,))
obs_spec.append(self.vis_obs_size)
obs_shape.append(self.vis_obs_size)
obs_spec = ObservationSpec.create_simple(obs_shape)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=3000,
max_steps=5000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

SAC_TORCH_CONFIG.hyperparameters,
buffer_size=50000,
batch_size=256,
buffer_init_steps=2000,
buffer_init_steps=0,
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2000
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
observation_shapes=optimizer.policy.behavior_spec.observation_spec.shapes,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

40
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


RNDRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

4
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_shapes
for shape in behavior_spec.observation_spec.shapes
for shape in behavior_spec.observation_shapes
for shape in behavior_spec.observation_spec.shapes
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

2
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
config = attr.evolve(
PPO_TORCH_CONFIG,
reward_signals=reward_signals,

2
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
specs.observation_spec.shapes, state_encoder_settings
)
self._action_flattener = ActionFlattener(self._action_spec)

2
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
self.encoder = NetworkBody(
specs.observation_shapes, encoder_settings, unencoded_size
specs.observation_spec.shapes, encoder_settings, unencoded_size
)
estimator_input_size = settings.encoding_size

4
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._encoder = NetworkBody(specs.observation_shapes, state_encoder_settings)
self._encoder = NetworkBody(
specs.observation_spec.shapes, state_encoder_settings
)
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
n_vis = len(self._encoder.visual_processors)

4
ml-agents/mlagents/trainers/torch/model_serialization.py


seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
# create input shape of NCHW
# (It's NHWC in self.policy.behavior_spec.observation_shapes)
# (It's NHWC in self.policy.behavior_spec.observation_spec.shapes)
for shape in self.policy.behavior_spec.observation_shapes
for shape in self.policy.behavior_spec.observation_spec.shapes
if len(shape) == 3
]
dummy_masks = torch.ones(

4
ml-agents/tests/yamato/scripts/run_llapi.py


decision_steps, terminal_steps = env.get_steps(group_name)
# Examine the number of observations per Agent
print("Number of observations : ", len(group_spec.observation_shapes))
print("Number of observations : ", len(group_spec.observation_spec.shapes))
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_shapes)
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_spec.shapes)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent

1
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


FloatData float_data = 4;
}
repeated int32 compressed_channel_mapping = 5;
repeated int32 dimension_properties = 6;
}
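
For illustration (not part of the diff), the new repeated field as seen from the regenerated Python protobuf class; the values below are arbitrary:

from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto

proto = ObservationProto()
proto.shape.extend([20, 6])
proto.dimension_properties.extend([3, 1])  # VARIABLE_SIZE, NONE as raw ints
assert list(proto.dimension_properties) == [3, 1]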

84
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor
{
private int m_MaxNumObs;
private int m_ObsSize;
float[] m_ObservationBuffer;
int m_CurrentNumObservables;
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;
m_ObsSize = obsSize;
m_ObservationBuffer = new float[m_ObsSize * m_MaxNumObs];
m_CurrentNumObservables = 0;
}
/// <inheritdoc/>
public int[] GetObservationShape()
{
return new int[] { m_MaxNumObs, m_ObsSize };
}
/// <summary>
/// Appends an observation to the buffer. If the buffer is full (the maximum number
/// of observations has been reached) the observation will be ignored. The length of
/// the provided observation array must be equal to the observation size of
/// the buffer sensor.
/// </summary>
/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;
}
for (int i = 0; i < obs.Length; i++)
{
m_ObservationBuffer[m_CurrentNumObservables * m_ObsSize + i] = obs[i];
}
m_CurrentNumObservables++;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
for (int i = 0; i < m_ObsSize * m_MaxNumObs; i++)
{
writer[i] = m_ObservationBuffer[i];
}
return m_ObsSize * m_MaxNumObs;
}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;
}
/// <inheritdoc/>
public void Update()
{
Reset();
}
/// <inheritdoc/>
public void Reset()
{
m_CurrentNumObservables = 0;
Array.Clear(m_ObservationBuffer, 0, m_ObservationBuffer.Length);
}
public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;
}
public string GetName()
{
return "BufferSensor";
}
}
}
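
On the trainer side, a BufferSensor observation arrives as a fixed (MaxNumObservables, ObservableSize) block in which rows that were never appended stay zero. A hedged sketch (not from the commit; shapes are illustrative) of how SimpleTransformer.get_masks, defined later in this commit, recovers those padding rows:

from mlagents.torch_utils import torch
from mlagents.trainers.torch.attention import SimpleTransformer

max_num_obs, obs_size = 4, 3
buffer_obs = torch.zeros((1, max_num_obs, obs_size))  # batch of one agent
buffer_obs[0, 0] = torch.tensor([0.1, 0.2, 0.3])      # only one entity was appended
masks = SimpleTransformer.get_masks([buffer_obs])[0]
# Rows that stayed all-zero are flagged with 1 so attention can ignore them.
assert masks.tolist() == [[0.0, 1.0, 1.0, 1.0]]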

27
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


using UnityEngine;
namespace Unity.MLAgents.Sensors
{
/// <summary>
/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;
/// <inheritdoc/>
public override ISensor CreateSensor()
{
return new BufferSensor(MaxNumObservables, ObservableSize);
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
return new[] { MaxNumObservables, ObservableSize };
}
}
}

47
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


namespace Unity.MLAgents.Sensors
{
/// <summary>
/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
internal enum DimensionProperty
{
/// <summary>
/// No properties specified.
/// </summary>
Unspecified = 0,
/// <summary>
/// No property of the observation in that dimension. Observation can be processed with
/// fully connected networks.
/// </summary>
None = 1,
/// <summary>
/// Means it is possible to do a convolution in this dimension.
/// </summary>
TranslationalEquivariance = 2,
/// <summary>
/// Means that there can be a variable number of observations in this dimension.
/// The observations are unordered.
/// </summary>
VariableSize = 3,
}
/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
internal interface IDimensionPropertiesSensor : ISensor
{
/// <summary>
/// Returns the array containing the properties of each dimension of the
/// observation. The length of the array must be equal to the rank of the
/// observation tensor.
/// </summary>
/// <returns>The array of DimensionProperty</returns>
DimensionProperty[] GetDimensionProperties();
}
}

162
ml-agents/mlagents/trainers/tests/torch/test_attention.py


from mlagents.torch_utils import torch
import numpy as np
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.attention import MultiHeadAttention, SimpleTransformer
def test_multi_head_attention_initialization():
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
query = torch.ones((b, n_q, q_size))
key = torch.ones((b, n_k, k_size))
value = torch.ones((b, n_k, v_size))
output, attention = mha.forward(query, key, value)
assert output.shape == (b, n_q, o_size)
assert attention.shape == (b, n_h, n_q, n_k)
def test_multi_head_attention_masking():
epsilon = 0.0001
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
# create a key input with some keys all 0
key = torch.ones((b, n_k, k_size))
mask = torch.zeros((b, n_k))
for i in range(n_k):
if i % 3 == 0:
key[:, i, :] = 0
mask[:, i] = 1
query = torch.ones((b, n_q, q_size))
value = torch.ones((b, n_k, v_size))
_, attention = mha.forward(query, key, value, mask)
for i in range(n_k):
if i % 3 == 0:
assert torch.sum(attention[:, :, :, i] ** 2) < epsilon
else:
assert torch.sum(attention[:, :, :, i] ** 2) > epsilon
def test_multi_head_attention_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_h, n_k, n_q = 3, 10, 5, 1
embedding_size = 64
mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(50):
query = torch.rand((batch_size, n_q, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
value = key
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum((query - key) ** 2, dim=2)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
prediction, _ = mha.forward(query, key, value)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.5
def test_zero_mask_layer():
batch_size, size = 10, 30
def generate_input_helper(pattern):
_input = torch.zeros((batch_size, 0, size))
for i in range(len(pattern)):
if i % 2 == 0:
_input = torch.cat(
[_input, torch.rand((batch_size, pattern[i], size))], dim=1
)
else:
_input = torch.cat(
[_input, torch.zeros((batch_size, pattern[i], size))], dim=1
)
return _input
masking_pattern_1 = [3, 2, 3, 4]
masking_pattern_2 = [5, 7, 8, 2]
input_1 = generate_input_helper(masking_pattern_1)
input_2 = generate_input_helper(masking_pattern_2)
masks = SimpleTransformer.get_masks([input_1, input_2])
assert len(masks) == 2
masks_1 = masks[0]
masks_2 = masks[1]
assert masks_1.shape == (batch_size, sum(masking_pattern_1))
assert masks_2.shape == (batch_size, sum(masking_pattern_2))
for i in range(len(masking_pattern_1)):
assert masks_1[0, sum(masking_pattern_1[:i])] == (0 if i % 2 == 0 else 1)
for i in range(len(masking_pattern_2)):
assert masks_2[0, sum(masking_pattern_2[:i])] == (0 if i % 2 == 0 else 1)
def test_simple_transformer_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_k, = 3, 5
embedding_size = 64
transformer = SimpleTransformer(size, [size], embedding_size)
l_layer = linear_layer(embedding_size, size)
optimizer = torch.optim.Adam(
list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(100):
center = torch.rand((batch_size, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum(
(center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
masks = SimpleTransformer.get_masks([key])
prediction = transformer.forward(center, [key], masks)
prediction = l_layer(prediction)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.3

191
ml-agents/mlagents/trainers/torch/attention.py


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
class MultiHeadAttention(torch.nn.Module):
"""
Multi Head Attention module. We do not use the regular Torch implementation since
Barracuda does not support some operators it uses.
Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, query_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
The forward method will return 2 tensors:
- The output: (batch_size, number_of_queries, output_size)
- The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
"""
NEG_INF = -1e6
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
super().__init__()
self.n_heads, self.embedding_size = num_heads, embedding_size
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
def forward(
self,
query: torch.Tensor,
key: torch.Tensor,
value: torch.Tensor,
key_mask: Optional[torch.Tensor] = None,
number_of_keys: int = -1,
number_of_queries: int = -1,
) -> Tuple[torch.Tensor, torch.Tensor]:
b = -1 # the batch size
# This is to avoid using .size() when possible, as Barracuda does not support it
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
query = self.fc_q(query) # (b, n_q, h*d)
key = self.fc_k(key) # (b, n_k, h*d)
value = self.fc_v(value) # (b, n_k, h*d)
query = query.reshape(b, n_q, self.n_heads, self.embedding_size)
key = key.reshape(b, n_k, self.n_heads, self.embedding_size)
value = value.reshape(b, n_k, self.n_heads, self.embedding_size)
query = query.permute([0, 2, 1, 3]) # (b, h, n_q, emb)
# The next few lines are equivalent to: key.permute([0, 2, 3, 1])
# This is a hack: ONNX would otherwise fuse the two permute operations into one, and
# Barracuda does not like seeing `permute([0,2,3,1])`
key = key.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
key -= 1
key += 1
key = key.permute([0, 1, 3, 2]) # (b, h, emb, n_k)
qk = torch.matmul(query, key) # (b, h, n_q, n_k)
if key_mask is None:
qk = qk / (self.embedding_size ** 0.5)
else:
key_mask = key_mask.reshape(b, 1, 1, n_k)
qk = (1 - key_mask) * qk / (
self.embedding_size ** 0.5
) + key_mask * self.NEG_INF
att = torch.softmax(qk, dim=3) # (b, h, n_q, n_k)
value = value.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
value_attention = torch.matmul(att, value) # (b, h, n_q, emb)
value_attention = value_attention.permute([0, 2, 1, 3]) # (b, n_q, h, emb)
value_attention = value_attention.reshape(
b, n_q, self.n_heads * self.embedding_size
) # (b, n_q, h*emb)
out = self.fc_out(value_attention) # (b, n_q, output_size)
return out, att
class SimpleTransformer(torch.nn.Module):
"""
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
"""
EPSILON = 1e-7
def __init__(
self,
x_self_size: int,
entities_sizes: List[int],
embedding_size: int,
output_size: Optional[int] = None,
):
super().__init__()
self.self_size = x_self_size
self.entities_sizes = entities_sizes
self.entities_num_max_elements: Optional[List[int]] = None
self.ent_encoders = torch.nn.ModuleList(
[
LinearEncoder(self.self_size + ent_size, 2, embedding_size)
for ent_size in self.entities_sizes
]
)
self.attention = MultiHeadAttention(
query_size=embedding_size,
key_size=embedding_size,
value_size=embedding_size,
output_size=embedding_size,
num_heads=4,
embedding_size=embedding_size,
)
self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
if output_size is None:
output_size = embedding_size
self.x_self_residual_layer = LinearEncoder(
embedding_size + x_self_size, 1, output_size
)
def forward(
self,
x_self: torch.Tensor,
entities: List[torch.Tensor],
key_masks: List[torch.Tensor],
) -> torch.Tensor:
# Gather the maximum number of entities information
if self.entities_num_max_elements is None:
self.entities_num_max_elements = []
for ent in entities:
self.entities_num_max_elements.append(ent.shape[1])
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []
for num_entities, ent in zip(self.entities_num_max_elements, entities):
expanded_self = x_self.reshape(-1, 1, self.self_size)
# .repeat(
# 1, num_entities, 1
# )
expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
# Generate the tensor that will serve as query, key and value to self attention
qkv = torch.cat(
[ent_encoder(x) for ent_encoder, x in zip(self.ent_encoders, self_and_ent)],
dim=1,
)
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
max_num_ent = sum(self.entities_num_max_elements)
output, _ = self.attention(qkv, qkv, qkv, mask, max_num_ent, max_num_ent)
# Residual
output = self.residual_layer(output) + qkv
# Average Pooling
numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
output = numerator / denominator
# Residual between x_self and the output of the module
output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))
return output
@staticmethod
def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
"""
Takes a List of Tensors and returns a List of mask Tensors with 1 if the input was
all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
layer to mask the padding observations.
"""
with torch.no_grad():
# Generate the masking tensors for each entities tensor (mask only if all zeros)
key_masks: List[torch.Tensor] = [
(torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
for ent in observations
]
return key_masks
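
A small numeric check (not part of the module) of the masking trick used in MultiHeadAttention.forward: padded keys get a NEG_INF logit, so their softmax weight collapses to roughly zero while the remaining weights still sum to one.

from mlagents.torch_utils import torch

NEG_INF = -1e6
emb = 4.0
qk = torch.tensor([[2.0, 1.0, 3.0]])    # raw scores of one query against 3 keys
mask = torch.tensor([[0.0, 0.0, 1.0]])  # the third key is padding
logits = (1 - mask) * qk / (emb ** 0.5) + mask * NEG_INF
att = torch.softmax(logits, dim=1)
assert att[0, 2].item() < 1e-6           # the padded key contributes nothing
assert abs(att[0].sum().item() - 1.0) < 1e-5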