
Merge pull request #4825 from Unity-Technologies/sensor-types/MLA-1734-demo-provider

[WIP] Observation Types

GitHub · 3 years ago
Current commit: 67ad9651
47 files changed: 484 insertions, 218 deletions
1. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (11 changes)
2. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (52 changes)
3. docs/Python-API.md (9 changes)
4. gym-unity/gym_unity/envs/__init__.py (16 changes)
5. gym-unity/gym_unity/tests/test_gym.py (6 changes)
6. ml-agents-envs/mlagents_envs/base_env.py (30 changes)
7. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (56 changes)
8. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (27 changes)
9. ml-agents-envs/mlagents_envs/rpc_utils.py (33 changes)
10. ml-agents-envs/mlagents_envs/tests/test_envs.py (14 changes)
11. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (19 changes)
12. ml-agents-envs/mlagents_envs/tests/test_steps.py (6 changes)
13. ml-agents/mlagents/trainers/demo_loader.py (9 changes)
14. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
15. ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)
16. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
17. ml-agents/mlagents/trainers/sac/optimizer_torch.py (14 changes)
18. ml-agents/mlagents/trainers/tests/dummy_config.py (14 changes)
19. ml-agents/mlagents/trainers/tests/mock_brain.py (32 changes)
20. ml-agents/mlagents/trainers/tests/simple_test_envs.py (12 changes)
21. ml-agents/mlagents/trainers/tests/test_agent_processor.py (12 changes)
22. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)
23. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
24. ml-agents/mlagents/trainers/tests/test_trajectory.py (6 changes)
25. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (4 changes)
26. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (6 changes)
27. ml-agents/mlagents/trainers/tests/torch/test_networks.py (16 changes)
28. ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)
29. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
30. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (48 changes)
31. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (26 changes)
32. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (28 changes)
33. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (32 changes)
34. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)
35. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (6 changes)
36. ml-agents/mlagents/trainers/tests/torch/test_utils.py (6 changes)
37. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
38. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
39. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (4 changes)
40. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2 changes)
41. ml-agents/mlagents/trainers/torch/model_serialization.py (12 changes)
42. ml-agents/mlagents/trainers/torch/networks.py (32 changes)
43. ml-agents/mlagents/trainers/torch/utils.py (10 changes)
44. ml-agents/tests/yamato/scripts/run_llapi.py (6 changes)
45. protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto (8 changes)
46. com.unity.ml-agents/Runtime/Sensors/ITypedSensor.cs (31 changes)
47. com.unity.ml-agents/Runtime/Sensors/ITypedSensor.cs.meta (11 changes)

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (11 changes)


}
}
observationProto.Shape.AddRange(shape);
// Add the observation type, if any, to the observationProto
var typeSensor = sensor as ITypedSensor;
if (typeSensor != null)
{
observationProto.ObservationType = (ObservationTypeProto)typeSensor.GetObservationType();
}
else
{
observationProto.ObservationType = ObservationTypeProto.Default;
}
return observationProto;
}
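The Python enum ObservationType introduced in base_env.py (below) shares its integer values with ObservationTypeProto, so the value the C# side writes here can be recovered on the Python side with a plain enum cast. A minimal round-trip sketch (not part of the diff), assuming the packages from this PR are installed:

# Python
from mlagents_envs.base_env import ObservationType
from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto

proto = ObservationProto()
proto.shape.extend([3])
proto.observation_type = 1  # GOAL on the wire
assert ObservationType(proto.observation_type) == ObservationType.GOAL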

com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (52 changes)


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyK7AgoQT2JzZXJ2YXRp",
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUaGQoJRmxvYXREYXRh",
"EgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVz",
"c2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1M",
"QWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKBAwoQT2JzZXJ2YXRp",
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUSRAoQb2JzZXJ2YXRp",
"b25fdHlwZRgHIAEoDjIqLmNvbW11bmljYXRvcl9vYmplY3RzLk9ic2VydmF0",
"aW9uVHlwZVByb3RvGhkKCUZsb2F0RGF0YRIMCgRkYXRhGAEgAygCQhIKEG9i",
"c2VydmF0aW9uX2RhdGEqKQoUQ29tcHJlc3Npb25UeXBlUHJvdG8SCAoETk9O",
"RRAAEgcKA1BORxABKkYKFE9ic2VydmF0aW9uVHlwZVByb3RvEgsKB0RFRkFV",
"TFQQABIICgRHT0FMEAESCgoGUkVXQVJEEAISCwoHTUVTU0FHRRADQiWqAiJV",
"bml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::Unity.MLAgents.CommunicatorObjects.CompressionTypeProto), typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationTypeProto), }, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties", "ObservationType" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

internal enum CompressionTypeProto {
[pbr::OriginalName("NONE")] None = 0,
[pbr::OriginalName("PNG")] Png = 1,
}
internal enum ObservationTypeProto {
[pbr::OriginalName("DEFAULT")] Default = 0,
[pbr::OriginalName("GOAL")] Goal = 1,
[pbr::OriginalName("REWARD")] Reward = 2,
[pbr::OriginalName("MESSAGE")] Message = 3,
}
#endregion

compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
dimensionProperties_ = other.dimensionProperties_.Clone();
observationType_ = other.observationType_;
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return dimensionProperties_; }
}
/// <summary>Field number for the "observation_type" field.</summary>
public const int ObservationTypeFieldNumber = 7;
private global::Unity.MLAgents.CommunicatorObjects.ObservationTypeProto observationType_ = 0;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.ObservationTypeProto ObservationType {
get { return observationType_; }
set {
observationType_ = value;
}
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if(!dimensionProperties_.Equals(other.dimensionProperties_)) return false;
if (ObservationType != other.ObservationType) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= dimensionProperties_.GetHashCode();
if (ObservationType != 0) hash ^= ObservationType.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
dimensionProperties_.WriteTo(output, _repeated_dimensionProperties_codec);
if (ObservationType != 0) {
output.WriteRawTag(56);
output.WriteEnum((int) ObservationType);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
size += dimensionProperties_.CalculateSize(_repeated_dimensionProperties_codec);
if (ObservationType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) ObservationType);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
dimensionProperties_.Add(other.dimensionProperties_);
if (other.ObservationType != 0) {
ObservationType = other.ObservationType;
}
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 50:
case 48: {
dimensionProperties_.AddEntriesFrom(input, _repeated_dimensionProperties_codec);
break;
}
case 56: {
observationType_ = (global::Unity.MLAgents.CommunicatorObjects.ObservationTypeProto) input.ReadEnum();
break;
}
}

docs/Python-API.md (9 changes)


A `BehaviorSpec` has the following fields :
- `sensor_specs` is a List of `SensorSpec` objects : Each `SensorSpec`
data should be processed in the corresponding dimension. Note that the `SensorSpec`
have the same ordering as the ordering of observations in the DecisionSteps,
DecisionStep, TerminalSteps and TerminalStep.
- `observation_specs` is a List of `ObservationSpec` objects : Each `ObservationSpec`
data should be processed in the corresponding dimension. `observation_type` is an enum
corresponding to what type of observation is generating the data (i.e., default, goal,
etc). Note that the `ObservationSpec` have the same ordering as the ordering of observations
in the DecisionSteps, DecisionStep, TerminalSteps and TerminalStep.
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.
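As a quick illustration of the documented change, a sketch of reading the new field through the low-level API (the build name "EnvBuild" is a placeholder, not part of this PR):

from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name="EnvBuild")
env.reset()
for name, spec in env.behavior_specs.items():
    for obs_spec in spec.observation_specs:
        # observation_type is one of DEFAULT, GOAL, REWARD, MESSAGE.
        print(name, obs_spec.shape, obs_spec.observation_type)
env.close()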

gym-unity/gym_unity/envs/__init__.py (16 changes)


def _get_n_vis_obs(self) -> int:
result = 0
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
for obs_spec in self.group_spec.observation_specs:
if len(obs_spec.shape) == 3:
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
result.append(sen_spec.shape)
for obs_spec in self.group_spec.observation_specs:
if len(obs_spec.shape) == 3:
result.append(obs_spec.shape)
return result
def _get_vis_obs_list(

def _get_vec_obs_size(self) -> int:
result = 0
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 1:
result += sen_spec.shape[0]
for obs_spec in self.group_spec.observation_specs:
if len(obs_spec.shape) == 1:
result += obs_spec.shape[0]
return result
def render(self, mode="rgb_array"):
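The wrapper classifies observations purely by rank: rank-3 shapes (H, W, C) count as visual, rank-1 shapes are summed into one vector space. A self-contained sketch of that bookkeeping (the helper names are illustrative, not the wrapper's own):

from typing import List, Tuple

def count_visual(shapes: List[Tuple[int, ...]]) -> int:
    # Mirrors _get_n_vis_obs: rank-3 observations are visual.
    return sum(1 for s in shapes if len(s) == 3)

def vec_obs_size(shapes: List[Tuple[int, ...]]) -> int:
    # Mirrors _get_vec_obs_size: rank-1 observations are concatenated.
    return sum(s[0] for s in shapes if len(s) == 1)

assert count_visual([(8,), (84, 84, 3)]) == 1
assert vec_obs_size([(8,), (4,), (84, 84, 3)]) == 12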

gym-unity/gym_unity/tests/test_gym.py (6 changes)


TerminalSteps,
BehaviorMapping,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
sen_spec = create_sensor_specs_with_shapes(obs_shapes)
return BehaviorSpec(sen_spec, action_spec)
obs_spec = create_observation_specs_with_shapes(obs_shapes)
return BehaviorSpec(obs_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

ml-agents-envs/mlagents_envs/base_env.py (30 changes)


Any,
Mapping as MappingType,
)
from enum import IntFlag
from enum import IntFlag, Enum
import numpy as np
from mlagents_envs.exception import UnityActionException

:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for sen_spec in spec.sensor_specs:
for sen_spec in spec.observation_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for sen_spec in spec.sensor_specs:
for sen_spec in spec.observation_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,

VARIABLE_SIZE = 4
class SensorSpec(NamedTuple):
class ObservationType(Enum):
"""
An Enum which defines the type of information carried in the observation
of the agent.
"""
# Observation information is generic.
DEFAULT = 0
# Observation contains goal information for current task.
GOAL = 1
# Observation contains reward information for current task.
REWARD = 2
# Observation contains a message from another agent.
MESSAGE = 3
class ObservationSpec(NamedTuple):
"""
A NamedTuple containing information about the observation of Agents.
- shape is a Tuple of int : It corresponds to the shape of

- observation_type is an enum of ObservationType.
observation_type: ObservationType
class BehaviorSpec(NamedTuple):

- sensor_specs is a List of SensorSpec NamedTuple containing
- observation_specs is a List of ObservationSpec NamedTuple containing
information about the information of the Agent's observations such as their shapes.
The order of the SensorSpec is the same as the order of the observations of an
agent.

sensor_specs: List[SensorSpec]
observation_specs: List[ObservationSpec]
action_spec: ActionSpec
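A short usage sketch of the new NamedTuples, with fields in the order rpc_utils.py constructs them below (shape, dimension properties, observation type); the shapes are arbitrary, and DimensionProperty.NONE is assumed available from the pre-existing IntFlag:

from mlagents_envs.base_env import (
    ActionSpec,
    BehaviorSpec,
    DimensionProperty,
    ObservationSpec,
    ObservationType,
)

vector_obs = ObservationSpec((10,), (DimensionProperty.NONE,), ObservationType.DEFAULT)
goal_obs = ObservationSpec((3,), (DimensionProperty.NONE,), ObservationType.GOAL)
spec = BehaviorSpec([vector_obs, goal_obs], ActionSpec.create_continuous(2))
assert spec.observation_specs[1].observation_type == ObservationType.GOAL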

ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (56 changes)


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xbb\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x81\x03\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x12\x44\n\x10observation_type\x18\x07 \x01(\x0e\x32*.communicator_objects.ObservationTypeProto\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01*F\n\x14ObservationTypeProto\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x08\n\x04GOAL\x10\x01\x12\n\n\x06REWARD\x10\x02\x12\x0b\n\x07MESSAGE\x10\x03\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=396,
serialized_end=437,
serialized_start=466,
serialized_end=507,
_OBSERVATIONTYPEPROTO = _descriptor.EnumDescriptor(
name='ObservationTypeProto',
full_name='communicator_objects.ObservationTypeProto',
filename=None,
file=DESCRIPTOR,
values=[
_descriptor.EnumValueDescriptor(
name='DEFAULT', index=0, number=0,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='GOAL', index=1, number=1,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='REWARD', index=2, number=2,
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='MESSAGE', index=3, number=3,
options=None,
type=None),
],
containing_type=None,
options=None,
serialized_start=509,
serialized_end=579,
)
_sym_db.RegisterEnumDescriptor(_OBSERVATIONTYPEPROTO)
ObservationTypeProto = enum_type_wrapper.EnumTypeWrapper(_OBSERVATIONTYPEPROTO)
DEFAULT = 0
GOAL = 1
REWARD = 2
MESSAGE = 3

extension_ranges=[],
oneofs=[
],
serialized_start=349,
serialized_end=374,
serialized_start=419,
serialized_end=444,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='observation_type', full_name='communicator_objects.ObservationProto.observation_type', index=6,
number=7, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=394,
serialized_end=464,
_OBSERVATIONPROTO.fields_by_name['observation_type'].enum_type = _OBSERVATIONTYPEPROTO
_OBSERVATIONPROTO.oneofs_by_name['observation_data'].fields.append(
_OBSERVATIONPROTO.fields_by_name['compressed_data'])
_OBSERVATIONPROTO.fields_by_name['compressed_data'].containing_oneof = _OBSERVATIONPROTO.oneofs_by_name['observation_data']

DESCRIPTOR.message_types_by_name['ObservationProto'] = _OBSERVATIONPROTO
DESCRIPTOR.enum_types_by_name['CompressionTypeProto'] = _COMPRESSIONTYPEPROTO
DESCRIPTOR.enum_types_by_name['ObservationTypeProto'] = _OBSERVATIONTYPEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
ObservationProto = _reflection.GeneratedProtocolMessageType('ObservationProto', (_message.Message,), dict(

ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (27 changes)


NONE = typing___cast('CompressionTypeProto', 0)
PNG = typing___cast('CompressionTypeProto', 1)
class ObservationTypeProto(builtin___int):
    DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ...
    @classmethod
    def Name(cls, number: builtin___int) -> builtin___str: ...
    @classmethod
    def Value(cls, name: builtin___str) -> 'ObservationTypeProto': ...
    @classmethod
    def keys(cls) -> typing___List[builtin___str]: ...
    @classmethod
    def values(cls) -> typing___List['ObservationTypeProto']: ...
    @classmethod
    def items(cls) -> typing___List[typing___Tuple[builtin___str, 'ObservationTypeProto']]: ...
    DEFAULT = typing___cast('ObservationTypeProto', 0)
    GOAL = typing___cast('ObservationTypeProto', 1)
    REWARD = typing___cast('ObservationTypeProto', 2)
    MESSAGE = typing___cast('ObservationTypeProto', 3)
DEFAULT = typing___cast('ObservationTypeProto', 0)
GOAL = typing___cast('ObservationTypeProto', 1)
REWARD = typing___cast('ObservationTypeProto', 2)
MESSAGE = typing___cast('ObservationTypeProto', 3)
class ObservationProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
class FloatData(google___protobuf___message___Message):

compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
dimension_properties = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
observation_type = ... # type: ObservationTypeProto
@property
def float_data(self) -> ObservationProto.FloatData: ...

float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
dimension_properties : typing___Optional[typing___Iterable[builtin___int]] = None,
observation_type : typing___Optional[ObservationTypeProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"observation_type",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"observation_type",b"observation_type",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

ml-agents-envs/mlagents_envs/rpc_utils.py (33 changes)


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
ObservationSpec,
ObservationType,
)
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.timers import hierarchical_timer, timed

:param agent_info: protobuf object.
:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
dim_props = [
tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
sensor_specs = [
SensorSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
]
observation_specs = []
for obs in agent_info.observations:
observation_specs.append(
ObservationSpec(
tuple(obs.shape),
tuple(DimensionProperty(dim) for dim in obs.dimension_properties),
ObservationType(obs.observation_type),
)
)
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(sensor_specs, action_spec)
return BehaviorSpec(observation_specs, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, sensor_specs in enumerate(behavior_spec.sensor_specs):
is_visual = len(sensor_specs.shape) == 3
for obs_index, observation_specs in enumerate(behavior_spec.observation_specs):
is_visual = len(observation_specs.shape) == 3
obs_shape = cast(Tuple[int, int, int], sensor_specs.shape)
obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
obs_index, sensor_specs.shape, decision_agent_info_list
obs_index, observation_specs.shape, decision_agent_info_list
obs_index, sensor_specs.shape, terminal_agent_info_list
obs_index, observation_specs.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(

ml-agents-envs/mlagents_envs/tests/test_envs.py (14 changes)


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
assert len(spec.observation_specs) == len(decision_steps.obs)
assert len(spec.observation_specs) == len(terminal_steps.obs)
for sen_spec, obs in zip(spec.sensor_specs, decision_steps.obs):
for sen_spec, obs in zip(spec.observation_specs, decision_steps.obs):
for sen_spec, obs in zip(spec.sensor_specs, terminal_steps.obs):
for sen_spec, obs in zip(spec.observation_specs, terminal_steps.obs):
assert (n_agents,) + sen_spec.shape == obs.shape

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
for spec, obs in zip(spec.sensor_specs, decision_steps.obs):
assert len(spec.observation_specs) == len(decision_steps.obs)
assert len(spec.observation_specs) == len(terminal_steps.obs)
for spec, obs in zip(spec.observation_specs, decision_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (19 changes)


steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
def generate_list_agent_proto(

n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
create_observation_specs_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((10,))
create_observation_specs_with_shapes(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
create_observation_specs_with_shapes(shapes),
ActionSpec.create_discrete((2, 2, 6)),
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(10)
create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert [spec.shape for spec in behavior_spec.sensor_specs] == [(3,), (4,)]
assert [spec.shape for spec in behavior_spec.observation_specs] == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):

ml-agents-envs/mlagents_envs/tests/test_steps.py (6 changes)


ActionSpec,
BehaviorSpec,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
observation_specs=create_observation_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)

def test_empty_terminal_steps():
specs = BehaviorSpec(
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
observation_specs=create_observation_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)

ml-agents/mlagents/trainers/demo_loader.py (9 changes)


)
)
# check observations match
if len(behavior_spec.sensor_specs) != len(expected_behavior_spec.sensor_specs):
if len(behavior_spec.observation_specs) != len(
expected_behavior_spec.observation_specs
):
zip(behavior_spec.sensor_specs, expected_behavior_spec.sensor_specs)
zip(
behavior_spec.observation_specs,
expected_behavior_spec.observation_specs,
)
):
if demo_obs.shape != policy_obs.shape:
raise RuntimeError(

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)


def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
n_obs = len(self.policy.behavior_spec.sensor_specs)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors

ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
sensor_specs=self.behavior_spec.sensor_specs,
observation_specs=self.behavior_spec.observation_specs,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
n_obs = len(self.policy.behavior_spec.sensor_specs)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

ml-agents/mlagents/trainers/sac/optimizer_torch.py (14 changes)


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents_envs.base_env import ActionSpec, SensorSpec
from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

def __init__(
self,
stream_names: List[str],
sensor_specs: List[SensorSpec],
observation_specs: List[ObservationSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
):

self.q1_network = ValueNetwork(
stream_names,
sensor_specs,
observation_specs,
network_settings,
num_action_ins,
num_value_outs,

sensor_specs,
observation_specs,
network_settings,
num_action_ins,
num_value_outs,

self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.sensor_specs,
self.policy.behavior_spec.observation_specs,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.sensor_specs,
self.policy.behavior_spec.observation_specs,
policy_network_settings,
)
ModelUtils.soft_update(

for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
n_obs = len(self.policy.behavior_spec.sensor_specs)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

ml-agents/mlagents/trainers/tests/dummy_config.py (14 changes)


from typing import List, Tuple
from mlagents_envs.base_env import SensorSpec, DimensionProperty
from mlagents_envs.base_env import ObservationSpec, DimensionProperty, ObservationType
import pytest
import copy
import os

return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def create_sensor_specs_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
sen_spec: List[SensorSpec] = []
def create_observation_specs_with_shapes(
shapes: List[Tuple[int, ...]]
) -> List[ObservationSpec]:
obs_specs: List[ObservationSpec] = []
spec = SensorSpec(shape, dim_prop)
sen_spec.append(spec)
return sen_spec
spec = ObservationSpec(shape, dim_prop, ObservationType.DEFAULT)
obs_specs.append(spec)
return obs_specs
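The diff elides the loop body of the renamed helper; a hedged reconstruction of the whole function, assuming each dimension defaults to DimensionProperty.UNSPECIFIED:

from typing import List, Tuple
from mlagents_envs.base_env import (
    DimensionProperty,
    ObservationSpec,
    ObservationType,
)

def create_observation_specs_with_shapes(
    shapes: List[Tuple[int, ...]]
) -> List[ObservationSpec]:
    obs_specs: List[ObservationSpec] = []
    for shape in shapes:
        # One property per dimension; the DEFAULT type keeps tests unchanged.
        dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
        spec = ObservationSpec(shape, dim_prop, ObservationType.DEFAULT)
        obs_specs.append(spec)
    return obs_specs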

ml-agents/mlagents/trainers/tests/mock_brain.py (32 changes)


from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
SensorSpec,
ObservationSpec,
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
sensor_specs: List[SensorSpec],
observation_specs: List[ObservationSpec],
action_spec: ActionSpec,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
:List sensor_specs: A List of the observation specs in your steps
:List observation_specs: A List of the observation specs in your steps
for sen_spec in sensor_specs:
obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
for obs_spec in observation_specs:
obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(sensor_specs, action_spec)
behavior_spec = BehaviorSpec(observation_specs, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
sensor_specs=behavior_spec.sensor_specs,
observation_specs=behavior_spec.observation_specs,
action_spec=behavior_spec.action_spec,
)

sensor_specs: List[SensorSpec],
observation_specs: List[ObservationSpec],
action_spec: ActionSpec,
max_step_complete: bool = False,
memory_size: int = 10,

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
for obs_spec in observation_specs:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
reward = 1.0
done = False
action = ActionTuple(

)
steps_list.append(experience)
obs = []
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
for obs_spec in observation_specs:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.sensor_specs,
behavior_spec.observation_specs,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
sen_spec = create_sensor_specs_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(sen_spec, action_spec)
obs_spec = create_observation_specs_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
return behavior_spec

ml-agents/mlagents/trainers/tests/simple_test_envs.py (12 changes)


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
ObservationSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,

from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_sensor_specs(), action_spec)
self.behavior_spec = BehaviorSpec(self._make_observation_specs(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

self.action[name] = None
self.step_result[name] = None
def _make_sensor_specs(self) -> SensorSpec:
def _make_observation_specs(self) -> List[ObservationSpec]:
sen_spec = create_sensor_specs_with_shapes(obs_shape)
return sen_spec
obs_spec = create_observation_specs_with_shapes(obs_shape)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []

ml-agents/mlagents/trainers/tests/test_agent_processor.py (12 changes)


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
sensor_specs=create_sensor_specs_with_shapes(
observation_specs=create_observation_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
sensor_specs=create_sensor_specs_with_shapes(
observation_specs=create_observation_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
observation_specs=create_observation_specs_with_shapes([(8,)]),
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
observation_specs=create_observation_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
observation_specs=create_observation_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert np.sum(behavior_spec.observation_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert np.sum(behavior_spec.observation_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
import os.path

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
observation_specs=create_observation_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
observation_specs=create_observation_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

ml-agents/mlagents/trainers/tests/test_trajectory.py (6 changes)


from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
observation_specs=create_observation_specs_with_shapes(
[(VEC_OBS_SIZE,), (84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

ml-agents/mlagents/trainers/tests/torch/test_ghost.py (4 changes)


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
observation_specs=create_observation_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (6 changes)


network_settings=new_network_settings,
max_steps=10000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(
env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1212
)
@pytest.mark.parametrize("num_visual", [1, 2])

network_settings=new_networksettings,
max_steps=3500,
)
check_environment_trains(env, {BRAIN_NAME: config})
check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1212)

ml-agents/mlagents/trainers/tests/torch/test_networks.py (16 changes)


)
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes),
create_observation_specs_with_shapes(obs_shapes),
network_settings,
encoded_act_size=2,
)

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes), network_settings
create_observation_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

obs_shapes = [(vec_obs_size,), obs_size]
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes), network_settings
create_observation_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
obs_spec = create_observation_specs_with_shapes([(obs_size,)])
stream_names, sen_spec, network_settings, outputs_per_stream=num_outputs
stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
)
optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
obs_spec = create_observation_specs_with_shapes([(obs_size,)])
actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(

ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)


buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
agent_action = AgentAction.from_dict(buffer)
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
sensor_specs=optimizer.policy.behavior_spec.sensor_specs,
observation_specs=optimizer.policy.behavior_spec.observation_specs,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (48 changes)


create_agent_buffer,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
[
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
)
],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

"behavior_spec",
[
BehaviorSpec(
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (26 changes)


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (28 changes)


from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
DiscriminatorNetwork,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
CONTINUOUS_PATH = (

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_observation_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_observation_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

"behavior_spec",
[
BehaviorSpec(
create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

"behavior_spec",
[
BehaviorSpec(
create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
],
)
@pytest.mark.parametrize("use_actions", [False, True])

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (32 changes)


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(
create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE
),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)


) -> AgentBuffer:
buffer = AgentBuffer()
curr_obs = [
np.random.normal(size=sen_spec.shape).astype(np.float32)
for sen_spec in behavior_spec.sensor_specs
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.observation_specs
np.random.normal(size=sen_spec.shape).astype(np.float32)
for sen_spec in behavior_spec.sensor_specs
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.observation_specs
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (6 changes)


new_hyperparams = attr.evolve(
SAC_TORCH_CONFIG.hyperparameters,
batch_size=256,
learning_rate=1e-3,
learning_rate=1e-4,
buffer_init_steps=1000,
steps_per_update=2,
)

network_settings=new_networksettings,
max_steps=2000,
max_steps=4000,
check_environment_trains(env, {BRAIN_NAME: config})
check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1213)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])

ml-agents/mlagents/trainers/tests/torch/test_utils.py (6 changes)


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
-from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

def test_min_visual_size():

    for _ in range(num_visual):
        obs_shapes.append(vis_obs_shape)
    h_size = 128
-    sen_spec = create_sensor_specs_with_shapes(obs_shapes)
+    obs_spec = create_observation_specs_with_shapes(obs_shapes)
-        sen_spec, h_size, encoder_type, normalize
+        obs_spec, h_size, encoder_type, normalize
    )
    total_output = sum(embedding_sizes)
    vec_enc = []

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


        Helper function for update_batch.
        """
        np_obs = ObsUtil.from_buffer(
-            mini_batch_demo, len(self.policy.behavior_spec.sensor_specs)
+            mini_batch_demo, len(self.policy.behavior_spec.observation_specs)
        )
        # Convert to tensors
        tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
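Editor's note: the change above only swaps which spec list supplies the observation count; ObsUtil.from_buffer itself regroups the flattened demo buffer back into one array per observation. A presumed sketch of that behavior (the indexed key naming is an assumption, not a quote of the trainer code):

import numpy as np


def from_buffer_sketch(batch, num_obs):
    # Presumed behavior: each observation stream i is stored under an indexed
    # key, so re-collecting them yields one array per ObservationSpec.
    return [np.asarray(batch[f"obs_{i}"]) for i in range(num_obs)]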

4
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        )
-        self._state_encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
+        self._state_encoder = NetworkBody(
+            specs.observation_specs, state_encoder_settings
+        )
        self._action_flattener = ActionFlattener(self._action_spec)

4
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


        unencoded_size = (
            self._action_flattener.flattened_size + 1 if settings.use_actions else 0
        )  # +1 is for dones
-        self.encoder = NetworkBody(specs.sensor_specs, encoder_settings, unencoded_size)
+        self.encoder = NetworkBody(
+            specs.observation_specs, encoder_settings, unencoded_size
+        )
        estimator_input_size = settings.encoding_size
        if settings.use_vail:
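Editor's note: the arithmetic in this hunk sizes the extra, un-encoded inputs fed to the GAIL discriminator: the flattened action vector plus one slot for the done flag, or zero when actions aren't used. Worked with toy numbers:

flattened_size = 5          # e.g. a 5-dimensional continuous action
use_actions = True
unencoded_size = (flattened_size + 1) if use_actions else 0  # +1 for dones
assert unencoded_size == 6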

2
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        )
-        self._encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
+        self._encoder = NetworkBody(specs.observation_specs, state_encoder_settings)

    def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
        n_obs = len(self._encoder.processors)

12
ml-agents/mlagents/trainers/torch/model_serialization.py


        batch_dim = [1]
        seq_len_dim = [1]
        vec_obs_size = 0
-        for sens_spec in self.policy.behavior_spec.sensor_specs:
+        for sens_spec in self.policy.behavior_spec.observation_specs:

-            for sens_spec in self.policy.behavior_spec.sensor_specs
+            for sens_spec in self.policy.behavior_spec.observation_specs

-        # (It's NHWC in self.policy.behavior_spec.sensor_specs.shape)
+        # (It's NHWC in self.policy.behavior_spec.observation_specs.shape)
-                batch_dim + [sen_spec.shape[2], sen_spec.shape[0], sen_spec.shape[1]]
+                batch_dim + [obs_spec.shape[2], obs_spec.shape[0], obs_spec.shape[1]]
-            for sen_spec in self.policy.behavior_spec.sensor_specs
-            if len(sen_spec.shape) == 3
+            for obs_spec in self.policy.behavior_spec.observation_specs
+            if len(obs_spec.shape) == 3
        ]
        dummy_masks = torch.ones(
            batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
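Editor's note: the comment in this hunk is the key detail — ObservationSpec.shape stores visual observations as NHWC, while the serialized model wants NCHW dummy inputs. A small sketch of the index shuffle (shape chosen arbitrarily):

import torch

nhwc_shape = (84, 86, 1)  # (H, W, C), as stored in an ObservationSpec
batch_dim = [1]
# Reorder to (N, C, H, W), exactly as the list arithmetic above does.
nchw_dims = batch_dim + [nhwc_shape[2], nhwc_shape[0], nhwc_shape[1]]
dummy_vis_obs = torch.zeros(nchw_dims)
assert list(dummy_vis_obs.shape) == [1, 1, 84, 86]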

32
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.torch_utils import torch, nn
-from mlagents_envs.base_env import ActionSpec, SensorSpec
+from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.torch.action_model import ActionModel
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs

class NetworkBody(nn.Module):
    def __init__(
        self,
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        encoded_act_size: int = 0,
    ):

        )
        self.processors, self.embedding_sizes = ModelUtils.create_input_processors(
-            sensor_specs,
+            observation_specs,
            self.h_size,
            network_settings.vis_encode_type,
            normalize=self.normalize,

    def __init__(
        self,
        stream_names: List[str],
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        encoded_act_size: int = 0,
        outputs_per_stream: int = 1,

        nn.Module.__init__(self)
        self.network_body = NetworkBody(
-            sensor_specs, network_settings, encoded_act_size=encoded_act_size
+            observation_specs, network_settings, encoded_act_size=encoded_act_size
        )
        if network_settings.memory is not None:
            encoding_size = network_settings.memory.memory_size // 2

class SimpleActor(nn.Module, Actor):
    def __init__(
        self,
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,

            ),
            requires_grad=False,
        )
-        self.network_body = NetworkBody(sensor_specs, network_settings)
+        self.network_body = NetworkBody(observation_specs, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:

class SharedActorCritic(SimpleActor, ActorCritic):
    def __init__(
        self,
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        stream_names: List[str],

        self.use_lstm = network_settings.memory is not None
        super().__init__(
-            sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
+            observation_specs,
+            network_settings,
+            action_spec,
+            conditional_sigma,
+            tanh_squash,
        )
        self.stream_names = stream_names
        self.value_heads = ValueHeads(stream_names, self.encoding_size)

class SeparateActorCritic(SimpleActor, ActorCritic):
    def __init__(
        self,
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        stream_names: List[str],

        self.use_lstm = network_settings.memory is not None
        super().__init__(
-            sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
+            observation_specs,
+            network_settings,
+            action_spec,
+            conditional_sigma,
+            tanh_squash,
        )
-        self.critic = ValueNetwork(stream_names, sensor_specs, network_settings)
+        self.critic = ValueNetwork(stream_names, observation_specs, network_settings)

    @property
    def memory_size(self) -> int:
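Editor's note: after this rename, every network class in the module is constructed from a list of ObservationSpec. A hedged usage sketch, reusing the dummy_config helper from earlier in this diff and assuming NetworkSettings accepts these keyword arguments:

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
from mlagents.trainers.torch.networks import NetworkBody

# One vector observation and one visual observation.
obs_specs = create_observation_specs_with_shapes([(10,), (64, 66, 3)])
body = NetworkBody(obs_specs, NetworkSettings(hidden_units=128, num_layers=2))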

10
ml-agents/mlagents/trainers/torch/utils.py


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
-from mlagents_envs.base_env import SensorSpec
+from mlagents_envs.base_env import ObservationSpec

class ModelUtils:

    @staticmethod
    def create_input_processors(
-        sensor_specs: List[SensorSpec],
+        observation_specs: List[ObservationSpec],
        h_size: int,
        vis_encode_type: EncoderType,
        normalize: bool = False,

-        :param sensor_specs: List of SensorSpec that represent the observation dimensions.
+        :param observation_specs: List of ObservationSpec that represent the observation dimensions.
        :param action_size: Number of additional un-normalized inputs to each vector encoder. Used for
            conditioning network on other values (e.g. actions for a Q function)
        :param h_size: Number of hidden units per layer.

        """
        encoders: List[nn.Module] = []
        embedding_sizes: List[int] = []
-        for sen_spec in sensor_specs:
+        for obs_spec in observation_specs:
-                sen_spec.shape, normalize, h_size, vis_encode_type
+                obs_spec.shape, normalize, h_size, vis_encode_type
            )
            encoders.append(encoder)
            embedding_sizes.append(embedding_size)
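Editor's note: the loop above yields parallel lists — one encoder and one embedding width per spec — which callers like NetworkBody unpack together. A sketch of that contract, reusing names from this diff:

from mlagents.trainers.settings import EncoderType
from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
from mlagents.trainers.torch.utils import ModelUtils

observation_specs = create_observation_specs_with_shapes([(10,), (64, 66, 3)])
encoders, embedding_sizes = ModelUtils.create_input_processors(
    observation_specs, 128, EncoderType.SIMPLE, normalize=False
)
# Parallel lists: encoders[i] consumes observations matching
# observation_specs[i] and emits embedding_sizes[i] features.
assert len(encoders) == len(embedding_sizes) == len(observation_specs)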

6
ml-agents/tests/yamato/scripts/run_llapi.py


    decision_steps, terminal_steps = env.get_steps(group_name)

    # Examine the number of observations per Agent
-    print("Number of observations : ", len(group_spec.sensor_specs))
+    print("Number of observations : ", len(group_spec.observation_specs))

-    vis_obs = any(len(sen_spec.shape) == 3 for sen_spec in group_spec.sensor_specs)
+    vis_obs = any(
+        len(obs_spec.shape) == 3 for obs_spec in group_spec.observation_specs
+    )
    print("Is there a visual observation ?", vis_obs)

    # Examine the state space for the first observation for the first agent
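Editor's note: the script's visual-observation check generalizes — a rank-3 shape (H, W, C) marks a visual observation, rank 1 a vector. An equivalent sketch over the renamed spec list, reusing group_spec from the script above:

for obs_spec in group_spec.observation_specs:
    kind = "visual" if len(obs_spec.shape) == 3 else "vector"
    print(kind, obs_spec.shape)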

8
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


    PNG = 1;
}

+enum ObservationTypeProto {
+    DEFAULT = 0;
+    GOAL = 1;
+    REWARD = 2;
+    MESSAGE = 3;
+}

message ObservationProto {
    message FloatData {
        repeated float data = 1;

    }
    repeated int32 compressed_channel_mapping = 5;
    repeated int32 dimension_properties = 6;
+    ObservationTypeProto observation_type = 7;
}
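Editor's note: on the Python side, the regenerated observation_pb2 module (listed in this change set) exposes a top-level enum's values as module-level constants, so tagging an observation looks roughly like this sketch:

from mlagents_envs.communicator_objects import observation_pb2

obs = observation_pb2.ObservationProto()
obs.shape.extend([10])
# Field 7; an unset observation_type deserializes as DEFAULT (0).
obs.observation_type = observation_pb2.GOAL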

31
com.unity.ml-agents/Runtime/Sensors/ITypedSensor.cs


namespace Unity.MLAgents.Sensors
{
    /// <summary>
    /// The ObservationType enum of the Sensor.
    /// </summary>
    internal enum ObservationType
    {
        // Collected observations are generic.
        Default = 0,

        // Collected observations contain goal information.
        Goal = 1,

        // Collected observations contain reward information.
        Reward = 2,

        // Collected observations are messages from other agents.
        Message = 3,
    }

    /// <summary>
    /// Sensor interface for sensors with variable types.
    /// </summary>
    internal interface ITypedSensor
    {
        /// <summary>
        /// Returns the ObservationType enum corresponding to the type of the sensor.
        /// </summary>
        /// <returns>The ObservationType enum</returns>
        ObservationType GetObservationType();
    }
}
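Editor's note: this C# enum is mirrored on the Python side (the base_env.py change in this PR), with values lining up with ObservationTypeProto so the two sides round-trip. A quick check, assuming the Python enum carries the same members and values:

from mlagents_envs.base_env import ObservationType

assert ObservationType.DEFAULT.value == 0
assert ObservationType.GOAL.value == 1
assert ObservationType.REWARD.value == 2
assert ObservationType.MESSAGE.value == 3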

11
com.unity.ml-agents/Runtime/Sensors/ITypedSensor.cs.meta


fileFormatVersion: 2
guid: 3751edac8122c411dbaef8f1b7043b82
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData: 
  assetBundleName: 
  assetBundleVariant: 