浏览代码

Merge pull request #4763 from Unity-Technologies/develop-att

WIP Made initial changes to enable dimension properties and added attention module
/MLA-1734-demo-provider
GitHub 4 年前
当前提交
458fee17
共有 56 个文件被更改,包括 956 次插入217 次删除
  1. 11
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  2. 33
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
  3. 9
      docs/Python-API.md
  4. 16
      gym-unity/gym_unity/envs/__init__.py
  5. 4
      gym-unity/gym_unity/tests/test_gym.py
  6. 55
      ml-agents-envs/mlagents_envs/base_env.py
  7. 19
      ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
  8. 6
      ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
  9. 24
      ml-agents-envs/mlagents_envs/rpc_utils.py
  10. 20
      ml-agents-envs/mlagents_envs/tests/test_envs.py
  11. 2
      ml-agents-envs/mlagents_envs/tests/test_registry.py
  12. 31
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  13. 7
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  14. 11
      ml-agents/mlagents/trainers/demo_loader.py
  15. 2
      ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  16. 6
      ml-agents/mlagents/trainers/policy/policy.py
  17. 2
      ml-agents/mlagents/trainers/policy/torch_policy.py
  18. 2
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  19. 18
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  20. 3
      ml-agents/mlagents/trainers/tests/check_env_trains.py
  21. 11
      ml-agents/mlagents/trainers/tests/dummy_config.py
  22. 32
      ml-agents/mlagents/trainers/tests/mock_brain.py
  23. 15
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  24. 16
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  25. 4
      ml-agents/mlagents/trainers/tests/test_demo_loader.py
  26. 6
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  27. 4
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  28. 3
      ml-agents/mlagents/trainers/tests/torch/test_ghost.py
  29. 12
      ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
  30. 23
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  31. 4
      ml-agents/mlagents/trainers/tests/torch/test_policy.py
  32. 2
      ml-agents/mlagents/trainers/tests/torch/test_ppo.py
  33. 40
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  34. 13
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  35. 27
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  36. 28
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  37. 8
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  38. 2
      ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
  39. 4
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  40. 2
      ml-agents/mlagents/trainers/torch/components/bc/module.py
  41. 4
      ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  42. 4
      ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  43. 2
      ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
  44. 10
      ml-agents/mlagents/trainers/torch/model_serialization.py
  45. 32
      ml-agents/mlagents/trainers/torch/networks.py
  46. 10
      ml-agents/mlagents/trainers/torch/utils.py
  47. 4
      ml-agents/tests/yamato/scripts/run_llapi.py
  48. 1
      protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
  49. 95
      com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs
  50. 11
      com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs.meta
  51. 41
      com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs
  52. 11
      com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs.meta
  53. 47
      com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs
  54. 11
      com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs.meta
  55. 162
      ml-agents/mlagents/trainers/tests/torch/test_attention.py
  56. 191
      ml-agents/mlagents/trainers/torch/attention.py

11
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
// Add the dimension properties if any to the observationProto
var dimensionPropertySensor = sensor as IDimensionPropertiesSensor;
if (dimensionPropertySensor != null)
{
var dimensionProperties = dimensionPropertySensor.GetDimensionProperties();
int[] intDimensionProperties = new int[dimensionProperties.Length];
for (int i = 0; i < dimensionProperties.Length; i++)
{
observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
}
}
observationProto.Shape.AddRange(shape);
return observationProto;
}

33
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyK7AgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUaGQoJRmxvYXREYXRh",
"EgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVz",
"c2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1M",
"QWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
dimensionProperties_ = other.dimensionProperties_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "dimension_properties" field.</summary>
public const int DimensionPropertiesFieldNumber = 6;
private static readonly pb::FieldCodec<int> _repeated_dimensionProperties_codec
= pb::FieldCodec.ForInt32(50);
private readonly pbc::RepeatedField<int> dimensionProperties_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DimensionProperties {
get { return dimensionProperties_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if(!dimensionProperties_.Equals(other.dimensionProperties_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= dimensionProperties_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
dimensionProperties_.WriteTo(output, _repeated_dimensionProperties_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
size += dimensionProperties_.CalculateSize(_repeated_dimensionProperties_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
dimensionProperties_.Add(other.dimensionProperties_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 50:
case 48: {
dimensionProperties_.AddEntriesFrom(input, _repeated_dimensionProperties_codec);
break;
}
}

9
docs/Python-API.md


A `BehaviorSpec` has the following fields :
- `observation_shapes` is a List of Tuples of int : Each Tuple corresponds to an
observation's dimensions (without the number of agents dimension). The shape
tuples have the same ordering as the ordering of the DecisionSteps,
- `sensor_specs` is a List of `SensorSpec` objects : Each `SensorSpec`
corresponds to an observation's properties: `shape` is a tuple of ints that
corresponds to the shape of the observation (without the number of agents dimension).
`dimension_property` is a tuple of flags containing extra information about how the
data should be processed in the corresponding dimension. Note that the `SensorSpec`
have the same ordering as the ordering of observations in the DecisionSteps,
DecisionStep, TerminalSteps and TerminalStep.
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.

16
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
if len(shape) == 3:
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
for shape in self.group_spec.observation_shapes:
if len(shape) == 3:
result.append(shape)
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
result.append(sen_spec.shape)
return result
def _get_vis_obs_list(

def _get_vec_obs_size(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
if len(shape) == 1:
result += shape[0]
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 1:
result += sen_spec.shape[0]
return result
def render(self, mode="rgb_array"):

4
gym-unity/gym_unity/tests/test_gym.py


TerminalSteps,
BehaviorMapping,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return BehaviorSpec(obs_shapes, action_spec)
sen_spec = create_sensor_specs_with_shapes(obs_shapes)
return BehaviorSpec(sen_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

55
ml-agents-envs/mlagents_envs/base_env.py


Any,
Mapping as MappingType,
)
from enum import IntFlag
import numpy as np
from mlagents_envs.exception import UnityActionException

:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
for sen_spec in spec.sensor_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
for sen_spec in spec.sensor_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

return ActionSpec(0, discrete_branches)
class DimensionProperty(IntFlag):
"""
No properties specified.
"""
UNSPECIFIED = 0
"""
No Property of the observation in that dimension. Observation can be processed with
Fully connected networks.
"""
NONE = 1
"""
Means it is suitable to do a convolution in this dimension.
"""
TRANSLATIONAL_EQUIVARIANCE = 2
"""
Means that there can be a variable number of observations in this dimension.
The observations are unordered.
"""
VARIABLE_SIZE = 4
class SensorSpec(NamedTuple):
"""
A NamedTuple containing information about the observation of Agents.
- shape is a Tuple of int : It corresponds to the shape of
an observation's dimensions.
- dimension_property is a Tuple of DimensionProperties flag, one flag for each
dimension.
"""
shape: Tuple[int, ...]
dimension_property: Tuple[DimensionProperty, ...]
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- action_spec is an ActionSpec NamedTuple
- sensor_specs is a List of SensorSpec NamedTuple containing
information about the information of the Agent's observations such as their shapes.
The order of the SensorSpec is the same as the order of the observations of an
agent.
- action_spec is an ActionSpec NamedTuple.
observation_shapes: List[Tuple]
sensor_specs: List[SensorSpec]
action_spec: ActionSpec

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xbb\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=366,
serialized_end=407,
serialized_start=396,
serialized_end=437,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
serialized_start=319,
serialized_end=344,
serialized_start=349,
serialized_end=374,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='dimension_properties', full_name='communicator_objects.ObservationProto.dimension_properties', index=5,
number=6, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=364,
serialized_end=394,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
dimension_properties = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
dimension_properties : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

24
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
DimensionProperty,
BehaviorSpec,
DecisionSteps,
TerminalSteps,

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
# proto from comminicator < v1.3 does not set action spec, use deprecated fields instead
dim_props = [
tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
sensor_specs = [
SensorSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
]
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)
return BehaviorSpec(sensor_specs, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(behavior_spec.observation_shapes):
is_visual = len(obs_shape) == 3
for obs_index, sensor_specs in enumerate(behavior_spec.sensor_specs):
is_visual = len(sensor_specs.shape) == 3
obs_shape = cast(Tuple[int, int, int], obs_shape)
obs_shape = cast(Tuple[int, int, int], sensor_specs.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
obs_index, obs_shape, decision_agent_info_list
obs_index, sensor_specs.shape, decision_agent_info_list
obs_index, obs_shape, terminal_agent_info_list
obs_index, sensor_specs.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(

20
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
for sen_spec, obs in zip(spec.sensor_specs, decision_steps.obs):
assert (n_agents,) + sen_spec.shape == obs.shape
for shape, obs in zip(spec.observation_shapes, terminal_steps.obs):
assert (n_agents,) + shape == obs.shape
for sen_spec, obs in zip(spec.sensor_specs, terminal_steps.obs):
assert (n_agents,) + sen_spec.shape == obs.shape
@mock.patch("mlagents_envs.env_utils.launch_executable")

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
for spec, obs in zip(spec.sensor_specs, decision_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

2
ml-agents-envs/mlagents_envs/tests/test_registry.py


for worker_id in range(2):
assert BASIC_ID in registry
env = registry[BASIC_ID].make(
base_port=6005, worker_id=worker_id, no_graphics=True
base_port=6002, worker_id=worker_id, no_graphics=True
)
env.reset()
env.step()

31
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def generate_list_agent_proto(

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert behavior_spec.observation_shapes == [(3,), (4,)]
assert [spec.shape for spec in behavior_spec.sensor_specs] == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

7
ml-agents-envs/mlagents_envs/tests/test_steps.py


ActionSpec,
BehaviorSpec,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

11
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.observation_shapes) != len(
expected_behavior_spec.observation_shapes
):
if len(behavior_spec.sensor_specs) != len(expected_behavior_spec.sensor_specs):
zip(
behavior_spec.observation_shapes,
expected_behavior_spec.observation_shapes,
)
zip(behavior_spec.sensor_specs, expected_behavior_spec.sensor_specs)
if demo_obs != policy_obs:
if demo_obs.shape != policy_obs.shape:
raise RuntimeError(
f"The shape {demo_obs} for observation {i} in demonstration \
do not match the policy's {policy_obs}."

2
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
n_obs = len(self.policy.behavior_spec.observation_shapes)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors

6
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_specs
if len(sen_spec.shape) == 1
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
sensor_specs=self.behavior_spec.sensor_specs,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

2
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
n_obs = len(self.policy.behavior_spec.observation_shapes)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

18
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, SensorSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
):

self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

observation_shapes,
sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad())
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
q1_out, _ = self.q1_network(
inputs,
actions=actions,

with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad())
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
q2_out, _ = self.q2_network(
inputs,
actions=actions,

self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.sensor_specs,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.sensor_specs,
policy_network_settings,
)
ModelUtils.soft_update(

for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
n_obs = len(self.policy.behavior_spec.observation_shapes)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

3
ml-agents/mlagents/trainers/tests/check_env_trains.py


env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
training_seed=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()

seed = 1337
seed = 1337 if training_seed is None else training_seed
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)

11
ml-agents/mlagents/trainers/tests/dummy_config.py


from typing import List, Tuple
from mlagents_envs.base_env import SensorSpec, DimensionProperty
import pytest
import copy
import os

@pytest.fixture
def extrinsic_dummy_config():
    """Fixture: a reward-signal config containing only the extrinsic signal with default settings."""
    config = {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
    return config
def create_sensor_specs_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
    """
    Build one SensorSpec per observation shape.

    Each spec pairs the given shape with a tuple of DimensionProperty.UNSPECIFIED,
    one entry per dimension of the shape.

    :param shapes: observation shapes to wrap, e.g. [(8,), (84, 84, 3)].
    :return: list of SensorSpec objects, in the same order as `shapes`.
    """
    return [
        SensorSpec(shape, (DimensionProperty.UNSPECIFIED,) * len(shape))
        for shape in shapes
    ]

32
ml-agents/mlagents/trainers/tests/mock_brain.py


from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
SensorSpec,
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
observation_shapes: List[Tuple],
sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
:List observation_shapes: A List of the observation spaces in your steps
:List sensor_specs: A List of the observation specs in your steps
for _shape in observation_shapes:
obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
for sen_spec in sensor_specs:
obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(observation_shapes, action_spec)
behavior_spec = BehaviorSpec(sensor_specs, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
observation_shapes=behavior_spec.observation_shapes,
sensor_specs=behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
)

observation_shapes: List[Tuple],
sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
max_step_complete: bool = False,
memory_size: int = 10,

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
reward = 1.0
done = False
action = ActionTuple(

)
steps_list.append(experience)
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.observation_shapes,
behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
else:
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
sen_spec = create_sensor_specs_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(sen_spec, action_spec)
return behavior_spec

15
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
SensorSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,

from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.behavior_spec = BehaviorSpec(self._make_sensor_specs(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

self.action[name] = None
self.step_result[name] = None
def _make_obs_spec(self) -> List[Any]:
obs_spec: List[Any] = []
def _make_sensor_specs(self) -> SensorSpec:
obs_shape: List[Any] = []
obs_spec.append((self.vec_obs_size,))
obs_shape.append((self.vec_obs_size,))
obs_spec.append(self.vis_obs_size)
return obs_spec
obs_shape.append(self.vis_obs_size)
sen_spec = create_sensor_specs_with_shapes(obs_shape)
return sen_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []

16
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
sensor_specs=create_sensor_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
sensor_specs=create_sensor_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
observation_shapes=[(8,)],
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
observation_shapes=[(1,)],
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
observation_shapes=[(1,)],
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

3
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

12
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
)
config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
check_environment_trains(env, {BRAIN_NAME: config})
check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
def test_hybrid_recurrent_ppo():

PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=3000,
max_steps=5000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

SAC_TORCH_CONFIG.hyperparameters,
buffer_size=50000,
batch_size=256,
buffer_init_steps=2000,
buffer_init_steps=0,
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
check_environment_trains(
env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336
)
@pytest.mark.parametrize("num_visual", [1, 2])

23
ml-agents/mlagents/trainers/tests/torch/test_networks.py


)
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(obs_shapes, network_settings, encoded_act_size=2)
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes),
network_settings,
encoded_act_size=2,
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, obs_size))
sample_act = 0.1 * torch.ones((1, 2))

)
obs_shapes = [(obs_size,)]
networkbody = NetworkBody(obs_shapes, network_settings)
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

network_settings = NetworkSettings()
obs_shapes = [(vec_obs_size,), obs_size]
networkbody = NetworkBody(obs_shapes, network_settings)
networkbody = NetworkBody(
create_sensor_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
sample_vec_obs = torch.ones((1, vec_obs_size))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
stream_names, obs_shapes, network_settings, outputs_per_stream=num_outputs
stream_names, sen_spec, network_settings, outputs_per_stream=num_outputs
)
optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
obs_shapes = [(obs_size,)]
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(

4
ml-agents/mlagents/trainers/tests/torch/test_policy.py


buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
agent_action = AgentAction.from_dict(buffer)
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_shapes))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_shapes))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
sensor_specs=optimizer.policy.behavior_spec.sensor_specs,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

40
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


create_agent_buffer,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),