
addressing comments. ObservationSpec is no longer a list

/layernorm
vincentpierre, 4 years ago
Current commit
719c969c
37 files changed, with 223 insertions and 200 deletions
  1. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (2)
  2. gym-unity/gym_unity/envs/__init__.py (16)
  3. gym-unity/gym_unity/tests/test_gym.py (4)
  4. ml-agents-envs/mlagents_envs/base_env.py (42)
  5. ml-agents-envs/mlagents_envs/rpc_utils.py (17)
  6. ml-agents-envs/mlagents_envs/tests/test_envs.py (20)
  7. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (18)
  8. ml-agents-envs/mlagents_envs/tests/test_steps.py (6)
  9. ml-agents/mlagents/trainers/demo_loader.py (10)
  10. ml-agents/mlagents/trainers/policy/policy.py (8)
  11. ml-agents/mlagents/trainers/policy/torch_policy.py (2)
  12. ml-agents/mlagents/trainers/sac/optimizer_torch.py (16)
  13. ml-agents/mlagents/trainers/tests/check_env_trains.py (3)
  14. ml-agents/mlagents/trainers/tests/dummy_config.py (11)
  15. ml-agents/mlagents/trainers/tests/mock_brain.py (28)
  16. ml-agents/mlagents/trainers/tests/simple_test_envs.py (3)
  17. ml-agents/mlagents/trainers/tests/test_agent_processor.py (16)
  18. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4)
  19. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6)
  20. ml-agents/mlagents/trainers/tests/test_trajectory.py (4)
  21. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (3)
  22. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (2)
  23. ml-agents/mlagents/trainers/tests/torch/test_networks.py (17)
  24. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2)
  25. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (29)
  26. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (15)
  27. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (19)
  28. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (21)
  29. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8)
  30. ml-agents/mlagents/trainers/tests/torch/test_utils.py (4)
  31. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (2)
  32. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2)
  33. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (4)
  34. ml-agents/mlagents/trainers/torch/model_serialization.py (10)
  35. ml-agents/mlagents/trainers/torch/networks.py (24)
  36. ml-agents/mlagents/trainers/torch/utils.py (21)
  37. ml-agents/tests/yamato/scripts/run_llapi.py (4)

2
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
public interface IDimensionPropertiesSensor : ISensor
public interface IDimensionPropertiesSensor
{
/// <summary>
/// Returns the array containing the properties of each dimensions of the

16
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
for obs_spec in self.group_spec.observation_spec:
if len(obs_spec.shape) == 3:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
result.append(shape)
for obs_spec in self.group_spec.observation_spec:
if len(obs_spec.shape) == 3:
result.append(obs_spec.shape)
return result
def _get_vis_obs_list(

def _get_vec_obs_size(self) -> int:
result = 0
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 1:
result += shape[0]
for obs_spec in self.group_spec.observation_spec:
if len(obs_spec.shape) == 1:
result += obs_spec.shape[0]
return result
def render(self, mode="rgb_array"):
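The gym wrapper hunks above switch _get_n_vis_obs, _get_vis_obs_list and _get_vec_obs_size from reading a shared observation_spec.shapes list to iterating a list of ObservationSpec and reading each spec's shape. A small self-contained sketch of that counting pattern; obs_spec_list below is a hypothetical stand-in for self.group_spec.observation_spec, not part of the diff:

from mlagents_envs.base_env import DimensionProperty, ObservationSpec

# Hypothetical spec list: one vector observation and one visual observation.
obs_spec_list = [
    ObservationSpec((6,), (DimensionProperty.UNSPECIFIED,)),
    ObservationSpec((84, 84, 3), (DimensionProperty.UNSPECIFIED,) * 3),
]

n_visual = sum(1 for spec in obs_spec_list if len(spec.shape) == 3)              # 1 visual obs
vec_size = sum(spec.shape[0] for spec in obs_spec_list if len(spec.shape) == 1)  # 6 vector dims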

4
gym-unity/gym_unity/tests/test_gym.py


from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
obs_spec = ObservationSpec.create_simple(obs_shapes)
obs_spec = create_obs_spec_with_shapes(obs_shapes)
return BehaviorSpec(obs_spec, action_spec)

42
ml-agents-envs/mlagents_envs/base_env.py


:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
for obs_spec in spec.observation_spec:
obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
for obs_spec in spec.observation_spec:
obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

class ObservationSpec(NamedTuple):
"""
A NamedTuple containing information about the observation of Agents under the
same behavior.
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- dimension_properties is a List of Tuples of DimensionProperties flag. Each Tuple
corresponds to an observation's properties. The tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
A NamedTuple containing information about the observation of Agents.
- shape is a Tuple of int : It corresponds to the shape of
an observation's dimensions.
- dimension_property is a Tuple of DimensionProperties flag, one flag for each
dimension.
shapes: List[Tuple[int, ...]]
dimension_properties: List[Tuple[DimensionProperty, ...]]
@staticmethod
def create_simple(shapes: List[Tuple[int, ...]]) -> "ObservationSpec":
dim_prop: List[Tuple[DimensionProperty, ...]] = []
for shape in shapes:
dim_prop += [(DimensionProperty.UNSPECIFIED,) * len(shape)]
return ObservationSpec(shapes, dim_prop)
shape: Tuple[int, ...]
dimension_property: Tuple[DimensionProperty, ...]
class BehaviorSpec(NamedTuple):

- observation_spec is an ObservationSpec NamedTuple containing information about
the information of the Agent's observations such as their shapes.
- action_spec is an ActionSpec NamedTuple
- observation_spec is a List of ObservationSpec NamedTuple containing
information about the information of the Agent's observations such as their shapes.
The order of the ObservationSpec is the same as the order of the observations of an
agent.
- action_spec is an ActionSpec NamedTuple.
observation_spec: ObservationSpec
observation_spec: List[ObservationSpec]
action_spec: ActionSpec
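Taken together, the base_env.py hunks replace the single ObservationSpec, whose shapes and dimension_properties were lists, with one ObservationSpec per observation, and BehaviorSpec.observation_spec becomes a list of them. A minimal sketch of the new layout, assuming only what the hunks above show (ObservationSpec fields shape and dimension_property, ActionSpec.create_continuous); the example values are illustrative:

from mlagents_envs.base_env import (
    ActionSpec,
    BehaviorSpec,
    DimensionProperty,
    ObservationSpec,
)

# One spec per observation: a vector observation and a visual one.
vector_spec = ObservationSpec((8,), (DimensionProperty.UNSPECIFIED,))
visual_spec = ObservationSpec((84, 84, 3), (DimensionProperty.UNSPECIFIED,) * 3)

behavior_spec = BehaviorSpec(
    [vector_spec, visual_spec], ActionSpec.create_continuous(2)
)

# Consumers now iterate the list instead of reading a .shapes attribute.
for obs_spec in behavior_spec.observation_spec:
    print(obs_spec.shape, obs_spec.dimension_property)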

17
ml-agents-envs/mlagents_envs/rpc_utils.py


tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
obs_spec = ObservationSpec(observation_shape, dim_props)
# proto from comminicator < v1.3 does not set action spec, use deprecated fields instead
obs_spec = [
ObservationSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
]
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(behavior_spec.observation_spec.shapes):
is_visual = len(obs_shape) == 3
for obs_index, obs_spec in enumerate(behavior_spec.observation_spec):
is_visual = len(obs_spec.shape) == 3
obs_shape = cast(Tuple[int, int, int], obs_shape)
obs_shape = cast(Tuple[int, int, int], obs_spec.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
obs_index, obs_shape, decision_agent_info_list
obs_index, obs_spec.shape, decision_agent_info_list
obs_index, obs_shape, terminal_agent_info_list
obs_index, obs_spec.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(
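In rpc_utils.py the per-observation shapes and dimension properties read from the proto are now zipped into a list of ObservationSpec, and steps_from_proto iterates that list. A hedged sketch of the zip step; the observation_shape and dim_props literals below stand in for values that would normally come from the proto:

from mlagents_envs.base_env import DimensionProperty, ObservationSpec

# Illustrative values only; in rpc_utils.py these are derived from the proto.
observation_shape = [(3,), (84, 84, 3)]
dim_props = [
    (DimensionProperty.UNSPECIFIED,),
    (DimensionProperty.UNSPECIFIED,) * 3,
]

obs_spec = [
    ObservationSpec(obs_shape, dim_p)
    for obs_shape, dim_p in zip(observation_shape, dim_props)
]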

20
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
assert len(spec.observation_spec) == len(decision_steps.obs)
assert len(spec.observation_spec) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
for spec, obs in zip(spec.observation_spec, decision_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
for shape, obs in zip(spec.observation_spec.shapes, terminal_steps.obs):
assert (n_agents,) + shape == obs.shape
for spec, obs in zip(spec.observation_spec, terminal_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
@mock.patch("mlagents_envs.env_utils.launch_executable")

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
assert len(spec.observation_spec) == len(decision_steps.obs)
assert len(spec.observation_spec) == len(terminal_steps.obs)
for spec, obs in zip(spec.observation_spec, decision_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

18
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
ActionSpec,
DecisionSteps,
TerminalSteps,

steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
def generate_list_agent_proto(

n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((7, 3))
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((10,))
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((2, 2, 6))
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(10)
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert behavior_spec.observation_spec.shapes == [(3,), (4,)]
assert [spec.shape for spec in behavior_spec.observation_spec] == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):

6
ml-agents-envs/mlagents_envs/tests/test_steps.py


from mlagents_envs.base_env import (
DecisionSteps,
ObservationSpec,
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
observation_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)

def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
observation_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)

10
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.observation_spec.shapes) != len(
expected_behavior_spec.observation_spec.shapes
if len(behavior_spec.observation_spec) != len(
expected_behavior_spec.observation_spec
):
raise RuntimeError(
"The demonstrations do not have the same number of observations as the policy."

zip(
behavior_spec.observation_spec.shapes,
expected_behavior_spec.observation_spec.shapes,
behavior_spec.observation_spec,
expected_behavior_spec.observation_spec,
if demo_obs != policy_obs:
if demo_obs.shape != policy_obs.shape:
raise RuntimeError(
f"The shape {demo_obs} for observation {i} in demonstration \
do not match the policy's {policy_obs}."
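demo_loader.py now compares demonstration and policy observations entry by entry: first the number of ObservationSpec entries, then each spec's shape. A minimal sketch of that check under the new API, using the test helper from this same commit to build two illustrative spec lists (the real code compares the specs carried by the loaded demo and the policy):

from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes

demo_spec = create_obs_spec_with_shapes([(8,)])
policy_spec = create_obs_spec_with_shapes([(8,)])

if len(demo_spec) != len(policy_spec):
    raise RuntimeError(
        "The demonstrations do not have the same number of observations as the policy."
    )
for i, (demo_obs, policy_obs) in enumerate(zip(demo_spec, policy_spec)):
    if demo_obs.shape != policy_obs.shape:
        raise RuntimeError(
            f"Shape {demo_obs.shape} for observation {i} does not match the policy's {policy_obs.shape}."
        )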

8
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0]
for shape in behavior_spec.observation_spec.shapes
if len(shape) == 1
obs_spec.shape[0]
for obs_spec in behavior_spec.observation_spec
if len(obs_spec.shape) == 1
1 for shape in behavior_spec.observation_spec.shapes if len(shape) == 3
1 for obs_spec in behavior_spec.observation_spec if len(obs_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_spec.shapes,
observation_spec=self.behavior_spec.observation_spec,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

16
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
):

self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
observation_spec,
network_settings,
num_action_ins,
num_value_outs,

observation_shapes,
observation_spec,
network_settings,
num_action_ins,
num_value_outs,

# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad())
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,

)
with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad())
stack.enter_context(torch.no_grad()) # pylint: disable=E1101
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,

self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_spec.shapes,
self.policy.behavior_spec.observation_spec,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.observation_spec.shapes,
self.policy.behavior_spec.observation_spec,
policy_network_settings,
)
ModelUtils.soft_update(

3
ml-agents/mlagents/trainers/tests/check_env_trains.py


env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
training_seed=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()

seed = 1337
seed = 1337 if training_seed is None else training_seed
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)

11
ml-agents/mlagents/trainers/tests/dummy_config.py


from typing import List, Tuple
from mlagents_envs.base_env import ObservationSpec, DimensionProperty
import pytest
import copy
import os

@pytest.fixture
def extrinsic_dummy_config():
return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def create_obs_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[ObservationSpec]:
obs_spec: List[ObservationSpec] = []
for shape in shapes:
dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
spec = ObservationSpec(shape, dim_prop)
obs_spec.append(spec)
return obs_spec
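create_obs_spec_with_shapes is the test-side replacement for the removed ObservationSpec.create_simple: it wraps each shape in an ObservationSpec with UNSPECIFIED dimension properties, one flag per dimension. A short usage example:

from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes

specs = create_obs_spec_with_shapes([(8,), (84, 84, 3)])
assert len(specs) == 2
assert specs[0].shape == (8,)
assert len(specs[1].dimension_property) == 3  # one flag per dimension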

28
ml-agents/mlagents/trainers/tests/mock_brain.py


ActionSpec,
ActionTuple,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
observation_shapes: List[Tuple],
observation_spec: List[ObservationSpec],
action_spec: ActionSpec,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
:List observation_shapes: A List of the observation spaces in your steps
:List observation_spec: A List of the observation specs in your steps
for _shape in observation_shapes:
obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
for obs_spec in observation_spec:
obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():
action_mask = [

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
obs_spec = ObservationSpec.create_simple(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
behavior_spec = BehaviorSpec(observation_spec, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
observation_shapes=behavior_spec.observation_spec.shapes,
observation_spec=behavior_spec.observation_spec,
action_spec=behavior_spec.action_spec,
)

observation_shapes: List[Tuple],
observation_spec: List[ObservationSpec],
action_spec: ActionSpec,
max_step_complete: bool = False,
memory_size: int = 10,

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
for obs_spec in observation_spec:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
reward = 1.0
done = False
action = ActionTuple(

)
steps_list.append(experience)
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
for obs_spec in observation_spec:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.observation_spec.shapes,
behavior_spec.observation_spec,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
obs_spec = ObservationSpec.create_simple(observation_shapes)
obs_spec = create_obs_spec_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
return behavior_spec

3
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

obs_shape.append((self.vec_obs_size,))
for _ in range(self.num_visual):
obs_shape.append(self.vis_obs_size)
obs_spec = ObservationSpec.create_simple(obs_shape)
obs_spec = create_obs_spec_with_shapes(obs_shape)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:

16
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
observation_spec=create_obs_spec_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
observation_shapes=[(8,)] + num_vis_obs * [(84, 84, 3)],
observation_spec=create_obs_spec_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
observation_spec=create_obs_spec_with_shapes([(8,)]),
observation_shapes=[(8,)],
observation_spec=create_obs_spec_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],
observation_spec=create_obs_spec_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents_envs.base_env import ActionSpec

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
observation_shapes=[(1,)],
observation_spec=create_obs_spec_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
observation_shapes=[(1,)],
observation_spec=create_obs_spec_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
observation_spec=create_obs_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

3
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
observation_spec=create_obs_spec_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

2
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
)
config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
check_environment_trains(env, {BRAIN_NAME: config})
check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
def test_hybrid_recurrent_ppo():

17
ml-agents/mlagents/trainers/tests/torch/test_networks.py


from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.torch.test_encoders import compare_models
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(obs_shapes, network_settings, encoded_act_size=2)
networkbody = NetworkBody(
create_obs_spec_with_shapes(obs_shapes), network_settings, encoded_act_size=2
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, obs_size))
sample_act = 0.1 * torch.ones((1, 2))

)
obs_shapes = [(obs_size,)]
networkbody = NetworkBody(obs_shapes, network_settings)
networkbody = NetworkBody(create_obs_spec_with_shapes(obs_shapes), network_settings)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

network_settings = NetworkSettings()
obs_shapes = [(vec_obs_size,), obs_size]
networkbody = NetworkBody(obs_shapes, network_settings)
networkbody = NetworkBody(create_obs_spec_with_shapes(obs_shapes), network_settings)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
sample_vec_obs = torch.ones((1, vec_obs_size))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]
obs_spec = create_obs_spec_with_shapes([(obs_size,)])
stream_names, obs_shapes, network_settings, outputs_per_stream=num_outputs
stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
)
optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
obs_shapes = [(obs_size,)]
obs_spec = create_obs_spec_with_shapes([(obs_size,)])
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
observation_shapes=optimizer.policy.behavior_spec.observation_spec.shapes,
observation_spec=optimizer.policy.behavior_spec.observation_spec,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

29
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_obs_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

"behavior_spec",
[
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

15
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

19
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
CONTINUOUS_PATH = (

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_obs_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_obs_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

"behavior_spec",
[
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_obs_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

"behavior_spec",
[
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_obs_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

21
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


RNDRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_obs_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

8
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


) -> AgentBuffer:
buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_spec.shapes
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.observation_spec
np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_spec.shapes
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.observation_spec
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

4
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
def test_min_visual_size():

for _ in range(num_visual):
obs_shapes.append(vis_obs_shape)
h_size = 128
obs_spec = create_obs_spec_with_shapes(obs_shapes)
obs_shapes, h_size, encoder_type, normalize
obs_spec, h_size, encoder_type, normalize
)
vec_enc = list(vec_enc)
vis_enc = list(vis_enc)

2
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_spec.shapes, state_encoder_settings
specs.observation_spec, state_encoder_settings
)
self._action_flattener = ActionFlattener(self._action_spec)

2
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
self.encoder = NetworkBody(
specs.observation_spec.shapes, encoder_settings, unencoded_size
specs.observation_spec, encoder_settings, unencoded_size
)
estimator_input_size = settings.encoding_size

4
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._encoder = NetworkBody(
specs.observation_spec.shapes, state_encoder_settings
)
self._encoder = NetworkBody(specs.observation_spec, state_encoder_settings)
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
n_vis = len(self._encoder.visual_processors)

10
ml-agents/mlagents/trainers/torch/model_serialization.py


seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
# create input shape of NCHW
# (It's NHWC in self.policy.behavior_spec.observation_spec.shapes)
# (It's NHWC in self.policy.behavior_spec.observation_spec.shape)
torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
for shape in self.policy.behavior_spec.observation_spec.shapes
if len(shape) == 3
torch.zeros(
batch_dim + [obs_spec.shape[2], obs_spec.shape[0], obs_spec.shape[1]]
)
for obs_spec in self.policy.behavior_spec.observation_spec
if len(obs_spec.shape) == 3
]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
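model_serialization.py builds dummy visual inputs for export by reading each spec's shape; shapes are stored NHWC while the dummy tensor is created NCHW. A small sketch of that conversion, assuming the same torch wrapper imported in networks.py below; the visual_spec value is illustrative:

from mlagents.torch_utils import torch  # assumption: same wrapper as networks.py
from mlagents_envs.base_env import DimensionProperty, ObservationSpec

batch_dim = [1]
visual_spec = ObservationSpec((84, 84, 3), (DimensionProperty.UNSPECIFIED,) * 3)  # HWC

# NHWC shape -> NCHW dummy tensor, mirroring the hunk above.
dummy_vis_obs = torch.zeros(
    batch_dim + [visual_spec.shape[2], visual_spec.shape[0], visual_spec.shape[1]]
)
assert list(dummy_vis_obs.shape) == [1, 3, 84, 84]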

24
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.torch_utils import torch, nn
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.torch.action_model import ActionModel
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs

class NetworkBody(nn.Module):
def __init__(
self,
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
):

self.vector_processors,
encoder_input_size,
) = ModelUtils.create_input_processors(
observation_shapes,
observation_spec,
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
outputs_per_stream: int = 1,

nn.Module.__init__(self)
self.network_body = NetworkBody(
observation_shapes, network_settings, encoded_act_size=encoded_act_size
observation_spec, network_settings, encoded_act_size=encoded_act_size
)
if network_settings.memory is not None:
encoding_size = network_settings.memory.memory_size // 2

class SimpleActor(nn.Module, Actor):
def __init__(
self,
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
conditional_sigma: bool = False,

),
requires_grad=False,
)
self.network_body = NetworkBody(observation_shapes, network_settings)
self.network_body = NetworkBody(observation_spec, network_settings)
if network_settings.memory is not None:
self.encoding_size = network_settings.memory.memory_size // 2
else:

class SharedActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],

self.use_lstm = network_settings.memory is not None
super().__init__(
observation_shapes,
observation_spec,
network_settings,
action_spec,
conditional_sigma,

class SeparateActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],

self.use_lstm = network_settings.memory is not None
super().__init__(
observation_shapes,
observation_spec,
network_settings,
action_spec,
conditional_sigma,

self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)
self.critic = ValueNetwork(stream_names, observation_spec, network_settings)
@property
def memory_size(self) -> int:
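With these changes NetworkBody, ValueNetwork and the actor/critic classes all take a List[ObservationSpec] as their first argument instead of a list of shape tuples. A hedged construction sketch, reusing the test helper from dummy_config.py and default NetworkSettings as the unit tests above do; the stream name "extrinsic" is only an example:

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.torch.networks import NetworkBody, ValueNetwork

settings = NetworkSettings()  # defaults, as in the unit tests
obs_spec = create_obs_spec_with_shapes([(4,)])

body = NetworkBody(obs_spec, settings)
value_net = ValueNetwork(["extrinsic"], obs_spec, settings)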

21
ml-agents/mlagents/trainers/torch/utils.py


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.base_env import ObservationSpec
class ModelUtils:

@staticmethod
def create_input_processors(
observation_shapes: List[Tuple[int, ...]],
observation_spec: List[ObservationSpec],
h_size: int,
vis_encode_type: EncoderType,
normalize: bool = False,

:param observation_shapes: List of Tuples that represent the action dimensions.
:param observation_spec: List of ObservationSpecs that represent the observation dimensions.
conditioining network on other values (e.g. actions for a Q function)
conditioning network on other values (e.g. actions for a Q function)
:param h_size: Number of hidden units per layer.
:param vis_encode_type: Type of visual encoder to use.
:param unnormalized_inputs: Vector inputs that should not be normalized, and added to the vector

visual_encoder_class = ModelUtils.get_encoder_for_type(vis_encode_type)
vector_size = 0
visual_output_size = 0
for i, dimension in enumerate(observation_shapes):
if len(dimension) == 3:
for i, obs_spec in enumerate(observation_spec):
if len(obs_spec.shape) == 3:
dimension[0], dimension[1], vis_encode_type
obs_spec.shape[0], obs_spec.shape[1], vis_encode_type
dimension[0], dimension[1], dimension[2], h_size
obs_spec.shape[0], obs_spec.shape[1], obs_spec.shape[2], h_size
elif len(dimension) == 1:
vector_size += dimension[0]
elif len(obs_spec.shape) == 1:
vector_size += obs_spec.shape[0]
f"Unsupported shape of {dimension} for observation {i}"
f"Unsupported shape of {obs_spec.shape} for observation {i}"
)
if vector_size > 0:
vector_encoders.append(VectorInput(vector_size, normalize))

4
ml-agents/tests/yamato/scripts/run_llapi.py


decision_steps, terminal_steps = env.get_steps(group_name)
# Examine the number of observations per Agent
print("Number of observations : ", len(group_spec.observation_spec.shapes))
print("Number of observations : ", len(group_spec.observation_spec))
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_spec.shapes)
vis_obs = any(len(o_spec.shape) == 3 for o_spec in group_spec.observation_spec)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent
