浏览代码

renaming sensor_spec to sensor_specS

/layernorm
vincentpierre 4 年前
当前提交
449712b0
共有 41 个文件被更改,包括 163 次插入159 次删除
  1. 2
      docs/Python-API.md
  2. 6
      gym-unity/gym_unity/envs/__init__.py
  3. 6
      gym-unity/gym_unity/tests/test_gym.py
  4. 8
      ml-agents-envs/mlagents_envs/base_env.py
  5. 2
      ml-agents-envs/mlagents_envs/environment.py
  6. 14
      ml-agents-envs/mlagents_envs/rpc_utils.py
  7. 14
      ml-agents-envs/mlagents_envs/tests/test_envs.py
  8. 18
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  9. 8
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  10. 4
      ml-agents/mlagents/trainers/demo_loader.py
  11. 2
      ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  12. 4
      ml-agents/mlagents/trainers/policy/policy.py
  13. 2
      ml-agents/mlagents/trainers/policy/torch_policy.py
  14. 2
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  15. 12
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  16. 2
      ml-agents/mlagents/trainers/tests/dummy_config.py
  17. 22
      ml-agents/mlagents/trainers/tests/mock_brain.py
  18. 8
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  19. 12
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  20. 4
      ml-agents/mlagents/trainers/tests/test_demo_loader.py
  21. 6
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  22. 4
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  23. 4
      ml-agents/mlagents/trainers/tests/torch/test_ghost.py
  24. 2
      ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
  25. 14
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  26. 4
      ml-agents/mlagents/trainers/tests/torch/test_policy.py
  27. 2
      ml-agents/mlagents/trainers/tests/torch/test_ppo.py
  28. 29
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  29. 14
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  30. 18
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  31. 21
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  32. 4
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  33. 4
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  34. 2
      ml-agents/mlagents/trainers/torch/components/bc/module.py
  35. 2
      ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  36. 2
      ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  37. 2
      ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
  38. 4
      ml-agents/mlagents/trainers/torch/model_serialization.py
  39. 22
      ml-agents/mlagents/trainers/torch/networks.py
  40. 6
      ml-agents/mlagents/trainers/torch/utils.py
  41. 4
      ml-agents/tests/yamato/scripts/run_llapi.py

2
docs/Python-API.md


A `BehaviorSpec` has the following fields :
- `sensor_spec` is a List of `SensorSpec` objects : Each `SensorSpec`
- `sensor_specs` is a List of `SensorSpec` objects : Each `SensorSpec`
corresponds to an observation's properties: `shape` is a tuple of ints that
corresponds to the shape of the observation (without the number of agents dimension).
`dimension_property` is a tuple of flags containing extra information about how the

6
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for sen_spec in self.group_spec.sensor_spec:
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
result += 1
return result

for sen_spec in self.group_spec.sensor_spec:
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 3:
result.append(sen_spec.shape)
return result

def _get_vec_obs_size(self) -> int:
result = 0
for sen_spec in self.group_spec.sensor_spec:
for sen_spec in self.group_spec.sensor_specs:
if len(sen_spec.shape) == 1:
result += sen_spec.shape[0]
return result

6
gym-unity/gym_unity/tests/test_gym.py


TerminalSteps,
BehaviorMapping,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
sen_spec = create_sensor_spec_with_shapes(obs_shapes)
sen_spec = create_sensor_specs_with_shapes(obs_shapes)
return BehaviorSpec(sen_spec, action_spec)

def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termination):
"""
Takes a mock UnityEnvironment and adds the appropriate properties, defined by the mock
Takes a mock UnityEnvironment and adds the appropriate properties, defined by the mock
GroupSpec and BatchedStepResult.
:Mock mock_env: A mock UnityEnvironment, usually empty.

8
ml-agents-envs/mlagents_envs/base_env.py


:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for sen_spec in spec.sensor_spec:
for sen_spec in spec.sensor_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for sen_spec in spec.sensor_spec:
for sen_spec in spec.sensor_specs:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,

"""
A NamedTuple containing information about the observation and action
spaces for a group of Agents under the same behavior.
- sensor_spec is a List of SensorSpec NamedTuple containing
- sensor_specs is a List of SensorSpec NamedTuple containing
information about the information of the Agent's observations such as their shapes.
The order of the SensorSpec is the same as the order of the observations of an
agent.

sensor_spec: List[SensorSpec]
sensor_specs: List[SensorSpec]
action_spec: ActionSpec

2
ml-agents-envs/mlagents_envs/environment.py


if not caps.baseRLCapabilities:
logger.warning(
"WARNING: The Unity process is not running with the expected base Reinforcement Learning"
" capabilities. Please be sure upgrade the Unity Package to a version that is compatible with this "
" capabilities. Please be sure upgrade the Unity Package to a version that is compatible with this "
"python package.\n"
f"Python package version: {python_package_ver}, C# package version: {unity_package_ver}"
f"Please find the versions that work best together from our release page.\n"

14
ml-agents-envs/mlagents_envs/rpc_utils.py


tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
sensor_spec = [
sensor_specs = [
SensorSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
]

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(sensor_spec, action_spec)
return BehaviorSpec(sensor_specs, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, sensor_spec in enumerate(behavior_spec.sensor_spec):
is_visual = len(sensor_spec.shape) == 3
for obs_index, sensor_specs in enumerate(behavior_spec.sensor_specs):
is_visual = len(sensor_specs.shape) == 3
obs_shape = cast(Tuple[int, int, int], sensor_spec.shape)
obs_shape = cast(Tuple[int, int, int], sensor_specs.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
obs_index, sensor_spec.shape, decision_agent_info_list
obs_index, sensor_specs.shape, decision_agent_info_list
obs_index, sensor_spec.shape, terminal_agent_info_list
obs_index, sensor_specs.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(

14
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.sensor_spec) == len(decision_steps.obs)
assert len(spec.sensor_spec) == len(terminal_steps.obs)
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
for spec, obs in zip(spec.sensor_spec, decision_steps.obs):
for spec, obs in zip(spec.sensor_specs, decision_steps.obs):
for spec, obs in zip(spec.sensor_spec.shapes, terminal_steps.obs):
for spec, obs in zip(spec.sensor_specs.shapes, terminal_steps.obs):
assert (n_agents,) + spec.shape == obs.shape

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.sensor_spec) == len(decision_steps.obs)
assert len(spec.sensor_spec) == len(terminal_steps.obs)
for spec, obs in zip(spec.sensor_spec, decision_steps.obs):
assert len(spec.sensor_specs) == len(decision_steps.obs)
assert len(spec.sensor_specs) == len(terminal_steps.obs)
for spec, obs in zip(spec.sensor_specs, decision_steps.obs):
assert (n_agents,) + spec.shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

18
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def generate_list_agent_proto(

n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((10,))
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
create_sensor_specs_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(10)
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert [spec.shape for spec in behavior_spec.sensor_spec] == [(3,), (4,)]
assert [spec.shape for spec in behavior_spec.sensor_specs] == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):

8
ml-agents-envs/mlagents_envs/tests/test_steps.py


ActionSpec,
BehaviorSpec,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
sensor_spec=create_sensor_spec_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
sensor_spec=create_sensor_spec_with_shapes([(3, 2), (5,)]),
sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)

4
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.sensor_spec) != len(expected_behavior_spec.sensor_spec):
if len(behavior_spec.sensor_specs) != len(expected_behavior_spec.sensor_specs):
zip(behavior_spec.sensor_spec, expected_behavior_spec.sensor_spec)
zip(behavior_spec.sensor_specs, expected_behavior_spec.sensor_specs)
):
if demo_obs.shape != policy_obs.shape:
raise RuntimeError(

2
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
n_obs = len(self.policy.behavior_spec.sensor_spec)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors

4
ml-agents/mlagents/trainers/policy/policy.py


)
self.vec_obs_size = sum(
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_spec
for sen_spec in behavior_spec.sensor_specs
1 for sen_spec in behavior_spec.sensor_spec if len(sen_spec.shape) == 3
1 for sen_spec in behavior_spec.sensor_specs if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
sensor_spec=self.behavior_spec.sensor_spec,
sensor_specs=self.behavior_spec.sensor_specs,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

2
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
n_obs = len(self.policy.behavior_spec.sensor_spec)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

12
ml-agents/mlagents/trainers/sac/optimizer_torch.py


def __init__(
self,
stream_names: List[str],
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
):

self.q1_network = ValueNetwork(
stream_names,
sensor_spec,
sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

sensor_spec,
sensor_specs,
network_settings,
num_action_ins,
num_value_outs,

self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.sensor_spec,
self.policy.behavior_spec.sensor_specs,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.sensor_spec,
self.policy.behavior_spec.sensor_specs,
policy_network_settings,
)
ModelUtils.soft_update(

for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
n_obs = len(self.policy.behavior_spec.sensor_spec)
n_obs = len(self.policy.behavior_spec.sensor_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)
# Convert to tensors
current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

2
ml-agents/mlagents/trainers/tests/dummy_config.py


return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def create_sensor_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
def create_sensor_specs_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
sen_spec: List[SensorSpec] = []
for shape in shapes:
dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)

22
ml-agents/mlagents/trainers/tests/mock_brain.py


ActionSpec,
ActionTuple,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:

:int num_agents: Number of "agents" to imitate.
:List sensor_spec: A List of the observation specs in your steps
:List sensor_specs: A List of the observation specs in your steps
for sen_spec in sensor_spec:
for sen_spec in sensor_specs:
obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(sensor_spec, action_spec)
behavior_spec = BehaviorSpec(sensor_specs, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
sensor_spec=behavior_spec.sensor_spec,
sensor_specs=behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
)

sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
action_spec: ActionSpec,
max_step_complete: bool = False,
memory_size: int = 10,

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
for sen_spec in sensor_spec:
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
reward = 1.0
done = False

)
steps_list.append(experience)
obs = []
for sen_spec in sensor_spec:
for sen_spec in sensor_specs:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.sensor_spec,
behavior_spec.sensor_specs,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
sen_spec = create_sensor_spec_with_shapes(observation_shapes)
sen_spec = create_sensor_specs_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(sen_spec, action_spec)
return behavior_spec

8
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_sensor_spec(), action_spec)
self.behavior_spec = BehaviorSpec(self._make_sensor_specs(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

self.action[name] = None
self.step_result[name] = None
def _make_sensor_spec(self) -> SensorSpec:
def _make_sensor_specs(self) -> SensorSpec:
sen_spec = create_sensor_spec_with_shapes(obs_shape)
sen_spec = create_sensor_specs_with_shapes(obs_shape)
return sen_spec
def _make_obs(self, value: float) -> List[np.ndarray]:

12
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
sensor_spec=create_sensor_spec_with_shapes(
sensor_specs=create_sensor_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
sensor_spec=create_sensor_spec_with_shapes(
sensor_specs=create_sensor_specs_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
sensor_specs=create_sensor_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.sensor_spec[0].shape) == 8
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.sensor_spec[0].shape) == 8
assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
sensor_spec=create_sensor_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

4
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
sensor_specs=create_sensor_specs_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

2
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


buffer_init_steps=0,
)
config = attr.evolve(
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2200
)
check_environment_trains(
env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1336

14
ml-agents/mlagents/trainers/tests/torch/test_networks.py


)
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(
create_sensor_spec_with_shapes(obs_shapes), network_settings, encoded_act_size=2
create_sensor_specs_with_shapes(obs_shapes),
network_settings,
encoded_act_size=2,
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, obs_size))

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(
create_sensor_spec_with_shapes(obs_shapes), network_settings
create_sensor_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

obs_shapes = [(vec_obs_size,), obs_size]
networkbody = NetworkBody(
create_sensor_spec_with_shapes(obs_shapes), network_settings
create_sensor_specs_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
sen_spec = create_sensor_spec_with_shapes([(obs_size,)])
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
stream_names = [f"stream_name{n}" for n in range(4)]
value_net = ValueNetwork(

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
sen_spec = create_sensor_spec_with_shapes([(obs_size,)])
sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
act_size = 2
mask = torch.ones([1, act_size * 2])
stream_names = [f"stream_name{n}" for n in range(4)]

4
ml-agents/mlagents/trainers/tests/torch/test_policy.py


buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
agent_action = AgentAction.from_dict(buffer)
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_spec))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_spec))
np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
memories = [

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
sensor_spec=optimizer.policy.behavior_spec.sensor_spec,
sensor_specs=optimizer.policy.behavior_spec.sensor_specs,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

29
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


create_agent_buffer,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
ACTIONSPEC_TWODISCRETE,
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

"behavior_spec",
[
BehaviorSpec(
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

18
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
DiscriminatorNetwork,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
CONTINUOUS_PATH = (

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_sensor_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

"behavior_spec",
[
BehaviorSpec(
create_sensor_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(create_sensor_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

"behavior_spec",
[
BehaviorSpec(
create_sensor_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(create_sensor_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

21
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
ACTIONSPEC_TWODISCRETE,
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

4
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


buffer = AgentBuffer()
curr_obs = [
np.random.normal(size=sen_spec.shape).astype(np.float32)
for sen_spec in behavior_spec.sensor_spec
for sen_spec in behavior_spec.sensor_specs
for sen_spec in behavior_spec.sensor_spec
for sen_spec in behavior_spec.sensor_specs
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

4
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
def test_min_visual_size():

for _ in range(num_visual):
obs_shapes.append(vis_obs_shape)
h_size = 128
sen_spec = create_sensor_spec_with_shapes(obs_shapes)
sen_spec = create_sensor_specs_with_shapes(obs_shapes)
encoders, embedding_sizes = ModelUtils.create_input_processors(
sen_spec, h_size, encoder_type, normalize
)

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


Helper function for update_batch.
"""
np_obs = ObsUtil.from_buffer(
mini_batch_demo, len(self.policy.behavior_spec.sensor_spec)
mini_batch_demo, len(self.policy.behavior_spec.sensor_specs)
)
# Convert to tensors
tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

2
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._state_encoder = NetworkBody(specs.sensor_spec, state_encoder_settings)
self._state_encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
self._action_flattener = ActionFlattener(self._action_spec)

2
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


unencoded_size = (
self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
self.encoder = NetworkBody(specs.sensor_spec, encoder_settings, unencoded_size)
self.encoder = NetworkBody(specs.sensor_specs, encoder_settings, unencoded_size)
estimator_input_size = settings.encoding_size
if settings.use_vail:

2
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._encoder = NetworkBody(specs.sensor_spec, state_encoder_settings)
self._encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
n_obs = len(self._encoder.processors)

4
ml-agents/mlagents/trainers/torch/model_serialization.py


seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
# create input shape of NCHW
# (It's NHWC in self.policy.behavior_spec.sensor_spec.shape)
# (It's NHWC in self.policy.behavior_spec.sensor_specs.shape)
for sen_spec in self.policy.behavior_spec.sensor_spec
for sen_spec in self.policy.behavior_spec.sensor_specs
if len(sen_spec.shape) == 3
]
dummy_masks = torch.ones(

22
ml-agents/mlagents/trainers/torch/networks.py


class NetworkBody(nn.Module):
def __init__(
self,
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
):

)
self.processors, self.embedding_sizes = ModelUtils.create_input_processors(
sensor_spec,
sensor_specs,
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

def __init__(
self,
stream_names: List[str],
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
encoded_act_size: int = 0,
outputs_per_stream: int = 1,

nn.Module.__init__(self)
self.network_body = NetworkBody(
sensor_spec, network_settings, encoded_act_size=encoded_act_size
sensor_specs, network_settings, encoded_act_size=encoded_act_size
)
if network_settings.memory is not None:
encoding_size = network_settings.memory.memory_size // 2

class SimpleActor(nn.Module, Actor):
def __init__(
self,
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
conditional_sigma: bool = False,

),
requires_grad=False,
)
self.network_body = NetworkBody(sensor_spec, network_settings)
self.network_body = NetworkBody(sensor_specs, network_settings)
if network_settings.memory is not None:
self.encoding_size = network_settings.memory.memory_size // 2
else:

class SharedActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
sensor_spec: List[SensorSpec],
sensor_specs: List[SensorSpec],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],

self.use_lstm = network_settings.memory is not None
super().__init__(
sensor_spec, network_settings, action_spec, conditional_sigma, tanh_squash
sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
)
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, self.encoding_size)

class SeparateActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
sensor_spec: List[SensorSpec],