
Renaming ObservationSpec to SensorSpec

/layernorm
vincentpierre 3 years ago
Current commit
4bba4e8e
27 files changed, with 86 additions and 107 deletions
  1. gym-unity/gym_unity/envs/__init__.py (6 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (12 changes)
  3. ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_envs.py (14 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (2 changes)
  6. ml-agents-envs/mlagents_envs/tests/test_steps.py (4 changes)
  7. ml-agents/mlagents/trainers/demo_loader.py (9 changes)
  8. ml-agents/mlagents/trainers/policy/policy.py (4 changes)
  9. ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)
  10. ml-agents/mlagents/trainers/sac/optimizer_torch.py (12 changes)
  11. ml-agents/mlagents/trainers/tests/dummy_config.py (8 changes)
  12. ml-agents/mlagents/trainers/tests/mock_brain.py (20 changes)
  13. ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)
  14. ml-agents/mlagents/trainers/tests/test_agent_processor.py (14 changes)
  15. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)
  16. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (4 changes)
  17. ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)
  18. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (2 changes)
  19. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  20. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (4 changes)
  21. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
  22. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (4 changes)
  23. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2 changes)
  24. ml-agents/mlagents/trainers/torch/model_serialization.py (4 changes)
  25. ml-agents/mlagents/trainers/torch/networks.py (32 changes)
  26. ml-agents/mlagents/trainers/torch/utils.py (8 changes)
  27. ml-agents/tests/yamato/scripts/run_llapi.py (4 changes)
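
The rename is purely mechanical: the ObservationSpec NamedTuple becomes SensorSpec, and every observation_spec attribute, parameter, and keyword becomes sensor_spec, with no behavioral change. A minimal before/after sketch of calling code (the behavior_spec variable is illustrative, not from the diff):

# Before this commit:
for obs_spec in behavior_spec.observation_spec:
    print(obs_spec.shape)

# After this commit:
for obs_spec in behavior_spec.sensor_spec:
    print(obs_spec.shape)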

gym-unity/gym_unity/envs/__init__.py (6 changes)

     def _get_n_vis_obs(self) -> int:
         result = 0
-        for obs_spec in self.group_spec.observation_spec:
+        for obs_spec in self.group_spec.sensor_spec:
             if len(obs_spec.shape) == 3:
                 result += 1
         return result

-        for obs_spec in self.group_spec.observation_spec:
+        for obs_spec in self.group_spec.sensor_spec:
             if len(obs_spec.shape) == 3:
                 result.append(obs_spec.shape)
         return result

     def _get_vec_obs_size(self) -> int:
         result = 0
-        for obs_spec in self.group_spec.observation_spec:
+        for obs_spec in self.group_spec.sensor_spec:
             if len(obs_spec.shape) == 1:
                 result += obs_spec.shape[0]
         return result
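
The three helpers above all branch on the rank of each sensor's shape: rank 3 is treated as a visual observation, rank 1 as a vector observation. A self-contained sketch of that convention (the example shapes are illustrative):

from typing import List, Tuple

# Stand-in for [s.shape for s in group_spec.sensor_spec]:
sensor_spec_shapes: List[Tuple[int, ...]] = [(84, 84, 3), (8,)]

n_vis_obs = sum(1 for shape in sensor_spec_shapes if len(shape) == 3)      # visual: rank-3 (H, W, C)
vec_obs_size = sum(shape[0] for shape in sensor_spec_shapes if len(shape) == 1)  # vector: rank-1

assert n_vis_obs == 1 and vec_obs_size == 8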

ml-agents-envs/mlagents_envs/base_env.py (12 changes)

         :param spec: The BehaviorSpec for the DecisionSteps
         """
         obs: List[np.ndarray] = []
-        for obs_spec in spec.observation_spec:
+        for obs_spec in spec.sensor_spec:
             obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
         return DecisionSteps(
             obs=obs,

         :param spec: The BehaviorSpec for the TerminalSteps
         """
         obs: List[np.ndarray] = []
-        for obs_spec in spec.observation_spec:
+        for obs_spec in spec.sensor_spec:
             obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
         return TerminalSteps(
             obs=obs,

     VARIABLE_SIZE = 4

-class ObservationSpec(NamedTuple):
+class SensorSpec(NamedTuple):
     """
     A NamedTuple containing information about the observation of Agents.
     - shape is a Tuple of int : It corresponds to the shape of

     """
     A NamedTuple containing information about the observation and action
     spaces for a group of Agents under the same behavior.
-    - observation_spec is a List of ObservationSpec NamedTuple containing
+    - sensor_spec is a List of SensorSpec NamedTuple containing
-    The order of the OservationSpec is the same as the order of the observations of an
+    The order of the SensorSpec is the same as the order of the observations of an
-    observation_spec: List[ObservationSpec]
+    sensor_spec: List[SensorSpec]
     action_spec: ActionSpec
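
Under the new names, a BehaviorSpec is built from a list of SensorSpec entries plus an ActionSpec, and the empty() constructors above produce zero-agent steps from it. A minimal sketch (the shapes and DimensionProperty values are illustrative assumptions):

from mlagents_envs.base_env import (
    ActionSpec,
    BehaviorSpec,
    DecisionSteps,
    DimensionProperty,
    SensorSpec,
)

spec = BehaviorSpec(
    sensor_spec=[
        SensorSpec((8,), (DimensionProperty.UNSPECIFIED,)),             # one vector sensor
        SensorSpec((84, 84, 3), (DimensionProperty.UNSPECIFIED,) * 3),  # one visual sensor
    ],
    action_spec=ActionSpec.create_continuous(2),
)
ds = DecisionSteps.empty(spec)
assert len(ds.obs) == len(spec.sensor_spec)
assert ds.obs[0].shape == (0, 8)  # zero agents, per the np.zeros((0,) + shape) above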

ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)

 from mlagents_envs.base_env import (
     ActionSpec,
-    ObservationSpec,
+    SensorSpec,
     DimensionProperty,
     BehaviorSpec,
     DecisionSteps,

         for obs in agent_info.observations
     ]
     obs_spec = [
-        ObservationSpec(obs_shape, dim_p)
+        SensorSpec(obs_shape, dim_p)
         for obs_shape, dim_p in zip(observation_shape, dim_props)
     ]
     # proto from communicator < v1.3 does not set action spec, use deprecated fields instead

     ]
     decision_obs_list: List[np.ndarray] = []
     terminal_obs_list: List[np.ndarray] = []
-    for obs_index, obs_spec in enumerate(behavior_spec.observation_spec):
+    for obs_index, obs_spec in enumerate(behavior_spec.sensor_spec):
         is_visual = len(obs_spec.shape) == 3
         if is_visual:
             obs_shape = cast(Tuple[int, int, int], obs_spec.shape)
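
The comprehension above pairs each observation shape reported by the communicator with its dimension properties. A sketch with stand-in data in place of the proto fields (the DimensionProperty values are illustrative assumptions):

from mlagents_envs.base_env import DimensionProperty, SensorSpec

observation_shape = [(3,), (4,)]  # would normally come from the agent_info proto
dim_props = [(DimensionProperty.UNSPECIFIED,), (DimensionProperty.UNSPECIFIED,)]

obs_spec = [
    SensorSpec(obs_shape, dim_p)
    for obs_shape, dim_p in zip(observation_shape, dim_props)
]
assert [s.shape for s in obs_spec] == [(3,), (4,)]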

ml-agents-envs/mlagents_envs/tests/test_envs.py (14 changes)

     env.close()
     assert isinstance(decision_steps, DecisionSteps)
     assert isinstance(terminal_steps, TerminalSteps)
-    assert len(spec.observation_spec) == len(decision_steps.obs)
-    assert len(spec.observation_spec) == len(terminal_steps.obs)
+    assert len(spec.sensor_spec) == len(decision_steps.obs)
+    assert len(spec.sensor_spec) == len(terminal_steps.obs)
-    for spec, obs in zip(spec.observation_spec, decision_steps.obs):
+    for spec, obs in zip(spec.sensor_spec, decision_steps.obs):
-    for spec, obs in zip(spec.observation_spec.shapes, terminal_steps.obs):
+    for spec, obs in zip(spec.sensor_spec.shapes, terminal_steps.obs):
         assert (n_agents,) + spec.shape == obs.shape

     env.close()
     assert isinstance(decision_steps, DecisionSteps)
     assert isinstance(terminal_steps, TerminalSteps)
-    assert len(spec.observation_spec) == len(decision_steps.obs)
-    assert len(spec.observation_spec) == len(terminal_steps.obs)
-    for spec, obs in zip(spec.observation_spec, decision_steps.obs):
+    assert len(spec.sensor_spec) == len(decision_steps.obs)
+    assert len(spec.sensor_spec) == len(terminal_steps.obs)
+    for spec, obs in zip(spec.sensor_spec, decision_steps.obs):
         assert (n_agents,) + spec.shape == obs.shape
     assert 0 in decision_steps
     assert 2 in terminal_steps

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (2 changes)

     behavior_spec = behavior_spec_from_proto(bp, agent_proto)
     assert behavior_spec.action_spec.is_discrete()
     assert not behavior_spec.action_spec.is_continuous()
-    assert [spec.shape for spec in behavior_spec.observation_spec] == [(3,), (4,)]
+    assert [spec.shape for spec in behavior_spec.sensor_spec] == [(3,), (4,)]
     assert behavior_spec.action_spec.discrete_branches == (5, 4)
     assert behavior_spec.action_spec.discrete_size == 2
     bp = BrainParametersProto()

ml-agents-envs/mlagents_envs/tests/test_steps.py (4 changes)

 def test_empty_decision_steps():
     specs = BehaviorSpec(
-        observation_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
+        sensor_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
         action_spec=ActionSpec.create_continuous(3),
     )
     ds = DecisionSteps.empty(specs)

 def test_empty_terminal_steps():
     specs = BehaviorSpec(
-        observation_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
+        sensor_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
         action_spec=ActionSpec.create_continuous(3),
     )
     ts = TerminalSteps.empty(specs)

ml-agents/mlagents/trainers/demo_loader.py (9 changes)

         )
     )
     # check observations match
-    if len(behavior_spec.observation_spec) != len(
-        expected_behavior_spec.observation_spec
-    ):
+    if len(behavior_spec.sensor_spec) != len(expected_behavior_spec.sensor_spec):

-        zip(
-            behavior_spec.observation_spec,
-            expected_behavior_spec.observation_spec,
-        )
+        zip(behavior_spec.sensor_spec, expected_behavior_spec.sensor_spec)
     ):
         if demo_obs.shape != policy_obs.shape:
             raise RuntimeError(

ml-agents/mlagents/trainers/policy/policy.py (4 changes)

         )
         self.vec_obs_size = sum(
             obs_spec.shape[0]
-            for obs_spec in behavior_spec.observation_spec
+            for obs_spec in behavior_spec.sensor_spec

-            1 for obs_spec in behavior_spec.observation_spec if len(obs_spec.shape) == 3
+            1 for obs_spec in behavior_spec.sensor_spec if len(obs_spec.shape) == 3
         )
         self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
         self.previous_action_dict: Dict[str, np.ndarray] = {}

ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)

         else:
             ac_class = SharedActorCritic
         self.actor_critic = ac_class(
-            observation_spec=self.behavior_spec.observation_spec,
+            sensor_spec=self.behavior_spec.sensor_spec,
             network_settings=trainer_settings.network_settings,
             action_spec=behavior_spec.action_spec,
             stream_names=reward_signal_names,

ml-agents/mlagents/trainers/sac/optimizer_torch.py (12 changes)

 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents_envs.timers import timed
-from mlagents_envs.base_env import ActionSpec, ObservationSpec
+from mlagents_envs.base_env import ActionSpec, SensorSpec
 from mlagents.trainers.exception import UnityTrainerException
 from mlagents.trainers.settings import TrainerSettings, SACSettings
 from contextlib import ExitStack

     def __init__(
         self,
         stream_names: List[str],
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         action_spec: ActionSpec,
     ):

         self.q1_network = ValueNetwork(
             stream_names,
-            observation_spec,
+            sensor_spec,
             network_settings,
             num_action_ins,
             num_value_outs,

-            observation_spec,
+            sensor_spec,
             network_settings,
             num_action_ins,
             num_value_outs,

         self.value_network = TorchSACOptimizer.PolicyValueNetwork(
             self.stream_names,
-            self.policy.behavior_spec.observation_spec,
+            self.policy.behavior_spec.sensor_spec,
             policy_network_settings,
             self._action_spec,
         )

-            self.policy.behavior_spec.observation_spec,
+            self.policy.behavior_spec.sensor_spec,
             policy_network_settings,
         )
         ModelUtils.soft_update(

ml-agents/mlagents/trainers/tests/dummy_config.py (8 changes)

 from typing import List, Tuple
-from mlagents_envs.base_env import ObservationSpec, DimensionProperty
+from mlagents_envs.base_env import SensorSpec, DimensionProperty
 import pytest
 import copy
 import os

     return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
-def create_obs_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[ObservationSpec]:
-    obs_spec: List[ObservationSpec] = []
+def create_obs_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
+    obs_spec: List[SensorSpec] = []
-        spec = ObservationSpec(shape, dim_prop)
+        spec = SensorSpec(shape, dim_prop)
         obs_spec.append(spec)
     return obs_spec
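
Most of the test changes below are call sites of this helper, which builds one SensorSpec per shape. A hypothetical usage, assuming the test module's import path:

from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes

specs = create_obs_spec_with_shapes([(8,), (84, 84, 3)])
assert [s.shape for s in specs] == [(8,), (84, 84, 3)]  # order of shapes is preserved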

ml-agents/mlagents/trainers/tests/mock_brain.py (20 changes)

 from mlagents_envs.base_env import (
     DecisionSteps,
     TerminalSteps,
-    ObservationSpec,
+    SensorSpec,
     BehaviorSpec,
     ActionSpec,
     ActionTuple,

 def create_mock_steps(
     num_agents: int,
-    observation_spec: List[ObservationSpec],
+    sensor_spec: List[SensorSpec],
     action_spec: ActionSpec,
     done: bool = False,
 ) -> Tuple[DecisionSteps, TerminalSteps]:

     :int num_agents: Number of "agents" to imitate.
-    :List observation_spec: A List of the observation specs in your steps
+    :List sensor_spec: A List of the observation specs in your steps
-    for obs_spec in observation_spec:
+    for obs_spec in sensor_spec:
         obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
     action_mask = None
     if action_spec.is_discrete():

     reward = np.array(num_agents * [1.0], dtype=np.float32)
     interrupted = np.array(num_agents * [False], dtype=np.bool)
     agent_id = np.arange(num_agents, dtype=np.int32)
-    behavior_spec = BehaviorSpec(observation_spec, action_spec)
+    behavior_spec = BehaviorSpec(sensor_spec, action_spec)
     if done:
         return (
             DecisionSteps.empty(behavior_spec),

 ) -> Tuple[DecisionSteps, TerminalSteps]:
     return create_mock_steps(
         num_agents=num_agents,
-        observation_spec=behavior_spec.observation_spec,
+        sensor_spec=behavior_spec.sensor_spec,
         action_spec=behavior_spec.action_spec,
     )

-    observation_spec: List[ObservationSpec],
+    sensor_spec: List[SensorSpec],
     action_spec: ActionSpec,
     max_step_complete: bool = False,
     memory_size: int = 10,

     action_size = action_spec.discrete_size + action_spec.continuous_size
     for _i in range(length - 1):
         obs = []
-        for obs_spec in observation_spec:
+        for obs_spec in sensor_spec:
             obs.append(np.ones(obs_spec.shape, dtype=np.float32))
         reward = 1.0
         done = False

         )
         steps_list.append(experience)
     obs = []
-    for obs_spec in observation_spec:
+    for obs_spec in sensor_spec:
         obs.append(np.ones(obs_spec.shape, dtype=np.float32))
     last_experience = AgentExperience(
         obs=obs,

 ) -> AgentBuffer:
     trajectory = make_fake_trajectory(
         length,
-        behavior_spec.observation_spec,
+        behavior_spec.sensor_spec,
         action_spec=behavior_spec.action_spec,
         memory_size=memory_size,
     )

ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)

 from mlagents_envs.base_env import (
     ActionSpec,
-    ObservationSpec,
+    SensorSpec,
     ActionTuple,
     BaseEnv,
     BehaviorSpec,

         self.action[name] = None
         self.step_result[name] = None
-    def _make_obs_spec(self) -> ObservationSpec:
+    def _make_obs_spec(self) -> SensorSpec:
         obs_shape: List[Any] = []
         for _ in range(self.num_vector):
             obs_shape.append((self.vec_obs_size,))

ml-agents/mlagents/trainers/tests/test_agent_processor.py (14 changes)

     }
     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=2,
-        observation_spec=create_obs_spec_with_shapes(
-            [(8,)] + num_vis_obs * [(84, 84, 3)]
-        ),
+        sensor_spec=create_obs_spec_with_shapes([(8,)] + num_vis_obs * [(84, 84, 3)]),
         action_spec=ActionSpec.create_continuous(2),
     )
     fake_action_info = ActionInfo(

     # Test empty steps
     mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
         num_agents=0,
-        observation_spec=create_obs_spec_with_shapes(
-            [(8,)] + num_vis_obs * [(84, 84, 3)]
-        ),
+        sensor_spec=create_obs_spec_with_shapes([(8,)] + num_vis_obs * [(84, 84, 3)]),
         action_spec=ActionSpec.create_continuous(2),
     )
     processor.add_experiences(

     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
         num_agents=1,
-        observation_spec=create_obs_spec_with_shapes([(8,)]),
+        sensor_spec=create_obs_spec_with_shapes([(8,)]),

-        observation_spec=create_obs_spec_with_shapes([(8,)]),
+        sensor_spec=create_obs_spec_with_shapes([(8,)]),
         action_spec=ActionSpec.create_continuous(2),
         done=True,
     )

     mock_decision_step, mock_terminal_step = mb.create_mock_steps(
         num_agents=1,
-        observation_spec=create_obs_spec_with_shapes([(8,)]),
+        sensor_spec=create_obs_spec_with_shapes([(8,)]),
         action_spec=ActionSpec.create_continuous(2),
     )
     fake_action_info = ActionInfo(

ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)

     behavior_spec, pair_infos, total_expected = load_demonstration(
         path_prefix + "/test.demo"
     )
-    assert np.sum(behavior_spec.observation_spec[0].shape) == 8
+    assert np.sum(behavior_spec.sensor_spec[0].shape) == 8
     assert len(pair_infos) == total_expected
     _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

     behavior_spec, pair_infos, total_expected = load_demonstration(
         path_prefix + "/test_demo_dir"
     )
-    assert np.sum(behavior_spec.observation_spec[0].shape) == 8
+    assert np.sum(behavior_spec.sensor_spec[0].shape) == 8
     assert len(pair_infos) == total_expected
     _, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (4 changes)

     time_horizon = 10
     trajectory = mb.make_fake_trajectory(
         length=time_horizon,
-        observation_spec=create_obs_spec_with_shapes([(1,)]),
+        sensor_spec=create_obs_spec_with_shapes([(1,)]),
         max_step_complete=True,
         action_spec=ActionSpec.create_discrete((2,)),
     )

     checkpoint_interval = trainer.trainer_settings.checkpoint_interval
     trajectory = mb.make_fake_trajectory(
         length=time_horizon,
-        observation_spec=create_obs_spec_with_shapes([(1,)]),
+        sensor_spec=create_obs_spec_with_shapes([(1,)]),
         max_step_complete=True,
         action_spec=ActionSpec.create_discrete((2,)),
     )

ml-agents/mlagents/trainers/tests/test_trajectory.py (2 changes)

     wanted_keys = set(wanted_keys)
     trajectory = make_fake_trajectory(
         length=length,
-        observation_spec=create_obs_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
+        sensor_spec=create_obs_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
         action_spec=ActionSpec.create_continuous(ACTION_SIZE),
     )
     agentbuffer = trajectory.to_agentbuffer()

ml-agents/mlagents/trainers/tests/torch/test_ghost.py (2 changes)

     trajectory = make_fake_trajectory(
         length=time_horizon,
         max_step_complete=True,
-        observation_spec=create_obs_spec_with_shapes([(1,)]),
+        sensor_spec=create_obs_spec_with_shapes([(1,)]),
         action_spec=mock_specs.action_spec,
     )
     trajectory_queue0.put(trajectory)

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)

     time_horizon = 15
     trajectory = make_fake_trajectory(
         length=time_horizon,
-        observation_spec=optimizer.policy.behavior_spec.observation_spec,
+        sensor_spec=optimizer.policy.behavior_spec.sensor_spec,
         action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
         max_step_complete=True,
     )

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (4 changes)

     buffer = AgentBuffer()
     curr_observations = [
         np.random.normal(size=obs_spec.shape).astype(np.float32)
-        for obs_spec in behavior_spec.observation_spec
+        for obs_spec in behavior_spec.sensor_spec

-        for obs_spec in behavior_spec.observation_spec
+        for obs_spec in behavior_spec.sensor_spec
     ]
     action_buffer = behavior_spec.action_spec.random_action(1)
     action = {}

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)

             vis_encode_type=EncoderType.SIMPLE,
             memory=None,
         )
-        self._state_encoder = NetworkBody(
-            specs.observation_spec, state_encoder_settings
-        )
+        self._state_encoder = NetworkBody(specs.sensor_spec, state_encoder_settings)
         self._action_flattener = ActionFlattener(self._action_spec)

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (4 changes)

         unencoded_size = (
             self._action_flattener.flattened_size + 1 if settings.use_actions else 0
         )  # +1 is for dones
-        self.encoder = NetworkBody(
-            specs.observation_spec, encoder_settings, unencoded_size
-        )
+        self.encoder = NetworkBody(specs.sensor_spec, encoder_settings, unencoded_size)
         estimator_input_size = settings.encoding_size
         if settings.use_vail:

ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2 changes)

             vis_encode_type=EncoderType.SIMPLE,
             memory=None,
         )
-        self._encoder = NetworkBody(specs.observation_spec, state_encoder_settings)
+        self._encoder = NetworkBody(specs.sensor_spec, state_encoder_settings)

     def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
         n_vis = len(self._encoder.visual_processors)

ml-agents/mlagents/trainers/torch/model_serialization.py (4 changes)

         seq_len_dim = [1]
         dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
         # create input shape of NCHW
-        # (It's NHWC in self.policy.behavior_spec.observation_spec.shape)
+        # (It's NHWC in self.policy.behavior_spec.sensor_spec.shape)
-            for obs_spec in self.policy.behavior_spec.observation_spec
+            for obs_spec in self.policy.behavior_spec.sensor_spec
             if len(obs_spec.shape) == 3
         ]
         dummy_masks = torch.ones(
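
The comment change above concerns axis order: a visual SensorSpec stores its shape as (H, W, C), while ONNX export wants NCHW dummy inputs, so serialization moves channels to the front. A sketch of the reordering, assuming plain torch in place of mlagents.torch_utils and an illustrative 84x84x3 sensor:

import torch

batch_dim = [1]
hwc = (84, 84, 3)  # shape as stored in a visual SensorSpec: (H, W, C)
dummy_vis_obs = torch.zeros(batch_dim + [hwc[2], hwc[0], hwc[1]])  # reorder to (N, C, H, W)
assert list(dummy_vis_obs.shape) == [1, 3, 84, 84]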
dummy_masks = torch.ones(

ml-agents/mlagents/trainers/torch/networks.py (32 changes)

 from mlagents.torch_utils import torch, nn
-from mlagents_envs.base_env import ActionSpec, ObservationSpec
+from mlagents_envs.base_env import ActionSpec, SensorSpec
 from mlagents.trainers.torch.action_model import ActionModel
 from mlagents.trainers.torch.agent_action import AgentAction
 from mlagents.trainers.torch.action_log_probs import ActionLogProbs

 class NetworkBody(nn.Module):
     def __init__(
         self,
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         encoded_act_size: int = 0,
     ):

             self.vector_processors,
             encoder_input_size,
         ) = ModelUtils.create_input_processors(
-            observation_spec,
+            sensor_spec,
             self.h_size,
             network_settings.vis_encode_type,
             normalize=self.normalize,

     def __init__(
         self,
         stream_names: List[str],
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         encoded_act_size: int = 0,
         outputs_per_stream: int = 1,

         nn.Module.__init__(self)
         self.network_body = NetworkBody(
-            observation_spec, network_settings, encoded_act_size=encoded_act_size
+            sensor_spec, network_settings, encoded_act_size=encoded_act_size
         )
         if network_settings.memory is not None:
             encoding_size = network_settings.memory.memory_size // 2

 class SimpleActor(nn.Module, Actor):
     def __init__(
         self,
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         action_spec: ActionSpec,
         conditional_sigma: bool = False,

             ),
             requires_grad=False,
         )
-        self.network_body = NetworkBody(observation_spec, network_settings)
+        self.network_body = NetworkBody(sensor_spec, network_settings)
         if network_settings.memory is not None:
             self.encoding_size = network_settings.memory.memory_size // 2
         else:

 class SharedActorCritic(SimpleActor, ActorCritic):
     def __init__(
         self,
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         action_spec: ActionSpec,
         stream_names: List[str],

         self.use_lstm = network_settings.memory is not None
         super().__init__(
-            observation_spec,
-            network_settings,
-            action_spec,
-            conditional_sigma,
-            tanh_squash,
+            sensor_spec, network_settings, action_spec, conditional_sigma, tanh_squash
         )
         self.stream_names = stream_names
         self.value_heads = ValueHeads(stream_names, self.encoding_size)

 class SeparateActorCritic(SimpleActor, ActorCritic):
     def __init__(
         self,
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         network_settings: NetworkSettings,
         action_spec: ActionSpec,
         stream_names: List[str],

         self.use_lstm = network_settings.memory is not None
         super().__init__(
-            observation_spec,
-            network_settings,
-            action_spec,
-            conditional_sigma,
-            tanh_squash,
+            sensor_spec, network_settings, action_spec, conditional_sigma, tanh_squash
         )
-        self.critic = ValueNetwork(stream_names, observation_spec, network_settings)
+        self.critic = ValueNetwork(stream_names, sensor_spec, network_settings)

     @property
     def memory_size(self) -> int:
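
All of these constructors now take sensor_spec as their spec argument. A hypothetical construction of the smallest one, NetworkBody, reusing the test helper from dummy_config.py; this assumes NetworkSettings' default values are sufficient:

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.torch.networks import NetworkBody

body = NetworkBody(
    sensor_spec=create_obs_spec_with_shapes([(8,)]),  # a single 8-dim vector sensor
    network_settings=NetworkSettings(),
)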

ml-agents/mlagents/trainers/torch/utils.py (8 changes)

 )
 from mlagents.trainers.settings import EncoderType, ScheduleType
 from mlagents.trainers.exception import UnityTrainerException
-from mlagents_envs.base_env import ObservationSpec
+from mlagents_envs.base_env import SensorSpec

 class ModelUtils:

     @staticmethod
     def create_input_processors(
-        observation_spec: List[ObservationSpec],
+        sensor_spec: List[SensorSpec],
         h_size: int,
         vis_encode_type: EncoderType,
         normalize: bool = False,

-        :param observation_spec: List of ObservationSpecs that represent the observation dimensions.
+        :param sensor_spec: List of SensorSpec that represent the observation dimensions.
         :param action_size: Number of additional un-normalized inputs to each vector encoder. Used for
             conditioning network on other values (e.g. actions for a Q function)
         :param h_size: Number of hidden units per layer.

         visual_encoder_class = ModelUtils.get_encoder_for_type(vis_encode_type)
         vector_size = 0
         visual_output_size = 0
-        for i, obs_spec in enumerate(observation_spec):
+        for i, obs_spec in enumerate(sensor_spec):
             if len(obs_spec.shape) == 3:
                 ModelUtils._check_resolution_for_encoder(
                     obs_spec.shape[0], obs_spec.shape[1], vis_encode_type

ml-agents/tests/yamato/scripts/run_llapi.py (4 changes)

     decision_steps, terminal_steps = env.get_steps(group_name)
     # Examine the number of observations per Agent
-    print("Number of observations : ", len(group_spec.observation_spec))
+    print("Number of observations : ", len(group_spec.sensor_spec))
-    vis_obs = any(len(o_spec.shape) == 3 for o_spec in group_spec.observation_spec)
+    vis_obs = any(len(o_spec.shape) == 3 for o_spec in group_spec.sensor_spec)
     print("Is there a visual observation ?", vis_obs)
     # Examine the state space for the first observation for the first agent
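
For context, a minimal sketch of inspecting specs through the low-level API after the rename; attaching to a running Editor via file_name=None and taking the first registered behavior are assumptions, not part of this diff:

from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # file_name=None attaches to a running Editor
env.reset()
group_name = list(env.behavior_specs.keys())[0]
group_spec = env.behavior_specs[group_name]

print("Number of observations : ", len(group_spec.sensor_spec))
vis_obs = any(len(o_spec.shape) == 3 for o_spec in group_spec.sensor_spec)
print("Is there a visual observation ?", vis_obs)
env.close()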
