浏览代码

renaming obs_spec variables

/layernorm
vincentpierre 4 年前
当前提交
c5a057d2
共有 24 个文件被更改,包括 142 次插入和 134 次删除
  1. 16
      gym-unity/gym_unity/envs/__init__.py
  2. 6
      gym-unity/gym_unity/tests/test_gym.py
  3. 8
      ml-agents-envs/mlagents_envs/base_env.py
  4. 14
      ml-agents-envs/mlagents_envs/rpc_utils.py
  5. 16
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  6. 6
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  7. 8
      ml-agents/mlagents/trainers/policy/policy.py
  8. 8
      ml-agents/mlagents/trainers/tests/dummy_config.py
  9. 18
      ml-agents/mlagents/trainers/tests/mock_brain.py
  10. 10
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  11. 16
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  12. 6
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  13. 4
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  14. 4
      ml-agents/mlagents/trainers/tests/torch/test_ghost.py
  15. 20
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  16. 28
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  17. 14
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
  18. 18
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
  19. 20
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  20. 8
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  21. 6
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  22. 6
      ml-agents/mlagents/trainers/torch/model_serialization.py
  23. 14
      ml-agents/mlagents/trainers/torch/utils.py
  24. 2
      ml-agents/tests/yamato/scripts/run_llapi.py

16
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for obs_spec in self.group_spec.sensor_spec:
if len(obs_spec.shape) == 3:
for sen_spec in self.group_spec.sensor_spec:
if len(sen_spec.shape) == 3:
for obs_spec in self.group_spec.sensor_spec:
if len(obs_spec.shape) == 3:
result.append(obs_spec.shape)
for sen_spec in self.group_spec.sensor_spec:
if len(sen_spec.shape) == 3:
result.append(sen_spec.shape)
return result
def _get_vis_obs_list(

def _get_vec_obs_size(self) -> int:
result = 0
for obs_spec in self.group_spec.sensor_spec:
if len(obs_spec.shape) == 1:
result += obs_spec.shape[0]
for sen_spec in self.group_spec.sensor_spec:
if len(sen_spec.shape) == 1:
result += sen_spec.shape[0]
return result
def render(self, mode="rgb_array"):

6
gym-unity/gym_unity/tests/test_gym.py


TerminalSteps,
BehaviorMapping,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def test_gym_wrapper():

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
obs_spec = create_obs_spec_with_shapes(obs_shapes)
return BehaviorSpec(obs_spec, action_spec)
sen_spec = create_sensor_spec_with_shapes(obs_shapes)
return BehaviorSpec(sen_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

8
ml-agents-envs/mlagents_envs/base_env.py


:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for obs_spec in spec.sensor_spec:
obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
for sen_spec in spec.sensor_spec:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for obs_spec in spec.sensor_spec:
obs += [np.zeros((0,) + obs_spec.shape, dtype=np.float32)]
for sen_spec in spec.sensor_spec:
obs += [np.zeros((0,) + sen_spec.shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),

14
ml-agents-envs/mlagents_envs/rpc_utils.py


tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
obs_spec = [
sensor_spec = [
SensorSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
]

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(obs_spec, action_spec)
return BehaviorSpec(sensor_spec, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_spec in enumerate(behavior_spec.sensor_spec):
is_visual = len(obs_spec.shape) == 3
for obs_index, sensor_spec in enumerate(behavior_spec.sensor_spec):
is_visual = len(sensor_spec.shape) == 3
obs_shape = cast(Tuple[int, int, int], obs_spec.shape)
obs_shape = cast(Tuple[int, int, int], sensor_spec.shape)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list

else:
decision_obs_list.append(
_process_vector_observation(
obs_index, obs_spec.shape, decision_agent_info_list
obs_index, sensor_spec.shape, decision_agent_info_list
obs_index, obs_spec.shape, terminal_agent_info_list
obs_index, sensor_spec.shape, terminal_agent_info_list
)
)
decision_rewards = np.array(

16
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


steps_from_proto,
)
from PIL import Image
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def generate_list_agent_proto(

n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((10,))
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
create_sensor_spec_with_shapes(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(10)
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(
create_obs_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
create_sensor_spec_with_shapes(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):

6
ml-agents-envs/mlagents_envs/tests/test_steps.py


ActionSpec,
BehaviorSpec,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def test_decision_steps():

def test_empty_decision_steps():
specs = BehaviorSpec(
sensor_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
sensor_spec=create_sensor_spec_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)

def test_empty_terminal_steps():
specs = BehaviorSpec(
sensor_spec=create_obs_spec_with_shapes([(3, 2), (5,)]),
sensor_spec=create_sensor_spec_with_shapes([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)

8
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
obs_spec.shape[0]
for obs_spec in behavior_spec.sensor_spec
if len(obs_spec.shape) == 1
sen_spec.shape[0]
for sen_spec in behavior_spec.sensor_spec
if len(sen_spec.shape) == 1
1 for obs_spec in behavior_spec.sensor_spec if len(obs_spec.shape) == 3
1 for sen_spec in behavior_spec.sensor_spec if len(sen_spec.shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}

8
ml-agents/mlagents/trainers/tests/dummy_config.py


return {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
def create_obs_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
obs_spec: List[SensorSpec] = []
def create_sensor_spec_with_shapes(shapes: List[Tuple[int, ...]]) -> List[SensorSpec]:
sen_spec: List[SensorSpec] = []
obs_spec.append(spec)
return obs_spec
sen_spec.append(spec)
return sen_spec

18
ml-agents/mlagents/trainers/tests/mock_brain.py


ActionSpec,
ActionTuple,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def create_mock_steps(

:bool done: Whether all the agents in the batch are done
"""
obs_list = []
for obs_spec in sensor_spec:
obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
for sen_spec in sensor_spec:
obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
action_mask = None
if action_spec.is_discrete():
action_mask = [

action_size = action_spec.discrete_size + action_spec.continuous_size
for _i in range(length - 1):
obs = []
for obs_spec in sensor_spec:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
for sen_spec in sensor_spec:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
reward = 1.0
done = False
action = ActionTuple(

)
steps_list.append(experience)
obs = []
for obs_spec in sensor_spec:
obs.append(np.ones(obs_spec.shape, dtype=np.float32))
for sen_spec in sensor_spec:
obs.append(np.ones(sen_spec.shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

else:
action_spec = ActionSpec.create_continuous(vector_action_space)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
obs_spec = create_obs_spec_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
sen_spec = create_sensor_spec_with_shapes(observation_shapes)
behavior_spec = BehaviorSpec(sen_spec, action_spec)
return behavior_spec

10
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)

continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.behavior_spec = BehaviorSpec(self._make_obs_spec(), action_spec)
self.behavior_spec = BehaviorSpec(self._make_sensor_spec(), action_spec)
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}

self.action[name] = None
self.step_result[name] = None
def _make_obs_spec(self) -> SensorSpec:
def _make_sensor_spec(self) -> SensorSpec:
obs_spec = create_obs_spec_with_shapes(obs_shape)
return obs_spec
sen_spec = create_sensor_spec_with_shapes(obs_shape)
return sen_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []

16
ml-agents/mlagents/trainers/tests/test_agent_processor.py


from mlagents.trainers.stats import StatsReporter, StatsSummary
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents_envs.base_env import ActionSpec, ActionTuple

}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
sensor_spec=create_obs_spec_with_shapes([(8,)] + num_vis_obs * [(84, 84, 3)]),
sensor_spec=create_sensor_spec_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=0,
sensor_spec=create_obs_spec_with_shapes([(8,)] + num_vis_obs * [(84, 84, 3)]),
sensor_spec=create_sensor_spec_with_shapes(
[(8,)] + num_vis_obs * [(84, 84, 3)]
),
action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_spec=create_obs_spec_with_shapes([(8,)]),
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
sensor_spec=create_obs_spec_with_shapes([(8,)]),
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
)

mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
sensor_spec=create_obs_spec_with_shapes([(8,)]),
sensor_spec=create_sensor_spec_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
)
fake_action_info = ActionInfo(

6
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents_envs.base_env import ActionSpec

time_horizon = 10
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_spec=create_obs_spec_with_shapes([(1,)]),
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

checkpoint_interval = trainer.trainer_settings.checkpoint_interval
trajectory = mb.make_fake_trajectory(
length=time_horizon,
sensor_spec=create_obs_spec_with_shapes([(1,)]),
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
max_step_complete=True,
action_spec=ActionSpec.create_discrete((2,)),
)

4
ml-agents/mlagents/trainers/tests/test_trajectory.py


from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.tests.mock_brain import make_fake_trajectory
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
from mlagents_envs.base_env import ActionSpec
VEC_OBS_SIZE = 6

wanted_keys = set(wanted_keys)
trajectory = make_fake_trajectory(
length=length,
sensor_spec=create_obs_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
sensor_spec=create_sensor_spec_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
action_spec=ActionSpec.create_continuous(ACTION_SIZE),
)
agentbuffer = trajectory.to_agentbuffer()

4
ml-agents/mlagents/trainers/tests/torch/test_ghost.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
@pytest.fixture

trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
sensor_spec=create_obs_spec_with_shapes([(1,)]),
sensor_spec=create_sensor_spec_with_shapes([(1,)]),
action_spec=mock_specs.action_spec,
)
trajectory_queue0.put(trajectory)

20
ml-agents/mlagents/trainers/tests/torch/test_networks.py


from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.torch.test_encoders import compare_models
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def test_networkbody_vector():

obs_shapes = [(obs_size,)]
networkbody = NetworkBody(
create_obs_spec_with_shapes(obs_shapes), network_settings, encoded_act_size=2
create_sensor_spec_with_shapes(obs_shapes), network_settings, encoded_act_size=2
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, obs_size))

)
obs_shapes = [(obs_size,)]
networkbody = NetworkBody(create_obs_spec_with_shapes(obs_shapes), network_settings)
networkbody = NetworkBody(
create_sensor_spec_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
sample_obs = torch.ones((1, seq_len, obs_size))

network_settings = NetworkSettings()
obs_shapes = [(vec_obs_size,), obs_size]
networkbody = NetworkBody(create_obs_spec_with_shapes(obs_shapes), network_settings)
networkbody = NetworkBody(
create_sensor_spec_with_shapes(obs_shapes), network_settings
)
optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
sample_vec_obs = torch.ones((1, vec_obs_size))

obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
obs_spec = create_obs_spec_with_shapes([(obs_size,)])
sen_spec = create_sensor_spec_with_shapes([(obs_size,)])
stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
stream_names, sen_spec, network_settings, outputs_per_stream=num_outputs
)
optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

network_settings = NetworkSettings(
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
)
obs_spec = create_obs_spec_with_shapes([(obs_size,)])
sen_spec = create_sensor_spec_with_shapes([(obs_size,)])
actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
memories = torch.ones(

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


create_agent_buffer,
)
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_sensor_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

"behavior_spec",
[
BehaviorSpec(
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

18
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
DiscriminatorNetwork,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
CONTINUOUS_PATH = (

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_obs_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(create_obs_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
[BehaviorSpec(create_sensor_spec_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

"behavior_spec",
[
BehaviorSpec(
create_obs_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_sensor_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(create_obs_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

"behavior_spec",
[
BehaviorSpec(
create_obs_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
create_sensor_spec_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
BehaviorSpec(create_obs_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

20
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
SEED = [42]

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
create_obs_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
create_obs_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
create_sensor_spec_with_shapes([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

"behavior_spec",
[
BehaviorSpec(
create_obs_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
create_sensor_spec_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_obs_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(create_sensor_spec_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

8
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


) -> AgentBuffer:
buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.sensor_spec
np.random.normal(size=sen_spec.shape).astype(np.float32)
for sen_spec in behavior_spec.sensor_spec
np.random.normal(size=obs_spec.shape).astype(np.float32)
for obs_spec in behavior_spec.sensor_spec
np.random.normal(size=sen_spec.shape).astype(np.float32)
for sen_spec in behavior_spec.sensor_spec
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

6
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.tests.dummy_config import create_obs_spec_with_shapes
from mlagents.trainers.tests.dummy_config import create_sensor_spec_with_shapes
def test_min_visual_size():

for _ in range(num_visual):
obs_shapes.append(vis_obs_shape)
h_size = 128
obs_spec = create_obs_spec_with_shapes(obs_shapes)
sen_spec = create_sensor_spec_with_shapes(obs_shapes)
obs_spec, h_size, encoder_type, normalize
sen_spec, h_size, encoder_type, normalize
)
vec_enc = list(vec_enc)
vis_enc = list(vis_enc)

6
ml-agents/mlagents/trainers/torch/model_serialization.py


# (It's NHWC in self.policy.behavior_spec.sensor_spec.shape)
dummy_vis_obs = [
torch.zeros(
batch_dim + [obs_spec.shape[2], obs_spec.shape[0], obs_spec.shape[1]]
batch_dim + [sen_spec.shape[2], sen_spec.shape[0], sen_spec.shape[1]]
for obs_spec in self.policy.behavior_spec.sensor_spec
if len(obs_spec.shape) == 3
for sen_spec in self.policy.behavior_spec.sensor_spec
if len(sen_spec.shape) == 3
]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]

14
ml-agents/mlagents/trainers/torch/utils.py


visual_encoder_class = ModelUtils.get_encoder_for_type(vis_encode_type)
vector_size = 0
visual_output_size = 0
for i, obs_spec in enumerate(sensor_spec):
if len(obs_spec.shape) == 3:
for i, sen_spec in enumerate(sensor_spec):
if len(sen_spec.shape) == 3:
obs_spec.shape[0], obs_spec.shape[1], vis_encode_type
sen_spec.shape[0], sen_spec.shape[1], vis_encode_type
obs_spec.shape[0], obs_spec.shape[1], obs_spec.shape[2], h_size
sen_spec.shape[0], sen_spec.shape[1], sen_spec.shape[2], h_size
elif len(obs_spec.shape) == 1:
vector_size += obs_spec.shape[0]
elif len(sen_spec.shape) == 1:
vector_size += sen_spec.shape[0]
f"Unsupported shape of {obs_spec.shape} for observation {i}"
f"Unsupported shape of {sen_spec.shape} for observation {i}"
)
if vector_size > 0:
vector_encoders.append(VectorInput(vector_size, normalize))

2
ml-agents/tests/yamato/scripts/run_llapi.py


print("Number of observations : ", len(group_spec.sensor_spec))
# Is there a visual observation ?
vis_obs = any(len(o_spec.shape) == 3 for o_spec in group_spec.sensor_spec)
vis_obs = any(len(sen_spec.shape) == 3 for sen_spec in group_spec.sensor_spec)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent

正在加载...
取消
保存