
Rename more files

/MLA-1734-demo-provider
Arthur Juliani 4 years ago
Current commit
0b4b0992
30 changed files with 142 additions and 142 deletions
  1. ml-agents/mlagents/trainers/demo_loader.py (4 changes)
  2. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
  3. ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)
  4. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
  5. ml-agents/mlagents/trainers/sac/optimizer_torch.py (14 changes)
  6. ml-agents/mlagents/trainers/tests/dummy_config.py (12 changes)
  7. ml-agents/mlagents/trainers/tests/mock_brain.py (32 changes)
  8. ml-agents/mlagents/trainers/tests/simple_test_envs.py (12 changes)
  9. ml-agents/mlagents/trainers/tests/test_agent_processor.py (12 changes)
  10. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)
  11. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
  12. ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)
  13. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (4 changes)
  14. ml-agents/mlagents/trainers/tests/torch/test_networks.py (16 changes)
  15. ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)
  16. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  17. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (28 changes)
  18. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 changes)
  19. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (18 changes)
  20. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (20 changes)
  21. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)
  22. ml-agents/mlagents/trainers/tests/torch/test_utils.py (6 changes)
  23. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  24. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (2 changes)
  25. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)
  26. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2 changes)
  27. ml-agents/mlagents/trainers/torch/model_serialization.py (12 changes)
  28. ml-agents/mlagents/trainers/torch/networks.py (24 changes)
  29. ml-agents/mlagents/trainers/torch/utils.py (10 changes)
  30. ml-agents/tests/yamato/scripts/run_llapi.py (4 changes)
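All 30 files apply the same mechanical rename: the SensorSpec type becomes ObservationSpec, behavior_spec.sensor_specs becomes behavior_spec.observation_specs, and the test helper create_sensor_specs_with_shapes becomes create_observation_specs_with_shapes. A hedged before/after sketch of the pattern (the wrapper function is illustrative, not from the commit):

    # Before this commit:
    #     from mlagents_envs.base_env import SensorSpec
    #     n_obs = len(policy.behavior_spec.sensor_specs)
    # After:
    from mlagents_envs.base_env import ObservationSpec  # renamed from SensorSpec

    def count_observations(behavior_spec) -> int:
        # The renamed attribute; each entry is an ObservationSpec with a .shape.
        return len(behavior_spec.observation_specs)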

ml-agents/mlagents/trainers/demo_loader.py (4 changes)

  )
  )
  # check observations match
- if len(behavior_spec.sensor_specs) != len(expected_behavior_spec.sensor_specs):
+ if len(behavior_spec.observation_specs) != len(expected_behavior_spec.observation_specs):
-     zip(behavior_spec.sensor_specs, expected_behavior_spec.sensor_specs)
+     zip(behavior_spec.observation_specs, expected_behavior_spec.observation_specs)
  ):
  if demo_obs.shape != policy_obs.shape:
      raise RuntimeError(
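For context, the renamed check compares the demonstration's observation shapes against the policy's. A self-contained sketch of that logic, using a simplified stand-in for ObservationSpec (the error text is illustrative):

    from typing import List, NamedTuple, Tuple

    class Spec(NamedTuple):
        # Minimal stand-in for ObservationSpec: only .shape matters here.
        shape: Tuple[int, ...]

    def check_observations_match(demo: List[Spec], policy: List[Spec]) -> None:
        if len(demo) != len(policy):
            raise RuntimeError("Demonstration and policy observation counts differ.")
        for demo_obs, policy_obs in zip(demo, policy):
            if demo_obs.shape != policy_obs.shape:
                raise RuntimeError(
                    f"Demo obs shape {demo_obs.shape} does not match policy obs shape {policy_obs.shape}."
                )

    check_observations_match([Spec((8,))], [Spec((8,))])  # matching specs pass silently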

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)

  def get_trajectory_value_estimates(
      self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
  ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
- n_obs = len(self.policy.behavior_spec.sensor_specs)
+ n_obs = len(self.policy.behavior_spec.observation_specs)
  current_obs = ObsUtil.from_buffer(batch, n_obs)
  # Convert to tensors
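The two-step pattern here (count the specs, then pull that many observation arrays from the buffer) recurs in the PPO, SAC, and BC hunks below. A toy, runnable sketch of the idea with a plain dict standing in for AgentBuffer and ObsUtil (the obs_{i} key scheme is an assumption):

    from typing import Dict, List

    import numpy as np

    def obs_from_buffer(buffer: Dict[str, List[np.ndarray]], n_obs: int) -> List[np.ndarray]:
        # Stand-in for ObsUtil.from_buffer: one stacked array per observation stream.
        return [np.stack(buffer[f"obs_{i}"]) for i in range(n_obs)]

    buffer = {
        "obs_0": [np.zeros(4, dtype=np.float32) for _ in range(3)],
        "obs_1": [np.zeros((8, 8, 3), dtype=np.float32) for _ in range(3)],
    }
    current_obs = obs_from_buffer(buffer, n_obs=2)
    assert current_obs[0].shape == (3, 4) and current_obs[1].shape == (3, 8, 8, 3)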

ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)

  else:
      ac_class = SharedActorCritic
  self.actor_critic = ac_class(
-     sensor_specs=self.behavior_spec.sensor_specs,
+     observation_specs=self.behavior_spec.observation_specs,
      network_settings=trainer_settings.network_settings,
      action_spec=behavior_spec.action_spec,
      stream_names=reward_signal_names,

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)

  )
  returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
- n_obs = len(self.policy.behavior_spec.sensor_specs)
+ n_obs = len(self.policy.behavior_spec.observation_specs)
  current_obs = ObsUtil.from_buffer(batch, n_obs)
  # Convert to tensors
  current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

ml-agents/mlagents/trainers/sac/optimizer_torch.py (14 changes)

  from mlagents.trainers.torch.utils import ModelUtils
  from mlagents.trainers.buffer import AgentBuffer
  from mlagents_envs.timers import timed
- from mlagents_envs.base_env import ActionSpec, SensorSpec
+ from mlagents_envs.base_env import ActionSpec, ObservationSpec
  from mlagents.trainers.exception import UnityTrainerException
  from mlagents.trainers.settings import TrainerSettings, SACSettings
  from contextlib import ExitStack

  def __init__(
      self,
      stream_names: List[str],
-     sensor_specs: List[SensorSpec],
+     observation_specs: List[ObservationSpec],
      network_settings: NetworkSettings,
      action_spec: ActionSpec,
  ):

  self.q1_network = ValueNetwork(
      stream_names,
-     sensor_specs,
+     observation_specs,
      network_settings,
      num_action_ins,
      num_value_outs,

-     sensor_specs,
+     observation_specs,
      network_settings,
      num_action_ins,
      num_value_outs,

  self.value_network = TorchSACOptimizer.PolicyValueNetwork(
      self.stream_names,
-     self.policy.behavior_spec.sensor_specs,
+     self.policy.behavior_spec.observation_specs,
      policy_network_settings,
      self._action_spec,
  )

-     self.policy.behavior_spec.sensor_specs,
+     self.policy.behavior_spec.observation_specs,
      policy_network_settings,
  )
  ModelUtils.soft_update(

  for name in self.reward_signals:
      rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
- n_obs = len(self.policy.behavior_spec.sensor_specs)
+ n_obs = len(self.policy.behavior_spec.observation_specs)
  current_obs = ObsUtil.from_buffer(batch, n_obs)
  # Convert to tensors
  current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]

ml-agents/mlagents/trainers/tests/dummy_config.py (12 changes)

  from typing import List, Tuple
- from mlagents_envs.base_env import SensorSpec, DimensionProperty, SensorType
+ from mlagents_envs.base_env import ObservationSpec, DimensionProperty, SensorType
  import pytest
  import copy
  import os

  def create_observation_specs_with_shapes(
      shapes: List[Tuple[int, ...]]
- ) -> List[SensorSpec]:
-     sen_spec: List[SensorSpec] = []
+ ) -> List[ObservationSpec]:
+     obs_spec: List[ObservationSpec] = []
-     spec = SensorSpec(shape, dim_prop, SensorType.OBSERVATION)
-     sen_spec.append(spec)
-     return sen_spec
+     spec = ObservationSpec(shape, dim_prop, SensorType.OBSERVATION)
+     obs_spec.append(spec)
+     return obs_spec
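A runnable approximation of the renamed helper, with a simplified NamedTuple standing in for the real ObservationSpec (the real class, DimensionProperty, and SensorType live in mlagents_envs.base_env; the string placeholders here are illustrative):

    from typing import List, NamedTuple, Tuple

    class ObservationSpec(NamedTuple):
        shape: Tuple[int, ...]
        dimension_property: Tuple[str, ...]
        sensor_type: str

    def create_observation_specs_with_shapes(
        shapes: List[Tuple[int, ...]]
    ) -> List[ObservationSpec]:
        obs_spec: List[ObservationSpec] = []
        for shape in shapes:
            # The real helper derives DimensionProperty per dimension; use a placeholder.
            dim_prop = tuple("UNSPECIFIED" for _ in shape)
            spec = ObservationSpec(shape, dim_prop, "OBSERVATION")
            obs_spec.append(spec)
        return obs_spec

    specs = create_observation_specs_with_shapes([(8,), (84, 84, 3)])
    assert specs[0].shape == (8,) and len(specs[1].shape) == 3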

ml-agents/mlagents/trainers/tests/mock_brain.py (32 changes)

  from mlagents_envs.base_env import (
      DecisionSteps,
      TerminalSteps,
-     SensorSpec,
+     ObservationSpec,
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
-     sensor_specs: List[SensorSpec],
+     observation_specs: List[ObservationSpec],
      action_spec: ActionSpec,
      done: bool = False,
  ) -> Tuple[DecisionSteps, TerminalSteps]:

  :int num_agents: Number of "agents" to imitate.
- :List sensor_specs: A List of the observation specs in your steps
+ :List observation_specs: A List of the observation specs in your steps
- for sen_spec in sensor_specs:
-     obs_list.append(np.ones((num_agents,) + sen_spec.shape, dtype=np.float32))
+ for obs_spec in observation_specs:
+     obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
  action_mask = None
  if action_spec.is_discrete():
      action_mask = [

  reward = np.array(num_agents * [1.0], dtype=np.float32)
  interrupted = np.array(num_agents * [False], dtype=np.bool)
  agent_id = np.arange(num_agents, dtype=np.int32)
- behavior_spec = BehaviorSpec(sensor_specs, action_spec)
+ behavior_spec = BehaviorSpec(observation_specs, action_spec)
  if done:
      return (
          DecisionSteps.empty(behavior_spec),

  ) -> Tuple[DecisionSteps, TerminalSteps]:
  return create_mock_steps(
      num_agents=num_agents,
-     sensor_specs=behavior_spec.sensor_specs,
+     observation_specs=behavior_spec.observation_specs,
      action_spec=behavior_spec.action_spec,
  )

-     sensor_specs: List[SensorSpec],
+     observation_specs: List[ObservationSpec],
      action_spec: ActionSpec,
      max_step_complete: bool = False,
      memory_size: int = 10,

  action_size = action_spec.discrete_size + action_spec.continuous_size
  for _i in range(length - 1):
      obs = []
-     for sen_spec in sensor_specs:
-         obs.append(np.ones(sen_spec.shape, dtype=np.float32))
+     for obs_spec in observation_specs:
+         obs.append(np.ones(obs_spec.shape, dtype=np.float32))
      reward = 1.0
      done = False
      action = ActionTuple(

  )
  steps_list.append(experience)
  obs = []
- for sen_spec in sensor_specs:
-     obs.append(np.ones(sen_spec.shape, dtype=np.float32))
+ for obs_spec in observation_specs:
+     obs.append(np.ones(obs_spec.shape, dtype=np.float32))
  last_experience = AgentExperience(
      obs=obs,
      reward=reward,

  ) -> AgentBuffer:
  trajectory = make_fake_trajectory(
      length,
-     behavior_spec.sensor_specs,
+     behavior_spec.observation_specs,
      action_spec=behavior_spec.action_spec,
      memory_size=memory_size,
  )

  else:
      action_spec = ActionSpec.create_continuous(vector_action_space)
  observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
- sen_spec = create_sensor_specs_with_shapes(observation_shapes)
- behavior_spec = BehaviorSpec(sen_spec, action_spec)
+ obs_spec = create_observation_specs_with_shapes(observation_shapes)
+ behavior_spec = BehaviorSpec(obs_spec, action_spec)
  return behavior_spec
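After the rename, a test builds a BehaviorSpec like this (a usage sketch assuming the branch's packages are importable; the shapes are arbitrary):

    from mlagents_envs.base_env import ActionSpec, BehaviorSpec
    from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

    observation_shapes = [(84, 84, 3), (6,)]  # one visual stream, one vector stream
    obs_spec = create_observation_specs_with_shapes(observation_shapes)
    behavior_spec = BehaviorSpec(obs_spec, ActionSpec.create_continuous(2))
    assert len(behavior_spec.observation_specs) == 2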

ml-agents/mlagents/trainers/tests/simple_test_envs.py (12 changes)

  from mlagents_envs.base_env import (
      ActionSpec,
-     SensorSpec,
+     ObservationSpec,
      ActionTuple,
      BaseEnv,
      BehaviorSpec,

  from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
      AgentInfoActionPairProto,
  )
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  OBS_SIZE = 1
  VIS_OBS_SIZE = (20, 20, 3)

      continuous_action_size + discrete_action_size
  )  # to set the goals/positions
  self.action_spec = action_spec
- self.behavior_spec = BehaviorSpec(self._make_sensor_specs(), action_spec)
+ self.behavior_spec = BehaviorSpec(self._make_observation_specs(), action_spec)
  self.action_spec = action_spec
  self.names = brain_names
  self.positions: Dict[str, List[float]] = {}

  self.action[name] = None
  self.step_result[name] = None
- def _make_sensor_specs(self) -> List[SensorSpec]:
-     sen_spec = create_sensor_specs_with_shapes(obs_shape)
-     return sen_spec
+ def _make_observation_specs(self) -> List[ObservationSpec]:
+     obs_spec = create_observation_specs_with_shapes(obs_shape)
+     return obs_spec
  def _make_obs(self, value: float) -> List[np.ndarray]:
      obs = []

ml-agents/mlagents/trainers/tests/test_agent_processor.py (12 changes)

  from mlagents.trainers.stats import StatsReporter, StatsSummary
  from mlagents.trainers.behavior_id_utils import get_global_agent_id
  from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  from mlagents_envs.base_env import ActionSpec, ActionTuple

  }
  mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
      num_agents=2,
-     sensor_specs=create_sensor_specs_with_shapes(
+     observation_specs=create_observation_specs_with_shapes(
          [(8,)] + num_vis_obs * [(84, 84, 3)]
      ),
      action_spec=ActionSpec.create_continuous(2),

  # Test empty steps
  mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
      num_agents=0,
-     sensor_specs=create_sensor_specs_with_shapes(
+     observation_specs=create_observation_specs_with_shapes(
          [(8,)] + num_vis_obs * [(84, 84, 3)]
      ),
      action_spec=ActionSpec.create_continuous(2),

  mock_decision_step, mock_terminal_step = mb.create_mock_steps(
      num_agents=1,
-     sensor_specs=create_sensor_specs_with_shapes([(8,)]),
+     observation_specs=create_observation_specs_with_shapes([(8,)]),
-     sensor_specs=create_sensor_specs_with_shapes([(8,)]),
+     observation_specs=create_observation_specs_with_shapes([(8,)]),
      action_spec=ActionSpec.create_continuous(2),
      done=True,
  )

  mock_decision_step, mock_terminal_step = mb.create_mock_steps(
      num_agents=1,
-     sensor_specs=create_sensor_specs_with_shapes([(8,)]),
+     observation_specs=create_observation_specs_with_shapes([(8,)]),
      action_spec=ActionSpec.create_continuous(2),
  )
  fake_action_info = ActionInfo(

ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)

  behavior_spec, pair_infos, total_expected = load_demonstration(
      path_prefix + "/test.demo"
  )
- assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
+ assert np.sum(behavior_spec.observation_specs[0].shape) == 8
  assert len(pair_infos) == total_expected
  _, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

  behavior_spec, pair_infos, total_expected = load_demonstration(
      path_prefix + "/test_demo_dir"
  )
- assert np.sum(behavior_spec.sensor_specs[0].shape) == 8
+ assert np.sum(behavior_spec.observation_specs[0].shape) == 8
  assert len(pair_infos) == total_expected
  _, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)

  from mlagents.trainers.tests.test_buffer import construct_fake_buffer
  from mlagents.trainers.agent_processor import AgentManagerQueue
  from mlagents.trainers.settings import TrainerSettings
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  from mlagents_envs.base_env import ActionSpec

  time_horizon = 10
  trajectory = mb.make_fake_trajectory(
      length=time_horizon,
-     sensor_specs=create_sensor_specs_with_shapes([(1,)]),
+     observation_specs=create_observation_specs_with_shapes([(1,)]),
      max_step_complete=True,
      action_spec=ActionSpec.create_discrete((2,)),
  )

  checkpoint_interval = trainer.trainer_settings.checkpoint_interval
  trajectory = mb.make_fake_trajectory(
      length=time_horizon,
-     sensor_specs=create_sensor_specs_with_shapes([(1,)]),
+     observation_specs=create_observation_specs_with_shapes([(1,)]),
      max_step_complete=True,
      action_spec=ActionSpec.create_discrete((2,)),
  )

ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)

  from mlagents.trainers.tests.mock_brain import make_fake_trajectory
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  from mlagents_envs.base_env import ActionSpec
  VEC_OBS_SIZE = 6

  wanted_keys = set(wanted_keys)
  trajectory = make_fake_trajectory(
      length=length,
-     sensor_specs=create_sensor_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
+     observation_specs=create_observation_specs_with_shapes([(VEC_OBS_SIZE,), (84, 84, 3)]),
      action_spec=ActionSpec.create_continuous(ACTION_SIZE),
  )
  agentbuffer = trajectory.to_agentbuffer()

ml-agents/mlagents/trainers/tests/torch/test_ghost.py (4 changes)

  from mlagents.trainers.tests import mock_brain as mb
  from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
  from mlagents.trainers.settings import TrainerSettings, SelfPlaySettings
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  @pytest.fixture

  trajectory = make_fake_trajectory(
      length=time_horizon,
      max_step_complete=True,
-     sensor_specs=create_sensor_specs_with_shapes([(1,)]),
+     observation_specs=create_observation_specs_with_shapes([(1,)]),
      action_spec=mock_specs.action_spec,
  )
  trajectory_queue0.put(trajectory)

ml-agents/mlagents/trainers/tests/torch/test_networks.py (16 changes)

  )
  from mlagents.trainers.settings import NetworkSettings
  from mlagents_envs.base_env import ActionSpec
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  def test_networkbody_vector():

  obs_shapes = [(obs_size,)]
  networkbody = NetworkBody(
-     create_sensor_specs_with_shapes(obs_shapes),
+     create_observation_specs_with_shapes(obs_shapes),
      network_settings,
      encoded_act_size=2,
  )

  obs_shapes = [(obs_size,)]
  networkbody = NetworkBody(
-     create_sensor_specs_with_shapes(obs_shapes), network_settings
+     create_observation_specs_with_shapes(obs_shapes), network_settings
  )
  optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
  sample_obs = torch.ones((1, seq_len, obs_size))

  obs_shapes = [(vec_obs_size,), obs_size]
  networkbody = NetworkBody(
-     create_sensor_specs_with_shapes(obs_shapes), network_settings
+     create_observation_specs_with_shapes(obs_shapes), network_settings
  )
  optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
  sample_obs = 0.1 * torch.ones((1, 84, 84, 3))

  obs_size = 4
  num_outputs = 2
  network_settings = NetworkSettings()
- sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
+ obs_spec = create_observation_specs_with_shapes([(obs_size,)])
-     stream_names, sen_spec, network_settings, outputs_per_stream=num_outputs
+     stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
  )
  optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

  network_settings = NetworkSettings(
      memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
  )
- sen_spec = create_sensor_specs_with_shapes([(obs_size,)])
+ obs_spec = create_observation_specs_with_shapes([(obs_size,)])
- actor = ac_type(sen_spec, network_settings, action_spec, stream_names)
+ actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
  if lstm:
      sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))
      memories = torch.ones(

ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)

  buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
  act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
  agent_action = AgentAction.from_dict(buffer)
- np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
+ np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
  tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
  memories = [

  buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
  act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
- np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.sensor_specs))
+ np_obs = ObsUtil.from_buffer(buffer, len(policy.behavior_spec.observation_specs))
  tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
  memories = [

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)

  time_horizon = 15
  trajectory = make_fake_trajectory(
      length=time_horizon,
-     sensor_specs=optimizer.policy.behavior_spec.sensor_specs,
+     observation_specs=optimizer.policy.behavior_spec.observation_specs,
      action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
      max_step_complete=True,
  )

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (28 changes)

  create_agent_buffer,
  )
  from mlagents.trainers.torch.utils import ModelUtils
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  SEED = [42]

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
      ],
  )
  def test_construction(behavior_spec: BehaviorSpec) -> None:

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+         create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
-         create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
+         create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  def test_factory(behavior_spec: BehaviorSpec) -> None:

      "behavior_spec",
      [
          BehaviorSpec(
-             create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+             create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

  @pytest.mark.parametrize("seed", SEED)
  @pytest.mark.parametrize(
      "behavior_spec",
-     [BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
+     [BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS)],
  )
  def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
      np.random.seed(seed)

      "behavior_spec",
      [
          BehaviorSpec(
-             create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+             create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 changes)

  from mlagents.trainers.tests.torch.test_reward_providers.utils import (
      create_agent_buffer,
  )
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  ACTIONSPEC_CONTINUOUS = ActionSpec.create_continuous(5)

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
      ],
  )
  def test_construction(behavior_spec: BehaviorSpec) -> None:

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
      ],
  )
  def test_factory(behavior_spec: BehaviorSpec) -> None:

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
      ],
  )
  def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (18 changes)

  from mlagents.trainers.torch.components.reward_providers.gail_reward_provider import (
      DiscriminatorNetwork,
  )
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  CONTINUOUS_PATH = (

  @pytest.mark.parametrize(
      "behavior_spec",
-     [BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
+     [BehaviorSpec(create_observation_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
  )
  def test_construction(behavior_spec: BehaviorSpec) -> None:
      gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

  @pytest.mark.parametrize(
      "behavior_spec",
-     [BehaviorSpec(create_sensor_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
+     [BehaviorSpec(create_observation_specs_with_shapes([(8,)]), ACTIONSPEC_CONTINUOUS)],
  )
  def test_factory(behavior_spec: BehaviorSpec) -> None:
      gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)

      "behavior_spec",
      [
          BehaviorSpec(
-             create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
+             create_observation_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
-         BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  @pytest.mark.parametrize("use_actions", [False, True])

      "behavior_spec",
      [
          BehaviorSpec(
-             create_sensor_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
+             create_observation_specs_with_shapes([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
-         BehaviorSpec(create_sensor_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(50,)]), ACTIONSPEC_FOURDISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  @pytest.mark.parametrize("use_actions", [False, True])

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (20 changes)

  from mlagents.trainers.tests.torch.test_reward_providers.utils import (
      create_agent_buffer,
  )
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  SEED = [42]

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
      ],
  )
  def test_construction(behavior_spec: BehaviorSpec) -> None:

  @pytest.mark.parametrize(
      "behavior_spec",
      [
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS),
-         create_sensor_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+         create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
-         create_sensor_specs_with_shapes([(10,), (64, 66, 1)]),
+         create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  def test_factory(behavior_spec: BehaviorSpec) -> None:

      "behavior_spec",
      [
          BehaviorSpec(
-             create_sensor_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+             create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
-         BehaviorSpec(create_sensor_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_TWODISCRETE),
+         BehaviorSpec(create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_DISCRETE),
      ],
  )
  def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)

  ) -> AgentBuffer:
  buffer = AgentBuffer()
  curr_obs = [
-     np.random.normal(size=sen_spec.shape).astype(np.float32)
-     for sen_spec in behavior_spec.sensor_specs
+     np.random.normal(size=obs_spec.shape).astype(np.float32)
+     for obs_spec in behavior_spec.observation_specs
-     np.random.normal(size=sen_spec.shape).astype(np.float32)
-     for sen_spec in behavior_spec.sensor_specs
+     np.random.normal(size=obs_spec.shape).astype(np.float32)
+     for obs_spec in behavior_spec.observation_specs
  ]
  action_buffer = behavior_spec.action_spec.random_action(1)
  action = {}

ml-agents/mlagents/trainers/tests/torch/test_utils.py (6 changes)

  from mlagents.trainers.torch.utils import ModelUtils
  from mlagents.trainers.exception import UnityTrainerException
  from mlagents.trainers.torch.encoders import VectorInput
- from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
+ from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
  def test_min_visual_size():

  for _ in range(num_visual):
      obs_shapes.append(vis_obs_shape)
  h_size = 128
- sen_spec = create_sensor_specs_with_shapes(obs_shapes)
+ obs_spec = create_observation_specs_with_shapes(obs_shapes)
-     sen_spec, h_size, encoder_type, normalize
+     obs_spec, h_size, encoder_type, normalize
  )
  total_output = sum(embedding_sizes)
  vec_enc = []

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

  Helper function for update_batch.
  """
  np_obs = ObsUtil.from_buffer(
-     mini_batch_demo, len(self.policy.behavior_spec.sensor_specs)
+     mini_batch_demo, len(self.policy.behavior_spec.observation_specs)
  )
  # Convert to tensors
  tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (2 changes)

  vis_encode_type=EncoderType.SIMPLE,
  memory=None,
  )
- self._state_encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
+ self._state_encoder = NetworkBody(specs.observation_specs, state_encoder_settings)
  self._action_flattener = ActionFlattener(self._action_spec)

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)

  unencoded_size = (
      self._action_flattener.flattened_size + 1 if settings.use_actions else 0
  )  # +1 is for dones
- self.encoder = NetworkBody(specs.sensor_specs, encoder_settings, unencoded_size)
+ self.encoder = NetworkBody(specs.observation_specs, encoder_settings, unencoded_size)
  estimator_input_size = settings.encoding_size
  if settings.use_vail:

ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (2 changes)

  vis_encode_type=EncoderType.SIMPLE,
  memory=None,
  )
- self._encoder = NetworkBody(specs.sensor_specs, state_encoder_settings)
+ self._encoder = NetworkBody(specs.observation_specs, state_encoder_settings)
  def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
      n_obs = len(self._encoder.processors)

ml-agents/mlagents/trainers/torch/model_serialization.py (12 changes)

  batch_dim = [1]
  seq_len_dim = [1]
  vec_obs_size = 0
- for sens_spec in self.policy.behavior_spec.sensor_specs:
+ for sens_spec in self.policy.behavior_spec.observation_specs:
-     for sens_spec in self.policy.behavior_spec.sensor_specs
+     for sens_spec in self.policy.behavior_spec.observation_specs
- # (It's NHWC in self.policy.behavior_spec.sensor_specs.shape)
+ # (It's NHWC in self.policy.behavior_spec.observation_specs.shape)
-     batch_dim + [sen_spec.shape[2], sen_spec.shape[0], sen_spec.shape[1]]
-     for sen_spec in self.policy.behavior_spec.sensor_specs
-     if len(sen_spec.shape) == 3
+     batch_dim + [obs_spec.shape[2], obs_spec.shape[0], obs_spec.shape[1]]
+     for obs_spec in self.policy.behavior_spec.observation_specs
+     if len(obs_spec.shape) == 3
  ]
  dummy_masks = torch.ones(
      batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
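The hunk above builds dummy visual inputs in channel-first (NCHW) order from shapes stored channel-last (NHWC). A small standalone sketch of that reordering:

    import torch

    # ObservationSpec shapes for visual inputs are NHWC: (height, width, channels).
    obs_shape = (84, 84, 3)
    batch_dim = [1]

    # Reorder to NCHW, the channel-first layout the exported model expects.
    dummy_visual = torch.zeros(batch_dim + [obs_shape[2], obs_shape[0], obs_shape[1]])
    assert tuple(dummy_visual.shape) == (1, 3, 84, 84)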

ml-agents/mlagents/trainers/torch/networks.py (24 changes)

  from mlagents.torch_utils import torch, nn
- from mlagents_envs.base_env import ActionSpec, SensorSpec
+ from mlagents_envs.base_env import ActionSpec, ObservationSpec
  from mlagents.trainers.torch.action_model import ActionModel
  from mlagents.trainers.torch.agent_action import AgentAction
  from mlagents.trainers.torch.action_log_probs import ActionLogProbs

  class NetworkBody(nn.Module):
      def __init__(
          self,
-         sensor_specs: List[SensorSpec],
+         observation_specs: List[ObservationSpec],
          network_settings: NetworkSettings,
          encoded_act_size: int = 0,
      ):

  )
  self.processors, self.embedding_sizes = ModelUtils.create_input_processors(
-     sensor_specs,
+     observation_specs,
      self.h_size,
      network_settings.vis_encode_type,
      normalize=self.normalize,

  def __init__(
      self,
      stream_names: List[str],
-     sensor_specs: List[SensorSpec],
+     observation_specs: List[ObservationSpec],
      network_settings: NetworkSettings,
      encoded_act_size: int = 0,
      outputs_per_stream: int = 1,

  nn.Module.__init__(self)
  self.network_body = NetworkBody(
-     sensor_specs, network_settings, encoded_act_size=encoded_act_size
+     observation_specs, network_settings, encoded_act_size=encoded_act_size
  )
  if network_settings.memory is not None:
      encoding_size = network_settings.memory.memory_size // 2

  class SimpleActor(nn.Module, Actor):
      def __init__(
          self,
-         sensor_specs: List[SensorSpec],
+         observation_specs: List[ObservationSpec],
          network_settings: NetworkSettings,
          action_spec: ActionSpec,
          conditional_sigma: bool = False,

  ),
  requires_grad=False,
  )
- self.network_body = NetworkBody(sensor_specs, network_settings)
+ self.network_body = NetworkBody(observation_specs, network_settings)
  if network_settings.memory is not None:
      self.encoding_size = network_settings.memory.memory_size // 2
  else:

  class SharedActorCritic(SimpleActor, ActorCritic):
      def __init__(
          self,
-         sensor_specs: List[SensorSpec],
+         observation_specs: List[ObservationSpec],
          network_settings: NetworkSettings,
          action_spec: ActionSpec,
          stream_names: List[str],

  self.use_lstm = network_settings.memory is not None
  super().__init__(
-     sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
+     observation_specs, network_settings, action_spec, conditional_sigma, tanh_squash
  )
  self.stream_names = stream_names
  self.value_heads = ValueHeads(stream_names, self.encoding_size)

  class SeparateActorCritic(SimpleActor, ActorCritic):
      def __init__(
          self,
-         sensor_specs: List[SensorSpec],
+         observation_specs: List[ObservationSpec],
          network_settings: NetworkSettings,
          action_spec: ActionSpec,
          stream_names: List[str],

  self.use_lstm = network_settings.memory is not None
  super().__init__(
-     sensor_specs, network_settings, action_spec, conditional_sigma, tanh_squash
+     observation_specs, network_settings, action_spec, conditional_sigma, tanh_squash
- self.critic = ValueNetwork(stream_names, sensor_specs, network_settings)
+ self.critic = ValueNetwork(stream_names, observation_specs, network_settings)
  @property
  def memory_size(self) -> int:
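With the rename, every constructor in networks.py takes observation_specs as its leading argument. A usage sketch (assuming this branch's packages are importable; default settings):

    from mlagents.trainers.settings import NetworkSettings
    from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
    from mlagents.trainers.torch.networks import NetworkBody

    observation_specs = create_observation_specs_with_shapes([(8,)])
    # NetworkBody now consumes ObservationSpec objects rather than SensorSpec.
    body = NetworkBody(observation_specs, NetworkSettings())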

ml-agents/mlagents/trainers/torch/utils.py (10 changes)

  )
  from mlagents.trainers.settings import EncoderType, ScheduleType
  from mlagents.trainers.exception import UnityTrainerException
- from mlagents_envs.base_env import SensorSpec
+ from mlagents_envs.base_env import ObservationSpec
  class ModelUtils:

  @staticmethod
  def create_input_processors(
-     sensor_specs: List[SensorSpec],
+     observation_specs: List[ObservationSpec],
      h_size: int,
      vis_encode_type: EncoderType,
      normalize: bool = False,

- :param sensor_specs: List of SensorSpec that represent the observation dimensions.
+ :param observation_specs: List of ObservationSpec that represent the observation dimensions.
  :param action_size: Number of additional un-normalized inputs to each vector encoder. Used for
      conditioning network on other values (e.g. actions for a Q function)
  :param h_size: Number of hidden units per layer.

  """
  encoders: List[nn.Module] = []
  embedding_sizes: List[int] = []
- for sen_spec in sensor_specs:
+ for obs_spec in observation_specs:
-     sen_spec.shape, normalize, h_size, vis_encode_type
+     obs_spec.shape, normalize, h_size, vis_encode_type
  )
  encoders.append(encoder)
  embedding_sizes.append(embedding_size)
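A trimmed, self-contained sketch of the loop this hunk renames: one encoder per observation spec, each reporting its embedding size (get_encoder_for_obs is a stand-in for the real ModelUtils dispatch, which also handles visual encoders):

    from typing import List, Tuple

    from torch import nn

    def get_encoder_for_obs(shape: Tuple[int, ...], h_size: int) -> Tuple[nn.Module, int]:
        # Stand-in dispatch: vector observations get a Linear encoder.
        if len(shape) == 1:
            return nn.Linear(shape[0], h_size), h_size
        raise NotImplementedError("visual encoders omitted from this sketch")

    def create_input_processors(
        shapes: List[Tuple[int, ...]], h_size: int
    ) -> Tuple[List[nn.Module], List[int]]:
        encoders: List[nn.Module] = []
        embedding_sizes: List[int] = []
        for shape in shapes:
            encoder, embedding_size = get_encoder_for_obs(shape, h_size)
            encoders.append(encoder)
            embedding_sizes.append(embedding_size)
        return encoders, embedding_sizes

    encoders, sizes = create_input_processors([(8,), (6,)], h_size=128)
    assert sizes == [128, 128]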

ml-agents/tests/yamato/scripts/run_llapi.py (4 changes)

  decision_steps, terminal_steps = env.get_steps(group_name)
  # Examine the number of observations per Agent
- print("Number of observations : ", len(group_spec.sensor_specs))
+ print("Number of observations : ", len(group_spec.observation_specs))
- vis_obs = any(len(sen_spec.shape) == 3 for sen_spec in group_spec.sensor_specs)
+ vis_obs = any(len(obs_spec.shape) == 3 for obs_spec in group_spec.observation_specs)
  print("Is there a visual observation ?", vis_obs)
  # Examine the state space for the first observation for the first agent
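User-side code sees the same attribute rename through the low-level API. A hedged sketch of the inspection this script performs (assumes env is a connected mlagents_envs UnityEnvironment and group_name a known behavior name; setup omitted):

    group_spec = env.behavior_specs[group_name]
    print("Number of observations : ", len(group_spec.observation_specs))
    vis_obs = any(len(obs_spec.shape) == 3 for obs_spec in group_spec.observation_specs)
    print("Is there a visual observation ?", vis_obs)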
