make_fake_trajectory/step take ActionSpec arg

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
590adc01
12 files changed, 52 insertions(+), 66 deletions(-)
  1. ml-agents/mlagents/trainers/demo_loader.py (4 changes)
  2. ml-agents/mlagents/trainers/tests/mock_brain.py (50 changes)
  3. ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py (2 changes)
  4. ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py (8 changes)
  5. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (12 changes)
  6. ml-agents/mlagents/trainers/tests/tensorflow/test_sac.py (9 changes)
  7. ml-agents/mlagents/trainers/tests/tensorflow/test_saver.py (4 changes)
  8. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)
  9. ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)
  10. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (2 changes)
  11. ml-agents/mlagents/trainers/tests/torch/test_policy.py (10 changes)
  12. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (8 changes)

ml-agents/mlagents/trainers/demo_loader.py (4 changes)

        != expected_behavior_spec.action_spec.discrete_branches
    ):
        raise RuntimeError(
-             "The continuous action dimensions {} in demonstration do not match the policy's {}.".format(
+             "The discrete action dimensions {} in demonstration do not match the policy's {}.".format(
    # check observations match
    if len(behavior_spec.observation_shapes) != len(
        expected_behavior_spec.observation_shapes
    ):
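For context, a minimal sketch of the compatibility check this message belongs to, assuming the same continuous/discrete split used elsewhere in this diff. The helper name and surrounding code are illustrative assumptions, not the actual demo_loader source:

from mlagents_envs.base_env import BehaviorSpec


def check_action_spec_match(behavior_spec: BehaviorSpec, expected_behavior_spec: BehaviorSpec) -> None:
    # Hypothetical helper: compare the demonstration's ActionSpec against the policy's.
    demo_spec = behavior_spec.action_spec
    policy_spec = expected_behavior_spec.action_spec
    if demo_spec.continuous_size != policy_spec.continuous_size:
        raise RuntimeError(
            "The continuous action dimensions {} in demonstration do not match the policy's {}.".format(
                demo_spec.continuous_size, policy_spec.continuous_size
            )
        )
    if demo_spec.discrete_branches != policy_spec.discrete_branches:
        raise RuntimeError(
            "The discrete action dimensions {} in demonstration do not match the policy's {}.".format(
                demo_spec.discrete_branches, policy_spec.discrete_branches
            )
        )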

ml-agents/mlagents/trainers/tests/mock_brain.py (50 changes)

- from typing import List, Tuple, Union
- from collections.abc import Iterable
+ from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer

def create_mock_steps(
    num_agents: int,
    observation_shapes: List[Tuple],
-     action_shape: Union[int, Tuple[int]] = None,
-     discrete: bool = False,
+     action_spec: ActionSpec,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """

-     :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
-     if action_shape is None:
-         action_shape = 2
-     if discrete and isinstance(action_shape, Iterable):
+     if action_spec.is_discrete():
-         for action_size in action_shape  # type: ignore
+         for action_size in action_spec.discrete_branches  # type: ignore
-     if discrete:
-         action_spec = ActionSpec(0, action_shape)
-     else:
-         action_spec = ActionSpec(action_shape, ())
    behavior_spec = BehaviorSpec(observation_shapes, action_spec)
    if done:
        return (

def create_steps_from_behavior_spec(
    behavior_spec: BehaviorSpec, num_agents: int = 1
) -> Tuple[DecisionSteps, TerminalSteps]:
-     action_spec = behavior_spec.action_spec
-     is_discrete = action_spec.is_discrete()
-         action_shape=action_spec.discrete_branches
-         if is_discrete
-         else action_spec.continuous_size,
-         discrete=is_discrete,
+         action_spec=behavior_spec.action_spec,
    )

+     action_spec: ActionSpec,
-     action_space: Union[int, Tuple[int]] = 2,
-     is_discrete: bool = True,
) -> Trajectory:
    """
    Makes a fake trajectory of length length. If max_step_complete,

+     action_size = action_spec.size
+     action_probs = np.ones(np.sum(action_spec.total_size), dtype=np.float32)
    for _i in range(length - 1):
        obs = []
        for _shape in observation_shapes:

-         if is_discrete:
-             action_size = len(action_space)  # type: ignore
-             action_probs = np.ones(np.sum(action_space), dtype=np.float32)
-         else:
-             action_size = int(action_space)  # type: ignore
-             action_probs = np.ones((action_size), dtype=np.float32)
-             [[False for _ in range(branch)] for branch in action_space]  # type: ignore
-             if is_discrete
+             [
+                 [False for _ in range(branch)]
+                 for branch in action_spec.discrete_branches
+             ]  # type: ignore
+             if action_spec.is_discrete()
            else None
        )
        prev_action = np.ones(action_size, dtype=np.float32)

    memory_size: int = 10,
    exclude_key_list: List[str] = None,
) -> AgentBuffer:
-     is_discrete = behavior_spec.action_spec.is_discrete()
-     if is_discrete:
-         action_space = behavior_spec.action_spec.discrete_branches
-     else:
-         action_space = behavior_spec.action_spec.continuous_size
-         action_space=action_space,
+         action_spec=behavior_spec.action_spec,
-         is_discrete=is_discrete,
    )
    buffer = trajectory.to_agentbuffer()
    # If a key_list was given, remove those keys
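To illustrate the updated helper signature, a hedged usage sketch based only on the parameters visible in this diff; the ActionSpec constructor is assumed to take (continuous_size, discrete_branches), as the test constants elsewhere in this change suggest:

from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.tests.mock_brain import make_fake_trajectory

# Discrete spec: no continuous actions, a single branch with 2 choices
# (same pattern as ActionSpec(0, (2,)) used in test_rl_trainer.py below).
discrete_spec = ActionSpec(0, (2,))
# Continuous spec: 2 continuous actions, no discrete branches.
continuous_spec = ActionSpec(2, ())

trajectory = make_fake_trajectory(
    length=10,
    observation_shapes=[(1,)],
    max_step_complete=True,
    action_spec=discrete_spec,
)
buffer = trajectory.to_agentbuffer()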

ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py (2 changes)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=mock_specs.action_spec,
    )
    trajectory_queue0.put(trajectory)
    trainer.advance()

ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py (8 changes)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    for i in range(time_horizon):
        trajectory.steps[i].obs[0] = np.array([large_obs1[i]], dtype=np.float32)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    for i in range(time_horizon):
        trajectory.steps[i].obs[0] = np.array([large_obs2[i]], dtype=np.float32)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    # Change half of the obs to 0
    for i in range(3):

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    trajectory_buffer = trajectory.to_agentbuffer()
    policy.update_normalization(trajectory_buffer["vector_obs"])

ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (12 changes)

    ppo_dummy_config,
)
+ from mlagents_envs.base_env import ActionSpec

@pytest.fixture
def dummy_config():

DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12
+ CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
+ DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))

def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):

        length=time_horizon,
        observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
        max_step_complete=True,
-         action_space=DISCRETE_ACTION_SPACE if discrete else VECTOR_ACTION_SPACE,
-         is_discrete=discrete,
+         action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
    )
    run_out, final_value_out = optimizer.get_trajectory_value_estimates(
        trajectory.to_agentbuffer(), trajectory.next_obs, done=False

        length=time_horizon,
        observation_shapes=behavior_spec.observation_shapes,
        max_step_complete=True,
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

        length=time_horizon + 1,
        max_step_complete=False,
        observation_shapes=behavior_spec.observation_shapes,
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

ml-agents/mlagents/trainers/tests/tensorflow/test_sac.py (9 changes)

        length=15,
        observation_shapes=specs.observation_shapes,
        max_step_complete=True,
-         action_space=2,
-         is_discrete=False,
+         action_spec=specs.action_spec,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

        length=6,
        observation_shapes=specs.observation_shapes,
        max_step_complete=False,
-         action_space=2,
-         is_discrete=False,
+         action_spec=specs.action_spec,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

    trajectory = make_fake_trajectory(
        length=5,
        observation_shapes=specs.observation_shapes,
+         action_spec=specs.action_spec,
-         action_space=2,
-         is_discrete=False,
    )
    trajectory_queue.put(trajectory)
    trainer.advance()

ml-agents/mlagents/trainers/tests/tensorflow/test_saver.py (4 changes)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    # Change half of the obs to 0
    for i in range(3):

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=behavior_spec.action_spec,
    )
    trajectory_buffer = trajectory.to_agentbuffer()
    policy1.update_normalization(trajectory_buffer["vector_obs"])

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5 changes)

from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.settings import TrainerSettings, FrameworkType
+ from mlagents_envs.base_env import ActionSpec

# Add concrete implementations of abstract methods

        length=time_horizon,
        observation_shapes=[(1,)],
        max_step_complete=True,
-         action_space=[2],
+         action_spec=ActionSpec(0, (2,)),
    )
    trajectory_queue.put(trajectory)

        length=time_horizon,
        observation_shapes=[(1,)],
        max_step_complete=True,
-         action_space=[2],
+         action_spec=ActionSpec(0, (2,)),
    )
    # Check that we can turn off the trainer and that the buffer is cleared
    num_trajectories = 5

ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)

from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.tests.mock_brain import make_fake_trajectory
+ from mlagents_envs.base_env import ActionSpec

VEC_OBS_SIZE = 6
ACTION_SIZE = 4

    trajectory = make_fake_trajectory(
        length=length,
        observation_shapes=[(VEC_OBS_SIZE,), (84, 84, 3)],
-         action_space=[ACTION_SIZE],
+         action_spec=ActionSpec(ACTION_SIZE, ()),
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()

ml-agents/mlagents/trainers/tests/torch/test_ghost.py (2 changes)

        length=time_horizon,
        max_step_complete=True,
        observation_shapes=[(1,)],
-         action_space=[2],
+         action_spec=mock_specs.action_spec,
    )
    trajectory_queue0.put(trajectory)
    trainer.advance()

ml-agents/mlagents/trainers/tests/torch/test_policy.py (10 changes)

        memories=memories,
        seq_len=policy.sequence_length,
    )
-     assert log_probs.shape == (64, policy.action_spec.action_size)
-     assert entropy.shape == (64, policy.action_spec.action_size)
+     assert log_probs.shape == (64, policy.action_spec.size)
+     assert entropy.shape == (64, policy.action_spec.size)
    for val in values.values():
        assert val.shape == (64,)

        all_log_probs=not policy.use_continuous_act,
    )
    if discrete:
-         assert log_probs.shape == (64, sum(policy.action_spec.discrete_action_branches))
+         assert log_probs.shape == (64, sum(policy.action_spec.discrete_branches))
-         assert log_probs.shape == (64, policy.action_spec.continuous_action_size)
-     assert entropies.shape == (64, policy.action_spec.action_size)
+         assert log_probs.shape == (64, policy.action_spec.continuous_size)
+     assert entropies.shape == (64, policy.action_spec.size)
    if rnn:
        assert memories.shape == (1, 1, policy.m_size)
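The updated assertions rely on the renamed ActionSpec accessors. A hedged summary of the mapping implied by the lines above, with a small example; the exact semantics of each property are inferred only from this diff:

from mlagents_envs.base_env import ActionSpec

# Renames implied by the updated asserts (old name -> new name):
#   action_spec.action_size              -> action_spec.size
#   action_spec.discrete_action_branches -> action_spec.discrete_branches
#   action_spec.continuous_action_size   -> action_spec.continuous_size

spec = ActionSpec(0, (3, 3, 3, 2))  # discrete-only spec, as in the PPO test constants
assert spec.continuous_size == 0
assert spec.discrete_branches == (3, 3, 3, 2)
assert sum(spec.discrete_branches) == 11  # matches the log_probs width asserted above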

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (8 changes)

    gail_dummy_config,
)
+ from mlagents_envs.base_env import ActionSpec

@pytest.fixture
def dummy_config():

DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 64
NUM_AGENTS = 12
+ CONTINUOUS_ACTION_SPEC = ActionSpec(VECTOR_ACTION_SPACE, ())
+ DISCRETE_ACTION_SPEC = ActionSpec(0, tuple(DISCRETE_ACTION_SPACE))

def create_test_ppo_optimizer(dummy_config, use_rnn, use_discrete, use_visual):

    trajectory = make_fake_trajectory(
        length=time_horizon,
        observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
+         action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
-         action_space=DISCRETE_ACTION_SPACE if discrete else VECTOR_ACTION_SPACE,
-         is_discrete=discrete,
    )
    run_out, final_value_out = optimizer.get_trajectory_value_estimates(
        trajectory.to_agentbuffer(), trajectory.next_obs, done=False
