
fix torch test policy

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit: a545859e
3 files changed, 23 insertions and 17 deletions
  1. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (9 changes)
  2. ml-agents/mlagents/trainers/tests/torch/test_policy.py (29 changes)
  3. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (9 changes)


     with torch.no_grad():
         _, log_probs1, _, _ = policy1.sample_actions(
-            vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
+            vec_obs, vis_obs, masks=masks, memories=memories
         )
         _, log_probs2, _, _ = policy2.sample_actions(
-            vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
+            vec_obs, vis_obs, masks=masks, memories=memories
         )
-    np.testing.assert_array_equal(log_probs1, log_probs2)
+    np.testing.assert_array_equal(
+        log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
+    )
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
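The rewritten assertion compares log_probs1.all_discrete_tensor rather than a flat array, which implies sample_actions now returns a grouped log-probs object instead of one tensor. A minimal sketch of such a container, assuming it holds one log-prob tensor per discrete branch (the class and constructor here are illustrative, not the ml-agents implementation; only the all_discrete_tensor attribute comes from the diff):

    from typing import List

    import torch


    class GroupedLogProbs:
        """Illustrative stand-in for the structured log-probs object in this diff."""

        def __init__(self, all_discrete_list: List[torch.Tensor]):
            # One (batch, branch_size) tensor of log-probs per discrete branch.
            self.all_discrete_list = all_discrete_list

        @property
        def all_discrete_tensor(self) -> torch.Tensor:
            # Concatenate per-branch log-probs into one (batch, sum_of_branches)
            # tensor -- the value np.testing.assert_array_equal receives above.
            return torch.cat(self.all_discrete_list, dim=1)


    probs = GroupedLogProbs([torch.zeros(4, 3), torch.zeros(4, 2)])
    assert probs.all_discrete_tensor.shape == (4, 5)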

ml-agents/mlagents/trainers/tests/torch/test_policy.py (29 changes)


 from mlagents.trainers.policy.torch_policy import TorchPolicy
 from mlagents.trainers.tests import mock_brain as mb
 from mlagents.trainers.settings import TrainerSettings, NetworkSettings
-from mlagents.trainers.torch.utils import ModelUtils
+from mlagents.trainers.torch.utils import ModelUtils, AgentAction

 VECTOR_ACTION_SPACE = 2
 VECTOR_OBS_SPACE = 8

     run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
     if discrete:
-        run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
+        run_out["action"]["discrete_action"].shape == (
+            NUM_AGENTS,
+            len(DISCRETE_ACTION_SPACE),
+        )
     else:
-        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
+        assert run_out["action"]["continuous_action"].shape == (
+            NUM_AGENTS,
+            VECTOR_ACTION_SPACE,
+        )
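These asserts index run_out["action"] by action type, so evaluate now apparently returns a dict of arrays rather than a single array. A small sketch of the implied payload shape (the two key names come from the asserts; DISCRETE_ACTION_SPACE here is a placeholder value, and dtypes are guesses):

    import numpy as np

    NUM_AGENTS = 4
    VECTOR_ACTION_SPACE = 2  # continuous action size used in these tests
    DISCRETE_ACTION_SPACE = [3, 3, 3, 2]  # one entry per branch (placeholder)

    run_out = {
        "action": {
            "continuous_action": np.zeros((NUM_AGENTS, VECTOR_ACTION_SPACE), np.float32),
            "discrete_action": np.zeros((NUM_AGENTS, len(DISCRETE_ACTION_SPACE)), np.int64),
        }
    }
    assert run_out["action"]["continuous_action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
    assert run_out["action"]["discrete_action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))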
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

     buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
     vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
     act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
-    if policy.use_continuous_act:
-        actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
-    else:
-        actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
+    agent_action = AgentAction.from_dict(buffer)
     vis_obs = []
     for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
         vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
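AgentAction.from_dict replaces the dtype-switching branch deleted above: the buffer-to-tensor conversion now lives inside the action container. A minimal sketch of what such a constructor could do, assuming buffer keys "continuous_action" and "discrete_action" (those key names and the field layout are guesses; only from_dict and its buffer argument appear in the diff):

    from typing import Dict, List, Optional

    import numpy as np
    import torch


    class AgentActionSketch:
        """Hypothetical AgentAction: holds continuous and discrete actions together."""

        def __init__(
            self,
            continuous_tensor: Optional[torch.Tensor],
            discrete_list: Optional[List[torch.Tensor]],
        ):
            self.continuous_tensor = continuous_tensor
            self.discrete_list = discrete_list

        @staticmethod
        def from_dict(buffer: Dict[str, np.ndarray]) -> "AgentActionSketch":
            continuous, discrete = None, None
            if "continuous_action" in buffer:  # assumed buffer key
                continuous = torch.as_tensor(buffer["continuous_action"], dtype=torch.float32)
            if "discrete_action" in buffer:  # assumed buffer key
                flat = torch.as_tensor(buffer["discrete_action"], dtype=torch.long)
                # Split (batch, num_branches) into one column per branch.
                discrete = [flat[..., i] for i in range(flat.shape[-1])]
            return AgentActionSketch(continuous, discrete)


    acts = AgentActionSketch.from_dict({"discrete_action": np.zeros((64, 2))})
    assert acts.discrete_list[0].shape == (64,)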

         vec_obs,
         vis_obs,
         masks=act_masks,
-        actions=actions,
+        actions=agent_action,
         memories=memories,
         seq_len=policy.sequence_length,
     )

     _size = policy.behavior_spec.action_spec.continuous_size
-    assert log_probs.shape == (64, _size)
+    assert log_probs.flatten().shape == (64, _size)
     assert entropy.shape == (64,)
     for val in values.values():
         assert val.shape == (64,)
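The flatten() call keeps the old flat-shape assert working: whatever per-action log-prob tensors the object holds get packed back into one (batch, n) tensor. A sketch of that assumed behavior (the concatenation order and exact semantics are guesses, not the ml-agents source):

    import torch


    def flatten_log_probs(continuous=None, discrete=()):
        # Assumed behavior of log_probs.flatten(): concatenate the available
        # per-action log-prob tensors into a single (batch, n) tensor.
        parts = ([continuous] if continuous is not None else []) + list(discrete)
        return torch.cat(parts, dim=1)


    flat = flatten_log_probs(continuous=torch.zeros(64, 2))
    assert flat.shape == (64, 2)  # mirrors log_probs.flatten().shape == (64, _size)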

         masks=act_masks,
         memories=memories,
         seq_len=policy.sequence_length,
         all_log_probs=not policy.use_continuous_act,
     )
     if discrete:
-        assert log_probs.shape == (
+        assert log_probs.all_discrete_tensor.shape == (
             64,
             sum(policy.behavior_spec.action_spec.discrete_branches),
         )
     else:
-        assert log_probs.shape == (64, policy.behavior_spec.action_spec.continuous_size)
+        assert log_probs.continuous_tensor.shape == (
+            64,
+            policy.behavior_spec.action_spec.continuous_size,
+        )
     assert entropies.shape == (64,)
     if rnn:

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)


         selected_actions: AgentAction,
         log_probs: ActionLogProbs,
         expert_actions: torch.Tensor,
-    ):
+    ) -> torch.Tensor:
         if self.policy.use_continuous_act:
             bc_loss = torch.nn.functional.mse_loss(
                 selected_actions.continuous_tensor, expert_actions
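For the continuous branch shown above, the behavioral-cloning loss is simply the mean-squared error between the actions the policy selected and the expert's. A self-contained example of that computation (all tensors here are stand-ins):

    import torch

    batch_size, act_size = 64, 2
    selected_continuous = torch.randn(batch_size, act_size)  # selected_actions.continuous_tensor stand-in
    expert_actions = torch.randn(batch_size, act_size)       # expert demo actions stand-in

    # mse_loss reduces to a scalar mean by default, matching the diff's usage.
    bc_loss = torch.nn.functional.mse_loss(selected_continuous, expert_actions)
    print(float(bc_loss))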
