
Fix staging tests (#4708)

/fix-conflict-base-env
GitHub · 4 years ago
Current commit: 278911a5
7 changed files with 21 additions and 13 deletions
1. ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)
2. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (10 changes)
3. ml-agents/mlagents/trainers/tests/test_agent_processor.py (8 changes)
4. ml-agents/mlagents/trainers/tests/test_trajectory.py (1 change)
5. ml-agents/mlagents/trainers/tests/torch/test_action_model.py (2 changes)
6. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (2 changes)
7. ml-agents/mlagents/trainers/torch/action_model.py (9 changes)

ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)


  self.target_network.network_body.copy_normalization(
      self.policy.actor_critic.network_body
  )
- (sampled_actions, _, log_probs, _, _) = self.policy.sample_actions(
+ (sampled_actions, log_probs, _, _) = self.policy.sample_actions(
      vec_obs,
      vis_obs,
      masks=act_masks,

ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (10 changes)


          [], np.array([], dtype=np.float32), np.array([0]), None
      )
      result = policy.get_action(step_with_agents, worker_id=0)
-     assert result == ActionInfo(None, None, {}, [0])
+     assert result == ActionInfo(None, None, None, {}, [0])
  def test_take_action_returns_action_info_when_available():

      policy_eval_out = {
          "action": np.array([[1.0]], dtype=np.float32),
          "pre_action": np.array([[1.0]], dtype=np.float32),
          "memory_out": np.array([[2.5]], dtype=np.float32),
          "value": np.array([1.1], dtype=np.float32),
      }

      )
      result = policy.get_action(step_with_agents)
      print(result)
      expected = ActionInfo(
-         policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
+         policy_eval_out["action"],
+         policy_eval_out["env_action"],
+         policy_eval_out["value"],
+         policy_eval_out,
+         [0],
      )
      assert result == expected

ml-agents/mlagents/trainers/tests/test_agent_processor.py (8 changes)


"action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
"pre_action": [0.1, 0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(

)
fake_action_info = ActionInfo(
action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
value=[0.1, 0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_steps.agent_id,

action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0

"action": ActionTuple(continuous=np.array([[0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
"pre_action": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
}

)
fake_action_info = ActionInfo(
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

"action": ActionTuple(continuous=np.array([[0.1]])),
"entropy": np.array([1.0], dtype=np.float32),
"learning_rate": 1.0,
"pre_action": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
}

)
fake_action_info = ActionInfo(
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,
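
Note: these test updates rely on two API details that the diff only shows indirectly: a new env_action field on ActionInfo and an ActionInfo.empty() helper. The snippet below is a hypothetical stand-in, not the ml-agents source; it mirrors the field order visible in the tests above and assumes empty() simply returns empty fields.

# Hypothetical stand-in, not the ml-agents implementation.
from typing import Any, Dict, List, NamedTuple


class ActionInfo(NamedTuple):
    action: Any
    env_action: Any  # new field: the action actually sent to the environment
    value: Any
    outputs: Dict[str, Any]
    agent_ids: List[Any]

    @staticmethod
    def empty() -> "ActionInfo":
        # Assumed to replace the old ActionInfo([], [], {}, []) literal,
        # with the new env_action slot also left empty.
        return ActionInfo([], [], [], {}, [])


# Usage mirroring the updated call site above:
empty_info = ActionInfo.empty()
assert empty_info.outputs == {} and empty_info.agent_ids == []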

ml-agents/mlagents/trainers/tests/test_trajectory.py (1 change)


"memory",
"masks",
"done",
"actions_pre",
"continuous_action",
"discrete_action",
"continuous_log_probs",

ml-agents/mlagents/trainers/tests/torch/test_action_model.py (2 changes)


      assert log_probs.discrete_list[0] > log_probs.discrete_list[1]
-     for ent, val in zip(entropies[0], [1.4189, 1.4189, 0.6191, 0.6191]):
+     for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
          assert ent == pytest.approx(val, abs=0.01)
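
For reference, the 1.4189 constant is the differential entropy of a unit Gaussian, 0.5 * ln(2πe) ≈ 1.4189; the diff itself does not explain why the expected list drops one of these terms. A quick standalone check (plain Python, illustrative only):

# Illustrative check: differential entropy of a standard normal distribution,
# matching the 1.4189 constant asserted in the test above.
import math

gaussian_entropy = 0.5 * math.log(2 * math.pi * math.e)
print(round(gaussian_entropy, 4))  # 1.4189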

ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (2 changes)


          SAC_TORCH_CONFIG,
          hyperparameters=new_hyperparams,
          network_settings=new_networksettings,
-         max_steps=2000,
+         max_steps=4000,
      )
      check_environment_trains(env, {BRAIN_NAME: config})

ml-agents/mlagents/trainers/torch/action_model.py (9 changes)


      # This checks None because mypy complains otherwise
      if dists.continuous is not None:
          continuous_action = dists.continuous.sample()
+         if self._clip_action_on_export:
+             continuous_action = torch.clamp(continuous_action, -3, 3) / 3
      if dists.discrete is not None:
          discrete_action = []
          for discrete_dist in dists.discrete:

      out_list: List[torch.Tensor] = []
      # This checks None because mypy complains otherwise
      if dists.continuous is not None:
-         out_list.append(dists.continuous.exported_model_output())
+         continuous_action_export = dists.continuous.exported_model_output()
+         if self._clip_action_on_export:
+             continuous_action_export = (
+                 torch.clamp(continuous_action_export, -3, 3) / 3
+             )
+         out_list.append(continuous_action_export)
      if dists.discrete is not None:
          for discrete_dist in dists.discrete:
              out_list.append(discrete_dist.exported_model_output())
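
The clipping added here clamps the raw continuous output to [-3, 3] and divides by 3, so the sampled and exported actions always land in [-1, 1]. A standalone sketch of that transform (plain PyTorch, not the ml-agents code):

# Standalone illustration of the export-time clipping added above: clamp to
# [-3, 3], then rescale so every continuous action lies in [-1, 1].
import torch

raw_actions = torch.tensor([-5.0, -1.5, 0.0, 2.0, 7.0])
clipped = torch.clamp(raw_actions, -3, 3) / 3
print(clipped)  # tensor([-1.0000, -0.5000,  0.0000,  0.6667,  1.0000])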
