Fixed the torch SAC tests

4 年前 · 7af25330
--- a/ml-agents/mlagents/trainers/sac/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/sac/optimizer_torch.py
        cont_sampled_actions = sampled_actions.continuous_tensor

        cont_actions = actions.continuous_tensor
-        disc_actions = actions.discrete_tensor
        q1p_out, q2p_out = self.value_network(
            vec_obs,
            vis_obs,
            sequence_length=self.policy.sequence_length,
        )

-        if self._action_spec.discrete_size:
+        if self._action_spec.discrete_size > 0:
+            disc_actions = actions.discrete_tensor
            q1_stream = self._condense_q_streams(q1_out, disc_actions)
            q2_stream = self._condense_q_streams(q2_out, disc_actions)
        else:
--- a/ml-agents/mlagents/trainers/tests/torch/test_sac.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_sac.py
        "Losses/Value Loss",
        "Losses/Q1 Loss",
        "Losses/Q2 Loss",
-        "Policy/Entropy Coeff",
+        "Policy/Continuous Entropy Coeff",
+        "Policy/Discrete Entropy Coeff",
        "Policy/Learning Rate",
    ]
    for stat in required_stats:
--- a/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py

@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
 def test_recurrent_sac(action_sizes):
-    step_size = 0.2 if action_sizes else 0.5
+    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
    )