[bug-fix] Fix issue with more than one continuous action (#3547)

5 年前 · 870338b4
--- a/ml-agents/mlagents/trainers/ppo/optimizer.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer.py
            self.stream_names, hidden_value
        )
        self.all_old_log_probs = tf.placeholder(
-            shape=[None, 1], dtype=tf.float32, name="old_probabilities"
+            shape=[None, sum(self.policy.act_size)],
+            dtype=tf.float32,
+            name="old_probabilities",
        )

        self.old_log_probs = tf.reduce_sum(
--- a/ml-agents/mlagents/trainers/tests/mock_brain.py
+++ b/ml-agents/mlagents/trainers/tests/mock_brain.py
            action_probs = np.ones(np.sum(action_space), dtype=np.float32)
        else:
            action_size = action_space[0]
-            action_probs = np.ones((1), dtype=np.float32)
+            action_probs = np.ones((action_size), dtype=np.float32)
        action = np.zeros(action_size, dtype=np.float32)
        action_pre = np.zeros(action_size, dtype=np.float32)
        action_mask = (
--- a/ml-agents/tests/yamato/training_int_tests.py
+++ b/ml-agents/tests/yamato/training_int_tests.py

    init_venv()

-    # Copy the default training config but override the max_steps parameter
-    override_config_file("config/trainer_config.yaml", "override.yaml", max_steps=100)
+    # Copy the default training config but override the max_steps parameter,
+    # and reduce the batch_size and buffer_size enough to ensure an update step happens.
+    override_config_file(
+        "config/trainer_config.yaml",
+        "override.yaml",
+        max_steps=100,
+        batch_size=10,
+        buffer_size=10,
+    )

    # TODO pass scene name and exe destination to build
    # TODO make sure we fail if the exe isn't found - see MLA-559