Fix out-of-order academy reset: call AcademyReset() before the loop over brains instead of after it

7 年前 · fc1b8a1b
--- a/python/ppo.py
+++ b/python/ppo.py
 Options:
  --help                     Show this message.
  --batch-size=<n>           How many experiences per gradient descent update step [default: 64].
-  --beta=<n>                 Strength of entropy regularization [default: 5e-3].
+  --beta=<n>                 Strength of entropy regularization [default: 2e-3].
  --buffer-size=<n>          How large the experience buffer should be before gradient descent [default: 2048].
  --curriculum=<file>        Curriculum json file for environment [default: None].
  --epsilon=<n>              Acceptable threshold around ratio of old and new policy probabilities [default: 0.2].
--- a/unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
+++ b/unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
    // to be implemented by the developer
    public override void AgentStep(float[] act)
    {
-
        reward = -0.01f;
        int action = Mathf.FloorToInt(act[0]);

--- a/unity-environment/Assets/ML-Agents/Scripts/Academy.cs
+++ b/unity-environment/Assets/ML-Agents/Scripts/Academy.cs
        currentStep = 0;
        episodeCount++;
        done = false;
+        AcademyReset();
+

        foreach (Brain brain in brains)
        {

-        AcademyReset();
    }

    // Instructs all brains to collect states from their agents.