No target network (soft-update factor 0.005 -> 1.0); increase lambda (lambd 0.8 -> 0.95)

4 years ago · bd341f7f
--- a/config/ppo/PushBlock.yaml
+++ b/config/ppo/PushBlock.yaml
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
-      lambd: 0.8
+      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
--- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
        self.optimizer.step()

        ModelUtils.soft_update(
-            self.policy.actor_critic.critic, self.policy.actor_critic.target, 0.005
+            self.policy.actor_critic.critic, self.policy.actor_critic.target, 1.0
        )
        update_stats = {
            # NOTE: abs() is not technically correct, but matches the behavior in TensorFlow.
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
            # This is later used as the target for the different value estimates
            # agent_buffer_trajectory[f"{name}_returns"].set(local_return)
            agent_buffer_trajectory[f"{name}_returns_q"].set(returns_v)
-            agent_buffer_trajectory[f"{name}_returns_b"].set(returns_v)
+            agent_buffer_trajectory[f"{name}_returns_b"].set(returns_b)
            agent_buffer_trajectory[f"{name}_returns_v"].set(returns_v)
            agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
            tmp_advantages.append(local_advantage)