shared baseline and v

4 年前 · f9ff3fef
--- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
            team_act=team_actions,
        )

-        value_estimates, mem = self.policy.actor_critic.target_critic_value(
+        value_estimates, mem = self.policy.actor_critic.critic_value(
            current_obs,
            memory,
            sequence_length=batch.num_experiences,
-        boot_value_estimates, mem = self.policy.actor_critic.target_critic_value(
+        boot_value_estimates, mem = self.policy.actor_critic.critic_value(
            next_obs,
            memory,
            sequence_length=batch.num_experiences,
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
            team_obs=team_obs,
            team_act=team_act,
        )
-        value_outputs, _ = self.target_critic_value(inputs, memories=critic_mem, sequence_length=sequence_length, team_obs=team_obs)
+        value_outputs, _ = self.critic_value(inputs, memories=critic_mem, sequence_length=sequence_length, team_obs=team_obs)

        return log_probs, entropies, q_outputs, baseline_outputs, value_outputs