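# Look up this agent's slice of the batched outputs from the last policy evaluation.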
action = take_action_outputs["action"][idx]
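# Continuous-action policies store two extra outputs; discrete ones leave them unset.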
if self.policy.use_continuous_act:
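    # "pre_action" is the action before post-processing; "random_normal_epsilon"
    # is the Gaussian sample used to produce it.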
    action_pre = take_action_outputs["pre_action"][idx]
    epsilon = take_action_outputs["random_normal_epsilon"][idx]
else:
    action_pre = None
    epsilon = None
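# Log-probabilities of the taken action, kept for the later policy update.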
action_probs = take_action_outputs["log_probs"][idx]
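# Masks of discrete actions that were unavailable when this step was taken.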
action_masks = stored_info.action_masks[idx]
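# The agent's previous action, as tracked by the policy.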
prev_action = self.policy.retrieve_previous_action([agent_id])[0, :]
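# Assemble the AgentExperience. NOTE: the opening of this call and the five
# keyword arguments directly below it are an assumed reconstruction from the
# locals computed above; the remaining per-step fields (observation, reward,
# done flag) are not shown in this excerpt.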
experience = AgentExperience(
    action=action,
    action_probs=action_probs,
    action_pre=action_pre,
    action_mask=action_masks,
    prev_action=prev_action,
    max_step=max_step,
    agent_id=agent_id,
    memory=memory,
    epsilon=epsilon,
)
# Add the value outputs if needed
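# Queue the experience in this agent's buffer for later trajectory assembly.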
self.experience_buffers[agent_id].append(experience)