Addressed some comments

4 年前 · ae659ac4
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
        """
        self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
        self.last_step_result: Dict[str, Tuple[DecisionStep, int]] = {}
-        # current_group_obs is used to collect the last seen obs of all the agents in the same group,
-        # and assemble the group obs.
+        # current_group_obs is used to collect the current, most recently seen
+        # obs of all the agents in the same group, and assemble the group obs.
-        # last_group_obs is used to collect the last seen obs of all the agents in the same group,
-        # and assemble the group obs.
+        # group_status is used to collect the current, most recently seen
+        # group status of all the agents in the same group, and assemble the group's status.
        self.group_status: Dict[str, Dict[str, GroupmateStatus]] = defaultdict(
            lambda: defaultdict(None)
        )
            # Clear the last seen group obs when agents die.
            self._clear_group_obs(global_id)

-        # Clean the last experience dictionary for terminal steps
-        for terminal_step in terminal_steps.values():
-            local_id = terminal_step.agent_id
-            global_id = get_global_agent_id(worker_id, local_id)
-
        # Iterate over all the decision steps, first gather all the teammate obs
        # and then create the trajectories
        for ongoing_step in decision_steps.values():
            global_agent_id, None
        )
        if stored_decision_step is not None and stored_take_action_outputs is not None:
+            # 0, the default group_id, means that the agent doesn't belong to an agent group.
+            # If 0, don't add any groupmate information.
            if step.group_id > 0:
                global_group_id = get_global_group_id(worker_id, step.group_id)
                stored_actions = stored_take_action_outputs["action"]