|
|
|
|
|
|
import logging
from collections import defaultdict, deque

import numpy as np

from mlagents.trainers.action_info import ActionInfoOutputs
from mlagents.trainers.brain import BrainInfo, BrainParameters
from mlagents.trainers.trajectory import Trajectory
|
|
|
|
|
|
|
# Shared module-level logger for all trainer classes (requires the
# top-of-file `import logging`).
LOGGER = logging.getLogger("mlagents.trainers")
|
|
|
|
|
|
|
|
|
|
class UnityTrainerException(Exception):
    """
    Related to errors with the Trainer.

    NOTE(review): this definition is reconstructed from a corrupted span
    (an orphan ``)`` and ``pass``); the name is raised by the abstract
    methods below but neither imported nor otherwise defined in this
    chunk. Upstream it may derive from a project-level ``UnityException``
    base — confirm against the full file before relying on the hierarchy.
    """

    pass
|
|
|
|
|
|
|
def add_experiences(
    self,
    curr_info: BrainInfo,
    next_info: BrainInfo,
    take_action_outputs: ActionInfoOutputs,
) -> None:
    """
    Adds experiences to each agent's experience history.

    Abstract: concrete trainers must override this; the base
    implementation always raises.

    :param curr_info: current BrainInfo.
    :param next_info: next BrainInfo.
    :param take_action_outputs: The outputs of the Policy's get_action method.
    :raises UnityTrainerException: always, in this base implementation.
    """
    raise UnityTrainerException("The add_experiences method was not implemented.")

def process_trajectory(self, trajectory: Trajectory) -> None:
    """
    Takes a trajectory and processes it, putting it into the update buffer.

    Abstract: concrete trainers must override this; the base
    implementation always raises. (This header was interleaved into
    add_experiences' docstring in the corrupted original; it is
    restored here as its own method so the name stays defined.)

    :param trajectory: The Trajectory tuple containing the steps to be processed.
    :raises UnityTrainerException: always, in this base implementation.
    """
    raise UnityTrainerException("The process_trajectory method was not implemented.")
|
|
|
|
|
|
|
def process_experiences(
    self, current_info: BrainInfo, next_info: BrainInfo
) -> None:
    """
    Checks agent histories for processing condition, and processes them as necessary.

    Abstract: concrete trainers must override this; the base
    implementation always raises UnityTrainerException.

    :param current_info: current BrainInfo.
    :param next_info: next BrainInfo.
    """
    # NOTE(review): the closing parenthesis of this raise lies beyond the
    # visible end of this chunk; the statement is intentionally left open here.
    raise UnityTrainerException(
        "The process_experiences method was not implemented."
|
|