
Fix some mypy issues and remove unused code

/develop-newnormalization
Ervin Teng, 5 years ago
Commit e577d5ea
4 changed files with 9 additions and 35 deletions
  1. ml-agents/mlagents/trainers/action_info.py (3 changes)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  3. ml-agents/mlagents/trainers/rl_trainer.py (13 changes)
  4. ml-agents/mlagents/trainers/trainer.py (27 changes)

ml-agents/mlagents/trainers/action_info.py (3 changes)

 from typing import NamedTuple, Any, Dict
+import numpy as np

-ActionInfoOutputs = Dict[str, Any]
+ActionInfoOutputs = Dict[str, np.ndarray]

 class ActionInfo(NamedTuple):
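Narrowing the alias from Dict[str, Any] to Dict[str, np.ndarray] is what lets mypy actually check downstream consumers. A minimal sketch of the effect, using a hypothetical total_entropy helper that is not part of this commit:

import numpy as np
from typing import Dict

ActionInfoOutputs = Dict[str, np.ndarray]

def total_entropy(outputs: ActionInfoOutputs) -> float:
    # With Dict[str, np.ndarray], mypy knows the values have .sum();
    # under the old Dict[str, Any] this call went entirely unchecked.
    return float(outputs["entropy"].sum())

outputs: ActionInfoOutputs = {"entropy": np.array([0.1, 0.2])}
print(total_entropy(outputs))  # 0.30000000000000004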

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)

         """
         Takes a trajectory and processes it, putting it into the update buffer.
         Processing involves calculating value and advantage targets for model updating step.
         :param trajectory: The Trajectory tuple containing the steps to be processed.
         """
+        agent_id = trajectory.agent_id  # All the agents should have the same ID
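For context, the "value and advantage targets" this docstring refers to are conventionally computed with generalized advantage estimation (GAE). A minimal sketch of that calculation, assuming plain numpy arrays and a bootstrap value for the state after the trajectory, rather than the trainer's actual buffer types:

import numpy as np

def gae_targets(rewards: np.ndarray, values: np.ndarray, value_next: float,
                gamma: float = 0.99, lam: float = 0.95):
    # GAE: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
    #      A_t = sum_k (gamma * lam)^k * delta_{t+k}
    values_ext = np.append(values, value_next)
    deltas = rewards + gamma * values_ext[1:] - values_ext[:-1]
    advantages = np.zeros_like(rewards, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = deltas[t] + gamma * lam * running
        advantages[t] = running
    returns = advantages + values  # value targets for the critic update
    return returns, advantages

r = np.array([1.0, 1.0, 1.0])
v = np.array([0.5, 0.5, 0.5])
print(gae_targets(r, v, value_next=0.0))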

ml-agents/mlagents/trainers/rl_trainer.py (13 changes)

 # # Unity ML-Agents Toolkit
 import logging
-from typing import Dict, NamedTuple
+from typing import Dict

 import numpy as np

 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.trainer import Trainer, UnityTrainerException

-RewardSignalResults = Dict[str, RewardSignalResult]
-
-class AllRewardsOutput(NamedTuple):
-    """
-    This class stores all of the outputs of the reward signals,
-    as well as the raw reward from the environment.
-    """
-
-    reward_signals: RewardSignalResults
-    environment: np.ndarray

 class RLTrainer(Trainer):
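The deleted AllRewardsOutput was a typed container pairing each reward signal's output with the raw environment reward; this commit removes it as unused. For reference, a self-contained sketch of the removed pattern, with RewardSignalResult stubbed out here since its real definition lives in the reward-signal components:

from typing import Dict, NamedTuple
import numpy as np

class RewardSignalResult(NamedTuple):
    # Stub for illustration; the real class lives in
    # mlagents.trainers.components.reward_signals.
    scaled_reward: np.ndarray
    unscaled_reward: np.ndarray

RewardSignalResults = Dict[str, RewardSignalResult]

class AllRewardsOutput(NamedTuple):
    """Outputs of all reward signals plus the raw environment reward."""
    reward_signals: RewardSignalResults
    environment: np.ndarray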

ml-agents/mlagents/trainers/trainer.py (27 changes)

 import numpy as np
 from collections import deque, defaultdict

 from mlagents.trainers.action_info import ActionInfoOutputs
-from mlagents.trainers.brain import BrainParameters, BrainInfo
+from mlagents.trainers.trajectory import Trajectory
+from mlagents.trainers.brain import BrainParameters

 LOGGER = logging.getLogger("mlagents.trainers")

         )
         pass

-    def add_experiences(
-        self,
-        curr_info: BrainInfo,
-        next_info: BrainInfo,
-        take_action_outputs: ActionInfoOutputs,
-    ) -> None:
+    def process_trajectory(self, trajectory: Trajectory) -> None:
         """
-        Adds experiences to each agent's experience history.
-        :param curr_info: current BrainInfo.
-        :param next_info: next BrainInfo.
-        :param take_action_outputs: The outputs of the Policy's get_action method.
+        Takes a trajectory and processes it, putting it into the update buffer.
+        :param trajectory: The Trajectory tuple containing the steps to be processed.
         """
-        raise UnityTrainerException("The add_experiences method was not implemented.")
-
-    def process_experiences(
-        self, current_info: BrainInfo, next_info: BrainInfo
-    ) -> None:
-        """
-        Checks agent histories for processing condition, and processes them as necessary.
-        :param current_info: current BrainInfo.
-        :param next_info: next BrainInfo.
-        """
         raise UnityTrainerException(
             "The process_experiences method was not implemented."
         )
