浏览代码

put team reward in decision steps

/MLA-1734-demo-provider
Ruo-Ping Dong 4 年前
当前提交
a94acec6
共有 1 个文件被更改,包括 10 次插入和 2 次删除
  1. 12
      ml-agents-envs/mlagents_envs/base_env.py

12
ml-agents-envs/mlagents_envs/base_env.py


this simulation step.
"""
def __init__(self, obs, reward, agent_id, action_mask, team_manager_id):
def __init__(
self, obs, reward, team_reward, agent_id, action_mask, team_manager_id
):
self.team_reward: np.ndarray = team_reward
self.agent_id: np.ndarray = agent_id
self.action_mask: Optional[List[np.ndarray]] = action_mask
self.team_manager_id: np.ndarray = team_manager_id

return DecisionSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),
team_reward=np.zeros(0, dtype=np.float32),
agent_id=np.zeros(0, dtype=np.int32),
action_mask=None,
team_manager_id=np.zeros(0, dtype=np.int32),

across simulation steps.
"""
def __init__(self, obs, reward, interrupted, agent_id, team_manager_id):
def __init__(
self, obs, reward, team_reward, interrupted, agent_id, team_manager_id
):
self.team_reward: np.ndarray = team_reward
self.interrupted: np.ndarray = interrupted
self.agent_id: np.ndarray = agent_id
self.team_manager_id: np.ndarray = team_manager_id

return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),
team_reward=np.zeros(0, dtype=np.float32),
interrupted=np.zeros(0, dtype=np.bool),
agent_id=np.zeros(0, dtype=np.int32),
team_manager_id=np.zeros(0, dtype=np.int32),

正在加载...
取消
保存