浏览代码

put team reward in decision steps

/MLA-1734-demo-provider
Ruo-Ping Dong 3 年前
当前提交
bdc857f7
共有 2 个文件被更改,包括 16 次插入和 6 次删除
  1. 3
      com.unity.ml-agents/Runtime/ITeamManager.cs
  2. 19
      ml-agents-envs/mlagents_envs/rpc_utils.py

3
com.unity.ml-agents/Runtime/ITeamManager.cs


using System.Collections.Generic;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents
{
public interface ITeamManager

19
ml-agents-envs/mlagents_envs/rpc_utils.py


[agent_info.reward for agent_info in terminal_agent_info_list], dtype=np.float32
)
decision_team_rewards = np.array(
[agent_info.team_reward for agent_info in decision_agent_info_list],
dtype=np.float32,
)
terminal_team_rewards = np.array(
[agent_info.team_reward for agent_info in terminal_agent_info_list],
dtype=np.float32,
)
_raise_on_nan_and_inf(decision_rewards, "rewards")
_raise_on_nan_and_inf(terminal_rewards, "rewards")
_raise_on_nan_and_inf(decision_team_rewards, "rewards")
_raise_on_nan_and_inf(terminal_team_rewards, "rewards")
decision_team_managers = [
agent_info.team_manager_id for agent_info in decision_agent_info_list
]

_raise_on_nan_and_inf(decision_rewards, "rewards")
_raise_on_nan_and_inf(terminal_rewards, "rewards")
max_step = np.array(
[agent_info.max_step_reached for agent_info in terminal_agent_info_list],

DecisionSteps(
decision_obs_list,
decision_rewards,
decision_team_rewards,
decision_agent_id,
action_mask,
decision_team_managers,

terminal_rewards,
terminal_team_rewards,
max_step,
terminal_agent_id,
terminal_team_managers,

正在加载...
取消
保存