浏览代码

Add team dones

/develop/coma-withq
Ervin Teng 4 年前
当前提交
b71f38cf
共有 2 个文件被更改,包括 16 次插入和 1 次删除
  1. 1
      ml-agents/mlagents/trainers/agent_processor.py
  2. 16
      ml-agents/mlagents/trainers/trajectory.py

1
ml-agents/mlagents/trainers/agent_processor.py


obs=stored_decision_step.obs,
reward=step.reward,
action=action_tuple,
done=isinstance(step, TerminalStep),
)
self.teammate_status[step.team_manager_id][global_id] = teammate_status
self.current_group_obs[step.team_manager_id][global_id] = step.obs

16
ml-agents/mlagents/trainers/trajectory.py


obs: List[np.ndarray]
reward: float
action: ActionTuple
done: bool
@attr.s(auto_attribs=True)

)
agent_buffer_trajectory["team_rewards"].append(teammate_rewards)
team_reward = teammate_rewards + [exp.reward]
agent_buffer_trajectory["average_team_reward"].append(sum(team_reward)/len(team_reward))
agent_buffer_trajectory["average_team_reward"].append(
sum(team_reward) / len(team_reward)
)
# Next actions
teammate_cont_next_actions = []

agent_buffer_trajectory["masks"].append(1.0)
agent_buffer_trajectory["done"].append(exp.done)
agent_buffer_trajectory["team_dones"].append(
[_status.done for _status in exp.teammate_status]
)
# Adds the log prob and action of continuous/discrete separately
agent_buffer_trajectory["continuous_action"].append(exp.action.continuous)

Returns true if trajectory is terminated with a Done.
"""
return self.steps[-1].done
@property
def teammate_dones_reached(self) -> bool:
    """
    Whether every teammate is done at the final step of the trajectory.

    Only the teammate statuses stored on the last step are inspected; the
    agent's own flag is not. Use together with done_reached to check that
    the whole team is done.
    """
    final_step = self.steps[-1]
    for teammate in final_step.teammate_status:
        # A single unfinished teammate settles the answer.
        if not teammate.done:
            return False
    return True
@property
def interrupted(self) -> bool:

正在加载...
取消
保存