Browse Source

Test for group and add team reward

/develop/coma2/samenet
Ervin Teng, 3 years ago
Current commit: a81512c9
3 changed files, with 32 additions and 16 deletions
  1. 12  ml-agents/mlagents/trainers/tests/mock_brain.py
  2. 18  ml-agents/mlagents/trainers/tests/test_trajectory.py
  3. 18  ml-agents/mlagents/trainers/trajectory.py

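For orientation, a minimal sketch of the two structures this diff leans on, reconstructed from how the code below uses them; field types are assumptions, and the authoritative definitions live in ml-agents/mlagents/trainers/trajectory.py:

    # Sketch only: field names follow the diff, types and field order are assumptions.
    from typing import List, NamedTuple

    import numpy as np
    from mlagents_envs.base_env import ActionTuple

    class GroupmateStatus(NamedTuple):
        # One groupmate's view of a single step, attached to the focal agent's experience.
        obs: List[np.ndarray]
        reward: float
        action: ActionTuple
        done: bool

    class AgentExperience(NamedTuple):
        # Trimmed to the fields the diff touches; the real class carries more.
        obs: List[np.ndarray]
        reward: float
        done: bool
        group_status: List[GroupmateStatus]  # one entry per other agent in the group
        group_reward: float  # reward shared by the whole group, supplied by the env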
12  ml-agents/mlagents/trainers/tests/mock_brain.py

 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.torch.action_log_probs import LogProbsTuple
-from mlagents.trainers.trajectory import Trajectory, AgentExperience
+from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
 from mlagents_envs.base_env import (
     DecisionSteps,
     TerminalSteps,

     action_spec: ActionSpec,
     max_step_complete: bool = False,
     memory_size: int = 10,
+    num_other_agents_in_group: int = 0,
 ) -> Trajectory:
     """
     Makes a fake trajectory of length length. If max_step_complete,

     memory = np.ones(memory_size, dtype=np.float32)
     agent_id = "test_agent"
     behavior_id = "test_brain"
+    group_status = []
+    for _ in range(num_other_agents_in_group):
+        group_status.append(GroupmateStatus(obs, reward, action, done))
     experience = AgentExperience(
         obs=obs,
         reward=reward,

         prev_action=prev_action,
         interrupted=max_step,
         memory=memory,
-        group_status=[],
+        group_status=group_status,
         group_reward=0,
     )
     steps_list.append(experience)

         prev_action=prev_action,
         interrupted=max_step_complete,
         memory=memory,
-        group_status=[],
+        group_status=group_status,
         group_reward=0,
     )
     steps_list.append(last_experience)

         behavior_id=behavior_id,
         next_obs=obs,
-        next_group_obs=[],
+        next_group_obs=[obs] * num_other_agents_in_group,
     )

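Net effect of the mock changes: each fake step now carries num_other_agents_in_group GroupmateStatus entries cloned from the focal agent's own obs/reward/action/done, and next_group_obs is padded to match. A hypothetical call, with import paths and helper signatures assumed from the surrounding tests:

    # Hypothetical usage; import paths and helper signatures are assumptions.
    from mlagents.trainers.tests.mock_brain import make_fake_trajectory
    from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes
    from mlagents_envs.base_env import ActionSpec

    traj = make_fake_trajectory(
        length=10,
        observation_specs=create_observation_specs_with_shapes([(6,)]),
        action_spec=ActionSpec.create_continuous(2),
        num_other_agents_in_group=4,  # new parameter from this commit
    )
    assert all(len(exp.group_status) == 4 for exp in traj.steps)
    assert len(traj.next_group_obs) == 4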
18  ml-agents/mlagents/trainers/tests/test_trajectory.py

"action_mask",
"prev_action",
"environment_rewards",
"group_reward",
wanted_keys = set(wanted_keys)
wanted_group_keys = [
"group_obs_0",
"group_obs_1",
"group_obs_next_0",
"group_obs_next_1",
"groupmate_rewards",
"group_dones",
]
wanted_keys = set(wanted_keys + wanted_group_keys)
     trajectory = make_fake_trajectory(
         length=length,
         observation_specs=create_observation_specs_with_shapes(

+        num_other_agents_in_group=4,
     )
     agentbuffer = trajectory.to_agentbuffer()
     seen_keys = set()

-    assert seen_keys == wanted_keys
+    assert seen_keys.issuperset(wanted_keys)
+    for _key in wanted_group_keys:
+        for step in agentbuffer[_key]:
+            assert len(step) == 4

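Two details worth flagging in the test: the exact-set comparison is relaxed to issuperset, so extra buffer keys no longer fail the test, and the new inner loop pins the per-step width of every group key to the number of groupmates. A toy illustration of the asserted layout, with a plain dict standing in for AgentBuffer:

    # Toy stand-in for the asserted layout; the real container is an AgentBuffer.
    length, num_groupmates = 15, 4
    buffer = {
        "group_dones": [[False] * num_groupmates for _ in range(length)],
        "groupmate_rewards": [[1.0] * num_groupmates for _ in range(length)],
    }
    for key in ("group_dones", "groupmate_rewards"):
        for step in buffer[key]:
            assert len(step) == num_groupmates  # one entry per groupmate, every step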
18  ml-agents/mlagents/trainers/trajectory.py

             teammate_discrete_actions.append(group_status.action.discrete)
         # Team actions
-        agent_buffer_trajectory["team_continuous_action"].append(
+        agent_buffer_trajectory["group_continuous_action"].append(
-        agent_buffer_trajectory["team_discrete_action"].append(
+        agent_buffer_trajectory["group_discrete_action"].append(
-        agent_buffer_trajectory["team_rewards"].append(teammate_rewards)
-        team_reward = teammate_rewards + [exp.reward]
-        agent_buffer_trajectory["average_team_reward"].append(
-            sum(team_reward) / len(team_reward)
-        )
+        agent_buffer_trajectory["groupmate_rewards"].append(teammate_rewards)
+        agent_buffer_trajectory["group_reward"].append(exp.group_reward)
         # Next actions
         teammate_cont_next_actions = []

             teammate_cont_next_actions.append(group_status.action.continuous)
             teammate_disc_next_actions.append(group_status.action.discrete)
-        agent_buffer_trajectory["team_next_continuous_action"].append(
+        agent_buffer_trajectory["group_next_continuous_action"].append(
-        agent_buffer_trajectory["team_next_discrete_action"].append(
+        agent_buffer_trajectory["group_next_discrete_action"].append(
             teammate_disc_next_actions
         )

         agent_buffer_trajectory["masks"].append(1.0)
         agent_buffer_trajectory["done"].append(exp.done)
-        agent_buffer_trajectory["team_dones"].append(
+        agent_buffer_trajectory["group_dones"].append(
             [_status.done for _status in exp.group_status]
         )

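Beyond the team_* to group_* renames, the substantive change is the reward bookkeeping: the precomputed average_team_reward is dropped, and the buffer instead keeps the raw per-groupmate rewards alongside the environment-supplied group reward. A condensed, runnable before/after sketch with stand-ins for the loop-local names:

    # Stand-ins for names that come from the surrounding to_agentbuffer loop.
    from collections import defaultdict

    class _Exp:  # minimal stand-in for AgentExperience
        reward = 1.0
        group_reward = 2.0

    exp = _Exp()
    teammate_rewards = [0.5, 1.0, 0.0]  # gathered from exp.group_status upstream
    agent_buffer_trajectory = defaultdict(list)  # real type is AgentBuffer

    # Before: a mean over own + groupmate rewards was baked into the buffer.
    team_reward = teammate_rewards + [exp.reward]
    agent_buffer_trajectory["average_team_reward"].append(
        sum(team_reward) / len(team_reward)
    )

    # After: store the ingredients unmixed and let the trainer combine them.
    agent_buffer_trajectory["groupmate_rewards"].append(teammate_rewards)
    agent_buffer_trajectory["group_reward"].append(exp.group_reward)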