
Renaming fest

/develop/coma2/samenet
Ervin Teng, 4 years ago
Current commit: eb13a14a
2 files changed, 54 insertions and 52 deletions
  1. ml-agents/mlagents/trainers/agent_processor.py (28 lines changed)
  2. ml-agents/mlagents/trainers/trajectory.py (78 lines changed)

ml-agents/mlagents/trainers/agent_processor.py (28 lines changed)


StatsAggregationMethod,
EnvironmentStats,
)
-from mlagents.trainers.trajectory import TeammateStatus, Trajectory, AgentExperience
+from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
self.last_step_result: Dict[str, Tuple[DecisionStep, int]] = {}
# current_group_obs is used to collect the last seen obs of all the agents in the same group,
-# and assemble the collab_obs.
+# and assemble the group obs.
-# and assemble the collab_obs.
-self.teammate_status: Dict[str, Dict[str, TeammateStatus]] = defaultdict(
+# and assemble the group obs.
+self.group_status: Dict[str, Dict[str, GroupmateStatus]] = defaultdict(
lambda: defaultdict(None)
)
# last_take_action_outputs stores the action a_t taken before the current observation s_(t+1), while
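Note on the renamed structure: the processor keeps the latest GroupmateStatus for every agent, nested under that agent's group manager id. A minimal, self-contained sketch of that bookkeeping; the GroupmateStatus stub and the example ids are placeholders, and the real class lives in mlagents.trainers.trajectory:

from collections import defaultdict
from typing import Dict

class GroupmateStatus:
    # Placeholder stand-in for mlagents.trainers.trajectory.GroupmateStatus,
    # only so the mapping below runs on its own.
    def __init__(self, obs, reward, done, action):
        self.obs, self.reward, self.done, self.action = obs, reward, done, action

# team_manager_id -> (global agent id -> latest GroupmateStatus), as declared in the diff.
# defaultdict(None) supplies no default factory, so the inner level behaves like a plain
# dict: only the outer, per-manager level is created on demand.
group_status: Dict[str, Dict[str, GroupmateStatus]] = defaultdict(
    lambda: defaultdict(None)
)

group_status["manager-1"]["agent-0"] = GroupmateStatus(
    obs=[], reward=0.0, done=False, action=None
)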

continuous=stored_actions.continuous[idx],
discrete=stored_actions.discrete[idx],
)
-teammate_status = TeammateStatus(
+group_status = GroupmateStatus(
-self.teammate_status[step.team_manager_id][global_id] = teammate_status
+self.group_status[step.team_manager_id][global_id] = group_status
-self._delete_in_nested_dict(self.teammate_status, global_id)
+self._delete_in_nested_dict(self.group_status, global_id)
def _delete_in_nested_dict(self, nested_dict, key):
for _manager_id in list(nested_dict.keys()):
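The hunk only shows the helper's signature and its loop over manager ids. A plausible sketch of the cleanup it performs once an agent terminates, written as a free function for brevity; the pop-based body is an assumption, not the source's implementation:

def _delete_in_nested_dict(nested_dict, key):
    # Sketch: drop `key` (a global agent id) from every per-manager
    # sub-dictionary, matching the loop over manager ids shown above.
    for _manager_id in list(nested_dict.keys()):
        nested_dict[_manager_id].pop(key, None)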

prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
# Assemble teammate_obs. If none saved, then it will be an empty list.
-teammate_statuses = []
-for _id, _obs in self.teammate_status[step.team_manager_id].items():
+group_statuses = []
+for _id, _obs in self.group_status[step.team_manager_id].items():
-teammate_statuses.append(_obs)
+group_statuses.append(_obs)
-teammate_status=teammate_statuses,
+group_status=group_statuses,
reward=step.reward,
done=done,
action=action_tuple,

or terminated
):
next_obs = step.obs
-next_collab_obs = []
+next_group_obs = []
-next_collab_obs.append(_exp)
+next_group_obs.append(_exp)
-next_collab_obs=next_collab_obs,
+next_group_obs=next_group_obs,
behavior_id=self.behavior_id,
)
for traj_queue in self.trajectory_queues:

ml-agents/mlagents/trainers/trajectory.py (78 lines changed)


@attr.s(auto_attribs=True)
-class TeammateStatus:
+class GroupmateStatus:
"""
Stores data related to an agent's teammate.
"""

@attr.s(auto_attribs=True)
class AgentExperience:
obs: List[np.ndarray]
-teammate_status: List[TeammateStatus]
+group_status: List[GroupmateStatus]
reward: float
done: bool
action: ActionTuple
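AgentExperience now carries a group_status list alongside the agent's own data. For context, the fields the rest of the diff reads from each GroupmateStatus are obs, reward, done and action (with .continuous / .discrete). A runnable sketch in the same attrs style; the ActionSketch stand-in and the field order are assumptions, not the library's definitions:

import attr
import numpy as np
from typing import List

@attr.s(auto_attribs=True)
class ActionSketch:
    # Stand-in for the action object: the diff only reads .continuous and .discrete.
    continuous: np.ndarray
    discrete: np.ndarray

@attr.s(auto_attribs=True)
class GroupmateStatus:
    # Field names inferred from how the diff uses the object:
    # obs[i], reward, done, action.continuous, action.discrete.
    obs: List[np.ndarray]
    reward: float
    done: bool
    action: ActionSketch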

return result
-class TeamObsUtil:
+class GroupObsUtil:
-return f"team_obs_{index}"
+return f"group_obs_{index}"
@staticmethod
def get_name_at_next(index: int) -> str:

-return f"team_obs_next_{index}"
+return f"group_obs_next_{index}"
@staticmethod
def _padded_time_to_batch(

"""
# Find the first observation. This should be USUALLY O(1)
obs_shape = None
-for _team_obs in agent_buffer_field:
-if _team_obs:
-obs_shape = _team_obs[0].shape
+for _group_obs in agent_buffer_field:
+if _group_obs:
+obs_shape = _group_obs[0].shape
break
# If there were no critic obs at all
if obs_shape is None:
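The visible fragment scans for the first non-empty entry to learn the observation shape, then handles the case where no groupmate observations exist at all. A sketch of the padding idea this implies, under the assumption that timesteps with fewer groupmates are padded with zeros of that shape; it is not the library's exact _padded_time_to_batch:

import numpy as np
from typing import List

def padded_time_to_batch(agent_buffer_field: List[List[np.ndarray]]) -> List[np.ndarray]:
    # Sketch only: make every timestep's list of groupmate observations the same
    # length, padding with zeros shaped like the first observation found.
    obs_shape = None
    for _group_obs in agent_buffer_field:
        if _group_obs:
            obs_shape = _group_obs[0].shape
            break
    if obs_shape is None:
        # No groupmate observations anywhere in this trajectory.
        return []
    max_group = max(len(_group_obs) for _group_obs in agent_buffer_field)
    padded = []
    for _group_obs in agent_buffer_field:
        pad = [np.zeros(obs_shape, dtype=np.float32)] * (max_group - len(_group_obs))
        padded.append(np.stack(list(_group_obs) + pad))
    return padded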

separated_obs: List[np.array] = []
for i in range(num_obs):
separated_obs.append(
-TeamObsUtil._padded_time_to_batch(batch[TeamObsUtil.get_name_at(i)])
+GroupObsUtil._padded_time_to_batch(batch[GroupObsUtil.get_name_at(i)])
-result = TeamObsUtil._transpose_list_of_lists(separated_obs)
+result = GroupObsUtil._transpose_list_of_lists(separated_obs)
return result
@staticmethod

separated_obs: List[np.array] = []
for i in range(num_obs):
separated_obs.append(
-TeamObsUtil._padded_time_to_batch(
-batch[TeamObsUtil.get_name_at_next(i)]
+GroupObsUtil._padded_time_to_batch(
+batch[GroupObsUtil.get_name_at_next(i)]
-result = TeamObsUtil._transpose_list_of_lists(separated_obs)
+result = GroupObsUtil._transpose_list_of_lists(separated_obs)
return result
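After the rename, the buffer keys produced by GroupObsUtil follow a simple indexed scheme. Shown here as free functions mirroring the return values in the diff (in the source they are staticmethods on GroupObsUtil):

def get_name_at(index: int) -> str:
    # Buffer key for groupmates' observation #index at the current step.
    return f"group_obs_{index}"

def get_name_at_next(index: int) -> str:
    # Buffer key for groupmates' observation #index at the following step.
    return f"group_obs_next_{index}"

# A group with two observation specs uses the keys
# group_obs_0, group_obs_1, group_obs_next_0, group_obs_next_1.
assert get_name_at(0) == "group_obs_0"
assert get_name_at_next(1) == "group_obs_next_1"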

np.ndarray
] # Observation following the trajectory, for bootstrapping
-next_collab_obs: List[List[np.ndarray]]
+next_group_obs: List[List[np.ndarray]]
agent_id: str
behavior_id: str

[],
[],
)
-for teammate_status in exp.teammate_status:
-teammate_rewards.append(teammate_status.reward)
-teammate_continuous_actions.append(teammate_status.action.continuous)
-teammate_discrete_actions.append(teammate_status.action.discrete)
+for group_status in exp.group_status:
+teammate_rewards.append(group_status.reward)
+teammate_continuous_actions.append(group_status.action.continuous)
+teammate_discrete_actions.append(group_status.action.discrete)
# Team actions
agent_buffer_trajectory["team_continuous_action"].append(

teammate_disc_next_actions = []
if not is_last_step:
next_exp = self.steps[step + 1]
-for teammate_status in next_exp.teammate_status:
-teammate_cont_next_actions.append(teammate_status.action.continuous)
-teammate_disc_next_actions.append(teammate_status.action.discrete)
+for group_status in next_exp.group_status:
+teammate_cont_next_actions.append(group_status.action.continuous)
+teammate_disc_next_actions.append(group_status.action.discrete)
-for teammate_status in exp.teammate_status:
-teammate_cont_next_actions.append(teammate_status.action.continuous)
-teammate_disc_next_actions.append(teammate_status.action.discrete)
+for group_status in exp.group_status:
+teammate_cont_next_actions.append(group_status.action.continuous)
+teammate_disc_next_actions.append(group_status.action.discrete)
agent_buffer_trajectory["team_next_continuous_action"].append(
teammate_cont_next_actions

)
for i in range(num_obs):
-ith_team_obs = []
-for _teammate_status in exp.teammate_status:
+ith_group_obs = []
+for _group_status in exp.group_status:
-ith_team_obs.append(_teammate_status.obs[i])
-agent_buffer_trajectory[TeamObsUtil.get_name_at(i)].append(ith_team_obs)
+ith_group_obs.append(_group_status.obs[i])
+agent_buffer_trajectory[GroupObsUtil.get_name_at(i)].append(
+ith_group_obs
+)
-ith_team_obs_next = []
+ith_group_obs_next = []
-for _obs in self.next_collab_obs:
-ith_team_obs_next.append(_obs[i])
+for _obs in self.next_group_obs:
+ith_group_obs_next.append(_obs[i])
-next_teammate_status = self.steps[step + 1].teammate_status
-for _teammate_status in next_teammate_status:
+next_group_status = self.steps[step + 1].group_status
+for _group_status in next_group_status:
-ith_team_obs_next.append(_teammate_status.obs[i])
-agent_buffer_trajectory[TeamObsUtil.get_name_at_next(i)].append(
-ith_team_obs_next
+ith_group_obs_next.append(_group_status.obs[i])
+agent_buffer_trajectory[GroupObsUtil.get_name_at_next(i)].append(
+ith_group_obs_next
)
if exp.memory is not None:
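The loop above gathers, for each observation index i, every groupmate's i-th observation into one list per timestep and appends it under the group_obs_{i} key (and group_obs_next_{i} for the following step). A toy illustration with numpy arrays and a plain dict standing in for the AgentBuffer used in the diff:

from collections import defaultdict
import numpy as np

# Toy setup: two groupmates, each exposing two observation arrays this timestep.
groupmate_obs = [
    [np.zeros(4), np.zeros(2)],  # groupmate 0: obs index 0 and 1
    [np.ones(4), np.ones(2)],    # groupmate 1: obs index 0 and 1
]
num_obs = 2
buffer = defaultdict(list)  # plain stand-in for the AgentBuffer

for i in range(num_obs):
    # Collect every groupmate's i-th observation, then append the whole group
    # list under the group_obs_{i} key for this timestep.
    ith_group_obs = [obs[i] for obs in groupmate_obs]
    buffer[f"group_obs_{i}"].append(ith_group_obs)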

agent_buffer_trajectory["done"].append(exp.done)
agent_buffer_trajectory["team_dones"].append(
-[_status.done for _status in exp.teammate_status]
+[_status.done for _status in exp.group_status]
)
# Adds the log prob and action of continuous/discrete separately

Returns true if all teammates are done at the end of the trajectory.
Combine with done_reached to check if the whole team is done.
"""
-return all(_status.done for _status in self.steps[-1].teammate_status)
+return all(_status.done for _status in self.steps[-1].group_status)
@property
def interrupted(self) -> bool:
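The changed return statement belongs to a property that reports whether every groupmate was done on the trajectory's final step. A sketch of that check as a plain function; the name all_group_dones_reached and the StatusSketch stub are placeholders, since the def line is not part of this hunk:

from typing import List

class StatusSketch:
    # Minimal stand-in for GroupmateStatus: only the done flag is needed here.
    def __init__(self, done: bool):
        self.done = done

def all_group_dones_reached(last_step_group_status: List[StatusSketch]) -> bool:
    # Mirrors the returned expression in the diff: True only if every groupmate
    # recorded on the trajectory's final step is done. Combine with the agent's
    # own done flag to decide whether the whole group finished.
    return all(_status.done for _status in last_step_group_status)

assert all_group_dones_reached([StatusSketch(True), StatusSketch(True)])
assert not all_group_dones_reached([StatusSketch(True), StatusSketch(False)])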
