
New buffer layout, TeamObsUtil, pad dead agents

/develop/centralizedcritic/counterfact
Ervin Teng, 4 years ago
Current commit: 9c3da1b6
4 files changed, 84 insertions(+), 9 deletions(-)
  1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (8 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (7 changes)
  4. ml-agents/mlagents/trainers/trajectory.py (70 changes)

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (8 changes)


 import numpy as np
 from mlagents.trainers.buffer import AgentBuffer
-from mlagents.trainers.trajectory import ObsUtil
+from mlagents.trainers.trajectory import ObsUtil, TeamObsUtil
 from mlagents.trainers.torch.components.bc.module import BCModule
 from mlagents.trainers.torch.components.reward_providers import create_reward_provider

 next_obs = [obs.unsqueeze(0) for obs in next_obs]
-critic_obs_np = AgentBuffer.obs_list_list_to_obs_batch(batch["critic_obs"])
-critic_obs = [
-    ModelUtils.list_to_tensor_list(_agent_obs) for _agent_obs in critic_obs_np
-]
+critic_obs = TeamObsUtil.from_buffer(batch, n_obs)
+critic_obs = [
+    [ModelUtils.list_to_tensor(obs) for obs in _teammate_obs]
+    for _teammate_obs in critic_obs
+]
 next_critic_obs = [
     ModelUtils.list_to_tensor_list(_list_obs) for _list_obs in next_critic_obs
 ]
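With the new buffer layout, each teammate's observations are tensorized one list at a time. A minimal sketch of the resulting nesting, using hypothetical shapes (3 teammates, 2 obs specs, 5 timesteps) and torch.as_tensor standing in for ModelUtils.list_to_tensor:

import numpy as np
import torch

# Hypothetical output of TeamObsUtil.from_buffer: a List (one per teammate)
# of Lists (one per obs spec) of arrays whose leading axis is time/batch.
critic_obs = [
    [np.zeros((5, 8), dtype=np.float32), np.zeros((5, 4), dtype=np.float32)]
    for _ in range(3)
]
# Tensorize each obs array individually, mirroring the nested comprehension above.
critic_obs_t = [
    [torch.as_tensor(obs) for obs in _teammate_obs]
    for _teammate_obs in critic_obs
]
print(len(critic_obs_t), len(critic_obs_t[0]), tuple(critic_obs_t[0][0].shape))
# -> 3 2 (5, 8)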

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8 changes)


 from mlagents.trainers.torch.agent_action import AgentAction
 from mlagents.trainers.torch.action_log_probs import ActionLogProbs
 from mlagents.trainers.torch.utils import ModelUtils
-from mlagents.trainers.trajectory import ObsUtil
+from mlagents.trainers.trajectory import ObsUtil, TeamObsUtil

 class TorchPPOOptimizer(TorchOptimizer):

 current_obs = ObsUtil.from_buffer(batch, n_obs)
 # Convert to tensors
 current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]
-critic_obs_np = AgentBuffer.obs_list_list_to_obs_batch(batch["critic_obs"])
-critic_obs = [
-    ModelUtils.list_to_tensor_list(_agent_obs) for _agent_obs in critic_obs_np
-]
+critic_obs = TeamObsUtil.from_buffer(batch, n_obs)
+critic_obs = [
+    [ModelUtils.list_to_tensor(obs) for obs in _teammate_obs]
+    for _teammate_obs in critic_obs
+]
 act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
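The PPO optimizer picks up the same change: instead of one "critic_obs" buffer key holding a list of lists, the new layout stores one key per observation index. A sketch of the naming scheme, mirroring TeamObsUtil.get_name_at from trajectory.py below:

# Mirrors TeamObsUtil.get_name_at in trajectory.py.
def get_name_at(index: int) -> str:
    return f"team_obs_{index}"

# With e.g. 3 obs specs, the buffer gains three keys in place of "critic_obs".
print([get_name_at(i) for i in range(3)])
# -> ['team_obs_0', 'team_obs_1', 'team_obs_2']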

ml-agents/mlagents/trainers/torch/networks.py (7 changes)


 self_encodes.append(processed_obs)
 x_self = torch.cat(self_encodes, dim=-1)
+# Get attention masks by grabbing an arbitrary obs across all the agents
+# Since these are raw obs, the padded values are still 0
+only_first_obs = [_all_obs[0] for _all_obs in all_net_inputs]
+obs_for_mask = torch.stack(only_first_obs, dim=1)
 # Get the self encoding separately, but keep it in the entities
 concat_encoded_obs = [x_self]
 for inputs in all_net_inputs[1:]:

 encoded_entity = self.entity_encoder(x_self, [concat_entites])
 encoded_state = self.self_attn(
-    encoded_entity, EntityEmbeddings.get_masks([concat_entites])
+    encoded_entity, EntityEmbeddings.get_masks([obs_for_mask])
 )
 if len(concat_encoded_obs) == 0:
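The comment in the hunk explains the trick: because the stacked values are raw (pre-encoder) observations, any agent that was zero-padded out stays exactly zero, so an attention mask can be recovered from the raw tensor. A rough sketch of that idea, not the actual EntityEmbeddings.get_masks implementation:

import torch

# Hypothetical: batch of 2, 3 agents, obs size 4; the third agent is padding.
obs_for_mask = torch.ones(2, 3, 4)
obs_for_mask[:, 2, :] = 0.0  # dead/absent agent padded with zeros
# An entity whose raw obs is all zeros is treated as padding (masked out).
mask = (obs_for_mask.abs().sum(dim=-1) == 0).float()
print(mask)
# -> tensor([[0., 0., 1.],
#            [0., 0., 1.]])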

ml-agents/mlagents/trainers/trajectory.py (70 changes)


 from typing import List, NamedTuple
+import itertools
 import attr
 import numpy as np

 return result

+class TeamObsUtil:
+    @staticmethod
+    def get_name_at(index: int) -> str:
+        """
+        Returns the name of the observation given the index of the observation
+        """
+        return f"team_obs_{index}"
+
+    @staticmethod
+    def _padded_time_to_batch(
+        agent_buffer_field: AgentBuffer.AgentBufferField,
+    ) -> List[np.ndarray]:
+        """
+        Convert an AgentBufferField of Lists of obs, where one of the dimensions is time and the other is number (e.g.
+        in the case of a variable number of critic observations), to a List of obs, where time is in the batch dimension
+        of the obs, and the List is the variable number of agents. For cases where there is a varying number of agents,
+        pad the non-existent agents with 0.
+        """
+        # Find the first observation. This should usually be O(1)
+        obs_shape = None
+        for _team_obs in agent_buffer_field:
+            if _team_obs:
+                obs_shape = _team_obs[0].shape
+                break
+        # If there were no critic obs at all
+        if obs_shape is None:
+            return []
+        new_list = list(
+            map(
+                lambda x: np.asanyarray(x),
+                itertools.zip_longest(
+                    *agent_buffer_field, fillvalue=np.zeros(obs_shape)
+                ),
+            )
+        )
+        return new_list
+
+    @staticmethod
+    def _transpose_list_of_lists(
+        list_list: List[List[np.ndarray]],
+    ) -> List[List[np.ndarray]]:
+        return list(map(list, zip(*list_list)))
+
+    @staticmethod
+    def from_buffer(batch: AgentBuffer, num_obs: int) -> List[np.array]:
+        """
+        Creates the list of observations from an AgentBuffer
+        """
+        separated_obs: List[np.array] = []
+        for i in range(num_obs):
+            separated_obs.append(
+                TeamObsUtil._padded_time_to_batch(batch[TeamObsUtil.get_name_at(i)])
+            )
+        # separated_obs contains a List(num_obs) of Lists(num_agents); we want to flip
+        # that and get a List(num_agents) of Lists(num_obs)
+        result = TeamObsUtil._transpose_list_of_lists(separated_obs)
+        return result
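Taken together, _padded_time_to_batch and _transpose_list_of_lists turn a time-major field with a variable number of teammates per step into fixed-size, agent-major arrays, zero-padding steps where a teammate was dead. A self-contained sketch of the padding step with hypothetical data:

import itertools
import numpy as np

obs_shape = (2,)
# Hypothetical AgentBufferField: 3 timesteps; at t=1 one teammate is dead.
field = [
    [np.array([1.0, 1.0]), np.array([2.0, 2.0])],      # t=0: two teammates
    [np.array([3.0, 3.0])],                            # t=1: one teammate
    [np.array([4.0, 4.0]), np.array([5.0, 5.0])],      # t=2: two teammates
]
# zip_longest flips time-major to agent-major and fills missing agents with zeros.
padded = [
    np.asanyarray(x)
    for x in itertools.zip_longest(*field, fillvalue=np.zeros(obs_shape))
]
print(len(padded), padded[0].shape)  # -> 2 (3, 2): one (time, obs) array per agent
print(padded[1][1])                  # -> [0. 0.]: the dead agent at t=1 is padded

from_buffer then applies this per obs index and transposes, so the result is indexed [teammate][obs_index] rather than [obs_index][teammate].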
 class Trajectory(NamedTuple):
     steps: List[AgentExperience]
     next_obs: List[np.ndarray]

 for i in range(num_obs):
     agent_buffer_trajectory[ObsUtil.get_name_at(i)].append(obs[i])
     agent_buffer_trajectory[ObsUtil.get_name_at_next(i)].append(next_obs[i])
-agent_buffer_trajectory["critic_obs"].append(exp.collab_obs)
+for i in range(num_obs):
+    ith_team_obs = []
+    for _team_obs in exp.collab_obs:
+        # Assume teammates have the same obs space
+        ith_team_obs.append(_team_obs[i])
+    agent_buffer_trajectory[TeamObsUtil.get_name_at(i)].append(ith_team_obs)
 if exp.memory is not None:
     agent_buffer_trajectory["memory"].append(exp.memory)
