
torch utils to and from buffer

/develop/use-action-buffers
Andrew Cohen, 4 years ago
Current commit: eaecb59e
9 changed files with 91 additions and 28 deletions
  1. ml-agents-envs/mlagents_envs/base_env.py (10 changes)
  2. ml-agents/mlagents/trainers/agent_processor.py (3 changes)
  3. ml-agents/mlagents/trainers/policy/policy.py (2 changes)
  4. ml-agents/mlagents/trainers/policy/torch_policy.py (39 changes)
  5. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  6. ml-agents/mlagents/trainers/tests/simple_test_envs.py (9 changes)
  7. ml-agents/mlagents/trainers/torch/action_model.py (18 changes)
  8. ml-agents/mlagents/trainers/torch/networks.py (4 changes)
  9. ml-agents/mlagents/trainers/torch/utils.py (30 changes)

ml-agents-envs/mlagents_envs/base_env.py (10 changes)


BehaviorName = str
- class ActionBuffer(NamedTuple):
+ class ActionBuffers(NamedTuple):
"""
Contains continuous and discrete actions as numpy arrays.
"""

return self.discrete_action_size + self.continuous_action_size
def create_empty_action(self, n_agents: int) -> Tuple[np.ndarray, np.ndarray]:
- return ActionBuffer(
+ return ActionBuffers(
np.zeros((n_agents, self.continuous_action_size), dtype=np.float32),
np.zeros((n_agents, self.discrete_action_size), dtype=np.int32),
)

for i in range(self.discrete_action_size)
]
)
- return ActionBuffer(continuous_action, discrete_action)
+ return ActionBuffers(continuous_action, discrete_action)
class BehaviorSpec(NamedTuple):
observation_shapes: List[Tuple]

@abstractmethod
def set_actions(
- self, behavior_name: BehaviorName, action: Union[ActionBuffer, np.ndarray]
+ self, behavior_name: BehaviorName, action: Union[ActionBuffers, np.ndarray]
) -> None:
"""
Sets the action for all of the agents in the simulation for the next

self,
behavior_name: BehaviorName,
agent_id: AgentId,
- action: Union[ActionBuffer, np.ndarray],
+ action: Union[ActionBuffers, np.ndarray],
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

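For readers skimming the diff, here is a minimal, self-contained sketch of the data structure this rename introduces: an ActionBuffers tuple holding continuous and discrete actions side by side, plus a zero-filled constructor in the spirit of create_empty_action above. The standalone helper and its explicit size arguments are assumptions for illustration, not code from this commit.

from typing import NamedTuple, Optional
import numpy as np

class ActionBuffers(NamedTuple):
    """Continuous and discrete actions for a batch of agents, as numpy arrays."""
    continuous: Optional[np.ndarray]  # shape (n_agents, continuous_action_size)
    discrete: Optional[np.ndarray]    # shape (n_agents, discrete_action_size)

def create_empty_action(continuous_size: int, discrete_size: int, n_agents: int) -> ActionBuffers:
    # Zero-filled buffers, one row per agent, mirroring BehaviorSpec.create_empty_action above.
    return ActionBuffers(
        np.zeros((n_agents, continuous_size), dtype=np.float32),
        np.zeros((n_agents, discrete_size), dtype=np.int32),
    )

# Example: two agents, three continuous dimensions, two discrete branches.
empty = create_empty_action(continuous_size=3, discrete_size=2, n_agents=2)
assert empty.continuous.shape == (2, 3)
assert empty.discrete.shape == (2, 2)
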
ml-agents/mlagents/trainers/agent_processor.py (3 changes)


action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
action_mask = stored_decision_step.action_mask
- prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
+ #prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
+ prev_action = self.policy.retrieve_previous_action([global_id])
experience = AgentExperience(
obs=obs,
reward=step.reward,

ml-agents/mlagents/trainers/policy/policy.py (2 changes)


for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:
action_matrix[index, :] = self.previous_action_dict[agent_id]
- return action_matrix
+ return action_matrix[0, :]
def remove_previous_action(self, agent_ids):
for agent_id in agent_ids:

ml-agents/mlagents/trainers/policy/torch_policy.py (39 changes)


from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.policy import Policy
- from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
+ from mlagents_envs.base_env import DecisionSteps, BehaviorSpec, ActionBuffers
from mlagents_envs.timers import timed
from mlagents.trainers.settings import TrainerSettings

GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
self.grads = None
self.previous_action_dict: Dict[str, ActionBuffers] = {}
reward_signal_configs = trainer_settings.reward_signals
reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]

self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
- actions: torch.Tensor,
+ actions: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

vec_obs, vis_obs, masks=masks, memories=memories
)
# Todo - make pre_action difference
- run_out["pre_action"] = ModelUtils.to_numpy(action)
- run_out["action"] = ModelUtils.to_numpy(action)
+ run_out["action"] = ModelUtils.to_action_buffers(action, self.action_spec)
+ run_out["pre_action"] = ModelUtils.to_action_buffers(action, self.action_spec)
run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["value_heads"] = {

def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}
# Overriding for use of ActionBuffers in torch
def make_empty_previous_action(self, num_agents):
"""
Creates empty previous action for use with RNNs and discrete control
:param num_agents: Number of agents.
:return: Numpy array of zeros.
"""
return self.action_spec.create_empty_action(num_agents)
def save_previous_action(
self, agent_ids: List[str], action_matrix: ActionBuffers) -> None:
if action_matrix is None:
return
for index, agent_id in enumerate(agent_ids):
self.previous_action_dict[agent_id] = action_matrix
def retrieve_previous_action(self, agent_ids: List[str]) -> np.ndarray:
action_matrix = self.action_spec.create_empty_action(len(agent_ids))
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:
action_matrix = self.previous_action_dict[agent_id]
return action_matrix
def remove_previous_action(self, agent_ids):
for agent_id in agent_ids:
if agent_id in self.previous_action_dict:
self.previous_action_dict.pop(agent_id)

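The overrides above keep one ActionBuffers per agent id instead of rows in a shared numpy matrix. Below is a rough, hedged sketch of that bookkeeping; the PrevActionStore class and the per-row slicing in save_previous_action are illustrative simplifications (the commit currently stores the whole batched buffers under each id), and it reuses the ActionBuffers / create_empty_action sketch from the base_env.py section.

from typing import Dict, List

class PrevActionStore:
    """Illustrative per-agent store of the most recent ActionBuffers."""

    def __init__(self, continuous_size: int, discrete_size: int):
        self.continuous_size = continuous_size
        self.discrete_size = discrete_size
        self.previous_action_dict: Dict[str, ActionBuffers] = {}

    def save_previous_action(self, agent_ids: List[str], actions: ActionBuffers) -> None:
        for index, agent_id in enumerate(agent_ids):
            # Keep this agent's row only; the commit itself stores the full buffers per id.
            self.previous_action_dict[agent_id] = ActionBuffers(
                actions.continuous[index : index + 1],
                actions.discrete[index : index + 1],
            )

    def retrieve_previous_action(self, agent_id: str) -> ActionBuffers:
        # Fall back to an empty, zero-filled buffer when the agent has no history yet.
        empty = create_empty_action(self.continuous_size, self.discrete_size, n_agents=1)
        return self.previous_action_dict.get(agent_id, empty)

    def remove_previous_action(self, agent_ids: List[str]) -> None:
        for agent_id in agent_ids:
            self.previous_action_dict.pop(agent_id, None)

# Usage: save a batch for two agents, then read one of them back.
store = PrevActionStore(continuous_size=3, discrete_size=2)
store.save_previous_action(["agent-0", "agent-1"], create_empty_action(3, 2, n_agents=2))
assert store.retrieve_previous_action("agent-0").continuous.shape == (1, 3)
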
ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)


vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
- actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
+ #discrete_actions = ModelUtils.list_to_tensor(batch["actions"][self.policy.continuous_act_size:], dtype=torch.long)
+ #actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
+ actions = ModelUtils.action_buffers_to_tensor_list(batch["actions"], self.policy.action_spec)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

ml-agents/mlagents/trainers/tests/simple_test_envs.py (9 changes)


ActionType,
BehaviorMapping,
BehaviorName,
- ActionBuffer,
+ ActionBuffers,
)
from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (

def set_actions(self, behavior_name: BehaviorName, action) -> None:
# print(action, self.goal[behavior_name])
- continuous_action = action[:, : self.continuous_action_size]
- discrete_action = action[:, self.continuous_action_size :]
+ #continuous_action = action[:, : self.continuous_action_size]
+ #discrete_action = action[:, self.continuous_action_size :]
+ continuous_action = action.continuous
+ discrete_action = action.discrete
self.continuous_env.set_actions(behavior_name, continuous_action)
self.discrete_env.set_actions(behavior_name, discrete_action)

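The change above replaces index arithmetic on a flat action array with the two named fields of ActionBuffers. A tiny sketch of that delegation pattern, with a made-up HybridEnv wrapper standing in for the test environment:

class HybridEnv:
    """Illustrative wrapper that forwards each half of an ActionBuffers to a sub-env."""

    def __init__(self, continuous_env, discrete_env):
        self.continuous_env = continuous_env
        self.discrete_env = discrete_env

    def set_actions(self, behavior_name, action):
        # 'action' is an ActionBuffers: no slicing by continuous_action_size needed.
        self.continuous_env.set_actions(behavior_name, action.continuous)
        self.discrete_env.set_actions(behavior_name, action.discrete)
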
ml-agents/mlagents/trainers/torch/action_model.py (18 changes)


distribution_instances.append(dist_instance)
return distribution_instances
- def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+ def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: List[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
- split_actions = torch.split(actions, self._split_list, dim=1)
- action_lists : List[torch.Tensor] = []
- for split_action in split_actions:
- action_list = [split_action[..., i] for i in range(split_action.shape[-1])]
- action_lists += action_list
+ #split_actions = torch.split(actions, self._split_list, dim=1)
+ #action_lists : List[torch.Tensor] = []
+ #for split_action in split_actions:
+ # action_list = [split_action[..., i] for i in range(split_action.shape[-1])]
+ # action_lists += action_list
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_lists, dists)
return log_probs, entropies

- def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[List[torch.Tensor], torch.Tensor, torch.Tensor]:
dists = self._get_dists(inputs, masks)
action_outs : List[torch.Tensor] = []
action_lists = self._sample_action(dists)

log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_lists, dists)
- action = torch.cat(action_outs, dim=1)
- return (action, log_probs, entropies)
+ #action = torch.cat(action_outs, dim=1)
+ return (action_outs, log_probs, entropies)

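With the torch.cat removed, forward now hands back the sampled actions as a list of tensors, one per distribution, so downstream code such as ModelUtils.to_action_buffers can still tell the continuous block and each discrete branch apart. A short sketch of that contract, using stock torch distributions as stand-ins for the DistInstance classes in this file:

from typing import List
import torch

def sample_action_list(dists) -> List[torch.Tensor]:
    # One sampled tensor per distribution, left unconcatenated:
    # [continuous (batch, C), branch_0 (batch,), branch_1 (batch,), ...]
    return [dist.sample() for dist in dists]

batch_size = 4
dists = [
    torch.distributions.Normal(torch.zeros(batch_size, 3), torch.ones(batch_size, 3)),
    torch.distributions.Categorical(logits=torch.zeros(batch_size, 2)),
    torch.distributions.Categorical(logits=torch.zeros(batch_size, 2)),
]
actions = sample_action_list(dists)
assert actions[0].shape == (batch_size, 3)  # continuous block keeps its width
assert actions[1].shape == (batch_size,)    # each discrete branch is its own tensor
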
ml-agents/mlagents/trainers/torch/networks.py (4 changes)


masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
- ) -> Tuple[List[DistInstance], List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
"""
Returns distributions, from which actions can be sampled, and value estimates.
If memory is enabled, return the memories as well.

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
- actions: torch.Tensor,
+ actions: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

ml-agents/mlagents/trainers/torch/utils.py (30 changes)


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
- from mlagents_envs.base_env import BehaviorSpec
+ from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ActionBuffers
from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance

nn.ModuleList(vector_encoders),
total_processed_size,
)
@staticmethod
def to_action_buffers(actions: List[torch.Tensor], action_spec : ActionSpec) -> ActionBuffers:
"""
Converts a list of action Tensors to an ActionBuffers tuple. Implicitly
assumes order of actions in 'actions' is continuous, discrete
"""
continuous_action = None
discrete_action = []
# offset to index discrete actions depending on presence of continuous actions
_offset = 0
if action_spec.continuous_action_size > 0:
continuous_action = actions[0].detach().cpu().numpy()
_offset = 1
if action_spec.discrete_action_size > 0:
for _disc in range(action_spec.discrete_action_size):
discrete_action.append(actions[_disc + _offset].detach().cpu().numpy())
return ActionBuffers(continuous_action, discrete_action)
@staticmethod
def action_buffers_to_tensor_list(
action_buffers : ActionBuffers, action_spec : ActionSpec, dtype: Optional[torch.dtype] = None
) -> List[torch.Tensor]:
"""
Converts an ActionBuffer of numpy arrays into a List of tensors.
"""
print(action_buffers)
#return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)
@staticmethod
def list_to_tensor(

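to_action_buffers above is complete, but action_buffers_to_tensor_list is still a stub (it only prints its input). The following is therefore only a guess at the reverse conversion under the same "continuous first, then one entry per discrete branch" ordering that to_action_buffers assumes, and it further assumes the discrete half is stored as a single (n_agents, num_branches) array as in create_empty_action; the explicit size arguments replace the ActionSpec dependency so the example stays self-contained. It relies on the ActionBuffers sketch from the base_env.py section.

from typing import List, Optional
import numpy as np
import torch

def action_buffers_to_tensor_list(
    action_buffers: "ActionBuffers",  # ActionBuffers sketch from the base_env.py section
    continuous_size: int,
    discrete_size: int,
    dtype: Optional[torch.dtype] = None,
) -> List[torch.Tensor]:
    # Reverse of to_action_buffers: continuous block first, then one tensor per discrete branch.
    tensors: List[torch.Tensor] = []
    if continuous_size > 0:
        tensors.append(torch.as_tensor(action_buffers.continuous, dtype=dtype))
    if discrete_size > 0:
        discrete = np.asarray(action_buffers.discrete)
        for branch in range(discrete_size):
            tensors.append(torch.as_tensor(discrete[:, branch], dtype=torch.long))
    return tensors

# Round trip check against the ActionBuffers sketch defined earlier.
buffers = ActionBuffers(
    np.zeros((4, 3), dtype=np.float32),
    np.zeros((4, 2), dtype=np.int32),
)
tensors = action_buffers_to_tensor_list(buffers, continuous_size=3, discrete_size=2)
assert [tuple(t.shape) for t in tensors] == [(4, 3), (4,), (4,)]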