
Convert List[np.ndarray] to np.ndarray before using torch.as_tensor (#4183)

Big speedup in visual obs
/develop/add-fire
GitHub · 5 years ago
Current commit
cde8bd29
4 changed files, with 28 additions and 16 deletions
  1. ml-agents/mlagents/trainers/models_torch.py (13 changes)
  2. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (7 changes)
  3. ml-agents/mlagents/trainers/policy/nn_policy.py (1 change)
  4. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (23 changes)

ml-agents/mlagents/trainers/models_torch.py (13 changes)


 from enum import Enum
-from typing import Callable, NamedTuple
+from typing import Callable, NamedTuple, List, Optional
 import numpy as np
 from mlagents.trainers.distributions_torch import (
     GaussianDistribution,

 ]
 EPSILON = 1e-7

+def list_to_tensor(
+    ndarray_list: List[np.ndarray], dtype: Optional[torch.dtype] = None
+) -> torch.Tensor:
+    """
+    Converts a list of numpy arrays into a tensor. MUCH faster than
+    calling as_tensor on the list directly.
+    """
+    return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)
 class ActionType(Enum):
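A note on the docstring's speed claim: handing a Python list of numpy arrays straight to torch.as_tensor forces an element-by-element conversion, while collapsing the list into a single ndarray first lets PyTorch wrap one contiguous buffer. A minimal micro-benchmark sketch of the two paths (the frame shape and batch size below are illustrative assumptions, not taken from this commit):

import timeit

import numpy as np
import torch

# Hypothetical visual-observation batch: 64 frames of 84x84x3 float32 pixels.
frames = [np.random.rand(84, 84, 3).astype(np.float32) for _ in range(64)]

# Old path: torch.as_tensor directly on the Python list of arrays.
slow = timeit.timeit(lambda: torch.as_tensor(frames), number=10)

# New path (what list_to_tensor does): collapse to one ndarray first.
fast = timeit.timeit(lambda: torch.as_tensor(np.asanyarray(frames)), number=10)

print(f"as_tensor(list): {slow:.4f}s  as_tensor(asanyarray(list)): {fast:.4f}s")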

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (7 changes)


 from mlagents.trainers.optimizer import Optimizer
 from mlagents.trainers.settings import TrainerSettings, RewardSignalType
 from mlagents.trainers.trajectory import SplitObservations
+from mlagents.trainers.models_torch import list_to_tensor
 class TorchOptimizer(Optimizer):  # pylint: disable=W0223

     def get_trajectory_value_estimates(
         self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
     ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
-        vector_obs = [torch.as_tensor(batch["vector_obs"])]
+        vector_obs = [list_to_tensor(batch["vector_obs"])]
-                visual_ob = torch.as_tensor(batch["visual_obs%d" % idx])
+                visual_ob = list_to_tensor(batch["visual_obs%d" % idx])
                 visual_obs.append(visual_ob)
         else:
             visual_obs = []

         next_obs = np.concatenate(next_obs, axis=-1)
-        next_obs = [torch.as_tensor(next_obs).unsqueeze(0)]
+        next_obs = [list_to_tensor(next_obs).unsqueeze(0)]
         next_memory = torch.zeros([1, 1, self.policy.m_size])
         value_estimates, mean_value = self.policy.actor_critic.critic_pass(
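For context, the AgentBuffer fields indexed here ("vector_obs", "visual_obs%d") behave like lists of per-step numpy arrays, so each of these call sites previously went through the slow list path on every value-estimate pass. A small self-contained sketch of the conversion the optimizer now performs (the helper is re-declared inline for runnability; the observation size and step count are illustrative assumptions):

import numpy as np
import torch

def list_to_tensor(ndarray_list, dtype=None):
    # Same helper as added in models_torch.py above.
    return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)

# Hypothetical trajectory slice: a 128-step rollout with an 8-dim vector observation per step.
vector_obs_field = [np.zeros(8, dtype=np.float32) for _ in range(128)]

# The optimizer wraps the converted field in a list, one entry per observation stream.
vector_obs = [list_to_tensor(vector_obs_field)]
print(vector_obs[0].shape)  # torch.Size([128, 8])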

ml-agents/mlagents/trainers/policy/nn_policy.py (1 change)


        seed: int,
        brain: BrainParameters,
        trainer_settings: TrainerSettings,
        is_training: bool,
        model_path: str,
        load: bool,
        tanh_squash: bool = False,

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (23 changes)


 from mlagents.trainers.policy.torch_policy import TorchPolicy
 from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
 from mlagents.trainers.settings import TrainerSettings, PPOSettings
+from mlagents.trainers.models_torch import list_to_tensor
 class TorchPPOOptimizer(TorchOptimizer):

         returns = {}
         old_values = {}
         for name in self.reward_signals:
-            old_values[name] = torch.as_tensor(batch["{}_value_estimates".format(name)])
-            returns[name] = torch.as_tensor(batch["{}_returns".format(name)])
+            old_values[name] = list_to_tensor(batch["{}_value_estimates".format(name)])
+            returns[name] = list_to_tensor(batch["{}_returns".format(name)])
-        vec_obs = [torch.as_tensor(batch["vector_obs"])]
-        act_masks = torch.as_tensor(batch["action_mask"])
+        vec_obs = [list_to_tensor(batch["vector_obs"])]
+        act_masks = list_to_tensor(batch["action_mask"])
-            actions = torch.as_tensor(batch["actions"]).unsqueeze(-1)
+            actions = list_to_tensor(batch["actions"]).unsqueeze(-1)
-            actions = torch.as_tensor(batch["actions"], dtype=torch.long)
+            actions = list_to_tensor(batch["actions"], dtype=torch.long)
-            torch.as_tensor(batch["memory"][i])
+            list_to_tensor(batch["memory"][i])
             for i in range(0, len(batch["memory"]), self.policy.sequence_length)
         ]
         if len(memories) > 0:

             for idx, _ in enumerate(
                 self.policy.actor_critic.network_body.visual_encoders
             ):
-                vis_ob = torch.as_tensor(batch["visual_obs%d" % idx])
+                vis_ob = list_to_tensor(batch["visual_obs%d" % idx])
                 vis_obs.append(vis_ob)
         else:
             vis_obs = []

         )
         value_loss = self.ppo_value_loss(values, old_values, returns)
         policy_loss = self.ppo_policy_loss(
-            torch.as_tensor(batch["advantages"]),
+            list_to_tensor(batch["advantages"]),
-            torch.as_tensor(batch["action_probs"]),
-            torch.as_tensor(batch["masks"], dtype=torch.int32),
+            list_to_tensor(batch["action_probs"]),
+            list_to_tensor(batch["masks"], dtype=torch.int32),
         )
         loss = (
             policy_loss
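One detail worth noting in the substitutions above: where the tensor dtype matters (discrete action indices cast to torch.long, loss masks to torch.int32), the dtype argument is simply forwarded by the helper to torch.as_tensor. A short sketch, with the helper re-declared inline and an invented 16-step discrete-action field for illustration:

import numpy as np
import torch

def list_to_tensor(ndarray_list, dtype=None):
    # Same helper as added in models_torch.py above.
    return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)

# Hypothetical buffer field: one discrete action index per step, stored as float32.
actions_field = [np.array([2.0], dtype=np.float32) for _ in range(16)]

# dtype is forwarded to torch.as_tensor, so the indices come out as int64 (torch.long).
actions = list_to_tensor(actions_field, dtype=torch.long)
print(actions.shape, actions.dtype)  # torch.Size([16, 1]) torch.int64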
