
move AgentAction, ActionLogProbs, and ActionFlattener to separate files

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Commit 17496265
10 files changed: 28 insertions, 182 deletions
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (4 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  3. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)
  4. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (8 changes)
  5. ml-agents/mlagents/trainers/torch/action_model.py (4 changes)
  6. ml-agents/mlagents/trainers/torch/components/bc/module.py (4 changes)
  7. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6 changes)
  8. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (6 changes)
  9. ml-agents/mlagents/trainers/torch/networks.py (4 changes)
  10. ml-agents/mlagents/trainers/torch/utils.py (166 changes)
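
For importers, the change is mechanical: the three helpers move out of mlagents.trainers.torch.utils into modules of their own. A minimal sketch of the import migration, using the new module paths introduced by this commit:

# Before this commit (single combined import):
#   from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
# After this commit (one module per helper):
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.action_flattener import ActionFlattener
# ActionFlattener is also promoted from the nested ModelUtils.ActionFlattener
# to a top-level class, so call sites become ActionFlattener(action_spec).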

ml-agents/mlagents/trainers/policy/torch_policy.py (4 changes)

      GlobalSteps,
  )
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.utils import ModelUtils
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
  EPSILON = 1e-7  # Small value to avoid divide by zero

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)

  from mlagents.trainers.policy.torch_policy import TorchPolicy
  from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
  from mlagents.trainers.settings import TrainerSettings, PPOSettings
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
+ from mlagents.trainers.torch.utils import ModelUtils
  class TorchPPOOptimizer(TorchOptimizer):

ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)

  from mlagents.trainers.policy.torch_policy import TorchPolicy
  from mlagents.trainers.settings import NetworkSettings
  from mlagents.trainers.torch.networks import ValueNetwork
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
+ from mlagents.trainers.torch.utils import ModelUtils
  from mlagents.trainers.buffer import AgentBuffer
  from mlagents_envs.timers import timed
  from mlagents.trainers.exception import UnityTrainerException

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (8 changes)

  @pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
  def test_2d_ppo(action_sizes):
-     env = SimpleEnvironment(
-         [BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8
-     )
+     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
      new_hyperparams = attr.evolve(
          PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
      )

  @pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
  def test_2d_sac(action_sizes):
-     env = SimpleEnvironment(
-         [BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8
-     )
+     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
      new_hyperparams = attr.evolve(
          SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000
      )
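
Beyond the import shuffle, these two tests also drop the redundant action_size=2 keyword: the parametrized action_sizes tuple alone now sizes the environment. A hedged reading of the two cases (which tuple slot counts continuous versus discrete actions is an assumption, not visible in this diff):

# Assumption: one slot counts continuous actions and the other counts discrete
# branches, so (0, 2) and (2, 0) exercise a purely discrete and a purely
# continuous 2D action space respectively.
for action_sizes in [(0, 2), (2, 0)]:
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)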

ml-agents/mlagents/trainers/torch/action_model.py (4 changes)

      GaussianDistribution,
      MultiCategoricalDistribution,
  )
- from mlagents.trainers.torch.utils import AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
  from mlagents_envs.base_env import ActionSpec
  EPSILON = 1e-7  # Small value to avoid divide by zero

ml-agents/mlagents/trainers/torch/components/bc/module.py (4 changes)

  from mlagents.trainers.policy.torch_policy import TorchPolicy
  from mlagents.trainers.demo_loader import demo_to_buffer
  from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
+ from mlagents.trainers.torch.utils import ModelUtils
  class BCModule:

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6 changes)

  from mlagents.trainers.settings import CuriositySettings
  from mlagents_envs.base_env import BehaviorSpec
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_flattener import ActionFlattener
+ from mlagents.trainers.torch.utils import ModelUtils
  from mlagents.trainers.torch.networks import NetworkBody
  from mlagents.trainers.torch.layers import LinearEncoder, linear_layer
  from mlagents.trainers.settings import NetworkSettings, EncoderType

              specs.observation_shapes, state_encoder_settings
          )
-         self._action_flattener = ModelUtils.ActionFlattener(self._action_spec)
+         self._action_flattener = ActionFlattener(self._action_spec)
          self.inverse_model_action_encoding = torch.nn.Sequential(
              LinearEncoder(2 * settings.encoding_size, 1, 256)

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (6 changes)

  )
  from mlagents.trainers.settings import GAILSettings
  from mlagents_envs.base_env import BehaviorSpec
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction
+ from mlagents.trainers.torch.utils import ModelUtils
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_flattener import ActionFlattener
  from mlagents.trainers.torch.networks import NetworkBody
  from mlagents.trainers.torch.layers import linear_layer, Initialization
  from mlagents.trainers.settings import NetworkSettings, EncoderType

              vis_encode_type=EncoderType.SIMPLE,
              memory=None,
          )
-         self._action_flattener = ModelUtils.ActionFlattener(specs.action_spec)
+         self._action_flattener = ActionFlattener(specs.action_spec)
          unencoded_size = (
              self._action_flattener.flattened_size + 1 if settings.use_actions else 0
          )  # +1 is for dones
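
Since ActionFlattener.flattened_size is continuous_size + sum(discrete_branches) (see the utils.py hunk below), the discriminator's unencoded input size can be checked by hand. A small worked example with a made-up action spec:

# Hypothetical spec: 2 continuous actions plus discrete branches of sizes 3 and 2.
continuous_size = 2
discrete_branches = (3, 2)
flattened_size = continuous_size + sum(discrete_branches)  # 2 + 5 = 7
unencoded_size = flattened_size + 1  # +1 for dones, as in the code above
assert unencoded_size == 8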

ml-agents/mlagents/trainers/torch/networks.py (4 changes)

  from mlagents_envs.base_env import ActionSpec
  from mlagents.trainers.torch.action_model import ActionModel
- from mlagents.trainers.torch.utils import ModelUtils, AgentAction, ActionLogProbs
+ from mlagents.trainers.torch.agent_action import AgentAction
+ from mlagents.trainers.torch.action_log_probs import ActionLogProbs
+ from mlagents.trainers.torch.utils import ModelUtils
  from mlagents.trainers.torch.decoders import ValueHeads
  from mlagents.trainers.torch.layers import LSTM, LinearEncoder
  from mlagents.trainers.torch.model_serialization import exporting_to_onnx

ml-agents/mlagents/trainers/torch/utils.py (166 changes)

- from typing import List, Optional, Tuple, NamedTuple, Dict
+ from typing import List, Optional, Tuple
  from mlagents.torch_utils import torch, nn
  import numpy as np

  )
  from mlagents.trainers.settings import EncoderType, ScheduleType
  from mlagents.trainers.exception import UnityTrainerException
  from mlagents_envs.base_env import ActionSpec
-
- class AgentAction(NamedTuple):
-     """
-     A NamedTuple containing the tensor for continuous actions and a list of tensors for
-     discrete actions. Utility functions provide numpy <=> tensor conversions to be
-     sent as actions to the environment manager, as well as used by the optimizers.
-     :param continuous_tensor: Torch tensor corresponding to continuous actions
-     :param discrete_list: List of Torch tensors, each corresponding to discrete actions
-     """
-
-     continuous_tensor: torch.Tensor
-     discrete_list: Optional[List[torch.Tensor]]
-
-     @property
-     def discrete_tensor(self):
-         """
-         Returns the discrete action list as a stacked tensor
-         """
-         return torch.stack(self.discrete_list, dim=-1)
-
-     def to_numpy_dict(self) -> Dict[str, np.ndarray]:
-         """
-         Returns a Dict of np arrays with an entry corresponding to the continuous action
-         and an entry corresponding to the discrete action. "continuous_action" and
-         "discrete_action" are added to the agent's buffer individually to maintain a flat buffer.
-         """
-         array_dict: Dict[str, np.ndarray] = {}
-         if self.continuous_tensor is not None:
-             array_dict["continuous_action"] = ModelUtils.to_numpy(
-                 self.continuous_tensor
-             )
-         if self.discrete_list is not None:
-             array_dict["discrete_action"] = ModelUtils.to_numpy(
-                 self.discrete_tensor[:, 0, :]
-             )
-         return array_dict
-
-     @staticmethod
-     def from_dict(buff: Dict[str, np.ndarray]) -> "AgentAction":
-         """
-         A static method that accesses continuous and discrete action fields in an AgentBuffer
-         and constructs the corresponding AgentAction from the retrieved np arrays.
-         """
-         continuous: torch.Tensor = None
-         discrete: List[torch.Tensor] = None  # type: ignore
-         if "continuous_action" in buff:
-             continuous = ModelUtils.list_to_tensor(buff["continuous_action"])
-         if "discrete_action" in buff:
-             discrete_tensor = ModelUtils.list_to_tensor(
-                 buff["discrete_action"], dtype=torch.long
-             )
-             discrete = [
-                 discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])
-             ]
-         return AgentAction(continuous, discrete)
-
-
- class ActionLogProbs(NamedTuple):
-     """
-     A NamedTuple containing the tensor for continuous log probs and a list of tensors for
-     discrete log probs of individual actions, as well as all the log probs for an entire branch.
-     Utility functions provide numpy <=> tensor conversions to be used by the optimizers.
-     :param continuous_tensor: Torch tensor corresponding to log probs of continuous actions
-     :param discrete_list: List of Torch tensors, each corresponding to log probs of the
-     discrete actions that were sampled.
-     :param all_discrete_list: List of Torch tensors, each holding all log probs of one
-     discrete action branch, including the discrete actions that were not sampled.
-     """
-
-     continuous_tensor: torch.Tensor
-     discrete_list: Optional[List[torch.Tensor]]
-     all_discrete_list: Optional[List[torch.Tensor]]
-
-     @property
-     def discrete_tensor(self):
-         """
-         Returns the discrete log probs list as a stacked tensor
-         """
-         return torch.stack(self.discrete_list, dim=-1)
-
-     @property
-     def all_discrete_tensor(self):
-         """
-         Returns the discrete log probs of each branch as a tensor
-         """
-         return torch.cat(self.all_discrete_list, dim=1)
-
-     def to_numpy_dict(self) -> Dict[str, np.ndarray]:
-         """
-         Returns a Dict of np arrays with an entry corresponding to the continuous log probs
-         and an entry corresponding to the discrete log probs. "continuous_log_probs" and
-         "discrete_log_probs" are added to the agent's buffer individually to maintain a flat buffer.
-         """
-         array_dict: Dict[str, np.ndarray] = {}
-         if self.continuous_tensor is not None:
-             array_dict["continuous_log_probs"] = ModelUtils.to_numpy(
-                 self.continuous_tensor
-             )
-         if self.discrete_list is not None:
-             array_dict["discrete_log_probs"] = ModelUtils.to_numpy(self.discrete_tensor)
-         return array_dict
-
-     def _to_tensor_list(self) -> List[torch.Tensor]:
-         """
-         Returns the tensors in the ActionLogProbs as a flat List of torch Tensors. This
-         is private and serves as a utility for self.flatten()
-         """
-         tensor_list: List[torch.Tensor] = []
-         if self.continuous_tensor is not None:
-             tensor_list.append(self.continuous_tensor)
-         if self.discrete_list is not None:
-             tensor_list.append(self.discrete_tensor)
-         return tensor_list
-
-     def flatten(self) -> torch.Tensor:
-         """
-         A utility method that returns all log probs in ActionLogProbs as a flattened tensor.
-         This is useful for algorithms like PPO which can treat all log probs in the same way.
-         """
-         return torch.cat(self._to_tensor_list(), dim=1)
-
-     @staticmethod
-     def from_dict(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
-         """
-         A static method that accesses continuous and discrete log probs fields in an AgentBuffer
-         and constructs the corresponding ActionLogProbs from the retrieved np arrays.
-         """
-         continuous: torch.Tensor = None
-         discrete: List[torch.Tensor] = None  # type: ignore
-         if "continuous_log_probs" in buff:
-             continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"])
-         if "discrete_log_probs" in buff:
-             discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
-             discrete = [
-                 discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])
-             ]
-         return ActionLogProbs(continuous, discrete, None)
-
-
  class ModelUtils:

          EncoderType.NATURE_CNN: 36,
          EncoderType.RESNET: 15,
      }

-     class ActionFlattener:
-         def __init__(self, action_spec: ActionSpec):
-             self._specs = action_spec
-
-         @property
-         def flattened_size(self) -> int:
-             return self._specs.continuous_size + sum(self._specs.discrete_branches)
-
-         def forward(self, action: AgentAction) -> torch.Tensor:
-             action_list: List[torch.Tensor] = []
-             if self._specs.continuous_size > 0:
-                 action_list.append(action.continuous_tensor)
-             if self._specs.discrete_size > 0:
-                 flat_discrete = torch.cat(
-                     ModelUtils.actions_to_onehot(
-                         torch.as_tensor(action.discrete_tensor, dtype=torch.long),
-                         self._specs.discrete_branches,
-                     ),
-                     dim=1,
-                 )
-                 action_list.append(flat_discrete)
-             return torch.cat(action_list, dim=1)
-
      @staticmethod
      def update_learning_rate(optim: torch.optim.Optimizer, lr: float) -> None:
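
Judging from this diff, the moved classes keep their behavior in their new homes. A self-contained sketch of how the three pieces fit together, using plain torch in place of the ModelUtils numpy helpers; the batch size and the two-branch action spec below are illustrative assumptions:

import torch

# Hypothetical batch of 4 agents: 2 continuous actions, discrete branches (3, 2).
batch = 4
continuous = torch.randn(batch, 2)
discrete_list = [torch.randint(0, 3, (batch,)), torch.randint(0, 2, (batch,))]

# AgentAction.discrete_tensor stacks the per-branch tensors along the last dim.
discrete_tensor = torch.stack(discrete_list, dim=-1)  # shape (4, 2)

# What ActionFlattener.forward computes: continuous actions concatenated with
# per-branch one-hot encodings (mirroring ModelUtils.actions_to_onehot).
one_hots = [
    torch.nn.functional.one_hot(discrete_tensor[:, i], num_classes=n).float()
    for i, n in enumerate((3, 2))
]
flat_action = torch.cat([continuous] + one_hots, dim=1)
assert flat_action.shape == (batch, 2 + 3 + 2)  # continuous_size + sum(branches)

# What ActionLogProbs.flatten computes: continuous log probs concatenated with
# the stacked log probs of the sampled discrete actions, one column per branch.
continuous_log_probs = torch.randn(batch, 2)
discrete_log_probs_list = [torch.randn(batch), torch.randn(batch)]
flat_log_probs = torch.cat(
    [continuous_log_probs, torch.stack(discrete_log_probs_list, dim=-1)], dim=1
)
assert flat_log_probs.shape == (batch, 4)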
