

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit
8294bc59
3 files changed, 74 insertions(+), 34 deletions(-)
  1. ml-agents-envs/mlagents_envs/base_env.py (12 changes)
  2. ml-agents/mlagents/trainers/torch/distributions.py (42 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (54 changes)

ml-agents-envs/mlagents_envs/base_env.py (12 changes)


    DISCRETE = 0
    CONTINUOUS = 1

    continuous_action_shape: int

    @property
-        return self.discrete_action_size() + self.continuous_action_size()
+        return self.discrete_action_size + self.continuous_action_size

    @property
    def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:

-        return np.zeros((n_agents, self.discrete_action_size + self.continuous_action_size), dtype=np.float32)
+        return np.zeros((n_agents, self.action_size), dtype=np.float32)

    def create_random_action(self, n_agents: int) -> np.ndarray:
        continuous_action = np.random.uniform(

            size=(n_agents),
            dtype=np.int32,
        )
-            for i in range(self.action_size)
+            for i in range(self.discrete_action_size)

class BehaviorSpec(NamedTuple):
    """

ml-agents/mlagents/trainers/torch/distributions.py (42 changes)


    def entropy(self):
        return 0.5 * torch.log(2 * math.pi * math.e * self.std + EPSILON)

+    def action_out(self):
+        return self.sample()

class TanhGaussianDistInstance(GaussianDistInstance):
    def __init__(self, mean, std):

    def entropy(self):
        return -torch.sum(self.probs * torch.log(self.probs), dim=-1)

+    def action_out(self):
+        return self.all_log_prob()

class GaussianDistribution(nn.Module):

            distribution = CategoricalDistInstance(norm_logits)
            branch_distributions.append(distribution)
        return branch_distributions

+class HybridDistribution(nn.Module):
+    def __init__(
+        self,
+        hidden_size: int,
+        continuous_act_size: int,
+        discrete_act_size: int,
+        conditional_sigma: bool = False,
+        tanh_squash: bool = False,
+    ):
+        self.continuous_distributions: List[GaussianDistribution] = []
+        self.discrete_distributions: List[MultiCategoricalDistribution] = []
+        if continuous_act_size > 0:
+            self.continuous_distributions.append(
+                GaussianDistribution(
+                    self.encoding_size,
+                    continuous_act_size,
+                    conditional_sigma=conditional_sigma,
+                    tanh_squash=tanh_squash,
+                )
+            )
+        if discrete_act_size > 0:
+            self.discrete_distributions.append(
+                MultiCategoricalDistribution(self.encoding_size, discrete_act_size)
+            )
+        else:
+            self.discrete_distribution = None

+    def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]:
+        distributions: List[DistInstance] = []
+        for discrete_dist in self.discrete_distributions:
+            distributions += discrete_dist(inputs, masks)
+        for continuous_dist in self.continuous_distributions:
+            distributions += continuous_dist(inputs)
+        return distributions
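HybridDistribution.forward returns one flat list of DistInstance objects (discrete branches first, then the continuous distribution), so a caller can treat both action types uniformly. A small sketch of how such a list might be consumed downstream; this is assumed usage for illustration, with shapes simplified, and is not taken from the commit.

import torch

# Assumed downstream usage of the flat DistInstance list returned by
# HybridDistribution.forward; simplified for illustration only.
def sample_hybrid_action(dist_instances):
    # Each element exposes sample() / log_prob(), whether it wraps a Gaussian
    # (continuous) or a categorical (discrete branch) distribution.
    actions = [dist.sample() for dist in dist_instances]
    log_probs = [dist.log_prob(action) for dist, action in zip(dist_instances, actions)]
    # Concatenate the per-distribution pieces into one action tensor per agent.
    return torch.cat(actions, dim=-1), log_probs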

ml-agents/mlagents/trainers/torch/networks.py (54 changes)


from mlagents.torch_utils import torch, nn
from mlagents_envs.base_env import ActionType
-from mlagents.trainers.torch.distributions import (
-    GaussianDistribution,
-    MultiCategoricalDistribution,
-    DistInstance,
-)
+from mlagents.trainers.torch.distributions import HybridDistribution, DistInstance
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads

"""
pass
class HybridSimpleActor(nn.Module, Actor):
    def __init__(
        self,

        self.discrete_act_size = discrete_act_size
        self.continuous_act_size = continuous_act_size
        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
-        #self.is_continuous_int = torch.nn.Parameter(
-        #)
-        self.continuous_act_size_vector = torch.nn.Parameter(torch.Tensor(continuous_act_size))
-        self.discrete_act_size_vector = torch.nn.Parameter(torch.Tensor(discrete_act_size))
+        # self.is_continuous_int = torch.nn.Parameter(
+        # )
+        self.continuous_act_size_vector = torch.nn.Parameter(
+            torch.Tensor(continuous_act_size)
+        )
+        self.discrete_act_size_vector = torch.nn.Parameter(
+            torch.Tensor(discrete_act_size)
+        )
        self.network_body = NetworkBody(observation_shapes, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2

-        self.continuous_distribution = GaussianDistribution(
-            self.encoding_size,
-            continuous_act_size[0],
-            conditional_sigma=conditional_sigma,
-            tanh_squash=tanh_squash,
-        )
-        self.discrete_distribution = MultiCategoricalDistribution(
-            self.encoding_size, discrete_act_size
-        )
+        self.distribution = HybridDistribution(
+            self.encoding_size,
+            continuous_act_size[0],
+            discrete_act_size,
+            conditional_sigma=conditional_sigma,
+            tanh_squash=tanh_squash,
+        )

    @property

        encoding, memories = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
-        discrete_dists = self.discrete_distribution(encoding, masks)
-        continuous_dists = self.continuous_distribution(encoding)
-        return discrete_dists + continuous_dists, memories
+        dists = self.distribution(encoding, masks)
+        return dists, memories
    def forward(
        self,

        Note: This forward() method is required for exporting to ONNX. Don't modify the inputs and outputs.
        """
        # TODO: This is bad right now
        dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
        discrete_dists = dists[0]
        continuous_dists = dists[1]
        discrete_action_out = discrete_dists[0].all_log_prob()

            self.is_continuous_int,
            self.act_size_vector,
        )
class HybridSharedActorCritic(HybridSimpleActor, ActorCritic):
    def __init__(

        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
        # TODO: this is just a rehashing of get_dists code
-        if self.act_type == ActionType.CONTINUOUS:
-            dists = self.distribution(encoding)
-        else:
-            dists = self.distribution(encoding, masks=masks)
+        dists = self.distribution(encoding, masks)
        value_outputs = self.value_heads(encoding)
        return dists, value_outputs, memories

        else:
            mem_out = None
        return dists, value_outputs, mem_out
################################################################################
######### Continuous xor Discrete cases ##########
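The net effect in networks.py is that the actor no longer branches on action type: NetworkBody produces an encoding, and a single HybridDistribution call yields every DistInstance the trainer needs. A pared-down toy version of that structure is sketched below; names like ToyHybridActor and the Linear stand-in for NetworkBody are assumptions, not the commit's classes.

import torch
from torch import nn

# Toy illustration of the actor structure after this change; the class and
# layer below are placeholders, not the commit's NetworkBody / HybridSimpleActor.
class ToyHybridActor(nn.Module):
    def __init__(self, obs_size: int, encoding_size: int, distribution: nn.Module):
        super().__init__()
        self.body = nn.Linear(obs_size, encoding_size)  # stand-in for NetworkBody
        self.distribution = distribution                # e.g. a HybridDistribution

    def get_dists(self, obs: torch.Tensor, masks: torch.Tensor = None):
        encoding = torch.relu(self.body(obs))
        # One call covers both discrete branches and continuous dimensions,
        # replacing the separate discrete/continuous code paths shown above.
        return self.distribution(encoding, masks)

With this shape, HybridSimpleActor.get_dists and HybridSharedActorCritic's get_dist_and_value reduce to the same single distribution call, plus a ValueHeads pass over the shared encoding for the critic.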
