|
|
|
|
|
|
import torch |
|
|
|
from torch import nn |
|
|
|
|
|
|
|
from mlagents.trainers.distributions_torch import GaussianDistribution, CategoricalDistInstance |
|
|
|
from mlagents.trainers.distributions_torch import ( |
|
|
|
GaussianDistribution, |
|
|
|
MultiCategoricalDistribution, |
|
|
|
) |
|
|
|
from mlagents.trainers.exception import UnityTrainerException |
|
|
|
|
|
|
|
# Type alias: a callable that accepts a single torch.Tensor and returns a torch.Tensor. |
ActivationFunction = Callable[[torch.Tensor], torch.Tensor] |
|
|
|
EncoderFunction = Callable[ |
|
|
|
|
|
|
hidden = encoder(vis_input) |
|
|
|
vis_embeds.append(hidden) |
|
|
|
|
|
|
|
#embedding = vec_embeds[0] |
|
|
|
if len(vec_embeds) > 0: |
|
|
|
vec_embeds = torch.stack(vec_embeds, dim=-1).sum(dim=-1) |
|
|
|
if len(vis_embeds) > 0: |
|
|
|
|
|
|
super(ActorCritic, self).__init__() |
|
|
|
self.act_type = ActionType.from_str(act_type) |
|
|
|
self.act_size = act_size |
|
|
|
self.version_number = torch.nn.Parameter(torch.Tensor([2.0])) |
|
|
|
self.memory_size = torch.nn.Parameter(torch.Tensor([0])) |
|
|
|
self.is_continuous_int = torch.nn.Parameter(torch.Tensor([1])) |
|
|
|
self.act_size_vector = torch.nn.Parameter(torch.Tensor(act_size)) |
|
|
|
self.separate_critic = separate_critic |
|
|
|
self.network_body = NetworkBody( |
|
|
|
vector_sizes, |
|
|
|
|
|
|
vec_inputs, vis_inputs, masks, memories, sequence_length |
|
|
|
) |
|
|
|
sampled_actions = self.sample_action(dists) |
|
|
|
return sampled_actions, dists[0].pdf(sampled_actions) |
|
|
|
return sampled_actions, dists[0].pdf(sampled_actions), self.version_number, self.memory_size, self.is_continuous_int, self.act_size_vector |
|
|
|
|
|
|
|
|
|
|
|
class Critic(nn.Module): |
|
|
|