浏览代码

Visual observations now train as well

/develop/add-fire
Arthur Juliani 4 年前
当前提交
5f936990
共有 4 个文件被更改,包括 27 次插入和 11 次删除
  1. 4
      ml-agents/mlagents/trainers/distributions_torch.py
  2. 22
      ml-agents/mlagents/trainers/models_torch.py
  3. 7
      ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  4. 5
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py

4
ml-agents/mlagents/trainers/distributions_torch.py


from torch import distributions
import numpy as np
EPSILON = 1e-6 # Small value to avoid divide by zero
EPSILON = 1e-7 # Small value to avoid divide by zero
class GaussianDistribution(nn.Module):

def mask_branch(self, logits, mask):
raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)
normalized_logits = torch.log(normalized_probs)
normalized_logits = torch.log(normalized_probs + EPSILON)
return normalized_logits
def split_masks(self, masks):

22
ml-agents/mlagents/trainers/models_torch.py


visual_encoder = ModelUtils.get_encoder_for_type(vis_encode_type)
for vector_size in vector_sizes:
self.vector_normalizers.append(Normalizer(vector_size))
self.vector_encoders.append(VectorEncoder(vector_size, h_size, num_layers))
if vector_size != 0:
self.vector_normalizers.append(Normalizer(vector_size))
self.vector_encoders.append(
VectorEncoder(vector_size, h_size, num_layers)
)
self.visual_encoders.append(visual_encoder(visual_size))
self.visual_encoders.append(
visual_encoder(visual_size.num_channels, h_size)
)
self.vector_encoders = nn.ModuleList(self.vector_encoders)
self.visual_encoders = nn.ModuleList(self.visual_encoders)

vis_embeds = []
for idx, encoder in enumerate(self.visual_encoders):
hidden = encoder(vis_inputs[idx])
vis_input = vis_inputs[idx]
vis_input = vis_input.permute([0, 3, 1, 2])
hidden = encoder(vis_input)
vis_embeds.append(hidden)
if len(vec_embeds) > 0:

class SimpleVisualEncoder(nn.Module):
def __init__(self, initial_channels):
def __init__(self, initial_channels, output_size):
self.h_size = output_size
self.dense = nn.Linear(1728, self.h_size)
return torch.flatten(conv_2)
hidden = self.dense(conv_2.reshape([-1, 1728]))
return hidden
class NatureVisualEncoder(nn.Module):

7
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
vector_obs = [torch.Tensor(np.array(batch["vector_obs"]))]
if self.policy.use_vis_obs:
visual_obs = batch["visual_obs"]
visual_obs = []
for idx, _ in enumerate(self.policy.actor.network_body.visual_encoders):
visual_ob = torch.Tensor(np.array(batch["visual_obs%d" % idx]))
visual_obs.append(visual_ob)
next_obs = [torch.Tensor(next_obs)]
next_obs = [torch.Tensor(next_obs).unsqueeze(0)]
value_estimates, mean_value = self.policy.critic(vector_obs, visual_obs)

5
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


actions = list(actions[0].permute([1, 0]))
if self.policy.use_vis_obs:
vis_obs = np.array(batch["visual_obs"])
vis_obs = []
for idx, _ in enumerate(self.policy.actor.network_body.visual_encoders):
vis_ob = torch.Tensor(np.array(batch["visual_obs%d" % idx]))
vis_obs.append(vis_ob)
else:
vis_obs = []
_, log_probs, entropy, values = self.policy.execute_model(

正在加载...
取消
保存