
Support discrete actions as well

/develop/add-fire
Arthur Juliani, 5 years ago
Current commit: 4a50444f
3 files changed, including 16 insertions and 11 deletions
1. ml-agents/mlagents/trainers/distributions_torch.py (13 changed lines)
2. ml-agents/mlagents/trainers/policy/torch_policy.py (10 changed lines)
3. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changed lines)

ml-agents/mlagents/trainers/distributions_torch.py (13 changed lines)


  branches = []
  for size in act_sizes:
      branch_output_layer = nn.Linear(hidden_size, size)
-     nn.init.xavier_uniform(branch_output_layer.weight, gain=0.01)
+     nn.init.xavier_uniform_(branch_output_layer.weight, gain=0.01)
- return branches
+ return nn.ModuleList(branches)

- raw_probs = torch.sigmoid(logits, dim=-1) * mask
- normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1)
+ raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
+ normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)

+ # Todo - Support multiple branches in mask code
- for idx, branch in enumerate(self.branches):
+ for branch in self.branches:
-     norm_logits = self.mask_branch(logits, masks[idx])
+     norm_logits = self.mask_branch(logits, masks)
      distribution = distributions.categorical.Categorical(logits=norm_logits)
      branch_distributions.append(distribution)
  return branch_distributions
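
Taken together, these changes make the multi-categorical distribution usable for discrete actions: branch layers are registered through nn.ModuleList so their parameters are trained, masked probabilities come from a softmax rather than a sigmoid, and the renormalization keeps its batch dimension. Below is a minimal, self-contained sketch of that masked multi-branch logic; the class name MultiCategoricalSketch, the EPSILON constant, and the final log step are illustrative assumptions and are not part of the diff.

import torch
from torch import nn, distributions

EPSILON = 1e-7  # assumed small constant to avoid log(0)

class MultiCategoricalSketch(nn.Module):
    def __init__(self, hidden_size, act_sizes):
        super().__init__()
        # One linear "branch" per discrete action dimension, registered via
        # ModuleList so the optimizer sees its parameters.
        branches = []
        for size in act_sizes:
            branch_output_layer = nn.Linear(hidden_size, size)
            nn.init.xavier_uniform_(branch_output_layer.weight, gain=0.01)
            branches.append(branch_output_layer)
        self.branches = nn.ModuleList(branches)

    @staticmethod
    def mask_branch(logits, mask):
        # Zero out unavailable actions after the softmax, then renormalize so
        # each row is a valid probability distribution again.
        raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
        normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)
        return torch.log(normalized_probs + EPSILON)

    def forward(self, inputs, masks):
        # As the Todo notes, the same mask tensor is applied to every branch for now.
        branch_distributions = []
        for branch in self.branches:
            norm_logits = self.mask_branch(branch(inputs), masks)
            branch_distributions.append(
                distributions.categorical.Categorical(logits=norm_logits)
            )
        return branch_distributions

For example, MultiCategoricalSketch(8, [3]).forward(torch.rand(2, 8), torch.ones(2, 3)) returns a list with one Categorical distribution over 3 actions for each of the 2 batch rows.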

ml-agents/mlagents/trainers/policy/torch_policy.py (10 changed lines)


  from mlagents.trainers.policy.policy import UnityPolicyException
  from mlagents.trainers.trajectory import SplitObservations
  from mlagents.trainers.brain import BrainParameters
- from mlagents.trainers.models_torch import ActionType, EncoderType, Actor, Critic
+ from mlagents.trainers.models_torch import EncoderType, Actor, Critic
  EPSILON = 1e-7  # Small value to avoid divide by zero

  self.global_step = 0
  self.act_size = brain.vector_action_space_size
+ self.act_type = brain.vector_action_space_type
  self.sequence_length = 1
  if self.use_recurrent:
      self.m_size = trainer_params["memory_size"]

  self.actor = Actor(
      h_size=int(trainer_params["hidden_units"]),
-     act_type=ActionType.CONTINUOUS,
+     act_type=self.act_type,
-     act_size=sum(brain.vector_action_space_size),
+     act_size=brain.vector_action_space_size,
      normalize=trainer_params["normalize"],
      num_layers=int(trainer_params["num_layers"]),
      m_size=trainer_params["memory_size"],

      actions.append(action)
      log_probs.append(action_dist.log_prob(action))
      entropies.append(action_dist.entropy())
- actions = torch.stack(actions).squeeze(0)
+ actions = torch.stack(actions)
  log_probs = torch.stack(log_probs).squeeze(0)
  entropies = torch.stack(entropies).squeeze(0)

  vec_obs, vis_obs, masks = self.split_decision_step(decision_requests)
  vec_obs = [torch.Tensor(vec_obs)]
  vis_obs = [torch.Tensor(vis_ob) for vis_ob in vis_obs]
  masks = torch.Tensor(masks)
  run_out = {}
  action, log_probs, entropy, value_heads = self.execute_model(
+     vec_obs, vis_obs, masks
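
On the policy side, the change drops the squeeze(0) on the stacked actions so the leading branch dimension survives when there are multiple discrete branches, and the Actor is now built from the brain's own action type and per-branch sizes. A small sketch of the sampling loop around the changed lines; the helper name and the explicit sample() call are assumptions, since only the append/stack lines appear in the diff.

import torch

def sample_action_branches(branch_distributions):
    # Draw one action per discrete branch and collect per-branch
    # log-probabilities and entropies.
    actions, log_probs, entropies = [], [], []
    for action_dist in branch_distributions:
        action = action_dist.sample()
        actions.append(action)
        log_probs.append(action_dist.log_prob(action))
        entropies.append(action_dist.entropy())
    actions = torch.stack(actions)  # branch dimension kept (no squeeze(0) after this commit)
    log_probs = torch.stack(log_probs).squeeze(0)
    entropies = torch.stack(entropies).squeeze(0)
    return actions, log_probs, entropies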

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changed lines)


  vec_obs = np.array(batch["vector_obs"])
  vec_obs = [torch.Tensor(vec_obs)]
+ act_masks = torch.Tensor(np.array(batch["action_mask"]))
-     vec_obs, vis_obs
+     vec_obs, vis_obs, act_masks
  )
  value_loss = self.ppo_value_loss(values, old_values, returns)
  policy_loss = self.ppo_policy_loss(
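
On the optimizer side, the PPO update now also converts the stored per-action mask and passes it into the forward pass alongside the observations. A hypothetical, stripped-down illustration of how those tensors are assembled before the model call (the function name is an assumption; the batch keys come from the diff):

import numpy as np
import torch

def prepare_discrete_update_batch(batch):
    # Wrap vector observations in a list of tensors and convert the stored
    # action mask so it can be passed alongside the observations.
    vec_obs = [torch.Tensor(np.array(batch["vector_obs"]))]
    act_masks = torch.Tensor(np.array(batch["action_mask"]))
    return vec_obs, act_masks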
