/exp-vince
vincentpierre, 4 years ago
Current commit 181bdec0
8 files changed, 26 insertions and 18 deletions
  1. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  2. ml-agents/mlagents/trainers/sac/optimizer_torch.py (10 changes)
  3. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2 changes)
  4. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  5. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
  6. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (12 changes)
  7. ml-agents/mlagents/trainers/torch/networks.py (2 changes)
  8. ml-agents/mlagents/trainers/torch/utils.py (8 changes)
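Every hunk below applies the same mechanical change: stats values that used to be pulled out of tensors with a bare .item() call now go through the new ModelUtils.to_item helper, which detaches the tensor and moves it to the CPU before extracting the Python float. A minimal before/after sketch, assuming this branch's helper is importable; the policy_loss value is just a stand-in:

import torch
from mlagents.trainers.torch.utils import ModelUtils

policy_loss = torch.tensor(0.25, requires_grad=True) * 2  # stand-in for a real loss

# Before this commit: extract the Python float directly from the tensor.
stats_before = {"Losses/Policy Loss": policy_loss.item()}

# After this commit: route through the helper, which detaches the tensor,
# moves it to the CPU if needed, and then calls .item().
stats_after = {"Losses/Policy Loss": ModelUtils.to_item(policy_loss)}

assert stats_before == stats_after  # same float either way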

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)

  self.optimizer.step()
  update_stats = {
-     "Losses/Policy Loss": policy_loss.item(),
-     "Losses/Value Loss": value_loss.item(),
+     "Losses/Policy Loss": ModelUtils.to_item(policy_loss),
+     "Losses/Value Loss": ModelUtils.to_item(value_loss),
      "Policy/Learning Rate": decay_lr,
      "Policy/Epsilon": decay_eps,
      "Policy/Beta": decay_bet,

ml-agents/mlagents/trainers/sac/optimizer_torch.py (10 changes)

  # Update target network
  self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
  update_stats = {
-     "Losses/Policy Loss": policy_loss.item(),
-     "Losses/Value Loss": value_loss.item(),
-     "Losses/Q1 Loss": q1_loss.item(),
-     "Losses/Q2 Loss": q2_loss.item(),
-     "Policy/Entropy Coeff": torch.mean(torch.exp(self._log_ent_coef)).item(),
+     "Losses/Policy Loss": ModelUtils.to_item(policy_loss),
+     "Losses/Value Loss": ModelUtils.to_item(value_loss),
+     "Losses/Q1 Loss": ModelUtils.to_item(q1_loss),
+     "Losses/Q2 Loss": ModelUtils.to_item(q2_loss),
+     "Policy/Entropy Coeff": ModelUtils.to_item(torch.mean(torch.exp(self._log_ent_coef))),
      "Policy/Learning Rate": decay_lr,
  }
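The context line above also shows SAC's target-network soft update, which runs just before the stats are logged. A soft (Polyak) update of this kind blends each target parameter toward the corresponding online parameter by a factor tau. The sketch below illustrates that general pattern; it is not the exact ml-agents implementation:

import torch

def soft_update(source: torch.nn.Module, target: torch.nn.Module, tau: float) -> None:
    # target <- tau * source + (1 - tau) * target, parameter by parameter.
    with torch.no_grad():
        for src_param, tgt_param in zip(source.parameters(), target.parameters()):
            tgt_param.data.copy_(tau * src_param.data + (1.0 - tau) * tgt_param.data)

# Usage: after each gradient step, nudge the target critic toward the online critic.
online_critic = torch.nn.Linear(4, 2)
target_critic = torch.nn.Linear(4, 2)
soft_update(online_critic, target_critic, tau=0.005)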

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2 changes)

  curiosity_rp.update(buffer)
  prediction = curiosity_rp._network.predict_action(buffer)[0]
  target = torch.tensor(buffer["actions"][0])
- error = torch.mean((prediction - target) ** 2).item()
+ error = ModelUtils.to_item(torch.mean((prediction - target) ** 2))
  assert error < 0.001

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

  bc_loss.backward()
  self.optimizer.step()
- run_out = {"loss": bc_loss.item()}
+ run_out = {"loss": ModelUtils.to_item(bc_loss)}
  return run_out
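For context, the BC module's update computes a behavioral-cloning loss against demonstration actions, backpropagates, steps the optimizer, and then converts the loss to a float for logging. A toy sketch of that update shape; the network, data, and MSE loss are stand-ins, not ml-agents' module:

import torch

policy = torch.nn.Linear(8, 2)  # stand-in policy network
optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4)

demo_obs = torch.randn(32, 8)
demo_actions = torch.randn(32, 2)

bc_loss = torch.nn.functional.mse_loss(policy(demo_obs), demo_actions)
optimizer.zero_grad()
bc_loss.backward()
optimizer.step()

run_out = {"loss": bc_loss.detach().cpu().item()}  # same shape as the hunk above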

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)

  loss.backward()
  self.optimizer.step()
  return {
-     "Losses/Curiosity Forward Loss": forward_loss.item(),
-     "Losses/Curiosity Inverse Loss": inverse_loss.item(),
+     "Losses/Curiosity Forward Loss": ModelUtils.to_item(forward_loss),
+     "Losses/Curiosity Inverse Loss": ModelUtils.to_item(inverse_loss),
  }

  def get_modules(self):
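The two curiosity losses follow the usual ICM-style recipe: an inverse model predicts the action taken from two consecutive state embeddings, and a forward model predicts the next embedding from the current embedding and the action, with the forward prediction error also serving as the intrinsic reward signal. A rough sketch of how such losses are typically formed; the names and shapes are illustrative, not ml-agents' exact code, and a discrete action space would use a cross-entropy term instead of MSE for the inverse loss:

import torch
import torch.nn.functional as F

def curiosity_losses(phi_s, phi_next, action, forward_model, inverse_model):
    # Inverse loss: recover the taken action from the two state embeddings.
    predicted_action = inverse_model(torch.cat([phi_s, phi_next], dim=1))
    inverse_loss = F.mse_loss(predicted_action, action)
    # Forward loss: predict the next embedding from the current embedding and the action.
    predicted_phi_next = forward_model(torch.cat([phi_s, action], dim=1))
    forward_loss = F.mse_loss(predicted_phi_next, phi_next.detach())
    return forward_loss, inverse_loss

# Toy usage with linear stand-ins for the two models.
embed_size, act_size = 8, 2
forward_model = torch.nn.Linear(embed_size + act_size, embed_size)
inverse_model = torch.nn.Linear(2 * embed_size, act_size)
phi_s, phi_next = torch.randn(5, embed_size), torch.randn(5, embed_size)
action = torch.randn(5, act_size)
forward_loss, inverse_loss = curiosity_losses(phi_s, phi_next, action, forward_model, inverse_model)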

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (12 changes)

  expert_estimate, expert_mu = self.compute_estimate(
      expert_batch, use_vail_noise=True
  )
- stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
- stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
+ stats_dict["Policy/GAIL Policy Estimate"] = ModelUtils.to_item(policy_estimate.mean())
+ stats_dict["Policy/GAIL Expert Estimate"] = ModelUtils.to_item(expert_estimate.mean())
- stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
+ stats_dict["Losses/GAIL Loss"] = ModelUtils.to_item(discriminator_loss)
  total_loss += discriminator_loss
  if self._settings.use_vail:
      # KL divergence loss (encourage latent representation to be normal)

          torch.tensor(0.0),
      )
      total_loss += vail_loss
-     stats_dict["Policy/GAIL Beta"] = self._beta.item()
-     stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
+     stats_dict["Policy/GAIL Beta"] = ModelUtils.to_item(self._beta)
+     stats_dict["Losses/GAIL KL Loss"] = ModelUtils.to_item(kl_loss)
- stats_dict["Policy/GAIL Grad Mag Loss"] = gradient_magnitude_loss.item()
+ stats_dict["Policy/GAIL Grad Mag Loss"] = ModelUtils.to_item(gradient_magnitude_loss)
  total_loss += gradient_magnitude_loss
  return total_loss, stats_dict
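When use_vail is enabled, the discriminator has a variational bottleneck: a KL term pushes the latent code toward a unit Gaussian, and the beta multiplier logged above is adjusted so the KL stays near a target information capacity, with the torch.tensor(0.0) visible in the hunk acting as the floor of that adjustment. A hedged sketch of that style of dual update; the constant names and values are illustrative, not ml-agents' exact code:

import torch

def update_beta(beta: torch.Tensor, kl_loss: torch.Tensor,
                mutual_information: float = 0.5, beta_lr: float = 1e-5) -> torch.Tensor:
    # Dual gradient ascent on the KL constraint: raise beta when the KL exceeds
    # the target information capacity, lower it (never below zero) otherwise.
    with torch.no_grad():
        return torch.clamp(beta + beta_lr * (kl_loss - mutual_information), min=0.0)

beta = update_beta(torch.tensor(0.1), kl_loss=torch.tensor(0.8))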

ml-agents/mlagents/trainers/torch/networks.py (2 changes)

  @property
  def current_step(self):
-     return int(self.__global_step.item())
+     return int(ModelUtils.to_item(self.__global_step))

  @current_step.setter
  def current_step(self, value):

ml-agents/mlagents/trainers/torch/utils.py (8 changes)

      return tensor.detach().cpu().numpy()

+ @staticmethod
+ def to_item(tensor: torch.Tensor) -> float:
+     """
+     Converts a Torch Tensor to a float. If the Tensor is on the GPU, it will
+     be brought to the CPU first.
+     """
+     return tensor.detach().cpu().item()
+
  @staticmethod
  def break_into_branches(
      concatenated_logits: torch.Tensor, action_size: List[int]
  ) -> List[torch.Tensor]:
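The new helper simply chains detach(), cpu(), and item(), so it behaves the same for tensors on either device and for tensors still attached to the autograd graph. A small usage sketch, assuming this branch of ml-agents is installed; the CUDA path is only exercised when a GPU is present:

import torch
from mlagents.trainers.torch.utils import ModelUtils  # the helper added in this commit

loss = (torch.randn(10, requires_grad=True) ** 2).mean()
if torch.cuda.is_available():
    loss = loss.cuda()  # exercise the GPU path when a device is available

value = ModelUtils.to_item(loss)  # detach -> cpu -> item
assert isinstance(value, float)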
