
Using item() in place of to_numpy()

/develop/torch-to-np
vincentpierre, 4 years ago
Commit: 31750e97
5 changed files, with 12 additions and 16 deletions
  1. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  2. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)
  3. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  4. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
  5. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (14 changes)
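
Every hunk below follows the same pattern: a scalar loss tensor that is only read for stats reporting is converted with Tensor.item(), which yields a plain Python number, instead of going through ModelUtils.to_numpy. A minimal sketch of the difference, assuming to_numpy is a detach/cpu/numpy round trip (that helper's implementation is not part of this diff):

    import torch

    # A scalar loss tensor attached to the autograd graph (illustrative values).
    policy_loss = (torch.tensor(0.5, requires_grad=True) ** 2).mean()

    # NumPy round trip, roughly what a to_numpy-style helper does
    # (assumption; ModelUtils.to_numpy itself is not shown in this diff):
    loss_as_array = policy_loss.detach().cpu().numpy()  # 0-d numpy.ndarray

    # item() returns a plain Python float directly, which is all a stats dict needs:
    loss_as_float = policy_loss.item()  # float

    update_stats = {"Losses/Policy Loss": loss_as_float}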

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)

        self.optimizer.step()
        update_stats = {
-           "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
-           "Losses/Value Loss": ModelUtils.to_numpy(value_loss),
+           "Losses/Policy Loss": policy_loss.item(),
+           "Losses/Value Loss": value_loss.item(),
            "Policy/Learning Rate": decay_lr,
            "Policy/Epsilon": decay_eps,
            "Policy/Beta": decay_bet,

ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 changes)

        # Update target network
        self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
        update_stats = {
-           "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
+           "Losses/Policy Loss": policy_loss.item(),
-           "Policy/Entropy Coeff": ModelUtils.to_numpy(torch.exp(self._log_ent_coef)),
+           "Policy/Entropy Coeff": torch.exp(self._log_ent_coef).item(),
            "Policy/Learning Rate": decay_lr,
        }

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

        bc_loss.backward()
        self.optimizer.step()
-       run_out = {"loss": ModelUtils.to_numpy(bc_loss)}
+       run_out = {"loss": bc_loss.item()}
        return run_out

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)

        loss.backward()
        self.optimizer.step()
        return {
-           "Losses/Curiosity Forward Loss": ModelUtils.to_numpy(forward_loss),
-           "Losses/Curiosity Inverse Loss": ModelUtils.to_numpy(inverse_loss),
+           "Losses/Curiosity Forward Loss": forward_loss.item(),
+           "Losses/Curiosity Inverse Loss": inverse_loss.item(),
        }

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (14 changes)

        expert_estimate, expert_mu = self.compute_estimate(
            expert_batch, use_vail_noise=True
        )
-       stats_dict["Policy/GAIL Policy Estimate"] = ModelUtils.to_numpy(
-           policy_estimate.mean()
-       )
-       stats_dict["Policy/GAIL Expert Estimate"] = ModelUtils.to_numpy(
-           expert_estimate.mean()
-       )
+       stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
+       stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
-       stats_dict["Losses/GAIL Loss"] = ModelUtils.to_numpy(discriminator_loss)
+       stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
        total_loss += discriminator_loss
        if self._settings.use_vail:
            # KL divergence loss (encourage latent representation to be normal)

                torch.tensor(0.0),
            )
            total_loss += vail_loss
-           stats_dict["Policy/GAIL Beta"] = ModelUtils.to_numpy(self._beta)
-           stats_dict["Losses/GAIL KL Loss"] = ModelUtils.to_numpy(kl_loss)
+           stats_dict["Policy/GAIL Beta"] = self._beta.item()
+           stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
        if self.gradient_penalty_weight > 0.0:
            total_loss += (
                self.gradient_penalty_weight