
Replace torch.detach().cpu().numpy() with a utils method

Branch: /develop/torch-to-np
vincentpierre, 4 years ago
Current commit: 108fac9a
9 files changed, 38 insertions and 35 deletions
1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (4 changes)
2. ml-agents/mlagents/trainers/policy/torch_policy.py (12 changes)
3. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
4. ml-agents/mlagents/trainers/sac/optimizer_torch.py (13 changes)
5. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (5 changes)
6. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
7. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6 changes)
8. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (19 changes)
9. ml-agents/mlagents/trainers/torch/utils.py (8 changes)
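
Every file below applies the same mechanical substitution: the chained tensor.detach().cpu().numpy() call is replaced by the new ModelUtils.to_numpy helper. A minimal sketch of the pattern, using a standalone to_numpy function and an illustrative loss tensor (both are examples for this summary, not code from the commit):

    import numpy as np
    import torch


    def to_numpy(tensor: torch.Tensor) -> np.ndarray:
        # Same idea as ModelUtils.to_numpy: detach from the autograd graph,
        # move to the CPU if necessary, then convert to a numpy array.
        return tensor.detach().cpu().numpy()


    # Stand-in for a loss computed during an update step.
    value_loss = (torch.randn(8, requires_grad=True) ** 2).mean()

    # Old pattern, repeated throughout the trainers:
    old_style = value_loss.detach().cpu().numpy()
    # New pattern introduced by this commit:
    new_style = to_numpy(value_loss)

    assert np.allclose(old_style, new_style)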

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (4 changes)

     )
     for name, estimate in value_estimates.items():
-        value_estimates[name] = estimate.detach().cpu().numpy()
-        next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
+        value_estimates[name] = ModelUtils.to_numpy(estimate)
+        next_value_estimate[name] = ModelUtils.to_numpy(next_value_estimate[name])
     if done:
         for k in next_value_estimate:

ml-agents/mlagents/trainers/policy/torch_policy.py (12 changes)

     action, log_probs, entropy, value_heads, memories = self.sample_actions(
         vec_obs, vis_obs, masks=masks, memories=memories
     )
-    run_out["action"] = action.detach().cpu().numpy()
-    run_out["pre_action"] = action.detach().cpu().numpy()
+    run_out["action"] = ModelUtils.to_numpy(action)
+    run_out["pre_action"] = ModelUtils.to_numpy(action)
-    run_out["log_probs"] = log_probs.detach().cpu().numpy()
-    run_out["entropy"] = entropy.detach().cpu().numpy()
+    run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
+    run_out["entropy"] = ModelUtils.to_numpy(entropy)
-        name: t.detach().cpu().numpy() for name, t in value_heads.items()
+        name: ModelUtils.to_numpy(t) for name, t in value_heads.items()
-    run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
+    run_out["memory_out"] = ModelUtils.to_numpy(memories).squeeze(0)
     return run_out

 def get_action(

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)

     self.optimizer.step()
     update_stats = {
-        "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-        "Losses/Value Loss": value_loss.detach().cpu().numpy(),
+        "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
+        "Losses/Value Loss": ModelUtils.to_numpy(value_loss),
         "Policy/Learning Rate": decay_lr,
         "Policy/Epsilon": decay_eps,
         "Policy/Beta": decay_bet,

ml-agents/mlagents/trainers/sac/optimizer_torch.py (13 changes)

     # Update target network
     self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
     update_stats = {
-        "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-        "Losses/Value Loss": value_loss.detach().cpu().numpy(),
-        "Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
-        "Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
-        "Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
-        .detach()
-        .cpu()
-        .numpy(),
+        "Losses/Policy Loss": abs(ModelUtils.to_numpy(policy_loss)),
+        "Losses/Value Loss": ModelUtils.to_numpy(value_loss),
+        "Losses/Q1 Loss": ModelUtils.to_numpy(q1_loss),
+        "Losses/Q2 Loss": ModelUtils.to_numpy(q2_loss),
+        "Policy/Entropy Coeff": ModelUtils.to_numpy(torch.exp(self._log_ent_coef)),
         "Policy/Learning Rate": decay_lr,
     }

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (5 changes)

 from mlagents.trainers.tests.torch.test_reward_providers.utils import (
     create_agent_buffer,
 )
+from mlagents.trainers.torch.utils import ModelUtils
 SEED = [42]

     buffer = create_agent_buffer(behavior_spec, 5)
     for _ in range(200):
         curiosity_rp.update(buffer)
-    prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
+    prediction = ModelUtils.to_numpy(curiosity_rp._network.predict_action(buffer)[0])
     target = buffer["actions"][0]
     error = float(torch.mean((prediction - target) ** 2))
     assert error < 0.001

         curiosity_rp.update(buffer)
     prediction = curiosity_rp._network.predict_next_state(buffer)[0]
     target = curiosity_rp._network.get_next_state(buffer)[0]
-    error = float(torch.mean((prediction - target) ** 2).detach())
+    error = float(ModelUtils.to_numpy(torch.mean((prediction - target) ** 2)))
     assert error < 0.001

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

     bc_loss.backward()
     self.optimizer.step()
-    run_out = {"loss": bc_loss.detach().cpu().numpy()}
+    run_out = {"loss": ModelUtils.to_numpy(bc_loss)}
     return run_out

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6 changes)

 def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
     with torch.no_grad():
-        rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
+        rewards = ModelUtils.to_numpy(self._network.compute_reward(mini_batch))
     rewards = np.minimum(rewards, 1.0 / self.strength)
     return rewards * self._has_updated_once

     loss.backward()
     self.optimizer.step()
     return {
-        "Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
-        "Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
+        "Losses/Curiosity Forward Loss": ModelUtils.to_numpy(forward_loss),
+        "Losses/Curiosity Inverse Loss": ModelUtils.to_numpy(inverse_loss),
     }

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (19 changes)

     estimates, _ = self._discriminator_network.compute_estimate(
         mini_batch, use_vail_noise=False
     )
-    return (
+    return ModelUtils.to_numpy(
-        .detach()
-        .cpu()
-        .numpy()
     )

 def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:

     expert_estimate, expert_mu = self.compute_estimate(
         expert_batch, use_vail_noise=True
     )
-    stats_dict["Policy/GAIL Policy Estimate"] = (
-        policy_estimate.mean().detach().cpu().numpy()
+    stats_dict["Policy/GAIL Policy Estimate"] = ModelUtils.to_numpy(
+        policy_estimate.mean()
-    stats_dict["Policy/GAIL Expert Estimate"] = (
-        expert_estimate.mean().detach().cpu().numpy()
+    stats_dict["Policy/GAIL Expert Estimate"] = ModelUtils.to_numpy(
+        expert_estimate.mean()
-    stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
+    stats_dict["Losses/GAIL Loss"] = ModelUtils.to_numpy(discriminator_loss)
     total_loss += discriminator_loss
     if self._settings.use_vail:
         # KL divergence loss (encourage latent representation to be normal)

             torch.tensor(0.0),
         )
         total_loss += vail_loss
-        stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
-        stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
+        stats_dict["Policy/GAIL Beta"] = ModelUtils.to_numpy(self._beta)
+        stats_dict["Losses/GAIL KL Loss"] = ModelUtils.to_numpy(kl_loss)
     if self.gradient_penalty_weight > 0.0:
         total_loss += (
             self.gradient_penalty_weight

ml-agents/mlagents/trainers/torch/utils.py (8 changes)

     return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)

+@staticmethod
+def to_numpy(tensor: torch.Tensor) -> np.ndarray:
+    """
+    Converts a Torch Tensor to a numpy array. If the Tensor is on the GPU, it will
+    be brought to the CPU.
+    """
+    return tensor.detach().cpu().numpy()
+
 @staticmethod
 def break_into_branches(
     concatenated_logits: torch.Tensor, action_size: List[int]
 ) -> List[torch.Tensor]:
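
For reference, the new helper can be exercised on its own. A quick usage sketch, assuming the mlagents package at this revision is importable; the log-probability tensor below is illustrative, not taken from the trainers:

    import torch
    from mlagents.trainers.torch.utils import ModelUtils

    # A tensor attached to the autograd graph (it could also live on the GPU).
    log_probs = torch.log_softmax(torch.randn(3, 5, requires_grad=True), dim=1)

    # One call replaces the repeated .detach().cpu().numpy() chain.
    log_probs_np = ModelUtils.to_numpy(log_probs)
    print(type(log_probs_np), log_probs_np.shape)  # <class 'numpy.ndarray'> (3, 5)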
