Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/torch_utils/torch.py
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/simple_env_manager.py
/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
/ml-agents/mlagents/trainers/sac/optimizer_torch.py
/ml-agents/mlagents/trainers/trainer/rl_trainer.py
/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
/ml-agents/mlagents/trainers/torch/components/bc/module.py
/ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
/ml-agents/mlagents/trainers/torch/utils.py
/ml-agents/mlagents/trainers/torch/networks.py

17 commits

Author  SHA1  Message  Commit date
vincentpierre f49aa8c7 - 4 years ago
vincentpierre 181bdec0 - 4 years ago
vincentpierre a8137478 - 4 years ago
vincentpierre e66e7ca1 - 4 years ago
vincentpierre d2ad8f12 - 4 years ago
vincentpierre 31ea11e0 - 4 years ago
vincentpierre 170f47a5 - 4 years ago
vincentpierre 29f08b2e - 4 years ago
vincentpierre c10da7ef - 4 years ago
vincentpierre 8be52c38 - 4 years ago
vincentpierre 6cbe892f - 4 years ago
vincentpierre 6b6d4c38 - 4 years ago
vincentpierre a899ecff - 4 years ago
vincentpierre dda6dc1b - 4 years ago
vincentpierre 49e08218 - 4 years ago
vincentpierre d9e2f974 - 4 years ago
vincentpierre c78639a0 - 4 years ago
14 files changed, 93 insertions(+), 24 deletions(-)
  1. config/imitation/Pyramids.yaml (0 lines changed)
  2. ml-agents/mlagents/torch_utils/torch.py (2 lines changed)
  3. ml-agents/mlagents/trainers/learn.py (26 lines changed)
  4. ml-agents/mlagents/trainers/simple_env_manager.py (6 lines changed)
  5. ml-agents/mlagents/trainers/trainer_controller.py (1 line changed)
  6. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 lines changed)
  7. ml-agents/mlagents/trainers/sac/optimizer_torch.py (10 lines changed)
  8. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2 lines changed)
  9. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 lines changed)
  10. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 lines changed)
  11. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (12 lines changed)
  12. ml-agents/mlagents/trainers/torch/networks.py (2 lines changed)
  13. ml-agents/mlagents/trainers/torch/utils.py (8 lines changed)
  14. ml-agents/mlagents/trainers/trainer/rl_trainer.py (38 lines changed)

config/imitation/Pyramids.yaml (0 lines changed)

ml-agents/mlagents/torch_utils/torch.py (2 lines changed)


 # Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
 # pylint: disable=E1101
-if torch.cuda.is_available():
+if False:#torch.cuda.is_available():
     torch.set_default_tensor_type(torch.cuda.FloatTensor)
     device = torch.device("cuda")
 else:
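The one-line change above hard-codes the CUDA check to False, so the default tensor type and device stay on the CPU even on a GPU machine, presumably to take the GPU out of the equation while investigating memory growth. A standalone sketch of the same selection logic, assuming only that torch is installed (the use_cuda flag is illustrative and not part of the diff):

import torch

# Illustrative flag; the diff above simply replaces the condition with a literal False.
use_cuda = False

if use_cuda and torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    device = torch.device("cuda")
else:
    torch.set_default_tensor_type(torch.FloatTensor)
    device = torch.device("cpu")

print(torch.zeros(1).device)  # prints "cpu" while use_cuda is False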

ml-agents/mlagents/trainers/learn.py (26 lines changed)


 from mlagents.trainers.training_status import GlobalTrainingStatus
 from mlagents_envs.base_env import BaseEnv
 from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
+from mlagents.trainers.simple_env_manager import SimpleEnvManager
 from mlagents_envs.side_channel.side_channel import SideChannel
 from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
 from mlagents_envs.timers import (

     env_manager = SubprocessEnvManager(
         env_factory, engine_config, env_settings.num_envs
     )
+    # env_manager = SimpleEnvManager(env_factory(0, []), env_parameter_manager)
     trainer_factory = TrainerFactory(
         trainer_config=options.behaviors,

     write_run_options(write_path, options)
     write_timing_tree(run_logs_dir)
     write_training_status(run_logs_dir)
+    from guppy import hpy
+    h = hpy()
+    print(h.heap())
 def write_run_options(output_dir: str, run_options: RunOptions) -> None:

     ) -> UnityEnvironment:
         # Make sure that each environment gets a different seed
         env_seed = seed + worker_id
-        return UnityEnvironment(
-            file_name=env_path,
+        from mlagents_envs.registry import default_registry
+        return default_registry["Pyramids"].make(
             worker_id=worker_id,
             seed=env_seed,
             no_graphics=no_graphics,

-            log_folder=log_folder,
-        )
+            log_folder=log_folder
+        )
+        # return UnityEnvironment(
+        # file_name=env_path,
+        # worker_id=worker_id,
+        # seed=env_seed,
+        # no_graphics=no_graphics,
+        # base_port=start_port,
+        # additional_args=env_args,
+        # side_channels=side_channels,
+        # log_folder=log_folder,
+        # )
     return create_unity_environment
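The environment factory above now ignores the local build path and always creates the Pyramids environment from the mlagents_envs example registry, and a guppy heap dump is printed after training finishes. A hedged, standalone sketch of driving the registry directly, assuming mlagents_envs is installed and the prebuilt Pyramids binary can be downloaded on first use:

from mlagents_envs.registry import default_registry

# Sketch only: create the registered Pyramids environment, reset it, and inspect it.
env = default_registry["Pyramids"].make(worker_id=0, seed=1, no_graphics=True)
env.reset()
behavior_name = list(env.behavior_specs)[0]
print("Behavior:", behavior_name)
env.close()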

ml-agents/mlagents/trainers/simple_env_manager.py (6 lines changed)


 from mlagents_envs.side_channel.environment_parameters_channel import (
     EnvironmentParametersChannel,
 )
+import numpy as np
 class SimpleEnvManager(EnvManager):

         self.previous_all_action_info = all_action_info
         for brain_name, action_info in all_action_info.items():
-            self.env.set_actions(brain_name, action_info.action)
+            try:
+                self.env.set_actions(brain_name, np.array(action_info.action))
+            except:
+                pass
         self.env.step()
         all_step_result = self._generate_all_results()

ml-agents/mlagents/trainers/trainer_controller.py (1 line changed)


             self.logger.info(
                 "Learning was interrupted. Please wait while the graph is generated."
             )
+            raise ex
             if isinstance(ex, KeyboardInterrupt) or isinstance(
                 ex, UnityCommunicatorStoppedException
             ):

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 lines changed)


         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": policy_loss.item(),
-            "Losses/Value Loss": value_loss.item(),
+            "Losses/Policy Loss": ModelUtils.to_item(policy_loss),
+            "Losses/Value Loss": ModelUtils.to_item(value_loss),
             "Policy/Learning Rate": decay_lr,
             "Policy/Epsilon": decay_eps,
             "Policy/Beta": decay_bet,

ml-agents/mlagents/trainers/sac/optimizer_torch.py (10 lines changed)


         # Update target network
         self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
         update_stats = {
-            "Losses/Policy Loss": policy_loss.item(),
-            "Losses/Value Loss": value_loss.item(),
-            "Losses/Q1 Loss": q1_loss.item(),
-            "Losses/Q2 Loss": q2_loss.item(),
-            "Policy/Entropy Coeff": torch.mean(torch.exp(self._log_ent_coef)).item(),
+            "Losses/Policy Loss": ModelUtils.to_item(policy_loss),
+            "Losses/Value Loss": ModelUtils.to_item(value_loss),
+            "Losses/Q1 Loss": ModelUtils.to_item(q1_loss),
+            "Losses/Q2 Loss": ModelUtils.to_item(q2_loss),
+            "Policy/Entropy Coeff": ModelUtils.to_item(torch.mean(torch.exp(self._log_ent_coef))),
             "Policy/Learning Rate": decay_lr,
         }

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2 lines changed)


         curiosity_rp.update(buffer)
     prediction = curiosity_rp._network.predict_action(buffer)[0]
     target = torch.tensor(buffer["actions"][0])
-    error = torch.mean((prediction - target) ** 2).item()
+    error = ModelUtils.to_item(torch.mean((prediction - target) ** 2))
     assert error < 0.001

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 lines changed)


         bc_loss.backward()
         self.optimizer.step()
-        run_out = {"loss": bc_loss.item()}
+        run_out = {"loss": ModelUtils.to_item(bc_loss)}
         return run_out

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 lines changed)


         loss.backward()
         self.optimizer.step()
         return {
-            "Losses/Curiosity Forward Loss": forward_loss.item(),
-            "Losses/Curiosity Inverse Loss": inverse_loss.item(),
+            "Losses/Curiosity Forward Loss": ModelUtils.to_item(forward_loss),
+            "Losses/Curiosity Inverse Loss": ModelUtils.to_item(inverse_loss),
         }
     def get_modules(self):
def get_modules(self):

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (12 lines changed)


         expert_estimate, expert_mu = self.compute_estimate(
             expert_batch, use_vail_noise=True
         )
-        stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
-        stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
+        stats_dict["Policy/GAIL Policy Estimate"] = ModelUtils.to_item(policy_estimate.mean())
+        stats_dict["Policy/GAIL Expert Estimate"] = ModelUtils.to_item(expert_estimate.mean())
-        stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
+        stats_dict["Losses/GAIL Loss"] = ModelUtils.to_item(discriminator_loss)
         total_loss += discriminator_loss
         if self._settings.use_vail:
             # KL divergence loss (encourage latent representation to be normal)

                     torch.tensor(0.0),
                 )
             total_loss += vail_loss
-            stats_dict["Policy/GAIL Beta"] = self._beta.item()
-            stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
+            stats_dict["Policy/GAIL Beta"] = ModelUtils.to_item(self._beta)
+            stats_dict["Losses/GAIL KL Loss"] = ModelUtils.to_item(kl_loss)
-            stats_dict["Policy/GAIL Grad Mag Loss"] = gradient_magnitude_loss.item()
+            stats_dict["Policy/GAIL Grad Mag Loss"] = ModelUtils.to_item(gradient_magnitude_loss)
             total_loss += gradient_magnitude_loss
         return total_loss, stats_dict

ml-agents/mlagents/trainers/torch/networks.py (2 lines changed)


     @property
     def current_step(self):
-        return int(self.__global_step.item())
+        return int(ModelUtils.to_item(self.__global_step))
     @current_step.setter
     def current_step(self, value):

ml-agents/mlagents/trainers/torch/utils.py (8 lines changed)


         return tensor.detach().cpu().numpy()
+    @staticmethod
+    def to_item(tensor: torch.Tensor) -> float:
+        """
+        Converts a Torch Tensor to a Python float. If the Tensor is on the GPU, it
+        will be brought to the CPU first.
+        """
+        return tensor.detach().cpu().item()
     @staticmethod
     def break_into_branches(
         concatenated_logits: torch.Tensor, action_size: List[int]
     ) -> List[torch.Tensor]:
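The new ModelUtils.to_item helper backs all of the .item() replacements in this diff: the tensor is detached from the autograd graph and moved to the CPU before the Python float is extracted, an experiment aimed at the memory growth this branch is chasing. A standalone sketch, where the free function to_item stands in for the ModelUtils static method and both calls return the same plain float:

import torch

def to_item(tensor: torch.Tensor) -> float:
    # Same pattern as the helper added above: detach, move to CPU, extract the float.
    return tensor.detach().cpu().item()

loss = (torch.randn(8, requires_grad=True) ** 2).mean()
print(loss.item())    # plain .item(), as the code did before this change
print(to_item(loss))  # explicit detach().cpu().item(), as it does after this change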

ml-agents/mlagents/trainers/trainer/rl_trainer.py (38 lines changed)


 logger = get_logger(__name__)
+from pympler import muppy, summary
+import psutil
+import os
+import torch
+import gc
 class RLTrainer(Trainer):  # pylint: disable=abstract-method
     """
     This class is the base class for trainers that use Reward Signals.

         :param step_after_process: the step count after processing the next trajectory.
         """
         if self._next_summary_step == 0:  # Don't write out the first one
+            all_objects = muppy.get_objects()
+            self.past_sum = summary.summarize(all_objects)
+        print("\n ------------------------------------- ")
+        process = psutil.Process(os.getpid())
+        mem = process.memory_info().rss
+        print("Total memory ", mem)
+        print("Total Memory in Python")
+        all_objects = muppy.get_objects()
+        sum1 = summary.summarize(all_objects)
+        summary.print_(sum1)
+        print("Diff Memory")
+        diff = summary.get_diff( self.past_sum, sum1)
+        summary.print_(diff)
+        self.past_sum = sum1
+        tmp_tensor = 0
+        tmp_module = 0
+        for obj in gc.get_objects():
+            try:
+                if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
+                    tmp_tensor+=1
+            except:
+                pass
+            try:
+                if isinstance(obj, torch.nn.Module):
+                    tmp_module+=1
+            except:
+                pass
+        print("Total number of tensors", tmp_tensor, " of modules", tmp_module)
     def _maybe_save_model(self, step_after_process: int) -> None:
         """

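The block added to rl_trainer.py is throwaway diagnostics for the memory investigation: psutil reports the process RSS, pympler's muppy/summary pair snapshots the Python heap and prints the delta since the previous snapshot, and a gc walk counts live torch tensors and nn.Module instances. A self-contained sketch of the same pattern outside the trainer, assuming psutil, pympler and torch are installed (the function name snapshot_memory and the module-level _PAST_SUM are illustrative, not part of the diff):

import gc
import os

import psutil
import torch
from pympler import muppy, summary

_PAST_SUM = None  # previous heap summary, kept for diffing between calls

def snapshot_memory() -> None:
    """Print process RSS, a pympler heap diff, and live torch object counts."""
    global _PAST_SUM
    rss = psutil.Process(os.getpid()).memory_info().rss
    print("Total memory (RSS bytes):", rss)

    current = summary.summarize(muppy.get_objects())
    if _PAST_SUM is not None:
        summary.print_(summary.get_diff(_PAST_SUM, current))
    _PAST_SUM = current

    tensors = sum(1 for obj in gc.get_objects() if torch.is_tensor(obj))
    modules = sum(1 for obj in gc.get_objects() if isinstance(obj, torch.nn.Module))
    print("Live tensors:", tensors, "modules:", modules)

snapshot_memory()  # call periodically, e.g. once per summary write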