浏览代码

fix formatting and test

/develop/add-fire/ckpt-2
Ruo-Ping Dong 5 年前
当前提交
71fe4df6
共有 20 个文件被更改,包括 89 次插入79 次删除
  1. 8
      ml-agents-envs/mlagents_envs/exception.py
  2. 11
      ml-agents/mlagents/trainers/policy/policy.py
  3. 6
      ml-agents/mlagents/trainers/policy/tf_policy.py
  4. 12
      ml-agents/mlagents/trainers/policy/torch_policy.py
  5. 2
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  6. 7
      ml-agents/mlagents/trainers/ppo/trainer.py
  7. 2
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  8. 4
      ml-agents/mlagents/trainers/sac/trainer.py
  9. 6
      ml-agents/mlagents/trainers/saver/saver.py
  10. 16
      ml-agents/mlagents/trainers/saver/tf_saver.py
  11. 13
      ml-agents/mlagents/trainers/saver/torch_saver.py
  12. 2
      ml-agents/mlagents/trainers/settings.py
  13. 4
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  14. 11
      ml-agents/mlagents/trainers/tf/model_serialization.py
  15. 4
      ml-agents/mlagents/trainers/torch/encoders.py
  16. 39
      ml-agents/mlagents/trainers/torch/model_serialization.py
  17. 10
      ml-agents/mlagents/trainers/torch/networks.py
  18. 9
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  19. 2
      ml-agents/mlagents/trainers/trainer/trainer.py
  20. 0
      ml-agents/mlagents/trainers/saver/__init__.py

8
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super().__init__(message)
class UnityPolicyException(UnityException):
"""
Related to errors with the Trainer.
"""
pass

11
ml-agents/mlagents/trainers/policy/policy.py


seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.vis_obs_shape = [
shape for shape in behavior_spec.observation_shapes if len(shape) == 3
][0] if self.vis_obs_size > 0 else None
self.vis_obs_shape = (
[shape for shape in behavior_spec.observation_shapes if len(shape) == 3][0]
if self.vis_obs_size > 0
else None
)
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

self.load = load
self.h_size = self.network_settings.hidden_units
num_layers = self.network_settings.num_layers
if num_layers < 1:

6
ml-agents/mlagents/trainers/policy/tf_policy.py


seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

:param seed: Random seed to use for TensorFlow.
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
model_path,
load,
tanh_squash,
reparameterize,
condition_sigma_on_obs,

12
ml-agents/mlagents/trainers/policy/torch_policy.py


import numpy as np
import torch
import os
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.behavior_id_utils import get_global_agent_id
from mlagents.trainers.policy import Policy

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

seed,
behavior_spec,
trainer_settings,
model_path,
load,
self.global_step = GlobalSteps() # could be much simpler if TorchPolicy is nn.Module
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
self.grads = None
if TestingConfiguration.device != "cpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)

:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):
"""

return []
def get_modules(self):
return {'Policy': self.actor_critic, 'global_step': self.global_step}
return {"Policy": self.actor_critic, "global_step": self.global_step}

2
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


return update_stats
def get_modules(self):
return {'Optimizer': self.optimizer}
return {"Optimizer": self.optimizer}

7
ml-agents/mlagents/trainers/ppo/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
if TestingConfiguration.max_steps > 0:

self.seed,
behavior_spec,
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,
condition_sigma_on_obs=False, # Faster training for PPO
)
return policy

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
condition_sigma_on_obs=False, # Faster training for PPO
)
return policy

2
ml-agents/mlagents/trainers/sac/optimizer_torch.py


return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer ,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}

4
ml-agents/mlagents/trainers/sac/trainer.py


self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,

6
ml-agents/mlagents/trainers/saver/saver.py


pass
@abc.abstractmethod
def register(self):
def register(self, module):
def save_checkpoint(self):
def save_checkpoint(self, brain_name: str, step: int) -> str:
pass
@abc.abstractmethod

@abc.abstractmethod
def export(self):
def export(self, output_filepath: str, brain_name: str) -> None:
pass

16
ml-agents/mlagents/trainers/saver/tf_saver.py


from typing import Tuple
from distutils.version import LooseVersion
from mlagents_envs.exception import UnityException
import os
from mlagents_envs.exception import UnityPolicyException
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers import __version__

"""
Saver class for TensorFlow
"""
def __init__(
self,
policy: TFPolicy,

self._keep_checkpoints = trainer_settings.keep_checkpoints
self.load = load
def register(self, module_dict):
def register(self, module):
def save_checkpoint(self, brain_name: str, step: int) -> None:
def save_checkpoint(self, brain_name: str, step: int) -> str:
"""
Checkpoints the policy on disk.

)
)
else:
logger.info(f"Resuming training from step {self.policy.get_current_step()}.")
logger.info(
f"Resuming training from step {self.policy.get_current_step()}."
)
def _check_model_version(self, version: str) -> None:
"""

13
ml-agents/mlagents/trainers/saver/torch_saver.py


import os
import torch
from typing import Dict
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.torch.model_serialization import ModelSerializer

"""
Saver class for PyTorch
"""
def __init__(
self,
policy: TorchPolicy,

self.load = load
self.exporter = ModelSerializer(self.policy)
self.modules = {}
self.modules: Dict[str, torch.nn.Modules] = {}
def register(self, module):
self.modules.update(module.get_modules())

if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
state_dict = {name: module.state_dict() for name, module in self.modules.items()}
state_dict = {
name: module.state_dict() for name, module in self.modules.items()
}
torch.save(state_dict, f"{checkpoint_path}.pt")
torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
self.export(checkpoint_path, brain_name)

)
)
else:
logger.info(f"Resuming training from step {self.policy.get_current_step()}.")
logger.info(
f"Resuming training from step {self.policy.get_current_step()}."
)

2
ml-agents/mlagents/trainers/settings.py


env_name = ""
device = "cpu"
@attr.s(auto_attribs=True)
class ExportableSettings:

4
ml-agents/mlagents/trainers/tests/test_nn_policy.py


trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
seed, mock_spec, trainer_settings, model_path=model_path, load=load
)
policy = TFPolicy(seed, mock_spec, trainer_settings)
return policy

11
ml-agents/mlagents/trainers/tf/model_serialization.py


from distutils.util import strtobool
import os
from typing import Any, List, Set, NamedTuple
from typing import Any, List, Set
from distutils.version import LooseVersion
try:

def export_policy_model(
output_filepath: str,
brain_name: str,
graph: tf.Graph,
sess: tf.Session,
output_filepath: str, brain_name: str, graph: tf.Graph, sess: tf.Session
) -> None:
"""
Exports a TF graph for a Policy to .nn and/or .onnx format for Unity embedding.

return output_graph_def
def convert_frozen_to_onnx(
brain_name: str, frozen_graph_def: tf.GraphDef
) -> Any:
def convert_frozen_to_onnx(brain_name: str, frozen_graph_def: tf.GraphDef) -> Any:
# This is basically https://github.com/onnx/tensorflow-onnx/blob/master/tf2onnx/convert.py
inputs = _get_input_node_names(frozen_graph_def)

4
ml-agents/mlagents/trainers/torch/encoders.py


super().__init__()
self.normalization_steps = nn.Parameter(torch.tensor(1), requires_grad=False)
self.running_mean = nn.Parameter(torch.zeros(vec_obs_size), requires_grad=False)
self.running_variance = nn.Parameter(torch.ones(vec_obs_size), requires_grad=False)
self.running_variance = nn.Parameter(
torch.ones(vec_obs_size), requires_grad=False
)
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
normalized_state = torch.clamp(

39
ml-agents/mlagents/trainers/torch/model_serialization.py


import os
from typing import Any, List, Set, NamedTuple
import torch
from mlagents_envs.logging_util import get_logger

def __init__(self, policy):
self.policy = policy
dummy_vec_obs = [torch.zeros([1] + [self.policy.vec_obs_size])]
dummy_vis_obs = [torch.zeros([1] + list(self.policy.vis_obs_shape))] \
if self.policy.vis_obs_size > 0 else []
dummy_vis_obs = (
[torch.zeros([1] + list(self.policy.vis_obs_shape))]
if self.policy.vis_obs_size > 0
else []
)
self.input_names = ["vector_observation", "visual_observation", \
"action_mask", "memories"]
self.output_names = ["action", "action_probs", "version_number", \
"memory_size", "is_continuous_control", "action_output_shape"]
self.dynamic_axes = {"vector_observation": [0], "visual_observation": [0], \
"action_mask": [0], "memories": [0], "action": [0],"action_probs": [0]}
self.dummy_input = (dummy_vec_obs, dummy_vis_obs, \
dummy_masks, dummy_memories)
self.input_names = [
"vector_observation",
"visual_observation",
"action_mask",
"memories",
]
self.output_names = [
"action",
"action_probs",
"version_number",
"memory_size",
"is_continuous_control",
"action_output_shape",
]
self.dynamic_axes = {
"vector_observation": [0],
"visual_observation": [0],
"action_mask": [0],
"memories": [0],
"action": [0],
"action_probs": [0],
}
self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
def export_policy_model(self, output_filepath: str) -> None:
"""

10
ml-agents/mlagents/trainers/torch/networks.py


class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self._global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
return int(self._global_step.item())
return int(self.__global_step.item())
def set_step(self, value):
self._global_step.data = value
def current_step(self, value):
self.__global_step[:] = value
self._global_step += value
self.__global_step += value
class LearningRate(nn.Module):

9
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# # Unity ML-Agents Toolkit
import os
from typing import Dict, List, Optional
from collections import defaultdict
import abc

def create_saver(self, policy: Policy) -> BaseSaver:
if self.framework == "torch":
saver = TorchSaver(
policy,
saver = TorchSaver( # type: ignore
policy, # type: ignore
saver = TFSaver(
policy,
saver = TFSaver( # type: ignore
policy, # type: ignore
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,

2
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
load: bool,
artifact_path: str,
reward_buff_cap: int = 1,
):

self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)
self.is_training = training
self.load = load
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy_queues: List[AgentManagerQueue[Policy]] = []
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []

0
ml-agents/mlagents/trainers/saver/__init__.py

正在加载...
取消
保存