Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/trainer/trainer.py
/config/trainer_config.yaml
/ml-agents/mlagents/tf_utils/tf.py
/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
/ml-agents/mlagents/trainers/policy/tf_policy.py

1 commit

Author             SHA1       Message                                            Commit date
Anupam Bhatnagar   50e52d9c   Merge branch 'master' into distributed-training    5 years ago

11 files changed, with 101 insertions and 2 deletions
  1. config/trainer_config.yaml (1 change)
  2. ml-agents/mlagents/tf_utils/tf.py (3 changes)
  3. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (18 changes)
  4. ml-agents/mlagents/trainers/learn.py (3 changes)
  5. ml-agents/mlagents/trainers/policy/tf_policy.py (5 changes)
  6. ml-agents/mlagents/trainers/trainer/trainer.py (6 changes)
  7. ml-agents/mlagents/trainers/trainer_controller.py (7 changes)
  8. distributed/job_configs/bouncer-multi-short.yaml (30 changes)
  9. distributed/job_configs/bouncer-single-short.yaml (30 changes)

config/trainer_config.yaml (1 change)


    time_horizon: 1000
    lambd: 0.99
    beta: 0.001
    max_steps: 1.0e5

3DBallHard:
    normalize: true

ml-agents/mlagents/tf_utils/tf.py (3 changes)


# Everywhere else is caught by the banned-modules setting for flake8
import tensorflow as tf  # noqa I201
from distutils.version import LooseVersion
import horovod.tensorflow as hvd

# LooseVersion handles things "1.2.3a" or "4.5.6-rc7" fairly sensibly.

    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    # For multi-GPU training, set allow_soft_placement to True to allow
    # placing the operation into an alternative device automatically
    # to prevent exceptions if the device doesn't support the operation
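
The hunk above pins each worker process to a single GPU via hvd.local_rank(). As a point of reference, the sketch below shows how that pattern typically composes into a complete session-config helper; the function name and the Horovod-optional fallback are assumptions for illustration, not part of this diff.

# Sketch only (TF1-style API, Horovod optional); not part of the merge request.
import tensorflow as tf

try:
    import horovod.tensorflow as hvd
except ImportError:
    hvd = None


def generate_session_config() -> tf.ConfigProto:
    config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    config.gpu_options.allow_growth = True
    # Fall back to another device if an op isn't supported on the chosen one.
    config.allow_soft_placement = True
    if hvd is not None:
        # Assumes hvd.init() was already called: worker i on a node sees only GPU i.
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    return config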

ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (18 changes)


from mlagents.trainers.components.bc.module import BCModule

try:
    import horovod.tensorflow as hvd
except ImportError:
    hvd = None


class TFOptimizer(Optimizer):  # pylint: disable=W0223
    def __init__(self, policy: TFPolicy, trainer_params: Dict[str, Any]):
        self.sess = policy.sess

    def create_optimizer_op(
        self, learning_rate: tf.Tensor, name: str = "Adam"
    ) -> tf.train.Optimizer:
        return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)
        if hvd is not None:
            adam_optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate * hvd.size(), name=name
            )
            horovod_optimizer = hvd.DistributedOptimizer(adam_optimizer)
        else:
            adam_optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate, name=name
            )
        return horovod_optimizer if hvd is not None else adam_optimizer

    def _execute_model(
        self, feed_dict: Dict[tf.Tensor, np.ndarray], out_dict: Dict[str, tf.Tensor]
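
The change above replaces the plain Adam return with a Horovod path: the base learning rate is scaled by hvd.size() and the optimizer is wrapped in hvd.DistributedOptimizer, which allreduce-averages gradients across workers before each update. A standalone sketch of the same wrapping pattern follows; the toy model and variable names are illustrative assumptions.

# Illustrative sketch of the Horovod optimizer-wrapping pattern (TF1 graph API).
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()

x = tf.placeholder(tf.float32, shape=[None, 4])
y = tf.placeholder(tf.float32, shape=[None, 1])
pred = tf.layers.dense(x, 1)  # toy network standing in for the policy/value model
loss = tf.reduce_mean(tf.square(pred - y))

base_lr = 3.0e-4
# Scale the learning rate with the worker count (a common Horovod convention),
# then wrap the optimizer so gradients are averaged across ranks.
optimizer = tf.train.AdamOptimizer(learning_rate=base_lr * hvd.size())
optimizer = hvd.DistributedOptimizer(optimizer)
update_op = optimizer.minimize(loss)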

ml-agents/mlagents/trainers/learn.py (3 changes)


from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer, get_timer_tree
from mlagents_envs import logging_util
import horovod.tensorflow as hvd

logger = logging_util.get_logger(__name__)

    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed
    )
    hvd.init()
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
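
hvd.init() has to run once per process before any other Horovod call, which is why it sits ahead of the TrainerFactory construction here. Below is a minimal sketch of a Horovod-aware entry point; the per-rank seed offset and the logging gate are illustrative assumptions, not something this diff does.

# Minimal sketch of a Horovod-aware entry point; details are illustrative.
import logging
import horovod.tensorflow as hvd


def main(base_seed: int = 1337) -> None:
    hvd.init()  # must precede hvd.rank()/size()/local_rank() and the optimizer wrapper
    rank, size = hvd.rank(), hvd.size()
    # Assumption: give each worker its own seed so environments don't collect
    # identical trajectories.
    run_seed = base_seed + rank
    # Assumption: keep console output readable by logging verbosely on rank 0 only.
    logging.getLogger("mlagents.trainers").setLevel(
        logging.INFO if rank == 0 else logging.WARNING
    )
    print(f"worker {rank}/{size} using seed {run_seed}")


if __name__ == "__main__":
    main()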

ml-agents/mlagents/trainers/policy/tf_policy.py (5 changes)


from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.models import ModelUtils
import horovod.tensorflow as hvd

logger = get_logger(__name__)

            self._load_graph()
        else:
            self._initialize_graph()
        self.sess.run(hvd.broadcast_global_variables(0))

    def get_weights(self):
        with self.graph.as_default():

        :param steps: The number of steps the model was trained for
        :return:
        """
        if hvd.rank() != 0:
            return
        with self.graph.as_default():
            last_checkpoint = self.model_path + "/model-" + str(steps) + ".ckpt"
            self.saver.save(self.sess, last_checkpoint)
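
Two Horovod idioms appear in this file: right after the graph is initialized, rank 0's variables are broadcast to every worker so all ranks start from identical weights, and checkpoints are written by rank 0 only so workers don't race on the same files. A condensed sketch of both is below; the variable, session, and saver setup is assumed boilerplate.

# Condensed sketch of broadcast-after-init plus rank-0-only checkpointing (TF1).
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
w = tf.get_variable("w", shape=[4, 1])
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Sync every worker to rank 0's freshly initialized weights.
    sess.run(hvd.broadcast_global_variables(0))

    # ... training steps would go here ...

    if hvd.rank() == 0:
        # Only one process writes the checkpoint; others skip the disk I/O.
        saver.save(sess, "./model.ckpt")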

ml-agents/mlagents/trainers/trainer/trainer.py (6 changes)


from mlagents.trainers.policy import Policy
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
import horovod.tensorflow as hvd

logger = get_logger(__name__)

        stop training if it wasn't training to begin with, or if max_steps
        is reached.
        """
        return self.is_training and self.get_step <= self.get_max_steps
        if hvd.rank() == 0:
            return self.is_training and self.get_step <= self.get_max_steps
        else:
            return True

    @property
    def reward_buffer(self) -> Deque[float]:

ml-agents/mlagents/trainers/trainer_controller.py (7 changes)


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
import horovod.tensorflow as hvd


class TrainerController(object):

        """
        Saves current model to checkpoint folder.
        """
        if hvd.rank() != 0:
            return
        for brain_name in self.trainers.keys():
            for name_behavior_id in self.brain_name_to_identifier[brain_name]:
                self.trainers[brain_name].save_model(name_behavior_id)

        """
        Exports latest saved models to .nn format for Unity embedding.
        """
        if hvd.rank() != 0:
            return
        for brain_name in self.trainers.keys():
            for name_behavior_id in self.brain_name_to_identifier[brain_name]:
                self.trainers[brain_name].export_model(name_behavior_id)
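
Both guards above follow the same shape: return early on every rank except 0 before touching the filesystem, so only one process saves or exports models. If that guard appeared in many more methods, it could be factored into a small decorator; the sketch below is one way to do that and is purely illustrative, not something this merge request adds.

# Hypothetical helper, not in the diff: run a method on Horovod rank 0 only.
import functools

try:
    import horovod.tensorflow as hvd
except ImportError:
    hvd = None


def rank_zero_only(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Without Horovod there is a single process, so always run.
        if hvd is None or hvd.rank() == 0:
            return func(*args, **kwargs)
        return None

    return wrapper


class TrainerControllerSketch:
    @rank_zero_only
    def _save_model(self) -> None:
        print("rank 0 saves checkpoints and exported .nn files")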

distributed/job_configs/bouncer-multi-short.yaml (30 changes)


default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 128
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

Bouncer:
    normalize: true
    max_steps: 125000
    num_layers: 2
    hidden_units: 64

distributed/job_configs/bouncer-single-short.yaml (30 changes)


default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 128
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

Bouncer:
    normalize: true
    max_steps: 1.0e6
    num_layers: 2
    hidden_units: 64
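
Both job configs rely on the ML-Agents convention that the default block supplies hyperparameters for every behavior, while a named block such as Bouncer overrides only the keys it lists (here normalize, max_steps, num_layers, and hidden_units). The sketch below shows that merge in isolation; the file path argument and helper name are assumptions for illustration.

# Sketch of the default-plus-override merge these configs rely on; the helper
# name is an assumption, not part of the merge request.
import yaml


def resolve_trainer_config(path: str, behavior_name: str) -> dict:
    with open(path) as f:
        config = yaml.safe_load(f)
    resolved = dict(config.get("default", {}))
    # Keys in the behavior-specific block win over the defaults.
    resolved.update(config.get(behavior_name, {}))
    return resolved


params = resolve_trainer_config(
    "distributed/job_configs/bouncer-multi-short.yaml", "Bouncer"
)
print(params["max_steps"], params["hidden_units"])  # 125000 and 64 after the override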