
first commit

/bug-failed-api-check
Anupam Bhatnagar, 4 years ago
Current commit
e8e0078e
7 files changed, 32 insertions, 8 deletions
  1. ml-agents-envs/mlagents_envs/environment.py (14 changes)
  2. ml-agents/mlagents/tf_utils/tf.py (3 changes)
  3. ml-agents/mlagents/trainers/learn.py (2 changes)
  4. ml-agents/mlagents/trainers/policy/tf_policy.py (5 changes)
  5. ml-agents/mlagents/trainers/ppo/optimizer.py (5 changes)
  6. ml-agents/mlagents/trainers/trainer/trainer.py (7 changes)
  7. ml-agents/mlagents/trainers/trainer_controller.py (4 changes)

ml-agents-envs/mlagents_envs/environment.py (14 changes)


"If the environment name is None, "
"the worker-id must be 0 in order to connect with the Editor."
)
-if file_name is not None:
-    self.executable_launcher(file_name, docker_training, no_graphics, args)
-else:
-    logger.info(
-        f"Listening on port {self.port}. "
-        f"Start training by pressing the Play button in the Unity Editor."
-    )
+# if file_name is not None:
+#     self.executable_launcher(file_name, docker_training, no_graphics, args)
+# else:
+#     logger.info(
+#         f"Listening on port {self.port}. "
+#         f"Start training by pressing the Play button in the Unity Editor."
+#     )
self._loaded = True
rl_init_parameters_in = UnityRLInitializationInputProto(

ml-agents/mlagents/tf_utils/tf.py (3 changes)


# Everywhere else is caught by the banned-modules setting for flake8
import tensorflow as tf  # noqa I201
from distutils.version import LooseVersion
+import horovod.tensorflow as hvd
# LooseVersion handles things "1.2.3a" or "4.5.6-rc7" fairly sensibly.

"""
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
+config.gpu_options.visible_device_list = str(hvd.local_rank())
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent from exceptions if the device doesn't suppport the operation
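For context, the added line follows Horovod's standard TF1 GPU-pinning recipe: each worker process only uses the GPU that matches its local rank. A minimal, self-contained sketch of that recipe (illustrative only, not code from this commit):

    import tensorflow as tf
    import horovod.tensorflow as hvd

    hvd.init()  # must run before hvd.local_rank() / hvd.rank() can be queried

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Pin this process to a single GPU, indexed by its rank on the local machine.
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    sess = tf.Session(config=config)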

ml-agents/mlagents/trainers/learn.py (2 changes)


from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.logging_util import create_logger
+import horovod.tensorflow as hvd
def _create_parser():

sampler_manager, resampling_interval = create_sampler_manager(
    options.sampler_config, run_seed
)
+hvd.init()
trainer_factory = TrainerFactory(
    options.trainer_config,
    summaries_dir,
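hvd.init() has to run once per worker process before any other Horovod call; the commit places it just before the TrainerFactory is built. A small illustrative sketch of what each worker can do right after init (the base_seed handling below is an assumption for illustration, not part of the commit):

    import horovod.tensorflow as hvd

    hvd.init()
    base_seed = 1234                   # hypothetical base seed
    run_seed = base_seed + hvd.rank()  # give each worker a distinct seed
    if hvd.rank() == 0:                # keep console output to a single worker
        print(f"Running with {hvd.size()} Horovod worker(s)")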

ml-agents/mlagents/trainers/policy/tf_policy.py (5 changes)


from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.models import ModelUtils
+import horovod.tensorflow as hvd
logger = logging.getLogger("mlagents.trainers")

self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
init = tf.global_variables_initializer()
self.sess.run(init)
+self.sess.run(hvd.broadcast_global_variables(0))
def _load_graph(self):
    with self.graph.as_default():

:param steps: The number of steps the model was trained for
:return:
"""
+if hvd.rank() != 0:
+    return
with self.graph.as_default():
    last_checkpoint = self.model_path + "/model-" + str(steps) + ".ckpt"
    self.saver.save(self.sess, last_checkpoint)
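The broadcast added after sess.run(init) is Horovod's usual way of making every worker start from identical weights: each rank runs its own initializer, then rank 0's values overwrite the rest. The rank check before the checkpoint save keeps writing on a single worker. A standalone sketch of the broadcast pattern (illustrative, not the commit's code):

    import tensorflow as tf
    import horovod.tensorflow as hvd

    hvd.init()
    w = tf.get_variable("w", shape=[10], initializer=tf.random_normal_initializer())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())   # every rank initializes locally
        sess.run(hvd.broadcast_global_variables(0))   # then rank 0's values win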

ml-agents/mlagents/trainers/ppo/optimizer.py (5 changes)


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.buffer import AgentBuffer
+import horovod.tensorflow as hvd
class PPOOptimizer(TFOptimizer):

)
def _create_ppo_optimizer_ops(self):
-    self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
+    self.tf_optimizer = self.create_optimizer_op(self.learning_rate * hvd.size())
+    if hvd is not None:
+        self.tf_optimizer = hvd.DistributedOptimizer(self.tf_optimizer)
    self.grads = self.tf_optimizer.compute_gradients(self.loss)
    self.update_batch = self.tf_optimizer.minimize(self.loss)
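This applies the usual Horovod optimizer recipe: scale the base learning rate by the number of workers and wrap the optimizer in hvd.DistributedOptimizer, which averages gradients across workers via allreduce before they are applied. A standalone sketch of the same pattern (toy loss and a hypothetical base learning rate):

    import tensorflow as tf
    import horovod.tensorflow as hvd

    hvd.init()
    x = tf.get_variable("x", shape=[], initializer=tf.constant_initializer(1.0))
    loss = tf.square(x)                                  # toy scalar loss
    base_lr = 3e-4                                       # hypothetical base learning rate
    opt = tf.train.AdamOptimizer(base_lr * hvd.size())   # scale LR with worker count
    opt = hvd.DistributedOptimizer(opt)                  # allreduce-average gradients
    train_op = opt.minimize(loss)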

ml-agents/mlagents/trainers/trainer/trainer.py (7 changes)


from mlagents.trainers.policy import Policy
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.timers import hierarchical_timer
+import horovod.tensorflow as hvd
logger = logging.getLogger("mlagents.trainers")

"""
Saves the model
"""
+if hvd.rank() != 0:
+    return
self.get_policy(name_behavior_id).save_model(self.get_step)
def export_model(self, name_behavior_id: str) -> None:

+if hvd.rank() != 0:
+    return
policy = self.get_policy(name_behavior_id)
settings = SerializationSettings(policy.model_path, policy.brain.brain_name)
export_policy_model(settings, policy.graph, policy.sess)
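The rank checks added here (and in tf_policy.py and trainer_controller.py) are the common rank-0-only I/O guard: only the first worker writes checkpoints and exported models, so parallel workers do not clobber each other's files. A minimal sketch of the pattern with a hypothetical save_checkpoint helper:

    import horovod.tensorflow as hvd

    hvd.init()

    def save_checkpoint(path: str) -> None:
        # Only rank 0 touches the filesystem; every other worker returns immediately.
        if hvd.rank() != 0:
            return
        with open(path, "w") as f:
            f.write("checkpoint placeholder")  # stands in for a real save call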

ml-agents/mlagents/trainers/trainer_controller.py (4 changes)


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
+import horovod.tensorflow as hvd
class TrainerController(object):

"""
Saves current model to checkpoint folder.
"""
+if hvd.rank() != 0:
+    return
for brain_name in self.trainers.keys():
    for name_behavior_id in self.brain_name_to_identifier[brain_name]:
        self.trainers[brain_name].save_model(name_behavior_id)
