浏览代码

minor tweaks

/develop-horovod
Jonathan Harper 5 年前
当前提交
47893e9c
共有 4 个文件被更改,包括 21 次插入14 次删除
  1. 2
      .dockerignore
  2. 22
      horovod-mlagents.yaml
  3. 9
      ml-agents/mlagents/trainers/ppo/models.py
  4. 2
      ml-agents/mlagents/trainers/sac/models.py

2
.dockerignore


UnitySDK/
.git/
venv-harper/
summaries/
models/

22
horovod-mlagents.yaml


cpu: 4
command: ["/bin/sh", "-c"]
args: ["
mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib mlagents-learn /unity-volume/trainer_config.yaml --run-id=snoopydist15-ppo --train --env=/unity-volume/Walker --num-envs=6;
mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r models gs://ray-volume/horovod/;
mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r summaries gs://ray-volume/horovod/;
mpirun --allow-run-as-root -np 8 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib mlagents-learn /unity-volume/trainer_config.yaml --run-id=snoopydfd-ppo-8m --train --env=/unity-volume/SnoopyPop15Levels_dfd --num-envs=6;
mpirun --allow-run-as-root -np 8 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r models gs://ray-volume/horovod/;
mpirun --allow-run-as-root -np 8 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r summaries gs://ray-volume/horovod/;
replicas: 16
replicas: 8
template:
spec:
containers:

memory: 48G
command: ["/bin/sh", "-c"]
args: ["
sleep 95s;
sleep 45s;
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5005 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5006 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5007 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5008 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5009 &);
xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5010
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5005 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5006 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5007 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5008 &);
(xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5009 &);
xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_dfd.x86_64 --port 5010
"]
stdin: true
tty: true

9
ml-agents/mlagents/trainers/ppo/models.py


import tensorflow as tf
from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule
import horovod.tensorflow as hvd
try:
import horovod.tensorflow as hvd
except ImportError:
hvd = None
logger = logging.getLogger("mlagents.trainers")

def create_ppo_optimizer(self):
self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
self.optimizer = hvd.DistributedOptimizer(self.optimizer)
if hvd is not None:
self.optimizer = hvd.DistributedOptimizer(self.optimizer)
self.grads = self.optimizer.compute_gradients(self.loss)
self.update_batch = self.optimizer.minimize(self.loss)

2
ml-agents/mlagents/trainers/sac/models.py


LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.1 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 0.8 # TODO: Make these an optional hyperparam.
CONTINUOUS_TARGET_ENTROPY_SCALE = 3.0 # TODO: Make these an optional hyperparam.
LOGGER = logging.getLogger("mlagents.trainers")

正在加载...
取消
保存