Jonathan Harper
5 年前
当前提交
3fc14963
共有 10 个文件被更改,包括 297 次插入 和 26 次删除
-
82Dockerfile
-
21ml-agents-envs/mlagents/envs/environment.py
-
2ml-agents/mlagents/trainers/learn.py
-
2ml-agents/mlagents/trainers/ppo/models.py
-
8ml-agents/mlagents/trainers/sac/models.py
-
10ml-agents/mlagents/trainers/tf_policy.py
-
2ml-agents/setup.py
-
3.dockerignore
-
129Dockerfile-bak
-
64horovod-mlagents.yaml
|
|||
# Paths excluded from the Docker build context.
UnitySDK/
.git/
venv-harper/
|
|||
# Based off of python:3.6-slim, except that we are using ubuntu instead of debian.
FROM ubuntu:16.04

# Ensure the locally installed python (under /usr/local/bin) is preferred over
# any distribution python.
# Uses the key=value form of ENV; the space-separated form is deprecated.
ENV PATH=/usr/local/bin:$PATH

# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG=C.UTF-8
|||
|
|||
# runtime dependencies |
|||
# Runtime dependencies. The apt package index is deleted in the same layer so
# it does not persist in the image.
RUN set -e; \
    apt-get update; \
    apt-get install --yes --no-install-recommends \
        ca-certificates \
        libexpat1 \
        libffi6 \
        libgdbm3 \
        libreadline6 \
        libsqlite3-0 \
        libssl1.0.0 \
    ; \
    rm -rf /var/lib/apt/lists/*
|||
|
|||
# GPG_KEY: fingerprint of the key used to verify the Python source tarball.
# PYTHON_VERSION: the CPython release that is downloaded and built.
# Uses the key=value form of ENV; the space-separated form is deprecated.
ENV GPG_KEY=0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION=3.6.4
|||
|
|||
# Build CPython $PYTHON_VERSION from source in a single layer:
#   1. install the build toolchain,
#   2. download the release tarball and verify its GPG signature,
#   3. configure, compile and install under /usr/local,
#   4. purge the toolchain and delete tests, bytecode and the source tree,
# so that none of the build-time files persist in the image.
RUN set -ex \
    && buildDeps=" \
        dpkg-dev \
        gcc \
        libbz2-dev \
        libc6-dev \
        libexpat1-dev \
        libffi-dev \
        libgdbm-dev \
        liblzma-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        make \
        tcl-dev \
        tk-dev \
        wget \
        xz-utils \
        zlib1g-dev \
# as of Stretch, "gpg" is no longer included by default
        $(command -v gpg > /dev/null || echo 'gnupg dirmngr') \
    " \
    && apt-get update && apt-get install -y $buildDeps --no-install-recommends && rm -rf /var/lib/apt/lists/* \
    \
    && wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
    && wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
# use a throwaway keyring so the imported key does not end up in the image
    && export GNUPGHOME="$(mktemp -d)" \
# the SKS keyserver pool (ha.pool.sks-keyservers.net) has been shut down, so
# the signing key is fetched from the Ubuntu keyserver instead
    && gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$GPG_KEY" \
    && gpg --batch --verify python.tar.xz.asc python.tar.xz \
    && rm -rf "$GNUPGHOME" python.tar.xz.asc \
    && mkdir -p /usr/src/python \
    && tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
    && rm python.tar.xz \
    \
    && cd /usr/src/python \
    && gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
    && ./configure \
        --build="$gnuArch" \
        --enable-loadable-sqlite-extensions \
        --enable-shared \
        --with-system-expat \
        --with-system-ffi \
        --without-ensurepip \
    && make -j "$(nproc)" \
    && make install \
# refresh the linker cache so the shared libpython (--enable-shared) is found
    && ldconfig \
    \
    && apt-get purge -y --auto-remove $buildDeps \
    \
# drop test directories and compiled bytecode from the installed tree
    && find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + \
    && rm -rf /usr/src/python
|||
|
|||
# Create the unversioned command names (idle, pydoc, python, python-config)
# in /usr/local/bin as relative symlinks to their "3"-suffixed counterparts.
RUN ln -s idle3 /usr/local/bin/idle \
    && ln -s pydoc3 /usr/local/bin/pydoc \
    && ln -s python3 /usr/local/bin/python \
    && ln -s python3-config /usr/local/bin/python-config
|||
|
|||
# Additional build and runtime tooling.
# wget is installed again here because it was purged together with the Python
# build dependencies and is still needed to download get-pip.py.
# The apt package index is removed in the same layer; deleting it in a later
# layer would not reduce the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++-4.8 \
        git \
        vim \
        wget \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
|||
|
|||
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION=9.0.3

# Bootstrap pip at the pinned version, then delete tests, bytecode and the
# bootstrap script in the same layer.
# The unversioned https://bootstrap.pypa.io/get-pip.py only supports currently
# maintained Python releases and aborts on Python 3.6, so the script published
# for 3.6 is used instead.
RUN set -ex; \
    wget -O get-pip.py 'https://bootstrap.pypa.io/pip/3.6/get-pip.py'; \
    \
    python get-pip.py \
        --disable-pip-version-check \
        --no-cache-dir \
        "pip==$PYTHON_PIP_VERSION" \
    ; \
# sanity check: fails the build if pip is not runnable
    pip --version; \
    \
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' +; \
    rm -f get-pip.py
|||
|
|||
# Copy the unity-volume directory from the build context into the image and
# mark the *.x86_64 files in it as executable.
COPY unity-volume /unity-volume
RUN chmod +x /unity-volume/*.x86_64

# Port 5005 is the port used for in-Editor training.
EXPOSE 5005

# Exec-form entrypoint: arguments passed to `docker run` are appended to
# mlagents-learn.
ENTRYPOINT ["mlagents-learn"]
|
|||
# Kubeflow MPIJob with one launcher replica and 16 worker replicas.
# NOTE(review): the indentation below was reconstructed from the field names;
# confirm it against the original manifest.
apiVersion: kubeflow.org/v1alpha2
kind: MPIJob
metadata:
  name: mlagents-horovod-test
spec:
  slotsPerWorker: 1
  cleanPodPolicy: Running
  mpiReplicaSpecs:
    # Launcher: runs mpirun three times in sequence (training, then copying
    # "models" and "summaries" to the gs://ray-volume/horovod/ bucket).
    Launcher:
      replicas: 1
      template:
        spec:
          containers:
            - name: mlagents-horovod-test
              image: gcr.io/unity-ml-agents-expts-test/mlagents-horovod:latest
              resources:
                limits:
                  cpu: 4
              command:
                - "/bin/sh"
                - "-c"
              args: ["
                mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib mlagents-learn /unity-volume/trainer_config.yaml --run-id=snoopydist15-ppo --train --env=/unity-volume/Walker --num-envs=6;
                mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r models gs://ray-volume/horovod/;
                mpirun --allow-run-as-root -np 16 -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x LD_LIBRARY_PATH -x HOROVOD_AUTOTUNE=1 -x PATH -mca pml ob1 -mca btl ^openib gsutil cp -r summaries gs://ray-volume/horovod/;
                "]

    # Worker: each pod has a trainer container and an environment container.
    Worker:
      replicas: 16
      template:
        spec:
          containers:
            # Trainer container (one GPU per worker).
            - name: mlagents-horovod-test
              image: gcr.io/unity-ml-agents-expts-test/mlagents-horovod:latest
              resources:
                limits:
                  cpu: 2
                  nvidia.com/gpu: 1
                  memory: 8G
            # Environment container: after an initial sleep, starts six
            # instances of the environment executable under xvfb-run, one per
            # port from 5005 to 5010. The first five are backgrounded and the
            # last one runs in the foreground.
            - name: mlagents-horovod-test-env
              image: gcr.io/unity-ml-agents-expts-test/mlagents-horovod-env:latest
              resources:
                requests:
                  cpu: 12
                  memory: 48G
                limits:
                  cpu: 12
                  memory: 48G
              command:
                - "/bin/sh"
                - "-c"
              args: ["
                sleep 95s;
                cd /unity-volume;
                (xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5005 &);
                (xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5006 &);
                (xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5007 &);
                (xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5008 &);
                (xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5009 &);
                xvfb-run --auto-servernum --server-args='-screen 0 640x480x24' /unity-volume/SnoopyPop15Levels_10.x86_64 --port 5010
                "]
              stdin: true
              tty: true
              securityContext:
                privileged: true
                capabilities:
                  add:
                    - SYS_ADMIN
撰写
预览
正在加载...
取消
保存
Reference in new issue