浏览代码

rough pass at tf2 support, needs cleanup

/develop-gpu-test
Chris Elion 5 年前
当前提交
43e23941
共有 24 个文件被更改,包括 166 次插入40 次删除
  1. 20
      ml-agents/mlagents/trainers/__init__.py
  2. 21
      ml-agents/mlagents/trainers/bc/models.py
  3. 6
      ml-agents/mlagents/trainers/components/bc/model.py
  4. 5
      ml-agents/mlagents/trainers/components/reward_signals/__init__.py
  5. 6
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  6. 6
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
  7. 6
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
  8. 5
      ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
  9. 30
      ml-agents/mlagents/trainers/models.py
  10. 6
      ml-agents/mlagents/trainers/ppo/models.py
  11. 6
      ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
  12. 6
      ml-agents/mlagents/trainers/ppo/policy.py
  13. 19
      ml-agents/mlagents/trainers/sac/models.py
  14. 6
      ml-agents/mlagents/trainers/sac/policy.py
  15. 5
      ml-agents/mlagents/trainers/tensorflow_to_barracuda.py
  16. 5
      ml-agents/mlagents/trainers/tests/test_bc.py
  17. 6
      ml-agents/mlagents/trainers/tests/test_multigpu.py
  18. 6
      ml-agents/mlagents/trainers/tests/test_ppo.py
  19. 6
      ml-agents/mlagents/trainers/tests/test_sac.py
  20. 11
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  21. 5
      ml-agents/mlagents/trainers/tf_policy.py
  22. 7
      ml-agents/mlagents/trainers/trainer.py
  23. 5
      ml-agents/mlagents/trainers/trainer_controller.py
  24. 2
      ml-agents/setup.py

20
ml-agents/mlagents/trainers/__init__.py


import logging
def warnings_as_errors(log_record):
    """Logging filter that raises deprecation warnings as exceptions.

    Written to be passed to ``logging.Logger.addFilter``.

    :param log_record: the ``logging.LogRecord`` being filtered.
    :return: True (keep the record) for every record that is not a
        WARNING-level record whose message mentions "deprecated".
    :raises RuntimeError: carrying the fully formatted message, when the
        record is WARNING-level and its message contains "deprecated".
    """
    # ``msg`` may be any object (e.g. an exception instance), not only a str;
    # coerce it to text so the substring test cannot raise TypeError.
    if log_record.levelno == logging.WARNING and "deprecated" in str(
        log_record.msg
    ):
        merged = log_record.getMessage()
        raise RuntimeError(merged)
    return True
# TODO only enable this with an environment variable
# The filter registration below is deliberately switched off by the constant
# `if False:` guard.
if False:
logging.getLogger('tensorflow').addFilter(warnings_as_errors)
# TODO better place to put this? move everything to tf.py?
# Prefer the `tensorflow.compat.v1` namespace; fall back to the plain
# `tensorflow` package when that submodule cannot be imported.
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# call below runs only on the fallback path or unconditionally -- confirm.
tf.disable_v2_behavior()

21
ml-agents/mlagents/trainers/bc/models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
if True: # TODO TF2
tf_variance_scaling = tf.initializers.variance_scaling
tf_flatten = tf.layers.flatten
else:
import tensorflow.contrib.layers as c_layers
tf_variance_scaling = c_layers.variance_scaling_initializer
tf_flatten = c_layers.flatten
from mlagents.trainers.models import LearningModel

size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
kernel_initializer=tf_variance_scaling(
0.01
),
)
)

activation=None,
use_bias=False,
name="pre_action",
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01),
kernel_initializer=tf_variance_scaling(0.01),
)
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")

6
ml-agents/mlagents/trainers/components/bc/model.py


import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.trainers.models import LearningModel

5
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


import numpy as np
import abc
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException

6
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


from typing import List, Tuple
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.trainers.models import LearningModel

6
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


from typing import Any, Dict, List
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult

6
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


from typing import Tuple, List
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.trainers.models import LearningModel
EPSILON = 1e-7

5
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


from typing import Any, Dict, List
import logging
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult

30
ml-agents/mlagents/trainers/models.py


from typing import Callable, List
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as c_layers
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
if True: # TODO TF2
tf_variance_scaling = tf.initializers.variance_scaling
tf_flatten = tf.layers.flatten
tf_rnn = tf.nn.rnn_cell
else:
import tensorflow.contrib.layers as c_layers
tf_variance_scaling = c_layers.variance_scaling_initializer
tf_flatten = c_layers.flatten
tf_rnn = tf.contrib.rnn
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.envs.brain import CameraResolution

@staticmethod
def scaled_init(scale):
return c_layers.variance_scaling_initializer(scale)
return tf_variance_scaling(scale)
@staticmethod
def swish(input_activation: tf.Tensor) -> tf.Tensor:

activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
kernel_initializer=c_layers.variance_scaling_initializer(1.0),
kernel_initializer=tf_variance_scaling(1.0),
)
return hidden

reuse=reuse,
name="conv_2",
)
hidden = c_layers.flatten(conv2)
hidden = tf_flatten(conv2)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

reuse=reuse,
name="conv_3",
)
hidden = c_layers.flatten(conv3)
hidden = tf_flatten(conv3)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

)
hidden = tf.add(block_input, hidden)
hidden = tf.nn.relu(hidden)
hidden = c_layers.flatten(hidden)
hidden = tf_flatten(hidden)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

memory_in = tf.reshape(memory_in[:, :], [-1, m_size])
half_point = int(m_size / 2)
with tf.variable_scope(name):
rnn_cell = tf.contrib.rnn.BasicLSTMCell(half_point)
lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(
rnn_cell = tf_rnn.BasicLSTMCell(half_point)
lstm_vector_in = tf_rnn.LSTMStateTuple(
memory_in[:, :half_point], memory_in[:, half_point:]
)
recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(

6
ml-agents/mlagents/trainers/ppo/models.py


import logging
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule
logger = logging.getLogger("mlagents.trainers")

6
ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py


import logging
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from tensorflow.python.client import device_lib
from mlagents.envs.timers import timed
from mlagents.trainers.models import EncoderType, LearningRateSchedule

6
ml-agents/mlagents/trainers/ppo/policy.py


import logging
import numpy as np
from typing import Any, Dict, Optional
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.timers import timed
from mlagents.envs.brain import BrainInfo, BrainParameters

19
ml-agents/mlagents/trainers/sac/models.py


import logging
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import tensorflow.contrib.layers as c_layers
if True: # TODO TF2
tf_variance_scaling = tf.initializers.variance_scaling
tf_flatten = tf.layers.flatten
else:
import tensorflow.contrib.layers as c_layers
tf_variance_scaling = c_layers.variance_scaling_initializer
tf_flatten = c_layers.flatten
LOG_STD_MAX = 2
LOG_STD_MIN = -20

size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
kernel_initializer=tf_variance_scaling(
0.01
),
)
)

6
ml-agents/mlagents/trainers/sac/policy.py


import logging
from typing import Dict, Any, Optional
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.timers import timed
from mlagents.envs.brain import BrainInfo, BrainParameters

5
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py


from __future__ import print_function
import numpy as np
import struct # convert from Python values and C structs
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import re
# import barracuda

5
ml-agents/mlagents/trainers/tests/test_bc.py


import os
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import yaml
from mlagents.trainers.bc.models import BehavioralCloningModel

6
ml-agents/mlagents/trainers/tests/test_multigpu.py


import unittest.mock as mock
import pytest
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import yaml
from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy

6
ml-agents/mlagents/trainers/tests/test_ppo.py


import pytest
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import yaml
from mlagents.trainers.ppo.models import PPOModel

6
ml-agents/mlagents/trainers/tests/test_sac.py


import yaml
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.trainers.sac.models import SACModel
from mlagents.trainers.sac.policy import SACPolicy

11
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import MagicMock, Mock, patch
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import yaml
import pytest

@patch("numpy.random.seed")
@patch("tensorflow.set_random_seed")
@patch.object(tf, "set_random_seed")
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
seed = 27
TrainerController(

return tc, trainer_mock
@patch("tensorflow.reset_default_graph")
@patch.object(tf, "reset_default_graph")
def test_start_learning_trains_forever_if_no_train_model(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tc.train_model = False

env_mock.close.assert_called_once()
@patch("tensorflow.reset_default_graph")
@patch.object(tf, "reset_default_graph")
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tf_reset_graph.return_value = None

5
ml-agents/mlagents/trainers/tf_policy.py


from typing import Any, Dict
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from mlagents.envs.exception import UnityException
from mlagents.envs.policy import Policy

7
ml-agents/mlagents/trainers/trainer.py


import logging
from typing import Dict, List, Deque, Any
import os
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
import numpy as np
from collections import deque, defaultdict

5
ml-agents/mlagents/trainers/trainer_controller.py


from typing import Dict, List, Optional
import numpy as np
import tensorflow as tf
try:
import tensorflow.compat.v1 as tf
except ImportError:
import tensorflow as tf
from time import time
from mlagents.envs.env_manager import EnvironmentStep

2
ml-agents/setup.py


"Pillow>=4.2.1",
"protobuf>=3.6",
"pyyaml",
"tensorflow>=1.7,<2.0",
"tensorflow>=1.7,<2.1",
'pypiwin32==223;platform_system=="Windows"',
],
python_requires=">=3.6.1",
正在加载...
取消
保存