比较提交

...
此合并请求有变更与目标分支冲突。
/setup.cfg
/.pre-commit-config.yaml
/ml-agents/setup.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/tests/test_trainer_controller.py
/ml-agents/mlagents/trainers/trainer.py
/.circleci/config.yml
/test_constraints_max_tf2_version.txt
/ml-agents/mlagents/trainers/bc/models.py
/ml-agents/mlagents/trainers/components/bc/model.py
/ml-agents/mlagents/trainers/components/reward_signals/__init__.py
/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
/ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
/ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
/ml-agents/mlagents/trainers/models.py
/ml-agents/mlagents/trainers/tf_policy.py
/ml-agents/mlagents/trainers/sac/models.py
/ml-agents/mlagents/trainers/sac/policy.py
/ml-agents/mlagents/trainers/tensorflow_to_barracuda.py
/ml-agents/mlagents/trainers/tests/test_bc.py
/ml-agents/mlagents/trainers/tests/test_multigpu.py
/ml-agents/mlagents/trainers/tests/test_ppo.py
/ml-agents/mlagents/trainers/tests/test_sac.py
/ml-agents/mlagents/trainers/ppo/models.py
/ml-agents/mlagents/trainers/ppo/policy.py
/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
/test_constraints_max_tf2_version.txt

1 次代码提交

作者 SHA1 备注 提交日期
Chris Elion 7353ad22 Merge remote-tracking branch 'origin/develop' into try-tf2-support 5 年前
共有 33 个文件被更改,包括 97 次插入和 46 次删除
  1. 8
      .circleci/config.yml
  2. 4
      setup.cfg
  3. 2
      test_constraints_max_tf2_version.txt
  4. 3
      .pre-commit-config.yaml
  5. 2
      ml-agents/setup.py
  6. 0
      ml-agents/mlagents/trainers/__init__.py
  7. 10
      ml-agents/mlagents/trainers/bc/models.py
  8. 3
      ml-agents/mlagents/trainers/components/bc/model.py
  9. 2
      ml-agents/mlagents/trainers/components/reward_signals/__init__.py
  10. 3
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  11. 3
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
  12. 3
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
  13. 2
      ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
  14. 17
      ml-agents/mlagents/trainers/models.py
  15. 2
      ml-agents/mlagents/trainers/tf_policy.py
  16. 4
      ml-agents/mlagents/trainers/trainer.py
  17. 8
      ml-agents/mlagents/trainers/sac/models.py
  18. 2
      ml-agents/mlagents/trainers/sac/policy.py
  19. 2
      ml-agents/mlagents/trainers/tensorflow_to_barracuda.py
  20. 2
      ml-agents/mlagents/trainers/tests/test_bc.py
  21. 2
      ml-agents/mlagents/trainers/tests/test_multigpu.py
  22. 3
      ml-agents/mlagents/trainers/tests/test_ppo.py
  23. 3
      ml-agents/mlagents/trainers/tests/test_sac.py
  24. 8
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  25. 2
      ml-agents/mlagents/trainers/trainer_controller.py
  26. 3
      ml-agents/mlagents/trainers/ppo/models.py
  27. 3
      ml-agents/mlagents/trainers/ppo/policy.py
  28. 3
      ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
  29. 6
      test_constraints_max_tf1_version.txt
  30. 2
      ml-agents/mlagents/tf_utils/__init__.py
  31. 26
      ml-agents/mlagents/tf_utils/tf.py
  32. 0
      /test_constraints_max_tf2_version.txt

8
.circleci/config.yml


executor: python373
pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_version.txt
pip_constraints: test_constraints_max_tf1_version.txt
- build_python:
name: python_3.7.3+tf2
executor: python373
pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_tf2_version.txt
- markdown_link_check
- protobuf_generation_check
- deploy:

4
setup.cfg


# Black tends to introduce things flake8 doesn't like, such as "line break before binary operator"
# or whitespace before ':'. Rather than fight with black, just ignore these for now.
W503, E203,
# flake8-tidy-imports adds this warning, which we don't really care about for now
I200
banned-modules = tensorflow = use mlagents.tf_utils instead (it handles tf2 compat).

2
test_constraints_max_tf2_version.txt


# For projects with upper bounds, we should periodically update this list to the latest release version
grpcio>=1.23.0
numpy>=1.17.2
tensorflow>=1.14.0,<2.0
tensorflow>=2.0.0,<2.1.0

3
.pre-commit-config.yaml


.*_pb2.py|
.*_pb2_grpc.py
)$
additional_dependencies: [flake8-comprehensions]
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions, flake8-tidy-imports]
- id: trailing-whitespace
name: trailing-whitespace-markdown
types: [markdown]

2
ml-agents/setup.py


"Pillow>=4.2.1",
"protobuf>=3.6",
"pyyaml",
"tensorflow>=1.7,<2.0",
"tensorflow>=1.7,<2.1",
'pypiwin32==223;platform_system=="Windows"',
],
python_requires=">=3.6.1",

0
ml-agents/mlagents/trainers/__init__.py

10
ml-agents/mlagents/trainers/bc/models.py


import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from mlagents.tf_utils import tf, tf_variance_scaling
from mlagents.trainers.models import LearningModel

size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
),
kernel_initializer=tf_variance_scaling(0.01),
)
)
self.action_probs = tf.concat(

activation=None,
use_bias=False,
name="pre_action",
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01),
kernel_initializer=tf_variance_scaling(0.01),
)
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")

3
ml-agents/mlagents/trainers/components/bc/model.py


import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel

2
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


import numpy as np
import abc
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException

3
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


from typing import List, Tuple
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel

3
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


from typing import Any, Dict, List
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult

3
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


from typing import List, Optional, Tuple
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel
EPSILON = 1e-7

2
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


from typing import Any, Dict, List
import logging
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult

17
ml-agents/mlagents/trainers/models.py


from typing import Callable, Dict, List, Optional
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from mlagents.tf_utils import tf, tf_variance_scaling, tf_rnn, tf_flatten
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.envs.brain import CameraResolution

@staticmethod
def scaled_init(scale):
return c_layers.variance_scaling_initializer(scale)
return tf_variance_scaling(scale)
@staticmethod
def swish(input_activation: tf.Tensor) -> tf.Tensor:

activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
kernel_initializer=c_layers.variance_scaling_initializer(1.0),
kernel_initializer=tf_variance_scaling(1.0),
)
return hidden

reuse=reuse,
name="conv_2",
)
hidden = c_layers.flatten(conv2)
hidden = tf_flatten(conv2)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

reuse=reuse,
name="conv_3",
)
hidden = c_layers.flatten(conv3)
hidden = tf_flatten(conv3)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

)
hidden = tf.add(block_input, hidden)
hidden = tf.nn.relu(hidden)
hidden = c_layers.flatten(hidden)
hidden = tf_flatten(hidden)
with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = LearningModel.create_vector_observation_encoder(

memory_in = tf.reshape(memory_in[:, :], [-1, m_size])
half_point = int(m_size / 2)
with tf.variable_scope(name):
rnn_cell = tf.contrib.rnn.BasicLSTMCell(half_point)
lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(
rnn_cell = tf_rnn.BasicLSTMCell(half_point)
lstm_vector_in = tf_rnn.LSTMStateTuple(
memory_in[:, :half_point], memory_in[:, half_point:]
)
recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(

2
ml-agents/mlagents/trainers/tf_policy.py


from typing import Any, Dict, List, Optional
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.exception import UnityException
from mlagents.envs.policy import Policy

4
ml-agents/mlagents/trainers/trainer.py


import logging
from typing import Dict, List, Deque, Any
import os
import tensorflow as tf
from mlagents.tf_utils import tf
import numpy as np
from collections import deque, defaultdict

8
ml-agents/mlagents/trainers/sac/models.py


import numpy as np
from typing import Dict, List, Optional
import tensorflow as tf
from mlagents.tf_utils import tf, tf_variance_scaling
import tensorflow.contrib.layers as c_layers
LOG_STD_MAX = 2
LOG_STD_MIN = -20

size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(
factor=0.01
),
kernel_initializer=tf_variance_scaling(0.01),
)
)
all_logits = tf.concat(

2
ml-agents/mlagents/trainers/sac/policy.py


import logging
from typing import Dict, Any, Optional
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.timers import timed
from mlagents.envs.brain import BrainInfo, BrainParameters

2
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py


from __future__ import print_function
import numpy as np
import struct # convert between Python values and C structs
import tensorflow as tf
from mlagents.tf_utils import tf
import re
# import barracuda

2
ml-agents/mlagents/trainers/tests/test_bc.py


import os
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
import yaml
from mlagents.trainers.bc.models import BehavioralCloningModel

2
ml-agents/mlagents/trainers/tests/test_multigpu.py


import unittest.mock as mock
import pytest
import tensorflow as tf
from mlagents.tf_utils import tf
import yaml
from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy

3
ml-agents/mlagents/trainers/tests/test_ppo.py


import pytest
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
import yaml
from mlagents.trainers.ppo.models import PPOModel

3
ml-agents/mlagents/trainers/tests/test_sac.py


import yaml
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.trainers.sac.models import SACModel
from mlagents.trainers.sac.policy import SACPolicy

8
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import MagicMock, Mock, patch
from mlagents.tf_utils import tf
import yaml
import pytest

@patch("numpy.random.seed")
@patch("tensorflow.set_random_seed")
@patch.object(tf, "set_random_seed")
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
seed = 27
TrainerController(

return tc, trainer_mock
@patch("tensorflow.reset_default_graph")
@patch.object(tf, "reset_default_graph")
def test_start_learning_trains_forever_if_no_train_model(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tc.train_model = False

env_mock.close.assert_called_once()
@patch("tensorflow.reset_default_graph")
@patch.object(tf, "reset_default_graph")
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tf_reset_graph.return_value = None

2
ml-agents/mlagents/trainers/trainer_controller.py


from typing import Dict, List, Optional, Set
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from time import time
from mlagents.envs.env_manager import EnvironmentStep

3
ml-agents/mlagents/trainers/ppo/models.py


from typing import Optional
import numpy as np
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule
logger = logging.getLogger("mlagents.trainers")

3
ml-agents/mlagents/trainers/ppo/policy.py


import logging
import numpy as np
from typing import Any, Dict, Optional
import tensorflow as tf
from mlagents.tf_utils import tf
from mlagents.envs.timers import timed
from mlagents.envs.brain import BrainInfo, BrainParameters

3
ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py


import logging
from typing import Any, Dict, List, Optional
import tensorflow as tf
from mlagents.tf_utils import tf
from tensorflow.python.client import device_lib
from mlagents.envs.brain import BrainParameters
from mlagents.envs.timers import timed

6
test_constraints_max_tf1_version.txt


# pip constraints to use the *highest* versions allowed in ml-agents/setup.py
# with the exception of tensorflow, which is constrained to <2
# For projects with upper bounds, we should periodically update this list to the latest release version
grpcio>=1.23.0
numpy>=1.17.2
tensorflow>=1.14.0,<2.0

2
ml-agents/mlagents/tf_utils/__init__.py


from mlagents.tf_utils.tf import tf as tf # noqa
from mlagents.tf_utils.tf import tf_flatten, tf_rnn, tf_variance_scaling # noqa

26
ml-agents/mlagents/tf_utils/tf.py


# This should be the only place that we import tensorflow directly.
# Everywhere else is caught by the banned-modules setting for flake8
import tensorflow as tf # noqa I201
from distutils.version import LooseVersion
# LooseVersion handles things like "1.2.3a" or "4.5.6-rc7" fairly sensibly.
_is_tensorflow2 = LooseVersion(tf.__version__) >= LooseVersion("2.0.0")
# A few things that we use live in different places between tensorflow 1.x and 2.x
# If anything new is added, please add it here
if _is_tensorflow2:
import tensorflow.compat.v1 as tf
tf_variance_scaling = tf.initializers.variance_scaling
tf_flatten = tf.layers.flatten
tf_rnn = tf.nn.rnn_cell
tf.disable_v2_behavior()
else:
import tensorflow.contrib.layers as c_layers
tf_variance_scaling = c_layers.variance_scaling_initializer
tf_flatten = c_layers.flatten
tf_rnn = tf.contrib.rnn

/test_constraints_max_version.txt → /test_constraints_max_tf2_version.txt

正在加载...
取消
保存