
removing tensorflow testing for pytest and yamato

/develop/rm-rf-new-models
vincentpierre, 4 years ago
Current commit: 713e65fb
12 files changed, 30 insertions and 103 deletions
  1. .github/workflows/pytest.yml (13 changed lines)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (30 changed lines)
  3. ml-agents/mlagents/trainers/sac/trainer.py (53 changed lines)
  4. ml-agents/mlagents/trainers/tests/__init__.py (2 changed lines)
  5. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (4 changed lines)
  6. ml-agents/mlagents/trainers/trainer_controller.py (2 changed lines)
  7. ml-agents/setup.py (1 changed line)
  8. ml-agents/tests/yamato/yamato_utils.py (3 changed lines)
  9. test_requirements.txt (4 changed lines)
  10. test_constraints_min_version.txt (8 changed lines)
  11. test_constraints_max_tf2_version.txt (6 changed lines)
  12. test_constraints_max_tf1_version.txt (7 changed lines)

.github/workflows/pytest.yml (13 changed lines)


python-version: [3.6.x, 3.7.x, 3.8.x]
include:
- python-version: 3.6.x
pip_constraints: test_constraints_min_version.txt
pip_constraints: test_constraints_max_tf1_version.txt
pip_constraints: test_constraints_max_tf2_version.txt
steps:
- uses: actions/checkout@v2
- name: Set up Python

# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements file
key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt', matrix.pip_constraints) }}
key: ${{ runner.os }}-pip-${{ hashFiles('ml-agents/setup.py', 'ml-agents-envs/setup.py', 'gym-unity/setup.py', 'test_requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-

# pin pip to workaround https://github.com/pypa/pip/issues/9180
python -m pip install pip==20.2
python -m pip install --upgrade setuptools
python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./ml-agents-envs
python -m pip install --progress-bar=off -e ./ml-agents
python -m pip install --progress-bar=off -r test_requirements.txt
python -m pip install --progress-bar=off -e ./gym-unity
- name: Save python dependencies
run: |
pip freeze > pip_versions-${{ matrix.python-version }}.txt

ml-agents/mlagents/trainers/ppo/trainer.py (30 changed lines)


from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
BaseRewardProvider,
)
logger = get_logger(__name__)

for name, v in value_estimates.items():
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
else:
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
# Evaluate all reward functions
self.collected_rewards["environment"][agent_id] += np.sum(

# BaseRewardProvider is a PyTorch-based reward signal
if isinstance(reward_signal, BaseRewardProvider):
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
else: # reward_signal is a TensorFlow-based RewardSignal class
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)
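
The hunk above shows both sides of the dispatch between torch BaseRewardProvider objects and the older TensorFlow RewardSignal classes; with the TensorFlow path gone, the trainer only needs the evaluate()/strength interface. Below is a minimal, self-contained sketch of that interface. ConstantRewardProvider is a hypothetical stub (the real providers live under mlagents.trainers.torch.components.reward_providers); the evaluate, scale, and accumulate steps mirror the lines shown in the diff:

import numpy as np

class ConstantRewardProvider:
    # Hypothetical stand-in for a torch BaseRewardProvider: only the two members
    # the trainer loop relies on (evaluate() and strength) are reproduced here.
    def __init__(self, strength: float, reward: float) -> None:
        self.strength = strength
        self._reward = reward

    def evaluate(self, agent_buffer_trajectory) -> np.ndarray:
        # One reward per step in the trajectory buffer.
        n_steps = len(agent_buffer_trajectory["environment_rewards"])
        return np.full(n_steps, self._reward, dtype=np.float32)

# Evaluate every reward signal on a trajectory and accumulate per-agent totals,
# as in the trainer loop above, with no TensorFlow branch needed any more.
reward_signals = {"extrinsic": ConstantRewardProvider(strength=1.0, reward=0.1)}
agent_buffer_trajectory = {"environment_rewards": [0.0, 0.0, 1.0], "extrinsic_rewards": []}
collected = {"extrinsic": 0.0}
for name, reward_signal in reward_signals.items():
    evaluate_result = reward_signal.evaluate(agent_buffer_trajectory) * reward_signal.strength
    agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
    collected[name] += float(np.sum(evaluate_result))
print(collected)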

ml-agents/mlagents/trainers/sac/trainer.py (53 changed lines)


from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider
logger = get_logger(__name__)

agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.optimizer.reward_signals.items():
# BaseRewardProvider is a PyTorch-based reward signal
if isinstance(reward_signal, BaseRewardProvider):
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
else: # reward_signal uses TensorFlow
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached
)
for name, v in value_estimates.items():
# BaseRewardProvider is a PyTorch-based reward signal
if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
)
else: # TensorFlow reward signal
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
# BaseRewardProvider is a PyTorch-based reward signal
if isinstance(signal, BaseRewardProvider):
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
else: # reward_signal is a TensorFlow-based RewardSignal class
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug(f"Updating {name} at step {self.step}")
# BaseRewardProvider is a PyTorch-based reward signal
if not isinstance(signal, BaseRewardProvider):
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
else: # TensorFlow reward signal
if name != "extrinsic":
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
update_stats = self.optimizer.update_reward_signals(
reward_signal_minibatches, n_sequences
)
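
The loop above builds one minibatch per reward signal that actually has something to train, then hands the whole dictionary to the optimizer in a single update_reward_signals call. A small self-contained sketch of that pattern follows; the stub buffer and signal classes are hypothetical and only reproduce the pieces the loop uses (sample_mini_batch and the per-signal "needs an update" check, which the diff expresses as signal.update_dict in one branch and name != "extrinsic" in the other):

from typing import Any, Dict

class ReplayBufferStub:
    # Hypothetical stand-in for the SAC update buffer; only sample_mini_batch is needed.
    def sample_mini_batch(self, batch_size: int, sequence_length: int) -> Dict[str, Any]:
        return {"batch_size": batch_size, "sequence_length": sequence_length}

class SignalStub:
    # Hypothetical reward signal; needs_update mimics the checks in the hunk above.
    def __init__(self, needs_update: bool) -> None:
        self.needs_update = needs_update

buffer = ReplayBufferStub()
reward_signals = {"extrinsic": SignalStub(False), "curiosity": SignalStub(True)}
reward_signal_minibatches: Dict[str, Dict[str, Any]] = {}
for name, signal in reward_signals.items():
    # Some signals don't need a minibatch to be sampled - so we don't!
    if signal.needs_update:
        reward_signal_minibatches[name] = buffer.sample_mini_batch(
            batch_size=256, sequence_length=64
        )
# The optimizer then receives the whole dict at once, e.g.
# self.optimizer.update_reward_signals(reward_signal_minibatches, n_sequences)
print(sorted(reward_signal_minibatches))  # only the signals that sampled a minibatch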

ml-agents/mlagents/trainers/tests/__init__.py (2 changed lines)


# tb[-2] is the wrapper function, e.g. np_array_no_float64
# we want the calling function, so use tb[-3]
filename = tb[-3].filename
# Only raise if this came from mlagents code, not tensorflow
# Only raise if this came from mlagents code
if (
"ml-agents/mlagents" in filename
or "ml-agents-envs/mlagents" in filename
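
For context, this check lives in a test helper that wraps NumPy constructors so the suite fails when ml-agents' own code implicitly creates float64 arrays, while third-party callers (previously including tensorflow) are ignored. A minimal sketch of such a wrapper is below; the function names are hypothetical, and only the tb[-3].filename check mirrors the diff:

import traceback
import numpy as np

_original_np_array = np.array

def _raise_if_called_from_mlagents(message: str) -> None:
    # Hypothetical helper; the tb[-3] filename check is the part taken from the diff.
    tb = traceback.extract_stack()
    # tb[-1] is this helper, tb[-2] is the wrapper function, e.g. np_array_no_float64,
    # and tb[-3] is the calling function we actually want to inspect.
    filename = tb[-3].filename
    # Only raise if this came from mlagents code
    if "ml-agents/mlagents" in filename or "ml-agents-envs/mlagents" in filename:
        raise ValueError(message)

def np_array_no_float64(*args, **kwargs):
    # Sketch of a test-only replacement for np.array that rejects float64 results
    # produced by ml-agents' own code.
    result = _original_np_array(*args, **kwargs)
    if result.dtype == np.float64:
        _raise_if_called_from_mlagents("float64 array created; use float32 instead")
    return result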

ml-agents/mlagents/trainers/tests/test_trainer_controller.py (4 changed lines)


@patch("numpy.random.seed")
@patch.object(torch, "manual_seed")
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
def test_initialization_seed(numpy_random_seed, torch_set_seed):
seed = 27
trainer_factory_mock = MagicMock()
trainer_factory_mock.ghost_controller = GhostController()

training_seed=seed,
)
numpy_random_seed.assert_called_with(seed)
tensorflow_set_seed.assert_called_with(seed)
torch_set_seed.assert_called_with(seed)
@pytest.fixture

ml-agents/mlagents/trainers/trainer_controller.py (2 changed lines)


:param param_manager: EnvironmentParameterManager object which stores information about all
environment parameters.
:param train: Whether to train model, or only run inference.
:param training_seed: Seed to use for Numpy and Tensorflow random number generation.
:param training_seed: Seed to use for Numpy and Torch random number generation.
:param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging.
"""
self.trainers: Dict[str, Trainer] = {}
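
The docstring change above only renames which libraries the training seed feeds. In practice the seeding boils down to the two calls that the test in test_trainer_controller.py asserts; the helper name below is hypothetical, and the real code imports torch through an ml-agents wrapper module rather than directly:

import numpy as np
import torch

def set_training_seed(seed: int) -> None:
    # Hypothetical helper; the two calls match what the seeding test expects.
    np.random.seed(seed)
    torch.manual_seed(seed)

set_training_seed(27)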

ml-agents/setup.py (1 changed line)


]
},
cmdclass={"verify": VerifyVersionCommand},
extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
)

ml-agents/tests/yamato/yamato_utils.py (3 changed lines)


pip_commands = [
"--upgrade pip",
"--upgrade setuptools",
# TODO build these and publish to internal pypi
"~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl",
"tf2onnx==1.6.1",
]
if mlagents_python_version:
# install from pypi
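
The pip_commands list above holds plain argument strings; this change drops the locally built TensorFlow wheel and the tf2onnx pin from it. The runner below is an assumption (the code that consumes pip_commands is outside this hunk) and is shown only to make the string format concrete:

import subprocess
import sys

pip_commands = [
    "--upgrade pip",
    "--upgrade setuptools",
]
for cmd in pip_commands:
    # Each entry is just the argument tail of a "python -m pip install ..." call.
    subprocess.check_call([sys.executable, "-m", "pip", "install"] + cmd.split())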

test_requirements.txt (4 changed lines)


pytest-cov==2.6.1
pytest-xdist==1.34.0
# Tensorflow tests are here for the time being, before they are used in the codebase.
tensorflow>=1.14,<3.0
tf2onnx>=1.5.5

test_constraints_min_version.txt (8 changed lines)


# pip constraints to use the *lowest* versions allowed in ml-agents/setup.py
grpcio==1.11.0
numpy==1.14.1
Pillow==4.2.1
protobuf==3.6
tensorflow==1.14.0
h5py==2.9.0
tensorboard==1.15.0

test_constraints_max_tf2_version.txt (6 changed lines)


# pip constraints to use the *highest* versions allowed in ml-agents/setup.py
# For projects with upper bounds, we should periodically update this list to the latest release version
grpcio>=1.23.0
numpy>=1.17.2
tensorflow==2.3.0
h5py>=2.10.0

test_constraints_max_tf1_version.txt (7 changed lines)


# pip constraints to use the *highest* versions allowed in ml-agents/setup.py
# with the exception of tensorflow, which is constrained to <2
# For projects with upper bounds, we should periodically update this list to the latest release version
grpcio>=1.23.0
numpy>=1.17.2
tensorflow>=1.15.2,<2.0.0
h5py>=2.10.0
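
These constraint files only take effect when pip is pointed at them with -c, as in the workflow steps near the top of this diff (python -m pip install -e ./ml-agents -c ${{ matrix.pip_constraints }}). Below is a minimal sketch of the same step driven from Python, with the package list, flags, and constraints file taken from the lines shown earlier:

import subprocess
import sys

# Install the three editable packages while capping dependency versions with the
# minimum-version constraints file, mirroring the GitHub Actions install step above.
constraints = "test_constraints_min_version.txt"
for package in ("./ml-agents-envs", "./ml-agents", "./gym-unity"):
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--progress-bar=off",
         "-e", package, "-c", constraints]
    )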