
Merge remote-tracking branch 'origin/master' into release_1_to_master

/release_1_branch
Chris Elion, 5 years ago
Current commit: 68b68396
107 files changed, with 1826 insertions and 293 deletions
  1. .gitignore (4 changes)
  2. com.unity.ml-agents/CHANGELOG.md (20 changes)
  3. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (4 changes)
  4. com.unity.ml-agents/Runtime/Agent.cs (2 changes)
  5. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (2 changes)
  6. com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs (4 changes)
  7. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (2 changes)
  8. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (2 changes)
  9. com.unity.ml-agents/Runtime/Inference/TensorProxy.cs (2 changes)
  10. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (2 changes)
  11. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (2 changes)
  12. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (4 changes)
  13. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (4 changes)
  14. com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs (2 changes)
  15. com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs (4 changes)
  16. docs/Getting-Started.md (15 changes)
  17. docs/Learning-Environment-Create-New.md (43 changes)
  18. docs/Learning-Environment-Examples.md (4 changes)
  19. docs/Learning-Environment-Executable.md (13 changes)
  20. docs/Migrating.md (19 changes)
  21. docs/Using-Tensorboard.md (2 changes)
  22. gym-unity/README.md (3 changes)
  23. gym-unity/gym_unity/__init__.py (4 changes)
  24. ml-agents-envs/mlagents_envs/__init__.py (4 changes)
  25. ml-agents-envs/mlagents_envs/environment.py (30 changes)
  26. ml-agents-envs/mlagents_envs/tests/test_envs.py (12 changes)
  27. ml-agents/mlagents/trainers/__init__.py (4 changes)
  28. ml-agents/mlagents/trainers/learn.py (169 changes)
  29. ml-agents/mlagents/trainers/policy/tf_policy.py (5 changes)
  30. ml-agents/mlagents/trainers/ppo/trainer.py (3 changes)
  31. ml-agents/mlagents/trainers/sac/trainer.py (7 changes)
  32. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (6 changes)
  33. ml-agents/mlagents/trainers/tests/test_bcmodule.py (2 changes)
  34. ml-agents/mlagents/trainers/tests/test_ghost.py (9 changes)
  35. ml-agents/mlagents/trainers/tests/test_learn.py (133 changes)
  36. ml-agents/mlagents/trainers/tests/test_nn_policy.py (8 changes)
  37. ml-agents/mlagents/trainers/tests/test_policy.py (2 changes)
  38. ml-agents/mlagents/trainers/tests/test_ppo.py (9 changes)
  39. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes)
  40. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (2 changes)
  41. ml-agents/mlagents/trainers/tests/test_sac.py (14 changes)
  42. ml-agents/mlagents/trainers/tests/test_simple_rl.py (6 changes)
  43. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (6 changes)
  44. ml-agents/mlagents/trainers/tests/test_trainer_util.py (101 changes)
  45. ml-agents/mlagents/trainers/trainer/trainer.py (3 changes)
  46. ml-agents/mlagents/trainers/trainer_controller.py (20 changes)
  47. ml-agents/mlagents/trainers/trainer_util.py (55 changes)
  48. ml-agents/tests/yamato/scripts/run_llapi.py (17 changes)
  49. ml-agents/tests/yamato/training_int_tests.py (4 changes)
  50. ml-agents/tests/yamato/yamato_utils.py (3 changes)
  51. ml-agents/mlagents/trainers/cli_utils.py (41 changes)
  52. config/imitation/CrawlerStatic.yaml (29 changes)
  53. config/imitation/FoodCollector.yaml (29 changes)
  54. config/imitation/Hallway.yaml (28 changes)
  55. config/imitation/PushBlock.yaml (25 changes)
  56. config/imitation/Pyramids.yaml (36 changes)
  57. config/ppo/3DBall.yaml (25 changes)
  58. config/ppo/3DBallHard.yaml (25 changes)
  59. config/ppo/3DBall_randomize.yaml (40 changes)
  60. config/ppo/Basic.yaml (25 changes)
  61. config/ppo/Bouncer.yaml (25 changes)
  62. config/ppo/CrawlerDynamic.yaml (25 changes)
  63. config/ppo/CrawlerStatic.yaml (25 changes)
  64. config/ppo/FoodCollector.yaml (25 changes)
  65. config/ppo/GridWorld.yaml (25 changes)
  66. config/ppo/Hallway.yaml (25 changes)
  67. config/ppo/PushBlock.yaml (25 changes)
  68. config/ppo/Pyramids.yaml (29 changes)
  69. config/ppo/Reacher.yaml (25 changes)
  70. config/ppo/SoccerTwos.yaml (38 changes)
  71. config/ppo/StrikersVsGoalie.yaml (62 changes)
  72. config/ppo/Tennis.yaml (31 changes)
  73. config/ppo/VisualHallway.yaml (25 changes)
  74. config/ppo/VisualPushBlock.yaml (25 changes)
  75. config/ppo/VisualPyramids.yaml (29 changes)
  76. config/ppo/Walker.yaml (25 changes)
  77. config/ppo/WallJump.yaml (50 changes)
  78. config/ppo/WallJump_curriculum.yaml (65 changes)
  79. config/ppo/WormDynamic.yaml (25 changes)
  80. config/ppo/WormStatic.yaml (25 changes)
  81. config/sac/3DBall.yaml (25 changes)
  82. config/sac/3DBallHard.yaml (25 changes)
  83. config/sac/Basic.yaml (25 changes)
  84. config/sac/Bouncer.yaml (25 changes)
  85. config/sac/CrawlerDynamic.yaml (25 changes)
  86. config/sac/CrawlerStatic.yaml (25 changes)
  87. config/sac/FoodCollector.yaml (25 changes)
  88. config/sac/GridWorld.yaml (25 changes)
  89. config/sac/Hallway.yaml (25 changes)
  90. config/sac/PushBlock.yaml (25 changes)
  91. config/sac/Pyramids.yaml (31 changes)
  92. config/sac/Reacher.yaml (25 changes)
  93. config/sac/Tennis.yaml (30 changes)
  94. config/sac/VisualHallway.yaml (26 changes)
  95. config/sac/VisualPushBlock.yaml (26 changes)

4
.gitignore


# Tensorflow Model Info
# Output Artifacts (Legacy)
# Output Artifacts
/results
# Training environments
/envs

20
com.unity.ml-agents/CHANGELOG.md


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [1.0.0-preview] - 2020-05-06
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Curriculum and Parameter Randomization configurations have been merged
into the main training configuration file. Note that this means training
configuration files are now environment-specific. (#3791)
- Training artifacts (trained models, summaries) are now found in the `results/`
directory. (#3829)
- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
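
As a rough sketch of the new layout (directory and file names below are illustrative placeholders; exact contents depend on the behaviors and trainers used):

```
results/
  <run-id>/
    <behavior_name>.nn      # latest exported model checkpoint
    configuration.yaml      # run configuration, written at the end of the run
    run_logs/
      timers.json           # timing information for the run
      Player-0.log          # Unity Player log for worker 0
```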
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.0.0-preview] - 2020-04-30
### Major Changes

4
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using UnityEngine;
namespace Unity.MLAgents.Editor

2
com.unity.ml-agents/Runtime/Agent.cs


using System.Collections.Generic;
using System.Collections.ObjectModel;
using UnityEngine;
using Unity.Barracuda;
using Unity.Barracuda;
using UnityEngine.Serialization;
namespace Unity.MLAgents

2
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.Barracuda;
using Unity.Barracuda;
namespace Unity.MLAgents.Inference
{

4
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


using System.Collections.Generic;
using System;
using Unity.MLAgents.Inference.Utils;
using Unity.Barracuda;
using Unity.Barracuda;
using Unity.MLAgents.Inference.Utils;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Inference

2
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


using System.Collections.Generic;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Inference
{

2
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


using System.Collections.Generic;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Inference
{

2
com.unity.ml-agents/Runtime/Inference/TensorProxy.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Inference.Utils;
using Unity.MLAgents.Inference.Utils;
namespace Unity.MLAgents.Inference
{

2
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Inference;
namespace Unity.MLAgents.Sensors
{

2
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


using System.Collections.Generic;
using NUnit.Framework;
using Unity.Barracuda;
using Unity.Barracuda;
namespace Unity.MLAgents.Tests
{

4
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


using System.Linq;
using Unity.Barracuda;
using Unity.Barracuda;
using System.Linq;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests

4
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


using System.Linq;
using Unity.Barracuda;
using Unity.Barracuda;
using System.Linq;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests

2
com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs


using NUnit.Framework;
using Unity.Barracuda;
using Unity.Barracuda;
namespace Unity.MLAgents.Tests

4
com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs


using System;
using NUnit.Framework;
using Unity.Barracuda;
using Unity.Barracuda;
using NUnit.Framework;
namespace Unity.MLAgents.Tests
{

15
docs/Getting-Started.md


1. Navigate to the folder where you cloned the `ml-agents` repository. **Note**:
If you followed the default [installation](Installation.md), then you should
be able to run `mlagents-learn` from any directory.
1. Run `mlagents-learn config/trainer_config.yaml --run-id=first3DBallRun`.
- `config/trainer_config.yaml` is the path to a default training
configuration file that we provide. It includes training configurations for
all our example environments, including 3DBall.
1. Run `mlagents-learn config/ppo/3DBall.yaml --run-id=first3DBallRun`.
- `config/ppo/3DBall.yaml` is the path to a default training
configuration file that we provide. The `config/ppo` folder includes training configuration
files for all our example environments, including 3DBall.
- `run-id` is a unique name for this training session.
1. When the message _"Start training by pressing the Play button in the Unity
Editor"_ is displayed on the screen, you can press the **Play** button in

sequence_length: 64
summary_freq: 1000
use_recurrent: False
summary_path: ./summaries/first3DBallRun
model_path: ./models/first3DBallRun/3DBallLearning
output_path: ./results/first3DBallRun/3DBallLearning
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.

run the same command again, appending the `--resume` flag:
```sh
mlagents-learn config/trainer_config.yaml --run-id=first3DBallRun --resume
mlagents-learn config/ppo/3DBall.yaml --run-id=firstRun --resume
Your trained model will be at `models/<run-identifier>/<behavior_name>.nn` where
Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
`<behavior_name>` is the name of the `Behavior Name` of the agents corresponding
to the model. This file corresponds to your model's latest checkpoint. You can
now embed this trained model into your Agents by following the steps below,

43
docs/Learning-Environment-Create-New.md


and include the following hyperparameter values:
```yml
RollerBall:
  trainer: ppo
  batch_size: 10
  beta: 5.0e-3
  buffer_size: 100
  epsilon: 0.2
  hidden_units: 128
  lambd: 0.95
  learning_rate: 3.0e-4
  learning_rate_schedule: linear
  max_steps: 5.0e4
  normalize: false
  num_epoch: 3
  num_layers: 2
  time_horizon: 64
  summary_freq: 10000
  use_recurrent: false
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.99
behaviors:
  RollerBall:
    trainer: ppo
    batch_size: 10
    beta: 5.0e-3
    buffer_size: 100
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e4
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    summary_freq: 10000
    use_recurrent: false
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
```
Since this example creates a very simple training environment with only a few

4
docs/Learning-Environment-Examples.md


does not train with the provided default training parameters.**
- Float Properties: None
- Benchmark Mean Reward: 0.7
- To speed up training, you can enable curiosity by adding the `curiosity`
reward signal in `config/trainer_config.yaml`
- To train this environment, you can enable curiosity by adding the `curiosity` reward signal
in `config/ppo/Hallway.yaml`
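
For illustration, a minimal sketch of such an addition under the behavior's `reward_signals` section (the behavior name and the strength/gamma/encoding_size values here are placeholders, not the tuned settings shipped with the toolkit):

```yml
behaviors:
  Hallway:
    # ... existing PPO hyperparameters ...
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
      curiosity:          # added to speed up training
        strength: 0.02
        gamma: 0.99
        encoding_size: 256
```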
## Bouncer

13
docs/Learning-Environment-Executable.md


the directory where you installed the ML-Agents Toolkit, run:
```sh
mlagents-learn ../config/trainer_config.yaml --env=3DBall --run-id=firstRun
mlagents-learn ../config/ppo/3DBall.yaml --env=3DBall --run-id=firstRun
ml-agents$ mlagents-learn config/trainer_config.yaml --env=3DBall --run-id=first-run
ml-agents$ mlagents-learn config/ppo/3DBall.yaml --env=3DBall --run-id=first-run
▄▄▄▓▓▓▓

sequence_length: 64
summary_freq: 1000
use_recurrent: False
summary_path: ./summaries/first-run-0
model_path: ./models/first-run-0/Ball3DLearning
output_path: ./results/first-run-0/Ball3DLearning
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.

INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 10000. Mean Reward: 27.284. Std of Reward: 28.667. Training.
```
You can press `Ctrl+C` to stop the training, and your trained model will be at
`models/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
You can press Ctrl+C to stop the training, and your trained model will be at
`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
trainer_config.yaml.) You can now embed this trained model into your Agent by
your config YAML.) You can now embed this trained model into your Agent by
following the steps below:
1. Move your model file into

19
docs/Migrating.md


## Migrating from Release 1 to latest
### Important changes
- Training artifacts (trained models, summaries) are now found under `results/`
instead of `summaries/` and `models/`.
- Trainer configuration, curriculum configuration, and parameter randomization
configuration have all been moved to a single YAML file. (#3791)
- Before upgrading, copy your `Behavior Name` sections from `trainer_config.yaml` into
a separate trainer configuration file, under a `behaviors` section. You can move the `default` section too
if it's being used. This file should be specific to your environment, and not contain configurations for
multiple environments (unless they have the same Behavior Names).
- If your training uses [curriculum](Training-Curriculum-Learning.md), move those configurations under
the `Behavior Name` section.
- If your training uses [parameter randomization](Training-Environment-Parameter-Randomization.md), move
the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
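
Put together, a migrated configuration file might look roughly like the following sketch (the behavior name, curriculum thresholds, and sampler values are placeholders for a hypothetical environment, not copied from a shipped config):

```yml
behaviors:
  MyBehavior:
    trainer: ppo
    # ... hyperparameters copied from your old trainer_config.yaml entry ...
    curriculum:            # moved from the old curriculum config, if used
      measure: progress
      thresholds: [0.1, 0.3, 0.5]
      min_lesson_length: 100
      signal_smoothing: true
      parameters:
        wall_height: [1.5, 2.0, 2.5, 4.0]

parameter_randomization:   # moved from the old sampler config, if used
  mass:
    sampler-type: uniform
    min_value: 0.5
    max_value: 10
```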
## Migrating from 0.15 to Release 1

longer takes a file name as input but a fully constructed `UnityEnvironment`
instead.
- Update uses of "camelCase" fields and properties to "PascalCase".
- If you have a custom `ISensor` implementation, you will need to change the
signature of its `Write()` method to use `ObservationWriter` instead of
`WriteAdapter`.
## Migrating from 0.14 to 0.15

- Multiply `max_steps` and `summary_freq` in your `trainer_config.yaml` by the
number of Agents in the scene.
- Combine curriculum configs into a single file. See
[the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of
[the WallJump curricula](https://github.com/Unity-Technologies/ml-agents/blob/0.14.1/config/curricula/wall_jump.yaml) for an example of
the new curriculum config format. A tool like https://www.json2yaml.com may be
useful to help with the conversion.
- If you have a model trained which uses RayPerceptionSensor and has non-1.0

- It is now required to specify the path to the yaml trainer configuration file
when running `mlagents-learn`. For an example trainer configuration file, see
[trainer_config.yaml](../config/trainer_config.yaml). An example of passing a
[trainer_config.yaml](https://github.com/Unity-Technologies/ml-agents/blob/0.5.0a/config/trainer_config.yaml). An example of passing a
trainer configuration to `mlagents-learn` is shown above.
- The environment name is now passed through the `--env` option.
- Curriculum learning has been changed. In summary:

2
docs/Using-Tensorboard.md


1. Open a terminal or console window:
1. Navigate to the directory where the ML-Agents Toolkit is installed.
1. From the command line run: `tensorboard --logdir=summaries --port=6006`
1. From the command line run: `tensorboard --logdir=results --port=6006`
1. Open a browser window and navigate to
[localhost:6006](http://localhost:6006).

3
gym-unity/README.md


We provide results from our PPO implementation and the DQN from Baselines as
reference. Note that all runs used the same greyscale GridWorld as Dopamine. For
PPO, `num_layers` was set to 2, and all other hyperparameters are the default
for GridWorld in `trainer_config.yaml`. For Baselines DQN, the provided
for GridWorld in `config/ppo/GridWorld.yaml`. For Baselines DQN, the provided
![Dopamine on GridWorld](images/dopamine_gridworld_plot.png)

4
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.16.0"
__version__ = "0.17.0.dev0"
__release_tag__ = "release_1"
__release_tag__ = None

4
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.16.0"
__version__ = "0.17.0.dev0"
__release_tag__ = "release_1"
__release_tag__ = None

30
ml-agents-envs/mlagents_envs/environment.py


seed: int = 0,
no_graphics: bool = False,
timeout_wait: int = 60,
args: Optional[List[str]] = None,
additional_args: Optional[List[str]] = None,
log_folder: Optional[str] = None,
):
"""
Starts a new unity environment and establishes a connection with the environment.

:int timeout_wait: Time (in seconds) to wait for connection from environment.
:list args: Additional Unity command line arguments
:list side_channels: Additional side channel for no-rl communication with Unity
:str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
args = args or []
self.additional_args = additional_args or []
self.no_graphics = no_graphics
# If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
# an environment, otherwise DEFAULT_EDITOR_PORT
if base_port is None:

)
)
self.side_channels[_sc.channel_id] = _sc
self.log_folder = log_folder
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

"the worker-id must be 0 in order to connect with the Editor."
)
if file_name is not None:
self.executable_launcher(file_name, no_graphics, args)
self.executable_launcher(file_name, no_graphics, additional_args)
else:
logger.info(
f"Listening on port {self.port}. "

launch_string = candidates[0]
return launch_string
def executable_args(self) -> List[str]:
args: List[str] = []
if self.no_graphics:
args += ["-nographics", "-batchmode"]
args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
if self.log_folder:
log_file_path = os.path.join(
self.log_folder, f"Player-{self.worker_id}.log"
)
args += ["-logFile", log_file_path]
# Add in arguments passed explicitly by the user.
args += self.additional_args
return args
def executable_launcher(self, file_name, no_graphics, args):
launch_string = self.validate_environment_path(file_name)
if launch_string is None:

else:
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
subprocess_args = [launch_string]
if no_graphics:
subprocess_args += ["-nographics", "-batchmode"]
subprocess_args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
subprocess_args += args
subprocess_args = [launch_string] + self.executable_args()
try:
self.proc1 = subprocess.Popen(
subprocess_args,

12
ml-agents-envs/mlagents_envs/tests/test_envs.py


@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
def test_log_file_path_is_set(mock_communicator, mock_launcher):
mock_communicator.return_value = MockCommunicator()
env = UnityEnvironment(
file_name="myfile", worker_id=0, log_folder="./some-log-folder-path"
)
args = env.executable_args()
log_file_index = args.index("-logFile")
assert args[log_file_index + 1] == "./some-log-folder-path/Player-0.log"
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
def test_reset(mock_communicator, mock_launcher):
mock_communicator.return_value = MockCommunicator(
discrete_action=False, visual_inputs=0

4
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.16.0"
__version__ = "0.17.0.dev0"
__release_tag__ = "release_1"
__release_tag__ = None

169
ml-agents/mlagents/trainers/learn.py


# # Unity ML-Agents Toolkit
import argparse
import yaml
import os
import numpy as np

load_config,
TrainerFactory,
handle_existing_directories,
assemble_curriculum_config,
)
from mlagents.trainers.stats import (
TensorboardWriter,

ConsoleWriter,
)
from mlagents.trainers.cli_utils import (
StoreConfigFile,
DetectDefault,
DetectDefaultStoreTrue,
)
from mlagents.trainers.exception import SamplerException
from mlagents.trainers.exception import SamplerException, TrainerConfigError
from mlagents_envs.base_env import BaseEnv
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents_envs.side_channel.side_channel import SideChannel

argparser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
argparser.add_argument("trainer_config_path")
argparser.add_argument("trainer_config_path", action=StoreConfigFile)
)
argparser.add_argument(
"--curriculum",
default=None,
dest="curriculum_config_path",
help="YAML file for defining the lessons for curriculum training",
action=DetectDefault,
)
argparser.add_argument(
"--lesson",

)
argparser.add_argument(
"--sampler",
default=None,
dest="sampler_file_path",
help="YAML file for defining the sampler for environment parameter randomization",
action=DetectDefault,
)
argparser.add_argument(
"--keep-checkpoints",

"number of steps specified by the save-freq option. Once the maximum number of checkpoints"
"has been reached, the oldest checkpoint is deleted when saving a new checkpoint.",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS, # Deprecated but still usable for now.
)
argparser.add_argument(

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
"If set, the training code loads an already trained model to initialize the neural network "
"before resuming training. This option is only valid when the models exist, and have the same "

"--force",
default=False,
dest="force",
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
"this flag, attempting to train a model with a run-id that has been used before will throw "
"an error.",

"as the saved model itself. If you use TensorBoard to view the training statistics, "
"always set a unique run-id for each training run. (The statistics for all runs with the "
"same id are combined as if they were produced by a the same session.)",
action=DetectDefault,
)
argparser.add_argument(
"--initialize-from",

"This can be used, for instance, to fine-tune an existing model on a new environment. "
"Note that the previously saved models must have the same behavior parameters as your "
"current environment.",
action=DetectDefault,
)
argparser.add_argument(
"--save-freq",

action=DetectDefault,
)
argparser.add_argument(
"--seed",

action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS,
)
argparser.add_argument(

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
"a model trained with an existing run ID.",
)

"will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
"each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
"than an executable, the base port will be ignored.",
action=DetectDefault,
)
argparser.add_argument(
"--num-envs",

"from when training",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
"the graphics driver. Use this only if your agents don't use visual observations.",
)

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to enable debug-level logging for some parts of the code",
)
argparser.add_argument(

"process these as Unity Command Line Arguments. You should choose different argument names if "
"you want to create environment-specific arguments. All arguments after this flag will be "
"passed to the executable.",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)

type=int,
help="The width of the executable window of the environment(s) in pixels "
"(ignored for editor training).",
action=DetectDefault,
)
eng_conf.add_argument(
"--height",

"(ignored for editor training)",
action=DetectDefault,
)
eng_conf.add_argument(
"--quality-level",

"QualitySettings.SetQualityLevel in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--time-scale",

"Time.timeScale in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--target-frame-rate",

"Application.targetFrameRate in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--capture-frame-rate",

"Time.captureFramerate in Unity.",
action=DetectDefault,
)
return argparser

class RunOptions(NamedTuple):
trainer_config: Dict
behaviors: Dict
debug: bool = parser.get_default("debug")
seed: int = parser.get_default("seed")
env_path: Optional[str] = parser.get_default("env_path")

lesson: int = parser.get_default("lesson")
no_graphics: bool = parser.get_default("no_graphics")
multi_gpu: bool = parser.get_default("multi_gpu")
sampler_config: Optional[Dict] = None
parameter_randomization: Optional[Dict] = None
env_args: Optional[List[str]] = parser.get_default("env_args")
cpu: bool = parser.get_default("cpu")
width: int = parser.get_default("width")

configs loaded from files.
"""
argparse_args = vars(args)
trainer_config_path = argparse_args["trainer_config_path"]
curriculum_config_path = argparse_args["curriculum_config_path"]
argparse_args["trainer_config"] = load_config(trainer_config_path)
if curriculum_config_path is not None:
argparse_args["curriculum_config"] = load_config(curriculum_config_path)
if argparse_args["sampler_file_path"] is not None:
argparse_args["sampler_config"] = load_config(
argparse_args["sampler_file_path"]
run_options_dict = {}
run_options_dict.update(argparse_args)
config_path = StoreConfigFile.trainer_config_path
# Load YAML
yaml_config = load_config(config_path)
# This is the only option that is not optional and has no defaults.
if "behaviors" not in yaml_config:
raise TrainerConfigError(
"Trainer configurations not found. Make sure your YAML file has a section for behaviors."
# Use the YAML file values for all values not specified in the CLI.
for key, val in yaml_config.items():
# Detect bad config options
if not hasattr(RunOptions, key):
raise TrainerConfigError(
"The option {} was specified in your YAML file, but is invalid.".format(
key
)
)
if key not in DetectDefault.non_default_args:
run_options_dict[key] = val
argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
# Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
# these keys will need to be deleted to use the **/splat operator below.
argparse_args.pop("sampler_file_path")
argparse_args.pop("curriculum_config_path")
argparse_args.pop("trainer_config_path")
return RunOptions(**vars(args))
run_options_dict["resume"] = (
run_options_dict["resume"] or run_options_dict["load_model"]
)
return RunOptions(**run_options_dict)
def get_version_string() -> str:

:param run_options: Command line arguments for training.
"""
with hierarchical_timer("run_training.setup"):
model_path = f"./models/{options.run_id}"
base_path = "results"
write_path = os.path.join(base_path, options.run_id)
f"./models/{options.initialize_from}" if options.initialize_from else None
os.path.join(base_path, options.run_id) if options.initialize_from else None
summaries_dir = "./summaries"
run_logs_dir = os.path.join(write_path, "run_logs")
# Check if directory exists
handle_existing_directories(
write_path, options.resume, options.force, maybe_init_path
)
# Make run logs directory
os.makedirs(run_logs_dir, exist_ok=True)
summaries_dir,
write_path,
handle_existing_directories(
model_path, summaries_dir, options.resume, options.force, maybe_init_path
)
tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
StatsReporter.add_writer(tb_writer)

if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path, options.no_graphics, run_seed, port, options.env_args
options.env_path,
options.no_graphics,
run_seed,
port,
options.env_args,
os.path.abspath(run_logs_dir), # Unity environment requires absolute path
)
engine_config = EngineConfig(
width=options.width,

capture_frame_rate=options.capture_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
curriculum_config = assemble_curriculum_config(options.behaviors)
options.curriculum_config, env_manager, options.lesson
curriculum_config, env_manager, options.lesson
options.sampler_config, run_seed
options.parameter_randomization, run_seed
options.trainer_config,
summaries_dir,
options.behaviors,
model_path,
write_path,
options.keep_checkpoints,
not options.inference,
options.resume,

# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
write_path,
options.run_id,
options.save_freq,
maybe_meta_curriculum,

tc.start_learning(env_manager)
finally:
env_manager.close()
write_timing_tree(summaries_dir, options.run_id)
write_run_options(write_path, options)
write_timing_tree(run_logs_dir)
def write_timing_tree(summaries_dir: str, run_id: str) -> None:
timing_path = f"{summaries_dir}/{run_id}_timers.json"
def write_run_options(output_dir: str, run_options: RunOptions) -> None:
run_options_path = os.path.join(output_dir, "configuration.yaml")
try:
with open(run_options_path, "w") as f:
try:
yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
except TypeError: # Older versions of pyyaml don't support sort_keys
yaml.dump(dict(run_options._asdict()), f)
except FileNotFoundError:
logger.warning(
f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
)
def write_timing_tree(output_dir: str) -> None:
timing_path = os.path.join(output_dir, "timers.json")
try:
with open(timing_path, "w") as f:
json.dump(get_timer_tree(), f, indent=4)

def try_create_meta_curriculum(
curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int
) -> Optional[MetaCurriculum]:
if curriculum_config is None:
if curriculum_config is None or len(curriculum_config) <= 0:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_config)

seed: int,
start_port: int,
env_args: Optional[List[str]],
log_folder: str,
) -> Callable[[int, List[SideChannel]], BaseEnv]:
if env_path is not None:
launch_string = UnityEnvironment.validate_environment_path(env_path)

seed=env_seed,
no_graphics=no_graphics,
base_port=start_port,
args=env_args,
additional_args=env_args,
log_folder=log_folder,
)
return create_unity_environment

5
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional
import abc
import os
import numpy as np
from mlagents.tf_utils import tf
from mlagents import tf_utils

self.use_continuous_act = brain.vector_action_space_type == "continuous"
if self.use_continuous_act:
self.num_branches = self.brain.vector_action_space_size[0]
self.model_path = trainer_parameters["model_path"]
self.model_path = trainer_parameters["output_path"]
self.initialize_path = trainer_parameters.get("init_path", None)
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()

:return:
"""
with self.graph.as_default():
last_checkpoint = self.model_path + "/model-" + str(steps) + ".ckpt"
last_checkpoint = os.path.join(self.model_path, f"model-{steps}.ckpt")
self.saver.save(self.sess, last_checkpoint)
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False

3
ml-agents/mlagents/trainers/ppo/trainer.py


"sequence_length",
"summary_freq",
"use_recurrent",
"summary_path",
"model_path",
"output_path",
"reward_signals",
]
self._check_param_keys()

7
ml-agents/mlagents/trainers/sac/trainer.py


"summary_freq",
"tau",
"use_recurrent",
"summary_path",
"model_path",
"output_path",
"reward_signals",
]

Save the training buffer's update buffer to a pickle file.
"""
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
self.trainer_parameters["output_path"], "last_replay_buffer.hdf5"
)
logger.info("Saving Experience Replay Buffer to {}".format(filename))
with open(filename, "wb") as file_object:

Loads the last saved replay buffer from a file.
"""
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
self.trainer_parameters["output_path"], "last_replay_buffer.hdf5"
)
logger.info("Loading Experience Replay Buffer from {}".format(filename))
with open(filename, "rb+") as file_object:

6
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0

@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config["summary_path"] = str(tmpdir)
dummy_config["model_path"] = os.path.join(tmpdir, "test")
dummy_config["output_path"] = os.path.join(tmpdir, "test")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)

2
ml-agents/mlagents/trainers/tests/test_bcmodule.py


def create_bc_module(mock_brain, trainer_config, use_rnn, demo_file, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config["model_path"] = "testpath"
trainer_config["output_path"] = "testpath"
trainer_config["keep_checkpoints"] = 3
trainer_config["use_recurrent"] = use_rnn
trainer_config["behavioral_cloning"]["demo_path"] = (

9
ml-agents/mlagents/trainers/tests/test_ghost.py


memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0

vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(

vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(

133
ml-agents/mlagents/trainers/tests/test_learn.py


import pytest
import yaml
from mlagents.trainers.learn import parse_command_line
from mlagents.trainers.learn import parse_command_line, DetectDefault
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter

return parse_command_line(args)
MOCK_YAML = """
behaviors:
{}
"""
MOCK_PARAMETER_YAML = """
behaviors:
{}
env_path: "./oldenvfile"
keep_checkpoints: 34
lesson: 2
run_id: uselessrun
save_freq: 654321
seed: 9870
base_port: 4001
num_envs: 4
debug: false
"""
MOCK_SAMPLER_CURRICULUM_YAML = """
behaviors:
behavior1:
curriculum:
curriculum1
behavior2:
curriculum:
curriculum2
parameter_randomization:
sampler1
"""
@patch("mlagents.trainers.learn.write_timing_tree")
@patch("mlagents.trainers.learn.write_run_options")
@patch("mlagents.trainers.learn.handle_existing_directories")
@patch("mlagents.trainers.learn.TrainerFactory")
@patch("mlagents.trainers.learn.SamplerManager")

sampler_manager_mock,
trainer_factory_mock,
handle_dir_mock,
write_run_options_mock,
write_timing_tree_mock,
trainer_config_mock = MagicMock()
load_config.return_value = trainer_config_mock
load_config.return_value = yaml.safe_load(MOCK_YAML)
learn.run_training(0, basic_options())
options = basic_options()
learn.run_training(0, options)
"./models/ppo",
"./summaries",
"results/ppo",
"ppo",
50000,
None,

None,
)
handle_dir_mock.assert_called_once_with(
"./models/ppo", "./summaries", False, False, None
)
handle_dir_mock.assert_called_once_with("results/ppo", False, False, None)
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

seed=None,
start_port=8000,
env_args=None,
log_folder="results/log_folder",
@patch("builtins.open", new_callable=mock_open, read_data="{}")
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML)
assert opt.trainer_config == {}
assert opt.behaviors == {}
assert opt.curriculum_config is None
assert opt.sampler_config is None
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 5
assert opt.lesson == 0
assert opt.resume is False

full_args = [
"mytrainerpath",
"--env=./myenvfile",
"--curriculum=./mycurriculum",
"--sampler=./mysample",
"--keep-checkpoints=42",
"--lesson=3",
"--resume",
"--inference",
"--run-id=myawesomerun",
"--save-freq=123456",
"--seed=7890",
"--train",
"--base-port=4004",
"--num-envs=2",
"--no-graphics",
"--debug",
]
opt = parse_command_line(full_args)
assert opt.behaviors == {}
assert opt.env_path == "./myenvfile"
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 42
assert opt.lesson == 3
assert opt.run_id == "myawesomerun"
assert opt.save_freq == 123456
assert opt.seed == 7890
assert opt.base_port == 4004
assert opt.num_envs == 2
assert opt.no_graphics is True
assert opt.debug is True
assert opt.inference is True
assert opt.resume is True
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_PARAMETER_YAML)
def test_yaml_args(mock_file):
# Test with opts loaded from YAML
DetectDefault.non_default_args.clear()
opt = parse_command_line(["mytrainerpath"])
assert opt.behaviors == {}
assert opt.env_path == "./oldenvfile"
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 34
assert opt.lesson == 2
assert opt.run_id == "uselessrun"
assert opt.save_freq == 654321
assert opt.seed == 9870
assert opt.base_port == 4001
assert opt.num_envs == 4
assert opt.no_graphics is False
assert opt.debug is False
assert opt.env_args is None
# Test that CLI overrides YAML
full_args = [
"mytrainerpath",
"--env=./myenvfile",
"--keep-checkpoints=42",
"--lesson=3",
"--resume",

]
opt = parse_command_line(full_args)
assert opt.trainer_config == {}
assert opt.behaviors == {}
assert opt.curriculum_config == {}
assert opt.sampler_config == {}
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 42
assert opt.lesson == 3
assert opt.run_id == "myawesomerun"

assert opt.resume is True
@patch("builtins.open", new_callable=mock_open, read_data="{}")
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_SAMPLER_CURRICULUM_YAML)
def test_sampler_configs(mock_file):
opt = parse_command_line(["mytrainerpath"])
assert opt.parameter_randomization == "sampler1"
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML)
def test_env_args(mock_file):
full_args = [
"mytrainerpath",

8
ml-agents/mlagents/trainers/tests/test_nn_policy.py


memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0

path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = dummy_config
trainer_params["model_path"] = path1
trainer_params["output_path"] = path1
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy.save_model(2000)

vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
time_horizon = 6
trajectory = make_fake_trajectory(

2
ml-agents/mlagents/trainers/tests/test_policy.py


def basic_params():
return {"use_recurrent": False, "model_path": "my/path"}
return {"use_recurrent": False, "output_path": "my/path"}
class FakePolicy(TFPolicy):

9
ml-agents/mlagents/trainers/tests/test_ppo.py


memory_size: 10
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0

vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = trainer.create_policy(brain_params.brain_name, brain_params)
trainer.add_policy(brain_params.brain_name, policy)

mock_optimizer.reward_signals = {}
ppo_optimizer.return_value = mock_optimizer
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = mock.Mock(spec=NNPolicy)
policy.get_current_step.return_value = 2000

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


)
trainer_parameters = trainer_config
model_path = "testpath"
trainer_parameters["model_path"] = model_path
trainer_parameters["output_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
trainer_parameters["reward_signals"].update(reward_signal_config)
trainer_parameters["use_recurrent"] = use_rnn

2
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


def dummy_config():
return yaml.safe_load(
"""
summary_path: "test/"
output_path: "test/"
summary_freq: 1000
max_steps: 100
reward_signals:

14
ml-agents/mlagents/trainers/tests/test_sac.py


trainer_parameters = dummy_config
model_path = "testmodel"
trainer_parameters["model_path"] = model_path
trainer_parameters["output_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
trainer_parameters["use_recurrent"] = use_rnn
policy = NNPolicy(

discrete_action_space=DISCRETE_ACTION_SPACE,
)
trainer_params = dummy_config
trainer_params["summary_path"] = str(tmpdir)
trainer_params["model_path"] = str(tmpdir)
trainer_params["output_path"] = str(tmpdir)
trainer_params["save_replay_buffer"] = True
trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)

mock_optimizer.reward_signals = {}
sac_optimizer.return_value = mock_optimizer
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = mock.Mock(spec=NNPolicy)
policy.get_current_step.return_value = 2000