浏览代码

Merge branch 'master' into develop-gym-wrapper

/develop/gym-wrapper
vincentpierre 4 年前
当前提交
6ddfe74f
共有 50 个文件被更改,包括 1203 次插入654 次删除
  1. 1
      .github/ISSUE_TEMPLATE/bug_report.md
  2. 23
      .pre-commit-config.yaml
  3. 11
      .yamato/com.unity.ml-agents-test.yml
  4. 22
      .yamato/gym-interface-test.yml
  5. 14
      .yamato/protobuf-generation-test.yml
  6. 21
      .yamato/python-ll-api-test.yml
  7. 20
      .yamato/standalone-build-test.yml
  8. 25
      .yamato/training-int-tests.yml
  9. 23
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
  10. 35
      README.md
  11. 3
      com.unity.ml-agents/CHANGELOG.md
  12. 20
      com.unity.ml-agents/Editor/BrainParametersDrawer.cs
  13. 18
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  14. 13
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  15. 5
      docs/Learning-Environment-Create-New.md
  16. 3
      docs/Learning-Environment-Design-Agents.md
  17. 2
      docs/Learning-Environment-Executable.md
  18. 4
      docs/Migrating.md
  19. 22
      docs/Python-API.md
  20. 338
      docs/Training-ML-Agents.md
  21. 8
      docs/Using-Tensorboard.md
  22. 21
      gym-unity/gym_unity/envs/__init__.py
  23. 5
      gym-unity/gym_unity/tests/test_gym.py
  24. 50
      ml-agents-envs/mlagents_envs/base_env.py
  25. 293
      ml-agents-envs/mlagents_envs/environment.py
  26. 54
      ml-agents-envs/mlagents_envs/tests/test_envs.py
  27. 118
      ml-agents-envs/mlagents_envs/tests/test_side_channel.py
  28. 14
      ml-agents/mlagents/trainers/learn.py
  29. 25
      ml-agents/mlagents/trainers/models.py
  30. 1
      ml-agents/mlagents/trainers/policy/tf_policy.py
  31. 34
      ml-agents/mlagents/trainers/ppo/optimizer.py
  32. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  33. 12
      ml-agents/mlagents/trainers/sac/optimizer.py
  34. 8
      ml-agents/mlagents/trainers/simple_env_manager.py
  35. 10
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  36. 12
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  37. 5
      ml-agents/mlagents/trainers/tests/test_learn.py
  38. 4
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  39. 4
      ml-agents/mlagents/trainers/tests/test_simple_rl.py
  40. 6
      ml-agents/tests/yamato/check_coverage_percent.py
  41. 4
      ml-agents/tests/yamato/scripts/run_llapi.py
  42. 21
      ml-agents/tests/yamato/training_int_tests.py
  43. 28
      ml-agents/tests/yamato/yamato_utils.py
  44. 6
      utils/validate_versions.py
  45. 37
      com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
  46. 95
      docs/Versioning.md
  47. 108
      ml-agents-envs/mlagents_envs/env_utils.py
  48. 81
      ml-agents-envs/mlagents_envs/side_channel/side_channel_manager.py
  49. 64
      ml-agents-envs/mlagents_envs/tests/test_env_utils.py
  50. 102
      ml-agents-envs/mlagents_envs/tests/test_steps.py

1
.github/ISSUE_TEMPLATE/bug_report.md


If applicable, add screenshots to help explain your problem.
**Environment (please complete the following information):**
- Unity Version: [e.g. Unity 2020.1f1]
- OS + version: [e.g. Windows 10]
- _ML-Agents version_: (e.g. ML-Agents v0.8, or latest `develop` branch from source)
- _TensorFlow version_: (you can run `pip3 show tensorflow` to get this)

23
.pre-commit-config.yaml


files: "gym-unity/.*"
args: [--ignore-missing-imports, --disallow-incomplete-defs]
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.1
hooks:
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
rev: v2.4.0
rev: v2.5.0
hooks:
- id: mixed-line-ending
exclude: >

.*.meta
)$
args: [--fix=lf]
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.1.4, flake8-tidy-imports==4.0.0, flake8-bugbear==20.1.2]
- id: trailing-whitespace
name: trailing-whitespace-markdown
types: [markdown]

11
.yamato/com.unity.ml-agents-test.yml


triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
changes:
only:
- "com.unity.ml-agents/**"
- "ml-agents/tests/yamato/**"
- ".yamato/com.unity.ml-agents-test.yml"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match ".yamato/com.unity.ml-agents-test.yml")
{% endif %}
{% endfor %}
{% endfor %}

22
.yamato/gym-interface-test.yml


- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/gym-interface-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

14
.yamato/protobuf-generation-test.yml


git diff -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" > artifacts/proto.patch; exit $GIT_ERR; }
triggers:
cancel_old_ci: true
changes:
only:
- "protobuf-definitions/**"
- ".yamato/protobuf-generation-test.yml"
except:
- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR
pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
artifacts:
patch:
paths:

21
.yamato/python-ll-api-test.yml


- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/python-ll-api-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

20
.yamato/standalone-build-test.yml


- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- ".yamato/standalone-build-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match ".yamato/standalone-build-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
artifacts:
logs:
paths:

25
.yamato/training-int-tests.yml


# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.16.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=1.0.0
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/training-int-tests.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/training-int-tests.yml") AND
NOT pull_request.changes.all match "**/*.md"
artifacts:
logs:
paths:

23
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


{
const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";
// The attached Agent
Agent m_Agent;

int m_MaxEpisodes;
int m_NumSteps;
bool m_QuitOnLoadFailure;
/// <summary>
/// Get the asset path to use from the commandline arguments.

var maxEpisodes = 0;
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)
for (var i = 0; i < args.Length; i++)
{
if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length-2)
{

}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag)
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1)
}
else if (args[i] == k_CommandLineQuitOnLoadFailure)
{
m_QuitOnLoadFailure = true;
}
}

var behaviorName = bp.BehaviorName;
var nnModel = GetModelForBehaviorName(behaviorName);
Debug.Log($"Overriding behavior {behaviorName} for agent with model {nnModel?.name}");
if (nnModel == null && m_QuitOnLoadFailure)
{
Debug.Log(
$"Didn't find a model for behaviorName {behaviorName}. Make " +
$"sure the behaviorName is set correctly in the commandline " +
$"and that the model file exists"
);
Application.Quit(1);
}
var modelName = nnModel != null ? nnModel.name : "<null>";
Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
// This might give a null model; that's better because we'll fall back to the Heuristic
m_Agent.SetModel($"Override_{behaviorName}", nnModel);

35
README.md


## Releases & Documentation
**Our latest, stable release is `Release 1`. Click [here](docs/Readme.md) to
get started with the latest release of ML-Agents.**
**Our latest, stable release is `Release 1`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_1/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
The table below lists all our releases, including our `master` branch which is
under active development and may be unstable. A few helpful guidelines:
- The [Versioning page](docs/Versioning.md) overviews how we manage our GitHub
releases and the versioning process for each of the ML-Agents components.
- The [Releases page](https://github.com/Unity-Technologies/ml-agents/releases)
contains details of the changes between releases.
- The [Migration page](docs/Migrating.md) contains details on how to upgrade
from earlier releases of the ML-Agents Toolkit.
- The **Documentation** links in the table below include installation and usage
instructions specific to each release. Remember to always use the
documentation that corresponds to the release version you're using.
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|

If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you
cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D.
(2018). Unity: A General Platform for Intelligent Agents. _arXiv preprint
arXiv:1809.02627._ https://github.com/Unity-Technologies/ml-agents.
Juliani, A., Berges, V., Teng, E., Cohen, A., Harper, J., Elion, C., Goy, C.,
Gao, Y., Henry, H., Mattar, M., Lange, D. (2020). Unity: A General Platform for
Intelligent Agents. _arXiv preprint
[arXiv:1809.02627](https://arxiv.org/abs/1809.02627)._
https://github.com/Unity-Technologies/ml-agents.
- (May 12, 2020)
[Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
- (February 28, 2020)
[Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
- (November 11, 2019)

3
com.unity.ml-agents/CHANGELOG.md


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
### Bug Fixes
- An issue was fixed where using `--initialize-from` would resume from the past step count. (#3962)
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

20
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
EditorGUI.PropertyField(

static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(
var newSize = EditorGUI.IntField(
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (newSize != vecActionSize.arraySize)
{
vecActionSize.arraySize = newSize;
}
position.y += k_LineHeight;
position.x += 20;
position.width -= 20;

18
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto
var agentActionProto = new AgentActionProto();
if(ai.storedVectorActions != null)
VectorActions = { ai.storedVectorActions }
};
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
}
return new AgentInfoActionPairProto
{

var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType =
(SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
if(bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// </summary>
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demonstrationName = dm.demonstrationName ?? "";
var demoProto = new DemonstrationMetaProto
{
ApiVersion = DemonstrationMetaData.ApiVersion,

DemonstrationName = dm.demonstrationName
DemonstrationName = demonstrationName
};
return demoProto;
}

13
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)
{
sensor.GetObservationProto(m_ObsWriter);
}
OnRequestDecision?.Invoke();

agent1.SetPolicy(policy);
StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
foreach (ISensor s in agent1.sensors)
{
if (s is StackingSensor)
{
sensor = s as StackingSensor;
}
}

{
agent1.RequestDecision();
aca.EnvironmentStep();
}
policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});

5
docs/Learning-Environment-Create-New.md


learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e4
memory_size: 128
normalize: false
num_epoch: 3
num_layers: 2

reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
strength: 1.0
gamma: 0.99
```
Since this example creates a very simple training environment with only a few

3
docs/Learning-Environment-Design-Agents.md


0, rays will be used instead of spheres. Rays may be more efficient,
especially in complex scenes.
- _Ray Length_ The length of the casts
- _Ray Layer Mask_ The [LayerMask](https://docs.unity3d.com/ScriptReference/LayerMask.html)
passed to the raycast or spherecast. This can be used to ignore certain types
of objects when casting.
- _Observation Stacks_ The number of previous results to "stack" with the cast
results. Note that this can be independent of the "Stacked Vectors" setting in
`Behavior Parameters`.

2
docs/Learning-Environment-Executable.md


the directory where you installed the ML-Agents Toolkit, run:
```sh
mlagents-learn ../config/ppo/3DBall.yaml --env=3DBall --run-id=firstRun
mlagents-learn config/ppo/3DBall.yaml --env=3DBall --run-id=firstRun
```
And you should see something like

4
docs/Migrating.md


- Trainer configuration, curriculum configuration, and parameter randomization
configuration have all been moved to a single YAML file. (#3791)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- On the UnityEnvironment API, `get_behavior_names()` and `get_behavior_specs()` methods were combined into the property `behavior_specs` that contains a mapping from behavior names to behavior spec.
### Steps to Migrate
- Before upgrading, copy your `Behavior Name` sections from `trainer_config.yaml` into

the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
- If you are using `UnityEnvironment` directly, replace `max_step` with `interrupted`
in the `TerminalStep` and `TerminalSteps` objects.
- Replace usage of `get_behavior_names()` and `get_behavior_specs()` in UnityEnvironment with `behavior_specs`.
## Migrating from 0.15 to Release 1

data in the new MonoBehaviour instead.
- If the class overrode the virtual methods, create a new MonoBehaviour and
move the logic to it:
- Move the InitializeAcademy code to MonoBehaviour.OnAwake
- Move the InitializeAcademy code to MonoBehaviour.Awake
- Move the AcademyStep code to MonoBehaviour.FixedUpdate
- Move the OnDestroy code to MonoBehaviour.OnDestroy.
- Move the AcademyReset code to a new method and add it to the

22
docs/Python-API.md


```python
from mlagents_envs.environment import UnityEnvironment
# This is a non-blocking call that only loads the environment.
# Start interacting with the evironment.
env.reset()
behavior_names = env.behavior_spec.keys()
...
**NOTE:** Please read [Interacting with a Unity Environment](#interacting-with-a-unity-environment)
to read more about how you can interact with the Unity environment from Python.
- `file_name` is the name of the environment binary (located in the root
directory of the python project).

act.
- **Close : `env.close()`** Sends a shutdown signal to the environment and
terminates the communication.
- **Get Behavior Names : `env.get_behavior_names()`** Returns a list of
`BehaviorName`. Note that the number of groups can change over time in the
simulation if new Agent behaviors are created in the simulation.
- **Get Behavior Spec : `env.get_behavior_spec(behavior_name: str)`** Returns
the `BehaviorSpec` corresponding to the behavior_name given as input. A
`BehaviorSpec` contains information such as the observation shapes, the action
type (multi-discrete or continuous) and the action shape. Note that the
`BehaviorSpec` for a specific group is fixed throughout the simulation.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time in the simulation
if new Agent behaviors are created in the simulation.
- **Get Steps : `env.get_steps(behavior_name: str)`** Returns a tuple
`DecisionSteps, TerminalSteps` corresponding to the behavior_name given as
input. The `DecisionSteps` contains information about the state of the agents

338
docs/Training-ML-Agents.md


#### Observing Training
Regardless of which training methods, configurations or hyperparameters you
provide, the training process will always generate three artifacts:
provide, the training process will always generate three artifacts, all found
in the `results/<run-identifier>` folder:
1. Summaries (under the `summaries/` folder): these are training metrics that
1. Summaries: these are training metrics that
1. Models (under the `models/` folder): these contain the model checkpoints that
1. Models: these contain the model checkpoints that
1. Timers file (also under the `summaries/` folder): this contains aggregated
1. Timers file (under `results/<run-identifier>/run_logs`): this contains aggregated
metrics on your training process, including time spent on specific code
blocks. See [Profiling in Python](Profiling-Python.md) for more information
on the timers generated.

This section offers a detailed guide into how to manage the different training
set-ups withing the toolkit.
More specifically, this section offers a detailed guide on four command-line
More specifically, this section offers a detailed guide on the command-line
Behavior in the scene
- `--curriculum`: defines the set-up for Curriculum Learning
- `--sampler`: defines the set-up for Environment Parameter Randomization
Behavior in the scene, and the set-ups for Curriculum Learning and
Environment Parameter Randomization
- `--num-envs`: number of concurrent Unity instances to use during training
Reminder that a detailed description of all command-line options can be found by

process when the default parameters don't seem to be giving the level of
performance you would like. We provide sample configuration files for our
example environments in the [config/](../config/) directory. The
`config/trainer_config.yaml` was used to train the 3D Balance Ball in the
`config/ppo/3DBall.yaml` was used to train the 3D Balance Ball in the
[Getting Started](Getting-Started.md) guide. That configuration file uses the
PPO trainer, but we also have configuration files for SAC and GAIL.

add typically has its own training configurations or additional configuration
files. For instance:
add typically has its own training configurations. For instance:
- Use PPO or SAC?
- Use Recurrent Neural Networks for adding memory to your agents?

demonstrations.)
- Use self-play? (Assuming your environment includes multiple agents.)
The answers to the above questions will dictate the configuration files and the
parameters within them. The rest of this section breaks down the different
configuration files and explains the possible settings for each.
The trainer config file, `<trainer-config-file>`, determines the features you will
use during training, and the answers to the above questions will dictate its contents.
The rest of this guide breaks down the different sub-sections of the trainer config file
and explains the possible settings for each.
### Trainer Config File
### Behavior Configurations
We begin with the trainer config file, `<trainer-config-file>`, which includes a
set of configurations for each Behavior in your scene. Some of the
The primary section of the trainer config file is a
set of configurations for each Behavior in your scene. These are defined under
the sub-section `behaviors` in your trainer config file. Some of the
curriculum and environment parameter randomization settings are not part of this
file, but their settings live in different files that we'll cover in subsequent
sections.
curriculum and environment parameter randomization settings are not part of the `behaviors`
configuration, but their settings live in different sections that we'll cover subsequently.
BehaviorPPO:
trainer: ppo
behaviors:
BehaviorPPO:
trainer: ppo
# Trainer configs common to PPO/SAC (excluding reward signals)
batch_size: 1024
buffer_size: 10240
hidden_units: 128
learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e5
normalize: false
num_layers: 2
time_horizon: 64
vis_encoder_type: simple
# Trainer configs common to PPO/SAC (excluding reward signals)
batch_size: 1024
buffer_size: 10240
hidden_units: 128
learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e5
normalize: false
num_layers: 2
time_horizon: 64
vis_encoder_type: simple
# PPO-specific configs
beta: 5.0e-3
epsilon: 0.2
lambd: 0.95
num_epoch: 3
threaded: true
# PPO-specific configs
beta: 5.0e-3
epsilon: 0.2
lambd: 0.95
num_epoch: 3
threaded: true
# memory
use_recurrent: true
sequence_length: 64
memory_size: 256
# memory
use_recurrent: true
sequence_length: 64
memory_size: 256
# behavior cloning
behavioral_cloning:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 150000
batch_size: 512
num_epoch: 3
samples_per_update: 0
init_path:
# behavior cloning
behavioral_cloning:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 150000
batch_size: 512
num_epoch: 3
samples_per_update: 0
init_path:
reward_signals:
# environment reward
extrinsic:
strength: 1.0
gamma: 0.99
reward_signals:
# environment reward
extrinsic:
strength: 1.0
gamma: 0.99
# curiosity module
curiosity:
strength: 0.02
gamma: 0.99
encoding_size: 256
learning_rate: 3e-4
# curiosity module
curiosity:
strength: 0.02
gamma: 0.99
encoding_size: 256
learning_rate: 3e-4
# GAIL
gail:
strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
learning_rate: 3e-4
use_actions: false
use_vail: false
# GAIL
gail:
strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
learning_rate: 3e-4
use_actions: false
use_vail: false
# self-play
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000
# self-play
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000
```
Here is an equivalent file if we use an SAC trainer instead. Notice that the

```yaml
BehaviorSAC:
trainer: sac
# Trainer configs common to PPO/SAC (excluding reward signals)
# same as PPO config
behaviors:
BehaviorSAC:
trainer: sac
# SAC-specific configs (replaces the "PPO-specific configs" section above)
buffer_init_steps: 0
tau: 0.005
steps_per_update: 1
train_interval: 1
init_entcoef: 1.0
save_replay_buffer: false
# Trainer configs common to PPO/SAC (excluding reward signals)
# same as PPO config
# memory
# same as PPO config
# SAC-specific configs (replaces the "PPO-specific configs" section above)
buffer_init_steps: 0
tau: 0.005
steps_per_update: 1
train_interval: 1
init_entcoef: 1.0
save_replay_buffer: false
# pre-training using behavior cloning
behavioral_cloning:
# memory
reward_signals:
reward_signal_num_update: 1 # only applies to SAC
# pre-training using behavior cloning
behavioral_cloning:
# same as PPO config
reward_signals:
reward_signal_num_update: 1 # only applies to SAC
# environment reward
extrinsic:
# same as PPO config
# environment reward
extrinsic:
# same as PPO config
# curiosity module
curiosity:
# same as PPO config
# curiosity module
curiosity:
# same as PPO config
# GAIL
gail:
# same as PPO config
# GAIL
gail:
# self-play
self_play:
# self-play
self_play:
# same as PPO config
```
We now break apart the components of the configuration file and describe what

### Curriculum Learning
To enable curriculum learning, you need to provide the `--curriculum` CLI option
and point to a YAML file that defines the curriculum. Here is one example file:
To enable curriculum learning, you need to add a sub-section to the corresponding
`behaivors` entry in the trainer config YAML file that defines the curriculum for that
behavior. Here is one example:
BehaviorY:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
wall_height: [1.5, 2.0, 2.5, 4.0]
behaviors:
BehaviorY:
# < Same as above >
# Add this section
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
wall_height: [1.5, 2.0, 2.5, 4.0]
```
Each group of Agents under the same `Behavior Name` in an environment can have a

In order to define the curricula, the first step is to decide which parameters
of the environment will vary. In the case of the Wall Jump environment, the
height of the wall is what varies. Rather than adjusting it by hand, we will
create a YAML file which describes the structure of the curricula. Within it, we
create a configuration which describes the structure of the curricula. Within it, we
can specify which points in the training process our wall height will change,
either based on the percentage of training steps which have taken place, or what
the average reward the agent has received in the recent past is. Below is an

BigWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
behaviors:
BigWallJump:
# < Trainer parameters for BigWallJump >
# Curriculum configuration
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
# < Trainer parameters for BigWallJump >
# Curriculum configuration
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
```
The curriculum for each Behavior has the following parameters:

#### Training with a Curriculum
Once we have specified our metacurriculum and curricula, we can launch
`mlagents-learn` using the `–curriculum` flag to point to the config file for
`mlagents-learn` using the config file for
mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum
mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
```
We can then keep track of the current lessons and progresses via TensorBoard.

### Environment Parameter Randomization
To enable parameter randomization, you need to provide the `--sampler` CLI
option and point to a YAML file that defines the curriculum. Here is one example
file:
To enable parameter randomization, you need to add a `parameter-randomization` sub-section
to your trainer config YAML file. Here is one example:
resampling-interval: 5000
behaviors:
# < Same as above>
mass:
sampler-type: "uniform"
min_value: 0.5
max_value: 10
parameter_randomization:
resampling-interval: 5000
gravity:
sampler-type: "multirange_uniform"
intervals: [[7, 10], [15, 20]]
mass:
sampler-type: "uniform"
min_value: 0.5
max_value: 10
scale:
sampler-type: "uniform"
min_value: 0.75
max_value: 3
gravity:
sampler-type: "multirange_uniform"
intervals: [[7, 10], [15, 20]]
scale:
sampler-type: "uniform"
min_value: 0.75
max_value: 3
```
Note that `mass`, `gravity` and `scale` are the names of the environment

`interval_2_max`], ...]
- **sub-arguments** - `intervals`
The implementation of the samplers can be found at
`ml-agents-envs/mlagents_envs/sampler_class.py`.
The implementation of the samplers can be found in the
[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).
#### Defining a New Sampler Type

#### Training with Environment Parameter Randomization
After the sampler YAML file is defined, we proceed by launching `mlagents-learn`
and specify our configured sampler file with the `--sampler` flag. For example,
After the sampler configuration is defined, we proceed by launching `mlagents-learn`
and specify trainer configuration with `parameter-randomization` defined. For example,
`Environment Parameters` with `config/3dball_randomize.yaml` sampling setup, we
would run
`Environment Parameters` with sampling setup, we would run
mlagents-learn config/trainer_config.yaml --sampler=config/3dball_randomize.yaml
--run-id=3D-Ball-randomize
mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize
```
We can observe progress and metrics via Tensorboard.

- **Buffer Size** - If you are having trouble getting an agent to train, even
with multiple concurrent Unity instances, you could increase `buffer_size` in
the `config/trainer_config.yaml` file. A common practice is to multiply
the trainer config file. A common practice is to multiply
`buffer_size` by `num-envs`.
- **Resource Constraints** - Invoking concurrent Unity instances is constrained
by the resources on the machine. Please use discretion when setting

8
docs/Using-Tensorboard.md


[TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard).
The `mlagents-learn` command saves training statistics to a folder named
`summaries`, organized by the `run-id` value you assign to a training session.
`results`, organized by the `run-id` value you assign to a training session.
In order to observe the training process, either during training or afterward,
start TensorBoard:

the --port option.
**Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
default string, "ppo". All the statistics will be saved to the same sub-folder
and displayed as one session in TensorBoard. After a few runs, the displays can
become difficult to interpret in this situation. You can delete the folders
under the `summaries` directory to clear out old statistics.
default string, "ppo". You can delete the folders under the `results` directory
to clear out old statistics.
On the left side of the TensorBoard window, you can select which of the training
runs you want to display. You can select multiple run-ids to compare statistics.

21
gym-unity/gym_unity/envs/__init__.py


self._env = unity_env
# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():
if not self._env.behavior_specs:
self._n_agents = -1
# Save the step result from the last time all Agents requested decisions.
self._previous_decision_step: DecisionSteps = None

self._allow_multiple_visual_obs = allow_multiple_visual_obs
# Check brain configuration
if len(self._env.get_behavior_names()) != 1:
if len(self._env.behavior_specs) != 1:
self.name = self._env.get_behavior_names()[0]
self.group_spec = self._env.get_behavior_spec(self.name)
self.name = list(self._env.behavior_specs.keys())[0]
self.group_spec = self._env.behavior_specs[self.name]
if use_visual and self._get_n_vis_obs() == 0:
raise UnityGymException(

self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
self._check_agents(max(len(decision_step), len(terminal_step)))
if len(terminal_step) != 0:
# The agent is done
self.game_over = True

logger.warning("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents: int) -> None:
if self._n_agents > 1:
@staticmethod
def _check_agents(n_agents: int) -> None:
if n_agents > 1:
"There can only be one Agent in the environment but {n_agents} were detected."
f"There can only be one Agent in the environment but {n_agents} were detected."
)
@property

@property
def observation_space(self):
return self._observation_space
@property
def number_agents(self):
return self._n_agents
class ActionFlattener:

5
gym-unity/gym_unity/tests/test_gym.py


ActionType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,
)

setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)

:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
"""
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.behavior_specs = BehaviorMapping({"MockBrain": mock_spec})
mock_env.get_steps.return_value = (mock_decision, mock_termination)

50
ml-agents-envs/mlagents_envs/base_env.py


from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import List, NamedTuple, Tuple, Optional, Union, Dict, Iterator, Any
from typing import (
List,
NamedTuple,
Tuple,
Optional,
Union,
Dict,
Iterator,
Any,
Mapping as MappingType,
)
import numpy as np
from enum import Enum

return np.zeros((n_agents, self.action_size), dtype=np.float32)
class BehaviorMapping(Mapping):
def __init__(self, specs: Dict[BehaviorName, BehaviorSpec]):
self._dict = specs
def __len__(self) -> int:
return len(self._dict)
def __getitem__(self, behavior: BehaviorName) -> BehaviorSpec:
return self._dict[behavior]
def __iter__(self) -> Iterator[Any]:
yield from self._dict
class BaseEnv(ABC):
@abstractmethod
def step(self) -> None:

"""
pass
@abstractmethod
def reset(self) -> None:

pass
@abstractmethod
def close(self) -> None:

pass
@property
def get_behavior_names(self) -> List[BehaviorName]:
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
Returns the list of the behavior names present in the environment.
Returns a Mapping from behavior names to behavior specs.
This list can grow with time as new policies are instantiated.
:return: the list of agent BehaviorName.
Note that new keys can be added to this mapping as new policies are instantiated.
pass
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:

:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
"""
pass
@abstractmethod
def set_action_for_agent(

:param action: A one dimensional np.ndarray corresponding to the action
(either int or float)
"""
pass
@abstractmethod
def get_steps(

rewards, agent ids and interrupted flags of the agents that had their
episode terminated last step.
"""
pass
@abstractmethod
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
"""
Get the BehaviorSpec corresponding to the behavior name
:param behavior_name: The name of the behavior the agents are part of
:return: A BehaviorSpec corresponding to that behavior
"""
pass

293
ml-agents-envs/mlagents_envs/environment.py


import atexit
from distutils.version import StrictVersion
import glob
import uuid
from typing import Dict, List, Optional, Any, Tuple
from typing import Dict, List, Optional, Tuple, Mapping as MappingType
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager
from mlagents_envs import env_utils
from mlagents_envs.base_env import (
BaseEnv,

BehaviorName,
AgentId,
BehaviorMapping,
)
from mlagents_envs.timers import timed, hierarchical_timer
from mlagents_envs.exception import (

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from .rpc_communicator import RpcCommunicator
from sys import platform
import struct
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
# Communication protocol version.
# When connecting to C#, this must be compatible with Academy.k_ApiVersion.
# We follow semantic versioning on the communication version, so existing

BASE_ENVIRONMENT_PORT = 5005
# Command line argument used to pass the port to the executable environment.
PORT_COMMAND_LINE_ARG = "--mlagents-port"
_PORT_COMMAND_LINE_ARG = "--mlagents-port"
@staticmethod
def _raise_version_exception(unity_com_ver: str) -> None:

)
@staticmethod
def check_communication_compatibility(
def _check_communication_compatibility(
unity_com_ver: str, python_api_version: str, unity_package_version: str
) -> bool:
unity_communicator_version = StrictVersion(unity_com_ver)

return True
@staticmethod
def get_capabilities_proto() -> UnityRLCapabilitiesProto:
def _get_capabilities_proto() -> UnityRLCapabilitiesProto:
def warn_csharp_base_capabitlities(
def _warn_csharp_base_capabilities(
caps: UnityRLCapabilitiesProto, unity_package_ver: str, python_package_ver: str
) -> None:
if not caps.baseRLCapabilities:

:str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
"""
atexit.register(self._close)
self.additional_args = additional_args or []
self.no_graphics = no_graphics
self._additional_args = additional_args or []
self._no_graphics = no_graphics
# If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
# an environment, otherwise DEFAULT_EDITOR_PORT
if base_port is None:

self.port = base_port + worker_id
self._port = base_port + worker_id
self.proc1 = None
self.timeout_wait: int = timeout_wait
self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
self.worker_id = worker_id
self.side_channels: Dict[uuid.UUID, SideChannel] = {}
if side_channels is not None:
for _sc in side_channels:
if _sc.channel_id in self.side_channels:
raise UnityEnvironmentException(
"There cannot be two side channels with the same channel id {0}.".format(
_sc.channel_id
)
)
self.side_channels[_sc.channel_id] = _sc
self.log_folder = log_folder
self._proc1 = None
self._timeout_wait: int = timeout_wait
self._communicator = self._get_communicator(worker_id, base_port, timeout_wait)
self._worker_id = worker_id
self._side_channel_manager = SideChannelManager(side_channels)
self._log_folder = log_folder
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

"the worker-id must be 0 in order to connect with the Editor."
)
if file_name is not None:
self.executable_launcher(file_name, no_graphics, additional_args)
try:
self._proc1 = env_utils.launch_executable(
file_name, self._executable_args()
)
except UnityEnvironmentException:
self._close(0)
raise
f"Listening on port {self.port}. "
f"Listening on port {self._port}. "
f"Start training by pressing the Play button in the Unity Editor."
)
self._loaded = True

communication_version=self.API_VERSION,
package_version=mlagents_envs.__version__,
capabilities=UnityEnvironment.get_capabilities_proto(),
capabilities=UnityEnvironment._get_capabilities_proto(),
aca_output = self.send_academy_parameters(rl_init_parameters_in)
aca_output = self._send_academy_parameters(rl_init_parameters_in)
if not UnityEnvironment.check_communication_compatibility(
if not UnityEnvironment._check_communication_compatibility(
aca_params.communication_version,
UnityEnvironment.API_VERSION,
aca_params.package_version,

UnityEnvironment.warn_csharp_base_capabitlities(
UnityEnvironment._warn_csharp_base_capabilities(
aca_params.capabilities,
aca_params.package_version,
UnityEnvironment.API_VERSION,

self._update_behavior_specs(aca_output)
@staticmethod
def get_communicator(worker_id, base_port, timeout_wait):
def _get_communicator(worker_id, base_port, timeout_wait):
@staticmethod
def validate_environment_path(env_path: str) -> Optional[str]:
# Strip out executable extensions if passed
env_path = (
env_path.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
logger.debug("The true file name is {}".format(true_filename))
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None
cwd = os.getcwd()
launch_string = None
true_filename = os.path.basename(os.path.normpath(env_path))
if platform == "linux" or platform == "linux2":
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) > 0:
launch_string = candidates[0]
elif platform == "darwin":
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) > 0:
launch_string = candidates[0]
elif platform == "win32":
candidates = glob.glob(os.path.join(cwd, env_path + ".exe"))
if len(candidates) == 0:
candidates = glob.glob(env_path + ".exe")
if len(candidates) > 0:
launch_string = candidates[0]
return launch_string
def executable_args(self) -> List[str]:
def _executable_args(self) -> List[str]:
if self.no_graphics:
if self._no_graphics:
args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
if self.log_folder:
args += [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]
if self._log_folder:
self.log_folder, f"Player-{self.worker_id}.log"
self._log_folder, f"Player-{self._worker_id}.log"
args += self.additional_args
args += self._additional_args
def executable_launcher(self, file_name, no_graphics, args):
launch_string = self.validate_environment_path(file_name)
if launch_string is None:
self._close(0)
raise UnityEnvironmentException(
f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
subprocess_args = [launch_string] + self.executable_args()
try:
self.proc1 = subprocess.Popen(
subprocess_args,
# start_new_session=True means that signals to the parent python process
# (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
# This is generally good since we want the environment to have a chance to shutdown,
# but may be undesirable in come cases; if so, we'll add a command-line toggle.
# Note that on Windows, the CTRL_C signal will still be sent.
start_new_session=True,
)
except PermissionError as perm:
# This is likely due to missing read or execute permissions on file.
raise UnityEnvironmentException(
f"Error when trying to launch environment - make sure "
f"permissions are set correctly. For example "
f'"chmod -R 755 {launch_string}"'
) from perm
def _update_behavior_specs(self, output: UnityOutputProto) -> None:
init_output = output.rl_initialization_output
for brain_param in init_output.brain_parameters:

DecisionSteps.empty(self._env_specs[brain_name]),
TerminalSteps.empty(self._env_specs[brain_name]),
)
self._parse_side_channel_message(self.side_channels, output.side_channel)
self._side_channel_manager.process_side_channel_message(output.side_channel)
outputs = self.communicator.exchange(self._generate_reset_input())
outputs = self._communicator.exchange(self._generate_reset_input())
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

].create_empty_action(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self.communicator.exchange(step_input)
outputs = self._communicator.exchange(step_input)
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

def get_behavior_names(self):
return list(self._env_specs.keys())
@property
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
return BehaviorMapping(self._env_specs)
def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
)
"The behavior {0} needs an input of dimension {1} for "
"(<number of agents>, <action size>) but received input of <