
Merge branch 'master' into merge-release-0.13.0

/release-0.13.0
GitHub · 5 years ago
Current commit: d985dded
66 files changed, with 1302 insertions(+) and 969 deletions(-). Per-file changed-line counts are shown in parentheses below.
  1. .circleci/config.yml (2)
  2. .gitignore (1)
  3. .pre-commit-config.yaml (2)
  4. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (4)
  5. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (2)
  6. UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/StackingSensorTests.cs (2)
  7. UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/VectorSensorTests.cs (2)
  8. UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/WriterAdapterTests.cs (12)
  9. UnitySDK/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (7)
  10. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (673)
  11. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (9)
  12. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (19)
  13. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (5)
  14. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs (2)
  15. UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs (4)
  16. UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs (4)
  17. UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensor.cs (9)
  18. UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensorComponent.cs (3)
  19. UnitySDK/Assets/ML-Agents/Scripts/Sensor/ISensor.cs (4)
  20. UnitySDK/Assets/ML-Agents/Scripts/Sensor/RayPerceptionSensor.cs (2)
  21. UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensor.cs (11)
  22. UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensorComponent.cs (3)
  23. UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorBase.cs (6)
  24. UnitySDK/Assets/ML-Agents/Scripts/Sensor/StackingSensor.cs (7)
  25. UnitySDK/Assets/ML-Agents/Scripts/Sensor/VectorSensor.cs (2)
  26. UnitySDK/Assets/ML-Agents/Scripts/Sensor/WriteAdapter.cs (53)
  27. config/sac_trainer_config.yaml (71)
  28. config/trainer_config.yaml (72)
  29. docs/Learning-Environment-Create-New.md (5)
  30. docs/Migrating.md (11)
  31. docs/Training-ML-Agents.md (4)
  32. gym-unity/gym_unity/tests/test_gym.py (2)
  33. ml-agents-envs/mlagents_envs/exception.py (8)
  34. ml-agents-envs/mlagents_envs/rpc_utils.py (43)
  35. ml-agents-envs/mlagents_envs/tests/test_envs.py (2)
  36. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (43)
  37. ml-agents/mlagents/trainers/agent_processor.py (84)
  38. ml-agents/mlagents/trainers/brain.py (32)
  39. ml-agents/mlagents/trainers/curriculum.py (58)
  40. ml-agents/mlagents/trainers/learn.py (78)
  41. ml-agents/mlagents/trainers/meta_curriculum.py (118)
  42. ml-agents/mlagents/trainers/ppo/trainer.py (9)
  43. ml-agents/mlagents/trainers/rl_trainer.py (14)
  44. ml-agents/mlagents/trainers/sac/trainer.py (9)
  45. ml-agents/mlagents/trainers/stats.py (13)
  46. ml-agents/mlagents/trainers/tests/mock_brain.py (2)
  47. ml-agents/mlagents/trainers/tests/test_agent_processor.py (41)
  48. ml-agents/mlagents/trainers/tests/test_bcmodule.py (2)
  49. ml-agents/mlagents/trainers/tests/test_curriculum.py (32)
  50. ml-agents/mlagents/trainers/tests/test_learn.py (11)
  51. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (56)
  52. ml-agents/mlagents/trainers/tests/test_multigpu.py (2)
  53. ml-agents/mlagents/trainers/tests/test_ppo.py (38)
  54. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2)
  55. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (39)
  56. ml-agents/mlagents/trainers/tests/test_sac.py (12)
  57. ml-agents/mlagents/trainers/tests/test_stats.py (2)
  58. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (2)
  59. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (28)
  60. ml-agents/mlagents/trainers/tests/test_trainer_util.py (2)
  61. ml-agents/mlagents/trainers/trainer.py (198)
  62. ml-agents/mlagents/trainers/trainer_controller.py (161)
  63. ml-agents/mlagents/trainers/trainer_util.py (6)
  64. test_requirements.txt (1)
  65. UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/FloatVisualSensorTests.cs (105)
  66. UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/FloatVisualSensorTests.cs.meta (3)

2
.circleci/config.yml


. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.

1
.gitignore


/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Demonstrations*
/UnitySDK/csharp_timers.json
# Tensorflow Model Info
/models

2
.pre-commit-config.yaml


.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions, flake8-tidy-imports, flake8-bugbear]
additional_dependencies: [flake8-comprehensions==3.1.4, flake8-tidy-imports==4.0.0, flake8-bugbear==20.1.2]
- id: trailing-whitespace
name: trailing-whitespace-markdown
types: [markdown]

4
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs


var texture = (Texture2D)
AssetDatabase.LoadAssetAtPath(k_IconPath, typeof(Texture2D));
#if UNITY_2017_3_OR_NEWER
#else
ctx.SetMainAsset(ctx.assetPath, demonstration);
#endif
}
catch
{

2
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


sensorName = n;
}
public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return new[] { 0 };
}

2
UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/StackingSensorTests.cs


ISensor wrapped = new VectorSensor(4);
ISensor sensor = new StackingSensor(wrapped, 4);
Assert.AreEqual("StackingSensor_size4_VectorSensor_size4", sensor.GetName());
Assert.AreEqual(sensor.GetFloatObservationShape(), new [] {16});
Assert.AreEqual(sensor.GetObservationShape(), new [] {16});
}
[Test]

2
UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/VectorSensorTests.cs


Assert.AreEqual(fill, output[0]);
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(output, 0);
writer.SetTarget(output, sensor.GetObservationShape(), 0);
// Make sure WriteAdapter didn't touch anything
Assert.AreEqual(fill, output[0]);

12
UnitySDK/Assets/ML-Agents/Editor/Tests/Sensor/WriterAdapterTests.cs


{
WriteAdapter writer = new WriteAdapter();
var buffer = new[] { 0f, 0f, 0f };
var shape = new[] { 3 };
writer.SetTarget(buffer, 0);
writer.SetTarget(buffer, shape, 0);
// Elementwise writes
writer[0] = 1f;
writer[2] = 2f;

writer.SetTarget(buffer, 1);
writer.SetTarget(buffer, shape, 1);
writer.SetTarget(buffer, 0);
writer.SetTarget(buffer, shape, 0);
writer.SetTarget(buffer, 1);
writer.SetTarget(buffer, shape, 1);
writer.AddRange(new [] {6f, 7f});
Assert.AreEqual(new[] { 4f, 6f, 7f }, buffer);
}

valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(2, 3)
};
writer.SetTarget(t, 0, 0);
Assert.AreEqual(0f, t.data[0, 0]);
writer[0] = 1f;

valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(2, 2, 2, 3)
};
var shape = new[] { 2, 2, 3 };
writer.SetTarget(t, 0, 0);
writer[1, 0, 1] = 1f;

7
UnitySDK/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs


using System;
using UnityEditor;
using UnityEngine;
#if UNITY_2018_1_OR_NEWER
#endif
namespace MLAgents
{

{
string[] scenes = { "Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity" };
var buildResult = BuildPipeline.BuildPlayer(scenes, "testPlayer", BuildTarget.StandaloneOSX, BuildOptions.None);
#if UNITY_2018_1_OR_NEWER
var isOk = buildResult.summary.result == BuildResult.Succeeded;
var error = "";
foreach (var stepInfo in buildResult.steps)

}
}
}
#else
var error = buildResult;
var isOk = string.IsNullOrEmpty(error);
#endif
if (isOk)
{
EditorApplication.Exit(0);

673
UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
The diff for this file is too large to display.

9
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


Debug.Assert(!sensors[i].GetName().Equals(sensors[i + 1].GetName()), "Sensor names must be unique.");
}
#endif
// Create a buffer for writing vector sensor data too
// Create a buffer for writing uncompressed (i.e. float) sensor data to
int numFloatObservations = 0;
for (var i = 0; i < sensors.Count; i++)
{

var sensor = sensors[i];
if (sensor.GetCompressionType() == SensorCompressionType.None)
{
// only handles 1D
m_WriteAdapter.SetTarget(m_VectorSensorBuffer, floatsWritten);
m_WriteAdapter.SetTarget(m_VectorSensorBuffer, sensor.GetObservationShape(), floatsWritten);
Shape = sensor.GetFloatObservationShape(),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
m_Info.observations.Add(floatObs);

var compressedObs = new Observation
{
CompressedData = sensor.GetCompressedObservation(),
Shape = sensor.GetFloatObservationShape(),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
m_Info.observations.Add(compressedObs);
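
The hunk above swaps GetFloatObservationShape() for GetObservationShape() and passes the sensor's shape into WriteAdapter.SetTarget when filling the shared float buffer. A minimal sketch of the same pattern, assuming only the ISensor/WriteAdapter members visible in this diff; the helper class and method names are illustrative and not part of Agent.cs:

```csharp
using System.Collections.Generic;
using MLAgents.Sensor;

public static class SensorWriteExample
{
    // Illustrative helper: writes every uncompressed sensor into one shared
    // float buffer, mirroring the loop in the Agent.cs hunk above.
    public static float[] WriteUncompressedSensors(List<ISensor> sensors, WriteAdapter adapter)
    {
        var totalFloats = 0;
        foreach (var sensor in sensors)
        {
            if (sensor.GetCompressionType() == SensorCompressionType.None)
            {
                totalFloats += sensor.ObservationSize();
            }
        }

        var buffer = new float[totalFloats];
        var floatsWritten = 0;
        foreach (var sensor in sensors)
        {
            if (sensor.GetCompressionType() == SensorCompressionType.None)
            {
                // The observation shape now travels with the target buffer.
                adapter.SetTarget(buffer, sensor.GetObservationShape(), floatsWritten);
                floatsWritten += sensor.Write(adapter);
            }
        }
        return buffer;
    }
}
```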

19
UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs


var result = m_Client.Exchange(WrapMessage(unityOutput, 200));
unityInput = m_Client.Exchange(WrapMessage(null, 200)).UnityInput;
#if UNITY_EDITOR
#if UNITY_2017_2_OR_NEWER
#else
EditorApplication.playmodeStateChanged += HandleOnPlayModeChanged;
#endif
#endif
return result.UnityInput;
#else

#endregion
#if UNITY_EDITOR
#if UNITY_2017_2_OR_NEWER
/// <summary>
/// When the editor exits, the communicator must be closed
/// </summary>

}
}
#else
/// <summary>
/// When the editor exits, the communicator must be closed
/// </summary>
private void HandleOnPlayModeChanged()
{
// This method is run whenever the playmode state is changed.
if (!EditorApplication.isPlayingOrWillChangePlaymode)
{
Close();
}
}
#endif
#endif
}
}

5
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs


// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
{
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_WriteAdapter);
tensorOffset += numWritten;
}

var agentIndex = 0;
foreach (var agent in agents)
{
var sensor = agent.sensors[m_SensorIndex];
agent.sensors[m_SensorIndex].Write(m_WriteAdapter);
sensor.Write(m_WriteAdapter);
agentIndex++;
}
}

2
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs


for (var sensorIndex = 0; sensorIndex < agent.sensors.Count; sensorIndex++)
{
var sensor = agent.sensors[sensorIndex];
var shape = sensor.GetFloatObservationShape();
var shape = sensor.GetObservationShape();
// TODO generalize - we currently only have vector or visual, but can't handle "2D" observations
var isVectorSensor = (shape.Length == 1);
if (isVectorSensor)

4
UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs


// First agent, save the sensor sizes
foreach (var sensor in agent.sensors)
{
m_SensorShapes.Add(sensor.GetFloatObservationShape());
m_SensorShapes.Add(sensor.GetObservationShape());
}
}
else

for (var i = 0; i < m_SensorShapes.Count; i++)
{
var cachedShape = m_SensorShapes[i];
var sensorShape = agent.sensors[i].GetFloatObservationShape();
var sensorShape = agent.sensors[i].GetObservationShape();
Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
for (var j = 0; j < cachedShape.Length; j++)
{

4
UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs


// First agent, save the sensor sizes
foreach (var sensor in agent.sensors)
{
m_SensorShapes.Add(sensor.GetFloatObservationShape());
m_SensorShapes.Add(sensor.GetObservationShape());
}
}
else

for (var i = 0; i < m_SensorShapes.Count; i++)
{
var cachedShape = m_SensorShapes[i];
var sensorShape = agent.sensors[i].GetFloatObservationShape();
var sensorShape = agent.sensors[i].GetObservationShape();
Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
for (var j = 0; j < cachedShape.Length; j++)
{

9
UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensor.cs


bool m_Grayscale;
string m_Name;
int[] m_Shape;
SensorCompressionType m_CompressionType;
public CameraSensor(Camera camera, int width, int height, bool grayscale, string name)
public CameraSensor(Camera camera, int width, int height, bool grayscale, string name,
SensorCompressionType compression)
{
m_Camera = camera;
m_Width = width;

m_Shape = new[] { height, width, grayscale ? 1 : 3 };
m_CompressionType = compression;
}
public string GetName()

public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return m_Shape;
}

public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.PNG;
return m_CompressionType;
}
/// <summary>
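
This change adds a SensorCompressionType parameter to the CameraSensor constructor, so GetCompressionType() returns the configured value instead of always returning PNG. A short sketch based only on the constructor signature and enum values shown above; the MonoBehaviour wrapper and field names are hypothetical:

```csharp
using MLAgents.Sensor;
using UnityEngine;

// Hypothetical setup component illustrating the new constructor parameter.
public class CameraSensorSetupExample : MonoBehaviour
{
    public Camera observationCamera;

    void Awake()
    {
        // PNG compression (the previously hard-coded behaviour).
        var compressed = new CameraSensor(
            observationCamera, 84, 84, false, "CameraSensor_png", SensorCompressionType.PNG);

        // Uncompressed floats; these go through the WriteAdapter path instead.
        var raw = new CameraSensor(
            observationCamera, 84, 84, false, "CameraSensor_raw", SensorCompressionType.None);

        Debug.Log(compressed.GetCompressionType()); // PNG
        Debug.Log(raw.GetCompressionType());        // None
    }
}
```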

3
UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensorComponent.cs


public int width = 84;
public int height = 84;
public bool grayscale;
public SensorCompressionType compression = SensorCompressionType.PNG;
return new CameraSensor(camera, width, height, grayscale, sensorName);
return new CameraSensor(camera, width, height, grayscale, sensorName, compression);
}
public override int[] GetObservationShape()

4
UnitySDK/Assets/ML-Agents/Scripts/Sensor/ISensor.cs


/// A sensor that returns an RGB image would return new [] {Width, Height, 3}
/// </summary>
/// <returns></returns>
int[] GetFloatObservationShape();
int[] GetObservationShape();
/// <summary>
/// Write the observation data directly to the WriteAdapter.

/// <returns></returns>
public static int ObservationSize(this ISensor sensor)
{
var shape = sensor.GetFloatObservationShape();
var shape = sensor.GetObservationShape();
int count = 1;
for (var i = 0; i < shape.Length; i++)
{
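
GetFloatObservationShape() is renamed to GetObservationShape() on the ISensor interface, and the ObservationSize() extension in this file now goes through the new name. A small caller-side sketch, assuming only the members shown in this hunk; the ShapesMatch helper is hypothetical and simply mirrors the shape checks in BarracudaPolicy/RemotePolicy above:

```csharp
using MLAgents.Sensor;

public static class SensorShapeExample
{
    // Hypothetical helper: true when two sensors report the same observation
    // rank and dimensions, like the Debug.Assert checks in the policy classes.
    public static bool ShapesMatch(ISensor a, ISensor b)
    {
        var shapeA = a.GetObservationShape();
        var shapeB = b.GetObservationShape();
        if (shapeA.Length != shapeB.Length)
        {
            return false;
        }
        for (var i = 0; i < shapeA.Length; i++)
        {
            if (shapeA[i] != shapeB[i])
            {
                return false;
            }
        }
        return true;
    }
}
```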

2
UnitySDK/Assets/ML-Agents/Scripts/Sensor/RayPerceptionSensor.cs


{
}
public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return m_Shape;
}

11
UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensor.cs


bool m_Grayscale;
string m_Name;
int[] m_Shape;
SensorCompressionType m_CompressionType;
public RenderTextureSensor(RenderTexture renderTexture, bool grayscale, string name)
public RenderTextureSensor(RenderTexture renderTexture, bool grayscale, string name,
SensorCompressionType compressionType)
{
m_RenderTexture = renderTexture;
var width = renderTexture != null ? renderTexture.width : 0;

m_Shape = new[] { height, width, grayscale ? 1 : 3 };
m_CompressionType = compressionType;
}
public string GetName()

public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return m_Shape;
}

public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.PNG;
return m_CompressionType;
/// Converts a RenderTexture and correspinding resolution to a 2D texture.
/// Converts a RenderTexture to a 2D texture.
/// </summary>
/// <returns>The 2D texture.</returns>
/// <param name="obsTexture">RenderTexture.</param>

3
UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensorComponent.cs


public RenderTexture renderTexture;
public string sensorName = "RenderTextureSensor";
public bool grayscale;
public SensorCompressionType compression = SensorCompressionType.PNG;
return new RenderTextureSensor(renderTexture, grayscale, sensorName);
return new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
}
public override int[] GetObservationShape()

6
UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorBase.cs


{
/// <summary>
/// Write the observations to the output buffer. The size of the buffer will be the product of the sizes returned
/// by GetFloatObservationShape().
/// by GetObservationShape().
public abstract int[] GetFloatObservationShape();
public abstract int[] GetObservationShape();
public abstract string GetName();

/// <param name="adapter"></param>
public virtual int Write(WriteAdapter adapter)
{
// TODO reuse buffer for similar agents, don't call GetFloatObservationShape()
// TODO reuse buffer for similar agents, don't call GetObservationShape()
var numFloats = this.ObservationSize();
float[] buffer = new float[numFloats];
WriteObservation(buffer);
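
SensorBase keeps a default Write() that allocates a temporary float buffer sized by ObservationSize(), fills it through WriteObservation, and copies it into the adapter, so subclasses only implement the abstract members renamed here. A minimal sketch of such a subclass, assuming WriteObservation(float[]) is the abstract method invoked above; the sensor itself is purely illustrative:

```csharp
using MLAgents.Sensor;

// Illustrative sensor that always reports three fixed floats.
public class ConstantVectorSensor : SensorBase
{
    readonly float[] m_Values = { 0.1f, 0.2f, 0.3f };

    public override int[] GetObservationShape()
    {
        // A 1D observation with three entries.
        return new[] { m_Values.Length };
    }

    public override string GetName()
    {
        return "ConstantVectorSensor";
    }

    public override void WriteObservation(float[] output)
    {
        // SensorBase.Write() allocates `output` with ObservationSize() elements
        // and copies it into the WriteAdapter after this call returns.
        m_Values.CopyTo(output, 0);
    }
}
```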

7
UnitySDK/Assets/ML-Agents/Scripts/Sensor/StackingSensor.cs


m_Name = $"StackingSensor_size{numStackedObservations}_{wrapped.GetName()}";
var shape = wrapped.GetFloatObservationShape();
var shape = wrapped.GetObservationShape();
m_Shape = new int[shape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();

public int Write(WriteAdapter adapter)
{
// First, call the wrapped sensor's write method. Make sure to use our own adapter, not the passed one.
m_LocalAdapter.SetTarget(m_StackedObservations[m_CurrentIndex], 0);
var wrappedShape = m_WrappedSensor.GetObservationShape();
m_LocalAdapter.SetTarget(m_StackedObservations[m_CurrentIndex], wrappedShape, 0);
m_WrappedSensor.Write(m_LocalAdapter);
// Now write the saved observations (oldest first)

m_CurrentIndex = (m_CurrentIndex + 1) % m_NumStackedObservations;
}
public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return m_Shape;
}

2
UnitySDK/Assets/ML-Agents/Scripts/Sensor/VectorSensor.cs


Clear();
}
public int[] GetFloatObservationShape()
public int[] GetObservationShape()
{
return m_Shape;
}

53
UnitySDK/Assets/ML-Agents/Scripts/Sensor/WriteAdapter.cs


using System;
using Barracuda;
using MLAgents.InferenceBrain;
namespace MLAgents.Sensor

TensorProxy m_Proxy;
int m_Batch;
TensorShape m_TensorShape;
/// <param name="data"></param>
/// <param name="offset"></param>
public void SetTarget(IList<float> data, int offset)
/// <param name="data">Float array or list that will be written to.</param>
/// <param name="shape">Shape of the observations to be written.</param>
/// <param name="offset">Offset from the start of the float data to write to.</param>
public void SetTarget(IList<float> data, int[] shape, int offset)
m_Batch = -1;
m_Batch = 0;
if (shape.Length == 1)
{
m_TensorShape = new TensorShape(m_Batch, shape[0]);
}
else
{
m_TensorShape = new TensorShape(m_Batch, shape[0], shape[1], shape[2]);
}
/// <param name="tensorProxy"></param>
/// <param name="batchIndex"></param>
/// <param name="channelOffset"></param>
/// <param name="tensorProxy">Tensor proxy that will be written to.</param>
/// <param name="batchIndex">Batch index in the tensor proxy (i.e. the index of the Agent)</param>
/// <param name="channelOffset">Offset from the start of the channel to write to.</param>
public void SetTarget(TensorProxy tensorProxy, int batchIndex, int channelOffset)
{
m_Proxy = tensorProxy;

m_TensorShape = m_Proxy.data.shape;
}
/// <summary>

{
set
{
// Only TensorProxy supports 3D access
m_Proxy.data[m_Batch, h, w, ch + m_Offset] = value;
if (m_Data != null)
{
if (h < 0 || h >= m_TensorShape.height)
{
throw new IndexOutOfRangeException($"height value {h} must be in range [0, {m_TensorShape.height-1}]");
}
if (w < 0 || w >= m_TensorShape.width)
{
throw new IndexOutOfRangeException($"width value {w} must be in range [0, {m_TensorShape.width-1}]");
}
if (ch < 0 || ch >= m_TensorShape.channels)
{
throw new IndexOutOfRangeException($"channel value {ch} must be in range [0, {m_TensorShape.channels-1}]");
}
var index = m_TensorShape.Index(m_Batch, h, w, ch + m_Offset);
m_Data[index] = value;
}
else
{
m_Proxy.data[m_Batch, h, w, ch + m_Offset] = value;
}
}
}
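
With the shape now supplied to SetTarget, the adapter builds a TensorShape up front and range-checks three-dimensional writes against it. A short usage sketch based on the SetTarget overloads and indexers shown in this file and its tests; the values are arbitrary:

```csharp
using System;
using MLAgents.Sensor;

public static class WriteAdapterExample
{
    public static void Run()
    {
        var writer = new WriteAdapter();

        // 1D target: a flat float buffer with shape {3}.
        var vector = new float[3];
        writer.SetTarget(vector, new[] { 3 }, 0);
        writer[0] = 1f;                               // vector: {1, 0, 0}
        writer.SetTarget(vector, new[] { 3 }, 1);
        writer.AddRange(new[] { 2f, 3f });            // vector: {1, 2, 3}

        // 3D target: 12 floats interpreted as height x width x channels = 2 x 2 x 3.
        var visual = new float[12];
        writer.SetTarget(visual, new[] { 2, 2, 3 }, 0);
        writer[1, 0, 2] = 1f;                         // row 1, column 0, channel 2

        // Out-of-range indices now throw instead of writing past the shape.
        try
        {
            writer[2, 0, 0] = 1f;                     // height index 2 is outside [0, 1]
        }
        catch (IndexOutOfRangeException)
        {
            // expected with the new range checks
        }
    }
}
```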

71
config/sac_trainer_config.yaml


init_entcoef: 1.0
learning_rate: 3.0e-4
learning_rate_schedule: constant
max_steps: 5.0e4
max_steps: 5.0e5
memory_size: 256
normalize: false
num_update: 1

sequence_length: 64
summary_freq: 1000
summary_freq: 10000
tau: 0.005
use_recurrent: false
vis_encode_type: simple

normalize: false
batch_size: 256
buffer_size: 500000
max_steps: 1.0e5
max_steps: 2.0e6
max_steps: 5.0e5
max_steps: 2.0e7
summary_freq: 1000
summary_freq: 20000
max_steps: 5.0e4
max_steps: 1.5e7
summary_freq: 2000
summary_freq: 60000
max_steps: 1.0e6
max_steps: 3e7
summary_freq: 2000
summary_freq: 20000
time_horizon: 128
init_entcoef: 0.1
num_layers: 2

max_steps: 1.0e6
max_steps: 3e7
summary_freq: 2000
summary_freq: 20000
time_horizon: 128
num_layers: 2
init_entcoef: 0.1

max_steps: 5.0e5
max_steps: 5.0e6
summary_freq: 2000
summary_freq: 20000
time_horizon: 128
init_entcoef: 0.1
num_layers: 2

max_steps: 5.0e5
max_steps: 5.0e6
summary_freq: 2000
summary_freq: 20000
time_horizon: 128
init_entcoef: 0.1
num_layers: 2

summary_freq: 2000
summary_freq: 30000
time_horizon: 128
batch_size: 128
buffer_init_steps: 10000

init_entcoef: 0.01
max_steps: 5.0e5
max_steps: 1.0e7
sequence_length: 16
tau: 0.01
use_recurrent: false

hidden_units: 256
buffer_init_steps: 1000
num_layers: 1
max_steps: 5.0e5
max_steps: 1.0e7
buffer_size: 500000
init_entcoef: 0.01
tau: 0.01

normalize: true
batch_size: 64
buffer_size: 12000
summary_freq: 1000
summary_freq: 12000
time_horizon: 1000
hidden_units: 64
init_entcoef: 0.5

batch_size: 256
summary_freq: 1000
summary_freq: 12000
max_steps: 2e5
max_steps: 4e6
CrawlerStatic:
normalize: true

buffer_size: 500000
buffer_init_steps: 2000
max_steps: 5e5
summary_freq: 3000
max_steps: 5e6
summary_freq: 30000
init_entcoef: 1.0
num_layers: 3
hidden_units: 512

time_horizon: 1000
batch_size: 256
buffer_size: 500000
summary_freq: 3000
summary_freq: 30000
max_steps: 1e6
max_steps: 1e7
hidden_units: 512
reward_signals:
extrinsic:

time_horizon: 1000
batch_size: 256
buffer_size: 500000
max_steps: 2e6
summary_freq: 3000
max_steps: 2e7
summary_freq: 30000
num_layers: 4
train_interval: 2
hidden_units: 512

time_horizon: 1000
batch_size: 128
buffer_size: 500000
max_steps: 2e5
summary_freq: 3000
max_steps: 2e7
summary_freq: 60000
Hallway:
sequence_length: 32

init_entcoef: 0.1
max_steps: 5.0e5
max_steps: 1.0e7
summary_freq: 1000
time_horizon: 64
use_recurrent: true

memory_size: 256
gamma: 0.99
batch_size: 64
max_steps: 5.0e5
summary_freq: 1000
max_steps: 1.0e7
time_horizon: 64
use_recurrent: true

gamma: 0.99
buffer_size: 1024
batch_size: 64
max_steps: 5.0e5
summary_freq: 1000
max_steps: 3.0e6
summary_freq: 60000
time_horizon: 64
GridWorld:

init_entcoef: 0.5
buffer_init_steps: 1000
buffer_size: 50000
max_steps: 50000
summary_freq: 2000
max_steps: 500000
summary_freq: 20000
time_horizon: 5
reward_signals:
extrinsic:

72
config/trainer_config.yaml


lambd: 0.95
learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e4
max_steps: 5.0e5
memory_size: 256
normalize: false
num_epoch: 3

summary_freq: 1000
summary_freq: 10000
use_recurrent: false
vis_encode_type: simple
reward_signals:

beta: 5.0e-3
batch_size: 1024
buffer_size: 10240
max_steps: 1.0e5
max_steps: 2.0e6
max_steps: 1.0e6
max_steps: 2.0e7
max_steps: 5.0e4
max_steps: 1.5e7
summary_freq: 2000
summary_freq: 60000
max_steps: 1.0e6
max_steps: 3e7
summary_freq: 2000
summary_freq: 20000
max_steps: 1.0e6
max_steps: 3e7
summary_freq: 2000
summary_freq: 20000
max_steps: 5.0e5
max_steps: 5.0e6
learning_rate: 1e-3
batch_size: 128
num_epoch: 3

summary_freq: 2000
summary_freq: 20000
max_steps: 5.0e5
max_steps: 5.0e6
learning_rate: 1e-3
batch_size: 320
num_epoch: 3

summary_freq: 2000
summary_freq: 20000
summary_freq: 2000
summary_freq: 30000
time_horizon: 128
batch_size: 128
buffer_size: 2048

max_steps: 5.0e5
max_steps: 1.0e7
num_epoch: 3
reward_signals:
extrinsic:

hidden_units: 256
num_layers: 1
beta: 1.0e-2
max_steps: 5.0e5
max_steps: 1.0e7
num_epoch: 3
reward_signals:
extrinsic:

normalize: true
batch_size: 64
buffer_size: 12000
summary_freq: 1000
summary_freq: 12000
time_horizon: 1000
lambd: 0.99
beta: 0.001

batch_size: 1200
buffer_size: 12000
summary_freq: 1000
summary_freq: 12000
time_horizon: 1000
max_steps: 5.0e5
beta: 0.001

Tennis:
normalize: true
max_steps: 2e5
max_steps: 4e6
CrawlerStatic:
normalize: true

buffer_size: 20240
max_steps: 1e6
summary_freq: 3000
max_steps: 1e7
summary_freq: 30000
num_layers: 3
hidden_units: 512
reward_signals:

time_horizon: 1000
batch_size: 2024
buffer_size: 20240
max_steps: 1e6
summary_freq: 3000
max_steps: 1e7
summary_freq: 30000
num_layers: 3
hidden_units: 512
reward_signals:

time_horizon: 1000
batch_size: 2048
buffer_size: 20480
max_steps: 2e6
summary_freq: 3000
max_steps: 2e7
summary_freq: 30000
num_layers: 3
hidden_units: 512
reward_signals:

time_horizon: 1000
batch_size: 2024
buffer_size: 20240
max_steps: 1e6
summary_freq: 3000
max_steps: 2e7
summary_freq: 60000
reward_signals:
extrinsic:
strength: 1.0

num_epoch: 3
buffer_size: 1024
batch_size: 128
max_steps: 5.0e5
summary_freq: 1000
max_steps: 1.0e7
summary_freq: 10000
time_horizon: 64
VisualHallway:

num_epoch: 3
buffer_size: 1024
batch_size: 64
max_steps: 5.0e5
summary_freq: 1000
max_steps: 1.0e7
summary_freq: 10000
time_horizon: 64
VisualPushBlock:

num_epoch: 3
buffer_size: 1024
batch_size: 64
max_steps: 5.0e5
summary_freq: 1000
max_steps: 3.0e6
summary_freq: 60000
time_horizon: 64
GridWorld:

hidden_units: 256
beta: 5.0e-3
buffer_size: 256
max_steps: 50000
summary_freq: 2000
max_steps: 500000
summary_freq: 20000
time_horizon: 5
reward_signals:
extrinsic:

5
docs/Learning-Environment-Create-New.md


3. In a file system window, navigate to the folder containing your cloned
ML-Agents repository.
4. Drag the `ML-Agents` folder from `UnitySDK/Assets` to the Unity
Editor Project window.
Editor Project window. If you see console errors about Barracuda, make sure
you've installed Barracuda from the Unity Package Manager. More information
can be found in the [installation instructions](Installation.md) under
**Package Installation**.
Your Unity **Project** window should contain the following assets:

11
docs/Migrating.md


# Migrating
## Migrating from 0.12 to 0.13
## Migrating from 0.13 to latest
### Important changes
* Trainer steps are now counted per-Agent, not per-environment as in previous versions. For instance, if you have 10 Agents in the scene, 20 environment steps now correspond to 200 steps as printed in the terminal and in Tensorboard.
### Steps to Migrate
* Multiply `max_steps` and `summary_freq` in your `trainer_config.yaml` by the number of Agents in the scene.
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0
### Important changes
* The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. This should only affect you if you're writing a custom trainer; if you use `mlagents-learn` for training, this should be a transparent change.

4
docs/Training-ML-Agents.md


the oldest checkpoint is deleted when saving a new checkpoint. Defaults to 5.
* `--lesson=<n>`: Specify which lesson to start with when performing curriculum
training. Defaults to 0.
* `--num-runs=<n>`: Sets the number of concurrent training sessions to perform.
Default is set to 1. Set to higher values when benchmarking performance and
multiple training sessions are desired. Training sessions are independent, and
do not improve learning performance.
* `--num-envs=<n>`: Specifies the number of concurrent Unity environment instances to
collect experiences from when training. Defaults to 1.
* `--run-id=<path>`: Specifies an identifier for each training run. This

2
gym-unity/gym_unity/tests/test_gym.py


import unittest.mock as mock
from unittest import mock
import pytest
import numpy as np

8
ml-agents-envs/mlagents_envs/exception.py


pass
class UnityObservationException(UnityException):
"""
Related to errors with receiving observations.
"""
pass
class UnityActionException(UnityException):
"""
Related to errors with sending actions.

43
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.communicator_objects.observation_pb2 import (
ObservationProto,
NONE as COMPRESSION_NONE,
)
from typing import cast, List, Tuple, Union, Collection
from typing import cast, List, Tuple, Union, Collection, Optional, Iterable
from PIL import Image
logger = logging.getLogger("mlagents_envs")

image = Image.open(io.BytesIO(image_bytearray))
# Normally Image loads lazily, this forces it to do loading in the timer scope.
image.load()
s = np.array(image) / 255.0
s = np.array(image, dtype=np.float32) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])

@timed
def observation_to_np_array(
obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
) -> np.ndarray:
"""
Converts observation proto into numpy array of the appropriate size.
:param obs: observation proto to be converted
:param expected_shape: optional shape information, used for sanity checks.
:return: processed numpy array of observation from environment
"""
if expected_shape is not None:
if list(obs.shape) != list(expected_shape):
raise UnityObservationException(
f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
)
gray_scale = obs.shape[2] == 1
if obs.compression_type == COMPRESSION_NONE:
img = np.array(obs.float_data.data, dtype=np.float32)
img = np.reshape(img, obs.shape)
return img
else:
img = process_pixels(obs.compressed_data, gray_scale)
# Compare decompressed image size to observation shape and make sure they match
if list(obs.shape) != list(img.shape):
raise UnityObservationException(
f"Decompressed observation did not have the expected shape - "
f"decompressed had {img.shape} but expected {obs.shape}"
)
return img
@timed
def _process_visual_observation(
obs_index: int,
shape: Tuple[int, int, int],

if len(agent_info_list) == 0:
return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)
gray_scale = shape[2] == 1
process_pixels(agent_obs.observations[obs_index].compressed_data, gray_scale)
observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
return np.array(batched_visual, dtype=np.float32)

2
ml-agents-envs/mlagents_envs/tests/test_envs.py


import unittest.mock as mock
from unittest import mock
import pytest
import numpy as np

43
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


import io
import numpy as np
import pytest
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
ObservationProto,

from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import numpy as np
import io
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.rpc_utils import (
agent_group_spec_from_proto,
process_pixels,

return obs_proto
def generate_uncompressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
obs_proto = ObservationProto()
obs_proto.float_data.data.extend(in_array.flatten().tolist())
obs_proto.compression_type = NONE
obs_proto.shape.extend(in_array.shape)
return obs_proto
in_array = np.random.rand(128, 128, 3)
in_array = np.random.rand(128, 64, 3)
assert out_array.shape == (128, 128, 3)
assert out_array.shape == (128, 64, 3)
in_array = np.random.rand(128, 128, 3)
in_array = np.random.rand(128, 64, 3)
assert out_array.shape == (128, 128, 1)
assert out_array.shape == (128, 64, 1)
assert np.mean(in_array.mean(axis=2, keepdims=True) - out_array) < 0.01
assert (in_array.mean(axis=2, keepdims=True) - out_array < 0.01).all()

def test_process_visual_observation():
in_array_1 = np.random.rand(128, 128, 3)
in_array_1 = np.random.rand(128, 64, 3)
in_array_2 = np.random.rand(128, 128, 3)
proto_obs_2 = generate_compressed_proto_obs(in_array_2)
in_array_2 = np.random.rand(128, 64, 3)
proto_obs_2 = generate_uncompressed_proto_obs(in_array_2)
arr = _process_visual_observation(0, (128, 128, 3), ap_list)
assert list(arr.shape) == [2, 128, 128, 3]
arr = _process_visual_observation(0, (128, 64, 3), ap_list)
assert list(arr.shape) == [2, 128, 64, 3]
def test_process_visual_observation_bad_shape():
in_array_1 = np.random.rand(128, 64, 3)
proto_obs_1 = generate_compressed_proto_obs(in_array_1)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
with pytest.raises(UnityObservationException):
_process_visual_observation(0, (128, 42, 3), ap_list)
def test_batched_step_result_from_proto():

84
ml-agents/mlagents/trainers/agent_processor.py


import sys
from typing import List, Dict
from collections import defaultdict, Counter
from typing import List, Dict, Deque, TypeVar, Generic
from collections import defaultdict, Counter, deque
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.policy import Policy
T = TypeVar("T")
class AgentProcessor:
"""

def __init__(
self,
trainer: Trainer,
policy: TFPolicy,
behavior_id: str,
stats_reporter: StatsReporter,

self.episode_steps: Counter = Counter()
self.episode_rewards: Dict[str, float] = defaultdict(float)
self.stats_reporter = stats_reporter
self.trainer = trainer
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
self.behavior_id = behavior_id
def add_experiences(

next_obs=next_obs,
behavior_id=self.behavior_id,
)
# This will eventually be replaced with a queue
self.trainer.process_trajectory(trajectory)
for traj_queue in self.trajectory_queues:
traj_queue.put(trajectory)
self.experience_buffers[agent_id] = []
if next_info.local_done[next_idx]:
self.stats_reporter.add_stat(

del self.episode_rewards[agent_id]
elif not next_info.local_done[next_idx]:
self.episode_steps[agent_id] += 1
def publish_trajectory_queue(
self, trajectory_queue: "AgentManagerQueue[Trajectory]"
) -> None:
"""
Adds a trajectory queue to the list of queues to publish to when this AgentProcessor
assembles a Trajectory
:param trajectory_queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
class AgentManagerQueue(Generic[T]):
"""
Queue used by the AgentManager. Note that we make our own class here because in most implementations
deque is sufficient and faster. However, if we want to switch to multiprocessing, we'll need to change
out this implementation.
"""
class Empty(Exception):
"""
Exception for when the queue is empty.
"""
pass
def __init__(self, behavior_id: str):
"""
Initializes an AgentManagerQueue. Note that we can give it a behavior_id so that it can be identified
separately from an AgentManager.
"""
self.queue: Deque[T] = deque()
self.behavior_id = behavior_id
def empty(self) -> bool:
return len(self.queue) == 0
def get_nowait(self) -> T:
try:
return self.queue.popleft()
except IndexError:
raise self.Empty("The AgentManagerQueue is empty.")
def put(self, item: T) -> None:
self.queue.append(item)
class AgentManager(AgentProcessor):
"""
An AgentManager is an AgentProcessor that also holds a single trajectory and policy queue.
Note: this leaves room for adding AgentProcessors that publish multiple trajectory queues.
"""
def __init__(
self,
policy: TFPolicy,
behavior_id: str,
stats_reporter: StatsReporter,
max_trajectory_length: int = sys.maxsize,
):
super().__init__(policy, behavior_id, stats_reporter, max_trajectory_length)
self.trajectory_queue: AgentManagerQueue[Trajectory] = AgentManagerQueue(
self.behavior_id
)
self.policy_queue: AgentManagerQueue[Policy] = AgentManagerQueue(
self.behavior_id
)
self.publish_trajectory_queue(self.trajectory_queue)

32
ml-agents/mlagents/trainers/brain.py


import logging
import numpy as np
import io
from mlagents_envs.timers import hierarchical_timer, timed
from mlagents_envs.timers import timed
from mlagents_envs import rpc_utils
from PIL import Image
logger = logging.getLogger("mlagents.trainers")

@staticmethod
@timed
def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
"""
Converts byte array observation image into numpy array, re-sizes it,
and optionally converts it to grey scale
:param gray_scale: Whether to convert the image to grayscale.
:param image_bytes: input byte array corresponding to image
:return: processed numpy array of observation from environment
"""
with hierarchical_timer("image_decompress"):
image_bytearray = bytearray(image_bytes)
image = Image.open(io.BytesIO(image_bytearray))
# Normally Image loads lazily, this forces it to do loading in the timer scope.
image.load()
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)