浏览代码

Merge branch 'master' into self-play-mutex

/develop/cubewars
Andrew Cohen 5 年前
当前提交
59b88be6
共有 62 个文件被更改,包括 441 次插入427 次删除
  1. 2
      .pylintrc
  2. 2
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs
  3. 2
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs
  4. 2
      Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs
  5. 5
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
  6. 2
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs
  7. 18
      Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
  8. 3
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  9. 2
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs
  10. 3
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridSettings.cs
  11. 5
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs
  12. 3
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  13. 3
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs
  14. 3
      Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs
  15. 2
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  16. 2
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  17. 13
      Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
  18. 6
      com.unity.ml-agents/CHANGELOG.md
  19. 71
      com.unity.ml-agents/Runtime/Academy.cs
  20. 29
      com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
  21. 195
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  22. 2
      com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs
  23. 8
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  24. 16
      com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
  25. 8
      docs/Custom-SideChannels.md
  26. 3
      docs/Installation.md
  27. 5
      docs/Learning-Environment-Create-New.md
  28. 6
      docs/Migrating.md
  29. 5
      docs/Python-API.md
  30. 1
      docs/Readme.md
  31. 2
      docs/Training-Curriculum-Learning.md
  32. 4
      docs/Training-Environment-Parameter-Randomization.md
  33. 2
      docs/Using-Tensorboard.md
  34. 2
      gym-unity/README.md
  35. 7
      gym-unity/gym_unity/envs/__init__.py
  36. 5
      ml-agents-envs/mlagents_envs/environment.py
  37. 4
      ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py
  38. 4
      ml-agents-envs/mlagents_envs/side_channel/side_channel.py
  39. 5
      ml-agents/mlagents/model_serialization.py
  40. 5
      ml-agents/mlagents/trainers/components/reward_signals/__init__.py
  41. 4
      ml-agents/mlagents/trainers/curriculum.py
  42. 5
      ml-agents/mlagents/trainers/env_manager.py
  43. 5
      ml-agents/mlagents/trainers/ghost/trainer.py
  44. 17
      ml-agents/mlagents/trainers/learn.py
  45. 4
      ml-agents/mlagents/trainers/meta_curriculum.py
  46. 4
      ml-agents/mlagents/trainers/policy/tf_policy.py
  47. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  48. 4
      ml-agents/mlagents/trainers/sac/optimizer.py
  49. 5
      ml-agents/mlagents/trainers/sac/trainer.py
  50. 7
      ml-agents/mlagents/trainers/stats.py
  51. 5
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  52. 5
      ml-agents/mlagents/trainers/trainer/trainer.py
  53. 4
      ml-agents/mlagents/trainers/trainer_controller.py
  54. 5
      ml-agents/mlagents/trainers/trainer_util.py
  55. 2
      ml-agents/setup.py
  56. 1
      setup.cfg
  57. 234
      com.unity.ml-agents/Runtime/SideChannels/SideChannelUtils.cs
  58. 11
      com.unity.ml-agents/Runtime/SideChannels/SideChannelUtils.cs.meta
  59. 46
      ml-agents-envs/mlagents_envs/logging_util.py
  60. 19
      docs/Background-Jupyter.md
  61. 10
      ml-agents/mlagents/logging_util.py

2
.pylintrc


# Appears to be https://github.com/PyCQA/pylint/issues/2981
W0201,
# Using the global statement
W0603,

2
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
SetResetParameters();
}

2
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
SetResetParameters();
}

2
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


m_Rb = gameObject.GetComponent<Rigidbody>();
m_LookDir = Vector3.zero;
m_ResetParams = Academy.Instance.FloatProperties;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
SetResetParameters();
}

5
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


using UnityEngine;
using MLAgents;
using MLAgents.Sensors;
using MLAgents.SideChannels;
public class FoodCollectorAgent : Agent
{

public void SetLaserLengths()
{
m_LaserLength = Academy.Instance.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);
m_LaserLength = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("laser_length", 1.0f);
float agentScale = Academy.Instance.FloatProperties.GetPropertyWithDefault("agent_scale", 1.0f);
float agentScale = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("agent_scale", 1.0f);
gameObject.transform.localScale = new Vector3(agentScale, agentScale, agentScale);
}

2
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs


public void Awake()
{
Academy.Instance.OnEnvironmentReset += EnvironmentReset;
m_statsSideChannel = Academy.Instance.GetSideChannel<StatsSideChannel>();
m_statsSideChannel = SideChannelUtils.GetSideChannel<StatsSideChannel>();
}
public void EnvironmentReset()

18
Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_IndirectSpecularColor: {r: 0.44971168, g: 0.4997775, b: 0.57563686, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Father: {fileID: 363761400}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: -369.5, y: -62.2}
m_SizeDelta: {x: 160, y: 55.6}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 150, y: -230}
m_SizeDelta: {x: 160, y: 55.599976}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &918893360
MonoBehaviour:

m_Calls: []
m_FontData:
m_Font: {fileID: 10102, guid: 0000000000000000e000000000000000, type: 0}
m_FontSize: 20
m_FontSize: 22
m_FontStyle: 0
m_BestFit: 0
m_MinSize: 2

m_Father: {fileID: 363761400}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: -369.5, y: -197}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 150, y: -128}
m_SizeDelta: {x: 200, y: 152}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1305247361

3
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using MLAgents;
using MLAgents.Sensors;
using UnityEngine.Serialization;
using MLAgents.SideChannels;
public class GridAgent : Agent
{

// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;
var positionZ = (int)transform.position.z;
var maxPosition = (int)Academy.Instance.FloatProperties.GetPropertyWithDefault("gridSize", 5f) - 1;
var maxPosition = (int)SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("gridSize", 5f) - 1;
if (positionX == 0)
{

2
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public void Start()
{
m_ResetParameters = Academy.Instance.FloatProperties;
m_ResetParameters = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_Objects = new[] { goalPref, pitPref };

3
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridSettings.cs


using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;
public class GridSettings : MonoBehaviour
{

{
Academy.Instance.FloatProperties.RegisterCallback("gridSize", f =>
SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().RegisterCallback("gridSize", f =>
{
MainCamera.transform.position = new Vector3(-(f - 1) / 2f, f * 1.25f, -(f - 1) / 2f);
MainCamera.orthographicSize = (f + 5f) / 2f;

5
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


using System.Collections;
using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;
public class PushAgentBasic : Agent
{

public void SetGroundMaterialFriction()
{
var resetParams = Academy.Instance.FloatProperties;
var resetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
var groundCollider = ground.GetComponent<Collider>();

public void SetBlockProperties()
{
var resetParams = Academy.Instance.FloatProperties;
var resetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
var scale = resetParams.GetPropertyWithDefault("block_scale", 2);
//Set the scale of the block

3
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


using UnityEngine;
using MLAgents;
using MLAgents.Sensors;
using MLAgents.SideChannels;
public class ReacherAgent : Agent
{

public void SetResetParameters()
{
var fp = Academy.Instance.FloatProperties;
var fp = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_GoalSize = fp.GetPropertyWithDefault("goal_size", 5);
m_GoalSpeed = Random.Range(-1f, 1f) * fp.GetPropertyWithDefault("goal_speed", 1);
m_Deviation = fp.GetPropertyWithDefault("deviation", 0);

3
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs


using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;
namespace MLAgentsExamples
{

Physics.defaultSolverIterations = solverIterations;
Physics.defaultSolverVelocityIterations = solverVelocityIterations;
Academy.Instance.FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public void OnDestroy()

3
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


using System.Collections;
using System.Collections.Generic;
using MLAgents;
using MLAgents.SideChannels;
using UnityEngine;
using UnityEngine.Serialization;

ballRb.velocity = Vector3.zero;
ballRb.angularVelocity = Vector3.zero;
var ballScale = Academy.Instance.FloatProperties.GetPropertyWithDefault("ball_scale", 0.015f);
var ballScale = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("ball_scale", 0.015f);
ballRb.transform.localScale = new Vector3(ballScale, ballScale, ballScale);
}
}

2
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


m_BallRb = ball.GetComponent<Rigidbody>();
var canvas = GameObject.Find(k_CanvasName);
GameObject scoreBoard;
m_ResetParams = Academy.Instance.FloatProperties;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
if (invertX)
{
scoreBoard = canvas.transform.Find(k_ScoreBoardBName).gameObject;

2
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


m_ChestRb = chest.GetComponent<Rigidbody>();
m_SpineRb = spine.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
SetResetParameters();
}

13
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


using MLAgents;
using Barracuda;
using MLAgents.Sensors;
using MLAgents.SideChannels;
public class WallJumpAgent : Agent
{

Vector3 m_JumpTargetPos;
Vector3 m_JumpStartingPos;
FloatPropertiesChannel m_FloatProperties;
public override void Initialize()
{
m_WallJumpSettings = FindObjectOfType<WallJumpSettings>();

m_GroundMaterial = m_GroundRenderer.material;
spawnArea.SetActive(false);
m_FloatProperties = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
}
// Begin the jump sequence

{
localScale = new Vector3(
localScale.x,
Academy.Instance.FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
m_FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
localScale.z);
wall.transform.localScale = localScale;
SetModel("SmallWallJump", noWallBrain);

localScale = new Vector3(
localScale.x,
Academy.Instance.FloatProperties.GetPropertyWithDefault("small_wall_height", 4),
m_FloatProperties.GetPropertyWithDefault("small_wall_height", 4),
localScale.z);
wall.transform.localScale = localScale;
SetModel("SmallWallJump", smallWallBrain);

var min = Academy.Instance.FloatProperties.GetPropertyWithDefault("big_wall_min_height", 8);
var max = Academy.Instance.FloatProperties.GetPropertyWithDefault("big_wall_max_height", 8);
var min = m_FloatProperties.GetPropertyWithDefault("big_wall_min_height", 8);
var max = m_FloatProperties.GetPropertyWithDefault("big_wall_max_height", 8);
var height = min + Random.value * (max - min);
localScale = new Vector3(
localScale.x,

6
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
- The Jupyter notebooks have been removed from the repository.
- Introduced the `SideChannelUtils` to register, unregister and access side channels.
- `Academy.FloatProperties` was removed, please use `SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()` instead.
- Added a feature to allow sending stats from C# environments to TensorBoard (and other python StatsWriters). To do this from your code, use `Academy.Instance.GetSideChannel<StatsSideChannel>().AddStat(key, value)` (#3660)
- Added a feature to allow sending stats from C# environments to TensorBoard (and other python StatsWriters). To do this from your code, use `SideChannelUtils.GetSideChannel<StatsSideChannel>().AddStat(key, value)` (#3660)
- Renamed 'Generalization' feature to 'Environment Parameter Randomization'.
- Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
- The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.

- Fixed an issue in the gym wrapper that would raise an exception if an Agent called EndEpisode multiple times in the same step. (#3700)
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
- Fixed an issue where logging output was not visible; logging levels are now set consistently (#3703).
## [0.15.0-preview] - 2020-03-18
### Major Changes

71
com.unity.ml-agents/Runtime/Academy.cs


/// </summary>
public static Academy Instance { get { return s_Lazy.Value; } }
/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public FloatPropertiesChannel FloatProperties;
// Fields not provided in the Inspector.
/// <summary>

}
/// <summary>
/// Hands a SideChannel to the Academy's Communicator so that it can send and
/// receive data with Python.
/// If IsCommunicatorOn is false, the SideChannel will not be registered.
/// </summary>
/// <param name="channel"> The side channel to be registered.</param>
public void RegisterSideChannel(SideChannel channel)
{
    // Initialize the Academy before looking at the Communicator.
    LazyInitialize();
    var communicator = Communicator;
    if (communicator != null)
    {
        communicator.RegisterSideChannel(channel);
    }
}
/// <summary>
/// Removes a SideChannel from the Academy's Communicator. When there is no
/// Communicator, or the side channel was never registered, nothing happens.
/// </summary>
/// <param name="channel"> The side channel to be unregistered.</param>
public void UnregisterSideChannel(SideChannel channel)
{
    var communicator = Communicator;
    if (communicator == null)
    {
        return;
    }
    communicator.UnregisterSideChannel(channel);
}
/// <summary>
/// Returns the SideChannel of Type T if there is one registered, or null if there is none
/// (including when there is no Communicator).
/// If there are multiple SideChannels of the same type registered, the returned instance is arbitrary.
/// </summary>
/// <typeparam name="T">The type of SideChannel to look up.</typeparam>
/// <returns>A registered side channel of type T, or null.</returns>
public T GetSideChannel<T>() where T: SideChannel
{
    var communicator = Communicator;
    if (communicator == null)
    {
        return null;
    }
    return communicator.GetSideChannel<T>();
}
/// <summary>
/// Returns all SideChannels of Type T that are registered. Use <see cref="GetSideChannel{T}()"/> if possible,
/// as that does not make any memory allocations.
/// </summary>
/// <typeparam name="T">The type of SideChannel to look up.</typeparam>
/// <returns>The registered side channels of type T; an empty list when there is no Communicator.</returns>
public List<T> GetSideChannels<T>() where T: SideChannel
{
    // Without a Communicator, hand back an empty list rather than null.
    return Communicator == null
        ? new List<T>()
        : Communicator.GetSideChannels<T>();
}
/// <summary>
/// Disable stepping of the Academy during the FixedUpdate phase. If this is called, the Academy must be
/// stepped manually by the user by calling Academy.EnvironmentStep().
/// </summary>

{
EnableAutomaticStepping();
var floatProperties = new FloatPropertiesChannel();
FloatProperties = floatProperties;
SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());
SideChannelUtils.RegisterSideChannel(new FloatPropertiesChannel());
SideChannelUtils.RegisterSideChannel(new StatsSideChannel());
// Try to launch the communicator by using the arguments passed at launch
var port = ReadPortFromArgs();

if (Communicator != null)
{
Communicator.RegisterSideChannel(new EngineConfigurationChannel());
Communicator.RegisterSideChannel(floatProperties);
Communicator.RegisterSideChannel(new StatsSideChannel());
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.

DecideAction?.Invoke();
}
// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelUtils.GetSideChannelMessage();
}
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct?.Invoke();

Communicator?.Dispose();
Communicator = null;
SideChannelUtils.UnregisterAllSideChannels();
if (m_ModelRunners != null)
{

// TODO - Pass worker ID or some other identifier,
// so that multiple envs won't overwrite each others stats.
TimerStack.Instance.SaveJsonTimers();
FloatProperties = null;
m_Initialized = false;
// Reset the Lazy instance

29
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// <param name="agentId">A key to identify which Agent actions to get.</param>
/// <returns></returns>
float[] GetActions(string key, int agentId);
/// <summary>
/// Registers a side channel to the communicator. The side channel will exchange
/// messages with its Python equivalent.
/// </summary>
/// <param name="sideChannel"> The side channel to be registered.</param>
void RegisterSideChannel(SideChannel sideChannel);
/// <summary>
/// Unregisters a side channel from the communicator.
/// </summary>
/// <param name="sideChannel"> The side channel to be unregistered.</param>
void UnregisterSideChannel(SideChannel sideChannel);
/// <summary>
/// Returns the SideChannel of Type T if there is one registered, or null if there is none.
/// If there are multiple SideChannels of the same type registered, the returned instance is arbitrary.
/// </summary>
/// <typeparam name="T">The type of SideChannel to look up.</typeparam>
/// <returns>A registered side channel of type T, or null if none is registered.</returns>
T GetSideChannel<T>() where T : SideChannel;
/// <summary>
/// Returns all SideChannels of Type T that are registered. Use <see cref="GetSideChannel{T}()"/> if possible,
/// as that does not make any memory allocations.
/// </summary>
/// <typeparam name="T">The type of SideChannel to look up.</typeparam>
/// <returns>A list of the registered side channels of type T.</returns>
List<T> GetSideChannels<T>() where T : SideChannel;
}
}

195
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


using MLAgents.Sensors;
using MLAgents.Policies;
using MLAgents.SideChannels;
using System.IO;
using Google.Protobuf;
namespace MLAgents

#endif
/// The communicator parameters sent at construction
CommunicatorInitParameters m_CommunicatorInitParameters;
Dictionary<Guid, SideChannel> m_SideChannels = new Dictionary<Guid, SideChannel>();
/// <summary>
/// Initializes a new instance of the RPCCommunicator class.

void UpdateEnvironmentWithInput(UnityRLInputProto rlInput)
{
ProcessSideChannelData(m_SideChannels, rlInput.SideChannel.ToArray());
SideChannelUtils.ProcessSideChannelData(rlInput.SideChannel.ToArray());
SendCommandEvent(rlInput.Command);
}

message.RlInitializationOutput = tempUnityRlInitializationOutput;
}
byte[] messageAggregated = GetSideChannelMessage(m_SideChannels);
byte[] messageAggregated = SideChannelUtils.GetSideChannelMessage();
message.RlOutput.SideChannel = ByteString.CopyFrom(messageAggregated);
var input = Exchange(message);

{
m_SentBrainKeys.Add(brainProto.BrainName);
m_UnsentBrainKeys.Remove(brainProto.BrainName);
}
}
#endregion
#region Handling side channels
/// <summary>
/// Adds a side channel to this communicator so that it exchanges messages with
/// its Python equivalent. Cached messages that carry the channel's id are
/// delivered to it before it is added.
/// </summary>
/// <param name="sideChannel"> The side channel to be registered.</param>
public void RegisterSideChannel(SideChannel sideChannel)
{
    var id = sideChannel.ChannelId;
    if (m_SideChannels.ContainsKey(id))
    {
        throw new UnityAgentsException(string.Format(
            "A side channel with type index {0} is already registered. You cannot register multiple " +
            "side channels of the same id.", id));
    }

    // Walk the cache exactly once: each entry present at the start is dequeued, and
    // entries addressed to other channels are put back at the end of the queue.
    for (var remaining = m_CachedMessages.Count; remaining > 0; remaining--)
    {
        var pending = m_CachedMessages.Dequeue();
        if (id != pending.ChannelId)
        {
            m_CachedMessages.Enqueue(pending);
            continue;
        }
        using (var incoming = new IncomingMessage(pending.Message))
        {
            sideChannel.OnMessageReceived(incoming);
        }
    }

    m_SideChannels.Add(id, sideChannel);
}
/// <summary>
/// Unregisters a side channel from the communicator. Does nothing when no side
/// channel is registered under the given channel's id.
/// </summary>
/// <param name="sideChannel"> The side channel to be unregistered.</param>
public void UnregisterSideChannel(SideChannel sideChannel)
{
    // Dictionary.Remove returns false (without throwing) for a missing key, so a
    // preceding ContainsKey lookup is redundant.
    m_SideChannels.Remove(sideChannel.ChannelId);
}
/// <inheritdoc/>
public T GetSideChannel<T>() where T: SideChannel
{
    var wanted = typeof(T);
    foreach (var candidate in m_SideChannels.Values)
    {
        // Exact runtime-type match only: instances of types derived from T are skipped.
        if (candidate.GetType() != wanted)
        {
            continue;
        }
        return (T) candidate;
    }
    return null;
}
/// <inheritdoc/>
public List<T> GetSideChannels<T>() where T: SideChannel
{
    var wanted = typeof(T);
    var matches = new List<T>();
    foreach (var candidate in m_SideChannels.Values)
    {
        // Exact runtime-type match only: instances of types derived from T are skipped.
        if (candidate.GetType() != wanted)
        {
            continue;
        }
        matches.Add((T) candidate);
    }
    return matches;
}
/// <summary>
/// Collects the messages queued on the given side channels into a single byte array
/// to be sent to Python at the current step. Each message is written as the channel
/// id bytes, the message's element count, then the message itself. Every channel's
/// queue is cleared once its messages have been written.
/// </summary>
/// <param name="sideChannels"> A dictionary of channel id to channel.</param>
/// <returns>The aggregated bytes; empty when no channel has queued messages.</returns>
public static byte[] GetSideChannelMessage(Dictionary<Guid, SideChannel> sideChannels)
{
    using (var buffer = new MemoryStream())
    using (var writer = new BinaryWriter(buffer))
    {
        foreach (var channel in sideChannels.Values)
        {
            foreach (var payload in channel.MessageQueue)
            {
                writer.Write(channel.ChannelId.ToByteArray());
                writer.Write(payload.Count());
                writer.Write(payload);
            }
            channel.MessageQueue.Clear();
        }
        return buffer.ToArray();
    }
}
/// <summary>
/// A message received for a channel id that had no registered side channel at the
/// time, kept so it can be delivered later.
/// </summary>
private struct CachedSideChannelMessage
{
// Id of the side channel the message is addressed to.
public Guid ChannelId;
// Raw message payload as read from the incoming data.
public byte[] Message;
}
// Messages whose channel id was not recognized when they arrived. Static, so the
// queue is shared by every user of this class.
private static Queue<CachedSideChannelMessage> m_CachedMessages = new Queue<CachedSideChannelMessage>();
/// <summary>
/// Splits the data received from Python into individual messages and dispatches each
/// one to the side channel registered under its channel id.
/// Messages cached by an earlier call are handled first: they are delivered if their
/// channel is now known, otherwise logged and dropped. Messages in the new data whose
/// channel id is unknown are cached.
/// </summary>
/// <param name="sideChannels">A dictionary of channel id to channel.</param>
/// <param name="dataReceived">The byte array of data received from Python.</param>
public static void ProcessSideChannelData(Dictionary<Guid, SideChannel> sideChannels, byte[] dataReceived)
{
    // Drain the cache before looking at the new data.
    while (m_CachedMessages.Count != 0)
    {
        var stale = m_CachedMessages.Dequeue();
        SideChannel staleTarget;
        if (!sideChannels.TryGetValue(stale.ChannelId, out staleTarget))
        {
            Debug.Log(string.Format(
                "Unknown side channel data received. Channel Id is "
                + ": {0}", stale.ChannelId));
            continue;
        }
        using (var incoming = new IncomingMessage(stale.Message))
        {
            staleTarget.OnMessageReceived(incoming);
        }
    }

    if (dataReceived.Length == 0)
    {
        return;
    }

    using (var stream = new MemoryStream(dataReceived))
    using (var reader = new BinaryReader(stream))
    {
        while (stream.Position < stream.Length)
        {
            // Wire layout per message: 16-byte channel id, Int32 length, payload bytes.
            Guid id = Guid.Empty;
            byte[] payload = null;
            try
            {
                id = new Guid(reader.ReadBytes(16));
                var payloadLength = reader.ReadInt32();
                payload = reader.ReadBytes(payloadLength);
            }
            catch (Exception ex)
            {
                throw new UnityAgentsException(
                    "There was a problem reading a message in a SideChannel. Please make sure the " +
                    "version of MLAgents in Unity is compatible with the Python version. Original error : "
                    + ex.Message);
            }

            SideChannel target;
            if (!sideChannels.TryGetValue(id, out target))
            {
                // Don't recognize this ID, but cache it in case the SideChannel that can handle
                // it is registered before the next call to ProcessSideChannelData.
                m_CachedMessages.Enqueue(new CachedSideChannelMessage
                {
                    ChannelId = id,
                    Message = payload
                });
                continue;
            }
            using (var incoming = new IncomingMessage(payload))
            {
                target.OnMessageReceived(incoming);
            }
        }
    }
}

2
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using System.IO;
using System.Text;
namespace MLAgents.SideChannels
{

8
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.Collections.Generic;
using MLAgents.Sensors;
using MLAgents.Policies;
using MLAgents.SideChannels;
namespace MLAgents.Tests
{

Assert.AreEqual(0, aca.EpisodeCount);
Assert.AreEqual(0, aca.StepCount);
Assert.AreEqual(0, aca.TotalStepCount);
Assert.AreNotEqual(null, aca.FloatProperties);
Assert.AreNotEqual(null, SideChannelUtils.GetSideChannel<FloatPropertiesChannel>());
// Check that Dispose is idempotent
aca.Dispose();

[Test]
public void TestAcademyDispose()
{
var floatProperties1 = Academy.Instance.FloatProperties;
var floatProperties1 = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
var floatProperties2 = Academy.Instance.FloatProperties;
Academy.Instance.LazyInitialize();
var floatProperties2 = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
Academy.Instance.Dispose();
Assert.AreNotEqual(floatProperties1, floatProperties2);

16
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


intSender.SendInt(5);
intSender.SendInt(6);
byte[] fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
byte[] fakeData = SideChannelUtils.GetSideChannelMessage(dictSender);
SideChannelUtils.ProcessSideChannelData(dictReceiver, fakeData);
Assert.AreEqual(intReceiver.messagesReceived[0], 4);
Assert.AreEqual(intReceiver.messagesReceived[1], 5);

strSender.SendRawBytes(Encoding.ASCII.GetBytes(str1));
strSender.SendRawBytes(Encoding.ASCII.GetBytes(str2));
byte[] fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
byte[] fakeData = SideChannelUtils.GetSideChannelMessage(dictSender);
SideChannelUtils.ProcessSideChannelData(dictReceiver, fakeData);
var messages = strReceiver.GetAndClearReceivedMessages();

tmp = propB.GetPropertyWithDefault(k2, 3.0f);
Assert.AreEqual(tmp, 1.0f);
byte[] fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
byte[] fakeData = SideChannelUtils.GetSideChannelMessage(dictSender);
SideChannelUtils.ProcessSideChannelData(dictReceiver, fakeData);
tmp = propA.GetPropertyWithDefault(k2, 3.0f);
Assert.AreEqual(tmp, 1.0f);

Assert.AreEqual(wasCalled, 0);
fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
fakeData = SideChannelUtils.GetSideChannelMessage(dictSender);
SideChannelUtils.ProcessSideChannelData(dictReceiver, fakeData);
Assert.AreEqual(wasCalled, 1);
var keysA = propA.ListProperties();

8
docs/Custom-SideChannels.md


`base.QueueMessageToSend(msg)` method inside the side channel, and call the
`OutgoingMessage.Dispose()` method.
To register a side channel on the Unity side, call `Academy.Instance.RegisterSideChannel` with the side channel
To register a side channel on the Unity side, call `SideChannelUtils.RegisterSideChannel` with the side channel
as the only argument.
### Python side

// When a Debug.Log message is created, we send it to the stringChannel
Application.logMessageReceived += stringChannel.SendDebugStatementToPython;
// The channel must be registered with the Academy
Academy.Instance.RegisterSideChannel(stringChannel);
// The channel must be registered with the SideChannelUtils class
SideChannelUtils.RegisterSideChannel(stringChannel);
}
public void OnDestroy()

if (Academy.IsInitialized){
Academy.Instance.UnregisterSideChannel(stringChannel);
SideChannelUtils.UnregisterSideChannel(stringChannel);
}
}

3
docs/Installation.md


By installing the `mlagents` package, the dependencies listed in the
[setup.py file](../ml-agents/setup.py) are also installed. These include
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support) and
[Jupyter](Background-Jupyter.md).
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support).
#### Advanced: Installing for Development

5
docs/Learning-Environment-Create-New.md


includes a convenient Monitor class that you can use to easily display Agent
status information in the Game window.
One additional test you can perform is to first ensure that your environment and
the Python API work as expected using the `notebooks/getting-started.ipynb`
[Jupyter notebook](Background-Jupyter.md). Within the notebook, be sure to set
`env_name` to the name of the environment file you specify when building this
environment.
## Training the Environment

6
docs/Migrating.md


## Migrating from 0.15 to latest
### Important changes
* The Jupyter notebooks have been removed from the repository.
* `Academy.FloatProperties` was removed.
* `Academy.RegisterSideChannel` and `Academy.UnregisterSideChannel` were removed.
* Replace `Academy.FloatProperties` with `SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()`.
* Replace `Academy.RegisterSideChannel` with `SideChannelUtils.RegisterSideChannel()`.
* Replace `Academy.UnregisterSideChannel` with `SideChannelUtils.UnregisterSideChannel`.
## Migrating from 0.14 to 0.15

5
docs/Python-API.md


The ML-Agents Toolkit Low Level API is a Python API for controlling the simulation
loop of an environment or game built with Unity. This API is used by the
training algorithms inside the ML-Agents Toolkit, but you can also write your own
Python programs using this API. Go [here](../notebooks/getting-started.ipynb)
for a Jupyter Notebook walking through the functionality of the API.
Python programs using this API.
The key objects in the Python API include:

Once a property has been modified in Python, you can access it in C# after the next call to `step` as follows:
```csharp
var sharedProperties = Academy.Instance.FloatProperties;
var sharedProperties = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
float property1 = sharedProperties.GetPropertyWithDefault("parameter_1", 0.0f);
```

1
docs/Readme.md


## Installation & Set-up
* [Installation](Installation.md)
* [Background: Jupyter Notebooks](Background-Jupyter.md)
* [Using Virtual Environment](Using-Virtual-Environment.md)
## Getting Started

2
docs/Training-Curriculum-Learning.md


In order to define the curricula, the first step is to decide which parameters of
the environment will vary. In the case of the Wall Jump environment,
the height of the wall is what varies. We define this as a `Shared Float Property`
that can be accessed in `Academy.Instance.FloatProperties`, and by doing
that can be accessed in `SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()`, and by doing
so it becomes adjustable via the Python API.
Rather than adjusting it by hand, we will create a YAML file which
describes the structure of the curricula. Within it, we can specify which

4
docs/Training-Environment-Parameter-Randomization.md


To enable variations in the environments, we implemented `Environment Parameters`.
`Environment Parameters` are `Academy.Instance.FloatProperties` that can be read when setting
`Environment Parameters` are values in the `FloatPropertiesChannel` that can be read when setting
up the environment. We
also included different sampling methods and the ability to create new kinds of
sampling methods for each `Environment Parameter`. In the 3D ball environment example displayed

environment with a new sample of `Environment Parameters`.
* `Environment Parameter` - Name of the `Environment Parameter` like `mass`, `gravity` and `scale`. This should match the name
specified in the `FloatProperties` of the environment being trained. If a parameter specified in the file doesn't exist in the
specified in the `FloatPropertiesChannel` of the environment being trained. If a parameter specified in the file doesn't exist in the
environment, then this parameter will be ignored. Within each `Environment Parameter`
* `sampler-type` - Specify the sampler type to use for the `Environment Parameter`.

2
docs/Using-Tensorboard.md


## Custom Metrics from C#
To get custom metrics from a C# environment into Tensorboard, you can use the StatsSideChannel:
```csharp
var statsSideChannel = Academy.Instance.GetSideChannel<StatsSideChannel>();
var statsSideChannel = SideChannelUtils.GetSideChannel<StatsSideChannel>();
statsSideChannel.AddStat("MyMetric", 1.0);
```

2
gym-unity/README.md


The returned environment `env` will function as a gym.
For more on using the gym interface, see our
[Jupyter Notebook tutorial](../notebooks/getting-started-gym.ipynb).
## Limitations

7
gym-unity/gym_unity/envs/__init__.py


import logging
import itertools
import numpy as np
from typing import Any, Dict, List, Optional, Tuple, Union

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs import logging_util
class UnityGymException(error.Error):

pass
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gym_unity")
logger = logging_util.get_logger(__name__)
logging_util.set_log_level(logging_util.INFO)
GymSingleStepResult = Tuple[np.ndarray, float, bool, Dict]
GymMultiStepResult = Tuple[List[np.ndarray], List[float], List[bool], Dict]

5
ml-agents-envs/mlagents_envs/environment.py


import atexit
import glob
import uuid
import logging
import numpy as np
import os
import subprocess

from mlagents_envs.logging_util import get_logger
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.base_env import (

import struct
logger = logging.getLogger("mlagents_envs")
logger = get_logger(__name__)
class UnityEnvironment(BaseEnv):

4
ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py


from typing import List
import struct
import logging
from mlagents_envs.logging_util import get_logger
logger = logging.getLogger(__name__)
logger = get_logger(__name__)
class OutgoingMessage:

4
ml-agents-envs/mlagents_envs/side_channel/side_channel.py


from abc import ABC, abstractmethod
from typing import List
import uuid
import logging
from mlagents_envs.logging_util import get_logger
logger = logging.getLogger(__name__)
logger = get_logger(__name__)
class SideChannel(ABC):

5
ml-agents/mlagents/model_serialization.py


from distutils.util import strtobool
import os
import logging
from typing import Any, List, Set, NamedTuple
from distutils.version import LooseVersion

from tensorflow.python.platform import gfile
from tensorflow.python.framework import graph_util
from mlagents_envs.logging_util import get_logger
from mlagents.trainers import tensorflow_to_barracuda as tf2bc
if LooseVersion(tf.__version__) < LooseVersion("1.12.0"):

logger = get_logger(__name__)
logger = logging.getLogger("mlagents.trainers")
POSSIBLE_INPUT_NODES = frozenset(
[

5
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


import logging
from typing import Any, Dict, List
from collections import namedtuple
import numpy as np

from mlagents_envs.logging_util import get_logger
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
RewardSignalResult = namedtuple(
"RewardSignalResult", ["scaled_reward", "unscaled_reward"]

4
ml-agents/mlagents/trainers/curriculum.py


from .exception import CurriculumConfigError, CurriculumLoadingError
import logging
from mlagents_envs.logging_util import get_logger
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class Curriculum:

5
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
import logging
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec, AgentGroup
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod

from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.logging_util import get_logger
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class EnvironmentStep(NamedTuple):

5
ml-agents/mlagents/trainers/ghost/trainer.py


from typing import Deque, Dict, List, cast
import numpy as np
import logging
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy

create_name_behavior_id,
)
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class GhostTrainer(Trainer):

17
ml-agents/mlagents/trainers/learn.py


# # Unity ML-Agents Toolkit
import logging
import argparse
import os

from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer, get_timer_tree
from mlagents.logging_util import create_logger
from mlagents_envs import logging_util
logger = logging_util.get_logger(__name__)
def _create_parser():

with open(timing_path, "w") as f:
json.dump(get_timer_tree(), f, indent=4)
except FileNotFoundError:
logging.warning(
logger.warning(
f"Unable to save to {timing_path}. Make sure the directory exists"
)

print(get_version_string())
if options.debug:
log_level = logging.DEBUG
log_level = logging_util.DEBUG
log_level = logging.INFO
log_level = logging_util.INFO
trainer_logger = create_logger("mlagents.trainers", log_level)
logging_util.set_log_level(log_level)
trainer_logger.debug("Configuration for this run:")
trainer_logger.debug(json.dumps(options._asdict(), indent=4))
logger.debug("Configuration for this run:")
logger.debug(json.dumps(options._asdict(), indent=4))
run_seed = options.seed
if options.cpu:

4
ml-agents/mlagents/trainers/meta_curriculum.py


from typing import Dict, Set
from mlagents.trainers.curriculum import Curriculum
import logging
from mlagents_envs.logging_util import get_logger
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class MetaCurriculum:

4
ml-agents/mlagents/trainers/policy/tf_policy.py


import logging
from typing import Any, Dict, List, Optional
import abc
import numpy as np

from mlagents_envs.logging_util import get_logger
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.trajectory import SplitObservations

logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class UnityPolicyException(UnityException):

4
ml-agents/mlagents/trainers/ppo/trainer.py


# ## ML-Agent Learning (PPO)
# Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347
import logging
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.brain import BrainParameters

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class PPOTrainer(RLTrainer):

4
ml-agents/mlagents/trainers/sac/optimizer.py


import logging
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer

EPSILON = 1e-6 # Small value to avoid divide by zero
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"

5
ml-agents/mlagents/trainers/sac/trainer.py


# Contains an implementation of SAC as described in https://arxiv.org/abs/1801.01290
# and implemented in https://github.com/hill-a/stable-baselines
import logging
from collections import defaultdict
from typing import Dict
import os

from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
BUFFER_TRUNCATE_PERCENT = 0.8

7
ml-agents/mlagents/trainers/stats.py


import csv
import os
import time
import logging
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents_envs.timers import set_gauge
logger = logging.getLogger("mlagents.trainers")
logger = get_logger(__name__)
class StatsSummary(NamedTuple):

5
ml-agents/mlagents/trainers/subprocess_env_manager.py


import logging
from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set, Tuple