浏览代码

Merge branch 'master' into develop-splitpolicyoptimizer

/develop/nopreviousactions
Ervin Teng 5 年前
当前提交
bcc25d59
共有 77 个文件被更改,包括 3315 次插入350 次删除
  1. 7
      com.unity.ml-agents/CHANGELOG.md
  2. 3
      com.unity.ml-agents/LICENSE.md
  3. 13
      com.unity.ml-agents/Runtime/Academy.cs
  4. 2
      com.unity.ml-agents/Runtime/ActionMasker.cs
  5. 52
      com.unity.ml-agents/Runtime/Agent.cs
  6. 2
      com.unity.ml-agents/Runtime/DemonstrationRecorder.cs
  7. 32
      com.unity.ml-agents/Runtime/Grpc/RpcCommunicator.cs
  8. 2
      com.unity.ml-agents/Runtime/InferenceBrain/ApplierImpl.cs
  9. 3
      com.unity.ml-agents/Runtime/InferenceBrain/GeneratorImpl.cs
  10. 2
      com.unity.ml-agents/Runtime/Policy/RemotePolicy.cs
  11. 5
      com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs
  12. 6
      com.unity.ml-agents/Runtime/SideChannel/EngineConfigurationChannel.cs
  13. 11
      com.unity.ml-agents/Runtime/SideChannel/FloatPropertiesChannel.cs
  14. 12
      com.unity.ml-agents/Runtime/SideChannel/RawBytesChannel.cs
  15. 22
      com.unity.ml-agents/Runtime/SideChannel/SideChannel.cs
  16. 17
      com.unity.ml-agents/Runtime/Utilities.cs
  17. 1
      com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs
  18. 65
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  19. 2
      com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs
  20. 20
      com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
  21. 2
      com.unity.ml-agents/Tests/Runtime/SerializationTest.cs
  22. 2
      com.unity.ml-agents/Tests/Runtime/SerializeAgent.cs
  23. 3
      docs/Migrating.md
  24. 3
      docs/Profiling-Python.md
  25. 35
      docs/Python-API.md
  26. 11
      docs/Unity-Inference-Engine.md
  27. 36
      ml-agents-envs/mlagents_envs/environment.py
  28. 8
      ml-agents-envs/mlagents_envs/exception.py
  29. 46
      ml-agents-envs/mlagents_envs/rpc_utils.py
  30. 8
      ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py
  31. 13
      ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py
  32. 16
      ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py
  33. 24
      ml-agents-envs/mlagents_envs/side_channel/side_channel.py
  34. 30
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  35. 36
      ml-agents-envs/mlagents_envs/tests/test_side_channel.py
  36. 39
      ml-agents-envs/mlagents_envs/tests/test_timers.py
  37. 22
      ml-agents-envs/mlagents_envs/timers.py
  38. 2
      ml-agents-envs/setup.py
  39. 7
      ml-agents/mlagents/trainers/demo_loader.py
  40. 23
      ml-agents/mlagents/trainers/ghost/trainer.py
  41. 9
      ml-agents/mlagents/trainers/learn.py
  42. 23
      ml-agents/mlagents/trainers/stats.py
  43. 1001
      ml-agents/mlagents/trainers/tests/test.demo
  44. 2
      ml-agents/mlagents/trainers/tests/test_ghost.py
  45. 9
      ml-agents/mlagents/trainers/tests/test_stats.py
  46. 46
      ml-agents/mlagents/trainers/tf_policy.py
  47. 5
      ml-agents/mlagents/trainers/trainer.py
  48. 2
      ml-agents/mlagents/trainers/trainer_controller.py
  49. 2
      test_constraints_min_version.txt
  50. 4
      test_requirements.txt
  51. 3
      com.unity.ml-agents/Editor/Icons.meta
  52. 205
      ml-agents/mlagents/model_serialization.py
  53. 70
      DevProject/.gitignore
  54. 58
      DevProject/Packages/manifest.json
  55. 19
      DevProject/ProjectSettings/AudioManager.asset
  56. 6
      DevProject/ProjectSettings/ClusterInputManager.asset
  57. 36
      DevProject/ProjectSettings/DynamicsManager.asset
  58. 8
      DevProject/ProjectSettings/EditorBuildSettings.asset
  59. 35
      DevProject/ProjectSettings/EditorSettings.asset
  60. 64
      DevProject/ProjectSettings/GraphicsSettings.asset
  61. 295
      DevProject/ProjectSettings/InputManager.asset
  62. 91
      DevProject/ProjectSettings/NavMeshAreas.asset
  63. 56
      DevProject/ProjectSettings/Physics2DSettings.asset
  64. 7
      DevProject/ProjectSettings/PresetManager.asset
  65. 610
      DevProject/ProjectSettings/ProjectSettings.asset
  66. 2
      DevProject/ProjectSettings/ProjectVersion.txt
  67. 236
      DevProject/ProjectSettings/QualitySettings.asset
  68. 43
      DevProject/ProjectSettings/TagManager.asset
  69. 9
      DevProject/ProjectSettings/TimeManager.asset
  70. 34
      DevProject/ProjectSettings/UnityConnectSettings.asset
  71. 12
      DevProject/ProjectSettings/VFXManager.asset
  72. 10
      DevProject/ProjectSettings/XRSettings.asset
  73. 8
      com.unity.ml-agents/Runtime/Resources.meta
  74. 0
      /com.unity.ml-agents/Editor/Icons

7
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
- Agent.CollectObservations now takes a VectorSensor argument. It was also overloaded to optionally take an ActionMasker argument. (#3352, #3389)
- Beta support for ONNX export was added. If the `tf2onnx` python package is installed, models will be saved to `.onnx` as well as `.nn` format.
Note that Barracuda 0.6.0 or later is required to import the `.onnx` files properly.
### Minor Changes
- Monitor.cs was moved to Examples. (#3372)

- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- Fixed an issue which caused self-play training sessions to consume a lot of memory. (#3451)
- Fixed an IndexError when using GAIL or behavioral cloning with demonstrations recorded with 0.14.0 or later (#3464)
## [0.14.0-preview] - 2020-02-13

3
com.unity.ml-agents/LICENSE.md


com.unity.ml-agents copyright © 2020 Unity Technologies ApS
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

13
com.unity.ml-agents/Runtime/Academy.cs


// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done

public bool AutomaticSteppingEnabled
{
get { return m_FixedUpdateStepper != null; }
set {
set
{
if (value)
{
EnableAutomaticStepping();

AgentSetStatus?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
using (TimerStack.Instance.Scoped("AgentSendState"))
{

{
AgentAct?.Invoke();
}
m_StepCount += 1;
m_TotalStepCount += 1;
}
/// <summary>

2
com.unity.ml-agents/Runtime/ActionMasker.cs


/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the actions passed as argument at the next decision.
/// able to perform the actions passed as argument at the next decision.
/// The actionIndices correspond to the actions the agent will be unable to perform
/// on the branch 0.
/// </summary>

52
com.unity.ml-agents/Runtime/Agent.cs


public int maxStep;
}
[SerializeField] [HideInInspector]
[SerializeField][HideInInspector]
[SerializeField] [HideInInspector]
[SerializeField][HideInInspector]
internal bool hasUpgradedFromAgentParameters;
/// <summary>

LazyInitialize();
}
}
hasUpgradedFromAgentParameters = true;
}

m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;
Academy.Instance.AgentSendState += SendInfo;
Academy.Instance.DecideAction += DecideAction;
Academy.Instance.AgentAct += AgentStep;

// We don't want to even try, because this will lazily create a new Academy!
if (Academy.IsInitialized)
{
Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
Academy.Instance.AgentSendState -= SendInfo;
Academy.Instance.DecideAction -= DecideAction;
Academy.Instance.AgentAct -= AgentStep;

// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{
m_Recorder.WriteExperience(m_Info, sensors);
}
UpdateRewardStats();
// The Agent is done, so we give it a new episode Id

public void SetReward(float reward)
{
#if DEBUG
if (float.IsNaN(reward))
{
throw new ArgumentException("NaN reward passed to SetReward.");
}
Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward));
#endif
m_CumulativeReward += (reward - m_Reward);
m_Reward = reward;

public void AddReward(float increment)
{
#if DEBUG
if (float.IsNaN(increment))
{
throw new ArgumentException("NaN reward passed to AddReward.");
}
Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward));
#endif
m_Reward += increment;
m_CumulativeReward += increment;

/// </summary>
public float[] GetAction()
{
return m_Action.vectorActions;
return m_Action.vectorActions;
}
/// <summary>

}
}
void AgentIncrementStep()
{
m_StepCount += 1;
}
if ((m_StepCount >= maxStep) && (maxStep > 0))
if ((m_RequestAction) && (m_Brain != null))
NotifyAgentDone(true);
_AgentReset();
}
else
{
m_StepCount += 1;
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
if ((m_RequestAction) && (m_Brain != null))
if ((m_StepCount >= maxStep) && (maxStep > 0))
m_RequestAction = false;
if (m_Action.vectorActions != null)
{
AgentAction(m_Action.vectorActions);
}
NotifyAgentDone(true);
_AgentReset();
}
}

if (m_Action.vectorActions == null){
ResetData();
}
}
}
}

2
com.unity.ml-agents/Runtime/DemonstrationRecorder.cs


/// </summary>
public void WriteExperience(AgentInfo info, List<ISensor> sensors)
{
m_DemoStore.Record(info, sensors);
m_DemoStore?.Record(info, sensors);
}
public void Close()

32
com.unity.ml-agents/Runtime/Grpc/RpcCommunicator.cs


/// The communicator parameters sent at construction
CommunicatorInitParameters m_CommunicatorInitParameters;
Dictionary<int, SideChannel> m_SideChannels = new Dictionary<int, SideChannel>();
Dictionary<Guid, SideChannel> m_SideChannels = new Dictionary<Guid, SideChannel>();
/// <summary>
/// Initializes a new instance of the RPCCommunicator class.

/// <param name="sideChannel"> The side channel to be registered.</param>
public void RegisterSideChannel(SideChannel sideChannel)
{
var channelType = sideChannel.ChannelType();
if (m_SideChannels.ContainsKey(channelType))
var channelId = sideChannel.ChannelId;
if (m_SideChannels.ContainsKey(channelId))
"side channels of the same type.", channelType));
"side channels of the same id.", channelId));
m_SideChannels.Add(channelType, sideChannel);
m_SideChannels.Add(channelId, sideChannel);
}
/// <summary>

public void UnregisterSideChannel(SideChannel sideChannel)
{
if (m_SideChannels.ContainsKey(sideChannel.ChannelType()))
if (m_SideChannels.ContainsKey(sideChannel.ChannelId))
m_SideChannels.Remove(sideChannel.ChannelType());
m_SideChannels.Remove(sideChannel.ChannelId);
}
}

/// </summary>
/// <param name="sideChannels"> A dictionary of channel type to channel.</param>
/// <returns></returns>
public static byte[] GetSideChannelMessage(Dictionary<int, SideChannel> sideChannels)
public static byte[] GetSideChannelMessage(Dictionary<Guid, SideChannel> sideChannels)
{
using (var memStream = new MemoryStream())
{

var messageList = sideChannel.MessageQueue;
foreach (var message in messageList)
{
binaryWriter.Write(sideChannel.ChannelType());
binaryWriter.Write(sideChannel.ChannelId.ToByteArray());
binaryWriter.Write(message.Count());
binaryWriter.Write(message);
}

/// </summary>
/// <param name="sideChannels">A dictionary of channel type to channel.</param>
/// <param name="dataReceived">The byte array of data received from Python.</param>
public static void ProcessSideChannelData(Dictionary<int, SideChannel> sideChannels, byte[] dataReceived)
public static void ProcessSideChannelData(Dictionary<Guid, SideChannel> sideChannels, byte[] dataReceived)
{
if (dataReceived.Length == 0)
{

{
while (memStream.Position < memStream.Length)
{
int channelType = 0;
Guid channelId = Guid.Empty;
channelType = binaryReader.ReadInt32();
channelId = new Guid(binaryReader.ReadBytes(16));
var messageLength = binaryReader.ReadInt32();
message = binaryReader.ReadBytes(messageLength);
}

"version of MLAgents in Unity is compatible with the Python version. Original error : "
+ ex.Message);
}
if (sideChannels.ContainsKey(channelType))
if (sideChannels.ContainsKey(channelId))
sideChannels[channelType].OnMessageReceived(message);
sideChannels[channelId].OnMessageReceived(message);
"Unknown side channel data received. Channel type "
+ ": {0}", channelType));
"Unknown side channel data received. Channel Id is "
+ ": {0}", channelId));
}
}
}

2
com.unity.ml-agents/Runtime/InferenceBrain/ApplierImpl.cs


{
actionValue[j] = tensorProxy.data[agentIndex, j];
}
}
agentIndex++;
}

{
actionVal[j] = actionValues[agentIndex, j];
}
}
agentIndex++;
}

3
com.unity.ml-agents/Runtime/InferenceBrain/GeneratorImpl.cs


{
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
if (pastAction != null){
if (pastAction != null)
{
for (var j = 0; j < actionSize; j++)
{
tensorProxy.data[agentIndex, j] = pastAction[j];

2
com.unity.ml-agents/Runtime/Policy/RemotePolicy.cs


/// </summary>
internal class RemotePolicy : IPolicy
{
int m_AgentId;
string m_FullyQualifiedBehaviorName;

{
m_Communicator?.DecideBatch();
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
}
public void Dispose()

5
com.unity.ml-agents/Runtime/Sensor/VectorSensor.cs


void AddFloatObs(float obs)
{
#if DEBUG
if (float.IsNaN(obs))
{
throw new System.ArgumentException("NaN value passed to observation.");
}
Utilities.DebugCheckNanAndInfinity(obs, nameof(obs), nameof(AddFloatObs));
#endif
m_Observations.Add(obs);
}

6
com.unity.ml-agents/Runtime/SideChannel/EngineConfigurationChannel.cs


using System.IO;
using System;
using UnityEngine;
namespace MLAgents

public override int ChannelType()
private const string k_EngineConfigId = "e951342c-4f7e-11ea-b238-784f4387d1f7";
public EngineConfigurationChannel()
return (int)SideChannelType.EngineSettings;
ChannelId = new Guid(k_EngineConfigId);
}
public override void OnMessageReceived(byte[] data)

11
com.unity.ml-agents/Runtime/SideChannel/FloatPropertiesChannel.cs


{
Dictionary<string, float> m_FloatProperties = new Dictionary<string, float>();
Dictionary<string, Action<float>> m_RegisteredActions = new Dictionary<string, Action<float>>();
private const string k_FloatPropertiesDefaultId = "60ccf7d0-4f7e-11ea-b238-784f4387d1f7";
public override int ChannelType()
public FloatPropertiesChannel(Guid channelId = default(Guid))
return (int)SideChannelType.FloatProperties;
if (channelId == default(Guid))
{
ChannelId = new Guid(k_FloatPropertiesDefaultId);
}
else{
ChannelId = channelId;
}
}
public override void OnMessageReceived(byte[] data)

12
com.unity.ml-agents/Runtime/SideChannel/RawBytesChannel.cs


using System.Collections.Generic;
using System;
int m_ChannelId;
/// <summary>
/// RawBytesChannel provides a way to exchange raw byte arrays between Unity and Python.

public RawBytesChannel(int channelId = 0)
{
m_ChannelId = channelId;
}
public override int ChannelType()
public RawBytesChannel(Guid channelId)
return (int)SideChannelType.RawBytesChannelStart + m_ChannelId;
ChannelId = channelId;
}
public override void OnMessageReceived(byte[] data)

22
com.unity.ml-agents/Runtime/SideChannel/SideChannel.cs


using System.Collections.Generic;
using System;
public enum SideChannelType
{
// Invalid side channel
Invalid = 0,
// Reserved for the FloatPropertiesChannel.
FloatProperties = 1,
//Reserved for the EngineConfigurationChannel.
EngineSettings = 2,
// Raw bytes channels should start here to avoid conflicting with other Unity ones.
RawBytesChannelStart = 1000,
// custom side channels should start here to avoid conflicting with Unity ones.
UserSideChannelStart = 2000,
}
public List<byte[]> MessageQueue = new List<byte[]>();
internal List<byte[]> MessageQueue = new List<byte[]>();
/// <summary>
/// An int identifier for the SideChannel. Ensures that there is only ever one side channel

public abstract int ChannelType();
public Guid ChannelId{
get;
protected set;
}
/// <summary>
/// Is called by the communicator every time a message is received from Python by the SideChannel.

17
com.unity.ml-agents/Runtime/Utilities.cs


using System;
using UnityEngine;
using System.Collections.Generic;

}
return numFloatObservations;
}
#if DEBUG
/// <summary>
/// Validates a float and throws if it is NaN or infinite.
/// </summary>
/// <param name="value">The float to validate.</param>
/// <param name="valueCategory">Label describing the value; inserted into the exception message.</param>
/// <param name="caller">Name of the calling method; inserted into the exception message.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="value"/> is NaN or positive/negative infinity.
/// </exception>
internal static void DebugCheckNanAndInfinity(float value, string valueCategory, string caller)
{
    if (float.IsNaN(value))
    {
        throw new ArgumentException($"NaN {valueCategory} passed to {caller}.");
    }
    // float.IsInfinity covers both positive and negative infinity.
    if (float.IsInfinity(value))
    {
        throw new ArgumentException($"Infinity {valueCategory} passed to {caller}.");
    }
}
#endif
}

1
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


{
class TestAgent : Agent
{
}
[Test]

65
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


namespace MLAgents.Tests
{
public void RequestDecision(AgentInfo info, List<ISensor> sensors) { }
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {}
public void Dispose() { }
public void Dispose() {}
}
public class TestAgent : Agent

public override void AgentReset()
{
agentResetCalls += 1;
collectObservationsCallsSinceLastReset = 0;
agentActionCallsSinceLastReset = 0;

return sensorName;
}
public void Update() { }
public void Update() {}
}
[TestFixture]

agent1.LazyInitialize();
agent2.SetPolicy(new TestPolicy());
var j = 0;
for (var i = 0; i < 500; i++)
var expectedAgent1ActionSinceReset = 0;
for (var i = 0; i < 50; i++)
if (i % 21 == 0)
{
j = 0;
}
else
{
j++;
expectedAgent1ActionSinceReset += 1;
if (expectedAgent1ActionSinceReset == agent1.maxStep || i == 0){
expectedAgent1ActionSinceReset = 0;
Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionSinceReset * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
agent1.AddReward(10f);

decisionRequester.DecisionPeriod = 1;
decisionRequester.Awake();
var maxStep = 6;
const int maxStep = 6;
var expectedAgentStepCount = 0;
var expectedResets= 0;
var expectedAgentAction = 0;
var expectedAgentActionSinceReset = 0;
var expectedCollectObsCalls = 0;
var expectedCollectObsCallsSinceReset = 0;
// We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
var previousNumResets = agent1.agentResetCalls;
aca.EnvironmentStep();
// Agent should observe and act on each Academy step
expectedAgentAction += 1;
expectedAgentActionSinceReset += 1;
expectedCollectObsCalls += 1;
expectedCollectObsCallsSinceReset += 1;
expectedAgentStepCount += 1;
if (expectReset)
// If the next step will put the agent at maxSteps, we expect it to reset
if (agent1.GetStepCount() == maxStep - 1 || (i == 0))
Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
expectedResets +=1;
else
if (agent1.GetStepCount() == maxStep - 1)
Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
expectedAgentActionSinceReset = 0;
expectedCollectObsCallsSinceReset = 0;
expectedAgentStepCount = 0;
aca.EnvironmentStep();
Assert.AreEqual(expectedAgentStepCount, agent1.GetStepCount());
Assert.AreEqual(expectedResets, agent1.agentResetCalls);
Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);
Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls);
Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset);
}
}
}

2
com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs


SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
obj.transform.localScale = new Vector3(2, 2,2 );
obj.transform.localScale = new Vector3(2, 2, 2);
perception.raysPerDirection = 0;
perception.maxRayDegrees = 45;

20
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


{
public List<int> messagesReceived = new List<int>();
public override int ChannelType() { return -1; }
public TestSideChannel() {
ChannelId = new Guid("6afa2c06-4f82-11ea-b238-784f4387d1f7");
}
public override void OnMessageReceived(byte[] data)
{

{
var intSender = new TestSideChannel();
var intReceiver = new TestSideChannel();
var dictSender = new Dictionary<int, SideChannel> { { intSender.ChannelType(), intSender } };
var dictReceiver = new Dictionary<int, SideChannel> { { intReceiver.ChannelType(), intReceiver } };
var dictSender = new Dictionary<Guid, SideChannel> { { intSender.ChannelId, intSender } };
var dictReceiver = new Dictionary<Guid, SideChannel> { { intReceiver.ChannelId, intReceiver } };
intSender.SendInt(4);
intSender.SendInt(5);

var str1 = "Test string";
var str2 = "Test string, second";
var strSender = new RawBytesChannel();
var strReceiver = new RawBytesChannel();
var dictSender = new Dictionary<int, SideChannel> { { strSender.ChannelType(), strSender } };
var dictReceiver = new Dictionary<int, SideChannel> { { strReceiver.ChannelType(), strReceiver } };
var strSender = new RawBytesChannel(new Guid("9a5b8954-4f82-11ea-b238-784f4387d1f7"));
var strReceiver = new RawBytesChannel(new Guid("9a5b8954-4f82-11ea-b238-784f4387d1f7"));
var dictSender = new Dictionary<Guid, SideChannel> { { strSender.ChannelId, strSender } };
var dictReceiver = new Dictionary<Guid, SideChannel> { { strReceiver.ChannelId, strReceiver } };
strSender.SendRawBytes(Encoding.ASCII.GetBytes(str1));
strSender.SendRawBytes(Encoding.ASCII.GetBytes(str2));

var propA = new FloatPropertiesChannel();
var propB = new FloatPropertiesChannel();
var dictReceiver = new Dictionary<int, SideChannel> { { propA.ChannelType(), propA } };
var dictSender = new Dictionary<int, SideChannel> { { propB.ChannelType(), propB } };
var dictReceiver = new Dictionary<Guid, SideChannel> { { propA.ChannelId, propA } };
var dictSender = new Dictionary<Guid, SideChannel> { { propB.ChannelId, propB } };
propA.RegisterCallback(k1, f => { wasCalled++; });
var tmp = propB.GetPropertyWithDefault(k2, 3.0f);

2
com.unity.ml-agents/Tests/Runtime/SerializationTest.cs


// using System.Collections;
// using System.Collections;
// using NUnit.Framework;
// #if UNITY_EDITOR
// using UnityEditor.SceneManagement;

2
com.unity.ml-agents/Tests/Runtime/SerializeAgent.cs


using System.Collections;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;

3
docs/Migrating.md


### Steps to Migrate
* Replace your Agent's implementation of `CollectObservations()` with `CollectObservations(VectorSensor sensor)`. In addition, replace all calls to `AddVectorObs()` with `sensor.AddObservation()` or `sensor.AddOneHotObservation()` on the `VectorSensor` passed as argument.
* Replace your calls to `SetActionMask` on your Agent with `ActionMasker.SetActionMask` in `CollectObservations`.
* Re-import all of your `*.NN` files to work with the updated Barracuda package.
## Migrating from 0.13 to 0.14

* Move the AcademyStep code to MonoBehaviour.FixedUpdate
* Move the OnDestroy code to MonoBehaviour.OnDestroy.
* Move the AcademyReset code to a new method and add it to the Academy.OnEnvironmentReset action.
* Multiply `max_steps` and `summary_steps` in your `trainer_config.yaml` by the number of Agents in the scene.
* Multiply `max_steps` and `summary_freq` in your `trainer_config.yaml` by the number of Agents in the scene.
* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If you have a model trained which uses RayPerceptionSensor and has non-1.0 scale in the Agent's transform, it must be retrained.

3
docs/Profiling-Python.md


## Output
By default, at the end of training, timers are collected and written in json format to
`{summaries_dir}/{run_id}_timers.json`. The output consists of node objects with the following keys:
* name (string): The name of the block of code.
* children (list): A list of child nodes.
* children (dictionary): A dictionary of child nodes, keyed by the node name.
* is_parallel (bool): Indicates that the block of code was executed in multiple threads or processes (see below). This
is optional and defaults to false.

35
docs/Python-API.md


You can create your own `SideChannel` in C# and Python and use it to communicate data between the two.
##### Unity side
The side channel will have to implement the `SideChannel` abstract class. There are two methods
that must be implemented :
The side channel will have to implement the `SideChannel` abstract class and the following method.
* `ChannelType()` : Must return an integer identifying the side channel (This number must be the same on C#
and Python). There can only be one side channel of a certain type during communication.
The side channel must also assign a `ChannelId` property in the constructor. The `ChannelId` is a Guid
(or UUID in Python) used to uniquely identify a side channel. This Guid must be the same on C# and Python.
There can only be one side channel of a certain id during communication.
To send a byte array from C# to Python, call the `base.QueueMessageToSend(data)` method inside the side channel.
The `data` argument must be a `byte[]`.

##### Python side
The side channel will have to implement the `SideChannel` abstract class. You must implement :
* `channel_type(self) -> int` (property) : Must return an integer identifying the side channel (This number must
be the same on C# and Python). There can only be one side channel of a certain type during communication.
The side channel must also assign a `channel_id` property in the constructor. The `channel_id` is a UUID
(referred to in C# as a Guid) used to uniquely identify a side channel. This UUID must be the same on C# and
Python. There can only be one side channel with a given id during communication.
To assign the `channel_id` call the abstract class constructor with the appropriate `channel_id` as follows:
```python
super().__init__(my_channel_id)
```
To send a byte array from Python to C#, call the `super().queue_message_to_send(bytes_data)` method inside the
side channel. The `bytes_data` argument must be a `bytes` object.

using UnityEngine;
using MLAgents;
using System.Text;
using System;
public override int ChannelType()
public StringLogSideChannel()
return (int)SideChannelType.UserSideChannelStart + 1;
ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
}
public override void OnMessageReceived(byte[] data)

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import SideChannel, SideChannelType
from mlagents_envs.side_channel.side_channel import SideChannel
@property
def channel_type(self) -> int:
return SideChannelType.UserSideChannelStart + 1
def __init__(self) -> None:
super().__init__(uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
"""

env.step() # Move the simulation forward
env.close()
```
Now, if you run this script and press `Play` in the Unity Editor when prompted, the console in the Unity Editor will

11
docs/Unity-Inference-Engine.md


* iOS
* Android
## Supported formats
There are currently two supported model formats:
* Barracuda (`.nn`) files use a proprietary format produced by the `tensorflow_to_barracuda.py` script.
* ONNX (`.onnx`) files use an [industry-standard open format](https://onnx.ai/about.html) produced by the [tf2onnx package](https://github.com/onnx/tensorflow-onnx).
Export to ONNX is currently considered beta. To enable it, make sure `tf2onnx>=1.5.5` is installed via pip.
tf2onnx does not currently support tensorflow 2.0.0 or later.
When using a model, drag the `.nn` file into the **Model** field
in the Inspector of the Agent.
When using a model, drag the model file into the **Model** field in the Inspector of the Agent.
Select the **Inference Device** : CPU or GPU you want to use for Inference.
**Note:** For most of the models generated with the ML-Agents toolkit, CPU will be faster than GPU.

36
ml-agents-envs/mlagents_envs/environment.py


import atexit
import glob
import uuid
import logging
import numpy as np
import os

self.timeout_wait: int = timeout_wait
self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
self.worker_id = worker_id
self.side_channels: Dict[int, SideChannel] = {}
self.side_channels: Dict[uuid.UUID, SideChannel] = {}
if _sc.channel_type in self.side_channels:
if _sc.channel_id in self.side_channels:
"There cannot be two side channels with the same channel type {0}.".format(
_sc.channel_type
"There cannot be two side channels with the same channel id {0}.".format(
_sc.channel_id
self.side_channels[_sc.channel_type] = _sc
self.side_channels[_sc.channel_id] = _sc
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

@staticmethod
def _parse_side_channel_message(
side_channels: Dict[int, SideChannel], data: bytes
side_channels: Dict[uuid.UUID, SideChannel], data: bytes
channel_type, message_len = struct.unpack_from("<ii", data, offset)
offset = offset + 8
channel_id = uuid.UUID(bytes_le=bytes(data[offset : offset + 16]))
offset += 16
message_len, = struct.unpack_from("<i", data, offset)
offset = offset + 4
message_data = data[offset : offset + message_len]
offset = offset + message_len
except Exception:

raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_type)
"sending side channel data properly.".format(channel_id)
if channel_type in side_channels:
side_channels[channel_type].on_message_received(message_data)
if channel_id in side_channels:
side_channels[channel_id].on_message_received(message_data)
": {0}.".format(channel_type)
": {0}.".format(channel_id)
def _generate_side_channel_data(side_channels: Dict[int, SideChannel]) -> bytearray:
def _generate_side_channel_data(
side_channels: Dict[uuid.UUID, SideChannel]
) -> bytearray:
for channel_type, channel in side_channels.items():
for channel_id, channel in side_channels.items():
result += struct.pack("<ii", channel_type, len(message))
result += channel_id.bytes_le
result += struct.pack("<i", len(message))
result += message
channel.message_queue = []
return result

8
ml-agents-envs/mlagents_envs/exception.py


pass
class UnitySideChannelException(UnityException):
    """
    Raised for errors related to side channels.

    Subclass of UnityException; it adds no behavior of its own.
    """
    pass
class UnityWorkerInUseException(UnityException):
"""
This error occurs when the port for a certain worker ID is already reserved.

46
ml-agents-envs/mlagents_envs/rpc_utils.py


return np.array(batched_visual, dtype=np.float32)
def _raise_on_nan_and_inf(data: np.ndarray, source: str) -> np.ndarray:
    """
    Ensure that `data` contains no NaN or infinite values.

    :param data: Array of observation or reward values to validate.
    :param source: Label for the data (e.g. "observations" or "rewards");
        inserted into the error message.
    :return: `data` itself, unmodified, when it is empty or fully finite.
    :raises RuntimeError: If `data` contains a NaN or an infinite value.
    """
    if data.size == 0:
        return data

    # Fast path: a single reduction. Any NaN makes the mean NaN and any Inf
    # makes it Inf (or NaN), so a finite mean proves every element is finite.
    # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
    # Overflow/invalid warnings are silenced because a non-finite mean is
    # handled explicitly below.
    with np.errstate(over="ignore", invalid="ignore"):
        d = np.mean(data)
    if np.isfinite(d):
        return data

    # Slow path: the mean alone is ambiguous. It can overflow to Inf for very
    # large finite values, and +Inf with -Inf averages to NaN. Inspect the
    # elements so we only raise, and raise the right error, for real offenders.
    if np.isnan(data).any():
        raise RuntimeError(f"The {source} provided had NaN values.")
    if np.isinf(data).any():
        raise RuntimeError(f"The {source} provided had Infinite values.")
    return data
@timed
def _process_vector_observation(
obs_index: int,

],
dtype=np.float32,
)
# Check for NaNs or infs in the observations
# If there's a NaN in the observations, the np.mean() result will be NaN
# If there's an Inf (either sign) then the result will be Inf
# See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background
# Note that very large values (larger than sqrt(float_max)) will result in an Inf value here
# This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call.
d = np.mean(np_obs)
has_nan = np.isnan(d)
has_inf = not np.isfinite(d)
# If we have any NaN or Infs, use np.nan_to_num to replace these with finite values
if has_nan or has_inf:
np_obs = np.nan_to_num(np_obs)
if has_nan:
logger.warning(f"An agent had a NaN observation in the environment")
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs

[agent_info.reward for agent_info in agent_info_list], dtype=np.float32
)
d = np.dot(rewards, rewards)
has_nan = np.isnan(d)
has_inf = not np.isfinite(d)
# If we have any NaN or Infs, use np.nan_to_num to replace these with finite values
if has_nan or has_inf:
rewards = np.nan_to_num(rewards)
if has_nan:
logger.warning(f"An agent had a NaN reward in the environment")
_raise_on_nan_and_inf(rewards, "rewards")
done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
max_step = np.array(

8
ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel, SideChannelType
from mlagents_envs.side_channel.side_channel import SideChannel
import uuid
from typing import NamedTuple

- int targetFrameRate;
"""
@property
def channel_type(self) -> int:
return SideChannelType.EngineSettings
def __init__(self) -> None:
super().__init__(uuid.UUID("e951342c-4f7e-11ea-b238-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
"""

13
ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel, SideChannelType
from mlagents_envs.side_channel.side_channel import SideChannel
import uuid
from typing import Dict, Tuple, Optional, List

set_property, get_property and list_properties.
"""
def __init__(self) -> None:
def __init__(self, channel_id: uuid.UUID = None) -> None:
super().__init__()
@property
def channel_type(self) -> int:
return SideChannelType.FloatProperties
if channel_id is None:
channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
"""

16
ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel, SideChannelType
from mlagents_envs.side_channel.side_channel import SideChannel
import uuid
class RawBytesChannel(SideChannel):

"""
def __init__(self, channel_id=0):
self._received_messages = []
self._channel_id = channel_id
super().__init__()
@property
def channel_type(self) -> int:
return SideChannelType.RawBytesChannelStart + self._channel_id
def __init__(self, channel_id: uuid.UUID):
self._received_messages: List[bytes] = []
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
"""

"""
self._received_messages.append(data)
def get_and_clear_received_messages(self) -> List[bytearray]:
def get_and_clear_received_messages(self) -> List[bytes]:
"""
returns a list of bytearray received from the environment.
"""

24
ml-agents-envs/mlagents_envs/side_channel/side_channel.py


from abc import ABC, abstractmethod
from enum import IntEnum
class SideChannelType(IntEnum):
FloatProperties = 1
EngineSettings = 2
# Raw bytes channels should start here to avoid conflicting with other
# Unity ones.
RawBytesChannelStart = 1000
# custom side channels should start here to avoid conflicting with Unity
# ones.
UserSideChannelStart = 2000
from typing import List
import uuid
class SideChannel(ABC):

to the Env object at construction.
"""
def __init__(self):
self.message_queue = []
def __init__(self, channel_id):
self._channel_id: uuid.UUID = channel_id
self.message_queue: List[bytearray] = []
def queue_message_to_send(self, data: bytearray) -> None:
"""

pass
@property
@abstractmethod
def channel_type(self) -> int:
def channel_id(self) -> uuid.UUID:
pass
return self._channel_id

30
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


def generate_list_agent_proto(
n_agent: int, shape: List[Tuple[int]]
n_agent: int,
shape: List[Tuple[int]],
infinite_rewards: bool = False,
nan_observations: bool = False,
ap.reward = agent_index
ap.reward = float("inf") if infinite_rewards else agent_index
ap.done = agent_index % 2 == 0
ap.max_step_reached = agent_index % 2 == 1
ap.id = agent_index

obs_proto = ObservationProto()
obs_proto.shape.extend(list(shape[obs_index]))
obs_proto.compression_type = NONE
obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index]))
obs_proto.float_data.data.extend(
([float("nan")] if nan_observations else [0.1])
* np.prod(shape[obs_index])
)
obs_proto_list.append(obs_proto)
ap.observations.extend(obs_proto_list)
result.append(ap)

assert not group_spec.is_action_discrete()
assert group_spec.is_action_continuous()
assert group_spec.action_size == 6
def test_batched_step_result_from_proto_raises_on_infinite():
    """Agent protos carrying infinite rewards must be rejected with a RuntimeError."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    protos = generate_list_agent_proto(
        agent_count, obs_shapes, infinite_rewards=True
    )
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(protos, spec)
def test_batched_step_result_from_proto_raises_on_nan():
    """Agent protos carrying NaN observations must be rejected with a RuntimeError."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    protos = generate_list_agent_proto(
        agent_count, obs_shapes, nan_observations=True
    )
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(protos, spec)

36
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


import struct
import uuid
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel
from mlagents_envs.side_channel.raw_bytes_channel import RawBytesChannel

class IntChannel(SideChannel):
def __init__(self):
self.list_int = []
super().__init__()
@property
def channel_type(self):
return -1
super().__init__(uuid.UUID("a85ba5c0-4f87-11ea-a517-784f4387d1f7"))
def on_message_received(self, data):
val = struct.unpack_from("<i", data, 0)[0]

receiver = IntChannel()
sender.send_int(5)
sender.send_int(6)
data = UnityEnvironment._generate_side_channel_data({sender.channel_type: sender})
UnityEnvironment._parse_side_channel_message(
{receiver.channel_type: receiver}, data
)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
assert receiver.list_int[0] == 5
assert receiver.list_int[1] == 6

sender.set_property("prop1", 1.0)
data = UnityEnvironment._generate_side_channel_data({sender.channel_type: sender})
UnityEnvironment._parse_side_channel_message(
{receiver.channel_type: receiver}, data
)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
val = receiver.get_property("prop1")
assert val == 1.0

data = UnityEnvironment._generate_side_channel_data({sender.channel_type: sender})
UnityEnvironment._parse_side_channel_message(
{receiver.channel_type: receiver}, data
)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
val = receiver.get_property("prop1")
assert val == 1.0

def test_raw_bytes():
sender = RawBytesChannel()
receiver = RawBytesChannel()
guid = uuid.uuid4()
sender = RawBytesChannel(guid)
receiver = RawBytesChannel(guid)
data = UnityEnvironment._generate_side_channel_data({sender.channel_type: sender})
UnityEnvironment._parse_side_channel_message(
{receiver.channel_type: receiver}, data
)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
messages = receiver.get_and_clear_received_messages()
assert len(messages) == 2

39
ml-agents-envs/mlagents_envs/tests/test_timers.py


"total": mock.ANY,
"count": 1,
"self": mock.ANY,
"children": [
{
"name": "top_level",
"children": {
"top_level": {
"children": [
{
"name": "multiple",
"children": {
"multiple": {
"children": [
{
"name": "decorated_func",
"children": {
"decorated_func": {
],
},
{
"name": "raises",
"total": mock.ANY,
"count": 1,
"self": mock.ANY,
},
{
"name": "post_raise",
"total": mock.ANY,
"count": 1,
"self": mock.ANY,
},
],
"raises": {"total": mock.ANY, "count": 1, "self": mock.ANY},
"post_raise": {"total": mock.ANY, "count": 1, "self": mock.ANY},
},
],
"gauges": [
{"name": "my_gauge", "value": 4.0, "max": 4.0, "min": 0.0, "count": 3}
],
},
"gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
}
assert timer_tree == expected_tree

22
ml-agents-envs/mlagents_envs/timers.py


from time import perf_counter
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, List, TypeVar
from typing import Any, Callable, Dict, Generator, TypeVar
class TimerNode:

res["is_parallel"] = True
child_total = 0.0
child_list = []
child_dict = {}
child_res: Dict[str, Any] = {
"name": child_name,
**self.get_timing_tree(child_node),
}
child_list.append(child_res)
child_res: Dict[str, Any] = self.get_timing_tree(child_node)
child_dict[child_name] = child_res
if child_list:
res["children"] = child_list
if child_dict:
res["children"] = child_dict
return res

else:
self.gauges[name] = GaugeNode(value)
def _get_gauges(self) -> List[Dict[str, Any]]:
gauges = []
def _get_gauges(self) -> Dict[str, Dict[str, float]]:
gauges = {}
gauge_dict: Dict[str, Any] = {"name": gauge_name, **gauge_node.as_dict()}
gauges.append(gauge_dict)
gauges[gauge_name] = gauge_node.as_dict()
return gauges

2
ml-agents-envs/setup.py


install_requires=[
"cloudpickle",
"grpcio>=1.11.0",
"numpy>=1.13.3,<2.0",
"numpy>=1.14.1,<2.0",
"Pillow>=4.2.1",
"protobuf>=3.6",