
Merge remote-tracking branch 'origin/develop' into try-tf2-support

/develop-gpu-test
Chris Elion 5 years ago
Current commit
a1967c19
116 files changed, with 1605 additions and 3589 deletions
  1. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (45 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (2 changes)
  3. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (15 changes)
  4. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (26 changes)
  5. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (33 changes)
  6. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (6 changes)
  7. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (6 changes)
  8. UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (4 changes)
  9. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity (4 changes)
  10. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity (4 changes)
  11. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity (4 changes)
  12. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)
  13. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity (6 changes)
  14. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (4 changes)
  15. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (6 changes)
  16. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity (4 changes)
  17. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity (3 changes)
  18. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)
  19. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity (4 changes)
  20. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity (3 changes)
  21. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)
  22. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity (4 changes)
  23. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (5 changes)
  24. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity (4 changes)
  25. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6 changes)
  26. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (4 changes)
  27. UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity (4 changes)
  28. UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity (8 changes)
  29. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (242 changes)
  30. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (53 changes)
  31. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (49 changes)
  32. UnitySDK/Assets/ML-Agents/Scripts/BrainParameters.cs (41 changes)
  33. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs (36 changes)
  34. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs (1 change)
  35. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (113 changes)
  36. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (305 changes)
  37. UnitySDK/Assets/ML-Agents/Scripts/HeuristicBrain.cs (27 changes)
  38. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (123 changes)
  39. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs (23 changes)
  40. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (49 changes)
  41. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs (14 changes)
  42. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs (10 changes)
  43. UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs (30 changes)
  44. UnitySDK/Assets/ML-Agents/Scripts/PlayerBrain.cs (5 changes)
  45. UnitySDK/Assets/ML-Agents/Scripts/ResetParameters.cs (25 changes)
  46. UnitySDK/UnitySDK.sln.DotSettings (1 change)
  47. docs/Basic-Guide.md (9 changes)
  48. docs/FAQ.md (6 changes)
  49. docs/Getting-Started-with-Balance-Ball.md (5 changes)
  50. docs/Installation.md (17 changes)
  51. docs/Learning-Environment-Create-New.md (4 changes)
  52. docs/Learning-Environment-Design-Academy.md (3 changes)
  53. docs/Learning-Environment-Design-Agents.md (3 changes)
  54. docs/Learning-Environment-Design-Brains.md (43 changes)
  55. docs/Learning-Environment-Design-Learning-Brains.md (12 changes)
  56. docs/Learning-Environment-Design-Player-Brains.md (4 changes)
  57. docs/Learning-Environment-Design.md (10 changes)
  58. docs/Learning-Environment-Examples.md (7 changes)
  59. docs/Learning-Environment-Executable.md (8 changes)
  60. docs/ML-Agents-Overview.md (14 changes)
  61. docs/Migrating.md (4 changes)
  62. docs/Python-API.md (30 changes)
  63. docs/Readme.md (25 changes)
  64. docs/Training-Behavioral-Cloning.md (63 changes)
  65. docs/Training-Imitation-Learning.md (5 changes)
  66. docs/Training-ML-Agents.md (110 changes)
  67. docs/Training-on-Amazon-Web-Service.md (35 changes)
  68. docs/Training-on-Microsoft-Azure.md (17 changes)
  69. docs/Unity-Inference-Engine.md (2 changes)
  70. docs/Using-Tensorboard.md (6 changes)
  71. ml-agents-envs/mlagents/envs/environment.py (32 changes)
  72. ml-agents-envs/mlagents/envs/tests/test_envs.py (2 changes)
  73. ml-agents/mlagents/trainers/bc/models.py (2 changes)
  74. ml-agents/mlagents/trainers/bc/trainer.py (9 changes)
  75. ml-agents/mlagents/trainers/ppo/trainer.py (11 changes)
  76. ml-agents/mlagents/trainers/tests/mock_brain.py (13 changes)
  77. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (29 changes)
  78. ml-agents/mlagents/trainers/tests/test_bc.py (22 changes)
  79. ml-agents/mlagents/trainers/tests/test_bcmodule.py (2 changes)
  80. ml-agents/mlagents/trainers/tests/test_ppo.py (85 changes)
  81. ml-agents/mlagents/trainers/tests/test_reward_signals.py (6 changes)
  82. ml-agents/mlagents/trainers/tests/test_sac.py (10 changes)
  83. ml-agents/mlagents/trainers/tests/test_trainer_util.py (76 changes)
  84. ml-agents/mlagents/trainers/trainer_util.py (10 changes)
  85. notebooks/getting-started.ipynb (4 changes)
  86. UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs (36 changes)
  87. UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs.meta (3 changes)
  88. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs (343 changes)
  89. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs.meta (11 changes)
  90. docs/Using-Virtual-Environment.md (53 changes)
  91. UnitySDK/Assets/ML-Agents/Editor/Builder.cs (14 changes)
  92. UnitySDK/Assets/ML-Agents/Editor/Builder.cs.meta (3 changes)
  93. UnitySDK/Assets/ML-Agents/Editor/BuilderUtils.cs (44 changes)
  94. UnitySDK/Assets/ML-Agents/Editor/BuilderUtils.cs.meta (3 changes)
  95. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/BouncerIL.unity.meta (7 changes)
  96. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/BouncerIL.unity (1001 changes)
  97. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollectorIL.unity (880 changes)
  98. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollectorIL.unity.meta (9 changes)
  99. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayIL.unity.meta (7 changes)
  100. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayIL.unity (653 changes)

UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (45 changes)


private const float k_LineHeight = 17f;
// The vertical space left below the BroadcastHub UI.
private const float k_ExtraSpaceBelow = 10f;
// The horizontal size of the Control checkbox
private const int k_ControlSize = 80;
/// <summary>
/// Computes the height of the Drawer depending on the property it is showing

position.y += k_LineHeight;
// This is the labels for each columns
var brainWidth = position.width - k_ControlSize;
var brainWidth = position.width;
var controlRect = new Rect(
position.x + brainWidth, position.y, k_ControlSize, position.height);
EditorGUI.LabelField(controlRect, "Control");
controlRect.y += k_LineHeight;
controlRect.x += 15;
DrawBrains(brainRect, controlRect);
DrawBrains(brainRect);
EditorGUI.indentLevel--;
EditorGUI.EndProperty();
}

}
/// <summary>
/// Draws the Brain and Control checkbox for the brains contained in the BroadCastHub.
/// Draws the Brain contained in the BroadcastHub.
/// <param name="controlRect">The Rect to draw the control checkbox.</param>
private void DrawBrains(Rect brainRect, Rect controlRect)
private void DrawBrains(Rect brainRect)
var exposedBrains = m_Hub.broadcastingBrains;
var brain = exposedBrains[index];
var controlledBrains = m_Hub.brainsToControl;
var brain = controlledBrains[index];
brainRect, brain, typeof(Brain), true) as Brain;
brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
m_Hub.broadcastingBrains.RemoveAt(index);
var brainToInsert = exposedBrains.Contains(newBrain) ? null : newBrain;
exposedBrains.Insert(index, brainToInsert);
m_Hub.brainsToControl.RemoveAt(index);
var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
controlledBrains.Insert(index, brainToInsert);
}
// This is the Rectangle for the control checkbox
EditorGUI.BeginChangeCheck();
if (brain is LearningBrain)
{
var isTraining = m_Hub.IsControlled(brain);
isTraining = EditorGUI.Toggle(controlRect, isTraining);
m_Hub.SetControlled(brain, isTraining);
}
controlRect.y += k_LineHeight;
if (EditorGUI.EndChangeCheck())
{
MarkSceneAsDirty();
}
}
}

{
if (m_Hub.Count > 0)
{
m_Hub.broadcastingBrains.RemoveAt(m_Hub.broadcastingBrains.Count - 1);
m_Hub.brainsToControl.RemoveAt(m_Hub.brainsToControl.Count - 1);
}
}

private void AddBrain()
{
m_Hub.broadcastingBrains.Add(null);
m_Hub.brainsToControl.Add(null);
}
}
}
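
The net effect of this hunk is that the drawer stops rendering the per-brain "Control" checkbox column: the hub now only tracks brains it controls, so the field type narrows from Brain to LearningBrain and the second Rect parameter disappears. Condensing the surviving lines above, the row-drawing loop plausibly reduces to the following sketch (the loop scaffolding and the MarkSceneAsDirty placement are assumptions; the rest is taken from the diff):

// Sketch: one ObjectField per controlled brain, no Control checkbox.
private void DrawBrains(Rect brainRect)
{
    var controlledBrains = m_Hub.brainsToControl;
    for (var index = 0; index < controlledBrains.Count; index++)
    {
        var brain = controlledBrains[index];
        EditorGUI.BeginChangeCheck();
        var newBrain = EditorGUI.ObjectField(
            brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
        if (EditorGUI.EndChangeCheck())
        {
            // Re-inserting a brain that is already present stores null,
            // which prevents duplicates in the hub.
            controlledBrains.RemoveAt(index);
            var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
            controlledBrains.Insert(index, brainToInsert);
            MarkSceneAsDirty();
        }
        brainRect.y += k_LineHeight;
    }
}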

UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (2 changes)


reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
var brainParameters = new BrainParameters(brainParamsProto);
var brainParameters = brainParamsProto.ToBrainParameters();
reader.Close();
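
The one functional change here swaps a BrainParameters constructor overload for an extension method on the protobuf type (GrpcExtensions.cs also changes in this commit). A minimal sketch of that pattern, with a single illustrative field mapping only:

// Sketch of the extension-method conversion style the importer now uses.
// The real ToBrainParameters lives in Scripts/Grpc/GrpcExtensions.cs and
// maps every field; VectorObservationSize is shown here as one example.
public static class GrpcExtensionsSketch
{
    public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
    {
        return new BrainParameters
        {
            vectorObservationSize = bpp.VectorObservationSize
        };
    }
}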

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (15 changes)


using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;
using UnityEngine;
using System.Reflection;

}
}
private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
private List<Agent> GetFakeAgentInfos()
var infoA = new AgentInfo();
var infoB = new AgentInfo();
return new Dictionary<Agent, AgentInfo>(){{agentA, infoA}, {agentB, infoB}};
return new List<Agent> {agentA, agentB};
}
[Test]

var applier = new ContinuousActionOutputApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] {2, 3}, 0, alloc);
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var applier = new MemoryOutputApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var applier = new ValueEstimateApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
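
All four appliers in this test now receive a plain List&lt;Agent&gt; instead of a Dictionary&lt;Agent, AgentInfo&gt;, which is why the .Keys.ToList() projections disappear. This follows from Agent now exposing its state as an Info property (see the Agent.cs hunks below); roughly:

// Before: appliers looked up per-agent state in the dictionary values.
// After: they read it straight off the agent (sketch; names from the diff).
var agents = GetFakeAgentInfos();    // now returns a List<Agent>
applier.Apply(inputTensor, agents);  // applier iterates agents and reads agent.Info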

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (26 changes)


{
}
private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
private static IEnumerable<Agent> GetFakeAgentInfos()
var infoA = new AgentInfo()
var infoA = new AgentInfo
stackedVectorObservation = (new[] {1f, 2f, 3f}).ToList(),
stackedVectorObservation = new[] {1f, 2f, 3f}.ToList(),
actionMasks = null,
actionMasks = null
var infoB = new AgentInfo()
var infoB = new AgentInfo
stackedVectorObservation = (new[] {4f, 5f, 6f}).ToList(),
memories = (new[] {1f, 1f, 1f}).ToList(),
stackedVectorObservation = new[] {4f, 5f, 6f}.ToList(),
memories = new[] {1f, 1f, 1f}.ToList(),
agentA.Info = infoA;
agentB.Info = infoB;
return new Dictionary<Agent, AgentInfo>(){{agentA, infoA}, {agentB, infoB}};
return new List<Agent> {agentA, agentB};
}
[Test]

[Test]
public void GenerateVectorObservation()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 3}
};

[Test]
public void GenerateRecurrentInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 5}
};

[Test]
public void GeneratePreviousActionInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 2},
valueType = TensorProxy.TensorType.Integer

[Test]
public void GenerateActionMaskInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 5},
valueType = TensorProxy.TensorType.FloatingPoint

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (33 changes)


public override void AcademyReset()
{
}
public override void AcademyStep()

protected override void DecideAction()
{
numberOfCallsToDecideAction++;
m_AgentInfos.Clear();
m_Agents.Clear();
}
}

//This will call the method even though it is private
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());

agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });
Assert.AreEqual(false, agent1.IsDone());

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
BindingFlags.Instance | BindingFlags.NonPublic);

{
numberReset += 1;
}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent1, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

requestAction += 1;
agent2.RequestAction();
}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}
}

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

}
stepsSinceReset += 1;
academyStepMethod.Invoke((object)aca, new object[] { });
academyStepMethod.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;

agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
{ }
academyStepMethod?.Invoke(aca, new object[] { });
{}
academyStepMethod?.Invoke(aca, new object[] {});
}
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });
var agent1ResetOnDone = 0;

}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });

Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
agent1.AddReward(10f);
if ((i % 21 == 0) && (i > 0))
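
Nearly all of the churn in this file is a whitespace cleanup of empty argument arrays ("new object[] { }" becomes "new object[] {}"); behavior is unchanged. For context, the pattern these tests rely on is reflection into the Academy's private lifecycle methods:

// The tests drive the private Academy lifecycle by reflection; the method
// names ("InitializeEnvironment", "EnvironmentStep") come from the hunks above.
var step = typeof(Academy).GetMethod(
    "EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
step?.Invoke(aca, new object[] {});  // aca is the TestAcademy under test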

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200
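
The scene-file edits across the example environments all follow from one serialization change, repeated in every .unity hunk below: the hub's separate broadcasting list is gone and its controlled-brain list is serialized as m_BrainsToControl. Unity serializes fields under their C# names, so (as an assumption about the exact declaration, not a quote from BroadcastHub.cs) a change along these lines is what forces every scene to be resaved:

// Hypothetical shape of the hub field behind this YAML churn: a single
// serialized list (m_BrainsToControl in the scenes), possibly exposed
// through a brainsToControl accessor, replacing broadcastingBrains.
[SerializeField]
private List<LearningBrain> m_BrainsToControl = new List<LearningBrain>();
public List<LearningBrain> brainsToControl { get { return m_BrainsToControl; } }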

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200

UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 1280
height: 720

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)


public Transform ground;
public bool detectTargets;
public bool targetIsStatic = false;
public bool targetIsStatic;
public bool respawnTargetWhenTouched;
public float targetSpawnRadius;
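
The only change here drops a redundant initializer; C# zero-initializes fields, so both forms are identical in behavior:

// Field initializers to default values are no-ops in C#.
public bool targetIsStaticOld = false;  // old declaration (renamed for the sketch)
public bool targetIsStaticNew;          // new declaration; still defaults to false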

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 1500
m_TrainingConfiguration:
width: 500
height: 500

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 500
height: 500

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 84
height: 84

UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 128
height: 128

UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity (3 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 128
height: 128

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)


m_Name: PushBlockLearning
m_EditorClassIdentifier:
brainParameters:
vectorObservationSize: 0
vectorObservationSize: 70
cameraResolutions:
- width: 84
height: 84
blackAndWhite: 0
cameraResolutions: []
vectorActionDescriptions:
-
vectorActionSpaceType: 0

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 400
height: 300

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity (3 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 1280
height: 720

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)


m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 11400000, guid: 59a04e208fb8a423586adf25bf1fecd0, type: 2}
brain: {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
agentParameters:
agentCameras:
- {fileID: 20712684238256298}

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (5 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl:
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
m_TrainingConfiguration:
width: 800
height: 500

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 25000
m_TrainingConfiguration:
width: 300
height: 200

UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity (8 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971484, g: 0.49977952, b: 0.57563835, a: 1}
m_IndirectSpecularColor: {r: 0.44971442, g: 0.499779, b: 0.5756377, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (242 changes)


using UnityEngine;
using System.IO;
using System.Linq;
using UnityEngine.Serialization;
#if UNITY_EDITOR

/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communciator, the academy is run in
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>

private Vector3 m_OriginalGravity;
/// Temporary storage for global fixedDeltaTime value
/// Used to restore oringal value when deriving Academy modifies it
/// Used to restore original value when deriving Academy modifies it
/// Used to restore oringal value when deriving Academy modifies it
/// Used to restore original value when deriving Academy modifies it
private float m_OriginalMaximumDeltaTime;
// Fields provided in the Inspector

/// </summary>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passinga config
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
[SerializeField]

// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool m_IsCommunicatorOn;
/// Keeps track of the id of the last communicator message received.
/// Remains 0 if there are no communicators. Is used to ensure that
/// the same message is not used multiple times.
private ulong m_LastCommunicatorMessageNumber;
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
bool IsCommunicatorOn
{
get { return m_Communicator != null; }
}
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence

/// each time the environment is reset.
int m_EpisodeCount;
/// The number of steps completed within the current episide. Incremented
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int m_StepCount;

/// engine settings at the next environment step.
bool m_ModeSwitched;
/// Pointer to the batcher currently in use by the Academy.
Batcher m_BrainBatcher;
/// Pointer to the communicator currently in use by the Academy.
ICommunicator m_Communicator;
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;

// they have requested a decision.
public event System.Action AgentAct;
// Sigals to all the agents each time the Academy force resets.
// Signals to all the agents each time the Academy force resets.
/// Monobehavior function called at the very beginning of environment
/// MonoBehavior function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.

}
// Used to read Python-provided environment parameters
private int ReadArgs()
private static int ReadArgs()
{
var args = System.Environment.GetCommandLineArgs();
var inputPort = "";

m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
InitializeAcademy();
ICommunicator communicator;
var exposedBrains = broadcastHub.broadcastingBrains.Where(x => x != null).ToList();
var controlledBrains = broadcastHub.broadcastingBrains.Where(
x => x != null && x is LearningBrain && broadcastHub.IsControlled(x));
foreach (var brain1 in controlledBrains)
{
var brain = (LearningBrain)brain1;
brain.SetToControlledExternally();
}
var controlledBrains = broadcastHub.brainsToControl.Where(x => x != null).ToList();
// Try to launch the communicator by usig the arguments passed at launch
// Try to launch the communicator by using the arguments passed at launch
communicator = new RpcCommunicator(
new CommunicatorParameters
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
// and if Unity is in Editor mode
// If there arn't, there is no need for a communicator and it is set
// to null
communicator = null;
if (controlledBrains.ToList().Count > 0)
#if UNITY_EDITOR
m_Communicator = null;
if (controlledBrains.Any())
communicator = new RpcCommunicator(
new CommunicatorParameters
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
#endif
m_BrainBatcher = new Batcher(communicator);
foreach (var trainingBrain in exposedBrains)
foreach (var trainingBrain in controlledBrains)
trainingBrain.SetBatcher(m_BrainBatcher);
trainingBrain.SetCommunicator(m_Communicator);
if (communicator != null)
if (m_Communicator != null)
m_IsCommunicatorOn = true;
m_Communicator.QuitCommandReceived += OnQuitCommandReceived;
m_Communicator.ResetCommandReceived += OnResetCommand;
m_Communicator.RLInputReceived += OnRLInputReceived;
var academyParameters =
new CommunicatorObjects.UnityRLInitializationOutputProto();
academyParameters.Name = gameObject.name;
academyParameters.Version = k_ApiVersion;
foreach (var brain in exposedBrains)
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
try
var bp = brain.brainParameters;
academyParameters.BrainParameters.Add(
bp.ToProto(brain.name, broadcastHub.IsControlled(brain)));
var unityRLInitParameters = m_Communicator.Initialize(
new CommunicatorInitParameters
{
version = k_ApiVersion,
name = gameObject.name,
brains = controlledBrains,
environmentResetParameters = new EnvironmentResetParameters
{
resetParameters = resetParameters,
customResetParameters = customResetParameters
}
}, broadcastHub);
Random.InitState(unityRLInitParameters.seed);
academyParameters.EnvironmentParameters =
new CommunicatorObjects.EnvironmentParametersProto();
foreach (var key in resetParameters.Keys)
catch
academyParameters.EnvironmentParameters.FloatParameters.Add(
key, resetParameters[key]
);
m_Communicator = null;
foreach (var brain in controlledBrains)
{
brain.SetCommunicator(null);
}
var pythonParameters = m_BrainBatcher.SendAcademyParameters(academyParameters);
Random.InitState(pythonParameters.Seed);
m_IsInference = !m_IsCommunicatorOn;
SetIsInference(!IsCommunicatorOn);
BrainDecideAction += () => { };
DestroyAction += () => { };
AgentSetStatus += (i) => { };
AgentResetIfDone += () => { };
AgentSendState += () => { };
AgentAct += () => { };
AgentForceReset += () => { };
BrainDecideAction += () => {};
DestroyAction += () => {};
AgentSetStatus += i => {};
AgentResetIfDone += () => {};
AgentSendState += () => {};
AgentAct += () => {};
AgentForceReset += () => {};
// Configure the environment using the configurations provided by
// the developer in the Editor.
SetIsInference(!m_BrainBatcher.GetIsTraining());
private void UpdateResetParameters()
static void OnQuitCommandReceived()
var newResetParameters = m_BrainBatcher.GetEnvironmentParameters();
if (newResetParameters != null)
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
Application.Quit();
}
private void OnResetCommand(EnvironmentResetParameters newResetParameters)
{
UpdateResetParameters(newResetParameters);
ForcedFullReset();
}
void OnRLInputReceived(UnityRLInputParameters inputParams)
{
m_IsInference = !inputParams.isTraining;
}
private void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
{
if (newResetParameters.resetParameters != null)
foreach (var kv in newResetParameters.FloatParameters)
foreach (var kv in newResetParameters.resetParameters)
customResetParameters = newResetParameters.CustomResetParameters;
customResetParameters = newResetParameters.customResetParameters;
}
/// <summary>

// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuraiton.
// (i.e. training vs inference) configuration.
m_ModeSwitched = true;
}
}

}
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn()
{
return m_IsCommunicatorOn;
}
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
/// called the first reset at inference and every external reset
/// at training.

m_ModeSwitched = false;
}
if ((m_IsCommunicatorOn) &&
(m_LastCommunicatorMessageNumber != m_BrainBatcher.GetNumberMessageReceived()))
{
m_LastCommunicatorMessageNumber = m_BrainBatcher.GetNumberMessageReceived();
if (m_BrainBatcher.GetCommand() ==
CommunicatorObjects.CommandProto.Reset)
{
UpdateResetParameters();
SetIsInference(!m_BrainBatcher.GetIsTraining());
ForcedFullReset();
}
if (m_BrainBatcher.GetCommand() ==
CommunicatorObjects.CommandProto.Quit)
{
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
Application.Quit();
return;
}
}
else if (!m_FirstAcademyReset)
if (!m_FirstAcademyReset)
UpdateResetParameters();
AgentResetIfDone();
using (TimerStack.Instance.Scoped("AgentResetIfDone"))
{
AgentResetIfDone();
}
AgentSendState();
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState();
}
BrainDecideAction();
using (TimerStack.Instance.Scoped("BrainDecideAction"))
{
BrainDecideAction();
}
AcademyStep();
using (TimerStack.Instance.Scoped("AcademyStep"))
{
AcademyStep();
}
AgentAct();
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct();
}
m_StepCount += 1;
m_TotalStepCount += 1;

}
/// <summary>
/// Monobehavior function that dictates each environment step.
/// MonoBehaviour function that dictates each environment step.
/// </summary>
void FixedUpdate()
{

// Signal to listeners that the academy is being destroyed now
DestroyAction();
// TODO - Pass worker ID or some other identifier,
// so that multiple envs won't overwrite each others stats.
TimerStack.Instance.SaveJsonTimers();
}
}
}
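
Two themes run through this file's hunks: the Batcher indirection is deleted (the Academy now owns an ICommunicator directly, and brains get SetCommunicator instead of SetBatcher), and each phase of the environment step is wrapped in a TimerStack scope for the new Timer.cs profiling. The handshake logic visible above condenses to roughly the following sketch; initParams stands in for the CommunicatorInitParameters object built in the diff:

// Sketch of the new initialization flow: attempt the first exchange with
// Python; if nothing answers, fall back to inference mode by clearing the
// communicator on the Academy and on every controlled brain.
try
{
    var unityRLInitParameters = m_Communicator.Initialize(initParams, broadcastHub);
    Random.InitState(unityRLInitParameters.seed);
}
catch
{
    m_Communicator = null;
    foreach (var brain in controlledBrains)
    {
        brain.SetCommunicator(null);
    }
}
SetIsInference(!IsCommunicatorOn);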

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (53 changes)


using System.Collections.Generic;
using MLAgents.CommunicatorObjects;
using UnityEngine;

/// <summary>
/// User-customizable object for sending structured output from Unity to Python in response
/// to an action in addition to a scalar reward.
/// TODO(cgoy): All references to protobuf objects should be removed.
public CustomObservationProto customObservation;
public CommunicatorObjects.CustomObservationProto customObservation;
/// <summary>
/// Remove the visual observations from memory. Call at each timestep

public string textActions;
public List<float> memories;
public float value;
public CustomActionProto customAction;
/// TODO(cgoy): All references to protobuf objects should be removed.
public CommunicatorObjects.CustomActionProto customAction;
}
/// <summary>

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
public AgentInfo Info
{
get { return m_Info; }
set { m_Info = value; }
}
/// Current Agent action (message sent from Brain).
AgentAction m_Action;

m_Info.storedTextActions = m_Action.textActions;
m_Info.vectorObservation.Clear();
m_ActionMasker.ResetMask();
CollectObservations();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();
}
m_Info.actionMasks = m_ActionMasker.GetMask();
var param = brain.brainParameters;

m_Info.maxStepReached = m_MaxStepReached;
m_Info.id = m_Id;
brain.SendState(this, m_Info);
brain.SubscribeAgentForDecision(this);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{

m_Info.textObservation = "";
}
public void ClearVisualObservations()
{
m_Info.ClearVisualObs();
}
/// <summary>

/// A custom action, defined by the user as custom protobuf message. Useful if the action is hard to encode
/// as either a flat vector or a single string.
/// </param>
public virtual void AgentAction(float[] vectorAction, string textAction, CustomActionProto customAction)
public virtual void AgentAction(float[] vectorAction, string textAction, CommunicatorObjects.CustomActionProto customAction)
{
// We fall back to not using the custom action if the subclassed Agent doesn't override this method.
AgentAction(vectorAction, textAction);

AgentReset();
}
public void UpdateAgentAction(AgentAction action)
{
m_Action = action;
}
/// <summary>
/// Updates the vector action.
/// </summary>

}
/// <summary>
/// Updates the text action.
/// </summary>
/// <param name="textActions">Text actions.</param>
public void UpdateTextAction(string textActions)
{
m_Action.textActions = textActions;
}
/// <summary>
/// Updates the custom action.
/// </summary>
/// <param name="customAction">Custom action.</param>
public void UpdateCustomAction(CustomActionProto customAction)
{
m_Action.customAction = customAction;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>
public void UpdateValueAction(float value)

}
/// <summary>
/// Sets the status of the agent. Will request decisions or actions according
/// Sets the status of the agent. Will request decisions or actions according
/// to the Academy's stepcount.
/// </summary>
/// <param name="academyStepCounter">Number of current steps in episode</param>

/// Sets the custom observation for the agent for this episode.
/// </summary>
/// <param name="customObservation">New value of the agent's custom observation.</param>
public void SetCustomObservation(CustomObservationProto customObservation)
public void SetCustomObservation(CommunicatorObjects.CustomObservationProto customObservation)
{
m_Info.customObservation = customObservation;
}
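
This is the agent-side counterpart of the Brain changes: AgentInfo is now exposed through an Info property, so instead of pushing a copy of its state via SendState, the agent simply registers itself for a decision and the brain reads agent.Info later. Condensed from the hunks above:

// Old: brain.SendState(this, m_Info);
// New: the agent subscribes itself; its Info property carries the state.
m_Info.maxStepReached = m_MaxStepReached;
m_Info.id = m_Id;
brain.SubscribeAgentForDecision(this);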

UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (49 changes)


using System;
using System.Collections.Generic;
using UnityEngine;

/// Brain receive data from Agents through calls to SendState. The brain then updates the
/// Brain receive data from Agents through calls to SubscribeAgentForDecision. The brain then updates the
/// actions of the agents at each FixedUpdate.
/// The Brain encapsulates the decision making process. Every Agent must be assigned a Brain,
/// but you can use the same Brain with more than one Agent. You can also create several

{
[SerializeField] public BrainParameters brainParameters;
protected Dictionary<Agent, AgentInfo> m_AgentInfos =
new Dictionary<Agent, AgentInfo>(1024);
/// <summary>
/// List of agents subscribed for decisions.
/// </summary>
protected List<Agent> m_Agents = new List<Agent>(1024);
protected Batcher m_BrainBatcher;
[System.NonSerialized]
[NonSerialized]
/// Sets the Batcher of the Brain. The brain will call the batcher at every step and give
/// it the agent's data using SendBrainInfo at each DecideAction call.
/// </summary>
/// <param name="batcher"> The Batcher the brain will use for the current session</param>
public void SetBatcher(Batcher batcher)
{
if (batcher == null)
{
m_BrainBatcher = null;
}
else
{
m_BrainBatcher = batcher;
m_BrainBatcher.SubscribeBrain(name);
}
LazyInitialize();
}
/// <summary>
/// Adds the data of an agent to the current batch so it will be processed in DecideAction.
/// Registers an agent to current batch so it will be processed in DecideAction.
/// <param name="info"></param>
public void SendState(Agent agent, AgentInfo info)
public void SubscribeAgentForDecision(Agent agent)
m_AgentInfos[agent] = info;
m_Agents.Add(agent);
}
/// <summary>

private void LazyInitialize()
protected void LazyInitialize()
{
if (!m_IsInitialized)
{

{
if (m_IsInitialized)
{
m_AgentInfos.Clear();
m_Agents.Clear();
m_IsInitialized = false;
}
}

/// </summary>
private void BrainDecideAction()
{
m_BrainBatcher?.SendBrainInfo(name, m_AgentInfos);
// Clear the agent Decision subscription collection for the next update cycle.
m_Agents.Clear();
/// Is called only once at the begening of the training or inference session.
/// Is called only once at the beginning of the training or inference session.
/// </summary>
protected abstract void Initialize();
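
Putting this file's hunks together: the brain keeps a per-step List&lt;Agent&gt; subscription list instead of a dictionary of copied AgentInfo, and clears it after every decision. A sketch of the resulting surface, assuming the surrounding class is otherwise unchanged:

// Agents subscribe each step; DecideAction (implemented by subclasses such
// as the TestBrain above) reads agent.Info, then the list is reset.
public void SubscribeAgentForDecision(Agent agent)
{
    LazyInitialize();   // assumption: mirrors the old SendState path
    m_Agents.Add(agent);
}

private void BrainDecideAction()
{
    DecideAction();     // abstract; subclasses consume m_Agents here
    // Clear the agent Decision subscription collection for the next update cycle.
    m_Agents.Clear();
}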

UnitySDK/Assets/ML-Agents/Scripts/BrainParameters.cs (41 changes)


using System;
using UnityEngine;
using System.Linq;
namespace MLAgents
{

Continuous
};
}
/// <summary>
/// The resolution of a camera used by an agent.

/// <summary>Defines if the action is discrete or continuous</summary>
public SpaceType vectorActionSpaceType = SpaceType.Discrete;
public BrainParameters()
{
}
/// <summary>
/// Converts Resolution protobuf array to C# Resolution array.
/// </summary>
private static Resolution[] ResolutionProtoToNative(
CommunicatorObjects.ResolutionProto[] resolutionProtos)
{
var localCameraResolutions = new Resolution[resolutionProtos.Length];
for (var i = 0; i < resolutionProtos.Length; i++)
{
localCameraResolutions[i] = new Resolution
{
height = resolutionProtos[i].Height,
width = resolutionProtos[i].Width,
blackAndWhite = resolutionProtos[i].GrayScale
};
}