浏览代码

Merge branch 'soccer-2v1' into asymm-envs

/asymm-envs
Andrew Cohen 5 年前
当前提交
b4f52c88
共有 51 个文件被更改,包括 2887 次插入5855 次删除
  1. 2
      .yamato/gym-interface-test.yml
  2. 4
      .yamato/protobuf-generation-test.yml
  3. 2
      .yamato/python-ll-api-test.yml
  4. 5
      .yamato/standalone-build-test.yml
  5. 11
      .yamato/training-int-tests.yml
  6. 2
      README.md
  7. 4
      com.unity.ml-agents/CHANGELOG.md
  8. 24
      com.unity.ml-agents/Runtime/Academy.cs
  9. 6
      com.unity.ml-agents/Runtime/Agent.cs
  10. 34
      com.unity.ml-agents/Runtime/DecisionRequester.cs
  11. 44
      com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs
  12. 69
      com.unity.ml-agents/Runtime/Timer.cs
  13. 47
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  14. 102
      com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
  15. 26
      com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
  16. 2
      com.unity.ml-agents/Tests/Editor/TimerTest.cs
  17. 16
      com.unity.ml-agents/package.json
  18. 3
      docs/Getting-Started.md
  19. 10
      docs/ML-Agents-Overview.md
  20. 1
      docs/Migrating.md
  21. 2
      docs/Training-ML-Agents.md
  22. 999
      docs/images/3dball_big.png
  23. 852
      docs/images/3dball_small.png
  24. 974
      docs/images/curriculum.png
  25. 999
      docs/images/ml-agents-LSTM.png
  26. 181
      docs/images/monitor.png
  27. 2
      ml-agents-envs/mlagents_envs/environment.py
  28. 38
      ml-agents-envs/mlagents_envs/side_channel/incoming_message.py
  29. 20
      ml-agents-envs/mlagents_envs/tests/test_side_channel.py
  30. 7
      ml-agents-envs/mlagents_envs/tests/test_timers.py
  31. 39
      ml-agents-envs/mlagents_envs/timers.py
  32. 1
      ml-agents/mlagents/trainers/agent_processor.py
  33. 14
      ml-agents/mlagents/trainers/demo_loader.py
  34. 1
      ml-agents/mlagents/trainers/env_manager.py
  35. 12
      ml-agents/mlagents/trainers/learn.py
  36. 24
      ml-agents/mlagents/trainers/tests/test_demo_loader.py
  37. 2
      ml-agents/tests/yamato/scripts/run_llapi.py
  38. 2
      ml-agents/tests/yamato/standalone_build_tests.py
  39. 13
      ml-agents/tests/yamato/training_int_tests.py
  40. 37
      ml-agents/tests/yamato/yamato_utils.py
  41. 1
      utils/make_readme_table.py
  42. 129
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  43. 11
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs.meta
  44. 25
      com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef
  45. 7
      com.unity.ml-agents/Tests/Runtime/Unity.ML-Agents.Runtime.Tests.asmdef.meta
  46. 429
      com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity
  47. 7
      com.unity.ml-agents/Tests/Runtime/SerializeTestScene.unity.meta
  48. 1001
      docs/images/banana.png
  49. 1001
      docs/images/running-a-pretrained-model.gif
  50. 497
      docs/images/3dballhard.png
  51. 1001
      docs/images/bananaimitation.png

2
.yamato/gym-interface-test.yml


commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=Project/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

4
.yamato/protobuf-generation-test.yml


- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
artifacts:
dist:
patch:
- "artifacts/*"
- "artifacts/*.*"

2
.yamato/python-ll-api-test.yml


- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }} --env=Project/testPlayer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:

5
.yamato/standalone-build-test.yml


- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
logs:
paths:
- "artifacts/standalone_build.txt"
- "Project/testPlayer*/**"
- "artifacts/testPlayer*/**"
{% endfor %}

11
.yamato/training-int-tests.yml


# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
unit:
logs:
paths:
- "artifacts/standalone_build.txt"
standalonebuild:
- "artifacts/**"
- "artifacts/testplayer*/**"
{% endfor %}

2
README.md


|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.15.1** | **March 30, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip)** |
| **0.15.0** | **March 18, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip)** |
| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |

4
com.unity.ml-agents/CHANGELOG.md


- Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
- The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
- Added ability to start training (initialize model weights) from a previous run ID. (#3710)
- The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
- The offset logic was removed from DecisionRequester.
- Timer files now contain a dictionary of metadata, including things like the package version numbers.
- SideChannel IncomingMessages methods now take an optional default argument, which is used when trying to read more data than the message contains.
- The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
- Running `mlagents-learn` with the same `--run-id` twice will no longer overwrite the existing files. (#3705)

24
com.unity.ml-agents/Runtime/Academy.cs


/// on each side, although we may allow some flexibility in the future.
/// This should be incremented whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "0.15.0";
const string k_ApiVersion = "0.16.0";
internal const string k_PackageVersion = "0.15.0-preview";
internal const string k_PackageVersion = "0.15.1-preview";
const int k_EditorTrainingPort = 5004;

// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
internal event Action<int> AgentSetStatus;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this event as well.
/// </summary>
public event Action<int> AgentPreStep;
// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.

/// </summary>
void InitializeEnvironment()
{
TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
TimerStack.Instance.AddMetadata("package_version", k_PackageVersion);
EnableAutomaticStepping();
SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());

{
DecideAction = () => {};
DestroyAction = () => {};
AgentSetStatus = i => {};
AgentPreStep = i => {};
AgentSendState = () => {};
AgentAct = () => {};
AgentForceReset = () => {};

ForcedFullReset();
}
AgentSetStatus?.Invoke(m_StepCount);
AgentPreStep?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;

6
com.unity.ml-agents/Runtime/Agent.cs


m_Info.reward = m_Reward;
m_Info.done = true;
m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
if (collectObservationsSensor != null)
{
// Make sure the latest observations are being passed to training.
collectObservationsSensor.Reset();
CollectObservations(collectObservationsSensor);
}
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors);

34
com.unity.ml-agents/Runtime/DecisionRequester.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;

/// at regular intervals.
/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
internal class DecisionRequester : MonoBehaviour
[RequireComponent(typeof(Agent))]
public class DecisionRequester : MonoBehaviour
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
/// that the Agent will request a decision every 5 Academy steps. /// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]

[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).
/// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
/// the decision-making computations for all the agents across multiple Academy steps.
/// This can be valuable in scenarios where you have many agents in the scene, particularly
/// during the inference phase.
/// </summary>
[Tooltip("Whether or not Agent decisions should start at an offset.")]
public bool offsetStep;
[NonSerialized]
int m_Offset;
m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
Academy.Instance.AgentSetStatus += MakeRequests;
Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
Academy.Instance.AgentPreStep += MakeRequests;
}
void OnDestroy()

Academy.Instance.AgentSetStatus -= MakeRequests;
Academy.Instance.AgentPreStep -= MakeRequests;
void MakeRequests(int count)
/// <summary>
/// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
/// decision, and whether or not it should take actions between decisions.
/// </summary>
/// <param name="academyStepCount">The current step count of the academy.</param>
void MakeRequests(int academyStepCount)
if ((count + m_Offset) % DecisionPeriod == 0)
if (academyStepCount % DecisionPeriod == 0)
{
m_Agent?.RequestDecision();
}

44
com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs


using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System;
using System.IO;
using System.Text;

}
/// <summary>
/// Read a boolan value from the message.
/// Read a boolean value from the message.
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public bool ReadBoolean()
public bool ReadBoolean(bool defaultValue = false)
return m_Reader.ReadBoolean();
return CanReadMore() ? m_Reader.ReadBoolean() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public int ReadInt32()
public int ReadInt32(int defaultValue = 0)
return m_Reader.ReadInt32();
return CanReadMore() ? m_Reader.ReadInt32() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public float ReadFloat32()
public float ReadFloat32(float defaultValue = 0.0f)
return m_Reader.ReadSingle();
return CanReadMore() ? m_Reader.ReadSingle() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public string ReadString()
public string ReadString(string defaultValue = default)
if (!CanReadMore())
{
return defaultValue;
}
var strLength = ReadInt32();
var str = Encoding.ASCII.GetString(m_Reader.ReadBytes(strLength));
return str;

/// Reads a list of floats from the message. The length of the list is stored in the message.
/// </summary>
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public IList<float> ReadFloatList()
public IList<float> ReadFloatList(IList<float> defaultValue = default)
if (!CanReadMore())
{
return defaultValue;
}
var len = ReadInt32();
var output = new float[len];
for (var i = 0; i < len; i++)

{
m_Reader?.Dispose();
m_Stream?.Dispose();
}
/// <summary>
/// Checks whether the read position is still short of the end of the message stream.
/// </summary>
/// <returns>True if the stream position is less than the stream length; false otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
bool CanReadMore()
{
    var hasUnreadBytes = m_Stream.Length > m_Stream.Position;
    return hasUnreadBytes;
}
}
}

69
com.unity.ml-agents/Runtime/Timer.cs


Dictionary<string, TimerNode> m_Children;
/// <summary>
/// Gauge Nodes to measure arbitrary values.
/// </summary>
[DataMember(Name = "gauges", EmitDefaultValue = false)]
Dictionary<string, GaugeNode> m_Gauges;
/// <summary>
/// Custom sampler used to add timings to the profiler.
/// </summary>
CustomSampler m_Sampler;

set {} // Serialization needs this, but unused.
}
public Dictionary<string, GaugeNode> Gauges
{
get { return m_Gauges; }
}
/// <summary>
/// Total seconds spent in this block, excluding its children.
/// </summary>

// The root node doesn't have a sampler since that could interfere with the profiler.
m_NumCalls = 1;
m_TickStart = DateTime.Now.Ticks;
m_Gauges = new Dictionary<string, GaugeNode>();
}
else
{

}
}
/// <summary>
/// Root of the timer hierarchy. In addition to what <see cref="TimerNode"/> provides, it
/// holds the gauges and a dictionary of string metadata that are serialized with the timers.
/// </summary>
[DataContract]
internal class RootNode : TimerNode
{
    // Version of the timer output format; written into the metadata on construction.
    internal const string k_timerFormatVersion = "0.1.0";

    /// <summary>
    /// Key/value metadata serialized under the "metadata" name.
    /// </summary>
    [DataMember(Name = "metadata", Order = 0)]
    Dictionary<string, string> m_Metadata = new Dictionary<string, string>();

    /// <summary>
    /// Gauge Nodes to measure arbitrary values.
    /// </summary>
    [DataMember(Name = "gauges", EmitDefaultValue = false)]
    Dictionary<string, GaugeNode> m_Gauges = new Dictionary<string, GaugeNode>();

    /// <summary>
    /// Creates the root node and records the default metadata entries: timer format version,
    /// start time (Unix seconds), Unity version, and the process command line arguments.
    /// </summary>
    /// <param name="name">Name given to the node; forwarded to the base constructor.</param>
    public RootNode(string name="root") : base(name, true)
    {
        var startTimeSeconds = DateTimeOffset.Now.ToUnixTimeSeconds();
        m_Metadata.Add("timer_format_version", k_timerFormatVersion);
        m_Metadata.Add("start_time_seconds", $"{startTimeSeconds}");
        m_Metadata.Add("unity_version", Application.unityVersion);
        var commandLine = String.Join(" ", Environment.GetCommandLineArgs());
        m_Metadata.Add("command_line_arguments", commandLine);
    }

    /// <summary>
    /// Stores a metadata value, overwriting any value already stored under the same key.
    /// </summary>
    /// <param name="key">Metadata key.</param>
    /// <param name="value">Metadata value.</param>
    public void AddMetadata(string key, string value) => m_Metadata[key] = value;

    /// <summary>
    /// The gauges stored on this node, keyed by name.
    /// </summary>
    public Dictionary<string, GaugeNode> Gauges => m_Gauges;

    /// <summary>
    /// The metadata dictionary stored on this node.
    /// </summary>
    public Dictionary<string, string> Metadata => m_Metadata;
}
/// <summary>
/// Tracks the most recent value of a metric. This is analogous to gauges in statsd.
/// </summary>

static readonly TimerStack k_Instance = new TimerStack();
Stack<TimerNode> m_Stack;
TimerNode m_RootNode;
RootNode m_RootNode;
Dictionary<string, string> m_Metadata;
// Explicit static constructor to tell C# compiler
// not to mark type as beforefieldinit

public void Reset(string name = "root")
{
m_Stack = new Stack<TimerNode>();
m_RootNode = new TimerNode(name, true);
m_RootNode = new RootNode(name);
m_Stack.Push(m_RootNode);
}

get { return k_Instance; }
}
internal TimerNode RootNode
internal RootNode RootNode
{
get { return m_RootNode; }
}

m_RootNode.Gauges[name] = new GaugeNode(value);
}
}
}
/// <summary>
/// Records a metadata entry by forwarding the key/value pair to the root node.
/// </summary>
/// <param name="key">Metadata key.</param>
/// <param name="value">Metadata value stored under the key.</param>
public void AddMetadata(string key, string value) => m_RootNode.AddMetadata(key, value);
void Push(string name)

/// <param name="stream"></param>
public void SaveJsonTimers(Stream stream)
{
// Add some final metadata info
AddMetadata("scene_name", SceneManager.GetActiveScene().name);
AddMetadata("end_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
var ser = new DataContractJsonSerializer(typeof(TimerNode), jsonSettings);
var ser = new DataContractJsonSerializer(typeof(RootNode), jsonSettings);
ser.WriteObject(stream, m_RootNode);
}
}

47
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.CodeDom;
using System;
using UnityEngine;
using NUnit.Framework;
using System.Reflection;

{
internal class TestPolicy : IPolicy
{
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {}
public Action OnRequestDecision;
private WriteAdapter m_Adapter = new WriteAdapter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
sensor.GetObservationProto(m_Adapter);
}
OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }

{
collectObservationsCalls += 1;
collectObservationsCallsForEpisode += 1;
sensor.AddObservation(0f);
sensor.AddObservation(collectObservationsCallsForEpisode);
}
public override void OnActionReceived(float[] vectorAction)

aca.EnvironmentStep();
}
}
// Verifies the stacked observations seen by the policy when the episode ends, and that the
// stacking sensor reads all zeros once EndEpisode() has returned.
[Test]
public void AssertStackingReset()
{
    // Set up an agent whose vector observations are stacked three deep.
    var agentObject = new GameObject("TestAgent");
    agentObject.AddComponent<TestAgent>();
    var behaviorParameters = agentObject.GetComponent<BehaviorParameters>();
    behaviorParameters.brainParameters.numStackedVectorObservations = 3;
    var agent = agentObject.GetComponent<TestAgent>();
    var academy = Academy.Instance;
    agent.LazyInitialize();
    var policy = new TestPolicy();
    agent.SetPolicy(policy);

    // Pick out the agent's StackingSensor (the last one, should there be several).
    StackingSensor stackingSensor = null;
    foreach (ISensor candidate in agent.sensors)
    {
        if (candidate is StackingSensor)
        {
            stackingSensor = (StackingSensor)candidate;
        }
    }
    Assert.NotNull(stackingSensor);

    // Run twenty decision/step cycles before installing the callback.
    for (var step = 0; step < 20; step++)
    {
        agent.RequestDecision();
        academy.EnvironmentStep();
    }

    // From here on, every decision request received by the policy checks the stacked values.
    policy.OnRequestDecision = () =>
    {
        SensorTestHelper.CompareObservation(stackingSensor, new[] {18f, 19f, 21f});
    };
    agent.EndEpisode();

    // After the episode has ended the stack is expected to be cleared.
    SensorTestHelper.CompareObservation(stackingSensor, new[] {0f, 0f, 0f});
}
}
[TestFixture]

expectedCollectObsCallsForEpisode = 0;
expectedAgentStepCount = 0;
expectedSensorResetCalls++;
expectedCollectObsCalls += 1;
}
aca.EnvironmentStep();

102
com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs


using MLAgents.Sensors;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;
namespace MLAgentsExamples
{

sensorComponent.observationStacks = 2;
sensorComponent.CreateSensor();
}
// Agent subclass that counts how many times Heuristic() is invoked and otherwise
// defers to the base implementation.
class PublicApiAgent : Agent
{
    public int numHeuristicCalls;

    public override float[] Heuristic()
    {
        numHeuristicCalls += 1;
        var baseActions = base.Heuristic();
        return baseActions;
    }
}
// Minimal SensorComponent that wraps another component's sensor in a StackingSensor.
class StackingComponent : SensorComponent
{
    public SensorComponent wrappedComponent;
    public int numStacks;

    // Creates the wrapped component's sensor and stacks it numStacks times.
    public override ISensor CreateSensor()
    {
        return new StackingSensor(wrappedComponent.CreateSensor(), numStacks);
    }

    // Returns a fresh array holding the wrapped shape with every entry multiplied by numStacks.
    public override int[] GetObservationShape()
    {
        var wrappedShape = wrappedComponent.GetObservationShape();
        var stackedShape = new int[wrappedShape.Length];
        for (var dim = 0; dim < wrappedShape.Length; dim++)
        {
            stackedShape[dim] = wrappedShape[dim] * numStacks;
        }
        return stackedShape;
    }
}
// Walks through agent setup using the public API: configures BehaviorParameters, adds an agent
// and sensor components to a GameObject, initializes the agent, changes the behavior type and
// inference device, then performs one manual Academy step and checks the returned actions and
// the number of Heuristic() calls.
[Test]
public void CheckSetupAgent()
{
var gameObject = new GameObject();
// Configure the behavior before the agent component is added.
var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.brainParameters.vectorObservationSize = 3;
behaviorParams.brainParameters.numStackedVectorObservations = 2;
behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
behaviorParams.behaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.useChildSensors = true;
var agent = gameObject.AddComponent<PublicApiAgent>();
// Make sure we can set the behavior type correctly after the agent is added
behaviorParams.behaviorType = BehaviorType.InferenceOnly;
// Can't actually create an Agent with InferenceOnly and no model, so change back
behaviorParams.behaviorType = BehaviorType.Default;
// TODO - not internal yet
// var decisionRequester = gameObject.AddComponent<DecisionRequester>();
// decisionRequester.DecisionPeriod = 2;
var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;
// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;
// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);
agent.LazyInitialize();
// Make sure we can set the behavior type correctly after the agent is initialized
// (this creates a new policy).
behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
// Initialization should set up the sensors
Assert.IsNotNull(sensorComponent.raySensor);
// Let's change the inference device
var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
agent.AddReward(1.0f);
agent.RequestAction();
agent.RequestDecision();
// Turn off automatic stepping and advance the Academy with a single explicit step.
Academy.Instance.AutomaticSteppingEnabled = false;
Academy.Instance.EnvironmentStep();
var actions = agent.GetAction();
// default Heuristic implementation should return zero actions.
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
// Exactly one Heuristic() call is expected to have been counted by PublicApiAgent.
Assert.AreEqual(1, agent.numHeuristicCalls);
}
}
}

26
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


Assert.AreEqual(stringVal, incomingMsg.ReadString());
Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
}
// Reads from an empty message and checks that each Read* call returns its default value:
// the built-in default when none is supplied, or the supplied one otherwise.
[Test]
public void TestMessageReadDefaults()
{
    // Build an incoming message from an outgoing message that had nothing written to it.
    IncomingMessage emptyMsg;
    using (var outgoingMsg = new OutgoingMessage())
    {
        var payload = outgoingMsg.ToByteArray();
        emptyMsg = new IncomingMessage(payload);
    }

    // Booleans
    Assert.IsFalse(emptyMsg.ReadBoolean());
    Assert.IsTrue(emptyMsg.ReadBoolean(true));

    // 32-bit integers
    Assert.AreEqual(0, emptyMsg.ReadInt32());
    Assert.AreEqual(42, emptyMsg.ReadInt32(42));

    // 32-bit floats
    Assert.AreEqual(0.0f, emptyMsg.ReadFloat32());
    Assert.AreEqual(1337.0f, emptyMsg.ReadFloat32(1337.0f));

    // Strings
    Assert.IsNull(emptyMsg.ReadString());
    Assert.AreEqual("foo", emptyMsg.ReadString("foo"));

    // Float lists
    Assert.IsNull(emptyMsg.ReadFloatList());
    var expectedList = new float[] { 1001, 1002 };
    var fallbackList = new float[] { 1001, 1002 };
    Assert.AreEqual(expectedList, emptyMsg.ReadFloatList(fallbackList));
}
}
}

2
com.unity.ml-agents/Tests/Editor/TimerTest.cs


using (myTimer.Scoped("bar"))
{
myTimer.SetGauge("my_gauge", i);
myTimer.AddMetadata("i", $"{i}");
}
}
}

Assert.AreEqual(0, gauge.minValue);
Assert.AreEqual(4, gauge.maxValue);
Assert.AreEqual(4, gauge.value);
Assert.AreEqual("4", myTimer.RootNode.Metadata["i"]);
var fooChildren = rootChildren["foo"].Children;
Assert.That(fooChildren, Contains.Key("bar"));

16
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName":"ML Agents",
"version": "0.15.0-preview",
"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.1-preview"
}
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "0.15.1-preview",
"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.1-preview"
}
}

3
docs/Getting-Started.md


- For a "Hello World" introduction to creating your own Learning Environment,
check out the [Making a New Learning
Environment](Learning-Environment-Create-New.md) page.
- For a series of YouTube video tutorials, check out the
[Machine Learning Agents PlayList](https://www.youtube.com/playlist?list=PLX2vGYjWbI0R08eWQkO7nQkGiicHAX7IX)
page.

10
docs/ML-Agents-Overview.md


[Training With Environment Parameter Randomization](Training-Environment-Parameter-Randomization.md)
to learn more about this feature.
- **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on
Amazon Web Services (AWS) machines, we provide a
[guide](Training-on-Amazon-Web-Service.md) on how to set-up EC2 instances in
addition to a public pre-configured Amazon Machine Image (AMI).
- **Cloud Training on Microsoft Azure** - To facilitate using the ML-Agents
toolkit on Azure machines, we provide a
[guide](Training-on-Microsoft-Azure.md) on how to set-up virtual machine
instances in addition to a pre-configured data science image.
## Summary and Next Steps
To briefly summarize: The ML-Agents toolkit enables games and simulations built

1
docs/Migrating.md


* Replace `Academy.RegisterSideChannel` with `SideChannelUtils.RegisterSideChannel()`.
* Replace `Academy.UnregisterSideChannel` with `SideChannelUtils.UnregisterSideChannel`.
## Migrating from 0.14 to 0.15
### Important changes

2
docs/Training-ML-Agents.md


* `--debug`: Specify this option to enable debug-level logging for some parts of the code.
* `--cpu`: Forces training using CPU only.
* Engine Configuration :
* `--width' : The width of the executable window of the environment(s) in pixels
* `--width` : The width of the executable window of the environment(s) in pixels
(ignored for editor training) (Default 84)
* `--height` : The height of the executable window of the environment(s) in pixels
(ignored for editor training). (Default 84)

999
docs/images/3dball_big.png
文件差异内容过多而无法显示
查看文件

852
docs/images/3dball_small.png

之前 之后
宽度: 906  |  高度: 759  |  大小: 165 KiB

974
docs/images/curriculum.png

之前 之后
宽度: 2066  |  高度: 342  |  大小: 152 KiB

999
docs/images/ml-agents-LSTM.png
文件差异内容过多而无法显示
查看文件

181
docs/images/monitor.png

之前 之后
宽度: 961  |  高度: 745  |  大小: 33 KiB

2
ml-agents-envs/mlagents_envs/environment.py


# Currently we require strict equality between the communication protocol
# on each side, although we may allow some flexibility in the future.
# This should be incremented whenever a change is made to the communication protocol.
API_VERSION = "0.15.0"
API_VERSION = "0.16.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

38
ml-agents-envs/mlagents_envs/side_channel/incoming_message.py


self.buffer = buffer
self.offset = offset
def read_bool(self) -> bool:
def read_bool(self, default_value: bool = False) -> bool:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_int32(self) -> int:
def read_int32(self, default_value: int = 0) -> int:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_float32(self) -> float:
def read_float32(self, default_value: float = 0.0) -> float:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_float32_list(self) -> List[float]:
def read_float32_list(self, default_value: List[float] = None) -> List[float]:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return [] if default_value is None else default_value
list_len = self.read_int32()
output = []
for _ in range(list_len):

def read_string(self) -> str:
def read_string(self, default_value: str = "") -> str:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
encoded_str_len = self.read_int32()
val = self.buffer[self.offset : self.offset + encoded_str_len].decode("ascii")
self.offset += encoded_str_len

Get a copy of the internal bytes used by the message.
"""
return bytearray(self.buffer)
def _at_end_of_buffer(self) -> bool:
return self.offset >= len(self.buffer)

20
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


read_vals.append(msg_in.read_bool())
assert vals == read_vals
# Test reading with defaults
assert msg_in.read_bool() is False
assert msg_in.read_bool(default_value=True) is True
def test_message_int32():
val = 1337

read_val = msg_in.read_int32()
assert val == read_val
# Test reading with defaults
assert 0 == msg_in.read_int32()
assert val == msg_in.read_int32(default_value=val)
def test_message_float32():
val = 42.0

# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
# Test reading with defaults
assert 0.0 == msg_in.read_float32()
assert val == msg_in.read_float32(default_value=val)
def test_message_string():
val = "mlagents!"

read_val = msg_in.read_string()
assert val == read_val
# Test reading with defaults
assert "" == msg_in.read_string()
assert val == msg_in.read_string(default_value=val)
def test_message_float_list():
val = [1.0, 3.0, 9.0]

read_val = msg_in.read_float32_list()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
# Test reading with defaults
assert [] == msg_in.read_float32_list()
assert val == msg_in.read_float32_list(default_value=val)

7
ml-agents-envs/mlagents_envs/tests/test_timers.py


}
},
"gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
"metadata": {
"timer_format_version": timers.TIMER_FORMAT_VERSION,
"start_time_seconds": mock.ANY,
"end_time_seconds": mock.ANY,
"python_version": mock.ANY,
"command_line_arguments": mock.ANY,
},
}
assert timer_tree == expected_tree

39
ml-agents-envs/mlagents_envs/timers.py


"""
import math
from time import perf_counter
import sys
import time
TIMER_FORMAT_VERSION = "0.1.0"
class TimerNode:

sure that pushes and pops are already matched.
"""
__slots__ = ["root", "stack", "start_time", "gauges"]
__slots__ = ["root", "stack", "start_time", "gauges", "metadata"]
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.metadata: Dict[str, str] = {}
self._add_default_metadata()
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.metadata: Dict[str, str] = {}
self._add_default_metadata()
def push(self, name: str) -> TimerNode:
"""

Update the total time and count of the root name, and return it.
"""
root = self.root
root.total = perf_counter() - self.start_time
root.total = time.perf_counter() - self.start_time
root.count = 1
return root

if self.gauges:
res["gauges"] = self._get_gauges()
if self.metadata:
self.metadata["end_time_seconds"] = str(int(time.time()))
res["metadata"] = self.metadata
res["total"] = node.total
res["count"] = node.count

else:
self.gauges[name] = GaugeNode(value)
def add_metadata(self, key: str, value: str) -> None:
    """
    Store a metadata entry on this timer stack.

    :param key: Name of the metadata entry; an existing entry with the same key is overwritten.
    :param value: Value to store under ``key``.
    """
    self.metadata.update({key: value})
def _add_default_metadata(self):
    """
    Fill the metadata dict with the default entries: the timer format version,
    the current wall-clock time (whole seconds, as a string), the Python
    version string, and the space-joined command line arguments.
    """
    defaults = {
        "timer_format_version": TIMER_FORMAT_VERSION,
        "start_time_seconds": str(int(time.time())),
        "python_version": sys.version,
        "command_line_arguments": " ".join(sys.argv),
    }
    self.metadata.update(defaults)
# Global instance of a TimerStack. This is generally all that we need for profiling, but you can potentially

"""
timer_stack = timer_stack or _global_timer_stack
timer_node = timer_stack.push(name)
start_time = perf_counter()
start_time = time.perf_counter()
try:
# The wrapped code block will run here.

# We'll accumulate the time, and the exception (if any) gets raised automatically.
elapsed = perf_counter() - start_time
elapsed = time.perf_counter() - start_time
timer_node.add_time(elapsed)
timer_stack.pop()

"""
timer_stack = timer_stack or _global_timer_stack
timer_stack.set_gauge(name, value)
def add_metadata(key: str, value: str, timer_stack: TimerStack = None) -> None:
    """
    Add a metadata entry to a timer stack.

    :param key: Name of the metadata entry.
    :param value: Value to store under ``key``.
    :param timer_stack: Stack to update; the module's global timer stack is
        used when this is omitted (or otherwise falsy).
    """
    target_stack = timer_stack if timer_stack else _global_timer_stack
    target_stack.add_metadata(key, value)
def get_timer_tree(timer_stack: TimerStack = None) -> Dict[str, Any]:

1
ml-agents/mlagents/trainers/agent_processor.py


self._process_step(
terminal_step, global_id, terminal_steps.agent_id_to_index[local_id]
)
# Iterate over all the decision steps
for ongoing_step in decision_steps.values():
local_id = ongoing_step.agent_id

14
ml-agents/mlagents/trainers/demo_loader.py


from google.protobuf.internal.encoder import _EncodeVarint # type: ignore
INITIAL_POS = 33
SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1])
@timed
def make_demo_buffer(
pair_infos: List[AgentInfoActionPairProto],

)
INITIAL_POS = 33
@timed
def load_demonstration(
file_path: str

if obs_decoded == 0:
meta_data_proto = DemonstrationMetaProto()
meta_data_proto.ParseFromString(data[pos : pos + next_pos])
if (
meta_data_proto.api_version
not in SUPPORTED_DEMONSTRATION_VERSIONS
):
raise RuntimeError(
f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
)
total_expected += meta_data_proto.number_steps
pos = INITIAL_POS
if obs_decoded == 1:

1
ml-agents/mlagents/trainers/env_manager.py


AllStepResult = Dict[BehaviorName, Tuple[DecisionSteps, TerminalSteps]]
AllGroupSpec = Dict[BehaviorName, BehaviorSpec]
logger = get_logger(__name__)

12
ml-agents/mlagents/trainers/learn.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer, get_timer_tree
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,
add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
logger = logging_util.get_logger(__name__)

run_seed = options.seed
if options.cpu:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Add some timer metadata
add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
if options.seed == -1:
run_seed = np.random.randint(0, 10000)

24
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import io
from unittest import mock
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
write_delimited,
)

assert get_demo_files(valid_fname) == [valid_fname]
# valid directory
assert get_demo_files(tmpdirname) == [valid_fname]
@mock.patch("mlagents.trainers.demo_loader.get_demo_files", return_value=["foo.demo"])
def test_unsupported_version_raises_error(mock_get_demo_files):
    """
    Loading a demonstration whose metadata carries api_version 1337 must
    raise a RuntimeError.
    """
    # Metadata proto carrying the version under test.
    unsupported_meta = DemonstrationMetaProto()
    unsupported_meta.api_version = 1337

    # Serialize the metadata into an in-memory buffer; the patched open()
    # hands these bytes back to the loader.
    serialized = io.BytesIO()
    write_delimited(serialized, unsupported_meta)
    fake_open = mock.mock_open(read_data=serialized.getvalue())

    # The load attempt is expected to fail with a RuntimeError.
    with mock.patch("builtins.open", fake_open), pytest.raises(RuntimeError):
        load_demonstration("foo")

2
ml-agents/tests/yamato/scripts/run_llapi.py


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--env", default="Project/testPlayer")
parser.add_argument("--env", default="artifacts/testPlayer")
args = parser.parse_args()
main(args.env)

2
ml-agents/tests/yamato/standalone_build_tests.py


executable_name = "testPlayer-" + executable_name
returncode = run_standalone_build(
base_path, verbose=True, output_path=executable_name, scene_path=scene_path
base_path, output_path=executable_name, scene_path=scene_path
)
if returncode == 0:

13
ml-agents/tests/yamato/training_int_tests.py


from .yamato_utils import (
get_base_path,
get_base_output_path,
run_standalone_build,
init_venv,
override_config_file,

if csharp_version is not None:
# We can't rely on the old C# code recognizing the commandline argument to set the output
# So rename testPlayer (containing the most recent build) to something else temporarily
full_player_path = os.path.join("Project", "testPlayer.app")
temp_player_path = os.path.join("Project", "temp_testPlayer.app")
final_player_path = os.path.join("Project", f"testPlayer_{csharp_version}.app")
artifact_path = get_base_output_path()
full_player_path = os.path.join(artifact_path, "testPlayer.app")
temp_player_path = os.path.join(artifact_path, "temp_testPlayer.app")
final_player_path = os.path.join(
artifact_path, f"testPlayer_{csharp_version}.app"
)
os.rename(full_player_path, temp_player_path)

)
mla_learn_cmd = (
f"mlagents-learn override.yaml --train --env=Project/{standalone_player_path} "
f"mlagents-learn override.yaml --train --env="
f"{os.path.join(get_base_output_path(), standalone_player_path)} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa
res = subprocess.run(

37
ml-agents/tests/yamato/yamato_utils.py


import os
import shutil
import subprocess
import yaml
from typing import List, Optional

return os.getcwd()
def get_base_output_path():
    """
    Returns the artifact folder to use for yamato jobs.

    The folder is the "artifacts" directory directly under get_base_path().
    """
    return os.path.join(get_base_path(), "artifacts")
log_output_path: str = f"{get_base_output_path()}/standalone_build.txt",
Run BuildStandalonePlayerOSX test to produce a player. The location defaults to Project/testPlayer.
Run BuildStandalonePlayerOSX test to produce a player. The location defaults to
artifacts/standalone_build/testPlayer.
"""
unity_exe = get_unity_executable_path()
print(f"Running BuildStandalonePlayerOSX via {unity_exe}")

"-executeMethod",
"MLAgents.StandaloneBuildTest.BuildStandalonePlayerOSX",
]
if verbose:
test_args += ["-logfile", "-"]
os.makedirs(os.path.dirname(log_output_path), exist_ok=True)
subprocess.run(["touch", log_output_path])
test_args += ["-logfile", log_output_path]
output_path = os.path.join(get_base_output_path(), output_path)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
if scene_path is not None:
test_args += ["--mlagents-build-scene-path", scene_path]
print(f"{' '.join(test_args)} ...")

# Copy the default build name into the artifacts folder.
if output_path is None and res.returncode == 0:
shutil.move(
os.path.join(base_path, "Project", "testPlayer.app"),
os.path.join(get_base_output_path(), "testPlayer.app"),
)
# Print if we fail or want verbosity.
if verbose or res.returncode != 0:
subprocess.run(["cat", log_output_path])
return res.returncode

"""
if csharp_version is None:
return
subprocess.check_call(f"rm -rf {csharp_dir}", shell=True)
subprocess.check_call(
f"git checkout {csharp_version} -- {csharp_dir}", shell=True
)

"""
subprocess.check_call("git reset HEAD .", shell=True)
subprocess.check_call("git checkout -- .", shell=True)
# Ensure the cache isn't polluted with old compiled assemblies.
subprocess.check_call(f"rm -rf Project/Library", shell=True)
def override_config_file(src_path, dest_path, **kwargs):

1
utils/make_readme_table.py


["0.14.0", "February 13, 2020"],
["0.14.1", "February 26, 2020"],
["0.15.0", "March 18, 2020"],
["0.15.1", "March 30, 2020"],
]
MAX_DAYS = 150 # do not print releases older than this many days

129
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


#if UNITY_INCLUDE_TESTS
using System.Collections;
using System.Collections.Generic;
using MLAgents;
using MLAgents.Policies;
using MLAgents.Sensors;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;
namespace Tests
{
/// <summary>
/// Agent subclass added to the test GameObject; it counts how often its heuristic is queried.
/// </summary>
public class PublicApiAgent : Agent
{
    /// <summary>Number of times <see cref="Heuristic"/> has been invoked.</summary>
    public int numHeuristicCalls;

    /// <summary>
    /// Records the call, then defers to the base implementation for the returned values.
    /// </summary>
    public override float[] Heuristic()
    {
        numHeuristicCalls += 1;
        var baseResult = base.Heuristic();
        return baseResult;
    }
}
// Simple SensorComponent that sets up a StackingSensor
public class StackingComponent : SensorComponent
{
    /// <summary>Component whose sensor is wrapped by the StackingSensor.</summary>
    public SensorComponent wrappedComponent;

    /// <summary>Stack count passed to the StackingSensor and used to scale the reported shape.</summary>
    public int numStacks;

    /// <summary>
    /// Creates the wrapped component's sensor and wraps it in a StackingSensor.
    /// </summary>
    public override ISensor CreateSensor()
    {
        return new StackingSensor(wrappedComponent.CreateSensor(), numStacks);
    }

    /// <summary>
    /// Returns a new array holding the wrapped component's observation shape
    /// with every dimension multiplied by numStacks.
    /// </summary>
    public override int[] GetObservationShape()
    {
        var innerShape = wrappedComponent.GetObservationShape();
        var stackedShape = new int[innerShape.Length];
        for (var dim = 0; dim < stackedShape.Length; dim++)
        {
            stackedShape[dim] = innerShape[dim] * numStacks;
        }
        return stackedShape;
    }
}
public class RuntimeApiTest
{
[SetUp]
public static void Setup()
{
Academy.Instance.AutomaticSteppingEnabled = false;
}
[UnityTest]
public IEnumerator RuntimeApiTestWithEnumeratorPasses()
{
var gameObject = new GameObject();
var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.brainParameters.vectorObservationSize = 3;
behaviorParams.brainParameters.numStackedVectorObservations = 2;
behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
behaviorParams.behaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.useChildSensors = true;
// Can't actually create an Agent with InferenceOnly and no model, so change back
behaviorParams.behaviorType = BehaviorType.Default;
var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;
// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;
// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);
// Make sure we can set the behavior type correctly after the agent is initialized
// (this creates a new policy).
behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
// Agent needs to be added after everything else is setup.
var agent = gameObject.AddComponent<PublicApiAgent>();
// DecisionRequester has to be added after Agent.
var decisionRequester = gameObject.AddComponent<DecisionRequester>();
decisionRequester.DecisionPeriod = 2;
decisionRequester.TakeActionsBetweenDecisions = true;
// Initialization should set up the sensors
Assert.IsNotNull(sensorComponent.raySensor);
// Let's change the inference device
var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
agent.AddReward(1.0f);
// skip a frame.
yield return null;
Ac