您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
621 行
22 KiB
621 行
22 KiB
using System;
|
|
using UnityEngine;
|
|
using System.Collections.Generic;
|
|
#if UNITY_EDITOR
|
|
using UnityEditor;
|
|
#endif
|
|
using Unity.MLAgents.Inference;
|
|
using Unity.MLAgents.Policies;
|
|
using Unity.MLAgents.SideChannels;
|
|
using Unity.Barracuda;
|
|
|
|
/**
|
|
* Welcome to Unity Machine Learning Agents (ML-Agents).
|
|
*
|
|
* The ML-Agents toolkit contains four entities: Academy, Agent, Communicator and
|
|
* Python API. The academy and connected agents live within
|
|
* a learning environment (herein called Environment), while the communicator
|
|
* manages the communication between the learning environment and the Python
|
|
* API. For more information on each of these entities, in addition to how to
|
|
* set-up a learning environment and train the behavior of characters in a
|
|
* Unity scene, please browse our documentation pages on GitHub:
|
|
* https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/
|
|
*/
|
|
|
|
namespace Unity.MLAgents
|
|
{
|
|
/// <summary>
|
|
/// Helper class to step the Academy during FixedUpdate phase.
|
|
/// </summary>
|
|
internal class AcademyFixedUpdateStepper : MonoBehaviour
|
|
{
|
|
void FixedUpdate()
|
|
{
|
|
Academy.Instance.EnvironmentStep();
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// The Academy singleton manages agent training and decision making.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// Access the Academy singleton through the <see cref="Instance"/>
|
|
/// property. The Academy instance is initialized the first time it is accessed (which will
|
|
/// typically be by the first <see cref="Agent"/> initialized in a scene).
|
|
///
|
|
/// At initialization, the Academy attempts to connect to the Python training process through
|
|
/// the external communicator. If successful, the training process can train <see cref="Agent"/>
|
|
/// instances. When you set an agent's <see cref="BehaviorParameters.BehaviorType"/> setting
|
|
/// to <see cref="BehaviorType.Default"/>, the agent exchanges data with the training process
|
|
/// to make decisions. If no training process is available, agents with the default behavior
|
|
/// fall back to inference or heuristic decisions. (You can also set agents to always use
|
|
/// inference or heuristics.)
|
|
/// </remarks>
|
|
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/" +
|
|
"docs/Learning-Environment-Design.md")]
|
|
public class Academy : IDisposable
|
|
{
|
|
/// <summary>
|
|
/// Communication protocol version.
|
|
/// When connecting to python, this must be compatible with UnityEnvironment.API_VERSION.
|
|
/// We follow semantic versioning on the communication version, so existing
|
|
/// functionality will work as long the major versions match.
|
|
/// This should be changed whenever a change is made to the communication protocol.
|
|
/// </summary>
|
|
const string k_ApiVersion = "1.0.0";
|
|
|
|
/// <summary>
|
|
/// Unity package version of com.unity.ml-agents.
|
|
/// This must match the version string in package.json and is checked in a unit test.
|
|
/// </summary>
|
|
internal const string k_PackageVersion = "1.2.0-preview";
|
|
|
|
const int k_EditorTrainingPort = 5004;
|
|
|
|
const string k_PortCommandLineFlag = "--mlagents-port";
|
|
|
|
// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
|
|
static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
|
|
|
|
/// <summary>
|
|
///Reports whether the Academy has been initialized yet.
|
|
/// </summary>
|
|
/// <value><c>True</c> if the Academy is initialized, <c>false</c> otherwise.</value>
|
|
public static bool IsInitialized
|
|
{
|
|
get { return s_Lazy.IsValueCreated; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// The singleton Academy object.
|
|
/// </summary>
|
|
/// <value>Getting the instance initializes the Academy, if necessary.</value>
|
|
public static Academy Instance { get { return s_Lazy.Value; } }
|
|
|
|
// Fields not provided in the Inspector.
|
|
|
|
/// <summary>
|
|
/// Reports whether or not the communicator is on.
|
|
/// </summary>
|
|
/// <seealso cref="ICommunicator"/>
|
|
/// <value>
|
|
/// <c>True</c>, if communicator is on, <c>false</c> otherwise.
|
|
/// </value>
|
|
public bool IsCommunicatorOn
|
|
{
|
|
get { return Communicator != null; }
|
|
}
|
|
|
|
/// The number of episodes completed by the environment. Incremented
|
|
/// each time the environment is reset.
|
|
int m_EpisodeCount;
|
|
|
|
/// The number of steps completed within the current episode. Incremented
|
|
/// each time a step is taken in the environment. Is reset to 0 during
|
|
/// <see cref="EnvironmentReset"/>.
|
|
int m_StepCount;
|
|
|
|
/// The number of total number of steps completed during the whole simulation. Incremented
|
|
/// each time a step is taken in the environment.
|
|
int m_TotalStepCount;
|
|
|
|
/// Pointer to the communicator currently in use by the Academy.
|
|
internal ICommunicator Communicator;
|
|
|
|
bool m_Initialized;
|
|
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
|
|
|
|
// Flag used to keep track of the first time the Academy is reset.
|
|
bool m_HadFirstReset;
|
|
|
|
// Whether the Academy is in the middle of a step. This is used to detect and Academy
|
|
// step called by user code that is also called by the Academy.
|
|
bool m_IsStepping;
|
|
|
|
// Random seed used for inference.
|
|
int m_InferenceSeed;
|
|
|
|
/// <summary>
|
|
/// Set the random seed used for inference. This should be set before any Agents are added
|
|
/// to the scene. The seed is passed to the ModelRunner constructor, and incremented each
|
|
/// time a new ModelRunner is created.
|
|
/// </summary>
|
|
public int InferenceSeed
|
|
{
|
|
set { m_InferenceSeed = value; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the RLCapabilities of the python client that the unity process is connected to.
|
|
/// </summary>
|
|
internal UnityRLCapabilities TrainerCapabilities { get; set; }
|
|
|
|
|
|
// The Academy uses a series of events to communicate with agents
|
|
// to facilitate synchronization. More specifically, it ensures
|
|
// that all the agents perform their steps in a consistent order (i.e. no
|
|
// agent can act based on a decision before another agent has had a chance
|
|
// to request a decision).
|
|
|
|
// Signals to all the Agents at each environment step so they can use
|
|
// their Policy to decide on their next action.
|
|
internal event Action DecideAction;
|
|
|
|
// Signals to all the listeners that the academy is being destroyed
|
|
internal event Action DestroyAction;
|
|
|
|
// Signals to the Agent that a new step is about to start.
|
|
// This will mark the Agent as Done if it has reached its maxSteps.
|
|
internal event Action AgentIncrementStep;
|
|
|
|
|
|
/// <summary>
|
|
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
|
|
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
|
|
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
|
|
/// for this step. Any other pre-step setup could be done during this even as well.
|
|
/// </summary>
|
|
public event Action<int> AgentPreStep;
|
|
|
|
// Signals to all the agents at each environment step so they can send
|
|
// their state to their Policy if they have requested a decision.
|
|
internal event Action AgentSendState;
|
|
|
|
// Signals to all the agents at each environment step so they can act if
|
|
// they have requested a decision.
|
|
internal event Action AgentAct;
|
|
|
|
// Signals to all the agents each time the Academy force resets.
|
|
internal event Action AgentForceReset;
|
|
|
|
/// <summary>
|
|
/// Signals that the Academy has been reset by the training process.
|
|
/// </summary>
|
|
public event Action OnEnvironmentReset;
|
|
|
|
AcademyFixedUpdateStepper m_FixedUpdateStepper;
|
|
GameObject m_StepperObject;
|
|
|
|
|
|
/// <summary>
|
|
/// Private constructor called the first time the Academy is used.
|
|
/// Academy uses this time to initialize internal data
|
|
/// structures, initialize the environment and check for the existence
|
|
/// of a communicator.
|
|
/// </summary>
|
|
Academy()
|
|
{
|
|
Application.quitting += Dispose;
|
|
|
|
LazyInitialize();
|
|
}
|
|
|
|
/// <summary>
|
|
/// Initialize the Academy if it hasn't already been initialized.
|
|
/// This method is always safe to call; it will have no effect if the Academy is already
|
|
/// initialized.
|
|
/// </summary>
|
|
internal void LazyInitialize()
|
|
{
|
|
if (!m_Initialized)
|
|
{
|
|
InitializeEnvironment();
|
|
m_Initialized = true;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating
|
|
/// a temporary GameObject with a MonoBehaviour that calls Academy.EnvironmentStep().
|
|
/// </summary>
|
|
void EnableAutomaticStepping()
|
|
{
|
|
if (m_FixedUpdateStepper != null)
|
|
{
|
|
return;
|
|
}
|
|
|
|
m_StepperObject = new GameObject("AcademyFixedUpdateStepper");
|
|
// Don't show this object in the hierarchy
|
|
m_StepperObject.hideFlags = HideFlags.HideInHierarchy;
|
|
m_FixedUpdateStepper = m_StepperObject.AddComponent<AcademyFixedUpdateStepper>();
|
|
try
|
|
{
|
|
// This try-catch is because DontDestroyOnLoad cannot be used in Editor Tests
|
|
GameObject.DontDestroyOnLoad(m_StepperObject);
|
|
}
|
|
catch {}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Disable stepping of the Academy during the FixedUpdate phase. If this is called, the Academy must be
|
|
/// stepped manually by the user by calling Academy.EnvironmentStep().
|
|
/// </summary>
|
|
void DisableAutomaticStepping()
|
|
{
|
|
if (m_FixedUpdateStepper == null)
|
|
{
|
|
return;
|
|
}
|
|
|
|
m_FixedUpdateStepper = null;
|
|
if (Application.isEditor)
|
|
{
|
|
UnityEngine.Object.DestroyImmediate(m_StepperObject);
|
|
}
|
|
else
|
|
{
|
|
UnityEngine.Object.Destroy(m_StepperObject);
|
|
}
|
|
|
|
m_StepperObject = null;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Determines whether or not the Academy is automatically stepped during the FixedUpdate phase.
|
|
/// </summary>
|
|
/// <value>Set <c>true</c> to enable automatic stepping; <c>false</c> to disable.</value>
|
|
public bool AutomaticSteppingEnabled
|
|
{
|
|
get { return m_FixedUpdateStepper != null; }
|
|
set
|
|
{
|
|
if (value)
|
|
{
|
|
EnableAutomaticStepping();
|
|
}
|
|
else
|
|
{
|
|
DisableAutomaticStepping();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Used to read Python-provided environment parameters
|
|
static int ReadPortFromArgs()
|
|
{
|
|
var args = Environment.GetCommandLineArgs();
|
|
var inputPort = "";
|
|
for (var i = 0; i < args.Length; i++)
|
|
{
|
|
if (args[i] == k_PortCommandLineFlag)
|
|
{
|
|
inputPort = args[i + 1];
|
|
}
|
|
}
|
|
|
|
try
|
|
{
|
|
return int.Parse(inputPort);
|
|
}
|
|
catch
|
|
{
|
|
// No arg passed, or malformed port number.
|
|
#if UNITY_EDITOR
|
|
// Try connecting on the default editor port
|
|
return k_EditorTrainingPort;
|
|
#else
|
|
// This is an executable, so we don't try to connect.
|
|
return -1;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
EnvironmentParameters m_EnvironmentParameters;
|
|
StatsRecorder m_StatsRecorder;
|
|
|
|
/// <summary>
|
|
/// Returns the <see cref="EnvironmentParameters"/> instance. If training
|
|
/// features such as Curriculum Learning or Environment Parameter Randomization are used,
|
|
/// then the values of the parameters generated from the training process can be
|
|
/// retrieved here.
|
|
/// </summary>
|
|
/// <returns></returns>
|
|
public EnvironmentParameters EnvironmentParameters
|
|
{
|
|
get { return m_EnvironmentParameters; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the <see cref="StatsRecorder"/> instance. This instance can be used
|
|
/// to record any statistics from the Unity environment.
|
|
/// </summary>
|
|
/// <returns></returns>
|
|
public StatsRecorder StatsRecorder
|
|
{
|
|
get { return m_StatsRecorder; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Initializes the environment, configures it and initializes the Academy.
|
|
/// </summary>
|
|
void InitializeEnvironment()
|
|
{
|
|
TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
|
|
TimerStack.Instance.AddMetadata("com.unity.ml-agents_version", k_PackageVersion);
|
|
|
|
EnableAutomaticStepping();
|
|
|
|
SideChannelManager.RegisterSideChannel(new EngineConfigurationChannel());
|
|
m_EnvironmentParameters = new EnvironmentParameters();
|
|
m_StatsRecorder = new StatsRecorder();
|
|
|
|
// Try to launch the communicator by using the arguments passed at launch
|
|
var port = ReadPortFromArgs();
|
|
if (port > 0)
|
|
{
|
|
Communicator = new RpcCommunicator(
|
|
new CommunicatorInitParameters
|
|
{
|
|
port = port
|
|
}
|
|
);
|
|
}
|
|
|
|
if (Communicator != null)
|
|
{
|
|
// We try to exchange the first message with Python. If this fails, it means
|
|
// no Python Process is ready to train the environment. In this case, the
|
|
//environment must use Inference.
|
|
try
|
|
{
|
|
var unityRlInitParameters = Communicator.Initialize(
|
|
new CommunicatorInitParameters
|
|
{
|
|
unityCommunicationVersion = k_ApiVersion,
|
|
unityPackageVersion = k_PackageVersion,
|
|
name = "AcademySingleton",
|
|
CSharpCapabilities = new UnityRLCapabilities()
|
|
});
|
|
UnityEngine.Random.InitState(unityRlInitParameters.seed);
|
|
// We might have inference-only Agents, so set the seed for them too.
|
|
m_InferenceSeed = unityRlInitParameters.seed;
|
|
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
|
|
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
|
|
}
|
|
catch
|
|
{
|
|
Debug.Log($"" +
|
|
$"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
|
|
"Will perform inference instead."
|
|
);
|
|
Communicator = null;
|
|
}
|
|
}
|
|
if (Communicator != null)
|
|
{
|
|
Communicator.QuitCommandReceived += OnQuitCommandReceived;
|
|
Communicator.ResetCommandReceived += OnResetCommand;
|
|
}
|
|
|
|
// If a communicator is enabled/provided, then we assume we are in
|
|
// training mode. In the absence of a communicator, we assume we are
|
|
// in inference mode.
|
|
|
|
ResetActions();
|
|
}
|
|
|
|
void ResetActions()
|
|
{
|
|
DecideAction = () => {};
|
|
DestroyAction = () => {};
|
|
AgentPreStep = i => {};
|
|
AgentSendState = () => {};
|
|
AgentAct = () => {};
|
|
AgentForceReset = () => {};
|
|
OnEnvironmentReset = () => {};
|
|
}
|
|
|
|
static void OnQuitCommandReceived()
|
|
{
|
|
#if UNITY_EDITOR
|
|
EditorApplication.isPlaying = false;
|
|
#endif
|
|
Application.Quit();
|
|
}
|
|
|
|
void OnResetCommand()
|
|
{
|
|
ForcedFullReset();
|
|
}
|
|
|
|
/// <summary>
|
|
/// The current episode count.
|
|
/// </summary>
|
|
/// <value>
|
|
/// Current episode number.
|
|
/// </value>
|
|
public int EpisodeCount
|
|
{
|
|
get { return m_EpisodeCount; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// The current step count (within the current episode).
|
|
/// </summary>
|
|
/// <value>
|
|
/// Current step count.
|
|
/// </value>
|
|
public int StepCount
|
|
{
|
|
get { return m_StepCount; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the total step count.
|
|
/// </summary>
|
|
/// <value>
|
|
/// Total step count.
|
|
/// </value>
|
|
public int TotalStepCount
|
|
{
|
|
get { return m_TotalStepCount; }
|
|
}
|
|
|
|
/// <summary>
|
|
/// Forces the full reset. The done flags are not affected. Is either
|
|
/// called the first reset at inference and every external reset
|
|
/// at training.
|
|
/// </summary>
|
|
void ForcedFullReset()
|
|
{
|
|
EnvironmentReset();
|
|
AgentForceReset?.Invoke();
|
|
m_HadFirstReset = true;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Performs a single environment update of the Academy and Agent
|
|
/// objects within the environment.
|
|
/// </summary>
|
|
public void EnvironmentStep()
|
|
{
|
|
// Check whether we're already in the middle of a step.
|
|
// This shouldn't happen generally, but could happen if user code (e.g. CollectObservations)
|
|
// that is called by EnvironmentStep() also calls EnvironmentStep(). This would result
|
|
// in an infinite loop and/or stack overflow, so stop it before it happens.
|
|
if (m_IsStepping)
|
|
{
|
|
throw new UnityAgentsException(
|
|
"Academy.EnvironmentStep() called recursively. " +
|
|
"This might happen if you call EnvironmentStep() from custom code such as " +
|
|
"CollectObservations() or OnActionReceived()."
|
|
);
|
|
}
|
|
|
|
m_IsStepping = true;
|
|
|
|
try
|
|
{
|
|
if (!m_HadFirstReset)
|
|
{
|
|
ForcedFullReset();
|
|
}
|
|
|
|
AgentPreStep?.Invoke(m_StepCount);
|
|
|
|
m_StepCount += 1;
|
|
m_TotalStepCount += 1;
|
|
AgentIncrementStep?.Invoke();
|
|
|
|
using (TimerStack.Instance.Scoped("AgentSendState"))
|
|
{
|
|
AgentSendState?.Invoke();
|
|
}
|
|
|
|
using (TimerStack.Instance.Scoped("DecideAction"))
|
|
{
|
|
DecideAction?.Invoke();
|
|
}
|
|
|
|
// If the communicator is not on, we need to clear the SideChannel sending queue
|
|
if (!IsCommunicatorOn)
|
|
{
|
|
SideChannelManager.GetSideChannelMessage();
|
|
}
|
|
|
|
using (TimerStack.Instance.Scoped("AgentAct"))
|
|
{
|
|
AgentAct?.Invoke();
|
|
}
|
|
}
|
|
finally
|
|
{
|
|
// Reset m_IsStepping when we're done (or if an exception occurred).
|
|
m_IsStepping = false;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Resets the environment, including the Academy.
|
|
/// </summary>
|
|
void EnvironmentReset()
|
|
{
|
|
m_StepCount = 0;
|
|
m_EpisodeCount++;
|
|
OnEnvironmentReset?.Invoke();
|
|
}
|
|
|
|
/// <summary>
|
|
/// Creates or retrieves an existing ModelRunner that uses the same
|
|
/// NNModel and the InferenceDevice as provided.
|
|
/// </summary>
|
|
/// <param name="model">The NNModel the ModelRunner must use.</param>
|
|
/// <param name="brainParameters">The BrainParameters used to create the ModelRunner.</param>
|
|
/// <param name="inferenceDevice">
|
|
/// The inference device (CPU or GPU) the ModelRunner will use.
|
|
/// </param>
|
|
/// <returns> The ModelRunner compatible with the input settings.</returns>
|
|
internal ModelRunner GetOrCreateModelRunner(
|
|
NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
|
|
{
|
|
var modelRunner = m_ModelRunners.Find(x => x.HasModel(model, inferenceDevice));
|
|
if (modelRunner == null)
|
|
{
|
|
modelRunner = new ModelRunner(model, brainParameters, inferenceDevice, m_InferenceSeed);
|
|
m_ModelRunners.Add(modelRunner);
|
|
m_InferenceSeed++;
|
|
}
|
|
return modelRunner;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Shut down the Academy.
|
|
/// </summary>
|
|
public void Dispose()
|
|
{
|
|
DisableAutomaticStepping();
|
|
|
|
// Signal to listeners that the academy is being destroyed now
|
|
DestroyAction?.Invoke();
|
|
|
|
Communicator?.Dispose();
|
|
Communicator = null;
|
|
|
|
m_EnvironmentParameters.Dispose();
|
|
m_StatsRecorder.Dispose();
|
|
SideChannelManager.UnregisterAllSideChannels(); // unregister custom side channels
|
|
|
|
if (m_ModelRunners != null)
|
|
{
|
|
foreach (var mr in m_ModelRunners)
|
|
{
|
|
mr.Dispose();
|
|
}
|
|
|
|
m_ModelRunners = null;
|
|
}
|
|
|
|
// Clear out the actions so we're not keeping references to any old objects
|
|
ResetActions();
|
|
|
|
// TODO - Pass worker ID or some other identifier,
|
|
// so that multiple envs won't overwrite each others stats.
|
|
TimerStack.Instance.SaveJsonTimers();
|
|
m_Initialized = false;
|
|
|
|
// Reset the Lazy instance
|
|
s_Lazy = new Lazy<Academy>(() => new Academy());
|
|
}
|
|
}
|
|
}
|