using UnityEngine;
using System.Collections.Generic;
using UnityEngine.Serialization;
#if UNITY_EDITOR
using UnityEditor;
#endif
using MLAgents.InferenceBrain;
using Barracuda;
/**
* Welcome to Unity Machine Learning Agents (ML-Agents).
*
* The ML-Agents toolkit contains four entities: Academy, Agent, Communicator and
* Python API. The academy and connected agents live within
* a learning environment (herein called Environment), while the communicator
* manages the communication between the learning environment and the Python
* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
*/
namespace MLAgents
{
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor: window size, rendering quality, time scale and target frame
/// rate. These parameters can be provided for training and inference
/// modes separately.
/// </summary>
[System.Serializable]
public class EnvironmentConfiguration
{
    [Tooltip("Width of the environment window in pixels.")]
    public int width;

    [Tooltip("Height of the environment window in pixels.")]
    public int height;

    [Tooltip("Rendering quality of environment. (Higher is better quality.)"), Range(0, 5)]
    public int qualityLevel;

    [Tooltip("Speed at which environment is run. (Higher is faster.)"), Range(1f, 100f)]
    public float timeScale;

    [Tooltip("Frames per second (FPS) engine attempts to maintain.")]
    public int targetFrameRate;

    /// <summary>
    /// Initializes a new instance of the
    /// <see cref="EnvironmentConfiguration"/> class. The values are stored
    /// as given; no range checking is performed here.
    /// </summary>
    /// <param name="width">Width of environment window (pixels).</param>
    /// <param name="height">Height of environment window (pixels).</param>
    /// <param name="qualityLevel">
    /// Rendering quality of environment. The Inspector limits this to the
    /// range 0 to 5, with higher values representing better quality.
    /// </param>
    /// <param name="timeScale">
    /// Speed at which environment is run. The Inspector limits this to the
    /// range 1 to 100, with higher values representing faster speed.
    /// </param>
    /// <param name="targetFrameRate">
    /// Target frame rate (per second) that the engine tries to maintain.
    /// </param>
    public EnvironmentConfiguration(
        int width,
        int height,
        int qualityLevel,
        float timeScale,
        int targetFrameRate)
    {
        // Window dimensions.
        this.width = width;
        this.height = height;

        // Rendering and timing.
        this.qualityLevel = qualityLevel;
        this.timeScale = timeScale;
        this.targetFrameRate = targetFrameRate;
    }
}
/// <summary>
/// An Academy is where Agent objects go to train their behaviors.
/// Currently, this class is expected to be extended to
/// implement the desired academy behavior.
/// </summary>
/// <remarks>
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the Policy
/// attached to it.
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md")]
public abstract class Academy : MonoBehaviour
{
/// Version string sent to the communicator in the initialization message.
const string k_ApiVersion = "API-12";
/// Temporary storage for the global gravity value.
/// Used to restore the original value when a deriving Academy modifies it.
Vector3 m_OriginalGravity;
/// Temporary storage for the global fixedDeltaTime value.
/// Used to restore the original value when a deriving Academy modifies it.
float m_OriginalFixedDeltaTime;
/// Temporary storage for the global maximumDeltaTime value.
/// Used to restore the original value when a deriving Academy modifies it.
float m_OriginalMaximumDeltaTime;
// Fields provided in the Inspector
/// <summary>
/// Engine settings applied by <see cref="ConfigureEnvironment"/> while the
/// Academy is not in inference mode (i.e. training).
/// </summary>
[FormerlySerializedAs("trainingConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration m_TrainingConfiguration =
new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);
/// <summary>
/// Engine settings applied by <see cref="ConfigureEnvironment"/> while the
/// Academy is in inference mode.
/// </summary>
[FormerlySerializedAs("inferenceConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference.")]
EnvironmentConfiguration m_InferenceConfiguration =
new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// meant to be read at reset time (for example in
/// <see cref="AcademyReset"/>) to modify elements in the environment.
/// </summary>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training by passing a config
/// dictionary at reset: entries received with a reset command overwrite
/// the entries with the same key (see <see cref="UpdateResetParameters"/>).
/// </remarks>
[SerializeField]
[Tooltip("List of custom parameters that can be changed in the " +
"environment when it resets.")]
public ResetParameters resetParameters;
/// <summary>
/// Custom reset parameters message. It is sent to the communicator in the
/// initialization message and replaced by the value received with each
/// reset command.
/// </summary>
public CommunicatorObjects.CustomResetParametersProto customResetParameters;
// Fields not provided in the Inspector.
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <value>
/// <c>true</c> if <see cref="Communicator"/> is not null, <c>false</c> otherwise.
/// </value>
public bool IsCommunicatorOn => Communicator != null;
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="InitializeEnvironment"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified
/// via <see cref="SetIsInference"/> and when RL input is received from the
/// communicator.
bool m_IsInference = true;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="EnvironmentReset"/>.
int m_StepCount;
/// The total number of steps completed during the whole simulation. Incremented
/// each time a step is taken in the environment; never reset.
int m_TotalStepCount;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched. This impacts the
/// engine settings at the next environment step.
bool m_ModeSwitched;
/// Reference to the communicator currently in use by the Academy, or null
/// when no communicator is available.
public ICommunicator Communicator;
/// Set once <see cref="LazyInitialization"/> has run
/// <see cref="InitializeEnvironment"/>, so it is not run again.
bool m_Initialized;
/// ModelRunner instances handed out by GetOrCreateModelRunner so far. They
/// are reused for matching model/device requests and disposed in OnDestroy.
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
/// Flag used to keep track of the first time the Academy is reset. It is
/// false until ForcedFullReset has completed once; EnvironmentStep uses it
/// to trigger the initial reset.
bool m_FirstAcademyReset;
// The Academy uses a series of events to communicate with agents
// to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Within one environment step the events are raised in this order:
// AgentSetStatus, AgentResetIfDone, AgentSendState, DecideAction, AgentAct.

// Signals to all the Agents at each environment step so they can use
// their Policy to decide on their next action.
public event System.Action DecideAction;

// Signals to all the listeners that the academy is being destroyed.
public event System.Action DestroyAction;

// Signals to all the agents at each environment step along with the
// Academy's step count for the current episode (the int argument), so
// that agents can align on the step count of the global episode.
public event System.Action<int> AgentSetStatus;

// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
// decision).
public event System.Action AgentResetIfDone;

// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.
public event System.Action AgentSendState;

// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
public event System.Action AgentAct;

// Signals to all the agents each time the Academy force resets.
public event System.Action AgentForceReset;
/// <summary>
/// MonoBehaviour function called at the very beginning of environment
/// creation. Delegates to <see cref="LazyInitialization"/>, which
/// initializes the environment and checks for a communicator the first
/// time it runs.
/// </summary>
void Awake() => LazyInitialization();
/// <summary>
/// Runs <see cref="InitializeEnvironment"/> the first time it is called;
/// later calls return immediately.
/// </summary>
public void LazyInitialization()
{
    if (m_Initialized)
    {
        return;
    }

    InitializeEnvironment();
    // Only marked as initialized once InitializeEnvironment has returned.
    m_Initialized = true;
}
/// <summary>
/// Reads the Python-provided port from the process command line, i.e. the
/// value that follows the <c>--port</c> argument. When the flag appears
/// more than once, the last occurrence wins.
/// </summary>
/// <returns>The port number parsed from the command line.</returns>
/// <exception cref="System.FormatException">
/// Thrown when no <c>--port</c> value is present or the value is not an
/// integer. The caller relies on an exception to detect a missing port.
/// </exception>
/// <exception cref="System.OverflowException">
/// Thrown when the value does not fit in an <see cref="int"/>.
/// </exception>
static int ReadArgs()
{
    var args = System.Environment.GetCommandLineArgs();
    var inputPort = "";
    // Stop one element short of the end: a trailing "--port" with no value
    // must not index past the array.
    for (var i = 0; i < args.Length - 1; i++)
    {
        if (args[i] == "--port")
        {
            inputPort = args[i + 1];
        }
    }
    // The port is machine-generated text, so parse it culture-independently.
    // An empty string (flag absent) still raises FormatException here.
    return int.Parse(inputPort, System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// </summary>
/// <remarks>
/// Steps, in order: record the global gravity and time settings, call
/// <see cref="InitializeAcademy"/>, try to create and initialize a
/// communicator, select inference or training mode from the result,
/// register no-op handlers on every Academy event, and apply the engine
/// configuration for the selected mode.
/// </remarks>
void InitializeEnvironment()
{
// Remember the global values so OnDestroy can put them back.
m_OriginalGravity = Physics.gravity;
m_OriginalFixedDeltaTime = Time.fixedDeltaTime;
m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
InitializeAcademy();
// Try to launch the communicator by using the arguments passed at launch
try
{
Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = ReadArgs()
});
}
catch
{
// Reached when ReadArgs or the RpcCommunicator constructor throws.
// In the Editor a communicator on port 5004 is created instead;
// outside the Editor nothing is assigned here.
#if UNITY_EDITOR
Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = 5004
});
#endif
}
if (Communicator != null)
{
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
// environment must use Inference.
try
{
var unityRLInitParameters = Communicator.Initialize(
new CommunicatorInitParameters
{
version = k_ApiVersion,
name = gameObject.name,
environmentResetParameters = new EnvironmentResetParameters
{
resetParameters = resetParameters,
customResetParameters = customResetParameters
}
});
// Seed the random number generator with the seed returned by the exchange.
Random.InitState(unityRLInitParameters.seed);
}
catch
{
// Any failure during the exchange drops the communicator.
Communicator = null;
}
if (Communicator != null)
{
// The exchange succeeded: listen for quit, reset and RL input messages.
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
Communicator.RLInputReceived += OnRLInputReceived;
}
}
// If a communicator is enabled/provided, then we assume we are in
// training mode. In the absence of a communicator, we assume we are
// in inference mode.
SetIsInference(!IsCommunicatorOn);
// Register a no-op handler on every event so each one always has at
// least one subscriber.
DecideAction += () => { };
DestroyAction += () => { };
AgentSetStatus += i => { };
AgentResetIfDone += () => { };
AgentSendState += () => { };
AgentAct += () => { };
AgentForceReset += () => { };
ConfigureEnvironment();
}
/// <summary>
/// Handler for the communicator's quit command. When compiled for the
/// Unity Editor it first leaves play mode; it then requests that the
/// application quit.
/// </summary>
static void OnQuitCommandReceived()
{
#if UNITY_EDITOR
    EditorApplication.isPlaying = false;
#endif
    Application.Quit();
}
/// <summary>
/// Handler for the communicator's reset command: merges the received
/// reset parameters into the Academy, then forces a full reset.
/// </summary>
/// <param name="newResetParameters">
/// Reset parameters received with the reset command.
/// </param>
void OnResetCommand(EnvironmentResetParameters newResetParameters)
{
    // Parameters must be in place before AcademyReset runs inside the reset.
    UpdateResetParameters(newResetParameters);
    ForcedFullReset();
}
/// <summary>
/// Handler for RL input received from the communicator. Switches the
/// Academy to inference mode when the input says it is not training, and
/// to training mode otherwise.
/// </summary>
/// <param name="inputParams">Input parameters received from the communicator.</param>
void OnRLInputReceived(UnityRLInputParameters inputParams)
{
    // Go through SetIsInference rather than writing the field directly:
    // it flags a mode switch when the value actually changes, so that
    // EnvironmentStep reapplies the matching engine configuration.
    SetIsInference(!inputParams.isTraining);
}
/// <summary>
/// Merges newly received reset parameters into the Academy's own.
/// Entries in <c>resetParameters</c> are overwritten key by key (keys that
/// are not received keep their current value); the custom reset parameters
/// are replaced wholesale.
/// </summary>
/// <param name="newResetParameters">The reset parameters to apply.</param>
void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
{
    var incoming = newResetParameters.resetParameters;
    if (incoming != null)
    {
        foreach (var entry in incoming)
        {
            resetParameters[entry.Key] = entry.Value;
        }
    }

    customResetParameters = newResetParameters.customResetParameters;
}
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor. The Monitor
/// is activated in inference mode and deactivated in training mode.
/// </summary>
void ConfigureEnvironment()
{
    var activeConfiguration = m_IsInference
        ? m_InferenceConfiguration
        : m_TrainingConfiguration;

    ConfigureEnvironmentHelper(activeConfiguration);
    Monitor.SetActive(m_IsInference);
}
/// <summary>
/// Helper method for initializing the environment based on the provided
/// configuration: sets a windowed screen resolution, the quality level,
/// the time scale and the target frame rate. The capture frame rate is
/// always set to 60, independently of the configuration.
/// </summary>
/// <param name="config">
/// Environment configuration (specified in the Editor).
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
    const bool fullscreen = false;
    const bool applyExpensiveChanges = true;
    const int captureFramerate = 60;

    Screen.SetResolution(config.width, config.height, fullscreen);
    QualitySettings.SetQualityLevel(config.qualityLevel, applyExpensiveChanges);
    Time.timeScale = config.timeScale;
    Time.captureFramerate = captureFramerate;
    Application.targetFrameRate = config.targetFrameRate;
}
/// <summary>
/// Extension point for initializing the academy and environment. The base
/// implementation does nothing. It is invoked from
/// <see cref="InitializeEnvironment"/>, after the global gravity and time
/// values have been recorded and before the communicator is created.
/// </summary>
public virtual void InitializeAcademy() { }
/// <summary>
/// Extension point specifying the academy behavior at every step of the
/// environment. The base implementation does nothing. It is invoked once
/// per <see cref="EnvironmentStep"/>, after the DecideAction event and
/// before the AgentAct event.
/// </summary>
public virtual void AcademyStep() { }
/// <summary>
/// Extension point specifying the academy behavior when being reset (i.e.
/// at the completion of a global episode). The base implementation does
/// nothing. It is invoked from <see cref="EnvironmentReset"/> after the
/// step counter has been zeroed and the episode counter incremented.
/// </summary>
public virtual void AcademyReset() { }
/// <summary>
/// Returns the <see cref="m_IsInference"/> flag.
/// </summary>
/// <returns>
/// <c>true</c>, if current mode is inference, <c>false</c> if training.
/// </returns>
public bool GetIsInference() => m_IsInference;
/// <summary>
/// Sets the <see cref="m_IsInference"/> flag to the provided value. If
/// the new flag differs from the current flag value, this signals that
/// the environment configuration needs to be updated.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference(bool isInference)
{
    // Nothing to do when the mode is unchanged.
    if (m_IsInference == isInference)
    {
        return;
    }

    m_IsInference = isInference;

    // Tell the academy that, at the next environment step, the engine
    // configuration must be switched to the one for the new mode
    // (training vs inference).
    m_ModeSwitched = true;
}
/// <summary>
/// Returns the current episode counter, which is incremented on every
/// environment reset.
/// </summary>
/// <returns>
/// Current episode number.
/// </returns>
public int GetEpisodeCount() => m_EpisodeCount;
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current step count.
/// </returns>
public int GetStepCount() => m_StepCount;
/// <summary>
/// Returns the total step counter, which counts every environment step
/// taken and is not reset between episodes.
/// </summary>
/// <returns>
/// Total step count.
/// </returns>
public int GetTotalStepCount() => m_TotalStepCount;
/// <summary>
/// Forces the full reset: resets the environment, raises the
/// AgentForceReset event, and records that the first reset has happened.
/// It is called from <see cref="EnvironmentStep"/> when no reset has
/// happened yet, and whenever a reset command is received.
/// </summary>
void ForcedFullReset()
{
    EnvironmentReset();

    var forceResetHandlers = AgentForceReset;
    if (forceResetHandlers != null)
    {
        forceResetHandlers();
    }

    m_FirstAcademyReset = true;
}
/// <summary>
/// Performs a single environment update to the Academy, and Agent
/// objects within the environment.
/// </summary>
/// <remarks>
/// The order of operations is: reapply the engine configuration if the
/// mode was switched, force the initial reset if none has happened yet,
/// then raise AgentSetStatus, AgentResetIfDone, AgentSendState and
/// DecideAction, call <see cref="AcademyStep"/>, raise AgentAct, and
/// finally increment the step counters.
/// </remarks>
void EnvironmentStep()
{
// Apply the engine settings of the new mode once after a mode switch.
if (m_ModeSwitched)
{
ConfigureEnvironment();
m_ModeSwitched = false;
}
// The very first step triggers the initial full reset.
if (!m_FirstAcademyReset)
{
ForcedFullReset();
}
// Agents receive the step count of the current episode.
AgentSetStatus?.Invoke(m_StepCount);
// Each remaining phase runs inside a named timer scope.
using (TimerStack.Instance.Scoped("AgentResetIfDone"))
{
AgentResetIfDone?.Invoke();
}
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState?.Invoke();
}
using (TimerStack.Instance.Scoped("DecideAction"))
{
DecideAction?.Invoke();
}
using (TimerStack.Instance.Scoped("AcademyStep"))
{
AcademyStep();
}
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct?.Invoke();
}
// Counters advance only after all phases of this step have run.
m_StepCount += 1;
m_TotalStepCount += 1;
}
/// <summary>
/// Resets the environment, including the Academy: zeroes the per-episode
/// step counter, increments the episode counter, and then calls
/// <see cref="AcademyReset"/>.
/// </summary>
void EnvironmentReset()
{
    m_StepCount = 0;
    m_EpisodeCount += 1;

    AcademyReset();
}
/// <summary>
/// MonoBehaviour function that dictates each environment step: every
/// call performs one <see cref="EnvironmentStep"/>.
/// </summary>
void FixedUpdate() => EnvironmentStep();
/// <summary>
/// Creates or retrieves an existing ModelRunner that uses the same
/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="brainParameters">
/// The brainParameters used to create the ModelRunner. They are only used
/// when a new runner is created; matching of existing runners is done on
/// the model and the inference device.
/// </param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
/// <returns>The ModelRunner compatible with the input settings.</returns>
public ModelRunner GetOrCreateModelRunner(
    NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
{
    // Reuse the first runner already registered for this model/device pair.
    foreach (var existingRunner in m_ModelRunners)
    {
        if (existingRunner.HasModel(model, inferenceDevice))
        {
            return existingRunner;
        }
    }

    // None registered yet: create one and remember it for later requests.
    var createdRunner = new ModelRunner(model, brainParameters, inferenceDevice);
    m_ModelRunners.Add(createdRunner);
    return createdRunner;
}
/// <summary>
/// Cleanup function: restores the global gravity and time values recorded
/// at initialization, raises the DestroyAction event, disposes every
/// ModelRunner created by this Academy, and saves the timers as JSON.
/// </summary>
protected virtual void OnDestroy()
{
    // Put back the global values recorded in InitializeEnvironment.
    Physics.gravity = m_OriginalGravity;
    Time.fixedDeltaTime = m_OriginalFixedDeltaTime;
    Time.maximumDeltaTime = m_OriginalMaximumDeltaTime;

    // Signal to listeners that the academy is being destroyed now.
    DestroyAction?.Invoke();

    foreach (var runner in m_ModelRunners)
    {
        runner.Dispose();
    }

    // TODO - Pass worker ID or some other identifier,
    // so that multiple envs won't overwrite each others stats.
    TimerStack.Instance.SaveJsonTimers();
}
}
}