using System; using UnityEngine; using System.Collections.Generic; #if UNITY_EDITOR using UnityEditor; #endif using Unity.MLAgents.Actuators; using Unity.MLAgents.Inference; using Unity.MLAgents.Policies; using Unity.MLAgents.SideChannels; using Unity.Barracuda; /** * Welcome to Unity Machine Learning Agents (ML-Agents). * * The ML-Agents toolkit contains four entities: Academy, Agent, Communicator and * Python API. The academy and connected agents live within * a learning environment (herein called Environment), while the communicator * manages the communication between the learning environment and the Python * API. For more information on each of these entities, in addition to how to * set-up a learning environment and train the behavior of characters in a * Unity scene, please browse our documentation pages on GitHub: * https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/ */ namespace Unity.MLAgents { ///

/// <summary>
/// MonoBehaviour that steps the Academy once per FixedUpdate.
/// </summary>
internal class AcademyFixedUpdateStepper : MonoBehaviour
{
    void FixedUpdate()
    {
        // A stepper is only allowed to drive the Academy when an Academy exists and this
        // stepper is the one that Academy created. The short-circuit keeps us from touching
        // Academy.Instance (which would create an Academy) when none has been initialized.
        var ownedByCurrentAcademy = Academy.IsInitialized && Academy.Instance.IsStepperOwner(this);
        if (ownedByCurrentAcademy)
        {
            Academy.Instance.EnvironmentStep();
            return;
        }

        // Otherwise this is a stepper leaked from a previous run: remove it.
        Destroy(gameObject);
    }
}
///

/// The Academy singleton manages agent training and decision making. ///

/// /// Access the Academy singleton through the /// property. The Academy instance is initialized the first time it is accessed (which will /// typically be by the first initialized in a scene). /// /// At initialization, the Academy attempts to connect to the Python training process through /// the external communicator. If successful, the training process can train /// instances. When you set an agent's setting /// to , the agent exchanges data with the training process /// to make decisions. If no training process is available, agents with the default behavior /// fall back to inference or heuristic decisions. (You can also set agents to always use /// inference or heuristics.) /// [HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/" + "docs/Learning-Environment-Design.md")] public class Academy : IDisposable { ///

/// <summary>
/// Communication protocol version.
/// When connecting to python, this must be compatible with UnityEnvironment.API_VERSION.
/// We follow semantic versioning on the communication version, so existing
/// functionality will work as long the major versions match.
/// This should be changed whenever a change is made to the communication protocol.
/// </summary>
/// <remarks>
/// History:
/// <list type="bullet">
/// <item>
/// <term>1.0.0</term>
/// <description>Initial version</description>
/// </item>
/// <item>
/// <term>1.1.0</term>
/// <description>Support concatenated PNGs for compressed observations.</description>
/// </item>
/// <item>
/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support both continuous and discrete actions.</description>
/// </item>
/// <item>
/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
/// <item>
/// <term>1.5.0</term>
/// <description>Support variable length observation training.</description>
/// </item>
/// </list>
/// </remarks>
const string k_ApiVersion = "1.5.0";
///

/// <summary>
/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.8.0-preview";

// Port used when running in the Editor and no port was supplied on the command line.
const int k_EditorTrainingPort = 5004;

// Command line flag whose following argument is the trainer port.
const string k_PortCommandLineFlag = "--mlagents-port";

// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy
// The factory only runs the first time Value is read (see Instance); Dispose() replaces
// this field with a fresh Lazy so a new Academy can be created afterwards.
static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
///

/// <summary>
/// Reports whether the Academy has been initialized yet.
/// </summary>
/// <value><c>True</c> if the Academy is initialized, <c>false</c> otherwise.</value>
public static bool IsInitialized => s_Lazy.IsValueCreated;
///

/// <summary>
/// The singleton Academy object.
/// </summary>
/// <value>Getting the instance initializes the Academy, if necessary.</value>
public static Academy Instance => s_Lazy.Value;

// Fields not provided in the Inspector.
///

/// <summary>
/// Reports whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>True</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn
{
    get { return Communicator != null; }
}

/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;

/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="EnvironmentReset"/>.
int m_StepCount;

/// The total number of steps completed during the whole simulation. Incremented
/// each time a step is taken in the environment.
int m_TotalStepCount;

/// Pointer to the communicator currently in use by the Academy.
internal ICommunicator Communicator;

// Set once InitializeEnvironment() has run; cleared again by Dispose().
bool m_Initialized;

// ModelRunners handed out by GetOrCreateModelRunner(); disposed in Dispose().
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();

// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;

// Detect an Academy step called by user code that is also called by the Academy.
private RecursionChecker m_StepRecursionChecker = new RecursionChecker("EnvironmentStep");

// Random seed used for inference.
int m_InferenceSeed;
///

/// <summary>
/// Sets the random seed used for inference. Assign this before any Agents are added
/// to the scene: the seed is handed to the ModelRunner constructor and is incremented
/// every time a new ModelRunner is created.
/// </summary>
public int InferenceSeed
{
    set => m_InferenceSeed = value;
}
///

/// <summary>
/// Returns the RLCapabilities of the python client that the unity process is connected to.
/// Assigned during initialization when the communicator handshake succeeds.
/// </summary>
internal UnityRLCapabilities TrainerCapabilities { get; set; }

// The Academy uses a series of events to communicate with agents
// to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).

// Signals to all the Agents at each environment step so they can use
// their Policy to decide on their next action.
internal event Action DecideAction;

// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;

// Signals to the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
///

/// <summary>
/// Signals to all of the agents that their step is about to begin.
/// This is a good time for an agent to decide whether it would like to request
/// a decision for this step. Any other pre-step setup could be done during this
/// event as well.
/// </summary>
/// <remarks>
/// The argument passed to handlers is the Academy's step count within the current
/// episode, read before it is incremented for the step that is about to run.
/// </remarks>
public event Action<int> AgentPreStep;

// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.
internal event Action AgentSendState;

// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
internal event Action AgentAct;

// Signals to all the agents each time the Academy force resets.
internal event Action AgentForceReset;
///

/// <summary>
/// Signals that the Academy has been reset by the training process.
/// Raised from EnvironmentReset() after the step count has been cleared and the
/// episode count incremented.
/// </summary>
public event Action OnEnvironmentReset;

// Component that calls EnvironmentStep() during FixedUpdate; null while automatic
// stepping is disabled.
AcademyFixedUpdateStepper m_FixedUpdateStepper;

// Hidden GameObject that hosts m_FixedUpdateStepper.
GameObject m_StepperObject;
///

/// <summary>
/// Private constructor, run the first time the Academy singleton is accessed.
/// It hooks the Academy up to application shutdown, initializes the environment
/// (internal data structures and the communicator check) and, in the Editor,
/// listens for play mode changes.
/// </summary>
Academy()
{
    // Make sure the Academy is torn down when the application quits.
    Application.quitting += Dispose;

    LazyInitialize();

#if UNITY_EDITOR
    EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
#endif
}

#if UNITY_EDITOR
/// <summary>
/// Clean up the Academy when switching from edit mode to play mode.
/// </summary>
/// <param name="state">State.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
    // Only the transition out of edit mode is of interest.
    if (state != PlayModeStateChange.ExitingEditMode)
    {
        return;
    }

    Dispose();
}
#endif
///

/// <summary>
/// Initializes the Academy unless that has already happened.
/// Calling this repeatedly is safe: once the Academy is initialized, further calls
/// do nothing.
/// </summary>
internal void LazyInitialize()
{
    if (m_Initialized)
    {
        return;
    }

    InitializeEnvironment();
    m_Initialized = true;
}
///

/// <summary>
/// Enable stepping of the Academy during the FixedUpdate phase. This is done by creating
/// a temporary GameObject with a MonoBehaviour that calls Academy.EnvironmentStep().
/// Does nothing if automatic stepping is already enabled.
/// </summary>
void EnableAutomaticStepping()
{
    if (m_FixedUpdateStepper != null)
    {
        return;
    }

    m_StepperObject = new GameObject("AcademyFixedUpdateStepper");
    // Don't show this object in the hierarchy
    m_StepperObject.hideFlags = HideFlags.HideInHierarchy;
    m_FixedUpdateStepper = m_StepperObject.AddComponent<AcademyFixedUpdateStepper>();
    try
    {
        // This try-catch is because DontDestroyOnLoad cannot be used in Editor Tests
        GameObject.DontDestroyOnLoad(m_StepperObject);
    }
    catch
    {
        // Deliberately ignored: failing to mark the object as persistent is not fatal,
        // the stepper created above still works for the current scene.
    }
}
///

/// <summary>
/// Turns off stepping of the Academy during the FixedUpdate phase. After this call the
/// user has to step the Academy manually through Academy.EnvironmentStep().
/// Does nothing if automatic stepping is already disabled.
/// </summary>
void DisableAutomaticStepping()
{
    if (m_FixedUpdateStepper != null)
    {
        m_FixedUpdateStepper = null;

        // In the Editor the host object is removed immediately; in a player the regular
        // (deferred) Destroy is used.
        if (!Application.isEditor)
        {
            UnityEngine.Object.Destroy(m_StepperObject);
        }
        else
        {
            UnityEngine.Object.DestroyImmediate(m_StepperObject);
        }

        m_StepperObject = null;
    }
}
///

/// <summary>
/// Determines whether or not the Academy is automatically stepped during the FixedUpdate phase.
/// </summary>
/// <value>Set <c>true</c> to enable automatic stepping; <c>false</c> to disable.</value>
public bool AutomaticSteppingEnabled
{
    get { return m_FixedUpdateStepper != null; }
    set
    {
        if (value)
        {
            EnableAutomaticStepping();
        }
        else
        {
            DisableAutomaticStepping();
        }
    }
}

// Used to read Python-provided environment parameters.
// Returns the port following the last "--mlagents-port" flag on the command line. When no
// usable port is found, returns the default Editor training port in the Editor and -1 in
// a built player.
static int ReadPortFromArgs()
{
    var args = Environment.GetCommandLineArgs();
    var inputPort = "";
    // Stop one short of the end: the flag is only meaningful when a value follows it.
    // Reading args[i + 1] for a trailing flag would throw IndexOutOfRangeException.
    for (var i = 0; i < args.Length - 1; i++)
    {
        if (args[i] == k_PortCommandLineFlag)
        {
            inputPort = args[i + 1];
        }
    }

    if (int.TryParse(inputPort, out var port))
    {
        return port;
    }

    // No arg passed, or malformed port number.
#if UNITY_EDITOR
    // Try connecting on the default editor port
    return k_EditorTrainingPort;
#else
    // This is an executable, so we don't try to connect.
    return -1;
#endif
}

// Backing field for the EnvironmentParameters property; created in InitializeEnvironment().
EnvironmentParameters m_EnvironmentParameters;

// Backing field for the StatsRecorder property; created in InitializeEnvironment().
StatsRecorder m_StatsRecorder;
///

/// <summary>
/// Returns the <see cref="EnvironmentParameters"/> instance. If training
/// features such as Curriculum Learning or Environment Parameter Randomization are used,
/// then the values of the parameters generated from the training process can be
/// retrieved here.
/// </summary>
/// <returns>The Academy's environment parameters object.</returns>
public EnvironmentParameters EnvironmentParameters => m_EnvironmentParameters;
///

/// <summary>
/// Returns the <see cref="StatsRecorder"/> instance. This instance can be used
/// to record any statistics from the Unity environment.
/// </summary>
/// <returns>The Academy's stats recorder object.</returns>
public StatsRecorder StatsRecorder => m_StatsRecorder;
///

/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// Registers the built-in side channels, creates the environment parameters and stats
/// recorder, and tries to connect to a trainer on the port read from the command line.
/// </summary>
void InitializeEnvironment()
{
    TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
    TimerStack.Instance.AddMetadata("com.unity.ml-agents_version", k_PackageVersion);

    EnableAutomaticStepping();

    SideChannelManager.RegisterSideChannel(new EngineConfigurationChannel());
    SideChannelManager.RegisterSideChannel(new TrainingAnalyticsSideChannel());
    m_EnvironmentParameters = new EnvironmentParameters();
    m_StatsRecorder = new StatsRecorder();

    // Try to launch the communicator by using the arguments passed at launch
    var port = ReadPortFromArgs();
    if (port > 0)
    {
        Communicator = CommunicatorFactory.Create();
    }

    if (Communicator != null)
    {
        // We try to exchange the first message with Python. If this fails, it means
        // no Python Process is ready to train the environment. In this case, the
        // environment must use Inference.
        bool initSuccessful = false;
        var communicatorInitParams = new CommunicatorInitParameters
        {
            port = port,
            unityCommunicationVersion = k_ApiVersion,
            unityPackageVersion = k_PackageVersion,
            name = "AcademySingleton",
            CSharpCapabilities = new UnityRLCapabilities()
        };

        try
        {
            initSuccessful = Communicator.Initialize(
                communicatorInitParams,
                out var unityRlInitParameters
            );
            if (initSuccessful)
            {
                UnityEngine.Random.InitState(unityRlInitParameters.seed);
                // We might have inference-only Agents, so set the seed for them too.
                m_InferenceSeed = unityRlInitParameters.seed;
                TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
                TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
            }
            else
            {
                Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
                Communicator = null;
            }
        }
        catch (Exception ex)
        {
            Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
            Communicator = null;
        }
    }

    if (Communicator != null)
    {
        Communicator.QuitCommandReceived += OnQuitCommandReceived;
        Communicator.ResetCommandReceived += OnResetCommand;
    }

    // If a communicator is enabled/provided, then we assume we are in
    // training mode. In the absence of a communicator, we assume we are
    // in inference mode.

    ResetActions();
}

// Replaces every Academy event with a no-op handler, dropping all current subscribers
// so that the Academy does not keep references to old objects.
void ResetActions()
{
    DecideAction = () => { };
    DestroyAction = () => { };
    AgentPreStep = i => { };
    // AgentIncrementStep has to be cleared together with the other events, otherwise
    // its subscribers stay referenced after Dispose().
    AgentIncrementStep = () => { };
    AgentSendState = () => { };
    AgentAct = () => { };
    AgentForceReset = () => { };
    OnEnvironmentReset = () => { };
}

// Handler for the communicator's quit command: stops play mode in the Editor and
// asks the application to quit.
static void OnQuitCommandReceived()
{
#if UNITY_EDITOR
    EditorApplication.isPlaying = false;
#endif
    Application.Quit();
}

// Handler for the communicator's reset command.
void OnResetCommand()
{
    ForcedFullReset();
}
///

/// <summary>
/// The current episode count.
/// </summary>
/// <value>
/// Current episode number.
/// </value>
public int EpisodeCount => m_EpisodeCount;
///

/// <summary>
/// The current step count (within the current episode).
/// </summary>
/// <value>
/// Current step count.
/// </value>
public int StepCount => m_StepCount;
///

/// <summary>
/// Returns the total step count.
/// </summary>
/// <value>
/// Total step count.
/// </value>
public int TotalStepCount => m_TotalStepCount;
///

/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
/// called the first reset at inference and every external reset
/// at training.
/// </summary>
/// <remarks>
/// Invoked by EnvironmentStep() when no reset has happened yet, and by the
/// communicator's reset command handler.
/// </remarks>
void ForcedFullReset()
{
    // Reset the Academy's own counters and notify OnEnvironmentReset listeners first...
    EnvironmentReset();
    // ...then tell every agent to reset.
    AgentForceReset?.Invoke();
    // Remember that a reset has happened so EnvironmentStep() does not force another one.
    m_HadFirstReset = true;
}
///

/// <summary>
/// Runs one environment update for the Academy and the Agent objects in the environment.
/// </summary>
/// <remarks>
/// The phases run in a fixed order: first-time reset (if needed), pre-step notification,
/// step counters, agent step increment, state sending, decision, and finally acting.
/// </remarks>
public void EnvironmentStep()
{
    // The recursion checker guards against user code calling EnvironmentStep() from
    // within a step that the Academy itself is already running.
    using (m_StepRecursionChecker.Start())
    {
        // The very first step triggers a full reset.
        if (!m_HadFirstReset)
        {
            ForcedFullReset();
        }

        // Listeners receive the step count as it was before this step is counted.
        AgentPreStep?.Invoke(m_StepCount);

        m_StepCount++;
        m_TotalStepCount++;
        AgentIncrementStep?.Invoke();

        using (TimerStack.Instance.Scoped("AgentSendState"))
        {
            AgentSendState?.Invoke();
        }

        using (TimerStack.Instance.Scoped("DecideAction"))
        {
            DecideAction?.Invoke();
        }

        // If the communicator is not on, we need to clear the SideChannel sending queue
        if (Communicator == null)
        {
            SideChannelManager.GetSideChannelMessage();
        }

        using (TimerStack.Instance.Scoped("AgentAct"))
        {
            AgentAct?.Invoke();
        }
    }
}
///

/// <summary>
/// Resets the environment, including the Academy: starts a new episode, clears the
/// per-episode step count and then notifies OnEnvironmentReset listeners.
/// </summary>
void EnvironmentReset()
{
    m_EpisodeCount += 1;
    m_StepCount = 0;

    // Listeners are notified only after both counters have been updated.
    OnEnvironmentReset?.Invoke();
}
///

/// <summary>
/// Returns a ModelRunner for the given NNModel and InferenceDevice, reusing an
/// existing one when a match is found and creating (and remembering) a new one otherwise.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="actionSpec">Description of the actions for the Agent.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
/// <returns>The ModelRunner compatible with the input settings.</returns>
internal ModelRunner GetOrCreateModelRunner(
    NNModel model, ActionSpec actionSpec, InferenceDevice inferenceDevice)
{
    // Reuse the first runner (in creation order) that already serves this model/device pair.
    foreach (var existingRunner in m_ModelRunners)
    {
        if (existingRunner.HasModel(model, inferenceDevice))
        {
            return existingRunner;
        }
    }

    var createdRunner = new ModelRunner(model, actionSpec, inferenceDevice, m_InferenceSeed);
    m_ModelRunners.Add(createdRunner);
    // Advance the seed only after the runner was created and stored, so that the next
    // new runner gets a different seed.
    m_InferenceSeed++;
    return createdRunner;
}
///

/// <summary>
/// Shut down the Academy: stops automatic stepping, notifies listeners, releases the
/// communicator, side channels and model runners, saves the timers and resets the
/// singleton so that a fresh Academy is created on the next access.
/// </summary>
public void Dispose()
{
    // Detach from the global events the constructor subscribed to. Without this, a
    // disposed Academy stays referenced by those events and gets disposed again later
    // (on quit, or on the next play mode change in the Editor).
    Application.quitting -= Dispose;
#if UNITY_EDITOR
    EditorApplication.playModeStateChanged -= HandleOnPlayModeChanged;
#endif

    DisableAutomaticStepping();

    // Signal to listeners that the academy is being destroyed now
    DestroyAction?.Invoke();

    Communicator?.Dispose();
    Communicator = null;

    // Null-conditional: Dispose is subscribed to Application.quitting before
    // initialization runs, so these may not have been created if initialization failed.
    m_EnvironmentParameters?.Dispose();
    m_StatsRecorder?.Dispose();
    SideChannelManager.UnregisterAllSideChannels();  // unregister custom side channels

    if (m_ModelRunners != null)
    {
        foreach (var mr in m_ModelRunners)
        {
            mr.Dispose();
        }

        m_ModelRunners = null;
    }

    // Clear out the actions so we're not keeping references to any old objects
    ResetActions();

    // TODO - Pass worker ID or some other identifier,
    // so that multiple envs won't overwrite each others stats.
    TimerStack.Instance.SaveJsonTimers();
    m_Initialized = false;

    // Reset the Lazy instance
    s_Lazy = new Lazy<Academy>(() => new Academy());
}
///

/// <summary>
/// Checks whether the given AcademyFixedUpdateStepper lives on the stepper GameObject
/// held by the current Academy singleton.
/// </summary>
/// <param name="stepper">The stepper component to check.</param>
/// <returns><c>True</c> when both refer to the very same GameObject instance.</returns>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
    var candidateObject = stepper.gameObject;
    var ownedObject = Academy.Instance.m_StepperObject;

    // Plain reference comparison on purpose (not Unity's overloaded ==).
    return object.ReferenceEquals(candidateObject, ownedObject);
}
}
}