using System.Collections.Generic; using UnityEngine; using System.IO; using System.Linq; #if UNITY_EDITOR using UnityEditor; #endif /** * Welcome to Unity Machine Learning Agents (ML-Agents). * * The ML-Agents toolkit contains five entities: Academy, Brain, Agent, Communicator and * Python API. The academy, and all its brains and connected agents live within * a learning environment (herin called Environment), while the communicator * manages the communication between the learning environment and the Python * API. For more information on each of these entities, in addition to how to * set-up a learning environment and train the behavior of characters in a * Unity scene, please browse our documentation pages on GitHub: * https://github.com/Unity-Technologies/ml-agents/blob/master/docs/ */ namespace MLAgents { /// /// Wraps the environment-level parameters that are provided within the /// Editor. These parameters can be provided for training and inference /// modes separately and represent screen resolution, rendering quality and /// frame rate. /// [System.Serializable] public class EnvironmentConfiguration { [Tooltip("Width of the environment window in pixels.")] public int width; [Tooltip("Height of the environment window in pixels.")] public int height; [Tooltip("Rendering quality of environment. (Higher is better quality.)")] [Range(0, 5)] public int qualityLevel; [Tooltip("Speed at which environment is run. (Higher is faster.)")] [Range(1f, 100f)] public float timeScale; [Tooltip("Frames per second (FPS) engine attempts to maintain.")] public int targetFrameRate; /// Initializes a new instance of the /// class. /// Width of environment window (pixels). /// Height of environment window (pixels). /// /// Rendering quality of environment. Ranges from 0 to 5, with higher. /// /// /// Speed at which environment is run. Ranges from 1 to 100, with higher /// values representing faster speed. /// /// /// Target frame rate (per second) that the engine tries to maintain. /// public EnvironmentConfiguration( int width, int height, int qualityLevel, float timeScale, int targetFrameRate) { this.width = width; this.height = height; this.qualityLevel = qualityLevel; this.timeScale = timeScale; this.targetFrameRate = targetFrameRate; } } /// /// An Academy is where Agent objects go to train their behaviors. More /// specifically, an academy is a collection of Brain objects and each agent /// in a scene is attached to one brain (a single brain may be attached to /// multiple agents). Currently, this class is expected to be extended to /// implement the desired academy behavior. /// /// /// When an academy is run, it can either be in inference or training mode. /// The mode is determined by the presence or absence of a Communicator. In /// the presence of a communicator, the academy is run in training mode where /// the states and observations of each agent are sent through the /// communicator. In the absence of a communciator, the academy is run in /// inference mode where the agent behavior is determined by the brain /// attached to it (which may be internal, heuristic or player). /// [HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" + "docs/Learning-Environment-Design-Academy.md")] public abstract class Academy : MonoBehaviour { [SerializeField] public BroadcastHub broadcastHub = new BroadcastHub(); private const string kApiVersion = "API-9"; /// Temporary storage for global gravity value /// Used to restore oringal value when deriving Academy modifies it private Vector3 originalGravity; /// Temporary storage for global fixedDeltaTime value /// Used to restore oringal value when deriving Academy modifies it private float originalFixedDeltaTime; /// Temporary storage for global maximumDeltaTime value /// Used to restore oringal value when deriving Academy modifies it private float originalMaximumDeltaTime; // Fields provided in the Inspector [SerializeField] [Tooltip("Total number of steps per global episode.\nNon-positive " + "values correspond to episodes without a maximum number of \n" + "steps. Once the step counter reaches this maximum value, the " + "environment will reset.")] int maxSteps; [SerializeField] [Tooltip("The engine-level settings which correspond to rendering " + "quality and engine speed during Training.")] EnvironmentConfiguration trainingConfiguration = new EnvironmentConfiguration(80, 80, 1, 100.0f, -1); [SerializeField] [Tooltip("The engine-level settings which correspond to rendering " + "quality and engine speed during Inference.")] EnvironmentConfiguration inferenceConfiguration = new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60); /// /// Contains a mapping from parameter names to float values. They are /// used in and /// to modify elements in the environment at reset time. /// /// /// Default reset parameters are specified in the academy Editor, and can /// be modified when training with an external Brain by passinga config /// dictionary at reset. /// [SerializeField] [Tooltip("List of custom parameters that can be changed in the " + "environment when it resets.")] public ResetParameters resetParameters; public CommunicatorObjects.CustomResetParameters customResetParameters; // Fields not provided in the Inspector. /// Boolean flag indicating whether a communicator is accessible by the /// environment. This also specifies whether the environment is in /// Training or Inference mode. bool isCommunicatorOn; /// Keeps track of the id of the last communicator message received. /// Remains 0 if there are no communicators. Is used to ensure that /// the same message is not used multiple times. private ulong lastCommunicatorMessageNumber; /// If true, the Academy will use inference settings. This field is /// initialized in depending on the presence /// or absence of a communicator. Furthermore, it can be modified by an /// external Brain during reset via . bool isInference = true; /// The done flag of the academy. When set to true, the academy will /// call instead of /// at step time. If true, all agents done flags will be set to true. bool done; /// Whether the academy has reached the maximum number of steps for the /// current episode. bool maxStepReached; /// The number of episodes completed by the environment. Incremented /// each time the environment is reset. int episodeCount; /// The number of steps completed within the current episide. Incremented /// each time a step is taken in the environment. Is reset to 0 during /// . int stepCount; /// The number of total number of steps completed during the whole simulation. Incremented /// each time a step is taken in the environment. int totalStepCount; /// Flag that indicates whether the inference/training mode of the /// environment was switched by the external Brain. This impacts the /// engine settings at the next environment step. bool modeSwitched; /// Pointer to the batcher currently in use by the Academy. MLAgents.Batcher brainBatcher; /// Used to write error messages. StreamWriter logWriter; /// The path to where the log should be written. string logPath; // Flag used to keep track of the first time the Academy is reset. bool firstAcademyReset; // The Academy uses a series of events to communicate with agents and // brains to facilitate synchronization. More specifically, it ensure // that all the agents performs their steps in a consistent order (i.e. no // agent can act based on a decision before another agent has had a chance // to request a decision). // Signals to all the Brains at each environment step so they can decide // actions for their agents. public event System.Action BrainDecideAction; // Signals to all the listeners that the academy is being destroyed public event System.Action DestroyAction; // Signals to all the agents at each environment step along with the // Academy's maxStepReached, done and stepCount values. The agents rely // on this event to update their own values of max step reached and done // in addition to aligning on the step count of the global episode. public event System.Action AgentSetStatus; // Signals to all the agents at each environment step so they can reset // if their flag has been set to done (assuming the agent has requested a // decision). public event System.Action AgentResetIfDone; // Signals to all the agents at each environment step so they can send // their state to their Brain if they have requested a decision. public event System.Action AgentSendState; // Signals to all the agents at each environment step so they can act if // they have requested a decision. public event System.Action AgentAct; // Sigals to all the agents each time the Academy force resets. public event System.Action AgentForceReset; /// /// Monobehavior function called at the very beginning of environment /// creation. Academy uses this time to initialize internal data /// structures, initialize the environment and check for the existence /// of a communicator. /// void Awake() { InitializeEnvironment(); } // Used to read Python-provided environment parameters private int ReadArgs() { var args = System.Environment.GetCommandLineArgs(); var inputPort = ""; for (var i = 0; i < args.Length; i++) { if (args[i] == "--port") { inputPort = args[i + 1]; } } return int.Parse(inputPort); } /// /// Initializes the environment, configures it and initialized the Academy. /// private void InitializeEnvironment() { originalGravity = Physics.gravity; originalFixedDeltaTime = Time.fixedDeltaTime; originalMaximumDeltaTime = Time.maximumDeltaTime; InitializeAcademy(); Communicator communicator = null; var exposedBrains = broadcastHub.broadcastingBrains.Where(x => x != null).ToList();; var controlledBrains = broadcastHub.broadcastingBrains.Where( x => x != null && x is LearningBrain && broadcastHub.IsControlled(x)); foreach (LearningBrain brain in controlledBrains) { brain.SetToControlledExternally(); } // Try to launch the communicator by usig the arguments passed at launch try { communicator = new RPCCommunicator( new CommunicatorParameters { port = ReadArgs() }); } // If it fails, we check if there are any external brains in the scene // If there are : Launch the communicator on the default port // If there arn't, there is no need for a communicator and it is set // to null catch { communicator = null; if (controlledBrains.ToList().Count > 0) { communicator = new RPCCommunicator( new CommunicatorParameters { port = 5005 }); } } brainBatcher = new Batcher(communicator); foreach (var trainingBrain in exposedBrains) { trainingBrain.SetBatcher(brainBatcher); } if (communicator != null) { isCommunicatorOn = true; var academyParameters = new CommunicatorObjects.UnityRLInitializationOutput(); academyParameters.Name = gameObject.name; academyParameters.Version = kApiVersion; foreach (var brain in exposedBrains) { var bp = brain.brainParameters; academyParameters.BrainParameters.Add( bp.ToProto(brain.name, broadcastHub.IsControlled(brain))); } academyParameters.EnvironmentParameters = new CommunicatorObjects.EnvironmentParametersProto(); foreach (var key in resetParameters.Keys) { academyParameters.EnvironmentParameters.FloatParameters.Add( key, resetParameters[key] ); } var pythonParameters = brainBatcher.SendAcademyParameters(academyParameters); Random.InitState(pythonParameters.Seed); Application.logMessageReceived += HandleLog; logPath = Path.GetFullPath(".") + "/UnitySDK.log"; using (var fs = File.Open(logPath, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) { logWriter = new StreamWriter(fs); logWriter.WriteLine(System.DateTime.Now.ToString()); logWriter.WriteLine(" "); logWriter.Close(); } } // If a communicator is enabled/provided, then we assume we are in // training mode. In the absence of a communicator, we assume we are // in inference mode. isInference = !isCommunicatorOn; BrainDecideAction += () => { }; DestroyAction += () => { }; AgentSetStatus += (m, d, i) => { }; AgentResetIfDone += () => { }; AgentSendState += () => { }; AgentAct += () => { }; AgentForceReset += () => { }; // Configure the environment using the configurations provided by // the developer in the Editor. SetIsInference(!brainBatcher.GetIsTraining()); ConfigureEnvironment(); } private void UpdateResetParameters() { var newResetParameters = brainBatcher.GetEnvironmentParameters(); if (newResetParameters != null) { foreach (var kv in newResetParameters.FloatParameters) { resetParameters[kv.Key] = kv.Value; } customResetParameters = newResetParameters.CustomResetParameters; } } void HandleLog(string logString, string stackTrace, LogType type) { using (var fs = File.Open(logPath, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) { logWriter = new StreamWriter(fs); logWriter.WriteLine(type.ToString()); logWriter.WriteLine(logString); logWriter.WriteLine(stackTrace); logWriter.Close(); } } /// /// Configures the environment settings depending on the training/inference /// mode and the corresponding parameters passed in the Editor. /// void ConfigureEnvironment() { if (isInference) { ConfigureEnvironmentHelper(inferenceConfiguration); Monitor.SetActive(true); } else { ConfigureEnvironmentHelper(trainingConfiguration); Monitor.SetActive(false); } } /// /// Helper method for initializing the environment based on the provided /// configuration. /// /// /// Environment configuration (specified in the Editor). /// static void ConfigureEnvironmentHelper(EnvironmentConfiguration config) { Screen.SetResolution(config.width, config.height, false); QualitySettings.SetQualityLevel(config.qualityLevel, true); Time.timeScale = config.timeScale; Time.captureFramerate = 60; Application.targetFrameRate = config.targetFrameRate; } /// /// Initializes the academy and environment. Called during the waking-up /// phase of the environment before any of the scene objects/agents have /// been initialized. /// public virtual void InitializeAcademy() { } /// /// Specifies the academy behavior at every step of the environment. /// public virtual void AcademyStep() { } /// /// Specifies the academy behavior when being reset (i.e. at the completion /// of a global episode). /// public virtual void AcademyReset() { } /// /// Returns the flag. /// /// /// true, if current mode is inference, false if training. /// public bool GetIsInference() { return isInference; } /// /// Sets the flag to the provided value. If /// the new flag differs from the current flag value, this signals that /// the environment configuration needs to be updated. /// /// /// Environment mode, if true then inference, otherwise training. /// public void SetIsInference(bool isInference) { if (this.isInference != isInference) { this.isInference = isInference; // This signals to the academy that at the next environment step // the engine configurations need updating to the respective mode // (i.e. training vs inference) configuraiton. modeSwitched = true; } } /// /// Returns the current episode counter. /// /// /// Current episode number. /// public int GetEpisodeCount() { return episodeCount; } /// /// Returns the current step counter (within the current episode). /// /// /// Current step count. /// public int GetStepCount() { return stepCount; } /// /// Returns the total step counter. /// /// /// Total step count. /// public int GetTotalStepCount() { return totalStepCount; } /// /// Sets the done flag to true. /// public void Done() { done = true; } /// /// Returns whether or not the academy is done. /// /// /// true, if academy is done, false otherwise. /// public bool IsDone() { return done; } /// /// Returns whether or not the communicator is on. /// /// /// true, if communicator is on, false otherwise. /// public bool IsCommunicatorOn() { return isCommunicatorOn; } /// /// Forces the full reset. The done flags are not affected. Is either /// called the first reset at inference and every external reset /// at training. /// void ForcedFullReset() { EnvironmentReset(); AgentForceReset(); firstAcademyReset = true; } /// /// Performs a single environment update to the Academy, Brain and Agent /// objects within the environment. /// void EnvironmentStep() { if (modeSwitched) { ConfigureEnvironment(); modeSwitched = false; } if ((isCommunicatorOn) && (lastCommunicatorMessageNumber != brainBatcher.GetNumberMessageReceived())) { lastCommunicatorMessageNumber = brainBatcher.GetNumberMessageReceived(); if (brainBatcher.GetCommand() == CommunicatorObjects.CommandProto.Reset) { UpdateResetParameters(); SetIsInference(!brainBatcher.GetIsTraining()); ForcedFullReset(); } if (brainBatcher.GetCommand() == CommunicatorObjects.CommandProto.Quit) { #if UNITY_EDITOR EditorApplication.isPlaying = false; #endif Application.Quit(); return; } } else if (!firstAcademyReset) { UpdateResetParameters(); ForcedFullReset(); } if ((stepCount >= maxSteps) && maxSteps > 0) { maxStepReached = true; Done(); } AgentSetStatus(maxStepReached, done, stepCount); brainBatcher.RegisterAcademyDoneFlag(done); if (done) { EnvironmentReset(); } AgentResetIfDone(); AgentSendState(); BrainDecideAction(); AcademyStep(); AgentAct(); stepCount += 1; totalStepCount += 1; } /// /// Resets the environment, including the Academy. /// void EnvironmentReset() { stepCount = 0; episodeCount++; done = false; maxStepReached = false; AcademyReset(); } /// /// Monobehavior function that dictates each environment step. /// void FixedUpdate() { EnvironmentStep(); } /// /// Cleanup function /// protected virtual void OnDestroy() { Physics.gravity = originalGravity; Time.fixedDeltaTime = originalFixedDeltaTime; Time.maximumDeltaTime = originalMaximumDeltaTime; // Signal to listeners that the academy is being destroyed now DestroyAction(); } } }