using System.Collections ;
using System.Collections.Generic ;
using System.Collections.Generic ;
* Welcome to Unity Machine Learning Agents documentation .
* Welcome to Unity Machine Learning Agents ( ML - Agents ) .
*
* ML - Agents contains five entities : Academy , Brain , Agent , Communicator and
* Python API . The academy , and all its brains and connected agents live within
* a learning environment ( herein called Environment ) , while the communicator
* manages the communication between the learning environment and the Python
* API . For more information on each of these entities , in addition to how to
* set - up a learning environment and train the behavior of characters in a
* Unity scene , please browse our documentation pages on GitHub :
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor. These parameters can be provided for training and inference
/// modes separately and represent screen resolution, rendering quality and
/// frame rate.
/// </summary>
public class ScreenConfiguration
public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels")]
[Tooltip("Height of the environment window in pixels.")]
[Tooltip("Rendering quality of environment. (Higher is better quality)")]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Tooltip("Speed at which environment is run. (Higher is faster)")]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Tooltip("FPS engine attempts to maintain.")]
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
public ScreenConfiguration ( int w , int h , int q , float ts , int tf )
/// Initializes a new instance of the
/// <see cref="EnvironmentConfiguration"/> class.
/// <param name="width">Width of environment window (pixels).</param>
/// <param name="height">Height of environment window (pixels).</param>
/// <param name="qualityLevel">
/// Rendering quality of environment. Ranges from 0 to 5, with higher
/// values representing better quality.
/// </param>
/// <param name="timeScale">
/// Speed at which environment is run. Ranges from 1 to 100, with higher
/// values representing faster speed.
/// </param>
/// <param name="targetFrameRate">
/// Target frame rate (per second) that the engine tries to maintain.
/// </param>
public EnvironmentConfiguration (
int width , int height , int qualityLevel ,
float timeScale , int targetFrameRate )
width = w ;
height = h ;
qualityLevel = q ;
timeScale = ts ;
targetFrameRate = tf ;
this . width = width ;
this . height = height ;
this . qualityLevel = qualityLevel ;
this . timeScale = timeScale ;
this . targetFrameRate = targetFrameRate ;
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#academy")]
/ * * Create a child class to implement InitializeAcademy ( ) , AcademyStep ( )
* and AcademyReset ( ) . The child class script must be attached to an empty game
* object in your scene , and there can only be one such object within the scene .
* /
/// <summary>
/// An Academy is where Agent objects go to train their behaviors. More
/// specifically, an academy is a collection of Brain objects and each agent
/// in a scene is attached to one brain (a single brain may be attached to
/// multiple agents). Currently, this class is expected to be extended to
/// implement the desired academy behavior.
/// </summary>
/// <remarks>
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>
[ HelpURL ( "https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md" ) ]
// Fields provided in the Inspector
[ Tooltip ( "Total number of steps per episode. \n" +
"0 corresponds to episodes without a maximum number of steps. \n" +
"Once the step counter reaches maximum, " +
"the environment will reset." ) ]
private int maxSteps ;
[ Tooltip ( "Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset." ) ]
int maxSteps ;
[HideInInspector]
public bool isInference = true ;
/ * * < \ brief Do not modify : If true , the Academy will use inference
* settings . * /
private bool _isCurrentlyInference ;
[SerializeField]
[ Tooltip ( "The engine-level settings which correspond to rendering quality" +
" and engine speed during Training." ) ]
private ScreenConfiguration trainingConfiguration =
new ScreenConfiguration ( 8 0 , 8 0 , 1 , 1 0 0.0f , - 1 ) ;
[ Tooltip ( "The engine-level settings which correspond to rendering " +
"quality and engine speed during Training." ) ]
EnvironmentConfiguration trainingConfiguration =
new EnvironmentConfiguration ( 8 0 , 8 0 , 1 , 1 0 0.0f , - 1 ) ;
[ Tooltip ( "The engine-level settings which correspond to rendering quality" +
" and engine speed during Inference." ) ]
private ScreenConfiguration inferenceConfiguration =
new ScreenConfiguration ( 1 2 8 0 , 7 2 0 , 5 , 1.0f , 6 0 ) ;
[ Tooltip ( "The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference." ) ]
EnvironmentConfiguration inferenceConfiguration =
new EnvironmentConfiguration ( 1 2 8 0 , 7 2 0 , 5 , 1.0f , 6 0 ) ;
/**< \brief Contains a mapping from parameter names to float values. */
/ * * < You can specify the Default Reset Parameters in the Inspector of the
* Academy . You can modify these parameters when training with an External
* brain by passing a config dictionary at reset . Reference resetParameters
* in your AcademyReset ( ) or AcademyStep ( ) to modify elements in your
* environment at reset time . * /
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// used in <see cref="AcademyReset"/> and <see cref="AcademyStep"/>
/// to modify elements in the environment at reset time.
/// </summary>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
"environment on reset." ) ]
"environment when it resets." ) ]
// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool isCommunicatorOn ;
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified by an
/// external Brain during reset via <see cref="SetIsInference"/>.
bool isInference = true ;
/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool done ;
/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool maxStepReached ;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int episodeCount ;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int stepCount ;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched by the external Brain. This impacts the
/// engine settings at the next environment step.
bool modeSwitched ;
/// Pointer to the communicator currently in use by the Academy.
Communicator communicator ;
//
bool firstAcademyReset ;
// The Academy uses a series of events to communicate with agents and
// brains to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Signals to all the Brains at each environment step so they can decide
// actions for their agents.
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
// decision).
// Signals to all the agents at each environment step so they can send
// their state to their Brain if they have requested a decision.
// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
public event System . Action AgentForceReset ;
/**< \brief The done flag of the Academy. */
/ * * < When set to true , the Academy will call AcademyReset ( ) instead of
* AcademyStep ( ) at step time .
* If true , all agents done flags will be set to true . * /
private bool done ;
// Signals to all the agents each time the Academy force resets.
public event System . Action AgentForceReset ;
/// The max step reached.
/// MonoBehaviour function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.
private bool maxStepReached ;
/**< \brief Increments each time the environment is reset. */
[HideInInspector]
public int episodeCount ;
[HideInInspector]
public int stepsSinceReset ;
/ * * < \ brief Do not modify : pointer to the communicator currently in
* use by the Academy . * /
public Communicator communicator ;
private bool firstAcademyReset ;
_InitializeAcademy ( ) ;
InitializeEnvironment ( ) ;
void _InitializeAcademy ( )
{
List < Brain > brains = GetBrains ( gameObject ) ;
InitializeAcademy ( ) ;
/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// </summary>
void InitializeEnvironment ( )
{
// Initialize communicator (if possible)
if ( ! communicator . CommunicatorHandShake ( ) )
if ( communicator . CommunicatorHandShake ( ) )
{
isCommunicatorOn = true ;
communicator . InitializeCommunicator ( ) ;
communicator . UpdateCommand ( ) ;
}
else
// Initialize Academy and Brains.
InitializeAcademy ( ) ;
List < Brain > brains = GetBrains ( gameObject ) ;
if ( communicator ! = null )
{
communicator . InitializeCommunicator ( ) ;
communicator . UpdateCommand ( ) ;
}
isInference = ( communicator = = null ) ;
_isCurrentlyInference = ! isInference ;
// If a communicator is enabled/provided, then we assume we are in
// training mode. In the absence of a communicator, we assume we are
// in inference mode.
isInference = ! isCommunicatorOn ;
BrainDecideAction + = ( ) = > { } ;
AgentSetStatus + = ( m , d , i ) = > { } ;
AgentForceReset + = ( ) = > { } ;
// Configure the environment using the configurations provided by
// the developer in the Editor.
ConfigureEnvironment ( ) ;
}
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor.
/// </summary>
void ConfigureEnvironment ( )
{
if ( isInference )
{
ConfigureEnvironmentHelper ( inferenceConfiguration ) ;
Monitor . SetActive ( true ) ;
}
else
{
ConfigureEnvironmentHelper ( trainingConfiguration ) ;
Monitor . SetActive ( false ) ;
}
/// <summary>
/// Pushes one environment configuration to the engine: window
/// resolution, rendering quality level, time scale and frame-rate
/// settings.
/// </summary>
/// <param name="config">
/// Configuration whose width, height, qualityLevel, timeScale and
/// targetFrameRate values are applied to the engine.
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
    // The capture frame rate is fixed; it is the only setting here that
    // does not come from the supplied configuration.
    const int captureFramerate = 60;

    var windowWidth = config.width;
    var windowHeight = config.height;

    // Third argument is the fullscreen flag: the window is not fullscreen.
    Screen.SetResolution(windowWidth, windowHeight, false);
    // Second argument asks the engine to apply expensive changes as well.
    QualitySettings.SetQualityLevel(config.qualityLevel, true);

    Time.timeScale = config.timeScale;
    Time.captureFramerate = captureFramerate;
    Application.targetFrameRate = config.targetFrameRate;
}
/// Environment specific initialization.
/ * *
* Implemented in environment - specific child class .
* This method is called once when the environment is loaded .
* /
/// <summary>
/// Initializes the academy and environment. Called during the waking-up
/// phase of the environment before any of the scene objects/agents have
/// been initialized.
/// </summary>
/// <summary>
/// Specifies the academy behavior at every step of the environment.
/// </summary>
public virtual void AcademyStep ( )
{
private void ConfigureEngine ( )
}
/// <summary>
/// Specifies the academy behavior when being reset (i.e. at the completion
/// of a global episode).
/// </summary>
public virtual void AcademyReset ( )
if ( ( ! isInference ) )
{
Screen . SetResolution (
trainingConfiguration . width ,
trainingConfiguration . height ,
false ) ;
QualitySettings . SetQualityLevel (
trainingConfiguration . qualityLevel , true ) ;
Time . timeScale = trainingConfiguration . timeScale ;
Application . targetFrameRate =
trainingConfiguration . targetFrameRate ;
QualitySettings . vSyncCount = 0 ;
Time . captureFramerate = 6 0 ;
Monitor . SetActive ( false ) ;
}
else
}
/// <summary>
/// Reports which mode the academy is currently flagged to run in.
/// </summary>
/// <returns>
/// The value of the <see cref="isInference"/> field: <c>true</c> for
/// inference mode, <c>false</c> for training mode.
/// </returns>
public bool GetIsInference()
{
    return this.isInference;
}
/// <summary>
/// Sets the <see cref="isInference"/> flag to the provided value. If
/// the new flag differs from the current flag value, this signals that
/// the environment configuration needs to be updated.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference ( bool isInference )
{
if ( this . isInference ! = isInference )
Screen . SetResolution (
inferenceConfiguration . width ,
inferenceConfiguration . height ,
false ) ;
QualitySettings . SetQualityLevel (
inferenceConfiguration . qualityLevel , true ) ;
Time . timeScale = inferenceConfiguration . timeScale ;
Application . targetFrameRate =
inferenceConfiguration . targetFrameRate ;
Time . captureFramerate = 6 0 ;
Monitor . SetActive ( true ) ;
this . isInference = isInference ;
// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuration.
modeSwitched = true ;
/// Environment specific step logic.
/ * *
* Implemented in environment - specific child class .
* This method is called at every step .
* /
public virtual void AcademyStep ( )
/// <summary>
/// Returns the current episode counter.
/// </summary>
/// <returns>
/// Current episode number.
/// </returns>
public int GetEpisodeCount ( )
return episodeCount ;
/// Environment specific reset logic.
/ * *
* Implemented in environment - specific child class .
* This method is called every time the Academy resets ( when the global done
* flag is set to true ) .
* /
public virtual void AcademyReset ( )
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current step count.
/// </returns>
public int GetStepCount ( )
return stepCount ;
/// <summary>
/// Sets the done flag to true.
/// </summary>
/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone ( )
{
return done ;
/// Forces the full reset. The done flags are not affected. Is either
/// <summary>
/// Indicates whether the communicator is on.
/// </summary>
/// <returns>
/// The value of the <c>isCommunicatorOn</c> flag: <c>true</c> if the
/// communicator is on, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn()
{
    return this.isCommunicatorOn;
}
/// <summary>
/// Gives access to the Communicator the Academy is currently using.
/// </summary>
/// <returns>
/// The communicator currently in use (may be null).
/// </returns>
public Communicator GetCommunicator()
{
    return this.communicator;
}
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
private void ForcedFullReset ( )
void ForcedFullReset ( )
_AcademyReset ( ) ;
EnvironmentReset ( ) ;
internal void _AcademyStep ( )
/// <summary>
/// Performs a single environment update to the Academy, Brain and Agent
/// objects within the environment.
/// </summary>
void EnvironmentStep ( )
if ( isInference ! = _isCurrentlyInference )
if ( modeSwitched )
ConfigureEngine ( ) ;
_isCurrentlyInference = isInference ;
ConfigureEnvironment ( ) ;
modeSwitched = false ;
if ( communicator ! = null )
if ( isCommunicatorOn )
// Update reset parameters.
Dictionary < string , float > NewResetParameters =
communicator . GetResetParameters ( ) ;
foreach ( KeyValuePair < string , float > kv in NewResetParameters )
ForcedFullReset ( ) ;
communicator . SetCommand ( ExternalCommand . STEP ) ;
}
ForcedFullReset ( ) ;
}
if ( ( stepsSinceReset > = maxSteps ) & & maxSteps > 0 )
if ( ( stepCount > = maxSteps ) & & maxSteps > 0 )
AgentSetStatus ( maxStepReached , done , stepsSinceReset ) ;
AgentSetStatus ( maxStepReached , done , stepCount ) ;
_AcademyReset ( ) ;
{
EnvironmentReset ( ) ;
}
AgentResetIfDone ( ) ;
AgentAct ( ) ;
stepsSinceReset + = 1 ;
stepCount + = 1 ;
internal void _AcademyReset ( )
/// <summary>
/// Resets the environment, including the Academy.
/// </summary>
void EnvironmentReset ( )
stepsSinceReset = 0 ;
stepCount = 0 ;
/// <summary>
/// MonoBehaviour function that dictates each environment step.
/// </summary>
_AcademyStep ( ) ;
EnvironmentStep ( ) ;
private static List < Brain > GetBrains ( GameObject gameObject )
/// <summary>
/// Helper method that retrieves the Brain objects that are currently
/// specified as children of the Academy within the Editor.
/// </summary>
/// <param name="academy">Academy.</param>
/// <returns>
/// List of brains currently attached to academy.
/// </returns>
static List < Brain > GetBrains ( GameObject academy )
var transform = gameObject . transform ;
var transform = academy . transform ;
for ( var i = 0 ; i < transform . childCount ; i + + )
{
if ( brain ! = null & & child . gameObject . activeSelf )
{
}
}
}