Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

367 行
9.4 KiB

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/*! \mainpage ML-Agents Index Page
* Welcome to Unity Machine Learning Agents documentation.
*/
[System.Serializable]
public class ScreenConfiguration
{
public int width;
public int height;
[Range(0, 5)]
public int qualityLevel;
[Range(1f, 100f)]
public float timeScale;
public int targetFrameRate;
public ScreenConfiguration(int w, int h, int q, float ts, int tf)
{
width = w;
height = h;
qualityLevel = q;
timeScale = ts;
targetFrameRate = tf;
}
}
/** Create a child class to implement InitializeAcademy(), AcademyStep()
* and AcademyReset(). The child class script must be attached to an empty game
* object in your scene, and there can only be one such object within the scene.
*/
public abstract class Academy : MonoBehaviour
{
[System.Serializable]
private struct ResetParameter
{
public string key;
public float value;
}
[SerializeField]
private int maxSteps;
[SerializeField]
private int frameToSkip;
[SerializeField]
private float waitTime;
[HideInInspector]
public bool isInference = true;
/**< \brief Do not modify : If true, the Academy will use inference
* settings. */
private bool _isCurrentlyInference;
[SerializeField]
private ScreenConfiguration trainingConfiguration = new ScreenConfiguration(80, 80, 1, 100.0f, 60);
[SerializeField]
private ScreenConfiguration inferenceConfiguration = new ScreenConfiguration(1280, 720, 5, 1.0f, 60);
[SerializeField]
private ResetParameter[] defaultResetParameters;
public Dictionary<string, float> resetParameters;
/**< \brief Contains a mapping from parameter names to float values. */
/**< You can specify the Default Reset Parameters in the Inspector of the
* Academy. You can modify these parameters when training with an External
* brain by passing a config dictionary at reset. Reference resetParameters
* in your AcademyReset() or AcademyStep() to modify elements in your
* environment at reset time. */
[HideInInspector]
private List<Brain> brains = new List<Brain>();
ExternalCommand externalCommand;
private bool acceptingSteps;
private int framesSinceAction;
private bool skippingFrames = true;
[HideInInspector]
public bool done;
/**< \brief The done flag of the Academy. */
/**< When set to true, the Academy will call AcademyReset() instead of
* AcademyStep() at step time.
* If true, all agents done flags will be set to true.*/
[HideInInspector]
public int episodeCount;
/**< \brief Increments each time the environment is reset. */
[HideInInspector]
public int currentStep;
/**< \brief Increments each time a step is taken in the environment. Is
* reset to 0 during AcademyReset(). */
public Communicator communicator;
/**< \brief Do not modify : pointer to the communicator currently in
* use by the Academy. */
private float timeAtStep;
void Awake()
{
resetParameters = new Dictionary<string, float>();
foreach (ResetParameter kv in defaultResetParameters)
{
resetParameters[kv.key] = kv.value;
}
GetBrains(gameObject, brains);
InitializeAcademy();
communicator = new ExternalCommunicator(this);
if (!communicator.CommunicatorHandShake())
{
communicator = null;
}
foreach (Brain brain in brains)
{
brain.InitializeBrain();
}
if (communicator != null)
{
communicator.InitializeCommunicator();
externalCommand = communicator.GetCommand();
}
isInference = (communicator == null);
_isCurrentlyInference = !isInference;
done = true;
acceptingSteps = true;
}
/// Environment specific initialization.
/**
* Implemented in environment-specific child class.
* This method is called once when the environment is loaded.
*/
public virtual void InitializeAcademy()
{
}
private void ConfigureEngine()
{
if ((!isInference))
{
Screen.SetResolution(trainingConfiguration.width, trainingConfiguration.height, false);
QualitySettings.SetQualityLevel(trainingConfiguration.qualityLevel, true);
Time.timeScale = trainingConfiguration.timeScale;
Application.targetFrameRate = trainingConfiguration.targetFrameRate;
}
else
{
Screen.SetResolution(inferenceConfiguration.width, inferenceConfiguration.height, false);
QualitySettings.SetQualityLevel(inferenceConfiguration.qualityLevel, true);
Time.timeScale = inferenceConfiguration.timeScale;
Application.targetFrameRate = inferenceConfiguration.targetFrameRate;
}
}
/// Environment specific step logic.
/**
* Implemented in environment-specific child class.
* This method is called at every step.
*/
public virtual void AcademyStep()
{
}
/// Environment specific reset logic.
/**
* Implemented in environment-specific child class.
* This method is called everytime the Academy resets (when the global done
* flag is set to true).
*/
public virtual void AcademyReset()
{
}
// Called after AcademyStep().
internal void Step()
{
// Reset all agents whose flags are set to done.
foreach (Brain brain in brains)
{
// Set all agents to done if academy is done.
if (done)
{
brain.SendDone();
}
brain.ResetIfDone();
}
SendState();
foreach (Brain brain in brains)
{
brain.ResetDoneAndReward();
}
}
// Called before AcademyReset().
internal void Reset()
{
currentStep = 0;
episodeCount++;
done = false;
AcademyReset();
foreach (Brain brain in brains)
{
brain.Reset();
brain.ResetDoneAndReward();
}
}
// Instructs all brains to collect states from their agents.
private void SendState()
{
foreach (Brain brain in brains)
{
brain.SendState();
}
}
// Instructs all brains to process states to produce actions.
private void DecideAction()
{
if (communicator != null)
{
communicator.UpdateActions();
}
foreach (Brain brain in brains)
{
brain.DecideAction();
}
framesSinceAction = 0;
}
void FixedUpdate()
{
if (acceptingSteps)
{
RunMdp();
}
}
// Contains logic for taking steps in environment simulation.
/** Based on presence of communicator, inference mode, and frameSkip,
* decides whether the environment should be stepped or reset.
*/
void RunMdp()
{
if (isInference != _isCurrentlyInference)
{
ConfigureEngine();
_isCurrentlyInference = isInference;
}
if ((isInference) && (timeAtStep + waitTime > Time.time))
{
return;
}
timeAtStep = Time.time;
framesSinceAction += 1;
currentStep += 1;
if ((currentStep >= maxSteps) && maxSteps > 0)
{
done = true;
}
if ((framesSinceAction > frameToSkip) || done)
{
skippingFrames = false;
framesSinceAction = 0;
}
else
{
skippingFrames = true;
}
if (skippingFrames == false)
{
if (communicator != null)
{
if (externalCommand == ExternalCommand.STEP)
{
Step();
externalCommand = communicator.GetCommand();
}
if (externalCommand == ExternalCommand.RESET)
{
Dictionary<string, float> NewResetParameters = communicator.GetResetParameters();
foreach (KeyValuePair<string, float> kv in NewResetParameters)
{
resetParameters[kv.Key] = kv.Value;
}
Reset();
externalCommand = ExternalCommand.STEP;
RunMdp();
return;
}
if (externalCommand == ExternalCommand.QUIT)
{
Application.Quit();
return;
}
}
else
{
if (done)
{
Reset();
RunMdp();
return;
}
else
{
Step();
}
}
DecideAction();
}
AcademyStep();
foreach (Brain brain in brains)
{
brain.Step();
}
}
private static void GetBrains(GameObject gameObject, List<Brain> brains)
{
var transform = gameObject.transform;
for (var i = 0; i < transform.childCount; i++)
{
var child = transform.GetChild(i);
var brain = child.GetComponent<Brain>();
if (brain != null)
brains.Add(brain);
}
}
}