浏览代码

Merge pull request #405 from Unity-Technologies/dev-api-doc-academy

Comment improvements & refactoring to Academy.cs
/develop-generalizationTraining-TrainerController
GitHub 7 年前
当前提交
9cbcf90d
共有 7 个文件被更改,包括 475 次插入和 283 次删除
  1. 221
      unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs
  2. 8
      unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs
  3. 2
      unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  4. 497
      unity-environment/Assets/ML-Agents/Scripts/Academy.cs
  5. 9
      unity-environment/Assets/ML-Agents/Scripts/Agent.cs
  6. 4
      unity-environment/Assets/ML-Agents/Scripts/Brain.cs
  7. 17
      unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs

221
unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs


using UnityEngine;
using UnityEditor;
using UnityEngine.TestTools;
using UnityEngine;
using System.Collections;
using System.Reflection;
namespace MLAgentsTests

collectObservationsCalls += 1;
}
public override void AgentAction(float[] vetorAction, string textAction)
public override void AgentAction(float[] vectorAction, string textAction)
{
agentActionCalls += 1;
AddReward(0.1f);

TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreNotEqual(null, aca);
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
}
[Test]

acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());
// This will call the method even though it is private
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
//This will call the method even though it is private
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(0, aca.academyResetCalls);
Assert.AreEqual(0, aca.AcademyStepCalls);

MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
for (int i = 0; i < 10; i++){
for (int i = 0; i < 10; i++)
{
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(i, aca.stepsSinceReset);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(i, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

{
{
numberReset += 1;
}
AcademyStepMethod.Invoke((object)aca, new object[] { });

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent1, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision =0;
int requestAction=0;
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision = 0;
int requestAction = 0;
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual((i+1)/2, agent1.collectObservationsCalls);
Assert.AreEqual((i + 1) / 2, agent1.collectObservationsCalls);
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
{
AgentEnableMethod.Invoke(agent2, new object[] { aca });
numberAgent2Initialization += 1;

if ((i % 3 == 0) && (i > 2))
{
//Every 3 steps after agent 2 is initialized, request decision
requestDecision +=1;
requestAction+=1;
requestDecision += 1;
requestAction += 1;
agent2.RequestDecision();
}
else if ((i % 5 == 0) && (i > 2))

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberReset = 0;
int stepsSinceReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);

stepsSinceReset += 1;
// Regularly set the academy to done to check behavior
if (i % 5 == 3)
if (i % 5 == 3)
{
aca.Done();
numberReset += 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent2, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int agent1StepSinceReset =0;
int agent2StepSinceReset=0;
int agent1StepSinceReset = 0;
int agent2StepSinceReset = 0;
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberAcaReset, aca.academyResetCalls);

Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);
// Agent 2 and academy reset at the first step
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
if (i % 100 == 3)
if (i % 100 == 3)
{
aca.Done();
numberAcaReset += 1;

if (i % 11 == 5)
if (i % 11 == 5)
if (i % 13 == 3)
if (i % 13 == 3)
if (!(agent2.IsDone()||aca.IsDone()))
if (!(agent2.IsDone() || aca.IsDone()))
{
// If the agent was already reset before the request decision
// We should not reset again

}
}
// Request a decision for agent 2 regularly
if (i % 3 == 2)
if (i % 3 == 2)
else if (i % 5 == 1)
else if (i % 5 == 1)
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions==0)) || aca.IsDone())
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)) || aca.IsDone())
{
numberAgent1Reset += 1;
agent1StepSinceReset = 0;

agent1StepSinceReset += 1;
agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 20);
int numberReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
if (i % 20 == 0)
if (i % 20 == 0)
{
numberReset += 1;
stepsSinceReset = 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 100);
agent1.agentParameters = new AgentParameters();

AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAcaReset = 0;
int acaStepsSinceReset = 0;

for (int i = 0; i < 500; i++)
{
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
agent2.RequestDecision();
agent2.RequestDecision();
if (i % 100 == 0)
if (i % 100 == 0)
{
acaStepsSinceReset = 0;
agent1StepSinceReset = 0;

agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
brain.brainParameters = new BrainParameters();

agent1.agentParameters.maxStep = 20;
//Here we specify that the agent does not reset when done
agent1.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
brain.brainParameters.vectorObservationSize = 0;
brain.brainParameters.cameraResolutions = new resolution[0];
agent1.GiveBrain(brain);

Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
// we request a decision at each step
agent2.RequestDecision();
agent2.RequestDecision();
if (agent1ResetOnDone ==0)
if (agent1ResetOnDone == 0)
if ((i > 2) && (i % 21 == 0)){
if ((i > 2) && (i % 21 == 0))
{
agent1ResetOnDone = 1;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

{
agent2.RequestAction();
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f- agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
if ((i % 21 == 0) && (i>0))
if ((i % 21 == 0) && (i > 0))
{
j = 0;
}

8
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


using System.Collections;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

gameObject.transform.position = new Vector3(position, 0f, 0f);
AddReward( - 0.01f);
AddReward(-0.01f);
AddReward( 0.1f);
AddReward(0.1f);
}
if (position == largeGoalPosition)

private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

2
unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

497
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
* Welcome to Unity Machine Learning Agents documentation.
* Welcome to Unity Machine Learning Agents (ML-Agents).
*
* ML-Agents contains five entities: Academy, Brain, Agent, Communicator and
* Python API. The academy, and all its brains and connected agents live within
* a learning environment (herein called Environment), while the communicator
* manages the communication between the learning environment and the Python
* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor. These parameters can be provided for training and inference
/// modes separately and represent screen resolution, rendering quality and
/// frame rate.
/// </summary>
public class ScreenConfiguration
public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels")]
[Tooltip("Height of the environment window in pixels.")]
[Tooltip("Rendering quality of environment. (Higher is better quality)")]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Tooltip("Speed at which environment is run. (Higher is faster)")]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Tooltip("FPS engine attempts to maintain.")]
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
public ScreenConfiguration(int w, int h, int q, float ts, int tf)
/// Initializes a new instance of the
/// <see cref="EnvironmentConfiguration"/> class.
/// <param name="width">Width of environment window (pixels).</param>
/// <param name="height">Height of environment window (pixels).</param>
/// <param name="qualityLevel">
/// Rendering quality of environment. Ranges from 0 to 5, with higher.
/// </param>
/// <param name="timeScale">
/// Speed at which environment is run. Ranges from 1 to 100, with higher
/// values representing faster speed.
/// </param>
/// <param name="targetFrameRate">
/// Target frame rate (per second) that the engine tries to maintain.
/// </param>
public EnvironmentConfiguration(
int width, int height, int qualityLevel,
float timeScale, int targetFrameRate)
width = w;
height = h;
qualityLevel = q;
timeScale = ts;
targetFrameRate = tf;
this.width = width;
this.height = height;
this.qualityLevel = qualityLevel;
this.timeScale = timeScale;
this.targetFrameRate = targetFrameRate;
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#academy")]
/** Create a child class to implement InitializeAcademy(), AcademyStep()
* and AcademyReset(). The child class script must be attached to an empty game
* object in your scene, and there can only be one such object within the scene.
*/
/// <summary>
/// An Academy is where Agent objects go to train their behaviors. More
/// specifically, an academy is a collection of Brain objects and each agent
/// in a scene is attached to one brain (a single brain may be attached to
/// multiple agents). Currently, this class is expected to be extended to
/// implement the desired academy behavior.
/// </summary>
/// <remarks>
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md")]
// Fields provided in the Inspector
[Tooltip("Total number of steps per episode. \n" +
"0 corresponds to episodes without a maximum number of steps. \n" +
"Once the step counter reaches maximum, " +
"the environment will reset.")]
private int maxSteps;
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int maxSteps;
[HideInInspector]
public bool isInference = true;
/**< \brief Do not modify : If true, the Academy will use inference
* settings. */
private bool _isCurrentlyInference;
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration trainingConfiguration =
new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Training.")]
private ScreenConfiguration trainingConfiguration =
new ScreenConfiguration(80, 80, 1, 100.0f, -1);
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Inference.")]
private ScreenConfiguration inferenceConfiguration =
new ScreenConfiguration(1280, 720, 5, 1.0f, 60);
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference.")]
EnvironmentConfiguration inferenceConfiguration =
new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);
/**< \brief Contains a mapping from parameter names to float values. */
/**< You can specify the Default Reset Parameters in the Inspector of the
* Academy. You can modify these parameters when training with an External
* brain by passing a config dictionary at reset. Reference resetParameters
* in your AcademyReset() or AcademyStep() to modify elements in your
* environment at reset time. */
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// used in <see cref="AcademyReset"/> and <see cref="AcademyStep"/>
/// to modify elements in the environment at reset time.
/// <summary/>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
"environment on reset.")]
"environment when it resets.")]
// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool isCommunicatorOn;
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified by an
/// external Brain during reset via <see cref="SetIsInference"/>.
bool isInference = true;
/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool done;
/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool maxStepReached;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int episodeCount;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int stepCount;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched by the external Brain. This impacts the
/// engine settings at the next environment step.
bool modeSwitched;
/// Pointer to the communicator currently in use by the Academy.
Communicator communicator;
// Flag used to keep track of the first time the Academy is reset.
bool firstAcademyReset;
// The Academy uses a series of events to communicate with agents and
// brains to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Signals to all the Brains at each environment step so they can decide
// actions for their agents.
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
// decision).
// Signals to all the agents at each environment step so they can send
// their state to their Brain if they have requested a decision.
// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
public event System.Action AgentForceReset;
/**< \brief The done flag of the Academy. */
/**< When set to true, the Academy will call AcademyReset() instead of
* AcademyStep() at step time.
* If true, all agents done flags will be set to true.*/
private bool done;
// Signals to all the agents each time the Academy force resets.
public event System.Action AgentForceReset;
/// The max step reached.
/// MonoBehaviour function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.
private bool maxStepReached;
/**< \brief Increments each time the environment is reset. */
[HideInInspector]
public int episodeCount;
[HideInInspector]
public int stepsSinceReset;
/**< \brief Do not modify : pointer to the communicator currently in
* use by the Academy. */
public Communicator communicator;
private bool firstAcademyReset;
_InitializeAcademy();
InitializeEnvironment();
void _InitializeAcademy()
/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// </summary>
void InitializeEnvironment()
// Retrieve Brain and initialize Academy
// Check for existence of communicator
communicator = new ExternalCommunicator(this);
if (!communicator.CommunicatorHandShake())
{

// Initialize Brains and communicator (if present)
foreach (Brain brain in brains)
{
brain.InitializeBrain(this, communicator);

isCommunicatorOn = true;
isInference = (communicator == null);
_isCurrentlyInference = !isInference;
// If a communicator is enabled/provided, then we assume we are in
// training mode. In the absence of a communicator, we assume we are
// in inference mode.
isInference = !isCommunicatorOn;
BrainDecideAction += () => { };
AgentSetStatus += (m, d, i) => { };

AgentForceReset += () => { };
// Configure the environment using the configurations provided by
// the developer in the Editor.
ConfigureEnvironment();
/// Environment specific initialization.
/**
* Implemented in environment-specific child class.
* This method is called once when the environment is loaded.
*/
public virtual void InitializeAcademy()
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor.
/// </summary>
void ConfigureEnvironment()
}
private void ConfigureEngine()
{
if ((!isInference))
if (isInference)
Screen.SetResolution(
trainingConfiguration.width,
trainingConfiguration.height,
false);
QualitySettings.SetQualityLevel(
trainingConfiguration.qualityLevel, true);
Time.timeScale = trainingConfiguration.timeScale;
Application.targetFrameRate =
trainingConfiguration.targetFrameRate;
QualitySettings.vSyncCount = 0;
Time.captureFramerate = 60;
Monitor.SetActive(false);
ConfigureEnvironmentHelper(inferenceConfiguration);
Monitor.SetActive(true);
Screen.SetResolution(
inferenceConfiguration.width,
inferenceConfiguration.height,
false);
QualitySettings.SetQualityLevel(
inferenceConfiguration.qualityLevel, true);
Time.timeScale = inferenceConfiguration.timeScale;
Application.targetFrameRate =
inferenceConfiguration.targetFrameRate;
Time.captureFramerate = 60;
Monitor.SetActive(true);
ConfigureEnvironmentHelper(trainingConfiguration);
Monitor.SetActive(false);
/// Environment specific step logic.
/**
* Implemented in environment-specific child class.
* This method is called at every step.
*/
/// <summary>
/// Applies the provided configuration to the engine: window resolution,
/// rendering quality, simulation speed and frame pacing.
/// </summary>
/// <param name="config">
/// Environment configuration (specified in the Editor).
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
    // Window size and rendering quality.
    Screen.SetResolution(config.width, config.height, false);
    QualitySettings.SetQualityLevel(config.qualityLevel, true);

    // Simulation speed and frame pacing.
    Time.timeScale = config.timeScale;
    Application.targetFrameRate = config.targetFrameRate;
    Time.captureFramerate = 60;
}
/// <summary>
/// Initializes the academy and environment. Called during the waking-up
/// phase of the environment before any of the scene objects/agents have
/// been initialized.
/// </summary>
/// <remarks>
/// The base implementation is intentionally empty; override in an
/// environment-specific subclass to perform one-time setup.
/// </remarks>
public virtual void InitializeAcademy()
{
}
/// <summary>
/// Specifies the academy behavior at every step of the environment.
/// </summary>
/// Environment specific reset logic.
/**
* Implemented in environment-specific child class.
* This method is called everytime the Academy resets (when the global done
* flag is set to true).
*/
/// <summary>
/// Specifies the academy behavior when being reset (i.e. at the completion
/// of a global episode).
/// </summary>
/// <summary>
/// Reports the current execution mode of the academy.
/// </summary>
/// <returns>
/// <c>true</c> when the academy runs in inference mode, <c>false</c>
/// when it runs in training mode.
/// </returns>
public bool GetIsInference()
{
    return this.isInference;
}
/// <summary>
/// Updates the <see cref="isInference"/> flag. When the value actually
/// changes, the academy is flagged so that the engine configuration is
/// refreshed on the next environment step.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference(bool isInference)
{
    if (this.isInference == isInference)
    {
        // Mode unchanged; nothing to reconfigure.
        return;
    }

    this.isInference = isInference;
    // Signal that the engine settings must be updated to match the new
    // mode (training vs. inference) at the next environment step.
    modeSwitched = true;
}
/// <summary>
/// Reports how many episodes the environment has completed so far.
/// </summary>
/// <returns>
/// Number of times the environment has been reset.
/// </returns>
public int GetEpisodeCount()
{
    return this.episodeCount;
}
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current step count.
/// </returns>
public int GetStepCount()
{
return stepCount;
}
/// <summary>
/// Sets the done flag to true.
/// </summary>
/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return done;

/// Forceds the full reset. The done flags are not affected. Is either
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn()
{
return isCommunicatorOn;
}
/// <summary>
/// Returns the Communicator currently used by the Academy.
/// </summary>
/// <returns>The communicator currently in use (may be null).</returns>
public Communicator GetCommunicator()
{
return communicator;
}
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
private void ForcedFullReset()
void ForcedFullReset()
_AcademyReset();
EnvironmentReset();
internal void _AcademyStep()
/// <summary>
/// Performs a single environment update to the Academy, Brain and Agent
/// objects within the environment.
/// </summary>
void EnvironmentStep()
if (isInference != _isCurrentlyInference)
if (modeSwitched)
ConfigureEngine();
_isCurrentlyInference = isInference;
ConfigureEnvironment();
modeSwitched = false;
if (communicator != null)
if (isCommunicatorOn)
// Update reset parameters.
Dictionary<string, float> NewResetParameters =
communicator.GetResetParameters();
foreach (KeyValuePair<string, float> kv in NewResetParameters)

ForcedFullReset();
communicator.SetCommand(ExternalCommand.STEP);
}

return;
}
}
else if (!firstAcademyReset)

if ((stepsSinceReset >= maxSteps) && maxSteps > 0)
if ((stepCount >= maxSteps) && maxSteps > 0)
AgentSetStatus(maxStepReached, done, stepsSinceReset);
AgentSetStatus(maxStepReached, done, stepCount);
_AcademyReset();
{
EnvironmentReset();
}
AgentResetIfDone();

AgentAct();
stepsSinceReset += 1;
stepCount += 1;
internal void _AcademyReset()
/// <summary>
/// Resets the environment, including the Academy.
/// </summary>
void EnvironmentReset()
stepsSinceReset = 0;
stepCount = 0;
/// <summary>
/// Monobehavior function that dictates each environment step.
/// </summary>
_AcademyStep();
EnvironmentStep();
private static List<Brain> GetBrains(GameObject gameObject)
/// <summary>
/// Helper method that retrieves the Brain objects that are currently
/// specified as children of the Academy within the Editor.
/// </summary>
/// <param name="academy">Academy.</param>
/// <returns>
/// List of brains currently attached to academy.
/// </returns>
static List<Brain> GetBrains(GameObject academy)
var transform = gameObject.transform;
var transform = academy.transform;
for (var i = 0; i < transform.childCount; i++)
{

if (brain != null && child.gameObject.activeSelf)
{
}
}
return brains;
}

9
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


}
internal void AddVectorObs(float[] observation)
{
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
}
internal void AddVectorObs(Quaternion observation)
{

/// Note: If your state is discrete, you need to convert your
/// state into a list of float with length 1.
/// </summary>
/// <param name="action">The action the agent receives
/// from the brain.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{

RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return tex;
}

4
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


using System.Collections;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;

{
coreBrain.DecideAction(agentInfos);
agentInfos.Clear();
}

17
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


sMessage.agents = new List<int>(defaultNumAgents);
sMessage.vectorObservations = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.rewards = new List<float>(defaultNumAgents);
sMessage.memories= new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.memories = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.maxes= new List<bool>(defaultNumAgents);
sMessage.maxes = new List<bool>(defaultNumAgents);
foreach(string k in accParamerters.brainNames){
foreach (string k in accParamerters.brainNames)
{
current_agents[k] = new List<Agent>(defaultNumAgents);
hasSentState[k] = false;
triedSendState[k] = false;

sender.Send(Encoding.ASCII.GetBytes("CONFIG_REQUEST"));
Receive();
var resetParams = JsonConvert.DeserializeObject<ResetParametersMessage>(rMessageString.ToString());
academy.isInference = !resetParams.train_model;
academy.SetIsInference(!resetParams.train_model);
return resetParams.parameters;
}

inputSeed = args[i + 1];
}
}
comPort = int.Parse(inputPort);
randomSeed = int.Parse(inputSeed);
Random.InitState(randomSeed);

sMessage.vectorObservations.AddRange(agentInfo[agent].stackedVectorObservation);
sMessage.rewards.Add(agentInfo[agent].reward);
sMessage.memories.AddRange(agentInfo[agent].memories);
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++ )
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++)
{
}
sMessage.dones.Add(agentInfo[agent].done);
sMessage.previousVectorActions.AddRange(agentInfo[agent].StoredVectorActions.ToList());
sMessage.previousTextActions.Add(agentInfo[agent].StoredTextActions);

}
public Dictionary<string, bool> GetHasTried(){
/// <summary>
/// Returns the per-brain-name flag map (keys are brain names;
/// presumably each value records whether a state-send was attempted
/// this step — verify against callers).
/// </summary>
public Dictionary<string, bool> GetHasTried()
{
    return triedSendState;
}

正在加载...
取消
保存