浏览代码

Comment improvements & refactoring to Academy.cs

Added several class and method-level comments that are compatible with Doxygen for auto-generation of documentation, in addition to some stylistic and minor code changes (summarized below).

Stylistic changes:
- Modified comments to /// style instead of /** */
- Removed unnecessary imports
- Removed unnecessary “private” declarations
- Limited code to 80 characters per line
- Re-organized variables to group those that are visible in Inspector (they are now at the top)

Code changes:
- Renamed ScreenConfiguration to EnvironmentConfiguration (variable only used within Academy.cs, thus no other files needed modification)
- Renamed ConfigureEngine to ConfigureEnvironment and created a ConfigureEnvironmentHelper method
- Renamed _isCurrentlyInference to modeSwitched to signify when the engine config needs to be changed
- Added isCommunicatorOn flag to be explicit about the existence of a communicator
- Made isInference private which requ...
/develop-generalizationTraining-TrainerController
Marwan Mattar 7 年前
当前提交
fa638000
共有 7 个文件被更改,包括 523 次插入335 次删除
  1. 217
      unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs
  2. 78
      unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs
  3. 2
      unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  4. 514
      unity-environment/Assets/ML-Agents/Scripts/Academy.cs
  5. 9
      unity-environment/Assets/ML-Agents/Scripts/Agent.cs
  6. 15
      unity-environment/Assets/ML-Agents/Scripts/Brain.cs
  7. 23
      unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs

217
unity-environment/Assets/ML-Agents/Editor/MLAgentsEditModeTest.cs


using UnityEngine;
using UnityEditor;
using UnityEngine.TestTools;
using System.Collections;
using System.Reflection;
namespace MLAgentsTests

collectObservationsCalls += 1;
}
public override void AgentAction(float[] vetorAction, string textAction)
public override void AgentAction(float[] vectorAction, string textAction)
{
agentActionCalls += 1;
AddReward(0.1f);

TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreNotEqual(null, aca);
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
}
[Test]

acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
Assert.AreEqual(0, aca.episodeCount);
Assert.AreEqual(0, aca.stepsSinceReset);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(0, aca.academyResetCalls);
Assert.AreEqual(0, aca.AcademyStepCalls);

MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
for (int i = 0; i < 10; i++){
for (int i = 0; i < 10; i++)
{
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(i, aca.stepsSinceReset);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(i, aca.GetStepCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

{
{
numberReset += 1;
}
AcademyStepMethod.Invoke((object)aca, new object[] { });

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent1, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision =0;
int requestAction=0;
int numberAgent1Reset = 0;
int numberAgent2Initialization = 0;
int requestDecision = 0;
int requestAction = 0;
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual((i+1)/2, agent1.collectObservationsCalls);
Assert.AreEqual((i + 1) / 2, agent1.collectObservationsCalls);
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
{
AgentEnableMethod.Invoke(agent2, new object[] { aca });
numberAgent2Initialization += 1;

if ((i % 3 == 0) && (i > 2))
{
//Every 3 steps after agent 2 is initialized, request decision
requestDecision +=1;
requestAction+=1;
requestDecision += 1;
requestAction += 1;
agent2.RequestDecision();
}
else if ((i % 5 == 0) && (i > 2))

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
int numberReset = 0;
int stepsSinceReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);

stepsSinceReset += 1;
// Regularly set the academy to done to check behavior
if (i % 5 == 3)
if (i % 5 == 3)
{
aca.Done();
numberReset += 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

AgentEnableMethod.Invoke(agent2, new object[] { aca });
AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int agent1StepSinceReset =0;
int agent2StepSinceReset=0;
int agent1StepSinceReset = 0;
int agent2StepSinceReset = 0;
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberAcaReset, aca.academyResetCalls);

Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);
// Agent 2 and academy reset at the first step
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
if (i % 100 == 3)
if (i % 100 == 3)
{
aca.Done();
numberAcaReset += 1;

if (i % 11 == 5)
if (i % 11 == 5)
if (i % 13 == 3)
if (i % 13 == 3)
if (!(agent2.IsDone()||aca.IsDone()))
if (!(agent2.IsDone() || aca.IsDone()))
{
// If the agent was already reset before the request decision
// We should not reset again

}
}
// Request a decision for agent 2 regularly
if (i % 3 == 2)
if (i % 3 == 2)
else if (i % 5 == 1)
else if (i % 5 == 1)
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions==0)) || aca.IsDone())
if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)) || aca.IsDone())
{
numberAgent1Reset += 1;
agent1StepSinceReset = 0;

agent1StepSinceReset += 1;
agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

GameObject acaGO = new GameObject("TestAcademy");
acaGO.AddComponent<TestAcademy>();
TestAcademy aca = acaGO.GetComponent<TestAcademy>();
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 20);
int numberReset = 0;

Assert.AreEqual(stepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(numberReset, aca.episodeCount);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
if (i % 20 == 0)
if (i % 20 == 0)
{
numberReset += 1;
stepsSinceReset = 1;

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField("maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
FieldInfo maxStep = typeof(Academy).GetField(
"maxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
maxStep.SetValue((object)aca, 100);
agent1.agentParameters = new AgentParameters();

AcademyInitializeMethod.Invoke(aca, new object[] { });
int numberAgent1Reset = 0;
int numberAgent1Reset = 0;
int numberAgent2Reset = 0;
int numberAcaReset = 0;
int acaStepsSinceReset = 0;

for (int i = 0; i < 500; i++)
{
Assert.AreEqual(acaStepsSinceReset, aca.stepsSinceReset);
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);

Assert.AreEqual(numberAcaReset, aca.episodeCount);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
if (i == 0)
if (i == 0)
if (i == 2)
if (i == 2)
agent2.RequestDecision();
agent2.RequestDecision();
if (i % 100 == 0)
if (i % 100 == 0)
{
acaStepsSinceReset = 0;
agent1StepSinceReset = 0;

agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
if (i < 2)
{
agent1StepSinceReset = 0;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
brain.brainParameters = new BrainParameters();

agent1.agentParameters.maxStep = 20;
//Here we specify that the agent does not reset when done
agent1.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
brain.brainParameters.vectorObservationSize = 0;
brain.brainParameters.cameraResolutions = new resolution[0];
agent1.GiveBrain(brain);

Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
// we request a decision at each step
agent2.RequestDecision();
agent2.RequestDecision();
if (agent1ResetOnDone ==0)
if (agent1ResetOnDone == 0)
if ((i > 2) && (i % 21 == 0)){
if ((i > 2) && (i % 21 == 0))
{
agent1ResetOnDone = 1;
}

TestBrain brain = brainGO.GetComponent<TestBrain>();
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod("_InitializeAgent",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod("_InitializeAcademy",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AgentEnableMethod = typeof(Agent).GetMethod(
"_InitializeAgent", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod("_AcademyStep",
BindingFlags.Instance | BindingFlags.NonPublic);
MethodInfo AcademyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();

{
agent2.RequestAction();
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f- agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
if ((i % 21 == 0) && (i>0))
if ((i % 21 == 0) && (i > 0))
{
j = 0;
}

78
unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


using System.Collections;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

private BasicAcademy academy;
public float timeBetweenDecisionsAtInference;
private float timeSinceDecision;
public int position;
public int smallGoalPosition;
public int largeGoalPosition;
public GameObject largeGoal;
public GameObject smallGoal;
public int minPosition;
public int maxPosition;
public int position;
public int smallGoalPosition;
public int largeGoalPosition;
public GameObject largeGoal;
public GameObject smallGoal;
public int minPosition;
public int maxPosition;
public override void InitializeAgent()
{

/// <summary>
/// Collects the agent's observations by adding its current (integer)
/// position as a single vector observation.
/// </summary>
public override void CollectObservations()
{
AddVectorObs(position);
}
public override void CollectObservations()
{
AddVectorObs(position);
}
public override void AgentAction(float[] vectorAction, string textAction)
{

if (movement == 1) { direction = 1; }
position += direction;
if (position < minPosition) { position = minPosition; }
if (position > maxPosition) { position = maxPosition; }
position += direction;
if (position < minPosition) { position = minPosition; }
if (position > maxPosition) { position = maxPosition; }
gameObject.transform.position = new Vector3(position, 0f, 0f);
gameObject.transform.position = new Vector3(position, 0f, 0f);
AddReward( - 0.01f);
AddReward(-0.01f);
if (position == smallGoalPosition)
{
if (position == smallGoalPosition)
{
AddReward( 0.1f);
}
AddReward(0.1f);
}
if (position == largeGoalPosition)
{
if (position == largeGoalPosition)
{
}
}
}
}
/// <summary>
/// Resets the agent to its starting state: the agent is placed at
/// position 0, the movement bounds are restored to [-10, 10], and the
/// small/large goals are repositioned at -3 and 7 respectively. The
/// goal GameObjects are moved to match the new goal positions.
/// </summary>
public override void AgentReset()
{
position = 0;
minPosition = -10;
maxPosition = 10;
smallGoalPosition = -3;
largeGoalPosition = 7;
smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
}
public override void AgentReset()
{
position = 0;
minPosition = -10;
maxPosition = 10;
smallGoalPosition = -3;
largeGoalPosition = 7;
smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f);
largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f);
}
public override void AgentOnDone()
{
public override void AgentOnDone()
{
}
}
public void FixedUpdate()
{

private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

2
unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


private void WaitTimeInference()
{
if (!academy.isInference)
if (!academy.GetIsInference())
{
RequestDecision();
}

514
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
* Welcome to Unity Machine Learning Agents documentation.
* Welcome to Unity Machine Learning Agents (ML-Agents).
*
* ML-Agents contains five entities: Academy, Brain, Agent, Communicator and
* Python API. The academy, and all its brains and connected agents live within
* a learning environment (herein called Environment), while the communicator
* manages the communication between the learning environment and the Python
* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor. These parameters can be provided for training and inference
/// modes separately and represent screen resolution, rendering quality and
/// frame rate.
/// </summary>
public class ScreenConfiguration
public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels")]
[Tooltip("Height of the environment window in pixels.")]
[Tooltip("Rendering quality of environment. (Higher is better quality)")]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Tooltip("Speed at which environment is run. (Higher is faster)")]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Tooltip("FPS engine attempts to maintain.")]
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
public ScreenConfiguration(int w, int h, int q, float ts, int tf)
/// Initializes a new instance of the
/// <see cref="EnvironmentConfiguration"/> class.
/// <param name="width">Width of environment window (pixels).</param>
/// <param name="height">Height of environment window (pixels).</param>
/// <param name="qualityLevel">
/// Rendering quality of environment. Ranges from 0 to 5, with higher.
/// </param>
/// <param name="timeScale">
/// Speed at which environment is run. Ranges from 1 to 100, with higher
/// values representing faster speed.
/// </param>
/// <param name="targetFrameRate">
/// Target frame rate (per second) that the engine tries to maintain.
/// </param>
public EnvironmentConfiguration(
int width, int height, int qualityLevel,
float timeScale, int targetFrameRate)
width = w;
height = h;
qualityLevel = q;
timeScale = ts;
targetFrameRate = tf;
this.width = width;
this.height = height;
this.qualityLevel = qualityLevel;
this.timeScale = timeScale;
this.targetFrameRate = targetFrameRate;
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Agents-Editor-Interface.md#academy")]
/** Create a child class to implement InitializeAcademy(), AcademyStep()
* and AcademyReset(). The child class script must be attached to an empty game
* object in your scene, and there can only be one such object within the scene.
*/
/// <summary>
/// An Academy is where Agent objects go to train their behaviors. More
/// specifically, an academy is a collection of Brain objects and each agent
/// in a scene is attached to one brain (a single brain may be attached to
/// multiple agents). Currently, this class is expected to be extended to
/// implement the desired academy behavior.
/// </summary>
/// <remarks>
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md")]
// Fields provided in the Inspector
[Tooltip("Total number of steps per episode. \n" +
"0 corresponds to episodes without a maximum number of steps. \n" +
"Once the step counter reaches maximum, " +
"the environment will reset.")]
private int maxSteps;
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int maxSteps;
[HideInInspector]
public bool isInference = true;
/**< \brief Do not modify : If true, the Academy will use inference
* settings. */
private bool _isCurrentlyInference;
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Training.")]
private ScreenConfiguration trainingConfiguration =
new ScreenConfiguration(80, 80, 1, 100.0f, -1);
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration trainingConfiguration =
new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);
[Tooltip("The engine-level settings which correspond to rendering quality" +
" and engine speed during Inference.")]
private ScreenConfiguration inferenceConfiguration =
new ScreenConfiguration(1280, 720, 5, 1.0f, 60);
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference.")]
EnvironmentConfiguration inferenceConfiguration =
new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);
/**< \brief Contains a mapping from parameter names to float values. */
/**< You can specify the Default Reset Parameters in the Inspector of the
* Academy. You can modify these parameters when training with an External
* brain by passing a config dictionary at reset. Reference resetParameters
* in your AcademyReset() or AcademyStep() to modify elements in your
* environment at reset time. */
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// used in <see cref="AcademyReset"/> and <see cref="AcademyStep"/>
/// to modify elements in the environment at reset time.
/// <summary/>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
"environment on reset.")]
"environment when it resets.")]
// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool isCommunicatorOn;
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified by an
/// external Brain during reset via <see cref="SetIsInference"/>.
bool isInference = true;
/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool done;
/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool maxStepReached;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int episodeCount;
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int stepCount;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched by the external Brain. This impacts the
/// engine settings at the next environment step.
bool modeSwitched;
/// Pointer to the communicator currently in use by the Academy.
Communicator communicator;
//
bool firstAcademyReset;
// The Academy uses a series of events to communicate with agents and
// brains to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).
// Signals to all the Brains at each environment step so they can decide
// actions for their agents.
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
// decision).
// Signals to all the agents at each environment step so they can send
// their state to their Brain if they have requested a decision.
// Signals to all the agents at each environment step so they can act if
// they have requested a decision.
public event System.Action AgentForceReset;
/**< \brief The done flag of the Academy. */
/**< When set to true, the Academy will call AcademyReset() instead of
* AcademyStep() at step time.
* If true, all agents done flags will be set to true.*/
private bool done;
// Signals to all the agents each time the Academy force resets.
public event System.Action AgentForceReset;
/// The max step reached.
/// Monobehavior function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.
private bool maxStepReached;
/**< \brief Increments each time the environment is reset. */
[HideInInspector]
public int episodeCount;
[HideInInspector]
public int stepsSinceReset;
/**< \brief Do not modify : pointer to the communicator currently in
* use by the Academy. */
public Communicator communicator;
private bool firstAcademyReset;
_InitializeAcademy();
InitializeEnvironment();
void _InitializeAcademy()
{
List<Brain> brains = GetBrains(gameObject);
InitializeAcademy();
/// <summary>
/// Initializes the environment, configures it and initialized the Academy.
/// </summary>
void InitializeEnvironment()
{
// Initialize communicator (if possible)
if (!communicator.CommunicatorHandShake())
if (communicator.CommunicatorHandShake())
{
isCommunicatorOn = true;
communicator.InitializeCommunicator();
communicator.UpdateCommand();
}
else
// Initialize Academy and Brains.
InitializeAcademy();
List<Brain> brains = GetBrains(gameObject);
if (communicator != null)
{
communicator.InitializeCommunicator();
communicator.UpdateCommand();
}
isInference = (communicator == null);
_isCurrentlyInference = !isInference;
// If a communicator is enabled/provided, then we assume we are in
// training mode. In the absence of a communicator, we assume we are
// in inference mode.
isInference = !isCommunicatorOn;
BrainDecideAction += () => { };
AgentSetStatus += (m, d, i) => { };

AgentForceReset += () => { };
// Configure the environment using the configurations provided by
// the developer in the Editor.
ConfigureEnvironment();
}
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor.
/// </summary>
void ConfigureEnvironment()
{
if (isInference)
{
ConfigureEnvironmentHelper(inferenceConfiguration);
Monitor.SetActive(true);
}
else
{
ConfigureEnvironmentHelper(trainingConfiguration);
Monitor.SetActive(false);
}
/// <summary>
/// Helper method for initializing the environment based on the provided
/// configuration.
/// </summary>
/// <param name="config">
/// Environment configuration (specified in the Editor).
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
// Apply windowed (non-fullscreen) resolution from the configuration.
Screen.SetResolution(config.width, config.height, false);
// Second argument applies expensive quality changes immediately.
QualitySettings.SetQualityLevel(config.qualityLevel, true);
Time.timeScale = config.timeScale;
// NOTE(review): capture framerate is hard-coded to 60 regardless of the
// configuration, while the target frame rate comes from config — confirm
// this asymmetry is intentional.
Time.captureFramerate = 60;
Application.targetFrameRate = config.targetFrameRate;
}
/// Environment specific initialization.
/**
* Implemented in environment-specific child class.
* This method is called once when the environment is loaded.
*/
/// <summary>
/// Initializes the academy and environment. Called during the waking-up
/// phase of the environment before any of the scene objects/agents have
/// been initialized.
/// </summary>
/// <summary>
/// Specifies the academy behavior at every step of the environment.
/// </summary>
public virtual void AcademyStep()
{
private void ConfigureEngine()
}
/// <summary>
/// Specifies the academy behavior when being reset (i.e. at the completion
/// of a global episode).
/// </summary>
public virtual void AcademyReset()
if ((!isInference))
{
Screen.SetResolution(
trainingConfiguration.width,
trainingConfiguration.height,
false);
QualitySettings.SetQualityLevel(
trainingConfiguration.qualityLevel, true);
Time.timeScale = trainingConfiguration.timeScale;
Application.targetFrameRate =
trainingConfiguration.targetFrameRate;
QualitySettings.vSyncCount = 0;
Time.captureFramerate = 60;
Monitor.SetActive(false);
}
else
}
/// <summary>
/// Reports whether the Academy is currently running in inference mode.
/// </summary>
/// <returns>
/// <c>true</c> when in inference mode, <c>false</c> when training.
/// </returns>
public bool GetIsInference() => isInference;
/// <summary>
/// Sets the <see cref="isInference"/> flag to the provided value. If
/// the new flag differs from the current flag value, this signals that
/// the environment configuration needs to be updated.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference(bool isInference)
{
if (this.isInference != isInference)
Screen.SetResolution(
inferenceConfiguration.width,
inferenceConfiguration.height,
false);
QualitySettings.SetQualityLevel(
inferenceConfiguration.qualityLevel, true);
Time.timeScale = inferenceConfiguration.timeScale;
Application.targetFrameRate =
inferenceConfiguration.targetFrameRate;
Time.captureFramerate = 60;
Monitor.SetActive(true);
this.isInference = isInference;
// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuration.
modeSwitched = true;
/// Environment specific step logic.
/**
* Implemented in environment-specific child class.
* This method is called at every step.
*/
public virtual void AcademyStep()
/// <summary>
/// Returns the current episode counter.
/// </summary>
/// <returns>
/// Current episode number.
/// </returns>
public int GetEpisodeCount()
return episodeCount;
/// Environment specific reset logic.
/**
* Implemented in environment-specific child class.
 * This method is called every time the Academy resets (when the global done
* flag is set to true).
*/
public virtual void AcademyReset()
/// <summary>
/// Returns the current step counter (within the current episode).
/// </summary>
/// <returns>
/// Current step count.
/// </returns>
public int GetStepCount()
return stepCount;
/// <summary>
/// Sets the done flag to true.
/// </summary>
/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return done;

/// Forces the full reset. The done flags are not affected. Is either
/// Indicates whether a communicator is attached and active.
/// </summary>
/// <returns>
/// <c>true</c> if a communicator is in use, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn() => isCommunicatorOn;
/// <summary>
/// Provides access to the Communicator the Academy is currently using.
/// </summary>
/// <returns>The communicator currently in use (may be null).</returns>
public Communicator GetCommunicator() => communicator;
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
private void ForcedFullReset()
void ForcedFullReset()
_AcademyReset();
EnvironmentReset();
internal void _AcademyStep()
/// <summary>
/// Performs a single environment update to the Academy, Brain and Agent
/// objects within the environment.
/// </summary>
void EnvironmentStep()
if (isInference != _isCurrentlyInference)
if (modeSwitched)
ConfigureEngine();
_isCurrentlyInference = isInference;
ConfigureEnvironment();
modeSwitched = false;
if (communicator != null)
if (isCommunicatorOn)
// Update reset parameters.
Dictionary<string, float> NewResetParameters =
communicator.GetResetParameters();
foreach (KeyValuePair<string, float> kv in NewResetParameters)

ForcedFullReset();
communicator.SetCommand(ExternalCommand.STEP);
}

ForcedFullReset();
}
if ((stepsSinceReset >= maxSteps) && maxSteps > 0)
if ((stepCount >= maxSteps) && maxSteps > 0)
AgentSetStatus(maxStepReached, done, stepsSinceReset);
AgentSetStatus(maxStepReached, done, stepCount);
_AcademyReset();
{
EnvironmentReset();
}
AgentResetIfDone();

AgentAct();
stepsSinceReset += 1;
stepCount += 1;
internal void _AcademyReset()
/// <summary>
/// Resets the environment, including the Academy.
/// </summary>
void EnvironmentReset()
stepsSinceReset = 0;
stepCount = 0;
/// <summary>
/// Monobehavior function that dictates each environment step.
/// </summary>
_AcademyStep();
EnvironmentStep();
private static List<Brain> GetBrains(GameObject gameObject)
/// <summary>
/// Helper method that retrieves the Brain objects that are currently
/// specified as children of the Academy within the Editor.
/// </summary>
/// <param name="academy">Academy.</param>
/// <returns>
/// List of brains currently attached to academy.
/// </returns>
static List<Brain> GetBrains(GameObject academy)
var transform = gameObject.transform;
var transform = academy.transform;
for (var i = 0; i < transform.childCount; i++)
{

if (brain != null && child.gameObject.activeSelf)
{
}
}
}

9
unity-environment/Assets/ML-Agents/Scripts/Agent.cs


}
internal void AddVectorObs(float[] observation)
{
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
_info.vectorObservation.AddRange(observation);
}

/// Note: If your state is discrete, you need to convert your
/// state into a list of float with length 1.
/// </summary>
/// <param name="action">The action the agent receives
/// from the brain.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{

RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return tex;
}

15
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


CoreBrains = new ScriptableObject[numCoreBrains];
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

break;
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

if (CoreBrains.Length < System.Enum.GetValues(typeof(BrainType)).Length)
{
int numCoreBrains = System.Enum.GetValues(typeof(BrainType)).Length;
ScriptableObject[] new_CoreBrains =
ScriptableObject[] new_CoreBrains =
new ScriptableObject[numCoreBrains];
foreach (BrainType bt in System.Enum.GetValues(typeof(BrainType)))
{

}
else
{
new_CoreBrains[(int)bt] =
new_CoreBrains[(int)bt] =
ScriptableObject.CreateInstance(
"CoreBrain" + bt.ToString());
}

{
if (CoreBrains[(int)bt] == null)
{
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
CoreBrains[(int)bt] =
ScriptableObject.Instantiate(CoreBrains[(int)bt]);
}
}

public void SendState(Agent agent, AgentInfo info)
{
agentInfos.Add(agent, info);
}
void DecideAction()

}

23
unity-environment/Assets/ML-Agents/Scripts/ExternalCommunicator.cs


sMessage.agents = new List<int>(defaultNumAgents);
sMessage.vectorObservations = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.rewards = new List<float>(defaultNumAgents);
sMessage.memories= new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.memories = new List<float>(defaultNumAgents * defaultNumObservations);
sMessage.maxes= new List<bool>(defaultNumAgents);
sMessage.maxes = new List<bool>(defaultNumAgents);
foreach(string k in accParamerters.brainNames){
foreach (string k in accParamerters.brainNames)
{
current_agents[k] = new List<Agent>(defaultNumAgents);
hasSentState[k] = false;
triedSendState[k] = false;

sender.Send(Encoding.ASCII.GetBytes("CONFIG_REQUEST"));
Receive();
var resetParams = JsonConvert.DeserializeObject<ResetParametersMessage>(rMessageString.ToString());
academy.isInference = !resetParams.train_model;
academy.SetIsInference(!resetParams.train_model);
return resetParams.parameters;
}

inputSeed = args[i + 1];
}
}
comPort = int.Parse(inputPort);
randomSeed = int.Parse(inputSeed);
Random.InitState(randomSeed);

sMessage.vectorObservations.AddRange(agentInfo[agent].stackedVectorObservation);
sMessage.rewards.Add(agentInfo[agent].reward);
sMessage.memories.AddRange(agentInfo[agent].memories);
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++ )
for (int j = 0; j < memorySize - agentInfo[agent].memories.Count; j++)
{
}
sMessage.dones.Add(agentInfo[agent].done);
sMessage.previousVectorActions.AddRange(agentInfo[agent].StoredVectorActions.ToList());
sMessage.previousTextActions.Add(agentInfo[agent].StoredTextActions);

}
public Dictionary<string, bool> GetHasTried(){
public Dictionary<string, bool> GetHasTried()
{
public Dictionary<string, bool> GetSent()
{
public Dictionary<string, bool> GetSent()
{
}
}
/// Listens for actions, memories, and values and sends them
/// to the corresponding brains.

正在加载...
取消
保存