
Demonstration Recorder (#1240)

/develop-generalizationTraining-TrainerController
GitHub, 6 years ago
Current commit: 3c9603d6
57 files changed, with 4009 insertions and 412 deletions
  1. .gitignore (4)
  2. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (9)
  3. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (77)
  4. UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs (152)
  5. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (47)
  6. UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (71)
  7. config/trainer_config.yaml (25)
  8. docs/Training-Imitation-Learning.md (59)
  9. ml-agents/mlagents/envs/communicator_objects/__init__.py (1)
  10. ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py (9)
  11. ml-agents/mlagents/envs/environment.py (18)
  12. ml-agents/mlagents/trainers/__init__.py (4)
  13. ml-agents/mlagents/trainers/bc/__init__.py (3)
  14. ml-agents/mlagents/trainers/bc/policy.py (2)
  15. ml-agents/mlagents/trainers/bc/trainer.py (106)
  16. ml-agents/mlagents/trainers/buffer.py (2)
  17. ml-agents/mlagents/trainers/ppo/trainer.py (16)
  18. ml-agents/mlagents/trainers/trainer.py (25)
  19. ml-agents/mlagents/trainers/trainer_controller.py (12)
  20. ml-agents/tests/mock_communicator.py (16)
  21. ml-agents/tests/trainers/test_meta_curriculum.py (1)
  22. ml-agents/tests/trainers/test_trainer_controller.py (104)
  23. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs (95)
  24. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta (11)
  25. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (60)
  26. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta (11)
  27. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (66)
  28. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta (11)
  29. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll (1001)
  30. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta (30)
  31. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll (623)
  32. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta (30)
  33. UnitySDK/Assets/ML-Agents/Resources.meta (8)
  34. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs (289)
  35. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta (11)
  36. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs (76)
  37. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta (11)
  38. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs (65)
  39. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta (11)
  40. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs (138)
  41. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta (3)
  42. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs (73)
  43. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta (3)
  44. config/bc_config.yaml (55)
  45. docs/images/demo_component.png (102)
  46. docs/images/demo_inspector.png (198)
  47. ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py (98)
  48. ml-agents/mlagents/envs/utilities.py (19)
  49. ml-agents/mlagents/trainers/bc/offline_trainer.py (53)
  50. ml-agents/mlagents/trainers/bc/online_trainer.py (116)
  51. ml-agents/mlagents/trainers/demo_loader.py (151)
  52. ml-agents/tests/trainers/test.demo (60)
  53. ml-agents/tests/trainers/test_demo_loader.py (14)
  54. protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto (12)
  55. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png (68)
  56. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta (86)

4
.gitignore


/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Gizmos*
/UnitySDK/Assets/Demonstrations*
# Tensorflow Model Info
/models

/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
# Generated doc folders
/docs/html

9
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


}
}
brainBatcher = new MLAgents.Batcher(communicator);
brainBatcher = new Batcher(communicator);
// Initialize Brains and communicator (if present)
foreach (var brain in brains)

isCommunicatorOn = true;
var academyParameters =
new MLAgents.CommunicatorObjects.UnityRLInitializationOutput();
new CommunicatorObjects.UnityRLInitializationOutput();
academyParameters.Name = gameObject.name;
academyParameters.Version = kApiVersion;
foreach (var brain in brains)

MLAgents.Batcher.BrainParametersConvertor(
bp,
bp.ToProto(
(MLAgents.CommunicatorObjects.BrainTypeProto)
(CommunicatorObjects.BrainTypeProto)
brain.brainType));
}

77
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using System.Linq;
using Google.Protobuf;
using MLAgents.CommunicatorObjects;
using UnityEngine;

/// to separate between different agents in the environment.
/// </summary>
public int id;
/// <summary>
/// Converts an AgentInfo into a protobuf-generated AgentInfoProto.
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
public CommunicatorObjects.AgentInfoProto ToProto()
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = {stackedVectorObservation},
StoredVectorActions = {storedVectorActions},
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,
MaxStepReached = maxStepReached,
Done = done,
Id = id,
};
if (memories != null)
{
agentInfoProto.Memories.Add(memories);
}
if (actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(actionMasks);
}
foreach (Texture2D obs in visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
}
/// <summary>

/// Array of Texture2D used to render to from render buffer before
/// transforming into float tensor.
Texture2D[] textureArray;
/// <summary>
/// Demonstration recorder.
/// </summary>
private DemonstrationRecorder recorder;
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

{
textureArray[i] = new Texture2D(1, 1, TextureFormat.RGB24, false);
}
recorder = GetComponent<DemonstrationRecorder>();
}
/// Helper method for the <see cref="OnEnable"/> event, created to

/// </remarks>
public virtual void InitializeAgent()
{
}
/// <summary>

info.id = id;
brain.SendState(this, info);
if (recorder != null && recorder.record && Application.isEditor)
{
recorder.WriteExperience(info);
}
info.textObservation = "";
}

/// </remarks>
public virtual void CollectObservations()
{
}
/// <summary>

{
actionMasker.SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action on branch 0</param>
protected void SetActionMask(int actionIndex)
{
actionMasker.SetActionMask(0, new int[1]{actionIndex});
actionMasker.SetActionMask(0, new int[1] {actionIndex});
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action</param>
protected void SetActionMask(int branch, int actionIndex)
{
actionMasker.SetActionMask(branch, new int[1]{actionIndex});
actionMasker.SetActionMask(branch, new int[1] {actionIndex});
}
/// <summary>

{
actionMasker.SetActionMask(branch, actionIndices);
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.

info.vectorObservation.Add(observation.z);
info.vectorObservation.Add(observation.w);
}
/// <summary>
/// Adds a boolean observation to the vector observation of the agent.
/// Increases the size of the agent's vector observation by 1.

/// <param name="textAction">Text action.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{
}
/// <summary>

/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>

/// </summary>
public virtual void AgentReset()
{
}
/// <summary>

{
action.textActions = textActions;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>

var tempRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
if (width != texture2D.width || height != texture2D.height)
{
texture2D.Resize(width, height);

152
UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs


/// Keeps track of which brains have data to send on the current step
Dictionary<string, bool> m_hasData =
new Dictionary<string, bool>();
new CommunicatorObjects.UnityRLOutput();
new CommunicatorObjects.UnityRLOutput();
/// Keeps track of last training mode sent by External
bool m_isTraining;

try
{
initializationInput = m_communicator.Initialize(
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
}
catch
{

}
/// <summary>
/// Converts an AgentInfo into a protobuf-generated AgentInfoProto.
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
/// <param name="info">The AgentInfo to convert.</param>
public static CommunicatorObjects.AgentInfoProto
AgentInfoConvertor(AgentInfo info)
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = { info.stackedVectorObservation },
StoredVectorActions = { info.storedVectorActions },
StoredTextActions = info.storedTextActions,
TextObservation = info.textObservation,
Reward = info.reward,
MaxStepReached = info.maxStepReached,
Done = info.done,
Id = info.id,
};
if (info.memories != null)
{
agentInfoProto.Memories.Add(info.memories);
}
if (info.actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(info.actionMasks);
}
foreach (Texture2D obs in info.visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
/// <summary>
/// Converts a BrainParameters object into a protobuf BrainParametersProto so it can be sent.
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="brainParameters">The BrainParameters.</param>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public static CommunicatorObjects.BrainParametersProto BrainParametersConvertor(
BrainParameters brainParameters, string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = brainParameters.vectorObservationSize,
NumStackedVectorObservations = brainParameters.numStackedVectorObservations,
VectorActionSize = {brainParameters.vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)brainParameters.vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(
brainParameters.vectorActionDescriptions);
foreach (resolution res in brainParameters.cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
/// <summary>
/// Sends the brain info. If at least one brain has an agent in need of
/// a decision or if the academy is done, the data is sent via
/// Communicator. Else, a new step is realized. The data can only be

{
m_currentAgents[brainKey].Add(agent);
}
// If at least one agent has data to send, then append data to
// the message and update hasSentState
if (m_currentAgents[brainKey].Count > 0)

CommunicatorObjects.AgentInfoProto agentInfoProto =
AgentInfoConvertor(agentInfo[agent]);
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
m_hasData[brainKey] = true;
}

m_currentUnityRLOutput.GlobalDone = m_academyDone;
SendBatchedMessageHelper();
}
// The message was just sent so we must reset hasSentState and
// triedSendState
foreach (string k in m_currentAgents.Keys)

void SendBatchedMessageHelper()
{
var input = m_communicator.Exchange(
new CommunicatorObjects.UnityOutput{
RlOutput = m_currentUnityRLOutput
});
new CommunicatorObjects.UnityOutput
{
RlOutput = m_currentUnityRLOutput
});
m_messagesReceived += 1;
foreach (string k in m_currentUnityRLOutput.AgentInfos.Keys)

if (input == null)
{
m_command = CommunicatorObjects.CommandProto.Quit;

}
foreach (var brainName in rlInput.AgentActions.Keys)
{
if (!m_currentAgents[brainName].Any())
if (!m_currentAgents[brainName].Any())
{
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count(); i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(
action.VectorActions.ToArray());
agent.UpdateMemoriesAction(
action.Memories.ToList());
agent.UpdateTextAction(
action.TextActions);
agent.UpdateValueAction(
action.Value);
}
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count; i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(action.VectorActions.ToArray());
agent.UpdateMemoriesAction(action.Memories.ToList());
agent.UpdateTextAction(action.TextActions);
agent.UpdateValueAction(action.Value);
}
}
}

47
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


public SpaceType vectorActionSpaceType = SpaceType.discrete;
/**< \brief Defines if the action is discrete or continuous */
/// <summary>
/// Converts a BrainParameters object into a protobuf BrainParametersProto so it can be sent.
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public CommunicatorObjects.BrainParametersProto
ToProto(string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = vectorObservationSize,
NumStackedVectorObservations = numStackedVectorObservations,
VectorActionSize = {vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(vectorActionDescriptions);
foreach (resolution res in cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
public BrainParameters()
{
}
public BrainParameters(CommunicatorObjects.BrainParametersProto brainParametersProto)
{
vectorObservationSize = brainParametersProto.VectorObservationSize;
numStackedVectorObservations = brainParametersProto.NumStackedVectorObservations;
vectorActionSize = brainParametersProto.VectorActionSize.ToArray();
vectorActionDescriptions = brainParametersProto.VectorActionDescriptions.ToArray();
vectorActionSpaceType = (SpaceType)brainParametersProto.VectorActionSpaceType;
}
}
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +

71
UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


}
observationMatrixList.Add(
BatchVisualObservations(texturesHolder,
Utilities.TextureToFloatArray(texturesHolder,
brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
}

#endif
}
/// <summary>
/// Converts a list of Texture2D into a Tensor.
/// </summary>
/// <returns>
/// A 4 dimensional float Tensor of dimension
/// [batch_size, height, width, channel].
/// Where batch_size is the number of input textures,
/// height corresponds to the height of the texture,
/// width corresponds to the width of the texture,
/// channel corresponds to the number of channels extracted from the
/// input textures, based on the input blackAndWhite flag
/// (3 if the flag is false, 1 otherwise).
/// The values of the Tensor are between 0 and 1.
/// </returns>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have same width and height.
/// </param>
/// <param name="blackAndWhite">
/// If set to <c>true</c> the textures
/// will be converted to grayscale before being stored in the tensor.
/// </param>
public static float[,,,] BatchVisualObservations(
List<Texture2D> textures, bool blackAndWhite)
{
int batchSize = textures.Count();
int width = textures[0].width;
int height = textures[0].height;
int pixels = 0;
if (blackAndWhite)
pixels = 1;
else
pixels = 3;
float[,,,] result = new float[batchSize, height, width, pixels];
float[] resultTemp = new float[batchSize * height * width * pixels];
int hwp = height * width * pixels;
int wp = width * pixels;
for (int b = 0; b < batchSize; b++)
{
Color32[] cc = textures[b].GetPixels32();
for (int h = height - 1; h >= 0; h--)
{
for (int w = 0; w < width; w++)
{
Color32 currentPixel = cc[(height - h - 1) * width + w];
if (!blackAndWhite)
{
// For Color32, the r, g and b values are between
// 0 and 255.
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
}
else
{
resultTemp[b * hwp + h * wp + w * pixels] =
(currentPixel.r + currentPixel.g + currentPixel.b)
/ 3f / 255.0f;
}
}
}
}
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
return result;
}
}
}
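For context on the refactor above, where the call to `BatchVisualObservations` is replaced by `Utilities.TextureToFloatArray`, here is a minimal usage sketch. It assumes `Utilities` lives in the `MLAgents` namespace and keeps the `(List<Texture2D>, bool)` signature and the `[batch, height, width, channel]` return layout documented in the comment above; the surrounding class and texture setup are illustrative only.

```csharp
using System.Collections.Generic;
using UnityEngine;

public static class VisualObservationExample
{
    // Batches two same-sized RGB camera textures into a float array of shape
    // [2, height, width, 3] with values scaled to [0, 1], as described above.
    public static float[,,,] BuildBatch(Texture2D cameraA, Texture2D cameraB)
    {
        var textures = new List<Texture2D> { cameraA, cameraB };

        // blackAndWhite = false keeps three color channels;
        // true would average them down to a single grayscale channel.
        return MLAgents.Utilities.TextureToFloatArray(textures, false);
    }
}
```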

25
config/trainer_config.yaml


    max_steps: 5.0e5
    summary_freq: 2000
    time_horizon: 3
StudentBrain:
    trainer: imitation
    max_steps: 10000
    summary_freq: 1000
    brain_to_imitate: TeacherBrain
    batch_size: 16
    batches_per_epoch: 5
    num_layers: 4
    hidden_units: 64
    sequence_length: 16
    buffer_size: 128
StudentRecurrentBrain:
    trainer: imitation
    max_steps: 10000
    summary_freq: 1000
    brain_to_imitate: TeacherBrain
    batch_size: 16
    batches_per_epoch: 5
    num_layers: 4
    hidden_units: 64
    use_recurrent: true
    sequence_length: 32
    buffer_size: 128

59
docs/Training-Imitation-Learning.md


of training a medic NPC: instead of indirectly training a medic with the help
of a reward function, we can give the medic real world examples of observations
from the game and actions from a game controller to guide the medic's behavior.
More specifically, in this mode, the Brain type during training is set to Player
and all the actions performed with the controller (in addition to the agent
observations) will be recorded and sent to the Python API. The imitation
learning algorithm will then use these pairs of observations and actions from
the human player to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
Imitation Learning uses pairs of observations and actions from a demonstration to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
## Recording Demonstrations
It is possible to record demonstrations of agent behavior from the Unity Editor, and save them as assets. These demonstrations contain information on the observations, actions, and rewards for a given agent during the recording session. They can be managed from the Editor, as well as used for training with Offline Behavioral Cloning (see below).
In order to record demonstrations from an agent, add the `Demonstration Recorder` component to a GameObject in the scene which contains an `Agent` component. Once added, it is possible to name the demonstration that will be recorded from the agent.
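The component is normally added through the Inspector, but for agents that are spawned from code, a minimal setup sketch might look like the following. It assumes `DemonstrationRecorder` exposes the public `record` flag used by the Agent hook in this change, plus a `demonstrationName` string for the asset name; treat the exact field names as illustrative rather than definitive.

```csharp
using MLAgents;
using UnityEngine;

public class DemonstrationSetup : MonoBehaviour
{
    void Awake()
    {
        // Attach the recorder to the same GameObject that carries the Agent component.
        var recorder = GetComponent<DemonstrationRecorder>();
        if (recorder == null)
        {
            recorder = gameObject.AddComponent<DemonstrationRecorder>();
        }

        // Name of the .demo asset written under Assets/Demonstrations (assumed field).
        recorder.demonstrationName = "AgentRecording";

        // Recording only happens in the Editor while this flag is set.
        recorder.record = true;
    }
}
```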
## Using Behavioral Cloning
<p align="center">
<img src="images/demo_component.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
When `Record` is checked, a demonstration will be created whenever the scene is played from the Editor. Depending on the complexity of the task, anywhere from a few minutes to a few hours of demonstration data may be necessary to be useful for imitation learning. When you have recorded enough data, end the Editor play session, and a `.demo` file will be created in the `Assets/Demonstrations` folder. This file contains the demonstrations. Clicking on the file will provide metadata about the demonstration in the inspector.
<p align="center">
<img src="images/demo_inspector.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
## Training with Behavioral Cloning
the simplest one of them is Behavioral Cloning. It works by collecting training
data from a teacher, and then simply uses it to directly learn a policy, in the
the simplest one of them is Behavioral Cloning. It works by collecting demonstrations from a teacher, and then simply uses them to directly learn a policy, in the
1. In order to use imitation learning in a scene, the first thing you will need
is to create two Brains, one which will be the "Teacher," and the other which
### Offline Training
With offline behavioral cloning, we can use demonstrations (`.demo` files) generated using the `Demonstration Recorder` as the dataset used to train a behavior.
1. Choose an agent you would like to train to imitate a set of demonstrations.
2. Record a set of demonstrations using the `Demonstration Recorder` (see above). For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and set the Brain to Control in the Broadcast Hub. For more information on Brains, see [here](Learning-Environment-Design-Brains.md).
4. Open the `config/bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the demonstration file recorded in step 2. In our case this is: `./UnitySDK/Assets/Demonstrations/AgentRecording.demo`
6. Launch `mlagents-learn`, providing `./config/bc_config.yaml` as the config parameter, and your environment as the `--env` parameter.
7. (Optional) Observe training performance using TensorBoard.
This will use the demonstration file to train a neural-network-driven agent to directly imitate the actions provided in the demonstration. The environment will launch and be used for evaluating the agent's performance during training.
### Online Training
It is also possible to provide demonstrations in real time during training, without pre-recording a demonstration file. The steps to do this are as follows:
1. First create two Brains, one which will be the "Teacher," and the other which
will be the "Student." We will assume that the names of the Brain
`GameObject`s are "Teacher" and "Student" respectively.
2. Set the "Teacher" Brain to Player mode, and properly configure the inputs to

Assets folder (or a subdirectory within Assets of your choosing), and use
with `Internal` Brain.
### BC Teacher Helper
**BC Teacher Helper**
We provide a convenience utility, the `BC Teacher Helper` component, which you can add
to the Teacher Agent.

2. Reset the training buffer. This enables you to instruct the agents to forget
their buffer of recent experiences. This is useful if you'd like to get them
to quickly learn a new behavior. The default command to reset the buffer is
to press `C` on the keyboard.
to press `C` on the keyboard.

1
ml-agents/mlagents/envs/communicator_objects/__init__.py


from .brain_parameters_proto_pb2 import *
from .brain_type_proto_pb2 import *
from .command_proto_pb2 import *
from .demonstration_meta_proto_pb2 import *
from .engine_configuration_proto_pb2 import *
from .environment_parameters_proto_pb2 import *
from .header_pb2 import *

9
ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py


from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()

name='mlagents/envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])

_sym_db.RegisterFileDescriptor(DESCRIPTOR)
DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\034MLAgents.CommunicatorObjects'))
DESCRIPTOR._options = None
_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
name='UnityToExternal',

options=None,
serialized_options=None,
serialized_start=140,
serialized_end=243,
methods=[

containing_service=None,
input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
options=None,
serialized_options=None,
),
])
_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNAL)

18
ml-agents/mlagents/envs/environment.py


import subprocess
from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .utilities import process_pixels
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\

arr = [float(x) for x in arr]
return arr
@staticmethod
def _process_pixels(image_bytes, gray_scale):
"""
Converts byte array observation image into numpy array, re-sizes it, and optionally converts it to grey scale
:param image_bytes: input byte array corresponding to image
:return: processed numpy array of observation from environment
"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
"""
Collects experience information from all external brains in environment at current step.

agent_info_list = output.agentInfos[b].value
vis_obs = []
for i in range(self.brains[b].number_visual_observations):
obs = [self._process_pixels(x.visual_observations[i],
obs = [process_pixels(x.visual_observations[i],
self.brains[b].camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
vis_obs += [np.array(obs)]

4
ml-agents/mlagents/trainers/__init__.py


from .models import *
from .trainer_controller import *
from .bc.models import *
from .bc.trainer import *
from .bc.offline_trainer import *
from .bc.online_trainer import *
from .bc.policy import *
from .ppo.models import *
from .ppo.trainer import *

from .demo_loader import *

3
ml-agents/mlagents/trainers/bc/__init__.py


from .models import *
from .trainer import *
from .online_trainer import *
from .offline_trainer import *
from .policy import *

2
ml-agents/mlagents/trainers/bc/policy.py


:param trainer_parameters: Defined training parameters.
:param load: Whether a pre-trained model will be loaded or a new one created.
"""
super().__init__(seed, brain, trainer_parameters)
super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
with self.graph.as_default():
with self.graph.as_default():

106
ml-agents/mlagents/trainers/bc/trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Imitation)
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging

from mlagents.trainers.buffer import Buffer
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class BehavioralCloningTrainer(Trainer):
"""The ImitationTrainer is an implementation of the imitation learning."""
class BCTrainer(Trainer):
"""The BCTrainer is an implementation of Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
for k in self.param_keys:
print(k)
print(k not in trainer_parameters)
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
"brain {1}.".format(k, brain.brain_name))
super(BehavioralCloningTrainer, self).__init__(brain, trainer_parameters, training, run_id)
super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.brain_name = brain.brain_name
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
self.n_sequences = 1
self.training_buffer = Buffer()
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.demonstration_buffer = Buffer()
self.evaluation_buffer = Buffer()
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
@property
def parameters(self):

else:
return run_out['action'], None, None, None, None
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs):
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).

# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]
for agent_id in info_teacher.agents:
self.training_buffer[agent_id].last_brain_info = info_teacher
for agent_id in next_info_teacher.agents:
stored_info_teacher = self.training_buffer[agent_id].last_brain_info
if stored_info_teacher is None:
continue
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":
self.training_buffer.reset_update_buffer()
else:
info_teacher_record, next_info_teacher_record = "true", "true"
if info_teacher_record == "true" and next_info_teacher_record == "true":
if not stored_info_teacher.local_done[idx]:
for i in range(self.policy.vis_obs_size):
self.training_buffer[agent_id]['visual_obs%d' % i]\
.append(stored_info_teacher.visual_observations[i][idx])
if self.policy.use_vec_obs:
self.training_buffer[agent_id]['vector_obs']\
.append(stored_info_teacher.vector_observations[idx])
if self.policy.use_recurrent:
if stored_info_teacher.memories.shape[1] == 0:
stored_info_teacher.memories = np.zeros((len(stored_info_teacher.agents),
self.policy.m_size))
self.training_buffer[agent_id]['memory'].append(stored_info_teacher.memories[idx])
self.training_buffer[agent_id]['actions'].append(next_info_teacher.
previous_vector_actions[next_idx])
# Used to collect information about student performance.
self.training_buffer[agent_id].last_brain_info = info_student
self.evaluation_buffer[agent_id].last_brain_info = info_student
# Used to collect information about student performance.
stored_info_student = self.training_buffer[agent_id].last_brain_info
stored_info_student = self.evaluation_buffer[agent_id].last_brain_info
if stored_info_student is None:
continue
else:

:param current_info: Current AllBrainInfo
:param next_info: Next AllBrainInfo
"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.training_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.training_buffer[info_teacher.agents[l]]['actions']) > 0
if ((info_teacher.local_done[l] or horizon_reached) and teacher_filled):
agent_id = info_teacher.agents[l]
self.training_buffer.append_update_buffer(
agent_id, batch_size=None, training_length=self.policy.sequence_length)
self.training_buffer[agent_id].reset_agent()
info_student = next_info[self.brain_name]
for l in range(len(info_student.agents)):
if info_student.local_done[l]:

A signal that the Episode has ended. The buffer must be reset.
Gets called only when the academy resets.
"""
self.training_buffer.reset_all()
self.evaluation_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
return len(self.training_buffer.update_buffer['actions']) > self.n_sequences
return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
self.training_buffer.update_buffer.shuffle()
self.demonstration_buffer.update_buffer.shuffle()
num_batches = min(len(self.training_buffer.update_buffer['actions']) //
num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
buffer = self.training_buffer.update_buffer
update_buffer = self.demonstration_buffer.update_buffer
mini_batch = buffer.make_mini_batch(start, end)
mini_batch = update_buffer.make_mini_batch(start, end)
run_out = self.policy.update(mini_batch, self.n_sequences)
loss = run_out['policy_loss']
batch_losses.append(loss)

2
ml-agents/mlagents/trainers/buffer.py


"""
self.update_buffer.reset_agent()
def reset_all(self):
def reset_local_buffers(self):
"""
Resets all the local buffers
"""

16
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class PPOTrainer(Trainer):

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',

for k in self.param_keys:
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
"brain {1}.".format(k, brain.brain_name))
super(PPOTrainer, self).__init__(brain.brain_name, trainer_parameters, training, run_id)
self.check_param_keys()
self.policy = PPOPolicy(seed, brain, trainer_parameters,
self.is_training, load)

A signal that the Episode has ended. The buffer must be reset.
Gets called only when the academy resets.
"""
self.training_buffer.reset_all()
self.training_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

def update_policy(self):
"""
Uses training_buffer to update the policy.
Uses demonstration_buffer to update the policy.
"""
n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
value_total, policy_total, forward_total, inverse_total = [], [], [], []

25
ml-agents/mlagents/trainers/trainer.py


class Trainer(object):
"""This class is the abstract class for the mlagents.trainers"""
"""This class is the base class for the mlagents.trainers"""
def __init__(self, brain_name, trainer_parameters, training, run_id):
def __init__(self, brain, trainer_parameters, training, run_id):
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param run_id: The identifier of the current run
:BrainParameters brain: Brain to be trained.
:dict trainer_parameters: The parameters for the trainer (dictionary).
:bool training: Whether the trainer is set for training.
:int run_id: The identifier of the current run
self.brain_name = brain_name
self.param_keys = []
self.brain_name = brain.brain_name
self.run_id = run_id
self.trainer_parameters = trainer_parameters
self.is_training = training

def __str__(self):
return '''Empty Trainer'''
return '''{} Trainer'''.format(self.__class__)
def check_param_keys(self):
for k in self.param_keys:
if k not in self.trainer_parameters:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for the {1} trainer of "
"brain {2}.".format(k, self.__class__, self.brain_name))
@property
def parameters(self):

def update_policy(self):
"""
Uses training_buffer to update model.
Uses demonstration_buffer to update model.
"""
raise UnityTrainerException("The update_model method was not implemented.")

12
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.envs.exception import UnityEnvironmentException
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError

trainer_parameters[k] = trainer_config[_brain_key][k]
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in self.env.external_brain_names:
if trainer_parameters_dict[brain_name]['trainer'] == 'imitation':
self.trainers[brain_name] = BehavioralCloningTrainer(
if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
self.trainers[brain_name] = OfflineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)
elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
self.trainers[brain_name] = OnlineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)

16
ml-agents/tests/mock_communicator.py


from mlagents.envs.communicator import Communicator
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput,\
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput,\
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput, \
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput, \
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3):
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3,
brain_name="RealFakeBrain", vec_obs_size=3):
"""
Python side of the grpc communication. Python is the client and Unity the server

self.visual_inputs = visual_inputs
self.has_been_closed = False
self.num_agents = num_agents
self.brain_name = brain_name
self.vec_obs_size = vec_obs_size
if stack:
self.num_stacks = 2
else:

height=40,
gray_scale=False) for i in range(self.visual_inputs)]
bp = BrainParametersProto(
vector_observation_size=3,
vector_observation_size=self.vec_obs_size,
brain_name="RealFakeBrain",
brain_name=self.brain_name,
brain_type=2
)
rl_init = UnityRLInitializationOutput(

UnityRLOutput.ListAgentInfoProto(value=list_agent_info)
global_done = False
try:
global_done = (inputs.rl_input.agent_actions["RealFakeBrain"].value[0].vector_actions[0] == -1)
fake_brain = inputs.rl_input.agent_actions["RealFakeBrain"]
global_done = (fake_brain.value[0].vector_actions[0] == -1)
except:
pass
result = UnityRLOutput(

1
ml-agents/tests/trainers/test_meta_curriculum.py


assert curriculum_b.lesson_num == 3
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, measure_vals):

104
ml-agents/tests/trainers/test_trainer_controller.py


import tensorflow as tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
@pytest.fixture
def dummy_start():
return '''{ "AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-5",
"brainParameters": [{
"vectorObservationSize": 3,
"numStackedVectorObservations" : 2,
"vectorActionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"vectorActionDescriptions": ["",""],
"vectorActionSpaceType": 1
}]
}'''.encode()
@pytest.fixture

@pytest.fixture
def dummy_bc_config():
def dummy_online_bc_config():
trainer: imitation
trainer: online_bc
brain_to_imitate: ExpertBrain
batches_per_epoch: 16
batch_size: 32

curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_offline_bc_config():
return yaml.load(
'''
default:
trainer: offline_bc
demo_path: ./tests/trainers/test.demo
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
memory_size: 8
use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_bad_config():
return yaml.load(

discrete_action=True, visual_inputs=1)
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '', "tests/test_mlagents.trainers.py", False)
assert(tc.env.brain_names[0] == 'RealFakeBrain')
assert (tc.env.brain_names[0] == 'RealFakeBrain')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')

discrete_action=True, visual_inputs=1)
mock_load.return_value = dummy_config
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '','', False)
1, 1, 1, '', '', False)
assert(len(config) == 1)
assert(config['default']['trainer'] == "ppo")
assert (len(config) == 1)
assert (config['default']['trainer'] == "ppo")
dummy_bc_config, dummy_bad_config):
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:

config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert(len(tc.trainers) == 1)
assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
assert (len(tc.trainers) == 1)
assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
# Test for Behavior Cloning Trainer
mock_load.return_value = dummy_bc_config
# Test for Online Behavior Cloning Trainer
mock_load.return_value = dummy_online_bc_config
assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))
assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))
# Test for proper exception when trainer name is incorrect
mock_load.return_value = dummy_bad_config

tc._initialize_trainers(config)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialize_offline_trainers(mock_communicator, mock_launcher, dummy_config,
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:
mock_communicator.return_value = MockCommunicator(
discrete_action=False, stack=False, visual_inputs=0,
brain_name="Ball3DBrain", vec_obs_size=8)
tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
1, 1, '', "tests/test_mlagents.trainers.py",
False)
# Test for Offline Behavior Cloning Trainer
mock_load.return_value = dummy_offline_bc_config
config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))

95
UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs


using System.Text;
using MLAgents;
using UnityEditor;
/// <summary>
/// Renders a custom UI for Demonstration Scriptable Object.
/// </summary>
[CustomEditor(typeof(Demonstration))]
[CanEditMultipleObjects]
public class DemonstrationEditor : Editor
{
SerializedProperty brainParameters;
SerializedProperty demoMetaData;
void OnEnable()
{
brainParameters = serializedObject.FindProperty("brainParameters");
demoMetaData = serializedObject.FindProperty("metaData");
}
/// <summary>
/// Renders Inspector UI for Demonstration metadata.
/// </summary>
void MakeMetaDataProperty(SerializedProperty property)
{
var nameProp = property.FindPropertyRelative("demonstrationName");
var expProp = property.FindPropertyRelative("numberExperiences");
var epiProp = property.FindPropertyRelative("numberEpisodes");
var rewProp = property.FindPropertyRelative("meanReward");
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var expLabel = expProp.displayName + ": " + expProp.intValue;
var epiLabel = epiProp.displayName + ": " + epiProp.intValue;
var rewLabel = rewProp.displayName + ": " + rewProp.floatValue;
EditorGUILayout.LabelField(nameLabel);
EditorGUILayout.LabelField(expLabel);
EditorGUILayout.LabelField(epiLabel);
EditorGUILayout.LabelField(rewLabel);
}