
Demonstration Recorder (#1240)

/develop-generalizationTraining-TrainerController
GitHub · 6 years ago
Current commit
3c9603d6
57 files changed, with 4009 additions and 412 deletions
  1. .gitignore (4)
  2. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (9)
  3. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (77)
  4. UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs (152)
  5. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (47)
  6. UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (71)
  7. config/trainer_config.yaml (25)
  8. docs/Training-Imitation-Learning.md (59)
  9. ml-agents/mlagents/envs/communicator_objects/__init__.py (1)
  10. ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py (9)
  11. ml-agents/mlagents/envs/environment.py (18)
  12. ml-agents/mlagents/trainers/__init__.py (4)
  13. ml-agents/mlagents/trainers/bc/__init__.py (3)
  14. ml-agents/mlagents/trainers/bc/policy.py (2)
  15. ml-agents/mlagents/trainers/bc/trainer.py (106)
  16. ml-agents/mlagents/trainers/buffer.py (2)
  17. ml-agents/mlagents/trainers/ppo/trainer.py (16)
  18. ml-agents/mlagents/trainers/trainer.py (25)
  19. ml-agents/mlagents/trainers/trainer_controller.py (12)
  20. ml-agents/tests/mock_communicator.py (16)
  21. ml-agents/tests/trainers/test_meta_curriculum.py (1)
  22. ml-agents/tests/trainers/test_trainer_controller.py (104)
  23. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs (95)
  24. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta (11)
  25. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (60)
  26. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta (11)
  27. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (66)
  28. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta (11)
  29. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll (1001)
  30. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta (30)
  31. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll (623)
  32. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta (30)
  33. UnitySDK/Assets/ML-Agents/Resources.meta (8)
  34. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs (289)
  35. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta (11)
  36. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs (76)
  37. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta (11)
  38. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs (65)
  39. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta (11)
  40. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs (138)
  41. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta (3)
  42. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs (73)
  43. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta (3)
  44. config/bc_config.yaml (55)
  45. docs/images/demo_component.png (102)
  46. docs/images/demo_inspector.png (198)
  47. ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py (98)
  48. ml-agents/mlagents/envs/utilities.py (19)
  49. ml-agents/mlagents/trainers/bc/offline_trainer.py (53)
  50. ml-agents/mlagents/trainers/bc/online_trainer.py (116)
  51. ml-agents/mlagents/trainers/demo_loader.py (151)
  52. ml-agents/tests/trainers/test.demo (60)
  53. ml-agents/tests/trainers/test_demo_loader.py (14)
  54. protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto (12)
  55. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png (68)
  56. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta (86)

4
.gitignore


/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Gizmos*
/UnitySDK/Assets/Demonstrations*
# Tensorflow Model Info
/models

/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
# Generated doc folders
/docs/html

9
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


}
}
brainBatcher = new MLAgents.Batcher(communicator);
brainBatcher = new Batcher(communicator);
// Initialize Brains and communicator (if present)
foreach (var brain in brains)

isCommunicatorOn = true;
var academyParameters =
new MLAgents.CommunicatorObjects.UnityRLInitializationOutput();
new CommunicatorObjects.UnityRLInitializationOutput();
academyParameters.Name = gameObject.name;
academyParameters.Version = kApiVersion;
foreach (var brain in brains)

MLAgents.Batcher.BrainParametersConvertor(
bp,
bp.ToProto(
(MLAgents.CommunicatorObjects.BrainTypeProto)
(CommunicatorObjects.BrainTypeProto)
brain.brainType));
}

77
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using System.Linq;
using Google.Protobuf;
using MLAgents.CommunicatorObjects;
using UnityEngine;

/// to separate between different agents in the environment.
/// </summary>
public int id;
/// <summary>
/// Converts an AgentInfo to a protobuf-generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
/// <param name="info">The AgentInfo to convert.</param>
public CommunicatorObjects.AgentInfoProto ToProto()
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = {stackedVectorObservation},
StoredVectorActions = {storedVectorActions},
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,
MaxStepReached = maxStepReached,
Done = done,
Id = id,
};
if (memories != null)
{
agentInfoProto.Memories.Add(memories);
}
if (actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(actionMasks);
}
foreach (Texture2D obs in visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
}
/// <summary>

/// Array of Texture2D used to render to from render buffer before
/// transforming into float tensor.
Texture2D[] textureArray;
/// <summary>
/// Demonstration recorder.
/// </summary>
private DemonstrationRecorder recorder;
/// Monobehavior function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

{
textureArray[i] = new Texture2D(1, 1, TextureFormat.RGB24, false);
}
recorder = GetComponent<DemonstrationRecorder>();
}
/// Helper method for the <see cref="OnEnable"/> event, created to

/// </remarks>
public virtual void InitializeAgent()
{
}
/// <summary>

info.id = id;
brain.SendState(this, info);
if (recorder != null && recorder.record && Application.isEditor)
{
recorder.WriteExperience(info);
}
info.textObservation = "";
}

/// </remarks>
public virtual void CollectObservations()
{
}
/// <summary>

{
actionMasker.SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action on branch 0</param>
protected void SetActionMask(int actionIndex)
{
actionMasker.SetActionMask(0, new int[1]{actionIndex});
actionMasker.SetActionMask(0, new int[1] {actionIndex});
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action</param>
protected void SetActionMask(int branch, int actionIndex)
{
actionMasker.SetActionMask(branch, new int[1]{actionIndex});
actionMasker.SetActionMask(branch, new int[1] {actionIndex});
}
/// <summary>

{
actionMasker.SetActionMask(branch, actionIndices);
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.

info.vectorObservation.Add(observation.z);
info.vectorObservation.Add(observation.w);
}
/// <summary>
/// Adds a boolean observation to the vector observation of the agent.
/// Increases the size of the agent's vector observation by 1.

/// <param name="textAction">Text action.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{
}
/// <summary>

/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>

/// </summary>
public virtual void AgentReset()
{
}
/// <summary>

{
action.textActions = textActions;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>

var tempRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
if (width != texture2D.width || height != texture2D.height)
{
texture2D.Resize(width, height);

152
UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs


/// Keeps track of which brains have data to send on the current step
Dictionary<string, bool> m_hasData =
new Dictionary<string, bool>();
new CommunicatorObjects.UnityRLOutput();
new CommunicatorObjects.UnityRLOutput();
/// Keeps track of last training mode sent by External
bool m_isTraining;

try
{
initializationInput = m_communicator.Initialize(
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
}
catch
{

}
/// <summary>
/// Converts an AgentInfo to a protobuf-generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
/// <param name="info">The AgentInfo to convert.</param>
public static CommunicatorObjects.AgentInfoProto
AgentInfoConvertor(AgentInfo info)
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = { info.stackedVectorObservation },
StoredVectorActions = { info.storedVectorActions },
StoredTextActions = info.storedTextActions,
TextObservation = info.textObservation,
Reward = info.reward,
MaxStepReached = info.maxStepReached,
Done = info.done,
Id = info.id,
};
if (info.memories != null)
{
agentInfoProto.Memories.Add(info.memories);
}
if (info.actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(info.actionMasks);
}
foreach (Texture2D obs in info.visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
/// <summary>
/// Converts a Brain into a protobuf BrainParametersProto so it can be sent
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="brainParameters">The BrainParameters.</param>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public static CommunicatorObjects.BrainParametersProto BrainParametersConvertor(
BrainParameters brainParameters, string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = brainParameters.vectorObservationSize,
NumStackedVectorObservations = brainParameters.numStackedVectorObservations,
VectorActionSize = {brainParameters.vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)brainParameters.vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(
brainParameters.vectorActionDescriptions);
foreach (resolution res in brainParameters.cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
/// <summary>
/// Sends the brain info. If at least one brain has an agent in need of
/// a decision or if the academy is done, the data is sent via
/// Communicator. Else, a new step is realized. The data can only be

{
m_currentAgents[brainKey].Add(agent);
}
// If at least one agent has data to send, then append data to
// the message and update hasSentState
if (m_currentAgents[brainKey].Count > 0)

CommunicatorObjects.AgentInfoProto agentInfoProto =
AgentInfoConvertor(agentInfo[agent]);
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
m_hasData[brainKey] = true;
}

m_currentUnityRLOutput.GlobalDone = m_academyDone;
SendBatchedMessageHelper();
}
// The message was just sent so we must reset hasSentState and
// triedSendState
foreach (string k in m_currentAgents.Keys)

void SendBatchedMessageHelper()
{
var input = m_communicator.Exchange(
new CommunicatorObjects.UnityOutput{
RlOutput = m_currentUnityRLOutput
});
new CommunicatorObjects.UnityOutput
{
RlOutput = m_currentUnityRLOutput
});
m_messagesReceived += 1;
foreach (string k in m_currentUnityRLOutput.AgentInfos.Keys)

if (input == null)
{
m_command = CommunicatorObjects.CommandProto.Quit;

}
foreach (var brainName in rlInput.AgentActions.Keys)
{
if (!m_currentAgents[brainName].Any())
if (!m_currentAgents[brainName].Any())
{
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count(); i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(
action.VectorActions.ToArray());
agent.UpdateMemoriesAction(
action.Memories.ToList());
agent.UpdateTextAction(
action.TextActions);
agent.UpdateValueAction(
action.Value);
}
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count; i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(action.VectorActions.ToArray());
agent.UpdateMemoriesAction(action.Memories.ToList());
agent.UpdateTextAction(action.TextActions);
agent.UpdateValueAction(action.Value);
}
}
}

47
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


public SpaceType vectorActionSpaceType = SpaceType.discrete;
/**< \brief Defines if the action is discrete or continuous */
/// <summary>
/// Converts a Brain into a protobuf BrainParametersProto so it can be sent
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public CommunicatorObjects.BrainParametersProto
ToProto(string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = vectorObservationSize,
NumStackedVectorObservations = numStackedVectorObservations,
VectorActionSize = {vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(vectorActionDescriptions);
foreach (resolution res in cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
public BrainParameters()
{
}
public BrainParameters(CommunicatorObjects.BrainParametersProto brainParametersProto)
{
vectorObservationSize = brainParametersProto.VectorObservationSize;
numStackedVectorObservations = brainParametersProto.NumStackedVectorObservations;
vectorActionSize = brainParametersProto.VectorActionSize.ToArray();
vectorActionDescriptions = brainParametersProto.VectorActionDescriptions.ToArray();
vectorActionSpaceType = (SpaceType)brainParametersProto.VectorActionSpaceType;
}
}
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +

71
UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


}
observationMatrixList.Add(
BatchVisualObservations(texturesHolder,
Utilities.TextureToFloatArray(texturesHolder,
brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
}

#endif
}
/// <summary>
/// Converts a list of Texture2D into a Tensor.
/// </summary>
/// <returns>
/// A 4 dimensional float Tensor of dimension
/// [batch_size, height, width, channel].
/// Where batch_size is the number of input textures,
/// height corresponds to the height of the texture,
/// width corresponds to the width of the texture,
/// channel corresponds to the number of channels extracted from the
/// input textures (based on the input blackAndWhite flag:
/// 3 if the flag is false, 1 otherwise).
/// The values of the Tensor are between 0 and 1.
/// </returns>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have the same width and height.
/// </param>
/// <param name="blackAndWhite">
/// If set to <c>true</c> the textures
/// will be converted to grayscale before being stored in the tensor.
/// </param>
public static float[,,,] BatchVisualObservations(
List<Texture2D> textures, bool blackAndWhite)
{
int batchSize = textures.Count();
int width = textures[0].width;
int height = textures[0].height;
int pixels = 0;
if (blackAndWhite)
pixels = 1;
else
pixels = 3;
float[,,,] result = new float[batchSize, height, width, pixels];
float[] resultTemp = new float[batchSize * height * width * pixels];
int hwp = height * width * pixels;
int wp = width * pixels;
for (int b = 0; b < batchSize; b++)
{
Color32[] cc = textures[b].GetPixels32();
for (int h = height - 1; h >= 0; h--)
{
for (int w = 0; w < width; w++)
{
Color32 currentPixel = cc[(height - h - 1) * width + w];
if (!blackAndWhite)
{
// For Color32, the r, g and b values are between
// 0 and 255.
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
}
else
{
resultTemp[b * hwp + h * wp + w * pixels] =
(currentPixel.r + currentPixel.g + currentPixel.b)
/ 3f / 255.0f;
}
}
}
}
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
return result;
}
}
}

25
config/trainer_config.yaml


max_steps: 5.0e5
summary_freq: 2000
time_horizon: 3
StudentBrain:
trainer: imitation
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
sequence_length: 16
buffer_size: 128
StudentRecurrentBrain:
trainer: imitation
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
use_recurrent: true
sequence_length: 32
buffer_size: 128

59
docs/Training-Imitation-Learning.md


of training a medic NPC : instead of indirectly training a medic with the help
of a reward function, we can give the medic real world examples of observations
from the game and actions from a game controller to guide the medic's behavior.
More specifically, in this mode, the Brain type during training is set to Player
and all the actions performed with the controller (in addition to the agent
observations) will be recorded and sent to the Python API. The imitation
learning algorithm will then use these pairs of observations and actions from
the human player to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
Imitation Learning uses pairs of observations and actions
from a demonstration to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
## Recording Demonstrations
It is possible to record demonstrations of agent behavior from the Unity Editor, and save them as assets. These demonstrations contain information on the observations, actions, and rewards for a given agent during the recording session. They can be managed from the Editor, as well as used for training with Offline Behavioral Cloning (see below).
In order to record demonstrations from an agent, add the `Demonstration Recorder` component to a GameObject in the scene which contains an `Agent` component. Once added, it is possible to name the demonstration that will be recorded from the agent.
## Using Behavioral Cloning
<p align="center">
<img src="images/demo_component.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
When `Record` is checked, a demonstration will be created whenever the scene is played from the Editor. Depending on the complexity of the task, anywhere from a few minutes to a few hours of demonstration data may be necessary to be useful for imitation learning. When you have recorded enough data, end the Editor play session, and a `.demo` file will be created in the `Assets/Demonstrations` folder. This file contains the demonstrations. Clicking on the file will provide metadata about the demonstration in the inspector.
<p align="center">
<img src="images/demo_inspector.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
## Training with Behavioral Cloning
the simplest one of them is Behavioral Cloning. It works by collecting training
data from a teacher, and then simply uses it to directly learn a policy, in the
the simplest one of them is Behavioral Cloning. It works by collecting demonstrations from a teacher, and then simply uses them to directly learn a policy, in the
1. In order to use imitation learning in a scene, the first thing you will need
is to create two Brains, one which will be the "Teacher," and the other which
### Offline Training
With offline behavioral cloning, we can use demonstrations (`.demo` files) generated using the `Demonstration Recorder` as the dataset used to train a behavior.
1. Choose an agent you would like to have learn to imitate some set of demonstrations.
2. Record a set of demonstrations using the `Demonstration Recorder` (see above). For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and set the Brain to Control in the Broadcast Hub. For more information on Brains, see [here](Learning-Environment-Design-Brains.md).
4. Open the `config/bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the demonstration file recorded in step 2. In our case this is: `./UnitySDK/Assets/Demonstrations/AgentRecording.demo`
6. Launch `mlagents-learn`, providing `./config/bc_config.yaml` as the config parameter, and your environment as the `--env` parameter (a sketch of such a config entry is shown below).
7. (Optional) Observe training performance using Tensorboard.
This will use the demonstration file to train a neural-network-driven agent to directly imitate the actions provided in the demonstration. The environment will launch and be used for evaluating the agent's performance during training.
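For reference, here is a minimal sketch of what an offline behavioral cloning entry in `config/bc_config.yaml` could look like. It is modeled on the `dummy_offline_bc_config` test fixture added in this change; the brain name and hyperparameter values below are illustrative rather than the shipped defaults.

```yaml
# Hypothetical offline_bc entry, modeled on the test fixture in
# ml-agents/tests/trainers/test_trainer_controller.py. Adjust the brain name,
# demo_path, and hyperparameters to your own scene.
StudentBrain:
    trainer: offline_bc
    demo_path: ./UnitySDK/Assets/Demonstrations/AgentRecording.demo
    batch_size: 32
    batches_per_epoch: 16
    hidden_units: 128
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    num_layers: 2
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
```

With an entry like this in place, step 6 amounts to something like `mlagents-learn ./config/bc_config.yaml --env=<your_env> --train`.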
### Online Training
It is also possible to provide demonstrations in real time during training, without pre-recording a demonstration file. The steps to do this are as follows (a configuration sketch follows these steps):
1. First create two Brains, one which will be the "Teacher," and the other which
will be the "Student." We will assume that the names of the Brain
`GameObject`s are "Teacher" and "Student" respectively.
2. Set the "Teacher" Brain to Player mode, and properly configure the inputs to

Assets folder (or a subdirectory within Assets of your choosing), and use
with `Internal` Brain.
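On the configuration side, the main difference from the pre-recorded case is that the student Brain's trainer is set to `online_bc` (this change renames the old `imitation` trainer) and `brain_to_imitate` points at the teacher Brain. Below is a sketch following the existing `StudentBrain` block shown in `config/trainer_config.yaml` above; the names and values are illustrative only.

```yaml
# Hypothetical online_bc (teacher/student) entry; values follow the StudentBrain
# block from config/trainer_config.yaml shown earlier and are illustrative only.
StudentBrain:
    trainer: online_bc
    brain_to_imitate: TeacherBrain
    max_steps: 10000
    summary_freq: 1000
    batch_size: 16
    batches_per_epoch: 5
    num_layers: 4
    hidden_units: 64
    sequence_length: 16
    buffer_size: 128
```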
### BC Teacher Helper
**BC Teacher Helper**
We provide a convenience utility, the `BC Teacher Helper` component, that you can add
to the Teacher Agent.

2. Reset the training buffer. This enables you to instruct the agents to forget
their buffer of recent experiences. This is useful if you'd like to get them
to quickly learn a new behavior. The default command to reset the buffer is
to press `C` on the keyboard.
to press `C` on the keyboard.

1
ml-agents/mlagents/envs/communicator_objects/__init__.py


from .brain_parameters_proto_pb2 import *
from .brain_type_proto_pb2 import *
from .command_proto_pb2 import *
from .demonstration_meta_proto_pb2 import *
from .engine_configuration_proto_pb2 import *
from .environment_parameters_proto_pb2 import *
from .header_pb2 import *

9
ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py


from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()

name='mlagents/envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])

_sym_db.RegisterFileDescriptor(DESCRIPTOR)
DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\034MLAgents.CommunicatorObjects'))
DESCRIPTOR._options = None
_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
name='UnityToExternal',

options=None,
serialized_options=None,
serialized_start=140,
serialized_end=243,
methods=[

containing_service=None,
input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
options=None,
serialized_options=None,
),
])
_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNAL)

18
ml-agents/mlagents/envs/environment.py


import subprocess
from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .utilities import process_pixels
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\

arr = [float(x) for x in arr]
return arr
@staticmethod
def _process_pixels(image_bytes, gray_scale):
"""
Converts byte array observation image into numpy array, re-sizes it, and optionally converts it to grey scale
:param image_bytes: input byte array corresponding to image
:return: processed numpy array of observation from environment
"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
"""
Collects experience information from all external brains in environment at current step.

agent_info_list = output.agentInfos[b].value
vis_obs = []
for i in range(self.brains[b].number_visual_observations):
obs = [self._process_pixels(x.visual_observations[i],
obs = [process_pixels(x.visual_observations[i],
self.brains[b].camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
vis_obs += [np.array(obs)]

4
ml-agents/mlagents/trainers/__init__.py


from .models import *
from .trainer_controller import *
from .bc.models import *
from .bc.trainer import *
from .bc.offline_trainer import *
from .bc.online_trainer import *
from .bc.policy import *
from .ppo.models import *
from .ppo.trainer import *

from .demo_loader import *

3
ml-agents/mlagents/trainers/bc/__init__.py


from .models import *
from .trainer import *
from .online_trainer import *
from .offline_trainer import *
from .policy import *

2
ml-agents/mlagents/trainers/bc/policy.py


:param trainer_parameters: Defined training parameters.
:param load: Whether a pre-trained model will be loaded or a new one created.
"""
super().__init__(seed, brain, trainer_parameters)
super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
with self.graph.as_default():
with self.graph.as_default():

106
ml-agents/mlagents/trainers/bc/trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Imitation)
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging

from mlagents.trainers.buffer import Buffer
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class BehavioralCloningTrainer(Trainer):
"""The ImitationTrainer is an implementation of the imitation learning."""
class BCTrainer(Trainer):
"""The BCTrainer is an implementation of Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
for k in self.param_keys:
print(k)
print(k not in trainer_parameters)
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
"brain {1}.".format(k, brain.brain_name))
super(BehavioralCloningTrainer, self).__init__(brain, trainer_parameters, training, run_id)
super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.brain_name = brain.brain_name
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
self.n_sequences = 1
self.training_buffer = Buffer()
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.demonstration_buffer = Buffer()
self.evaluation_buffer = Buffer()
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
@property
def parameters(self):

else:
return run_out['action'], None, None, None, None
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs):
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).

# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]
for agent_id in info_teacher.agents:
self.training_buffer[agent_id].last_brain_info = info_teacher
for agent_id in next_info_teacher.agents:
stored_info_teacher = self.training_buffer[agent_id].last_brain_info
if stored_info_teacher is None:
continue
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":
self.training_buffer.reset_update_buffer()
else:
info_teacher_record, next_info_teacher_record = "true", "true"
if info_teacher_record == "true" and next_info_teacher_record == "true":
if not stored_info_teacher.local_done[idx]:
for i in range(self.policy.vis_obs_size):
self.training_buffer[agent_id]['visual_obs%d' % i]\
.append(stored_info_teacher.visual_observations[i][idx])
if self.policy.use_vec_obs:
self.training_buffer[agent_id]['vector_obs']\
.append(stored_info_teacher.vector_observations[idx])
if self.policy.use_recurrent:
if stored_info_teacher.memories.shape[1] == 0:
stored_info_teacher.memories = np.zeros((len(stored_info_teacher.agents),
self.policy.m_size))
self.training_buffer[agent_id]['memory'].append(stored_info_teacher.memories[idx])
self.training_buffer[agent_id]['actions'].append(next_info_teacher.
previous_vector_actions[next_idx])
# Used to collect information about student performance.
self.training_buffer[agent_id].last_brain_info = info_student
self.evaluation_buffer[agent_id].last_brain_info = info_student
# Used to collect information about student performance.
stored_info_student = self.training_buffer[agent_id].last_brain_info
stored_info_student = self.evaluation_buffer[agent_id].last_brain_info
if stored_info_student is None:
continue
else:

:param current_info: Current AllBrainInfo
:param next_info: Next AllBrainInfo
"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.training_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.training_buffer[info_teacher.agents[l]]['actions']) > 0
if ((info_teacher.local_done[l] or horizon_reached) and teacher_filled):
agent_id = info_teacher.agents[l]
self.training_buffer.append_update_buffer(
agent_id, batch_size=None, training_length=self.policy.sequence_length)
self.training_buffer[agent_id].reset_agent()
info_student = next_info[self.brain_name]
for l in range(len(info_student.agents)):
if info_student.local_done[l]:

A signal that the Episode has ended. The buffer must be reset.
Get only called when the academy resets.
"""
self.training_buffer.reset_all()
self.evaluation_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
return len(self.training_buffer.update_buffer['actions']) > self.n_sequences
return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
self.training_buffer.update_buffer.shuffle()
self.demonstration_buffer.update_buffer.shuffle()
num_batches = min(len(self.training_buffer.update_buffer['actions']) //
num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
buffer = self.training_buffer.update_buffer
update_buffer = self.demonstration_buffer.update_buffer
mini_batch = buffer.make_mini_batch(start, end)
mini_batch = update_buffer.make_mini_batch(start, end)
run_out = self.policy.update(mini_batch, self.n_sequences)
loss = run_out['policy_loss']
batch_losses.append(loss)

2
ml-agents/mlagents/trainers/buffer.py


"""
self.update_buffer.reset_agent()
def reset_all(self):
def reset_local_buffers(self):
"""
Resets all the local local_buffers
"""

16
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class PPOTrainer(Trainer):

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',

for k in self.param_keys:
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
"brain {1}.".format(k, brain.brain_name))
super(PPOTrainer, self).__init__(brain.brain_name, trainer_parameters, training, run_id)
self.check_param_keys()
self.policy = PPOPolicy(seed, brain, trainer_parameters,
self.is_training, load)

A signal that the Episode has ended. The buffer must be reset.
Get only called when the academy resets.
"""
self.training_buffer.reset_all()
self.training_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

def update_policy(self):
"""
Uses training_buffer to update the policy.
Uses demonstration_buffer to update the policy.
"""
n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
value_total, policy_total, forward_total, inverse_total = [], [], [], []

25
ml-agents/mlagents/trainers/trainer.py


class Trainer(object):
"""This class is the abstract class for the mlagents.trainers"""
"""This class is the base class for the mlagents.trainers"""
def __init__(self, brain_name, trainer_parameters, training, run_id):
def __init__(self, brain, trainer_parameters, training, run_id):
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param run_id: The identifier of the current run
:BrainParameters brain: Brain to be trained.
:dict trainer_parameters: The parameters for the trainer (dictionary).
:bool training: Whether the trainer is set for training.
:int run_id: The identifier of the current run
self.brain_name = brain_name
self.param_keys = []
self.brain_name = brain.brain_name
self.run_id = run_id
self.trainer_parameters = trainer_parameters
self.is_training = training

def __str__(self):
return '''Empty Trainer'''
return '''{} Trainer'''.format(self.__class__)
def check_param_keys(self):
for k in self.param_keys:
if k not in self.trainer_parameters:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for the {1} trainer of "
"brain {2}.".format(k, self.__class__, self.brain_name))
@property
def parameters(self):

def update_policy(self):
"""
Uses training_buffer to update model.
Uses demonstration_buffer to update model.
"""
raise UnityTrainerException("The update_model method was not implemented.")

12
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.envs.exception import UnityEnvironmentException
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError

trainer_parameters[k] = trainer_config[_brain_key][k]
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in self.env.external_brain_names:
if trainer_parameters_dict[brain_name]['trainer'] == 'imitation':
self.trainers[brain_name] = BehavioralCloningTrainer(
if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
self.trainers[brain_name] = OfflineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)
elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
self.trainers[brain_name] = OnlineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)

16
ml-agents/tests/mock_communicator.py


from mlagents.envs.communicator import Communicator
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput,\
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput,\
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput, \
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput, \
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3):
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3,
brain_name="RealFakeBrain", vec_obs_size=3):
"""
Python side of the grpc communication. Python is the client and Unity the server

self.visual_inputs = visual_inputs
self.has_been_closed = False
self.num_agents = num_agents
self.brain_name = brain_name
self.vec_obs_size = vec_obs_size
if stack:
self.num_stacks = 2
else:

height=40,
gray_scale=False) for i in range(self.visual_inputs)]
bp = BrainParametersProto(
vector_observation_size=3,
vector_observation_size=self.vec_obs_size,
brain_name="RealFakeBrain",
brain_name=self.brain_name,
brain_type=2
)
rl_init = UnityRLInitializationOutput(

UnityRLOutput.ListAgentInfoProto(value=list_agent_info)
global_done = False
try:
global_done = (inputs.rl_input.agent_actions["RealFakeBrain"].value[0].vector_actions[0] == -1)
fake_brain = inputs.rl_input.agent_actions["RealFakeBrain"]
global_done = (fake_brain.value[0].vector_actions[0] == -1)
except:
pass
result = UnityRLOutput(

1
ml-agents/tests/trainers/test_meta_curriculum.py


assert curriculum_b.lesson_num == 3
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, measure_vals):

104
ml-agents/tests/trainers/test_trainer_controller.py


import tensorflow as tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
@pytest.fixture
def dummy_start():
return '''{ "AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-5",
"brainParameters": [{
"vectorObservationSize": 3,
"numStackedVectorObservations" : 2,
"vectorActionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"vectorActionDescriptions": ["",""],
"vectorActionSpaceType": 1
}]
}'''.encode()
@pytest.fixture

@pytest.fixture
def dummy_bc_config():
def dummy_online_bc_config():
trainer: imitation
trainer: online_bc
brain_to_imitate: ExpertBrain
batches_per_epoch: 16
batch_size: 32

curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_offline_bc_config():
return yaml.load(
'''
default:
trainer: offline_bc
demo_path: ./tests/trainers/test.demo
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
memory_size: 8
use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_bad_config():
return yaml.load(

discrete_action=True, visual_inputs=1)
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '', "tests/test_mlagents.trainers.py", False)
assert(tc.env.brain_names[0] == 'RealFakeBrain')
assert (tc.env.brain_names[0] == 'RealFakeBrain')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')

discrete_action=True, visual_inputs=1)
mock_load.return_value = dummy_config
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '','', False)
1, 1, 1, '', '', False)
assert(len(config) == 1)
assert(config['default']['trainer'] == "ppo")
assert (len(config) == 1)
assert (config['default']['trainer'] == "ppo")
dummy_bc_config, dummy_bad_config):
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:

config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert(len(tc.trainers) == 1)
assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
assert (len(tc.trainers) == 1)
assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
# Test for Behavior Cloning Trainer
mock_load.return_value = dummy_bc_config
# Test for Online Behavior Cloning Trainer
mock_load.return_value = dummy_online_bc_config
assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))
assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))
# Test for proper exception when trainer name is incorrect
mock_load.return_value = dummy_bad_config

tc._initialize_trainers(config)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialize_offline_trainers(mock_communicator, mock_launcher, dummy_config,
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:
mock_communicator.return_value = MockCommunicator(
discrete_action=False, stack=False, visual_inputs=0,
brain_name="Ball3DBrain", vec_obs_size=8)
tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
1, 1, '', "tests/test_mlagents.trainers.py",
False)
# Test for Offline Behavior Cloning Trainer
mock_load.return_value = dummy_offline_bc_config
config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))

95
UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs


using System.Text;
using MLAgents;
using UnityEditor;
/// <summary>
/// Renders a custom UI for Demonstration Scriptable Object.
/// </summary>
[CustomEditor(typeof(Demonstration))]
[CanEditMultipleObjects]
public class DemonstrationEditor : Editor
{
SerializedProperty brainParameters;
SerializedProperty demoMetaData;
void OnEnable()
{
brainParameters = serializedObject.FindProperty("brainParameters");
demoMetaData = serializedObject.FindProperty("metaData");
}
/// <summary>
/// Renders Inspector UI for Demonstration metadata.
/// </summary>
void MakeMetaDataProperty(SerializedProperty property)
{
var nameProp = property.FindPropertyRelative("demonstrationName");
var expProp = property.FindPropertyRelative("numberExperiences");
var epiProp = property.FindPropertyRelative("numberEpisodes");
var rewProp = property.FindPropertyRelative("meanReward");
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var expLabel = expProp.displayName + ": " + expProp.intValue;
var epiLabel = epiProp.displayName + ": " + epiProp.intValue;
var rewLabel = rewProp.displayName + ": " + rewProp.floatValue;
EditorGUILayout.LabelField(nameLabel);
EditorGUILayout.LabelField(expLabel);
EditorGUILayout.LabelField(epiLabel);
EditorGUILayout.LabelField(rewLabel);
}
/// <summary>
/// Constructs label for action size array.
/// </summary>
static string BuildActionArrayLabel(SerializedProperty actionSizeProperty)
{
var actionSize = actionSizeProperty.arraySize;
StringBuilder actionLabel = new StringBuilder("[ ");
for (int i = 0; i < actionSize; i++)
{
actionLabel.Append(actionSizeProperty.GetArrayElementAtIndex(i).intValue);
if (i < actionSize - 1)
{
actionLabel.Append(", ");
}
}
actionLabel.Append(" ]");
return actionLabel.ToString();
}
/// <summary>
/// Renders Inspector UI for Brain Parameters of Demonstration.
/// </summary>
void MakeBrainParametersProperty(SerializedProperty property)
{
var vecObsSizeProp = property.FindPropertyRelative("vectorObservationSize");
var numStackedProp = property.FindPropertyRelative("numStackedVectorObservations");
var actSizeProperty = property.FindPropertyRelative("vectorActionSize");
var camResProp = property.FindPropertyRelative("cameraResolutions");
var actSpaceTypeProp = property.FindPropertyRelative("vectorActionSpaceType");
var vecObsSizeLabel = vecObsSizeProp.displayName + ": " + vecObsSizeProp.intValue;
var numStackedLabel = numStackedProp.displayName + ": " + numStackedProp.intValue;
var vecActSizeLabel = actSizeProperty.displayName + ": " + BuildActionArrayLabel(actSizeProperty);
var camResLabel = camResProp.displayName + ": " + camResProp.arraySize;
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " + (SpaceType) actSpaceTypeProp.enumValueIndex;
EditorGUILayout.LabelField(vecObsSizeLabel);
EditorGUILayout.LabelField(numStackedLabel);
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(camResLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
}
public override void OnInspectorGUI()
{
serializedObject.Update();
EditorGUILayout.LabelField("Meta Data", EditorStyles.boldLabel);
MakeMetaDataProperty(demoMetaData);
EditorGUILayout.LabelField("Brain Parameters", EditorStyles.boldLabel);
MakeBrainParametersProperty(brainParameters);
serializedObject.ApplyModifiedProperties();
}
}

11
UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta


fileFormatVersion: 2
guid: 84f9cd83f56c74790a51444a6cfe4945
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

60
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs


using System;
using System.IO;
using MLAgents.CommunicatorObjects;
using UnityEditor;
using UnityEngine;
using UnityEditor.Experimental.AssetImporters;
namespace MLAgents
{
/// <summary>
/// Asset Importer used to parse demonstration files.
/// </summary>
[ScriptedImporter(1, new[] {"demo"})]
public class DemonstrationImporter : ScriptedImporter
{
private const string IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";
public override void OnImportAsset(AssetImportContext ctx)
{
var inputType = Path.GetExtension(ctx.assetPath);
if (inputType == null)
{
throw new Exception("Demonstration import error.");
}
try
{
// Read first two proto objects containing metadata and brain parameters.
Stream reader = File.OpenRead(ctx.assetPath);
var metaDataProto = DemonstrationMetaProto.Parser.ParseDelimitedFrom(reader);
var metaData = new DemonstrationMetaData(metaDataProto);
reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
var brainParameters = new BrainParameters(brainParamsProto);
reader.Close();
var demonstration = ScriptableObject.CreateInstance<Demonstration>();
demonstration.Initialize(brainParameters, metaData);
userData = demonstration.ToString();
Texture2D texture = (Texture2D)
AssetDatabase.LoadAssetAtPath(IconPath, typeof(Texture2D));
#if UNITY_2017_3_OR_NEWER
ctx.AddObjectToAsset(ctx.assetPath, demonstration, texture);
ctx.SetMainObject(demonstration);
#else
ctx.SetMainAsset(ctx.assetPath, model);
#endif
}
catch
{
return;
}
}
}
}

11
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta


fileFormatVersion: 2
guid: 7bd65ce151aaa4a41a45312543c56be1
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

66
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs


using System.Collections.Generic;
using NUnit.Framework;
using UnityEngine;
using System.IO.Abstractions.TestingHelpers;
namespace MLAgents.Tests
{
public class DemonstrationTests : MonoBehaviour
{
private const string DemoDirecory = "Assets/Demonstrations/";
private const string ExtensionType = ".demo";
private const string DemoName = "Test";
[Test]
public void TestSanitization()
{
const string dirtyString = "abc123&!@";
const string knownCleanString = "abc123";
var cleanString = DemonstrationRecorder.SanitizeName(dirtyString);
Assert.AreNotEqual(dirtyString, cleanString);
Assert.AreEqual(cleanString, knownCleanString);
}
[Test]
public void TestStoreInitalize()
{
var fileSystem = new MockFileSystem();
var demoStore = new DemonstrationStore(fileSystem);
Assert.IsFalse(fileSystem.Directory.Exists(DemoDirecory));
var brainParameters = new BrainParameters
{
vectorObservationSize = 3,
numStackedVectorObservations = 2,
cameraResolutions = new [] {new resolution()},
vectorActionDescriptions = new [] {"TestActionA", "TestActionB"},
vectorActionSize = new [] {2, 2},
vectorActionSpaceType = SpaceType.discrete
};
demoStore.Initialize(DemoName, brainParameters, "TestBrain");
Assert.IsTrue(fileSystem.Directory.Exists(DemoDirecory));
Assert.IsTrue(fileSystem.FileExists(DemoDirecory + DemoName + ExtensionType));
var agentInfo = new AgentInfo
{
reward = 1f,
visualObservations = new List<Texture2D>(),
actionMasks = new []{false, true},
done = true,
id = 5,
maxStepReached = true,
memories = new List<float>(),
stackedVectorObservation = new List<float>() {1f, 1f, 1f},
storedTextActions = "TestAction",
storedVectorActions = new [] {0f, 1f},
textObservation = "TestAction",
};
demoStore.Record(agentInfo);
demoStore.Close();
}
}
}

11
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta


fileFormatVersion: 2
guid: 4c5a970f5b6be4b57b3bd7a5f84c3623
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

1001
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll
The file diff is too large to display.

30
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta


fileFormatVersion: 2
guid: 2d7ba4e1037b64de5b860bcbe15755b3
PluginImporter:
externalObjects: {}
serializedVersion: 2
iconMap: {}
executionOrder: {}
isPreloaded: 0
isOverridable: 0
platformData:
- first:
Any:
second:
enabled: 1
settings: {}
- first:
Editor: Editor
second:
enabled: 0
settings:
DefaultValueInitialized: true
- first:
Windows Store Apps: WindowsStoreApps
second:
enabled: 0
settings:
CPU: AnyCPU
userData:
assetBundleName:
assetBundleVariant:

623
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll
The file diff is too large to display.

30
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta


fileFormatVersion: 2
guid: b01205587773841ad95e8ceda347e8bd
PluginImporter:
externalObjects: {}
serializedVersion: 2
iconMap: {}
executionOrder: {}
isPreloaded: 0
isOverridable: 0
platformData:
- first:
Any:
second:
enabled: 1
settings: {}
- first:
Editor: Editor
second:
enabled: 0
settings:
DefaultValueInitialized: true
- first:
Windows Store Apps: WindowsStoreApps
second:
enabled: 0
settings:
CPU: AnyCPU
userData:
assetBundleName:
assetBundleVariant:

8
UnitySDK/Assets/ML-Agents/Resources.meta


fileFormatVersion: 2
guid: 1b3ab22264a5447df9e52684598ac3b0
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

289
UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs


// <auto-generated>
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
// </auto-generated>
#pragma warning disable 1591, 0612, 3021
#region Designer generated code
using pb = global::Google.Protobuf;
using pbc = global::Google.Protobuf.Collections;
using pbr = global::Google.Protobuf.Reflection;
using scg = global::System.Collections.Generic;
namespace MLAgents.CommunicatorObjects {
/// <summary>Holder for reflection information generated from mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
public static partial class DemonstrationMetaProtoReflection {
#region Descriptor
/// <summary>File descriptor for mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
public static pbr::FileDescriptor Descriptor {
get { return descriptor; }
}
private static pbr::FileDescriptor descriptor;
static DemonstrationMetaProtoReflection() {
byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CkFtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2RlbW9uc3Ry",
"YXRpb25fbWV0YV9wcm90by5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi",
"jQEKFkRlbW9uc3RyYXRpb25NZXRhUHJvdG8SEwoLYXBpX3ZlcnNpb24YASAB",
"KAUSGgoSZGVtb25zdHJhdGlvbl9uYW1lGAIgASgJEhQKDG51bWJlcl9zdGVw",
"cxgDIAEoBRIXCg9udW1iZXJfZXBpc29kZXMYBCABKAUSEwoLbWVhbl9yZXdh",
"cmQYBSABKAJCH6oCHE1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData,
new pbr::FileDescriptor[] { },
new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.DemonstrationMetaProto), global::MLAgents.CommunicatorObjects.DemonstrationMetaProto.Parser, new[]{ "ApiVersion", "DemonstrationName", "NumberSteps", "NumberEpisodes", "MeanReward" }, null, null, null)
}));
}
#endregion
}
#region Messages
public sealed partial class DemonstrationMetaProto : pb::IMessage<DemonstrationMetaProto> {
private static readonly pb::MessageParser<DemonstrationMetaProto> _parser = new pb::MessageParser<DemonstrationMetaProto>(() => new DemonstrationMetaProto());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<DemonstrationMetaProto> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::MLAgents.CommunicatorObjects.DemonstrationMetaProtoReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto(DemonstrationMetaProto other) : this() {
apiVersion_ = other.apiVersion_;
demonstrationName_ = other.demonstrationName_;
numberSteps_ = other.numberSteps_;
numberEpisodes_ = other.numberEpisodes_;
meanReward_ = other.meanReward_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto Clone() {
return new DemonstrationMetaProto(this);
}
/// <summary>Field number for the "api_version" field.</summary>
public const int ApiVersionFieldNumber = 1;
private int apiVersion_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int ApiVersion {
get { return apiVersion_; }
set {
apiVersion_ = value;
}
}
/// <summary>Field number for the "demonstration_name" field.</summary>
public const int DemonstrationNameFieldNumber = 2;
private string demonstrationName_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string DemonstrationName {
get { return demonstrationName_; }
set {
demonstrationName_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "number_steps" field.</summary>
public const int NumberStepsFieldNumber = 3;
private int numberSteps_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumberSteps {
get { return numberSteps_; }
set {
numberSteps_ = value;
}
}
/// <summary>Field number for the "number_episodes" field.</summary>
public const int NumberEpisodesFieldNumber = 4;
private int numberEpisodes_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumberEpisodes {
get { return numberEpisodes_; }
set {
numberEpisodes_ = value;
}
}
/// <summary>Field number for the "mean_reward" field.</summary>
public const int MeanRewardFieldNumber = 5;
private float meanReward_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public float MeanReward {
get { return meanReward_; }
set {
meanReward_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as DemonstrationMetaProto);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(DemonstrationMetaProto other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (ApiVersion != other.ApiVersion) return false;
if (DemonstrationName != other.DemonstrationName) return false;
if (NumberSteps != other.NumberSteps) return false;
if (NumberEpisodes != other.NumberEpisodes) return false;
if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(MeanReward, other.MeanReward)) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (ApiVersion != 0) hash ^= ApiVersion.GetHashCode();
if (DemonstrationName.Length != 0) hash ^= DemonstrationName.GetHashCode();
if (NumberSteps != 0) hash ^= NumberSteps.GetHashCode();
if (NumberEpisodes != 0) hash ^= NumberEpisodes.GetHashCode();
if (MeanReward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(MeanReward);
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (ApiVersion != 0) {
output.WriteRawTag(8);
output.WriteInt32(ApiVersion);
}
if (DemonstrationName.Length != 0) {
output.WriteRawTag(18);
output.WriteString(DemonstrationName);
}
if (NumberSteps != 0) {
output.WriteRawTag(24);
output.WriteInt32(NumberSteps);
}
if (NumberEpisodes != 0) {
output.WriteRawTag(32);
output.WriteInt32(NumberEpisodes);
}
if (MeanReward != 0F) {
output.WriteRawTag(45);
output.WriteFloat(MeanReward);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (ApiVersion != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(ApiVersion);
}
if (DemonstrationName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(DemonstrationName);
}
if (NumberSteps != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberSteps);
}
if (NumberEpisodes != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberEpisodes);
}
if (MeanReward != 0F) {
size += 1 + 4;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(DemonstrationMetaProto other) {
if (other == null) {
return;
}
if (other.ApiVersion != 0) {
ApiVersion = other.ApiVersion;
}
if (other.DemonstrationName.Length != 0) {
DemonstrationName = other.DemonstrationName;
}
if (other.NumberSteps != 0) {
NumberSteps = other.NumberSteps;
}
if (other.NumberEpisodes != 0) {
NumberEpisodes = other.NumberEpisodes;
}
if (other.MeanReward != 0F) {
MeanReward = other.MeanReward;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 8: {
ApiVersion = input.ReadInt32();
break;
}
case 18: {
DemonstrationName = input.ReadString();
break;
}
case 24: {
NumberSteps = input.ReadInt32();
break;
}
case 32: {
NumberEpisodes = input.ReadInt32();
break;
}
case 45: {
MeanReward = input.ReadFloat();
break;
}
}
}
}
}
#endregion
}
#endregion Designer generated code

11
UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta


fileFormatVersion: 2
guid: f7abfeda342414e059423ef90ede4c30
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

76
UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs


using System;
using MLAgents.CommunicatorObjects;
using UnityEngine;
namespace MLAgents
{
/// <summary>
/// Demonstration Object. Contains meta-data regarding demonstration.
/// Used for imitation learning, or other forms of learning from data.
/// </summary>
[Serializable]
public class Demonstration : ScriptableObject
{
public DemonstrationMetaData metaData;
public BrainParameters brainParameters;
public void Initialize(BrainParameters brainParameters,
DemonstrationMetaData demonstrationMetaData)
{
this.brainParameters = brainParameters;
metaData = demonstrationMetaData;
}
}
/// <summary>
/// Demonstration meta-data.
/// Kept in a struct for easy serialization and deserialization.
/// </summary>
[Serializable]
public class DemonstrationMetaData
{
public int numberExperiences;
public int numberEpisodes;
public float meanReward;
public string demonstrationName;
public const int ApiVersion = 1;
/// <summary>
/// Constructor for initializing metadata to default values.
/// </summary>
public DemonstrationMetaData()
{
}
/// <summary>
/// Initialize metadata values based on proto object.
/// </summary>
public DemonstrationMetaData(DemonstrationMetaProto demoProto)
{
numberEpisodes = demoProto.NumberEpisodes;
numberExperiences = demoProto.NumberSteps;
meanReward = demoProto.MeanReward;
demonstrationName = demoProto.DemonstrationName;
if (demoProto.ApiVersion != ApiVersion)
{
throw new Exception("API versions of demonstration are incompatible.");
}
}
/// <summary>
/// Convert metadata object to proto object.
/// </summary>
public DemonstrationMetaProto ToProto()
{
var demoProto = new DemonstrationMetaProto
{
ApiVersion = ApiVersion,
MeanReward = meanReward,
NumberSteps = numberExperiences,
NumberEpisodes = numberEpisodes,
DemonstrationName = demonstrationName
};
return demoProto;
}
}
}

11
UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta


fileFormatVersion: 2
guid: b651f66c75a1646c6ab48de06d0e13ef
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

65
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs


using UnityEngine;
using System.Text.RegularExpressions;
namespace MLAgents
{
/// <summary>
/// Demonstration Recorder Component.
/// </summary>
[RequireComponent(typeof(Agent))]
public class DemonstrationRecorder : MonoBehaviour
{
public bool record;
public string demonstrationName;
private Agent recordingAgent;
private string filePath;
private DemonstrationStore demoStore;
/// <summary>
/// Initializes Demonstration store.
/// </summary>
private void Start()
{
if (Application.isEditor && record)
{
recordingAgent = GetComponent<Agent>();
demoStore = new DemonstrationStore();
demonstrationName = SanitizeName(demonstrationName);
demoStore.Initialize(
demonstrationName,
recordingAgent.brain.brainParameters,
recordingAgent.brain.name);
Monitor.Log("Recording Demonstration of Agent: ", recordingAgent.name);
}
}
/// <summary>
/// Removes all characters except alphanumerics, spaces and hyphens from the demonstration name.
/// </summary>
public static string SanitizeName(string demoName)
{
var rgx = new Regex("[^a-zA-Z0-9 -]");
demoName = rgx.Replace(demoName, "");
return demoName;
}
/// <summary>
/// Forwards AgentInfo to Demonstration Store.
/// </summary>
public void WriteExperience(AgentInfo info)
{
demoStore.Record(info);
}
/// <summary>
/// Closes Demonstration store.
/// </summary>
private void OnApplicationQuit()
{
if (Application.isEditor && record)
{
demoStore.Close();
}
}
}
}
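For reference, SanitizeName keeps letters, digits, spaces and hyphens and strips everything else. A minimal Python sketch of the same rule (the function name below is illustrative only, not part of the SDK):

import re

def sanitize_name(demo_name: str) -> str:
    # Mirror of DemonstrationRecorder.SanitizeName: drop every character
    # that is not a letter, digit, space or hyphen.
    return re.sub(r"[^a-zA-Z0-9 -]", "", demo_name)

assert sanitize_name("My Demo_v2!") == "My Demov2"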

11
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta


fileFormatVersion: 2
guid: 50f710d360a49461cad67ff5e6bcefe1
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

138
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs


using System.IO;
using System.IO.Abstractions;
using Google.Protobuf;
using MLAgents.CommunicatorObjects;
namespace MLAgents
{
/// <summary>
/// Responsible for writing demonstration data to file.
/// </summary>
public class DemonstrationStore
{
public const int MetaDataBytes = 32; // Number of bytes allocated to metadata in demo file.
private readonly IFileSystem fileSystem;
private const string DemoDirectory = "Assets/Demonstrations/";
private const string ExtensionType = ".demo";
private string filePath;
private DemonstrationMetaData metaData;
private Stream writer;
private float cumulativeReward;
public DemonstrationStore(IFileSystem fileSystem)
{
this.fileSystem = fileSystem;
}
public DemonstrationStore()
{
fileSystem = new FileSystem();
}
/// <summary>
/// Initializes the Demonstration Store, and writes initial data.
/// </summary>
public void Initialize(
string demonstrationName, BrainParameters brainParameters, string brainName)
{
CreateDirectory();
CreateDemonstrationFile(demonstrationName);
WriteBrainParameters(brainName, brainParameters);
}
/// <summary>
/// Checks for the existence of the Demonstrations directory
/// and creates it if it does not exist.
/// </summary>
private void CreateDirectory()
{
if (!fileSystem.Directory.Exists(DemoDirectory))
{
fileSystem.Directory.CreateDirectory(DemoDirectory);
}
}
/// <summary>
/// Creates demonstration file.
/// </summary>
private void CreateDemonstrationFile(string demonstrationName)
{
// Creates demonstration file.
var literalName = demonstrationName;
filePath = DemoDirectory + literalName + ExtensionType;
var uniqueNameCounter = 0;
while (fileSystem.File.Exists(filePath))
{
literalName = demonstrationName + "_" + uniqueNameCounter;
filePath = DemoDirectory + literalName + ExtensionType;
uniqueNameCounter++;
}
writer = fileSystem.File.Create(filePath);
metaData = new DemonstrationMetaData {demonstrationName = demonstrationName};
var metaProto = metaData.ToProto();
metaProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Writes brain parameters to file.
/// </summary>
private void WriteBrainParameters(string brainName, BrainParameters brainParameters)
{
// Writes BrainParameters to file.
writer.Seek(MetaDataBytes + 1, 0);
var brainProto = brainParameters.ToProto(brainName, BrainTypeProto.Player);
brainProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Write AgentInfo experience to file.
/// </summary>
public void Record(AgentInfo info)
{
// Increment meta-data counters.
metaData.numberExperiences++;
cumulativeReward += info.reward;
if (info.done)
{
EndEpisode();
}
// Write AgentInfo to file.
var agentProto = info.ToProto();
agentProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Performs all clean-up necessary
/// </summary>
public void Close()
{
EndEpisode();
metaData.meanReward = cumulativeReward / metaData.numberEpisodes;
WriteMetadata();
writer.Close();
}
/// <summary>
/// Performs necessary episode-completion steps.
/// </summary>
private void EndEpisode()
{
metaData.numberEpisodes += 1;
}
/// <summary>
/// Writes meta-data.
/// </summary>
private void WriteMetadata()
{
var metaProto = metaData.ToProto();
var metaProtoBytes = metaProto.ToByteArray();
writer.Write(metaProtoBytes, 0, metaProtoBytes.Length);
writer.Seek(0, 0);
metaProto.WriteDelimitedTo(writer);
}
}
}
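The on-disk layout produced by DemonstrationStore is: a length-delimited DemonstrationMetaProto in a reserved region of MetaDataBytes (32) bytes, the BrainParameters proto starting at byte 33, and then one length-delimited AgentInfo proto per recorded step; Close() seeks back to byte 0 and rewrites the metadata with the final counts. A minimal reading sketch in Python under those assumptions (the file path is a placeholder), matching the INITIAL_POS = 33 convention used by demo_loader.py further below:

from google.protobuf.internal.decoder import _DecodeVarint32
from mlagents.envs.communicator_objects.demonstration_meta_proto_pb2 import (
    DemonstrationMetaProto)

with open("UnitySDK/Assets/Demonstrations/Hallway.demo", "rb") as fp:
    data = fp.read()

# The metadata message is length-prefixed and lives in the first 32 bytes.
size, pos = _DecodeVarint32(data, 0)
meta = DemonstrationMetaProto()
meta.ParseFromString(data[pos:pos + size])

# BrainParameters (and the per-step AgentInfo messages) start at byte 33.
pos = 33
print(meta.demonstration_name, meta.number_steps, meta.mean_reward)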

3
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta


fileFormatVersion: 2
guid: a79c7ccb2cd042b5b1e710b9588d921b
timeCreated: 1537388072

73
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs


using UnityEngine;
using System.Collections;
using System.Collections.Generic;
namespace MLAgents
{
public class Utilities
{
/// <summary>
/// Converts a list of Texture2D into a Tensor.
/// </summary>
/// <returns>
/// A 4 dimensional float Tensor of dimension
/// [batch_size, height, width, channel].
/// Where batch_size is the number of input textures,
/// height corresponds to the height of the texture,
/// width corresponds to the width of the texture,
/// channel corresponds to the number of channels extracted from the
/// input textures (based on the input blackAndWhite flag:
/// 3 if the flag is false, 1 otherwise).
/// The values of the Tensor are between 0 and 1.
/// </returns>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have same width and height.
/// </param>
/// <param name="blackAndWhite">
/// If set to <c>true</c> the textures
/// will be converted to grayscale before being stored in the tensor.
/// </param>
public static float[,,,] TextureToFloatArray(
List<Texture2D> textures, bool blackAndWhite)
{
int batchSize = textures.Count;
int width = textures[0].width;
int height = textures[0].height;
var pixels = blackAndWhite ? 1 : 3;
float[,,,] result = new float[batchSize, height, width, pixels];
float[] resultTemp = new float[batchSize * height * width * pixels];
int hwp = height * width * pixels;
int wp = width * pixels;
for (int b = 0; b < batchSize; b++)
{
Color32[] cc = textures[b].GetPixels32();
for (int h = height - 1; h >= 0; h--)
{
for (int w = 0; w < width; w++)
{
Color32 currentPixel = cc[(height - h - 1) * width + w];
if (!blackAndWhite)
{
// For Color32, the r, g and b values are between
// 0 and 255.
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
}
else
{
resultTemp[b * hwp + h * wp + w * pixels] =
(currentPixel.r + currentPixel.g + currentPixel.b)
/ 3f / 255.0f;
}
}
}
}
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
return result;
}
}
}

3
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta


fileFormatVersion: 2
guid: 0e664c25f496478c9c26df6688379f7e
timeCreated: 1537468595

55
config/bc_config.yaml


default:
trainer: offline_bc
batch_size: 64
beta: 5.0e-3
hidden_units: 128
learning_rate: 3.0e-4
max_steps: 5.0e4
memory_size: 256
batches_per_epoch: 10
num_epoch: 5
num_layers: 2
summary_freq: 1000
use_recurrent: false
sequence_length: 32
demo_path: ./UnitySDK/Assets/Demonstrations/Crawler_test.demo
HallwayBrain:
trainer: offline_bc
max_steps: 5.0e5
num_epoch: 5
batch_size: 64
batches_per_epoch: 5
num_layers: 2
hidden_units: 128
buffer_size: 512
use_recurrent: true
memory_size: 256
sequence_length: 32
demo_path: ./UnitySDK/Assets/Demonstrations/Hallway.demo
StudentBrain:
trainer: online_bc
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
sequence_length: 16
buffer_size: 128
StudentRecurrentBrain:
trainer: online_bc
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
use_recurrent: true
sequence_length: 32
buffer_size: 128

102
docs/images/demo_component.png

Before  After
Width: 882  |  Height: 150  |  Size: 30 KiB

198
docs/images/demo_inspector.png

Before  After
Width: 886  |  Height: 554  |  Size: 64 KiB

98
ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py


# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='mlagents/envs/communicator_objects/demonstration_meta_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nAmlagents/envs/communicator_objects/demonstration_meta_proto.proto\x12\x14\x63ommunicator_objects\"\x8d\x01\n\x16\x44\x65monstrationMetaProto\x12\x13\n\x0b\x61pi_version\x18\x01 \x01(\x05\x12\x1a\n\x12\x64\x65monstration_name\x18\x02 \x01(\t\x12\x14\n\x0cnumber_steps\x18\x03 \x01(\x05\x12\x17\n\x0fnumber_episodes\x18\x04 \x01(\x05\x12\x13\n\x0bmean_reward\x18\x05 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
)
_DEMONSTRATIONMETAPROTO = _descriptor.Descriptor(
name='DemonstrationMetaProto',
full_name='communicator_objects.DemonstrationMetaProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='api_version', full_name='communicator_objects.DemonstrationMetaProto.api_version', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='demonstration_name', full_name='communicator_objects.DemonstrationMetaProto.demonstration_name', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_steps', full_name='communicator_objects.DemonstrationMetaProto.number_steps', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_episodes', full_name='communicator_objects.DemonstrationMetaProto.number_episodes', index=3,
number=4, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='mean_reward', full_name='communicator_objects.DemonstrationMetaProto.mean_reward', index=4,
number=5, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=92,
serialized_end=233,
)
DESCRIPTOR.message_types_by_name['DemonstrationMetaProto'] = _DEMONSTRATIONMETAPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
DemonstrationMetaProto = _reflection.GeneratedProtocolMessageType('DemonstrationMetaProto', (_message.Message,), dict(
DESCRIPTOR = _DEMONSTRATIONMETAPROTO,
__module__ = 'mlagents.envs.communicator_objects.demonstration_meta_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.DemonstrationMetaProto)
))
_sym_db.RegisterMessage(DemonstrationMetaProto)
DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)

19
ml-agents/mlagents/envs/utilities.py


from PIL import Image
import numpy as np
import io
def process_pixels(image_bytes, gray_scale):
"""
Converts a byte array observation image into a numpy array
and optionally converts it to grayscale.
:param image_bytes: input byte array corresponding to image
:param gray_scale: whether to average the channels into a single grayscale channel
:return: processed numpy array of observation from environment
"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
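A small, self-contained check of process_pixels, assuming an in-memory PNG is a valid input (the 8x8 image below is only a test fixture):

import io
import numpy as np
from PIL import Image
from mlagents.envs.utilities import process_pixels

# Encode an 8x8 red RGB image as PNG bytes entirely in memory.
buf = io.BytesIO()
Image.new("RGB", (8, 8), color=(255, 0, 0)).save(buf, format="PNG")

rgb = process_pixels(buf.getvalue(), gray_scale=False)
gray = process_pixels(buf.getvalue(), gray_scale=True)

assert rgb.shape == (8, 8, 3) and gray.shape == (8, 8, 1)
assert 0.0 <= rgb.min() and rgb.max() <= 1.0  # values are scaled into [0, 1]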

53
ml-agents/mlagents/trainers/bc/offline_trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging
from mlagents.trainers.bc.trainer import BCTrainer
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.trainer import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")
class OfflineBCTrainer(BCTrainer):
"""The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training a Behavioral Cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(OfflineBCTrainer, self).__init__(
brain, trainer_parameters, training, load, seed, run_id)
self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path',
'demo_path']
self.check_param_keys()
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
brain_params, self.demonstration_buffer = demo_to_buffer(
trainer_parameters['demo_path'],
self.policy.sequence_length)
if brain.__dict__ != brain_params.__dict__:
raise UnityTrainerException("The provided demonstration is not compatible with the "
"brain being used for performance evaluation.")
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))

116
ml-agents/mlagents/trainers/bc/online_trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging
import numpy as np
from mlagents.envs import AllBrainInfo
from mlagents.trainers.bc.trainer import BCTrainer
logger = logging.getLogger("mlagents.trainers")
class OnlineBCTrainer(BCTrainer):
"""The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training a Behavioral Cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
run_id)
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
self.check_param_keys()
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
:param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
:param take_action_outputs: The outputs of the take action method.
"""
# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]
for agent_id in info_teacher.agents:
self.demonstration_buffer[agent_id].last_brain_info = info_teacher
for agent_id in next_info_teacher.agents:
stored_info_teacher = self.demonstration_buffer[agent_id].last_brain_info
if stored_info_teacher is None:
continue
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = \
next_info_teacher.text_observations[idx]. \
lower().split(",")
if next_info_teacher_reset == "true":
self.demonstration_buffer.reset_update_buffer()
else:
info_teacher_record, next_info_teacher_record = "true", "true"
if info_teacher_record == "true" and next_info_teacher_record == "true":
if not stored_info_teacher.local_done[idx]:
for i in range(self.policy.vis_obs_size):
self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
.append(stored_info_teacher.visual_observations[i][idx])
if self.policy.use_vec_obs:
self.demonstration_buffer[agent_id]['vector_obs'] \
.append(stored_info_teacher.vector_observations[idx])
if self.policy.use_recurrent:
if stored_info_teacher.memories.shape[1] == 0:
stored_info_teacher.memories = np.zeros(
(len(stored_info_teacher.agents),
self.policy.m_size))
self.demonstration_buffer[agent_id]['memory'].append(
stored_info_teacher.memories[idx])
self.demonstration_buffer[agent_id]['actions'].append(
next_info_teacher.previous_vector_actions[next_idx])
super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)
def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
"""
Checks agent histories for processing condition, and processes them as necessary.
Processing involves calculating value and advantage targets for model updating step.
:param current_info: Current AllBrainInfo
:param next_info: Next AllBrainInfo
"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
if (info_teacher.local_done[l] or horizon_reached) and teacher_filled:
agent_id = info_teacher.agents[l]
self.demonstration_buffer.append_update_buffer(
agent_id, batch_size=None, training_length=self.policy.sequence_length)
self.demonstration_buffer[agent_id].reset_agent()
super(OnlineBCTrainer, self).process_experiences(current_info, next_info)
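The teacher-to-trainer handshake above assumes the teacher Brain publishes a text observation of the form "<record>,<reset>" (for example "true,false"): the first flag gates whether the step is copied into the demonstration buffer, and the second requests that the update buffer be cleared. A tiny sketch of that parsing convention, with hypothetical observation strings:

def parse_teacher_flags(text_observation: str):
    # Empty text observations default to "record everything, never reset",
    # mirroring the fallback branch in add_experiences above.
    if text_observation == "":
        return True, False
    record, reset = text_observation.lower().split(",")
    return record == "true", reset == "true"

assert parse_teacher_flags("True,False") == (True, False)
assert parse_teacher_flags("") == (True, False)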

151
ml-agents/mlagents/trainers/demo_loader.py


import numpy as np
import pathlib
import logging
from mlagents.trainers.buffer import Buffer
from mlagents.envs.brain import BrainParameters, BrainInfo
from mlagents.envs.utilities import process_pixels
from mlagents.envs.communicator_objects import *
from google.protobuf.internal.decoder import _DecodeVarint32
logger = logging.getLogger("mlagents.trainers")
def brain_param_proto_to_obj(brain_param_proto):
resolution = [{
"height": x.height,
"width": x.width,
"blackAndWhite": x.gray_scale
} for x in brain_param_proto.camera_resolutions]
brain_params = BrainParameters(brain_param_proto.brain_name, {
"vectorObservationSize": brain_param_proto.vector_observation_size,
"numStackedVectorObservations": brain_param_proto.num_stacked_vector_observations,
"cameraResolutions": resolution,
"vectorActionSize": brain_param_proto.vector_action_size,
"vectorActionDescriptions": brain_param_proto.vector_action_descriptions,
"vectorActionSpaceType": brain_param_proto.vector_action_space_type
})
return brain_params
def agent_info_proto_to_brain_info(agent_info, brain_params):
vis_obs = []
agent_info_list = [agent_info]
for i in range(brain_params.number_visual_observations):
obs = [process_pixels(x.visual_observations[i],
brain_params.camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
vis_obs += [np.array(obs)]
if len(agent_info_list) == 0:
memory_size = 0
else:
memory_size = max([len(x.memories) for x in agent_info_list])
if memory_size == 0:
memory = np.zeros((0, 0))
else:
[x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
memory = np.array([x.memories for x in agent_info_list])
total_num_actions = sum(brain_params.vector_action_space_size)
mask_actions = np.ones((len(agent_info_list), total_num_actions))
for agent_index, agent_info in enumerate(agent_info_list):
if agent_info.action_mask is not None:
if len(agent_info.action_mask) == total_num_actions:
mask_actions[agent_index, :] = [
0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
if any([np.isnan(x.reward) for x in agent_info_list]):
logger.warning("An agent had a NaN reward.")
if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
logger.warning("An agent had a NaN observation.")
brain_info = BrainInfo(
visual_observation=vis_obs,
vector_observation=np.nan_to_num(
np.array([x.stacked_vector_observation for x in agent_info_list])),
text_observations=[x.text_observation for x in agent_info_list],
memory=memory,
reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
agents=[x.id for x in agent_info_list],
local_done=[x.done for x in agent_info_list],
vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
text_action=[x.stored_text_actions for x in agent_info_list],
max_reached=[x.max_step_reached for x in agent_info_list],
action_mask=mask_actions
)
return brain_info
def make_demo_buffer(brain_infos, brain_params, sequence_length):
# Create and populate buffer using experiences
demo_buffer = Buffer()
for idx, experience in enumerate(brain_infos):
if idx > len(brain_infos) - 2:
break
current_brain_info = brain_infos[idx]
next_brain_info = brain_infos[idx + 1]
demo_buffer[0].last_brain_info = current_brain_info
for i in range(brain_params.number_visual_observations):
demo_buffer[0]['visual_obs%d' % i] \
.append(current_brain_info.visual_observations[i][0])
if brain_params.vector_observation_space_size > 0:
demo_buffer[0]['vector_obs'] \
.append(current_brain_info.vector_observations[0])
demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
if next_brain_info.local_done[0]:
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
demo_buffer.reset_local_buffers()
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
return demo_buffer
def demo_to_buffer(file_path, sequence_length):
"""
Loads demonstration file and uses it to fill training buffer.
:param file_path: Location of demonstration file (.demo).
:param sequence_length: Length of trajectories to fill buffer.
:return:
"""
brain_params, brain_infos, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(brain_infos, brain_params, sequence_length)
return brain_params, demo_buffer
def load_demonstration(file_path):
"""
Loads and parses a demonstration file.
:param file_path: Location of demonstration file (.demo).
:return: BrainParameter and list of BrainInfos containing demonstration data.
"""
INITIAL_POS = 33
file_extension = pathlib.Path(file_path).suffix
if file_extension != '.demo':
raise ValueError("The file is not a '.demo' file. Please provide a file with the "
"correct extension.")
brain_params = None
brain_infos = []
data = open(file_path, "rb").read()
next_pos, pos, obs_decoded = 0, 0, 0
total_expected = 0
while pos < len(data):
next_pos, pos = _DecodeVarint32(data, pos)
if obs_decoded == 0:
meta_data_proto = DemonstrationMetaProto()
meta_data_proto.ParseFromString(data[pos:pos + next_pos])
total_expected = meta_data_proto.number_steps
pos = INITIAL_POS
if obs_decoded == 1:
brain_param_proto = BrainParametersProto()
brain_param_proto.ParseFromString(data[pos:pos + next_pos])
brain_params = brain_param_proto_to_obj(brain_param_proto)
pos += next_pos
if obs_decoded > 1:
agent_info = AgentInfoProto()
agent_info.ParseFromString(data[pos:pos + next_pos])
brain_info = agent_info_proto_to_brain_info(agent_info, brain_params)
brain_infos.append(brain_info)
if len(brain_infos) == total_expected:
break
pos += next_pos
obs_decoded += 1
return brain_params, brain_infos, total_expected
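A minimal usage sketch of the loader, assuming a .demo file recorded as above exists at the path used in bc_config.yaml:

from mlagents.trainers.demo_loader import demo_to_buffer, load_demonstration

demo_path = "./UnitySDK/Assets/Demonstrations/Hallway.demo"

# Inspect the raw demonstration: brain parameters plus one BrainInfo per step.
brain_params, brain_infos, total_expected = load_demonstration(demo_path)
print(brain_params.brain_name, len(brain_infos), total_expected)

# Or convert it straight into a training buffer of length-32 trajectories,
# as OfflineBCTrainer does with trainer_parameters['demo_path'].
brain_params, demo_buffer = demo_to_buffer(demo_path, sequence_length=32)
print(len(demo_buffer.update_buffer['actions']))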

60
ml-agents/tests/trainers/test.demo


(Binary .demo file: a recorded "Test9" demonstration for brain Ball3DBrain; raw protobuf bytes not shown.)

14
ml-agents/tests/trainers/test_demo_loader.py


import unittest.mock as mock
import pytest
from mlagents.trainers.demo_loader import load_demonstration, make_demo_buffer
def test_load_demo():
brain_parameters, brain_infos, total_expected = load_demonstration('./tests/trainers/test.demo')
assert (brain_parameters.brain_name == "Ball3DBrain")
assert (brain_parameters.vector_observation_space_size == 8)
assert (len(brain_infos) == total_expected)
demo_buffer = make_demo_buffer(brain_infos, brain_parameters, 1)
assert (len(demo_buffer.update_buffer['actions']) == total_expected - 1)

12
protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto


syntax = "proto3";
option csharp_namespace = "MLAgents.CommunicatorObjects";
package communicator_objects;
message DemonstrationMetaProto {
int32 api_version = 1;
string demonstration_name = 2;
int32 number_steps = 3;
int32 number_episodes = 4;
float mean_reward = 5;
}
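The same message is available from the generated Python module shown earlier; a quick round-trip sketch with made-up field values:

from mlagents.envs.communicator_objects.demonstration_meta_proto_pb2 import (
    DemonstrationMetaProto)

# Build a metadata message with illustrative values and serialize it.
meta = DemonstrationMetaProto(
    api_version=1,
    demonstration_name="Hallway",
    number_steps=5000,
    number_episodes=42,
    mean_reward=0.93,
)
payload = meta.SerializeToString()

# Parse it back and confirm the fields survived the round trip.
round_trip = DemonstrationMetaProto()
round_trip.ParseFromString(payload)
assert round_trip.number_steps == 5000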

68
UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png

Before  After
Width: 256  |  Height: 256  |  Size: 11 KiB

86
UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta


fileFormatVersion: 2
guid: 3352a0e8d253b4a4ea3782a6d7e09d9b
TextureImporter:
fileIDToRecycleName: {}
externalObjects: {}
serializedVersion: 4
mipmaps:
mipMapMode: 0
enableMipMap: 1
sRGBTexture: 1
linearTexture: 0
fadeOut: 0
borderMipMap: 0
mipMapsPreserveCoverage: 0
alphaTestReferenceValue: 0.5
mipMapFadeDistanceStart: 1
mipMapFadeDistanceEnd: 3
bumpmap:
convertToNormalMap: 0
externalNormalMap: 0
heightScale: 0.25
normalMapFilter: 0
isReadable: 0
grayScaleToAlpha: 0
generateCubemap: 6
cubemapConvolution: 0
seamlessCubemap: 0
textureFormat: 1
maxTextureSize: 2048
textureSettings:
serializedVersion: 2
filterMode: -1
aniso: -1
mipBias: -1
wrapU: -1
wrapV: -1
wrapW: -1
nPOTScale: 1
lightmap: 0
compressionQuality: 50
spriteMode: 0
spriteExtrude: 1
spriteMeshType: 1
alignment: 0
spritePivot: {x: 0.5, y: 0.5}
spritePixelsToUnits: 100
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
spriteGenerateFallbackPhysicsShape: 1
alphaUsage: 1
alphaIsTransparency: 1
spriteTessellationDetail: -1
textureType: 0
textureShape: 1
maxTextureSizeSet: 0
compressionQualitySet: 0
textureFormatSet: 0
platformSettings:
- buildTarget: DefaultTexturePlatform
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- buildTarget: Standalone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
spriteSheet:
serializedVersion: 2
sprites: []
outline: []
physicsShape: []
spritePackingTag:
userData:
assetBundleName:
assetBundleVariant: